Repository: vegesm/first-cc-gcc Branch: master Commit: 4b3d6c461f18 Files: 28 Total size: 81.5 KB Directory structure: gitextract_9angfijl/ ├── .gitignore ├── Makefile ├── README.md ├── cc ├── emulator/ │ ├── cpfile │ ├── emucat │ └── emucc ├── examples/ │ ├── fizzbuzz.c │ └── helloworld.c ├── fix_tab.sh └── src/ ├── c0.h ├── c00.c ├── c01.c ├── c02.c ├── c03.c ├── c0t.c ├── c0t.s ├── c1.h ├── c10.c ├── c11.c ├── c1t.c ├── c1t.s ├── cctab.s ├── config.h ├── cvopt.c ├── efftab.s ├── regtab.s └── sptab.s ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .idea *.tmp docs *.exe src/*_fixed.s ================================================ FILE: Makefile ================================================
# Builds the two compiler passes (c0, c1) and the cvopt helper that
# fix_tab.sh pipes the code tables through.
CC = gcc
# -m32 is mandatory: the sources assume pointers and ints have the same size.
CFLAGS =-Wno-implicit-int -Wno-int-conversion -m32

# ':=' expands each wildcard once at parse time instead of on every reference.
c0_src := $(wildcard src/c0*.c)
c1_src := $(wildcard src/c1*.c)
c1_tabs := $(wildcard src/*tab.s)
c1_tabs_fixed := $(c1_tabs:%tab.s=%tab_fixed.s)

# The *_fixed.s recipe redirects into $@; without this, a failed run would
# leave a truncated file behind that looks up to date.
.DELETE_ON_ERROR:

.PHONY: all clean

all: c0 c1

# src/c0.h includes config.h, so c0 has to be rebuilt when either changes.
c0: $(c0_src) src/c0.h src/config.h
	$(CC) $(CFLAGS) -o $@ $(c0_src)

c1: $(c1_src) $(c1_tabs_fixed) src/c1.h
	$(CC) $(CFLAGS) -o $@ $(c1_src) $(c1_tabs_fixed)

cvopt: src/cvopt.c
	$(CC) $(CFLAGS) -o $@ $<

# fix_tab.sh runs ./cvopt, so both are prerequisites; $< is still the table.
%tab_fixed.s: %tab.s cvopt fix_tab.sh
	./fix_tab.sh $< > $@

# Removes only the files this Makefile creates.
clean:
	$(RM) c0 c1 cvopt $(c1_tabs_fixed)
================================================ FILE: README.md ================================================ # first-cc-gcc A port of the [earliest C compiler](https://www.bell-labs.com/usr/dmr/www/primevalC.html) to modern GCC. The compiler outputs PDP-11 assembly code that can be compiled and run on a PDP-11 emulator (check out [c72](https://github.com/vegesm/c72) if you want x86 code that runs on current Linux). The compiler runs only in 32 bit mode as the original code assumes that the pointer size and word size are the same. 
## Usage To compile the compiler and run it, simply do: ```shell make ./cc examples/fizzbuzz.c > fizzbuzz.s ``` Note: if you get errors on missing "bits/libc-header-start.h" headers, make sure you have the 32-bit libc installed. ### Emulator The hard part is to set up an emulator, transfer the file to it(!) and run the assembler. A very [early UNIX implementation](https://github.com/qrush/unix) based on SIMH is available. For Windows, there is also a [pre-built binary](http://sourceforge.net/project/downloading.php?group_id=204974&filename=Research-unixv1-0.3.exe&a=25520957). I could not get the tape emulators working, so I ended up with a hacky solution to transfer files. The simulator lets you log in via telnet, so the files are copied by starting up a text editor on the simulator, streaming the characters into it, and then saving and closing the file. Also, if you close the connection the session is lost, so it is important to keep the connection to the simulator alive with a hack using ncat. ```shell # Start emulator pdp11 simh.cfg # Open a pipe to the simulator # If you use the prebuilt Windows simulator, use port 12323 ncat -lk -p 5556 | ncat localhost 5555 # send login username to emulator echo root | emulator/emucat # copy file over by typing it into ed emulator/cpfile fizzbuzz.s /fizzbz.s # call assembler and linker emulator/emucc /fizzbz.s # execute the compiled program echo a.out | emulator/emucat ``` Note that the file is called `fizzbz.s` on the emulator. This is because the UNIX used here only handles filenames up to 8 characters long! ## Old C features This version of C is from around 1972. 
While the general syntax is pretty much the same as today, there are tons of missing features: - no preprocessor, no for loops - even though there are keywords for `float` and `double`, floating point calculations are not implemented; you cannot even write a floating point literal - the type system is very weak: pointers, chars, ints can be freely converted into one another - types of the function parameters are not checked, anything can be passed to any function - compound assignment operators are reversed, they are `=+`, `=*` - only integer global variables can be defined, and the syntax is strange (see helloworld example) - variable names can be of any length but only the first 8 characters are used; e.g. `deadbeef1` and `deadbeef2` are effectively the same variable Interestingly, some features already existed in this early version: - function pointers - the ABI is nearly the same as today's 32-bit ABI - `a[b]` is implemented as `*(a+b)` ================================================ FILE: cc ================================================ #!/bin/sh BUILD_FOLDER=. $BUILD_FOLDER/c0 $1 $1.tmp $BUILD_FOLDER/c1 $1.tmp | tr -d '\r' ================================================ FILE: emulator/cpfile ================================================ #!/bin/bash echo rm $2 | emulator/emucat echo ed $2 | emulator/emucat echo a | emulator/emucat # slow down transfer to prevent dropping characters cat $1 | (lines=0; while read line; do echo $line; if [ $((++lines % 5)) -eq 0 ]; then sleep 1; fi; done) | emulator/emucat echo | emulator/emucat echo .
| emulator/emucat echo w | emulator/emucat echo q | emulator/emucat ================================================ FILE: emulator/emucat ================================================ #!/bin/bash ncat localhost 5556 ================================================ FILE: emulator/emucc ================================================ #!/bin/bash echo as -u $1 | emulator/emucat NAME=`basename $1 .s` echo mv a.out $NAME.o | emulator/emucat echo ld /usr/lib/crt0.o $NAME.o -lc -l | emulator/emucat ================================================ FILE: examples/fizzbuzz.c ================================================ main() { auto n,i; n = 20; i = 1; /* no for loops */ while (i <= n) { if (i % 15 == 0) { printf("fizzbuzz\n"); } else if (i % 3 == 0) { printf("fizz\n"); } else if (i % 5 == 0) { printf("buzz\n"); } else { printf("%d\n", i); } i++; } } ================================================ FILE: examples/helloworld.c ================================================ main() { extern printf; printf("Hello world!\n"); printf("%d\n", 4); } globint 4; globarr[] 1, 2, 3; ================================================ FILE: fix_tab.sh ================================================ #!/bin/sh UNDERSCORE=_ if [ "$(expr substr $(uname -s) 1 5)" = "Linux" ]; then UNDERSCORE= fi # convert original asm listing and remove comments cat $1 | ./cvopt | sed 's|/.*||g' | \ # convert the lookup table at the start to use .int directives sed -r 's/([0-9]+)\.;[[:blank:]]+([A-Za-z0-9]+)/.int \1, \2/g' | \ # replace .even directive with .balign, replace first row of the lookup table sed 's/\.even/.balign 4/g' | sed -r 's/_(eff|reg|sp|cc)tab=\.;[[:blank:]]*\.\+2/_\1tab:.int .,.+4/g' | \ # replace single `0` characters with .int directive sed -r 's/^([[:blank:]]+)0$/\1.int 0/g' | \ # remove leading underscore on linux sed -r 's/_(eff|reg|sp|cc)tab/'$UNDERSCORE'\1tab/' ================================================ FILE: src/c0.h ================================================ 
#include #include "config.h" //#define DEBUG /* c00.c */ setup(); init(char[], int); int *lookup(); symbol(); subseq(int,int,int); getstr(); getcc(); mapch(int); tree(); void declare(int); /* c01.c */ void build(int); int *convert(int[], int, int, int); chkw(int[]); lintyp(int); error(char[]); error1(char[],int); error2(char[],int,int); int *block(int, ...); chklval(int[]); notcompat(int, int); max(int, int); /* c02.c */ function(char[]); void extdef(); void statement(); pexpr(); pswitch(); blkhed(); blkend(); errflush(int); declist(); easystmt(); branch(int); /* c03.c */ jumpc(int[], int, int); rcexpr(int[], int); jump(int); label(int); retseq(); slabel(); void setstk(int); defvec(); defstat(int[]); length(int); rlength(int); putwrd(int); printn(int,int); void cc_printf(char[], ...); cc_putchar(int); /* globals */ extern ossiz; extern ospace[]; extern regtab; extern efftab; extern cctab; extern sptab; extern symbuf[]; extern pssiz; extern namsiz; extern nwps; extern hshused; extern hshsiz; extern hshlen; extern hshtab[]; extern *space; extern *cp; extern cmsiz; extern cmst[]; extern ctyp; extern isn; extern swsiz; extern swtab[]; extern *swp; extern contlab; extern brklab; extern deflab; extern nreg; extern maprel[]; extern nauto; extern stack; extern peeksym; extern peekc; extern eof; extern line; extern int *csym; extern cval; extern ncpw; extern nerror; extern FILE *fout; extern int *paraml; extern int *parame; extern tmpfil; /* code tables */ extern char ctab[]; extern int opdope[]; extern int cvtab[]; #define printf cc_printf #define putchar cc_putchar ================================================ FILE: src/c00.c ================================================ /* C compiler Copyright 1972 Bell Telephone Laboratories, Inc. */ #include "c0.h" #include ossiz = 250; int ospace[250]; /* fake */ /* * Adds the string s with id t to the hash table. 
*/ init(s, t) char s[]; { char *sp; int *np, i; /* copy s to symbuf */ i = namsiz; sp = symbuf; while(i--) if ((*sp++ = *s++)=='\0') --s; np = lookup(); *np++ = 1; *np = t; } /* * First pass of the C compiler. It parses the input and generates an intermediate(?) output, * containing parsed expression trees with some additional assembly code. * * Short overview: the compiler parses global elements, which can be either functions or * global variable declarations. In functions, each expression is parsed into a tree, which is stored * in ospace. Interestingly, the next pass of the compiler is expected to be loaded at the same location as this pass. * So, the child node pointers in the tree are simply saved directly in the output and are expected to be loaded * back at the same memory location. * * The tree parser also does a rough estimation of how many registers are needed for calculating a tree * using the Sethi-Ullman algorithm. Additionally, sides of a binary operation can be flipped such that * the more difficult subtree comes first. See the C compiler tour in the UNIX manual. */ main(argc, argv) char *argv[]; { if(argc<3) { error("Arg count"); exit(1); } if(freopen(argv[1], "r", stdin)==NULL) { error1("Can't find %s", argv[1]); exit(1); } if((fout=fopen(argv[2], "wb"))==NULL) { error1("Can't create %s", argv[2]); exit(1); } init("int", 0); init("char", 1); init("float", 2); init("double", 3); /* init("long", 4); */ init("auto", 5); init("extern", 6); init("static", 7); init("goto", 10); init("return", 11); init("if", 12); init("while", 13); init("else", 14); init("switch", 15); init("case", 16); init("break", 17); init("continue", 18); init("do", 19); init("default", 20); while(!eof) { extdef(); blkend(); } fflush(stdout); exit(nerror!=0); } /* * Looks up an element in the hash table. The key is in symbuf. * Returns a pointer to the 4 word long data section. * Following these 4 words, another 4 words contain the key. 
*/ int *lookup() { auto i, j, *np, *sp, *rp; i = 0; sp = symbuf; j = nwps; while(j--) i += *sp++; if (i<0) i = -i; i %= hshsiz; /* the hash of symbuf */ i *= pssiz; while(*(np = &hshtab[i+4])) { sp = symbuf; j = nwps; while(j--) if (*np++ != *sp++) goto no; /* key does not match, go to next one */ return(&hshtab[i]); no: if ((i += pssiz) >= hshlen) i = 0; } /* not found, add new element */ if(hshused++ > hshsiz) { error("Symbol table overflow"); exit(1); } rp = np = &hshtab[i]; sp = symbuf; j = 4; while(j--) /* clear out data */ *np++ = 0; j = nwps; while(j--) /* copy key into &hshtab[i+4] */ *np++ = *sp++; return(rp); } /* * The lexer, returns the opcode of the next symbol. If the caller does not want to use the symbol, * it can "push it back" using the peeksym global variable. * * The return value is the opcode of the current symbol. See c0t.c for the mapping between * the symbols and numbers. * If the current symbol is a number/character literal, the cval variable is set to the numeric value. * If the current symbol is a string, cval contains its label. * If the current symbol is a keyword, cval contains its id. * If the current symbol is a name, csym will point to the corresponding entry in the hashtable. * See csym comments at the bottom for its contents. 
*/ symbol() { auto b, c; char *sp; if (peeksym>=0) { /* if we have a peeked symbol, return that */ c = peeksym; peeksym = -1; return(c); } if (peekc) { /* use peeked character, if has one */ c = peekc; peekc = 0; } else if (eof) return(0); else c = getchar(); loop: switch(ctab[c]) { case 125: /* newline */ line++; case 126: /* white space */ c = getchar(); goto loop; case 0: /* EOF */ eof++; return(0); case 40: /* + */ return(subseq(c,40,30)); case 41: /* - */ return(subseq(c,41,31)); case 80: /* = */ if (subseq(' ',0,1)) return(80); c = symbol(); if (c>=40 & c<=49) /* c is binary operator */ return(c+30); if (c==80) /* = */ return(60); peeksym = c; return(80); case 63: /* < */ if (subseq(c,0,1)) return(46); return(subseq('=',63,62)); case 65: /* > */ if (subseq(c,0,1)) return(45); return(subseq('=',65,64)); case 34: /* ! */ return(subseq('=',34,61)); case 43: /* / */ if (subseq('*',1,0)) return(43); com: /* inside a comment */ c = getchar(); com1: if (c=='\0') { eof++; error("Nonterminated comment"); return(0); } if (c=='\n') line++; if (c!='*') goto com; c = getchar(); if (c!='/') goto com1; c = getchar(); goto loop; case 124: /* number */ cval = 0; if (c=='0') b = 8; else b = 10; while(ctab[c]==124) { cval = cval*b + c -'0'; c = getchar(); } peekc = c; return(21); case 122: /* " */ return(getstr()); case 121: /* ' */ return(getcc()); case 123: /* letter */ sp = symbuf; while(ctab[c]==123 | ctab[c]==124) { /* while c is alphanumeric */ if (sp<((char *)symbuf)+namsiz) *sp++ = c; c = getchar(); } while(sp<((char *)symbuf)+namsiz) *sp++ = '\0'; peekc = c; csym = lookup(); /* find in hashtable */ if (csym[0]==1) { /* keyword */ cval = csym[1]; return(19); } return(20); case 127: /* unknown */ error("Unknown character"); c = getchar(); goto loop; } return(ctab[c]); } /* * Peeks at the next char and if it is c then eats it and returns b, otherwise returns a. * Useful for two character symbols, e.g. distinguish between ! 
and = * call subseq('=', note_equal_code, logical_not_code). */ subseq(c,a,b) { if (!peekc) peekc = getchar(); if (peekc != c) return(a); peekc = 0; return(b); } /* * Gets a string. It assumes the opening quotation mark has been already processed. */ getstr() { auto c; printf(".data;l%d:.byte ", cval=isn++); while((c=mapch('"')) >= 0) printf("%o,", c); printf("0;.even;.text\n"); return(22); } /* * Reads a character literal. Assumes opening ' has been read already. */ getcc() { auto c, cc; char *cp; cval = 0; cp = &cval; cc = 0; while((c=mapch('\'')) >= 0) if(cc++ < ncpw) *cp++ = c; if(cc>ncpw) error("Long character constant"); return(21); } /* * Processes a character from a string/character literal. c contains the delimiter char. * This function handles mapping of escape sequences. */ mapch(c) { auto a; if((a=getchar())==c) return(-1); switch(a) { case '\n': case 0: error("Nonterminated string"); peekc = a; return(-1); case '\\': switch (a=getchar()) { case 't': return('\t'); case 'n': return('\n'); case '0': return('\0'); case 'r': return('\r'); case '\n': line++; return('\n'); } } return(a); } /* * Builds an expression tree. The outline of the algorithm: * There are three stacks: * cmst - tree node stack, contains the partially built parts of the expression tree. * opst - operator stack, contains the ids of the operators. The bottom element is the EOF operator * prst - precedence stack, contains the precedences of operators. Note that the precedence stack only contains * element, if the precedence has increased, so it is not in one-to-one correspondence with opst. * * The algorithm goes over the operator produced by the symbol function. Leaf nodes (numbers, * string/char literals, names) are placed on the node stack. If an operator is encountered, * it is placed on the operator stack if its precedence is higher than the current precedence. 
* Otherwise, the operator and tree node stack is unwinded by building partial subtrees, * until the top of the prst is smaller than current operator's precedence. */ tree() { auto *op, opst[20], *pp, prst[20], andflg, o, p, ps, os; space = ospace; op = opst; /* top of operator stack */ pp = prst; /* top of precedence stack */ cp = cmst; /* top of tree node stack */ *op = 200; /* stack EOF */ *pp = 06; andflg = 0; advanc: switch (o=symbol()) { /* name */ case 20: if (*csym==0) /* storage not yet decided */ if((peeksym=symbol())==6) /* (, mark function calls as extern */ *csym = 6; /* extern */ else { if(csym[2]==0) /* unseen so far */ csym[2] = isn++; } if(*csym==6) /* extern */ *cp++ = block(5,20,csym[1],0,*csym, csym[4],csym[5],csym[6],csym[7]); else *cp++ = block(2,20,csym[1],0,*csym,csym[2]); goto tand; /* short constant */ case 21: case21: *cp++ = block(1,21,ctyp,0,cval); /* ctyp is always 0 */ goto tand; /* string constant */ case 22: *cp++ = block(1,22,17,0,cval); /* 17 is char[] */ tand: if(cp>=cmst+cmsiz) { error("Expression overflow"); exit(1); } if (andflg) goto syntax; andflg = 1; goto advanc; /* ++, -- */ case 30: case 31: if (andflg) /* convert to postfix */ o += 2; goto oponst; /* ! 
*/ case 34: if (andflg) goto syntax; goto oponst; /* - */ case 41: if (!andflg) { peeksym = symbol(); if (peeksym==21) { /* negative literal */ peeksym = -1; cval = -cval; goto case21; } o = 37; } andflg = 0; goto oponst; /* & */ /* * */ case 47: case 42: if (andflg) andflg = 0; else if(o==47) o = 35; else o = 36; goto oponst; /* ( */ case 6: if (andflg) { /* this is a function call */ o = symbol(); if (o==7) /* ) */ o = 101; else { /* 101 - call without args */ peeksym = o; o = 100; andflg = 0; } } goto oponst; /* ) */ /* ] */ case 5: case 7: if (!andflg) goto syntax; goto oponst; } /* binary operators */ if (!andflg) goto syntax; andflg = 0; oponst: /* place operator on stack */ p = (opdope[o]>>9) & 077; /* extract operator precedence */ opon1: ps = *pp; /* currently highest precedence */ if (p>ps | p==ps & (opdope[o]&0200)!=0) { /* right-assoc */ putin: switch (o) { case 6: /* ( */ case 4: /* [ */ case 100: /* call */ p = 04; } if(op>=opst+20) { /* 20 is the size of opstack */ error("expression overflow"); exit(1); } *++op = o; *++pp = p; goto advanc; } --pp; /* pop precedence stack */ switch (os = *op--) { /* EOF */ case 200: peeksym = o; return(*--cp); /* call */ case 100: if (o!=7) /* unmatching parenthesis */ goto syntax; build(os); goto advanc; /* mcall */ case 101: *cp++ = 0; /* 0 arg call */ os = 100; goto fbuild; /* ( */ case 6: if (o!=7) /* unmatching parenthesis */ goto syntax; goto advanc; /* [ */ case 4: if (o!=5) goto syntax; build(4); goto advanc; } fbuild: build(os); goto opon1; /* unwinds precedence stack till at the same level as o */ syntax: error("Expression syntax"); errflush(o); return(0); } /* * Processes a variable declaration, the preceding type/storage keyword has been processed already. * kw - the id of the preceding type/storage keyword, 8 for function parameter list. */ void declare(kw) { int o; while((o=symbol())==20) { /* name */ if(kw>=5) { /* type or storage location keyword? 
*/ if(*csym>0) error1("%p redeclared", &csym[4]); /* storage area redeclared */ *csym = kw; } else { if ((csym[1]&017)!=0) /* type already defined, adding pointer behavior is allowed */ error1("%p redeclared", &csym[4]); csym[1] |= csym[1]&0760 | kw; // set kw to lower 4 bits if (*csym==0) *csym = -2; } /* add pointer indirection */ while((o=symbol())==4) { /* [ */ if((o=symbol())==21) { /* const */ if(csym[1]>=020) error("Bad vector"); csym[3] = cval; o = symbol(); } if (o!=5) /* ] */ goto syntax; csym[1] += 020; } if(kw==8) { /* parameter */ *csym = -1; if (paraml==0) /* paraml points to the first element in the parameter list */ paraml = csym; else *parame = csym; /* set previous parameter's first word to point to the current parameter */ parame = csym; } if (o!=9) /* , */ break; } if(o==1 & kw!=8 | o==7 & kw==8) /* not parameter list and ; or parameter list and ) */ return; syntax: error("Declaration syntax"); errflush(o); } /* constants for code generator tables */ regtab = 0; efftab = 1; cctab = 2; sptab = 3; /* hash table */ symbuf[2]; /* buffer for the key to look up in has table. 8 byte long, should be 8/sizeof(int); original value was 4 */ pssiz = 8; /* size of an entry in the hashtable, should be 4 + nwps */ namsiz = 8; /* maximum length of the key in bytes */ nwps = 2; /* number of words per symbuf - originally 4 */ hshused = 0; /* number of elements in the hash table */ hshsiz= 100; /* maximum number of elements in the table */ hshlen =800; /* size of the table in word, equals to pssiz*hshsiz */ hshtab[800]; /* The hash table for symbols. For eahc entry, the first 4 bytes are the data, the next 4 bytes are the key. 
*/ int *space= 0; int *cp= 0; /* top of the cmst stack */ cmsiz= 40; /* size of the cmst stack */ cmst[40]; /* the tree node stack, contains pointers to the nodes*/ ctyp = 0; /* id of the int type, constant */ isn = 1; /* current label number */ swsiz = 120; swtab[120]; int *swp = 0; contlab = 0; /* label for a continue statement in the current loop */ brklab = 0; /* label for a break statement in the current loop */ deflab = 0; /* label for a deafult statement in the current switch */ nreg = 4; /* number of general registers available */ maprel[]={ 60,61,64,65,62,63,68,69,66,67}; /* maps binary relations to the their flipped pairs */ nauto = 0; stack = 0; peeksym = -1; /* peeked symbol */ peekc= 0; /* peeked character */ eof = 0; /* true if reached end of file */ line = 1; /* current line */ int *csym = 0; /* current symbol see meaning below */ cval = 0; // contains the currently read character literal ncpw = 2; /* number of characters per word */ nerror = 0; /* number of errors during parsing */ FILE *fout; /* putchar prints characters to this file */ int *paraml; /* head of the parameter list */ int *parame; /* last element in the parameter list */ /* * csym - current symbol description, points to the corresponding element in the hash table. * Meaning of the 4 words * 0 - if 1 then keyword, otherwise storage scope (keyword ID), 8 for function parameters * -1 (temporary) for function parameters csym[0] forms a linked list, -1 marks the end * -2 default storage scope (auto) * 1 - keyword * 2 - label * 5 - auto * 6 - extern * 7 - static * 10 - function parameter (set by blkhed) * 1 - type description * lower 4 bits contain raw type (char/double/int) * then 020 is added for every indirection * e.g. 
020 refers to int[] * 2 - location/label * in case of parameters/auto the offset from the stack frame * in case of statics, the label of the variable * 3 - the length in case of arrays, 0 otherwise * 4 - the name of the symbol * * cval - value in the current symbol * if number literal - the number * if char literal - character ascii code * if keyword - the keyword ID * if string - the label in the assembly output */ ================================================ FILE: src/c01.c ================================================ #include "c0.h" #include #include /* * Builds a new node of the operator tree and pushes it onto the cmst stack. * The necessary parameters are popped from cmst. */ void build(op) { auto *p1, t1, d1, *p2, t2, d2, t; auto d, dope, lr, cvn; /* replace a[b] with *(a+b) */ if (op==4) { /* [] */ build(40); /* + */ op = 36; } dope = opdope[op]; if ((dope&01)!=0) { /* binary */ p2 = *--cp; /* second subtree */ if(p2 != 0) { /* if op was mcall originally, there is a fake 0 null pointer here, ignore it */ t2 = p2[1]; d2 = p2[2]; } } p1 = *--cp; /* first subtree */ t1 = p1[1]; d1 = p1[2]; switch (op) { /* , */ case 9: *cp++ = block(2, 9, 0, 0, p1, p2); return; /* ? */ case 90: if (*p2!=8) error("Illegal conditional"); goto goon; /* call */ case 100: *cp++ = block(2,100,t1,24,p1,p2); return; /* * */ case 36: if ((t1 -= 16)<0) { error("Illegal indirection"); t1 += 16; } if (*p1!=20 & d1==0) d1 = 1; *cp++ = block(1,36,t1,d1,p1); return; /* & unary */ case 35: if (*p1 == 36) { /* * - shorten &*p to p */ *cp++ = p1[3]; return; } if (*p1 == 20) { /* name */ *cp++ = block(1,p1[3]==5?29:35,t1+16,1,p1); /* is it auto? */ return; } error("Illegal lvalue"); } goon: if ((dope&02)!=0) /* lvalue needed on left? */ chklval(p1); if ((dope&020)!=0) /* word operand on left? */ chkw(p1); if ((dope&040)!=0) /* word operand on right? */ chkw(p2); if ((dope&01)!=0) { /* binary op? */ cvn = cvtab[9*lintyp(t1)+lintyp(t2)]; /* conversion from t1 to t2 ?? 
*/ if ((dope&010)!=0) { /* assignment? */ t = t1; lr = 1; cvn &= 07; } else { t = (cvn&0100)!=0? t2:t1; /* who gets the result */ lr = cvn&0200; cvn = (cvn>>3)&07; } if (cvn) { if (cvn==07) { error("Illegal conversion"); goto nocv; } cvn += (dope&010)!=0? 83:93; if (lr) { /* left argument determines the target type */ t2 = t; d2 = (p2=convert(p2, t, d2, cvn))[2]; } else { t1 = t; d1 = (p1=convert(p1, t, d1, cvn))[2]; } nocv:; } if (d2>d1 & (dope&0100)!=0) { /* flip operation, if second tree is more difficult */ if ((dope&04)!=0) /* relational? */ op = maprel[op-60]; d = d1; d1 = d2; d2 = d; d = p1; p1 = p2; p2 = d; d = t1; t1 = t2; t2 = d; } if (d1==d2) /* calculating registers needed for this node using Sethi-Ullman */ d = d1+1; else d = max(d1,d2); if ((dope&04)!=0) t = 0; /* relational ops have integer type */ *cp++ = block(2,op,t,d,p1,p2); return; } *cp++ = block(1,op,t1,d1==0?1:d1,p1); /* unary operator */ } /* Creates a node for type conversion */ int *convert(p, t, d, cvn) int p[]; { auto c; if (*p==21) { /* constant */ c = p[3]; switch(cvn) { case 99: /* c18 */ c <<= 1; case 98: /* c14 */ c <<= 1; case 97: /* c12 */ c <<= 1; p[3] = c; return(p); } } return(block(1, cvn, t, max(1,d), p)); } /* check if p is a word type */ chkw(p) int p[]; { auto t; if ((t=p[1])>1 & t<16) error("Integer operand required"); } /* Compresses type id to one used in cvtable */ lintyp(t) { return(t<16? t:(t<32? t-12: 8)); } error(s) char s[];{ error2(s, 0, 0); } error1(s, p1) char s[];{ error2(s, p1, 0); } error2(s, p1, p2) char s[];{ FILE *f; nerror++; fflush(fout); f = fout; fout = stderr; printf("%d: ", line); printf(s, p1, p2); putchar('\n'); fflush(stderr); fout = f; } /* * Creates a new node and appends it to the end of *ospace. The parameters of the node * are passed after the argument n, which contains the number of optional parameters. * Each node has 3 mandatory parameters and any number of optional ones. 
The mandatory parameters: * op - operator id * t - type * d - difficulty, number of registers to calculate the node, or statement difficulty level in c1 * These are followed by the (op dependent) optional parameters, which usually are the subtrees. */ int *block(int n, ...) { auto *p; va_list arguments; #ifdef DEBUG va_list dbg_args; va_start(dbg_args, n); int op = va_arg(dbg_args, int); int type = va_arg(dbg_args, int); int regcnt = va_arg(dbg_args, int); printf("loc %d: %d t=%d d=%d nump=%d ", space-ospace, op, type, regcnt, n); for (int i = 0; i < n; ++i) { int d = va_arg(dbg_args, int); printf("%d (%d) ", d, (int*)d-ospace); } printf("\n"); va_end(dbg_args); #endif p = space; va_start(arguments, n); n += 3; if(space+n >= ospace+ossiz) { error("Expression overflow"); exit(1); } while(n--) *space++ = va_arg(arguments, int); va_end(arguments); return(p); } /* check if p is an lvalue (name or pointer) */ chklval(p) int p[]; { if (*p!=20) if (*p!=36) error("Lvalue required"); } max(a, b) { if (a>b) return(a); return(b); } ================================================ FILE: src/c02.c ================================================ #include "c0.h" /* P * arses a function, the function name and the opening parenthesis of the * argument list has already been read. name contains the name of the function. */ function(name) char name[]; { #ifdef UNIXV5_ABI printf( ".text; %p:\n", name); #else printf( ".data; %p:1f\n.text; 1:", name); #endif printf("mov r5,-(sp); mov sp,r5\n"); /* set up stack frame */ declare(8); /* read parameter list */ declist(); /* type declarations of parameters */ statement(1); retseq(); } /* Parses the next function/global variable definition. 
*/ void extdef() { auto o, c, *cs; char *s; if(((o=symbol())==0) || o==1) /* EOF */ return; if(o!=20) /* not a name -> syntax error */ goto syntax; csym[0] = 6; cs = &csym[4]; // name of the symbol printf(".globl %p\n", cs); s = ".data; %p:1f\n"; switch(o=symbol()) { case 6: /* ( - function definition*/ function(cs); return; case 21: /* const - variable has default value*/ printf(".data; %p: %o\n", cs, cval); if((o=symbol())!=1) /* ; */ goto syntax; return; case 1: /* ; */ printf(".bss; %p: .=.+2\n", cs); /* unitialized variable */ return; case 4: /* [ */ c = 0; if((o=symbol())==21) { /* const */ c = cval<<1; /* multiply by two (number of bytes per word), should be changed for 32bit systems */ o = symbol(); } if(o!=5) /* ] */ goto syntax; printf(s, cs); if((o=symbol())==1) { /* ; */ printf(".bss; 1:.=.+%o\n", c); return; } /* symbol list, e.g.: arrname[] 23, 43 ,5; */ printf("1:"); while(o==21) { /* const */ printf("%o\n", cval); c -= 2; if((o=symbol())==1) /* ; */ goto done; if(o!=9) /* , */ goto syntax; else o = symbol(); } goto syntax; done: if(c>0) printf(".=.+%o\n", c); return; case 0: /* EOF */ return; } syntax: error("External definition syntax"); errflush(o); statement(0); } /* * Parses (a block of) statements. 
* d - true if this is a start of a function block */ void statement(d) { int o, o1, o2, o3, *np; stmt: switch(o=symbol()) { /* EOF */ case 0: error("Unexpected EOF"); /* ; */ case 1: /* } */ case 3: return; /* { */ case 2: { if(d) blkhed(); /* process definitions at the start of the funciton */ /* recursively process this block of code */ while (!eof) { if ((o=symbol())==3) /* } */ goto bend; peeksym = o; statement(0); } error("Missing '}'"); bend: return; } /* keyword */ case 19: switch(cval) { /* goto */ case 10: o1 = block(1,102,0,0,tree()); rcexpr(o1, regtab); goto semi; /* return */ case 11: if((peeksym=symbol())==6) /* ( */ rcexpr(pexpr(), regtab); retseq(); goto semi; /* if */ case 12: jumpc(pexpr(), o1=isn++, 0); statement(0); if ((o=symbol())==19 & cval==14) { /* else */ o2 = isn++; (easystmt()?branch:jump)(o2); /* branch can only jump to a close location */ label(o1); statement(0); label(o2); return; } peeksym = o; label(o1); return; /* while */ case 13: o1 = contlab; o2 = brklab; label(contlab = isn++); jumpc(pexpr(), brklab=isn++, 0); o3 = easystmt(); statement(0); (o3?branch:jump)(contlab); label(brklab); contlab = o1; brklab = o2; return; /* break */ case 17: if(brklab==0) error("Nothing to break from"); jump(brklab); goto semi; /* continue */ case 18: if(contlab==0) error("Nothing to continue"); jump(contlab); goto semi; /* do */ case 19: o1 = contlab; o2 = brklab; contlab = isn++; brklab = isn++; label(o3 = isn++); statement(0); label(contlab); contlab = o1; if ((o=symbol())==19 & cval==13) { /* while */ jumpc(tree(), o3, 1); label(brklab); brklab = o2; goto semi; } goto syntax; /* case */ case 16: if ((o=symbol())!=21) /* constant */ goto syntax; if ((o=symbol())!=8) /* : */ goto syntax; if (swp==0) { error("Case not in switch"); goto stmt; } if(swp>=swtab+swsiz) { error("Switch table overflow"); } else { *swp++ = isn; /* add value and label to switch table */ *swp++ = cval; label(isn++); } goto stmt; /* switch */ case 15: o1 = brklab; brklab = 
isn++; np = pexpr(); if (np[1]>1 & np[1]<16) error("Integer required"); rcexpr(np, regtab); pswitch(); brklab = o1; return; /* default */ case 20: if (swp==0) error("Default not in switch"); if ((o=symbol())!=8) /* : */ goto syntax; deflab = isn++; label(deflab); goto stmt; } error("Unknown keyword"); goto syntax; /* name */ case 20: if (peekc==':') { /* label */ peekc = 0; if (csym[0]>0) { error("Redefinition"); goto stmt; } csym[0] = 2; csym[1] = 020; /* int[] */ if (csym[2]==0) csym[2] = isn++; slabel(); goto stmt; } } peeksym = o; rcexpr(tree(), efftab); goto semi; semi: if ((o=symbol())!=1) /* ; */ goto syntax; return; syntax: error("Statement syntax"); errflush(o); goto stmt; } /* * Parses an expression enclosed in parenthesis. * Returns the address to the parsed tree. */ pexpr() { auto o, t; if ((o=symbol())!=6) /* ( */ goto syntax; t = tree(); if ((o=symbol())!=7) /* ) */ goto syntax; return(t); syntax: error("Statement syntax"); errflush(o); return(0); } /* Parses the contents of a switch block. */ pswitch() { int *sswp, dl, cv, swlab; /* ?, default label, current ?, switch label */ sswp = swp; /* save swp */ if (swp==0) swp = swtab; swlab = isn++; printf("jsr pc,bswitch; l%d\n", swlab); dl = deflab; /* save deflab */ deflab = 0; statement(0); if (!deflab) { deflab = isn++; label(deflab); } /* generate switch table */ printf("L%d:.data;L%d:", brklab, swlab); while(swp>sswp & swp>swtab) { cv = *--swp; printf("%o; l%d\n", cv, *--swp); } printf("L%d; 0\n.text\n", deflab); deflab = dl; swp = sswp; } /* * Function block head: processes variable definitions. */ blkhed() { int o, al, pl, *cs, hl; declist(); stack = al = -2; pl = 4; while(paraml) { *parame = 0; /* set the end of linked list to 0 (originally -1), will break the loop */ paraml = *(cs = paraml); /* next element in list */ cs[2] = pl; /* location relative to stack frame */ *cs = 10; pl += rlength(cs[1]); } cs = hshtab; hl = hshsiz; while(hl--) { /* go through symbol table, i.e. 
all defined names */ if (cs[4]) switch(cs[0]) { /* if defined */ /* sort unmentioned */ case -2: cs[0] = 5; /* auto */ /* auto */ case 5: if (cs[3]) { /* array */ al -= (cs[3]*length(cs[1]-020)+1) & 077776; /* push array on stack */ setstk(al); defvec(al); } cs[2] = al; al -= rlength(cs[1]); goto loop; /* parameter */ case 10: cs[0] = 5; goto loop; /* static */ case 7: cs[2] = isn++; defstat(cs); goto loop; loop:; } cs = cs+pssiz; } setstk(al); } /* * Clears all elements from the symbol table, * except keywords. */ blkend() { auto i, hl; i = 0; hl = hshsiz; while(hl--) { if(hshtab[i+4]) if (hshtab[i]==0) error1("%p undefined", &hshtab[i+4]); if(hshtab[i]!=1) { /* not keyword */ hshused--; hshtab[i+4] = 0; } i += pssiz; } } /* Throw away symbols until end of statement. */ errflush(o) { while(o>3) /* ; { } */ o = symbol(); peeksym = o; } /* * Variable declarations. Either function parameter type declaration * or function local variable declarations. */ declist() { auto o; while((o=symbol())==19 & cval<10) /* cval<10 means it is a type/storage area definition (int/char/extern/static) */ declare(cval); peeksym = o; } /* * Detects whether the next statement is expected to be small - i.e. not a compound statement * This usually means it is a single statement or conditional. * e.g. goto, break, or not a label or block */ easystmt() { if((peeksym=symbol())==20) /* name */ return(peekc!=':'); /* not label */ if (peeksym==19) { /* keyword */ switch(cval) case 10: /* goto */ case 11: /* return */ case 17: /* break */ case 18: /* continue */ return(1); return(0); } return(peeksym!=2); /* { */ } /* Emits a branch instruction */ branch(lab) { printf("br L%d\n", lab); } ================================================ FILE: src/c03.c ================================================ #include "c0.h" #include /* * Records a conditional jump. If the jump is short (easystmt is true), * it generates a branch isntruction, otherwise a jump instruction. 
* * tree - tree containing a conditional expression * lbl - where to jump * cond - decides whether to jump when condition is true or false */ jumpc(tree, lbl, cond) int tree[]; { rcexpr(block(1,easystmt()+103,tree,lbl,cond),cctab); } /* * Prints the binary representation of the tree. * table - the expression translation table used in the next pass. */ rcexpr(tree, table) int tree[], table; // table: the code generation table { int c, *sp; putchar('#'); c = space-ospace; sp = ospace; putwrd(ospace); /* save start of array location (original code expects it to be loaded at the same place */ putwrd(c); putwrd(tree); putwrd(table); putwrd(line); while(c--) putwrd(*sp++); #ifdef DEBUG printf("\n"); #endif } jump(lab) { printf("jmp\tl%d\n", lab); } label(l) { printf("l%d:", l); } /* Generates code for a return statement. */ retseq() { #ifdef UNIXV5_ABI printf("mov\tr5,sp\nmov\t(sp)+,r5\nrts\tpc\n"); #else printf("jmp\tretrn\n"); #endif } /* Label for a static variable */ slabel() { printf(".data; l%d: 1f; .text; 1:\n", csym[2]); } /* * Reserves space on the stack. * a - amount of bytes to add to the stack */ void setstk(a) { auto ts; ts = a-stack; /* relative distance */ stack = a; switch(ts) { case 0: return; case -2: /* -2 */ printf("tst -(sp)\n"); return; case -4: /* -4 */ printf("cmp -(sp),-(sp)\n"); return; } printf("add $%o,sp\n", ts); } /* define array on stack, simply saves the pointer to the top of the stack */ int defvec() { printf("mov\tsp,r0\nmov\tr0,-(sp)\n"); stack -= 2; } /* * Define static variable. * s - pointer to symbol table entry */ defstat(s) int s[]; { int len; len = length(s[1]); if (s[3]) /* array */ printf(".data; l%d:1f; .bss; 1:.=.+%o; .even; .text\n", s[2], s[3]*len); else /* scalar */ printf(".bss; l%d:.=.+%o; .even; .text\n", s[2], len); } /* * Length of the datatype, t is the type descriptor. 
* The id of the type is the id of the keyword + 020 for every indirection */ length(t) { if (t<0) t += 020; if (t>=020) /* array/pointer */ return(2); switch(t) { case 0: return(2); case 1: return(1); case 2: return(4); case 3: return(8); case 4: return(4); } return(1024); } /* rounded length */ rlength(c) { auto l; return((l=length(c))==1? 2: l); } /* prints the number n in base b */ printn(n,b) { auto a; if(a=n/b) /* assignment, not test for equality */ printn(a, b); /* recursive */ putchar(n%b + '0'); } putwrd(a) { printf("%d;", a); } cc_putchar(int c) { putc(c, fout); } void cc_printf(char *fmt, ...) { static char *s; auto *adx, x, c, *i; va_list arguments; va_start ( arguments, fmt); loop: while((c = *fmt++) != '%') { if(c == '\0') { va_end(arguments); return; } putchar(c); } switch (c = *fmt++) { case 'd': /* decimal */ case 'o': /* octal */ x = va_arg(arguments, int); if(x < 0) { x = -x; if(x<0) { /* - infinity */ if(c=='o') printf("100000"); else printf("-32767"); goto loop; } putchar('-'); } printn(x, c=='o'?8:10); goto loop; case 's': /* string */ x = va_arg(arguments, int); s=x; while(c = *s++) { putchar(c); } goto loop; case 'p': s =va_arg(arguments, int*); putchar('_'); c = namsiz; while(c--) if(*s) putchar(*s++); goto loop; } putchar('%'); fmt--; goto loop; } ================================================ FILE: src/c0t.c ================================================ #include "c0.h" // Converts ascii characters to symbols // 127 means unknown character, 123 are letters, 124 digits char ctab[]={ 0, 127, 127, 127, 127, 127, 127, 127, /* 0 - 7 */ 127, 126, 125, 127, 127, 127, 127, 127, /* 8 - 15 */ 127, 127, 127, 127, 127, 127, 127, 127, /* 16 - 23 */ 127, 127, 127, 127, 127, 127, 127, 127, /* 24 - 31 */ 126, 34, 122, 127, 127, 44, 47, 121, /* 32 - 39 */ 6, 7, 42, 40, 9, 41, 127, 43, /* 40 - 47 */ 124, 124, 124, 124, 124, 124, 124, 124, /* 48 - 55 */ 124, 124, 8, 1, 63, 80, 65, 90, /* 56 - 63 */ 127, 123, 123, 123, 123, 123, 123, 123, /* 64 - 72 */ 
123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 4, 127, 5, 49, 127, 127, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 2, 48, 3, 127, 127}; int opdope[] ={ 000000, // EOF 000000, // ; 000000, // { 000000, // } 036000, // [ 002000, // ] 036000, // ( 002000, // ) 014201, // : 007001, // , 000000, // 10 000000, // 11 000000, // 12 000000, // 13 000000, // 14 000000, // 15 000000, // 16 000000, // 17 000000, // 18 000000, // 19 000000, // name 000000, // short constant 000000, // string 000000, // float 000000, // double 000000, // 25 000000, // 26 000000, // 27 000000, // 28 000000, // 29 034202, // ++pre 034202, // --pre 034202, // ++post 034202, // --post 034220, // !un 034202, // &un 034220, // *un 034200, // -un 034220, // ~un 000000, // 39 030101, // + 030001, // - 032101, // * 032001, // // 032001, // % 026061, // >> 026061, // << 020161, // & 016161, // | 016161, // ^ 000000, // 50 000000, // 51 000000, // 52 000000, // 53 000000, // 54 000000, // 55 000000, // 56 000000, // 57 000000, // 58 000000, // 59 022105, // == 022105, // != 024105, // <= 024105, // < 024105, // >= 024105, // > 024105, //

p 024105, // >=p 012213, // =+ 012213, // =- 012213, // =* 012213, // =// 012213, // =% 012253, // =>> 012253, // =<< 012253, // =& 012253, // =| 012253, // =^ 012213, // = 000000, // 81 000000, // 82 000000, // 83 000000, // int -> float 000000, // int -> double 000000, // float -> int 000000, // float -> double 000000, // double -> int 000000, // double -> float 014201, // ? 000000, // 91 000000, // 92 000000, // 93 000000, // int -> float 000000, // int -> double 000000, // float -> double 000000, // int -> int[] 000000, // int -> float[] 000000, // int -> double[] 036001, // call 036001, // mcall }; int cvtab[] ={ 0000, // i:i 0000, // i:c 0113, // i:f 0125, // i:d 0140, // i:i[] 0100, // i:c[] 0150, // i:f[] 0160, // i:d[] 0140, // i:[][] 0100, // c:i 0100, // c:c 0113, // c:f 0125, // c:d 0140, // c:i[] 0100, // c:c[] 0150, // c:f[] 0160, // c:d[] 0140, // c[][] 0211, // f:i 0211, // f:c 0000, // f:f 0136, // f:d 0211, // f:i[] 0211, // f:c[] 0211, // f:f[] 0211, // f:d[] 0211, // f:[][] 0222, // d:i 0222, // d:c 0234, // d:f 0000, // d:d 0222, // d:i[] 0222, // d:c[] 0222, // d:f[] 0222, // d:d[] 0222, // d:[][] 0240, // i[]:i 0240, // i[]:c 0113, // i[]:f 0125, // i[]:d 0000, // i[]:i[] 0000, // i[]:c[] 0100, // i[]:f[] 0100, // i[]:d[] 0100, // i[]:[][] 0000, // c[]:i 0000, // c[]:c 0113, // c[]:f 0125, // c[]:d 0200, // c[]:i[] 0000, // c[]:c[] 0200, // c[]:f[] 0200, // c[]:d[] 0200, // c[]:[][] 0250, // f[]:i 0250, // f[]:c 0113, // f[]:f 0125, // f[]:d 0000, // f[]:i[] 0000, // f[]:c[] 0000, // f[]:f[] 0100, // f[]:d[] 0000, // f[]:[][] 0260, // d[]:i 0260, // d[]:c 0113, // d[]:f 0125, // d[]:d 0000, // d[]:i[] 0000, // d[]:c[] 0000, // d[]:f[] 0000, // d[]:d[] 0000, // d[]:[][] 0240, // [][]:i 0240, // [][]:c 0113, // [][]:f 0125, // [][]:d 0000, // [][]:i[] 0000, // [][]:c[] 0100, // [][]:f[] 0100, // [][]:d[] 0000, // [][]:[][] }; ================================================ FILE: src/c0t.s ================================================ / word 
I/O .globl _putwrd .globl _tmpfil .globl putw .globl fcreat .globl flush .data _putwrd: 1f .text 1: tst buf / if tmp file not open bne 1f mov _tmpfil,r0 jsr r5,fcreat; buf / open tmpfile bec 1f mov $1,r0 / if couldn't open file, print error message sys write; botch; ebotch-botch sys exit 1: mov 2(sp),r0 jsr r5,putw; buf rts pc .globl _flshw .data _flshw: 1f .text 1: jsr r5,flush; buf rts pc botch: ; ebotch: .even .bss buf: .=.+518. .text / C operator and conversion tables .globl _opdope .globl _cvtab _opdope:.+2 00000 / EOF 00000 / ; 00000 / { 00000 / } 36000 / [ 02000 / ] 36000 / ( 02000 / ) 14201 / : 07001 / , 00000 / 10 00000 / 11 00000 / 12 00000 / 13 00000 / 14 00000 / 15 00000 / 16 00000 / 17 00000 / 18 00000 / 19 00000 / name 00000 / short constant 00000 / string 00000 / float 00000 / double 00000 / 25 00000 / 26 00000 / 27 00000 / 28 00000 / 29 34202 / ++pre 34202 / --pre 34202 / ++post 34202 / --post 34220 / !un 34202 / &un 34220 / *un 34200 / -un 34220 / ~un 00000 / 39 30101 / + 30001 / - 32101 / * 32001 / / 32001 / % 26061 / >> 26061 / << 20161 / & 16161 / | 16161 / ^ 00000 / 50 00000 / 51 00000 / 52 00000 / 53 00000 / 54 00000 / 55 00000 / 56 00000 / 57 00000 / 58 00000 / 59 22105 / == 22105 / != 24105 / <= 24105 / < 24105 / >= 24105 / > 24105 /

p 24105 / >=p 12213 / =+ 12213 / =- 12213 / =* 12213 / =/ 12213 / =% 12253 / =>> 12253 / =<< 12253 / =& 12253 / =| 12253 / =^ 12213 / = 00000 / 81 00000 / 82 00000 / 83 00000 / int -> float 00000 / int -> double 00000 / float -> int 00000 / float -> double 00000 / double -> int 00000 / double -> float 14201 / ? 00000 / 91 00000 / 92 00000 / 93 00000 / int -> float 00000 / int -> double 00000 / float -> double 00000 / int -> int[] 00000 / int -> float[] 00000 / int -> double[] 36001 / call 36001 / mcall _cvtab: .+2 .byte 000 / i:i .byte 000 / i:c .byte 113 / i:f .byte 125 / i:d .byte 140 / i:i[] .byte 100 / i:c[] .byte 150 / i:f[] .byte 160 / i:d[] .byte 140 / i:[][] .byte 100 / c:i .byte 100 / c:c .byte 113 / c:f .byte 125 / c:d .byte 140 / c:i[] .byte 100 / c:c[] .byte 150 / c:f[] .byte 160 / c:d[] .byte 140 / c[][] .byte 211 / f:i .byte 211 / f:c .byte 000 / f:f .byte 136 / f:d .byte 211 / f:i[] .byte 211 / f:c[] .byte 211 / f:f[] .byte 211 / f:d[] .byte 211 / f:[][] .byte 222 / d:i .byte 222 / d:c .byte 234 / d:f .byte 000 / d:d .byte 222 / d:i[] .byte 222 / d:c[] .byte 222 / d:f[] .byte 222 / d:d[] .byte 222 / d:[][] .byte 240 / i[]:i .byte 240 / i[]:c .byte 113 / i[]:f .byte 125 / i[]:d .byte 000 / i[]:i[] .byte 000 / i[]:c[] .byte 100 / i[]:f[] .byte 100 / i[]:d[] .byte 100 / i[]:[][] .byte 000 / c[]:i .byte 000 / c[]:c .byte 113 / c[]:f .byte 125 / c[]:d .byte 200 / c[]:i[] .byte 000 / c[]:c[] .byte 200 / c[]:f[] .byte 200 / c[]:d[] .byte 200 / c[]:[][] .byte 250 / f[]:i .byte 250 / f[]:c .byte 113 / f[]:f .byte 125 / f[]:d .byte 000 / f[]:i[] .byte 000 / f[]:c[] .byte 000 / f[]:f[] .byte 100 / f[]:d[] .byte 000 / f[]:[][] .byte 260 / d[]:i .byte 260 / d[]:c .byte 113 / d[]:f .byte 125 / d[]:d .byte 000 / d[]:i[] .byte 000 / d[]:c[] .byte 000 / d[]:f[] .byte 000 / d[]:d[] .byte 000 / d[]:[][] .byte 240 / [][]:i .byte 240 / [][]:c .byte 113 / [][]:f .byte 125 / [][]:d .byte 000 / [][]:i[] .byte 000 / [][]:c[] .byte 100 / [][]:f[] .byte 100 / [][]:d[] .byte 
000 / [][]:[][] .even / character type table .globl _ctab _ctab: .+2 .byte 000.,127.,127.,127.,127.,127.,127.,127. .byte 127.,126.,125.,127.,127.,127.,127.,127. .byte 127.,127.,127.,127.,127.,127.,127.,127. .byte 127.,127.,127.,127.,127.,127.,127.,127. .byte 126.,034.,122.,127.,127.,044.,047.,121. .byte 006.,007.,042.,040.,009.,041.,127.,043. .byte 124.,124.,124.,124.,124.,124.,124.,124. .byte 124.,124.,008.,001.,063.,080.,065.,090. .byte 127.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,004.,127.,005.,049.,127. .byte 127.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,123.,123.,123.,123.,123. .byte 123.,123.,123.,002.,048.,003.,127.,127. ================================================ FILE: src/c1.h ================================================ // // Created by veges on 2021. 03. 17.. // #ifndef LEGACY_CC_C1_H #define LEGACY_CC_C1_H #include #include #include "config.h" /* c10.c */ generate(); char *match(int*, int*, int); void rcexpr(int*, int*, int); cexpr(int*, int*, int); void pname(int *); dcalc(int *, int); notcompat(int,int ); void prins(int, int); collcon(int*); isfloat(int *, char *s[]); /* c11.c */ void jumpc(int[],int,int); void cbranch(int[], int, int, int); branch(int,int,int); jump(int); label(int); void popstk(int); length(int); rlength(int); getwrd(); printn(int,int); void cc_printf(char*,...); void cc_putchar(int); error(char[]); error1(char[],int); error2(char[],int,int); int *fixp(int*); /* Globals */ extern int regtab[]; extern int efftab[]; extern int cctab[]; extern int sptab[]; extern int opdope[]; extern int ospace[]; extern int *baseptr; extern nreg; extern isn; extern namsiz; extern FILE* fout; extern line; extern tmpfil; extern nerror; extern fltmod; extern int instabcode[]; extern char *instabstr[]; #define printf cc_printf #define putchar cc_putchar #endif //LEGACY_CC_C1_H 
================================================ FILE: src/c10.c ================================================ /* C compiler, part 2 Copyright 1972 Bell Telephone Laboratories, Inc. */ #include "c1.h" #include ospace[1000]; /* fake */ /* * Second pass of the compiler. The code was expected to be loaded at the same location as the first pass, * such that ospace is at the same location as in the first pass. Because of this, pointers in the * expression trees were saved using the absolute address. The fixp function converts the incorrect addresses * to ones pointing into this pass' ospace. Throughout this code, unfixed pointer means the fixp function * hasn't been called for the pointer yet. * * For details of the algorithm, see the "A Tour through the UNIX C Compiler" in the UNIX v7 manual. * Though it is for a newer version of C, almost everything is applicable to this compiler. * The difficulty levels used in this compiler: * z - 4 - zero * c - 8 - number * i - 12 - name/string/float/double, pointer address(&) * a - 16 - addressible by PDP11 address instructions * e - 20 - easy statement, value can be calculated using the available registers only * n - 63 - anything */ main(argc, argv) char *argv[]; { int *sp, c, *table, *tabtab[4], tree; if (argc<2) { error("Arg count"); exit(1); } if(freopen(argv[1], "r", stdin)==NULL) { error1("Can't find %s", argv[1]); exit(1); } fout = stdout; tabtab[0] = regtab; tabtab[1] = efftab; tabtab[2] = cctab; tabtab[3] = sptab; while((c=getchar())>0) { if(c=='#') { /* expression */ sp = ospace; baseptr=getwrd(); c = getwrd(); tree = getwrd(); table = tabtab[getwrd()]; line = getwrd(); while(c--) *sp++ = getwrd(); rcexpr(tree, table, 0); } else putchar(c); } exit(nerror!=0); } /* * Finds a code generation template in the generation table. * Returns a pointer to the template string. 
*
 * tree is an unfixed pointer
 * nreg - number of available registers
 *
 * Returns 0 when tree is 0, when the operator has no entry in the table,
 * or when none of its templates accepts the operands.
 */
char *match(tree, table, nreg)
int tree[], table[];
{
	int op, d1, d2, t1, t2, *p1, *p2;
	char *mp;

	/* t1/t2 - type of the result of subtree, d1/d2 - difficulty of the subtree */
	if (tree==0)
		return(0);
	/*
	 * The second operand is only filled in for binary operators, yet the
	 * test below reads p2 through a non short-circuit '&'. Start from known
	 * values so that read is well defined.
	 */
	p2 = 0;
	t2 = 0;
	d2 = 0;
	tree = fixp(tree);
	op = *tree;
	if (op>=29) /* if not leaf */
		p1 = fixp(tree[3]);
	else
		p1 = tree;
	t1 = p1[1];
	d1 = dcalc(p1, nreg);
	if ((opdope[op]&01)!=0) { /* binary? */
		if(tree[4]!=0) { /* in function calls second argument is empty at this point */
			p2 = fixp(tree[4]);
			t2 = p2[1];
			d2 = dcalc(p2, nreg);
		} else {
			p2 = 0;
		}
	}
	/* Look up the entries for this operator; the table holds (op, templates) pairs */
	while(*table) {
		if (*table++ == op)
			goto foundop;
		table++;
	}
	return(0);
foundop:
	table = *table;
nxtry:
	/* Iterate over the list of templates for this op */
	mp = table;
	if (*mp == 0)
		return(0);
	/*
	 * Reject the template when the first operand is more difficult than the
	 * template allows (low six bits), or when the template has the 0100 flag
	 * set and the operand is not operator 36 (unary *).
	 */
	if (d1 > (*mp&077) | (*mp>=0100)&(*p1!=36))
		goto notyet;
	if (notcompat(t1, mp[1]))
		goto notyet;
	if ((opdope[op]&01)!=0 & p2!=0) { /* binary op, check second argument */
		if (d2 > (mp[2]&077) | (mp[2]>=0100)&(*p2!=36))
			goto notyet;
		if (notcompat(t2,mp[3]))
			goto notyet;
	}
now:
	return(table[1]); /* parameters are 4bytes = 1 word */
notyet:
	table = table+2; /* jump to next entry in table (on entry is 4 bytes + 1 sizeof(int *) ) */
	goto nxtry;
}

/*
 * Generates code for this expression. If the instruction does not have a matching entry
 * in one of the specialized code tables, it falls back to regtab and moves the result
 * to the appropriate place.
 *
 * tree is an unfixed pointer
 * reg - result should go into this register if table==regtab;
 *       Additionally, the code generator should leave lower numbered registers alone,
 *       and only use reg or higher numbered registers.
*/
void rcexpr(tree, table, reg)
int tree[], table[];
{
	int *origtree;

	if(tree==0)
		return;
	origtree = tree;	/* cexpr() expects the unfixed pointer */
	tree = fixp(tree);
	if(*tree >= 103) { /* conditional jump */
		/* 103 becomes a long jump sequence, anything above a short branch */
		if(*tree==103)
			jumpc(tree[1],tree[2],tree[3]);
		else
			cbranch(tree[1],tree[2],tree[3],0);
		return;
	}
	if (cexpr(origtree, table, reg))
		return;
	/* no template in the requested table: compute into a register, then fix up */
	if (table!=regtab)
		if(cexpr(origtree, regtab, reg)) {
			if (table==sptab)
				printf("mov r%d,-(sp)\n", reg);
			if (table==cctab)
				printf("tst r%d\n", reg);
			return;
		}
	error1("No match for op %d", *tree);
}

/*
 * Generate code using the given table. If a template was found
 * and the code successfully generated then returns 1, otherwise 0.
 *
 * tree is an unfixed pointer
 * reg - result should go into this register if table==regtab;
 *       Additionally, the code generator should leave lower numbered registers alone,
 *       and only use reg or higher numbered registers.
 *
 * The template returned by match() is interpreted one character at a time.
 * The letters tested below are the single character codes produced by cvopt;
 * the comment above each case gives the code as written in the table source.
 * Characters that are not codes are copied to the output.
 */
cexpr(tree, table, reg)
int tree[], table[];
{
	/* p1/p2 are the unfixed operand pointers, fp1/fp2 their fixed forms */
	int *p1, *fp1, *p2, *fp2, c, r, *p, *otable, *ctable, *origtree;
	char *string;

	origtree = tree;
	tree = fixp(tree);
	if ((c = *tree)==100) { /* function call, push params on stack */
		p1 = tree[3];
		p2 = tree[4];
		fp2 = fixp(p2);
		r = 0;	/* running total of bytes pushed */
		if(p2) {
			while (*fp2==9) { /* comma */
				rcexpr(fp2[4], sptab, 0);
				r += rlength(fixp(p=fp2[4])[1]);
				p2 = fp2[3];
				fp2 = fixp(p2);
			}
			rcexpr(p2, sptab, 0);
			r += rlength(fp2[1]);
		}
		*tree = 101;	/* the node is now a call whose arguments are already pushed */
		tree[2] = r; /* save arg length */
	}
	if(c==90) { /* ? */
		/* tree[4] is the ':' node holding the two alternatives */
		cbranch(tree[3], c=isn++, 0, reg);
		rcexpr(fixp(tree[4])[3], table, reg);
		branch(r=isn++, 0, 0);
		label(c);
		rcexpr(fixp(tree[4])[4], table, reg);
		label(r);
		return(1);
	}
	if ((string=match(origtree, table, nreg-reg))==0) /* find code template */
		return(0);
	p1 = tree[3];
	p2 = tree[4];
	fp1 = fixp(p1);
	fp2 = fixp(p2);
loop:
	switch(c = *string++) {

	case '\0':
		p = tree;
		if (*p==101 & p[2]>0) { /* if this was a function call, pop parameters from stack. */
			popstk(p[2]);
		}
		return(1);

	/* A1 */
	case 'A':
		p = fixp(tree[3]);
		goto adr;

	/* A2 */
	case 'B':
		p = fixp(tree[4]);
		goto adr;

	/* A */
	case 'O':
		p = tree;
	adr:
		pname(p);
		goto loop;

	/* I */
	case 'M':
		/* a following ' selects the second instruction of the pair */
		if ((c = *string)=='\'')
			string++;
		else
			c = 0;
		prins(*tree, c);
		goto loop;

	/* B1 */
	case 'C':
		p = fixp(tree[3]);
		goto pbyte;

	/* BF */
	case 'P':
		p = tree;
		goto pb1;

	/* B2 */
	case 'D':
		p = fixp(tree[4]);
	pbyte:
		if (p[1]==1) /* char type? */
			putchar('b');
	pb1:
		if (isfloat(p, 0))
			putchar('f');
		goto loop;

	/* BE */
	case 'L':
		if (fixp(tree[3])[1]==1 | fixp(tree[4])[1]==1)
			putchar('b');
		goto loop;

	/* C1 */
	case 'E':
		p = fp1[3];
		goto constl;

	/* C2 */
	case 'F':
		p = fp2[3];
	constl:
		printf("%o", p);
		goto loop;

	/* F */
	case 'G':
		p = p1;
		goto subtre;

	/* S */
	case 'K':
		p = p2;
		goto subtre;

	/* H */
	case 'H':
		p = origtree;
	subtre:
		/* the next template character carries the flag bits, offset from 'A' */
		ctable = regtab;
		r = reg;
		c = *string++ - 'A';
		if ((c&02)!=0) /* should it go to stack? */
			ctable = sptab;
		if ((c&04)!=0) /* is it a conditional expression? */
			ctable = cctab;
		if((c&010)!=0) /* F1, S1, H1 - using the next register */
			r = reg+1;
		if((c&01)!=0)
			if(*fixp(p)==36) { /* if this has the shape *(x+c), print tree for x only */
				p = fixp(p)[3];
				if(collcon(fixp(p)) & (ctable!=sptab))
					p = fixp(p)[3];
			}
		rcexpr(p, ctable, r);
		goto loop;

	/* R */
	case 'I':
		r = reg;
		goto preg;

	/* R1 */
	case 'J':
		r = reg+1;
	preg:
		printf("r%d", r);
		goto loop;

	case '#':
		p = fp1[3];
		goto nmbr;

	case '"':
		p = fp2[3];
		goto nmbr;

	case '~':
		p = tree[3];
	nmbr:
		p = fixp(p);
		if(collcon(p)) { /* If this has the form *(x+const), print const */
			c = *p;
			/* op 40 is '+': print the constant as is, otherwise negated */
			if(r = fixp(p=p[4])[3])
				printf("%o", c==40?r:-r);
		}
		goto loop;

	/* M */
	case 'N':
		/* emit a mode switch only when the floating mode actually changes */
		if ((c=isfloat(tree, &string))==fltmod)
			goto loop;
		printf((fltmod=c)==2?"setf\n":"setd\n");
		goto loop;

	/* Z */
	case 'Z':
		printf("$%o", fp1[4]);
		goto loop;
	}
	/* not a template code: copy the character through */
	putchar(c);
	fflush(stdout);
	goto loop;
}

/* Prints the value/label/location of a tree node.
p is a fixed pointer */ void pname(p) int *p; { char *np; int i; loop: switch(*p) { case 21: /* const */ printf("$%o", p[3]); return; case 22: /* string */ printf("$l%d", p[3]); return; case 20: /* name */ switch(p[3]) { case 5: /* auto, param */ printf("%o(r5)", p[4]); return; /* extern */ case 6: printf("%p", &p[4]); return; } printf("l%d", p[4]); /* static */ return; case 35: /* & */ putchar('$'); p = fixp(p[3]); goto loop; case 36: /* * */ putchar('*'); p = fixp(p[3]); goto loop; } error("pname called illegally"); } /* * Difficulty level calculation. * p points to a fixed optable entry. * nreg - number of free registers. */ dcalc(p, nreg) int p[]; { int op, t; if (p==0) return(0); op = *p; switch (op) { case 20: /* name */ case 22: /* string */ case 23: /* float */ case 24: /* double */ return(12); case 21: /* short constant */ return(p[3]==0? 4:8); case 35: /* & */ return(12); case 36: /* * */ if ((op=dcalc(fixp(p[3]), nreg))<16) /* if argument of pointer is constant/simple name, this is PDP-11 addressible */ return(16); } def: return(p[2]<=nreg? 20: 24); /* can this tree be calculated using nreg registers? */ } /* * Checks if the two type: at and st are not compatible * at - type in the tree * st - type in the code template table * */ notcompat(at, st) { if (st==0) /* word, byte */ return(at>1 & at<16); /* can store char or int only */ if (st==1) /* word */ return(at>0 & at<16); /* can store int only */ st -= 2; if (st==2 & at==3) at = 2; return(st != at); } /* * Prints the instruction belonging to this opcode. c decides whether * the first or the second subinstruction should be printed. 
*/ void prins(op, c) { int *insp; char **insstrp; char *s; insp = instabcode; insstrp = instabstr; while(*insp) { if (*insp == op) { if ((s = insstrp[c!=0])==NULL) goto err; printf("%s", s); return; } else { insp = insp + 1; insstrp +=2; } } err: error1("No match for op %d", op); } /* True if p is in 'x+const' form */ collcon(p) int p[]; { int *p1; if(*p==40 | *p==41) if(*(p1=fixp(p[4]))==21) return(1); return(0); } /* tree - correct pointer into ospace */ isfloat(t, s) int t[]; char *s[]; { int rt; if(s!= 0) { rt = **s - '0'; if (rt==2 | rt==4) { (*s)++; return(rt>2?3:2); } } if ((opdope[t[0]]&010)!=0) /* relational */ t = fixp(t[3]); if ((rt=t[1])>=2 && rt<=3) /* type is float or double */ return(rt); return(0); } int *baseptr=0; nreg = 4; /* number of available registers */ isn = 10000; namsiz = 8; /* max length of a name */ FILE* fout; line; tmpfil; nerror; fltmod; ================================================ FILE: src/c11.c ================================================ #include "c1.h" /* * Tree is a n unfixed pointer to the conditional expression. * lbl - label to jump to * cond - 0: jump if condition is false, 1: jump if condition is true */ void jumpc(tree, lbl, cond) int tree[]; { int l1, l2, *origtree; if (tree==0) return; origtree=tree; tree=fixp(tree); switch(*tree) { /* & */ case 47: if (cond) { cbranch(tree[3], l1=isn++, 0, 0); cbranch(tree[4], l1, 0, 0); jump(lbl); label(l1); } else { cbranch(tree[3], l1=isn++, 0, 0); cbranch(tree[4], l2=isn++, 1, 0); label(l1); jump(lbl); label(l2); } return; /* | */ case 48: if (cond) { cbranch(tree[3], l1=isn++, 1, 0); cbranch(tree[4], l2=isn++, 0, 0); label(l1); jump(lbl); label(l2); } else { cbranch(tree[3], l1=isn++, 1, 0); cbranch(tree[4], l1, 1, 0); jump(lbl); label(l1); } return; /* ! */ case 34: jumpc(tree[3], lbl, !cond); return; } rcexpr(origtree, cctab, 0); branch(l1=isn++, *tree, cond); jump(lbl); label(l1); return; } /* * Tree is an unfixed pointer to the conditional expression. 
* lbl - label to jump to * cond - 0: jump if condition is false, 1: jump if condition is true */ void cbranch(tree, lbl, cond, reg) int tree[]; { int l1, *origtree; if (tree==0) return; origtree=tree; tree=fixp(tree); switch(*tree) { /* & */ case 47: if (cond) { cbranch(tree[3], l1=isn++, 0, reg); cbranch(tree[4], lbl, 1, reg); label(l1); } else { cbranch(tree[3], lbl, 0, reg); cbranch(tree[4], lbl, 0, reg); } return; /* | */ case 48: if (cond) { cbranch(tree[3], lbl, 1, reg); cbranch(tree[4], lbl, 1, reg); } else { cbranch(tree[3], l1=isn++, 1, reg); cbranch(tree[4], lbl, 0, reg); label(l1); } return; /* ! */ case 34: cbranch(tree[3], lbl, !cond, reg); return; } rcexpr(origtree, cctab, reg); branch(lbl, *tree, !cond); return; } branch(lbl, op, c) { if(op) { if((opdope[op]&04)==0) /* conditional jump? */ op = 61; prins(op,c); } else printf("br"); printf("\tl%d\n", lbl); } jump(lab) { printf("jmp\tl%d\n", lab); } label(l) { printf("l%d:", l); } /* Decreases the stack size, moving sp the required amounts. */ void popstk(a) { switch(a) { case 0: return; case 2: printf("tst (sp)+\n"); return; case 4: printf("cmp (sp)+,(sp)+\n"); return; } printf("add $%o,sp\n", a); } int *fixp(p) int *p; { return (void*)p-(void*)baseptr+(void*)ospace; } length(t) { if (t<0) t += 020; if (t>=020) return(2); switch(t) { case 0: return(2); case 1: return(1); case 2: return(4); case 3: return(8); case 4: return(4); } return(1024); } /* rounded length */ rlength(c) { auto l; return((l=length(c))==1? 2: l); } /* Get a number from the input file */ int getwrd() { int i; char c; scanf("%d", &i); c = getchar(); if(c!=';') { error1("Unknown char id: %d", (int) c); } return i; } printn(n,b) { auto a; if(a=n/b) /* assignment, not test for equality */ printn(a, b); /* recursive */ putchar(n%b + '0'); } cc_putchar(int c) { putc(c, fout); } void cc_printf(char *fmt, ...) 
{ static char *s; auto *adx, x, c, *i; va_list arguments; va_start ( arguments, fmt); loop: while((c = *fmt++) != '%') { if(c == '\0') { va_end(arguments); fflush(stdout); return; } putchar(c); } switch (c = *fmt++) { case 'd': /* decimal */ case 'o': /* octal */ x = va_arg(arguments, int); if(x < 0) { x = -x; if(x<0) { /* - infinity */ if(c=='o') printf("100000"); else printf("-32767"); goto loop; } putchar('-'); } printn(x, c=='o'?8:10); goto loop; case 's': /* string */ x = va_arg(arguments, int); s=x; while(c = *s++) { putchar(c); } goto loop; case 'p': s =va_arg(arguments, int*); putchar('_'); c = namsiz; while(c--) if(*s) putchar(*s++); goto loop; } putchar('%'); fmt--; goto loop; } error(s) char s[];{ error2(s, 0, 0); } error1(s, p1) char s[];{ error2(s, p1, 0); } error2(s, p1, p2) char s[];{ FILE *f; nerror++; fflush(stdout); f = fout; fout = stdout; printf("%d: ", line); printf(s, p1, p2); putchar('\n'); fout = f; } ================================================ FILE: src/c1t.c ================================================ // // Created by veges on 2021. 03. 19.. 
// #include "c1.h" int instabcode[]={ 40, 70, 41, 71, 30, 31, 32, 33, 45, 46, 75, 76, 43, 44, 73, 74, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, }; char *instabstr[]= { "add", "add", // 1 "add", "add", "sub", "sub", // 2 sub "sub", "sub", "inc", "add", // 3:inc "dec", "sub", //4dec "inc", "add", "dec", "sub", "sub", "ac", // 5:ac "mov", "r4", //"6:mov, 7:r4 "sub", "ac", "mov", "r4", "r4", "divf", // 1:div7 "ac", NULL, "r4", "add", "ac", NULL, "beq", "bne", // 0:beq, 1:bne "bne", "beq", "ble", "bgt", // 2:ble, 5: bgt "blt", "bge", // 3:blt, 4:bge "bge", "blt", "bgt", "ble", "blos", "bhi", //6:blos, 9:bhi "blo", "bhis", // 7:blo 8:bhis "bhis", "blo", "bhi", "blos" }; int opdope[] = { 000000, // EOF 000000, // ; 000000, // { 000000, // } 036000, // [ 002000, // ] 036000, // ( 002000, // ) 002000, // : 007001, // , 000000, // 10 000000, // 11 000000, // 12 000000, // 13 000000, // 14 000000, // 15 000000, // 16 000000, // 17 000000, // 18 000000, // 19 000000, // name 000000, // short constant 000000, // string 000000, // float 000000, // double 000000, // 25 000000, // 26 000000, // 27 000000, // 28 000000, // 29 034002, // ++pre 034002, // --pre 034002, // ++post 034002, // --post 034020, // !un 034002, // &un 034020, // *un 034000, // -un 034020, // ~un 000000, // 39 030101, // + 030001, // - 032101, // * 032001, // // 032001, // % 026061, // >> 026061, // << 020161, // & 016161, // | 016161, // ^ 000000, // 50 000000, // 51 000000, // 52 000000, // 53 000000, // 54 000000, // 55 000000, // 56 000000, // 57 000000, // 58 000000, // 59 022105, // == 022105, // != 024105, // <= 024105, // < 024105, // >= 024105, // > 024105, //

p 024105, // >=p 012013, // =+ 012013, // =- 012013, // =* 012013, // =// 012013, // =% 012053, // =>> 012053, // =<< 012053, // =& 012053, // =| 012053, // =^ 012013, // = 000000, // 81 000000, // 82 000000, // 83 000000, // int -> float 000000, // int -> double 000000, // float -> int 000000, // float -> double 000000, // double -> int 000000, // double -> float 014001, // ? 000000, // 91 000000, // 92 000000, // 93 000000, // int -> float 000000, // int -> double 000000, // float -> double 000000, // int -> int[] 000000, // int -> float[] 000000, // int -> double[] 036001, // call 036001 // mcall }; ================================================ FILE: src/c1t.s ================================================ / C operator tables .globl _getwrd .globl getw .globl fopen .globl _tmpfil .data _getwrd: 1f .text 1: tst buf bne 1f mov _tmpfil,r0 jsr r5,fopen; buf bes botchp 1: jsr r5,getw; buf bes botchp rts pc botchp: mov $1,r0 sys write; botch; ebotch-botch sys exit botch: ; ebotch: .even .bss buf: .=.+518. 
.text .globl _opdope .globl _instab _instab:.+2 40.; 1f; 1f; .data; 1:; .text 70.; 1b; 1b 41.; 2f; 2f; .data; 2:; .text 71.; 2b; 2b 30.; 3f; 1b; .data; 3:; .text 31.; 4f; 2b; .data; 4:; .text 32.; 3b; 1b 33.; 4b; 2b 45.; 2b; 5f; .data; 5:; .text 46.; 6f; 7f; .data; 6:; 7:<(r4)\0>; .text 75.; 2b; 5b 76.; 6b; 7b 43.; 7b; 1f; .data; 1:; .text 44.; 5b; 0 73.; 7b; 1b 74.; 5b; 0 60.; 0f; 1f; .data; 0:; 1:; .text 61.; 1b; 0b 62.; 2f; 5f; .data; 2:; 5:; .text 63.; 3f; 4f; .data; 3:; 4:; .text 64.; 4b; 3b 65.; 5b; 2b 66.; 6f; 9f; .data; 6:; 9:; .text 67.; 7f; 8f; .data; 7:; 8:; .text 68.; 8b; 7b 69.; 9b; 6b 0 .data .even .text _opdope:.+2 00000 / EOF 00000 / ; 00000 / { 00000 / } 36000 / [ 02000 / ] 36000 / ( 02000 / ) 02000 / : 07001 / , 00000 / 10 00000 / 11 00000 / 12 00000 / 13 00000 / 14 00000 / 15 00000 / 16 00000 / 17 00000 / 18 00000 / 19 00000 / name 00000 / short constant 00000 / string 00000 / float 00000 / double 00000 / 25 00000 / 26 00000 / 27 00000 / 28 00000 / 29 34002 / ++pre 34002 / --pre 34002 / ++post 34002 / --post 34020 / !un 34002 / &un 34020 / *un 34000 / -un 34020 / ~un 00000 / 39 30101 / + 30001 / - 32101 / * 32001 / / 32001 / % 26061 / >> 26061 / << 20161 / & 16161 / | 16161 / ^ 00000 / 50 00000 / 51 00000 / 52 00000 / 53 00000 / 54 00000 / 55 00000 / 56 00000 / 57 00000 / 58 00000 / 59 22105 / == 22105 / != 24105 / <= 24105 / < 24105 / >= 24105 / > 24105 /

p 24105 / >=p 12013 / =+ 12013 / =- 12013 / =* 12013 / =/ 12013 / =% 12053 / =>> 12053 / =<< 12053 / =& 12053 / =| 12053 / =^ 12013 / = 00000 / 81 00000 / 82 00000 / 83 00000 / int -> float 00000 / int -> double 00000 / float -> int 00000 / float -> double 00000 / double -> int 00000 / double -> float 14001 / ? 00000 / 91 00000 / 92 00000 / 93 00000 / int -> float 00000 / int -> double 00000 / float -> double 00000 / int -> int[] 00000 / int -> float[] 00000 / int -> double[] 36001 / call 36001 / mcall ================================================ FILE: src/cctab.s ================================================ / c code tables-- set condition codes .globl _cctab _cctab=.;.+2 20.; rest 21.; rest 22.; rest 30.; rest 31.; rest 34.; rest 35.; rest 36.; rest 37.; rest 40.; rest 41.; rest 42.; rest 43.; rest 44.; rest 45.; rest 46.; rest 47.; rest 48.; rest 60.; cc60 61.; cc60 62.; cc60 63.; cc60 64.; cc60 65.; cc60 66.; cc60 67.; cc60 68.; cc60 69.; cc60 70.; rest 71.; rest 72.; rest 73.; rest 74.; rest 75.; rest 76.; rest 77.; rest 78.; rest 79.; rest 80.; rest / relationals cc60: %a,z tstB1 A1 %n*,z F* tstB1 #1(R) %n,z F tst R %a,a cmpBE A1,A2 %n*,a F* cmpBE #1(R),A2 %n,a F cmpB2 R,A2 %n*,e* F* S1* cmpBE #1(R),#2(R1) %n*,e F* S1 cmpB1 #1(R),R1 %n,e* F S1* cmpB2 R,#2(R1) %n,e F S1 cmp R,R1 %n*,n* FS* S* cmpBE (sp)+,#2(R) %n*,n FS* S cmpB1 *(sp)+,R %n,n* FS S* cmpB2 (sp)+,#2(R) %n,n FS S cmp (sp)+,R / set codes right rest: %n,n H .data .even .text ================================================ FILE: src/config.h ================================================ // Use V5 style ABI, instead of the original V2 one // the difference is how labels are pointing to functions //#define UNIXV5_ABI ================================================ FILE: src/cvopt.c ================================================
#include <stdio.h>
/* stdio.h provides its own getc; this file's reader is renamed out of its way */
#define getc cc_getc

getc();
flag();
peekc = 0;		/* one character of push-back for getc(); 0 means empty */
nofloat = 0;		/* when non-zero, getc() drops text enclosed in { } */

/*
 * Filter that converts a code generation template .s file read from
 * standard input into assembler text written to standard output.
 *
 * Template letters are rewritten to single-letter codes:
 *
 *	A1 -> A		A2 -> B		A  -> O
 *	B1 -> C		B2 -> D		BE -> L		BF -> P
 *	C1 -> E		C2 -> F
 *	F  -> G		H  -> H		S  -> K
 *	R  -> I		R1 -> J
 *	I  -> M		M  -> N
 *
 * F, H and S are followed by a second letter that starts at 'A' and is
 * advanced by every suffix character that follows:
 *
 *	*  +1		S  +2		C  +4		1  +8
 *
 * A line starting with % is an operand pattern.  Every operand code in it
 * is emitted as ".byte 0<m>,0<t>" (both printed in octal), where t is the
 * value returned by flag() (0 for z and c) and m is
 *
 *	z 4	c 8	i 12	a 16	e 20	n 63	(a trailing * adds 0100)
 *
 * State flags:
 *	smode   set once a % pattern line has been seen, cleared by the
 *	        blank line that ends the template
 *	ssmode  the next character that is not '%' must first emit the
 *	        data segment header of the template string
 *	nlflg   the previous character was a newline (or a tab skipped
 *	        right after one)
 *	snlflg  the current template line needs no "\n" escape appended
 */
void main()
{
	auto c,snlflg,nlflg,t,smode,m,ssmode;

	smode = nlflg = snlflg = ssmode = 0;

loop:
	c = getc();
	if (c!='\n' & c!='\t')
		nlflg = 0;
	if (ssmode!=0 & c!='%') {
		/* first character after the pattern line(s): open the string */
		ssmode = 0;
		printf(".data\n1: .ascii \"");
	}
	switch(c) {

	case '\0':
		/* getc() reports NUL and EOF as 0: terminate the table and stop */
		printf(".text; .int 0\n");
		fflush(stdout);
		return;

	case ':':
		if (!smode)
			/*
			 * label outside a template: convert into pointer;
			 * the original note says the 4 should be sizeof(int*)
			 */
			printf("=.+4; .int 0");
		else
			putchar(':');
		goto loop;

	case 'A':
		if ((c=getc())=='1' | c=='2') {
			putchar(c+'A'-'1');
			goto loop;
		}
		putchar('O');
		peekc = c;
		goto loop;

	case 'B':
		switch (getc()) {

		case '1':
			putchar('C');
			goto loop;

		case '2':
			putchar('D');
			goto loop;

		case 'E':
			putchar('L');
			goto loop;

		case 'F':
			putchar('P');
			goto loop;
		}
		/* unknown B suffix; note the suffix character itself is dropped */
		putchar('?');
		goto loop;

	case 'C':
		putchar(getc()+'E'-'1');
		goto loop;

	case 'F':
		putchar('G');
		goto subtre;

	case 'R':
		if ((c=getc()) == '1')
			putchar('J');
		else {
			putchar('I');
			peekc = c;
		}
		goto loop;

	case 'H':
		putchar('H');
		goto subtre;

	case 'I':
		putchar('M');
		goto loop;

	case 'M':
		putchar('N');
		snlflg++;
		goto loop;

	case 'S':
		putchar('K');

	subtre:
		/* fold the suffix characters into one letter starting at 'A' */
		snlflg = 1;
		t = 'A';
	l1:
		switch (c=getc()) {

		case '*':
			t++;
			goto l1;

		case 'S':
			t += 2;
			goto l1;

		case 'C':
			t += 4;
			goto l1;

		case '1':
			t += 8;
			goto l1;
		}
		peekc = c;
		putchar(t);
		goto loop;

	case '#':
		/* #1 stays '#'; any other digit becomes an escaped double quote */
		if(getc()=='1')
			putchar('#');
		else
			printf("\\\"");
		goto loop;

	case '%':
		/* a pattern line that follows another one switches back to .text */
		if (smode)
			printf(".text;");
	loop1:
		switch (c=getc()) {

		case 'a':
			m = 16;
			t = flag();
			goto pf;

		case ',':
			putchar(';');
			goto loop1;

		case 'i':
			m = 12;
			t = flag();
			goto pf;

		case 'z':
			m = 4;
			t = 0;
			goto pf;

		case 'c':
			t = 0;
			m = 8;
			goto pf;

		case 'e':
			t = flag();
			m = 20;
			goto pf;

		case 'n':
			t = flag();
			m = 63;
		pf:
			if ((c=getc())=='*')
				m += 0100;
			else
				peekc = c;
			printf(".byte 0%o,0%o", m, t);
			goto loop1;

		case '\n':
			/* end of the pattern line: point at the string that follows */
			printf(";.int 1f\n");
			ssmode = 1;
			nlflg = 1;
			smode = 1;
			goto loop;
		}
		putchar(c);
		goto loop1;

	case '\t':
		/* a tab directly after a newline is dropped */
		if (nlflg) {
			nlflg = 0;
			goto loop;
		}
		putchar('\t');
		goto loop;

	case '\n':
		if (!smode) {
			/* outside optree definition just emit new line */
			putchar('\n');
			goto loop;
		}
		if (nlflg) {
			/* empty line, close off optree section */
			nlflg = 0;
			printf("\\0\"\n.text\n");
			smode = 0;
			goto loop;
		}
		if (!snlflg)
			printf("\\n");
		snlflg = 0;
		printf("\"\n.ascii \"");
		nlflg = 1;
		goto loop;
	}
	putchar(c);
	goto loop;
}

/*
 * Returns the next input character: the pushed-back peekc if there is one,
 * otherwise the next character of standard input.  NUL and EOF are both
 * returned as 0.
 *
 * '{' bumps the local nesting counter and fetches the following character;
 * '}' fetches the following character, lowers the counter and, when it
 * reaches zero, also steps over a newline that comes right after.  While
 * both the counter and nofloat are non-zero, characters are skipped, so
 * text inside { } is only dropped when nofloat is set.
 */
getc()
{
	auto t, ifcnt;

	ifcnt = 0;
gc:
	if (peekc) {
		t = peekc;
		peekc = 0;
	} else
		t = getchar();
	if (t==0 | t==EOF)
		return(0);
	if (t=='{') {
		ifcnt++;
		t = getchar();
	}
	if (t=='}') {
		t = getc();
		if (--ifcnt==0)
			if (t=='\n')
				t = getc();
	}
	if (ifcnt & nofloat)
		goto gc;
	return(t);
}

/*
 * Reads the type letters that follow an operand code in a % pattern and
 * returns their numeric value: w=1, i=2, b=3, f=4, d=5, and p adds 16 to
 * whatever has been collected so far.  The first character that is not a
 * type letter is pushed back through peekc.
 */
flag()
{
	auto c, f;

	f = 0;
l1:
	switch(c=getc()) {

	case 'w':	/* word */
		f = 1;
		goto l1;

	case 'i':
		f = 2;
		goto l1;

	case 'b':	/* byte */
		f = 3;
		goto l1;

	case 'f':	/* float */
		f = 4;
		goto l1;

	case 'd':	/* double */
		f = 5;
		goto l1;

	case 'p':	/* pointer? */
		f += 16;
		goto l1;
	}
	peekc = c;
	return(f);
}
================================================ FILE: src/efftab.s ================================================ / c code tables .globl _efftab _efftab=.;.+2 30.; ci30 31.; ci30 32.; ci30 / same as 30 33.; ci30 / same as 31 80.; ci80 70.; ci70 71.; ci70 / - like + 77.; ci77 78.; ci78 0 / ++ prefix ci30: %ai,n %abp,n %ab,n IB1 A1 %aip,n I' $2,A1 %nbp*,n %ni*,n %nb*,n F IB1 #1(R) %nip*,n F* I' $2,#1(R) / = ci80: %a,z clrB1 A1 %n*,z F* clrB1 #1(R) %a,aw movB1 A2,A1 %a,nw* S* movB1 #2(R),A1 %a,n S movB1 R,A1 %n*,aw F* movB1 A2,#1(R) %n*,ew* F* S1* movB1 #2(R1),#1(R) %n*,e F* S1 movB1 R1,#1(R) %e*,nw* S* F1* movB1 #2(R),#1(R1) %e*,n S F1* movB1 R,#1(R1) %n*,nw* FS* S* movB1 #2(R),*(sp)+ %n*,n FS* S movB1 R,*(sp)+ / =| i ci78: %a,a bisBE A2,A1 %a,n S bisB1 R,A1 %n*,a F* bisBE A2,#1(R) %e*,n* S* F1* bisBE #2(R),#1(R1) %e*,n S F1* bisBE R,#1(R1) %n*,e* F* S1* bisBE #2(R1),#1(R) %n*,e F* S1 bisBE R1,#1(R) %n*,n* FS* S* bisBE #2(R),*(sp)+ %n*,n FS* S bisBE R,*(sp)+ / =& i ci77: %a,c bicB1 $!C2,A1 %a,n S com R bicB1 R,A1 %e*,n S F1* com R bicB1 R,#1(R1) %n*,c F* bicB1 $!C2,#1(R) %n*,e F* S1 com R1 bicB1 R1,#1(R) %n*,n FS* S com R bicB1 R,*(sp)+ / =+ ci70: %aw,aw I A2,A1 %aw,nw* S* I #2(R),A1 %aw,n S I R,A1 %ew*,nw* S* F1* I #2(R),#1(R1) %a,nw* S* movB1 A1,R1 I #2(R),R1 movB1 R1,#2(R) %a,n S movB1 A1,R1 I R1,R movB1 R,A1 %ew*,n S F1* I R,#1(R1) %nw*,n SS F* I (sp)+,#1(R) %n*,n SS F* movB1 #1(R),R1 I (sp)+,R1 movB1 R1,#1(R) .data .even ================================================ FILE: src/regtab.s ================================================ / c code tables-- compile to register fp = 1 / enable floating-point unixv5 = 0 / unix v5 style abi, extra instructions .globl _regtab _regtab=.; .+2 20.; cr20 21.; cr20 22.; cr20 30.; cr30 31.; cr30 32.; cr32 33.; cr32 34.; cr34 35.; cr35 29.; cr29 36.; cr36 37.; cr37 38.; cr38 101.; cr100 80.; cr80 40.; cr40 41.; cr40 / - like + 42.; cr42 43.; cr43 44.; cr43 45.; cr45 46.; cr45
47.; cr47 48.; cr48 60.; cr60 61.; cr60 62.; cr60 63.; cr60 64.; cr60 65.; cr60 66.; cr60 67.; cr60 68.; cr60 69.; cr60 70.; cr70 71.; cr70 72.; cr72 73.; cr73 74.; cr73 75.; cr75 76.; cr75 77.; cr77 78.; cr78 102.; cr102 97.; cr97 0 / goto cr102: %i,n jmp *A1 %n*,n F* jmp *#1(R) %n,n F jmp (R) / call .if unixv5 cr100: %n*,n F* jsr pc,#1(R) %a,n jsr pc,A1 %n,n F jsr pc,R .else cr100: %n*,n F* jsr pc,*#1(R) %a,n jsr pc,*A1 %n,n F jsr pc,(R) .endif / name, constant cr20: %z,n clr R %aw,n mov A,R %ab,n movb A,R .if fp %af,n M movf A,R .endif /++,-- prefix cr30: %ai,n %abp,n %ab,n IB1 A1 movB1 A1,R %a,n I' $2,A1 mov A1,R %nbp*,n %ni*,n %nb*,n F* IB1 #1(R) movB1 #1(R),R %n*,n F* I' $2,#1(R) mov #1(R),R / ++,-- postfix cr32: %ai,n %abp,n %ab,n movB1 A1,R IB1 A1 %a,n mov A1,R I' $2,A1 %nbp*,n %nb*,n %ni*,n F* movB1 #1(R),-(sp) IB1 #1(R) movB1 (sp)+,R %n*,n F* mov #1(R),-(sp) I' $2,#1(R) mov (sp)+,R / ! cr34: %n,n FC beq 1f clr R br 2f 1: mov $1,R 2: / &unary cr35: %a,n mov $A1,R / & unary of auto cr29: %e,n mov r5,R add Z,R / *unary cr36: %abp*,n F movb (R),R %a*,n F mov (R),R %abp,n movb *A1,R %a,n mov *A1,R %nbp*,n F* movb *#1(R),R %n*,n F* mov *#1(R),R %nbp,n H* movb ~(R),R %n,n H* mov ~(R),R / - unary cr37: %n,n F neg R / ~ cr38: %n,n F com R / = cr80: %a,n S movB1 R,A1 %n*,a F* movB1 A2,#1(R) movB1 #1(R),R %n*,e F* S1 movB1 R1,#1(R) mov R1,R %n*,n FS* S movB1 R,*(sp)+ / | cr48: %n,a F bisB2 A2,R %n,e* F S1* bisB2 #2(R1),R %n,e F S1 bis R1,R %n,n FS S bis (sp)+,R / & cr47: %n,c F bic $!C2,R %n,e F S1 com R1 bic R1,R %n,n FS S com (sp) bic (sp)+,R / relationals cr60: %n,n HC I 2f clr R br 1f 2: mov $1,R 1: / >>, << cr45: %a,aw movB1 A1,I' I A2,lsh movB1 I',R %n*,aw F* movB1 #1(R),I' I A2,lsh movB1 I',R %n,aw F mov R,I' I A2,lsh mov I',R %a,nw* S* movB1 A1,(r4) I #2(R),lsh mov (r4),R %a,n S movB1 A1,I' I R,lsh mov I',R %n,n FS S mov (sp)+,I' I R,lsh mov I',R / +, - cr40: %n,aw F I A2,R %n,ew* F S1* I #2(R1),R %n,e F S1 I R1,R %n,nw* SS* F I *(sp)+,R %n,n SS F I (sp)+,R 
/ * cr42: %aw,a mov A1,(r4)+ movB2 A2,(r4) mov -(r4),R %n,a F mov R,(r4)+ movB2 A2,(r4) mov -(r4),R %n,e F S1 mov R,(r4)+ mov R1,(r4) mov -(r4),R %n,n FS S mov (sp)+,(r4)+ mov R,(r4) mov -(r4),R / /; mod cr43: .if unixv5 %a,a mov r5, -(sp) movB1 A1,r5 mov $0, r4 div A2,r4 mov r5, R mov (sp)+,r5 %n,n FS S mov r5, -(sp) mov $0, r4 mov 2(sp), r5 div R, r4 mov r5, R mov (sp)+,r5 tst (sp)+ .else %a,a movB1 A1,(r4) movB2 A2,div mov I,R %a,n S movB1 A1,(r4) mov R,div mov I,R %n,a F mov R,(r4) movB2 A2,div mov I,R %n,e F S1 mov R,(r4) mov R1,div mov I,R %e,n S F1 mov R1,(r4) mov R,div mov I,R %n,n FS S mov (sp)+,(r4) mov R,div mov I,R .endif / =* cr72: %a,a movB1 A1,(r4) movB2 A2,mul movB1 (r4),A1 mov (r4),R %a,n S mov R,(r4)+ movB1 A1,(r4) mov -(r4),R movB1 R,A1 %n*,a F* movB1 #1(R),(r4) movB2 A2,mul movB1 (r4),#1(R) mov (r4),R %n*,e F* S1 movB1 #1(R),(r4) mov R1,mul movB1 (r4),#1(R) mov (r4),R %e*,n S F1* movB1 #1(R1),(r4) mov R,mul movB1 (r4),#1(R1) mov (r4),R %n*,n FS* S movB1 *(sp),(r4) mov R,mul movB1 (r4),*(sp)+ mov (r4),R / =mod, =/ cr73: %a,a movB1 A1,(r4) movB2 A2,div movB1 I,A1 mov I,R %a,n S movB1 A1,(r4) mov R,div mov I,R movB1 R,A1 %n*,a F* movB1 #1(R),(r4) movB2 A2,div movB1 I,#1(R) mov I,R %n*,e F* S1 movB1 #1(R),(r4) mov R1,div movB1 I,#1(R) mov I,R %e*,n S F1* movB1 #1(R1),(r4) mov R,div movB1 I,#1(R1) mov I,R %n*,n FS* S movB1 *(sp),(r4) mov R,div movB1 I,*(sp)+ mov I,R / =| cr78: %a,a bisBE A2,A1 movB1 A1,R %a,n S bisB1 R,A1 movB1 A1,R %n*,a F* bisBE A2,#1(R) movB1 #1(R),R %e*,n* S* F1* bisBE #1(R1),#2(R) movB1 #2(R),R %e*,n S F1* bisBE R,#1(R1) movB1 #1(R1),R %n*,e* F* S1* bisBE #2(R1),#1(R) movB1 #1(R),R %n*,e F* S1 bisBE R1,#1(R) movB2 #1(R),R %n*,n* FS* S* bisBE #2(R),*(sp) movB2 *(sp)+,R %n*,n FS* S bisBE R,*(sp) mov *(sp)+,R / =& cr77: %a,c bicB1 $!C2,A1 movB2 A1,R %a,n S com R bicB1 R,A1 movB1 A1,R %e*,n S F1* com R bicB1 R,#1(R1) movB1 #1(R1),R %n*,e F* S1 com R1 bicB1 R1,#1(R) movB1 #1(R),R %n*,n FS* S com R bicB1 R,*(sp) movB1 *(sp)+,R
/ =>>, =<<
/ the store back into the left operand is a sized move in every row
cr75:
%a,aw
	movB1	A1,I'
	I	A2,lsh
	movB1	I',A1
	movB1	I',R

%a,n
	S
	movB1	A1,I'
	I	R,lsh
	movB1	I',A1
	movB1	I',R

%n*,e
	F*
	S1
	movB1	#1(R),I'
	I	R1,lsh
	movB1	I',#1(R)
	movB1	I',R

%e*,n
	S
	F1*
	movB1	#1(R1),I'
	I	R,lsh
	movB1	I',#1(R1)
	movB1	I',R

%n*,n
	FS*
	S
	movB1	*(sp),I'
	I	R,lsh
	movB1	I',*(sp)+
	movB1	I',R

/ =+ cr70: %aw,aw I A2,A1 mov A1,R %aw,nw* S* I #2(R),A1 mov A1,R %aw,n S I R,A1 mov A1,R %ew*,nw* S* F1* I #2(R),#1(R1) mov #1(R1),R %a,nw* S* movB1 A1,R1 I #2(R),R1 movB1 R1,#2(R) mov R1,R %a,n S movB1 A1,R1 I R1,R movB1 R,A1 %ew*,n S F1* I R,#1(R1) mov #1(R1),R %nw*,n SS F* I (sp)+,#1(R) mov #1(R),R %n*,n SS F* movB1 #1(R),R1 I (sp)+,R1 movB1 R1,#1(R) mov R1,R / int -> int[] cr97: %n,n F asl R .data .even .text ================================================ FILE: src/sptab.s ================================================ / c code tables-- expression to -(sp) .globl _sptab _sptab=.;.+2 20.; cs20 21.; cs21 22.; cs21 30.; cs30 31.; cs30 32.; cs32 33.; cs32 35.; cs35 36.; cs36 40.; cs40 41.; cs40 42.; cs42 47.; cs47 48.; cs48 0 / name cs20: %aw,n mov A,-(sp) / constant cs21: %z,n clr -(sp) %a,n mov A,-(sp) / ++,-- prefix cs30: %ai,n %abp,n I A1 mov A1,-(sp) %aw,n I' $2,A1 mov A1,-(sp) %nbp*,n %ni*,n F* I #1(R) mov #1(R),-(sp) %nip*,n F* mov #1(R),-(sp) I' $2,#1(R) / ++,-- postfix cs32: %ai,n %abp,n mov A1,-(sp) I A1 %aip,n mov A1,-(sp) I' $2,A1 %nbp*,n %ni*,n F* mov #1(R),-(sp) I #1(R) %nip*,n F* mov #1(R),-(sp) I' $2,#1(R) / & unary cs35: %i,n mov $A1,-(sp) / * unary cs36: %aw,n mov *A1,-(sp) %nw*,n F* mov #1(R),-(sp) / + cs40: %n,aw FS I A2,(sp) %n,nw* FS S* I #2(R),(sp) %n,n FS S I R,(sp) / * cs42: %aw,a mov A1,(r4)+ movB2 A2,(r4) mov -(r4),-(sp) %n,a F mov R,(r4)+ movB2 A2,(r4) mov -(r4),-(sp) %n,nw* FS S* mov (sp)+,(r4)+ mov #2(R),(r4) mov -(r4),-(sp) %n,n FS S mov (sp)+,(r4)+ mov R,(r4) mov -(r4),-(sp) / & cs47: %n,c FS bic $!C2,(sp) %n,n FS S com R bic R,(sp) / | cs48: %n,a FS bisB2 A2,(sp) %n,n* FS S* bisB2 #2(R),(sp) %n,n FS S bis R,(sp) .data .even .text