Repository: mofaph/csapp Branch: master Commit: 269dba3478ff Files: 332 Total size: 941.5 KB Directory structure: gitextract_brtqdnlq/ ├── .gitignore ├── README ├── bin/ │ ├── d2h │ └── h2d ├── code/ │ ├── conc/ │ │ ├── badcnt.c │ │ ├── ctime_ts.c │ │ ├── echo_cnt.c │ │ ├── echoserverp.c │ │ ├── echoservers.c │ │ ├── echoservert.c │ │ ├── echoservert_pre.c │ │ ├── hello.c │ │ ├── hellobug.c │ │ ├── norace.c │ │ ├── psum.c │ │ ├── race.c │ │ ├── rand.c │ │ ├── rand_r.c │ │ ├── sbuf.c │ │ ├── sbuf.h │ │ ├── select.c │ │ ├── sharing.c │ │ └── tfgets-main.c │ ├── data/ │ │ └── show-bytes.c │ ├── ecf/ │ │ ├── alarm.c │ │ ├── counterprob.c │ │ ├── fork.c │ │ ├── forkprob0.c │ │ ├── forkprob1.c │ │ ├── forkprob2.c │ │ ├── forkprob3.c │ │ ├── forkprob4.c │ │ ├── forkprob5.c │ │ ├── forkprob6.c │ │ ├── forkprob7.c │ │ ├── forkprob8.c │ │ ├── hello-asm.sa │ │ ├── kill.c │ │ ├── procmask1.c │ │ ├── procmask2.c │ │ ├── restart.c │ │ ├── rfork.c │ │ ├── setjmp.c │ │ ├── shellex.c │ │ ├── sigint1.c │ │ ├── signal1.c │ │ ├── signal2.c │ │ ├── signal3.c │ │ ├── signal4.c │ │ ├── signalprob0.c │ │ ├── waitpid1.c │ │ ├── waitpid2.c │ │ ├── waitprob0.c │ │ ├── waitprob1.c │ │ └── waitprob3.c │ ├── include/ │ │ └── csapp.h │ ├── intro/ │ │ └── hello.c │ ├── io/ │ │ ├── cpfile.c │ │ ├── cpstdin.c │ │ └── statcheck.c │ ├── link/ │ │ ├── addvec.c │ │ ├── dll.c │ │ ├── elfstructs.c │ │ ├── main.c │ │ ├── main2.c │ │ ├── multvec.c │ │ ├── p-exe.d │ │ ├── pdata-exe.d │ │ └── swap.c │ ├── mem/ │ │ ├── matmult/ │ │ │ └── mm.c │ │ └── mountain/ │ │ └── mountain.c │ ├── netp/ │ │ ├── echo.c │ │ ├── echoclient.c │ │ ├── echoserveri.c │ │ ├── hostinfo.c │ │ └── tiny/ │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ └── tiny.c │ ├── src/ │ │ └── csapp.c │ └── vm/ │ ├── malloc/ │ │ ├── memlib.c │ │ └── mm.c │ ├── memlib.h │ └── mm.h ├── common/ │ ├── csapp.c │ └── csapp.h ├── exercise/ │ ├── 00-topic.txt │ ├── README │ ├── ex10-10.c │ ├── ex10-2.c │ ├── ex10-3.c │ ├── ex10-5.c │ ├── ex10-6.c │ ├── ex10-7.c │ 
├── ex10-8.c │ ├── ex10-9.txt │ ├── ex11-10/ │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-11/ │ │ ├── Sequence.mpg │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-12/ │ │ ├── Sequence.mpg │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-13/ │ │ ├── Sequence.mpg │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-2.c │ ├── ex11-3.c │ ├── ex11-6/ │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-7/ │ │ ├── Sequence.mpg │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-8/ │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex11-9/ │ │ ├── cgi-bin/ │ │ │ └── adder.c │ │ ├── home.html │ │ └── tiny.c │ ├── ex12-22.c │ ├── ex2-36.c │ ├── ex2-42.c │ ├── ex2-58.c │ ├── ex2-59.c │ ├── ex2-60.c │ ├── ex2-61.c │ ├── ex2-62.c │ ├── ex2-63.c │ ├── ex2-64.c │ ├── ex2-65.c │ ├── ex2-66.c │ ├── ex2-67.c │ ├── ex2-68.c │ ├── ex2-69.c │ ├── ex2-7.c │ ├── ex2-70.c │ ├── ex2-71.c │ ├── ex2-72.c │ ├── ex2-73.c │ ├── ex2-74.c │ ├── ex2-75.c │ ├── ex2-76.c │ ├── ex2-77.c │ ├── ex2-78.c │ ├── ex2-79.c │ ├── ex2-80.c │ ├── ex2-81.c │ ├── ex2-82.txt │ ├── ex2-83.c │ ├── ex2-84.txt │ ├── ex2-85.txt │ ├── ex2-86.txt │ ├── ex2-87.txt │ ├── ex2-88.txt │ ├── ex2-89.c │ ├── ex2-90.txt │ ├── ex2-91.c │ ├── ex2-92.c │ ├── ex2-93.c │ ├── ex2-94.c │ ├── ex2-95.c │ ├── ex2-96.c │ ├── ex3-19.c │ ├── ex3-34.c │ ├── ex3-54.c │ ├── ex3-55.asm │ ├── ex3-56.txt │ ├── ex3-57.c │ ├── ex3-58.c │ ├── ex3-59.c │ ├── ex3-60.txt │ ├── ex3-61.c │ ├── ex3-62.c │ ├── ex3-63.txt │ ├── ex3-64.txt │ ├── ex3-65.txt │ ├── ex3-66.txt │ ├── ex3-67.c │ ├── ex3-68.c │ ├── ex3-69.c │ ├── ex3-70.c │ ├── ex7-10.txt │ ├── ex7-11.txt │ ├── ex7-12.txt │ ├── ex7-13.txt │ ├── ex7-14.txt │ ├── ex7-15.txt │ ├── ex7-6.txt │ ├── ex7-7.c │ ├── ex7-8.txt │ ├── ex7-9.txt │ ├── ex8-10.txt │ ├── ex8-11.c │ ├── ex8-12.c │ ├── ex8-13.c │ ├── 
ex8-14.c │ ├── ex8-15.c │ ├── ex8-16.c │ ├── ex8-17.txt │ ├── ex8-18.c │ ├── ex8-19.txt │ ├── ex8-2.c │ ├── ex8-20.c │ ├── ex8-21.c │ ├── ex8-22.c │ ├── ex8-23.txt │ ├── ex8-24.c │ ├── ex8-25.c │ ├── ex8-26/ │ │ ├── Makefile │ │ ├── job.c │ │ ├── job.h │ │ ├── random_fork.c │ │ ├── shellex.c │ │ └── t-job.c │ ├── ex8-3.c │ ├── ex8-4.c │ ├── ex8-5.c │ ├── ex8-6.c │ ├── ex8-7.c │ ├── ex8-8.c │ ├── ex8-9.txt │ ├── ex9-14.c │ ├── ex9-17.c │ ├── ex9-18.c │ ├── ex9-20/ │ │ ├── Makefile │ │ ├── ex9-20.c │ │ ├── t-block-operate.c │ │ ├── t-block-quick-sort.c │ │ ├── t-malloc.c │ │ ├── t2.c │ │ ├── t3.c │ │ └── t9-20.c │ ├── ex9-5.c │ ├── ex9-8.c │ ├── ex9-9.c │ ├── t2-59.c │ ├── t2-95.c │ ├── t2-96.c │ ├── t3-68.sh │ ├── t8-22.c │ ├── t8-25.c │ ├── t9-14.c │ ├── t9-14.sh │ └── t9-17.c ├── hard-task.txt ├── missing.c ├── notes/ │ ├── .gitignore │ ├── api.txt │ ├── ch03.txt │ ├── ch07.txt │ ├── ch08.txt │ ├── ch09.txt │ └── ch11.txt └── sample/ ├── .gitignore ├── ch02/ │ ├── .gitignore │ └── show-bytes.c ├── ch03/ │ ├── buf-overflow.c │ ├── code.c │ ├── main.c │ ├── simple.c │ └── simple_1.c ├── ch07/ │ ├── .gitignore │ ├── addvec.c │ ├── bar1.c │ ├── bar2.c │ ├── bar3.c │ ├── bar4.c │ ├── bar5.c │ ├── dll.c │ ├── foo1.c │ ├── foo2.c │ ├── foo3.c │ ├── foo4.c │ ├── foo5.c │ ├── linkerror.c │ ├── main.c │ ├── main2.c │ ├── multvec.c │ ├── swap.c │ └── vector.h ├── ch08/ │ ├── .gitignore │ ├── alarm.c │ ├── fork.c │ ├── hello-asm.sa │ ├── kill.c │ ├── procmask1.c │ ├── procmask2.c │ ├── restart.c │ ├── rfork.c │ ├── setjmp.c │ ├── shellex.c │ ├── sigint1.c │ ├── signal1.c │ ├── signal2.c │ ├── signal3.c │ ├── signal4.c │ ├── waitpid1.c │ └── waitpid2.c ├── ch09/ │ ├── .gitignore │ ├── memlib.c │ ├── mm-test.c │ └── mm.c ├── ch10/ │ ├── .gitignore │ ├── cpfile.c │ ├── cpstdin.c │ └── statcheck.c ├── ch11/ │ ├── echo.c │ ├── echoclient.c │ ├── echoserveri.c │ ├── hostinfo.c │ └── tiny/ │ ├── cgi-bin/ │ │ └── adder.c │ ├── home.html │ ├── rfc2616.txt │ └── tiny.c └── ch12/ ├── 
Makefile ├── badcnt.c ├── echo_cnt.c ├── echoserverp.c ├── echoservers.c ├── echoservert.c ├── echoservert_pre.c ├── goodcnt.c ├── hello.c ├── sbuf.c ├── sbuf.h ├── select.c └── sharing.c ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ test.c t.c a.out missing TAGS tags # Object files *.o *.s # Libraries *.lib *.a # Shared objects (inc. Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app ================================================ FILE: README ================================================ ================================================================================ 深入理解计算机系统·第二版 Randal E. Bryant David R. O'Hallaron 著 龚奕利 雷迎春 译 机械工业出版社 2012 年 7 月第 1 版第 8 次印刷 ISBN: 978-7-111-32133-0 ================================================================================ 注意: 这个仓库是本人自己学习的过程中,记录下来的一些读书笔记和个人的习题解答。 这里面的习题解答将会有许多的错误,请你不要依赖它们的正确性。在这里,欢迎 指出任何错误。 在这个仓库中,除了主分支之外,其他的分支将会消失。所以,请你不要在非主分 支的基础上做任何的修改。如果你做出了修改,希望这些更新合并到这个仓库的主 分支中,那么请你新建一个分支,然后再修改。 顶层各个目录的说明: bin/ 可执行文件 code/ 书本中的示例代码 common/ 共享代码 exercise/ 家庭作业的题目和个人的解答 notes/ 读书笔记 sample/ 手动录入的示例代码 missing.c 这个程序输出还没有完成的家庭作业的题号 官方网站:http://csapp.cs.cmu.edu/ 官方学生资源:http://csapp.cs.cmu.edu/public/students.html 中文版勘误:http://www.yiligong.org/csapp2e/ 豆瓣书评:http://book.douban.com/subject/5333562/ 卓越书评:http://goo.gl/rhpVY mofaph 2012/11/24 ================================================ FILE: bin/d2h ================================================ #!/usr/bin/perl # Convert list of decimal numbers into hex # 将十进制数字转化为十六进制数字 # 用法: # $ ./d2h 100 500 751 # 100=0x64 # 500=0x1f4 # 751=0x2ef for ($i = 0; $i < @ARGV; $i++) { printf("%d=0x%x\n", $ARGV[$i], $ARGV[$i]); } ================================================ FILE: bin/h2d ================================================ #!/usr/bin/perl # Convert list of hex numbers into 
decimal # 将十六进制数字转化为十进制数字 # 用法: # $ ./h2d 100 0x500 0x751 # 0x100=256 # 0x500=1280 # 0x751=1873 for ($i = 0; $i < @ARGV; $i++) { $val = hex($ARGV[$i]); printf("0x%x=%d\n", $val, $val); } ================================================ FILE: code/conc/badcnt.c ================================================ /* * badcnt.c - An improperly synchronized counter program */ /* $begin badcnt */ #include "csapp.h" void *thread(void *vargp); /* Thread routine prototype */ /* Global shared variable */ volatile int cnt = 0; /* Counter */ int main(int argc, char **argv) { int niters; pthread_t tid1, tid2; /* Check input argument */ if (argc != 2) { printf("usage: %s \n", argv[0]); exit(0); } niters = atoi(argv[1]); /* Create threads and wait for them to finish */ Pthread_create(&tid1, NULL, thread, &niters); Pthread_create(&tid2, NULL, thread, &niters); Pthread_join(tid1, NULL); Pthread_join(tid2, NULL); /* Check result */ if (cnt != (2 * niters)) printf("BOOM! cnt=%d\n", cnt); else printf("OK cnt=%d\n", cnt); exit(0); } /* Thread routine */ void *thread(void *vargp) { int i, niters = *((int *)vargp); for (i = 0; i < niters; i++) //line:conc:badcnt:beginloop cnt++; //line:conc:badcnt:endloop return NULL; } /* $end badcnt */ ================================================ FILE: code/conc/ctime_ts.c ================================================ /* * ctime_ts - A thread-safe wrapper for ctime */ #include "csapp.h" #define MAXSTR 128 static sem_t mutex; /* protects calls to ctime */ static void init_ctime_ts(void) { Sem_init(&mutex, 0, 1); } /* $begin ctime_ts */ char *ctime_ts(const time_t *timep, char *privatep) { char *sharedp; P(&mutex); sharedp = ctime(timep); strcpy(privatep, sharedp); /* Copy string from shared to private */ V(&mutex); return privatep; } /* $end ctime_ts */ int main() { char timestr[MAXSTR]; time_t timeval; /* Thread-safe code to print the current time string */ init_ctime_ts(); timeval = time(NULL); ctime_ts(&timeval, timestr); printf("%s", timestr); 
exit(0); } ================================================ FILE: code/conc/echo_cnt.c ================================================ /* * A thread-safe version of echo that counts the total number * of bytes received from clients. */ /* $begin echo_cnt */ #include "csapp.h" static int byte_cnt; /* byte counter */ static sem_t mutex; /* and the mutex that protects it */ static void init_echo_cnt(void) { Sem_init(&mutex, 0, 1); byte_cnt = 0; } void echo_cnt(int connfd) { int n; char buf[MAXLINE]; rio_t rio; static pthread_once_t once = PTHREAD_ONCE_INIT; Pthread_once(&once, init_echo_cnt); //line:conc:pre:pthreadonce Rio_readinitb(&rio, connfd); //line:conc:pre:rioinitb while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) { P(&mutex); byte_cnt += n; //line:conc:pre:cntaccess1 printf("thread %d received %d (%d total) bytes on fd %d\n", (int) pthread_self(), n, byte_cnt, connfd); //line:conc:pre:cntaccess2 V(&mutex); Rio_writen(connfd, buf, n); } } /* $end echo_cnt */ ================================================ FILE: code/conc/echoserverp.c ================================================ /* * echoserverp.c - A concurrent echo server based on processes */ /* $begin echoserverpmain */ #include "csapp.h" void echo(int connfd); void sigchld_handler(int sig) //line:conc:echoserverp:handlerstart { while (waitpid(-1, 0, WNOHANG) > 0) ; return; } //line:conc:echoserverp:handlerend int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen=sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); Signal(SIGCHLD, sigchld_handler); listenfd = Open_listenfd(port); while (1) { connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen); if (Fork() == 0) { Close(listenfd); /* Child closes its listening socket */ echo(connfd); /* Child services client */ //line:conc:echoserverp:echofun Close(connfd); /* Child closes connection with client */ 
//line:conc:echoserverp:childclose exit(0); /* Child exits */ } Close(connfd); /* Parent closes connected socket (important!) */ //line:conc:echoserverp:parentclose } } /* $end echoserverpmain */ ================================================ FILE: code/conc/echoservers.c ================================================ /* * echoservers.c - A concurrent echo server based on select */ /* $begin echoserversmain */ #include "csapp.h" typedef struct { /* represents a pool of connected descriptors */ //line:conc:echoservers:beginpool int maxfd; /* largest descriptor in read_set */ fd_set read_set; /* set of all active descriptors */ fd_set ready_set; /* subset of descriptors ready for reading */ int nready; /* number of ready descriptors from select */ int maxi; /* highwater index into client array */ int clientfd[FD_SETSIZE]; /* set of active descriptors */ rio_t clientrio[FD_SETSIZE]; /* set of active read buffers */ } pool; //line:conc:echoservers:endpool /* $end echoserversmain */ void init_pool(int listenfd, pool *p); void add_client(int connfd, pool *p); void check_clients(pool *p); /* $begin echoserversmain */ int byte_cnt = 0; /* counts total bytes received by server */ int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; static pool pool; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); init_pool(listenfd, &pool); //line:conc:echoservers:initpool while (1) { /* Wait for listening/connected descriptor(s) to become ready */ pool.ready_set = pool.read_set; pool.nready = Select(pool.maxfd+1, &pool.ready_set, NULL, NULL, NULL); /* If listening descriptor ready, add new client to pool */ if (FD_ISSET(listenfd, &pool.ready_set)) { //line:conc:echoservers:listenfdready connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); //line:conc:echoservers:accept add_client(connfd, &pool); 
//line:conc:echoservers:addclient } /* Echo a text line from each ready connected descriptor */ check_clients(&pool); //line:conc:echoservers:checkclients } } /* $end echoserversmain */ /* $begin init_pool */ void init_pool(int listenfd, pool *p) { /* Initially, there are no connected descriptors */ int i; p->maxi = -1; //line:conc:echoservers:beginempty for (i=0; i< FD_SETSIZE; i++) p->clientfd[i] = -1; //line:conc:echoservers:endempty /* Initially, listenfd is only member of select read set */ p->maxfd = listenfd; //line:conc:echoservers:begininit FD_ZERO(&p->read_set); FD_SET(listenfd, &p->read_set); //line:conc:echoservers:endinit } /* $end init_pool */ /* $begin add_client */ void add_client(int connfd, pool *p) { int i; p->nready--; for (i = 0; i < FD_SETSIZE; i++) /* Find an available slot */ if (p->clientfd[i] < 0) { /* Add connected descriptor to the pool */ p->clientfd[i] = connfd; //line:conc:echoservers:beginaddclient Rio_readinitb(&p->clientrio[i], connfd); //line:conc:echoservers:endaddclient /* Add the descriptor to descriptor set */ FD_SET(connfd, &p->read_set); //line:conc:echoservers:addconnfd /* Update max descriptor and pool highwater mark */ if (connfd > p->maxfd) //line:conc:echoservers:beginmaxfd p->maxfd = connfd; //line:conc:echoservers:endmaxfd if (i > p->maxi) //line:conc:echoservers:beginmaxi p->maxi = i; //line:conc:echoservers:endmaxi break; } if (i == FD_SETSIZE) /* Couldn't find an empty slot */ app_error("add_client error: Too many clients"); } /* $end add_client */ /* $begin check_clients */ /* check_clients - echo one text line from every ready client in pool p, decrementing p->nready for each descriptor serviced; on EOF, close the descriptor and remove it from the pool */ void check_clients(pool *p) { int i, connfd, n; char buf[MAXLINE]; rio_t *riop; for (i = 0; (i <= p->maxi) && (p->nready > 0); i++) { connfd = p->clientfd[i]; riop = &p->clientrio[i]; /* Read through the pool's own buffer: reading through a local copy would throw away any bytes buffered beyond the first line */ /* If the descriptor is ready, echo a text line from it */ if ((connfd > 0) && (FD_ISSET(connfd, &p->ready_set))) { p->nready--; if ((n = Rio_readlineb(riop, buf, MAXLINE)) != 0) { byte_cnt += n; //line:conc:echoservers:beginecho printf("Server received %d (%d total) 
bytes on fd %d\n", n, byte_cnt, connfd); Rio_writen(connfd, buf, n); //line:conc:echoservers:endecho } /* EOF detected, remove descriptor from pool */ else { Close(connfd); //line:conc:echoservers:closeconnfd FD_CLR(connfd, &p->read_set); //line:conc:echoservers:beginremove p->clientfd[i] = -1; //line:conc:echoservers:endremove } } } } /* $end check_clients */ ================================================ FILE: code/conc/echoservert.c ================================================ /* * echoservert.c - A concurrent echo server using threads */ /* $begin echoservertmain */ #include "csapp.h" void echo(int connfd); void *thread(void *vargp); int main(int argc, char **argv) { int listenfd, *connfdp, port; socklen_t clientlen=sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; pthread_t tid; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { connfdp = Malloc(sizeof(int)); //line:conc:echoservert:beginmalloc *connfdp = Accept(listenfd, (SA *) &clientaddr, &clientlen); //line:conc:echoservert:endmalloc Pthread_create(&tid, NULL, thread, connfdp); } } /* thread routine */ void *thread(void *vargp) { int connfd = *((int *)vargp); Pthread_detach(pthread_self()); //line:conc:echoservert:detach Free(vargp); //line:conc:echoservert:free echo(connfd); Close(connfd); return NULL; } /* $end echoservertmain */ ================================================ FILE: code/conc/echoservert_pre.c ================================================ /* * echoservert_pre.c - A prethreaded concurrent echo server */ /* $begin echoservertpremain */ #include "csapp.h" #include "sbuf.h" #define NTHREADS 4 #define SBUFSIZE 16 void echo_cnt(int connfd); void *thread(void *vargp); sbuf_t sbuf; /* shared buffer of connected descriptors */ int main(int argc, char **argv) { int i, listenfd, connfd, port; socklen_t clientlen=sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; pthread_t tid; if (argc != 
2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); sbuf_init(&sbuf, SBUFSIZE); //line:conc:pre:initsbuf listenfd = Open_listenfd(port); for (i = 0; i < NTHREADS; i++) /* Create worker threads */ //line:conc:pre:begincreate Pthread_create(&tid, NULL, thread, NULL); //line:conc:pre:endcreate while (1) { connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen); sbuf_insert(&sbuf, connfd); /* Insert connfd in buffer */ } } void *thread(void *vargp) { Pthread_detach(pthread_self()); while (1) { int connfd = sbuf_remove(&sbuf); /* Remove connfd from buffer */ //line:conc:pre:removeconnfd echo_cnt(connfd); /* Service client */ Close(connfd); } } /* $end echoservertpremain */ ================================================ FILE: code/conc/hello.c ================================================ /* * hello.c - Pthreads "hello, world" program */ /* $begin hello */ #include "csapp.h" void *thread(void *vargp); //line:conc:hello:prototype int main() //line:conc:hello:main { pthread_t tid; //line:conc:hello:tid Pthread_create(&tid, NULL, thread, NULL); //line:conc:hello:create Pthread_join(tid, NULL); //line:conc:hello:join exit(0); //line:conc:hello:exit } void *thread(void *vargp) /* thread routine */ //line:conc:hello:beginthread { printf("Hello, world!\n"); return NULL; //line:conc:hello:return } //line:conc:hello:endthread /* $end hello */ ================================================ FILE: code/conc/hellobug.c ================================================ /* * hellobug.c - "hello, world" program with a bug */ /* $begin hellobug */ #include "csapp.h" void *thread(void *vargp); int main() { pthread_t tid; Pthread_create(&tid, NULL, thread, NULL); exit(0); } /* thread routine */ void *thread(void *vargp) { Sleep(1); printf("Hello, world!\n"); return NULL; } /* $end hellobug */ ================================================ FILE: code/conc/norace.c ================================================ /* * norace.c - fixes the race in race.c 
*/ /* $begin norace */ #include "csapp.h" #define N 4 void *thread(void *vargp); int main() { pthread_t tid[N]; int i, *ptr; for (i = 0; i < N; i++) { ptr = Malloc(sizeof(int)); //line:conc:norace:createthread1 *ptr = i; //line:conc:norace:createthread2 Pthread_create(&tid[i], NULL, thread, ptr); //line:conc:norace:createthread3 } //line:conc:norace:endloop for (i = 0; i < N; i++) Pthread_join(tid[i], NULL); exit(0); } /* thread routine */ void *thread(void *vargp) { int myid = *((int *)vargp); Free(vargp); printf("Hello from thread %d\n", myid); return NULL; } /* $end norace */ ================================================ FILE: code/conc/psum.c ================================================ /* * psum.c - A simple parallel sum program */ /* $begin psum */ #include "csapp.h" #define MAXTHREADS 32 void *sum(void *vargp); /* Global shared variables */ long psum[MAXTHREADS]; /* Partial sum computed by each thread */ long nelems_per_thread; /* Number of elements summed by each thread */ int main(int argc, char **argv) { long i, nelems, log_nelems, nthreads, result = 0; pthread_t tid[MAXTHREADS]; int myid[MAXTHREADS]; /* Get input arguments */ if (argc != 3) { printf("Usage: %s \n", argv[0]); exit(0); } nthreads = atoi(argv[1]); log_nelems = atoi(argv[2]); nelems = (1L << log_nelems); /* $end psum */ /* Check input arguments */ if ((nelems % nthreads) != 0 || (log_nelems > 31)) { printf("Error: invalid nelems\n"); exit(0); } /* $begin psum */ nelems_per_thread = nelems / nthreads; /* Create peer threads and wait for them to finish */ for (i = 0; i < nthreads; i++) { //line:conc:psum:createloop1 myid[i] = i; //line:conc:psum:createloop2 Pthread_create(&tid[i], NULL, sum, &myid[i]); //line:conc:psum:createloop3 } //line:conc:psum:createloop4 for (i = 0; i < nthreads; i++) //line:conc:psum:waitloop1 Pthread_join(tid[i], NULL); //line:conc:psum:waitloop2 /* Add up the partial sums computed by each thread */ for (i = 0; i < nthreads; i++) //line:conc:psum:sumloop1 
result += psum[i]; //line:conc:psum:sumloop2 /* Check final answer */ if (result != (nelems * (nelems-1))/2) //line:conc:psum:check1 printf("Error: result=%ld\n", result); //line:conc:psum:check2 exit(0); } /* $end psum */ /* Thread routine */ /* $begin psum-thread */ void *sum(void *vargp) { int myid = *((int *)vargp); /* Extract the thread ID */ //line:conc:psum:extractid long start = myid * nelems_per_thread; /* Start element index */ //line:conc:psum:getstart long end = start + nelems_per_thread; /* End element index */ //line:conc:psum:getend long i, sum = 0; for (i = start; i < end; i++) { //line:conc:psum:threadsumloop1 sum += i; //line:conc:psum:threadsumloop2 } //line:conc:psum:threadsumloop3 psum[myid] = sum; //line:conc:psum:threadsum return NULL; } /* $end psum-thread */ ================================================ FILE: code/conc/race.c ================================================ /* * race.c - demonstrates a race condition */ /* $begin race */ #include "csapp.h" #define N 4 void *thread(void *vargp); int main() { pthread_t tid[N]; int i; for (i = 0; i < N; i++) Pthread_create(&tid[i], NULL, thread, &i); //line:conc:race:createthread for (i = 0; i < N; i++) Pthread_join(tid[i], NULL); exit(0); } /* thread routine */ void *thread(void *vargp) { int myid = *((int *)vargp); //line:conc:race:derefarg printf("Hello from thread %d\n", myid); return NULL; } /* $end race */ ================================================ FILE: code/conc/rand.c ================================================ #include <stdio.h> #include <stdlib.h> /* $begin rand */ unsigned int next = 1; /* rand - return pseudo-random integer on 0..32767 */ int rand(void) { next = next*1103515245 + 12345; return (unsigned int)(next/65536) % 32768; } /* srand - set seed for rand() */ void srand(unsigned int seed) { next = seed; } /* $end rand */ int main() { srand(100); printf("%d\n", rand()); printf("%d\n", rand()); printf("%d\n", rand()); exit(0); } ================================================ FILE: 
code/conc/rand_r.c ================================================ #include <stdio.h> #include <stdlib.h> /* $begin rand_r */ /* rand_r - a reentrant pseudo-random integer on 0..32767 */ int rand_r(unsigned int *nextp) { *nextp = *nextp * 1103515245 + 12345; return (unsigned int)(*nextp / 65536) % 32768; } /* $end rand_r */ int main() { unsigned int next = 1; printf("%d\n", rand_r(&next)); printf("%d\n", rand_r(&next)); printf("%d\n", rand_r(&next)); exit(0); } ================================================ FILE: code/conc/sbuf.c ================================================ /* $begin sbufc */ #include "csapp.h" #include "sbuf.h" /* Create an empty, bounded, shared FIFO buffer with n slots */ /* $begin sbuf_init */ void sbuf_init(sbuf_t *sp, int n) { sp->buf = Calloc(n, sizeof(int)); sp->n = n; /* Buffer holds max of n items */ sp->front = sp->rear = 0; /* Empty buffer iff front == rear */ Sem_init(&sp->mutex, 0, 1); /* Binary semaphore for locking */ Sem_init(&sp->slots, 0, n); /* Initially, buf has n empty slots */ Sem_init(&sp->items, 0, 0); /* Initially, buf has zero data items */ } /* $end sbuf_init */ /* Clean up buffer sp */ /* $begin sbuf_deinit */ void sbuf_deinit(sbuf_t *sp) { Free(sp->buf); } /* $end sbuf_deinit */ /* Insert item onto the rear of shared buffer sp */ /* $begin sbuf_insert */ void sbuf_insert(sbuf_t *sp, int item) { P(&sp->slots); /* Wait for available slot */ P(&sp->mutex); /* Lock the buffer */ sp->buf[(++sp->rear)%(sp->n)] = item; /* Insert the item */ V(&sp->mutex); /* Unlock the buffer */ V(&sp->items); /* Announce available item */ } /* $end sbuf_insert */ /* Remove and return the first item from buffer sp */ /* $begin sbuf_remove */ int sbuf_remove(sbuf_t *sp) { int item; P(&sp->items); /* Wait for available item */ P(&sp->mutex); /* Lock the buffer */ item = sp->buf[(++sp->front)%(sp->n)]; /* Remove the item */ V(&sp->mutex); /* Unlock the buffer */ V(&sp->slots); /* Announce available slot */ return item; } /* $end sbuf_remove */ /* $end sbufc 
*/ ================================================ FILE: code/conc/sbuf.h ================================================ #ifndef __SBUF_H__ #define __SBUF_H__ #include "csapp.h" /* $begin sbuft */ typedef struct { int *buf; /* Buffer array */ int n; /* Maximum number of slots */ int front; /* buf[(front+1)%n] is first item */ int rear; /* buf[rear%n] is last item */ sem_t mutex; /* Protects accesses to buf */ sem_t slots; /* Counts available slots */ sem_t items; /* Counts available items */ } sbuf_t; /* $end sbuft */ void sbuf_init(sbuf_t *sp, int n); void sbuf_deinit(sbuf_t *sp); void sbuf_insert(sbuf_t *sp, int item); int sbuf_remove(sbuf_t *sp); #endif /* __SBUF_H__ */ ================================================ FILE: code/conc/select.c ================================================ /* $begin select */ #include "csapp.h" void echo(int connfd); void command(void); int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; fd_set read_set, ready_set; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); //line:conc:select:openlistenfd FD_ZERO(&read_set); /* Clear read set */ //line:conc:select:clearreadset FD_SET(STDIN_FILENO, &read_set); /* Add stdin to read set */ //line:conc:select:addstdin FD_SET(listenfd, &read_set); /* Add listenfd to read set */ //line:conc:select:addlistenfd while (1) { ready_set = read_set; Select(listenfd+1, &ready_set, NULL, NULL, NULL); //line:conc:select:select if (FD_ISSET(STDIN_FILENO, &ready_set)) //line:conc:select:stdinready command(); /* Read command line from stdin */ if (FD_ISSET(listenfd, &ready_set)) { //line:conc:select:listenfdready connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); echo(connfd); /* Echo client input until EOF */ Close(connfd); } } } void command(void) { char buf[MAXLINE]; if (!Fgets(buf, MAXLINE, stdin)) exit(0); /* EOF */ printf("%s", 
buf); /* Process the input command */ } /* $end select */ ================================================ FILE: code/conc/sharing.c ================================================ /* $begin sharing */ #include "csapp.h" #define N 2 void *thread(void *vargp); char **ptr; /* global variable */ //line:conc:sharing:ptrdec int main() { int i; pthread_t tid; char *msgs[N] = { "Hello from foo", "Hello from bar" }; ptr = msgs; for (i = 0; i < N; i++) Pthread_create(&tid, NULL, thread, (void *)i); Pthread_exit(NULL); } void *thread(void *vargp) { int myid = (int)vargp; static int cnt = 0; //line:conc:sharing:cntdec printf("[%d]: %s (cnt=%d)\n", myid, ptr[myid], ++cnt); //line:conc:sharing:stack return NULL; } /* $end sharing */ ================================================ FILE: code/conc/tfgets-main.c ================================================ /* $begin tfgetsmain */ #include "csapp.h" char *tfgets(char *s, int size, FILE *stream); int main() { char buf[MAXLINE]; if (tfgets(buf, MAXLINE, stdin) == NULL) printf("BOOM!\n"); else printf("%s", buf); exit(0); } /* $end tfgetsmain */ ================================================ FILE: code/data/show-bytes.c ================================================ /* $begin show-bytes */ #include <stdio.h> /* $end show-bytes */ #include <stdlib.h> #include <string.h> /* $begin show-bytes */ typedef unsigned char *byte_pointer; void show_bytes(byte_pointer start, int len) { int i; for (i = 0; i < len; i++) printf(" %.2x", start[i]); //line:data:show_bytes_printf printf("\n"); } void show_int(int x) { show_bytes((byte_pointer) &x, sizeof(int)); //line:data:show_bytes_amp1 } void show_float(float x) { show_bytes((byte_pointer) &x, sizeof(float)); //line:data:show_bytes_amp2 } void show_pointer(void *x) { show_bytes((byte_pointer) &x, sizeof(void *)); //line:data:show_bytes_amp3 } /* $end show-bytes */ /* $begin test-show-bytes */ void test_show_bytes(int val) { int ival = val; float fval = (float) ival; int *pval = &ival; show_int(ival); show_float(fval); 
show_pointer(pval); } /* $end test-show-bytes */ void simple_show_a() { /* $begin simple-show-a */ int val = 0x87654321; byte_pointer valp = (byte_pointer) &val; show_bytes(valp, 1); /* A. */ show_bytes(valp, 2); /* B. */ show_bytes(valp, 3); /* C. */ /* $end simple-show-a */ } void simple_show_b() { /* $begin simple-show-b */ int val = 0x12345678; byte_pointer valp = (byte_pointer) &val; show_bytes(valp, 1); /* A. */ show_bytes(valp, 2); /* B. */ show_bytes(valp, 3); /* C. */ /* $end simple-show-b */ } void float_eg() { int x = 3490593; float f = (float) x; printf("For x = %d\n", x); show_int(x); show_float(f); x = 3510593; f = (float) x; printf("For x = %d\n", x); show_int(x); show_float(f); } void string_ueg() { /* $begin show-ustring */ const char *s = "ABCDEF"; show_bytes((byte_pointer) s, strlen(s)); /* $end show-ustring */ } void string_leg() { /* $begin show-lstring */ const char *s = "abcdef"; show_bytes((byte_pointer) s, strlen(s)); /* $end show-lstring */ } void show_twocomp() { /* $begin show-twocomp */ short x = 12345; short mx = -x; show_bytes((byte_pointer) &x, sizeof(short)); show_bytes((byte_pointer) &mx, sizeof(short)); /* $end show-twocomp */ } int main(int argc, char *argv[]) { int val = 12345; if (argc > 1) { if (argc > 1) { val = strtol(argv[1], NULL, 0); } printf("calling test_show_bytes\n"); test_show_bytes(val); } else { printf("calling show_twocomp\n"); show_twocomp(); printf("Calling simple_show_a\n"); simple_show_a(); printf("Calling simple_show_b\n"); simple_show_b(); printf("Calling float_eg\n"); float_eg(); printf("Calling string_ueg\n"); string_ueg(); printf("Calling string_leg\n"); string_leg(); } return 0; } ================================================ FILE: code/ecf/alarm.c ================================================ /* $begin alarm */ #include "csapp.h" void handler(int sig) { static int beeps = 0; printf("BEEP\n"); if (++beeps < 5) Alarm(1); /* Next SIGALRM will be delivered in 1 second */ else { printf("BOOM!\n"); 
exit(0); } } int main() { Signal(SIGALRM, handler); /* install SIGALRM handler */ Alarm(1); /* Next SIGALRM will be delivered in 1s */ while (1) { ; /* Signal handler returns control here each time */ } exit(0); } /* $end alarm */ ================================================ FILE: code/ecf/counterprob.c ================================================ /* $begin counterprob */ #include "csapp.h" int counter = 0; void handler(int sig) { counter++; sleep(1); /* Do some work in the handler */ return; } int main() { int i; Signal(SIGUSR2, handler); if (Fork() == 0) { /* Child */ for (i = 0; i < 5; i++) { Kill(getppid(), SIGUSR2); printf("sent SIGUSR2 to parent\n"); } exit(0); } Wait(NULL); printf("counter=%d\n", counter); exit(0); } /* $end counterprob */ ================================================ FILE: code/ecf/fork.c ================================================ /* $begin fork */ #include "csapp.h" int main() { pid_t pid; int x = 1; pid = Fork(); //line:ecf:forkreturn if (pid == 0) { /* Child */ printf("child : x=%d\n", ++x); //line:ecf:childprint exit(0); } /* Parent */ printf("parent: x=%d\n", --x); //line:ecf:parentprint exit(0); } /* $end fork */ ================================================ FILE: code/ecf/forkprob0.c ================================================ /* $begin forkprob0 */ #include "csapp.h" int main() { int x = 1; if (Fork() == 0) printf("printf1: x=%d\n", ++x); printf("printf2: x=%d\n", --x); exit(0); } /* $end forkprob0 */ ================================================ FILE: code/ecf/forkprob1.c ================================================ /* $begin forkprob1 */ #include "csapp.h" int main() { int i; for (i = 0; i < 2; i++) Fork(); printf("hello\n"); exit(0); } /* $end forkprob1 */ ================================================ FILE: code/ecf/forkprob2.c ================================================ /* $begin forkprob2 */ #include "csapp.h" void end(void) { printf("2"); } int main() { if (Fork() == 0) atexit(end); if 
(Fork() == 0) printf("0"); else printf("1"); exit(0); } /* $end forkprob2 */ ================================================ FILE: code/ecf/forkprob3.c ================================================ /* $begin forkprob3 */ #include "csapp.h" int main() { int x = 3; if (Fork() != 0) printf("x=%d\n", ++x); printf("x=%d\n", --x); exit(0); } /* $end forkprob3 */ ================================================ FILE: code/ecf/forkprob4.c ================================================ /* $begin forkprob4 */ #include "csapp.h" void doit() { Fork(); Fork(); printf("hello\n"); return; } int main() { doit(); printf("hello\n"); exit(0); } /* $end forkprob4 */ ================================================ FILE: code/ecf/forkprob5.c ================================================ /* $begin forkprob5 */ #include "csapp.h" void doit() { if (Fork() == 0) { Fork(); printf("hello\n"); exit(0); } return; } int main() { doit(); printf("hello\n"); exit(0); } /* $end forkprob5 */ ================================================ FILE: code/ecf/forkprob6.c ================================================ /* $begin forkprob6 */ #include "csapp.h" void doit() { if (Fork() == 0) { Fork(); printf("hello\n"); return; } return; } int main() { doit(); printf("hello\n"); exit(0); } /* $end forkprob6 */ ================================================ FILE: code/ecf/forkprob7.c ================================================ /* $begin forkprob7 */ #include "csapp.h" int counter = 1; int main() { if (fork() == 0) { counter--; exit(0); } else { Wait(NULL); printf("counter = %d\n", ++counter); } exit(0); } /* $end forkprob7 */ ================================================ FILE: code/ecf/forkprob8.c ================================================ #include "csapp.h" /* $begin forkprob8 */ void foo(int n) { int i; for (i = 0; i < n; i++) Fork(); printf("hello\n"); exit(0); } /* $end forkprob8 */ int main(int argc, char **argv) { if (argc < 2) { printf("usage: %s \n", argv[0]); exit(0); } 
foo(atoi(argv[1])); exit(0); } ================================================ FILE: code/ecf/hello-asm.sa ================================================ /* $begin hello-s 1 */ .section .data string: .ascii "hello, world\n" string_end: .equ len, string_end - string .section .text .globl main main: # First, call write(1, "hello, world\n", 13) movl $4, %eax # System call number 4 movl $1, %ebx # stdout has descriptor 1 movl $string, %ecx # Hello world string movl $len, %edx # String length int $0x80 # System call code # Next, call exit(0) movl $1, %eax # System call number 0 movl $0, %ebx # Argument is 0 int $0x80 # System call code /* $end hello-s 1 */ ================================================ FILE: code/ecf/kill.c ================================================ /* $begin kill */ #include "csapp.h" int main() { pid_t pid; /* Child sleeps until SIGKILL signal received, then dies */ if ((pid = Fork()) == 0) { Pause(); /* Wait for a signal to arrive */ printf("control should never reach here!\n"); exit(0); } /* Parent sends a SIGKILL signal to a child */ Kill(pid, SIGKILL); exit(0); } /* $end kill */ ================================================ FILE: code/ecf/procmask1.c ================================================ #include "csapp.h" void initjobs() { } void addjob(int pid) { } void deletejob(int pid) { } /* $begin procmask1 */ void handler(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) /* Reap a zombie child */ deletejob(pid); /* Delete the child from the job list */ if (errno != ECHILD) unix_error("waitpid error"); } int main(int argc, char **argv) { int pid; Signal(SIGCHLD, handler); initjobs(); /* Initialize the job list */ while (1) { /* Child process */ if ((pid = Fork()) == 0) { Execve("/bin/date", argv, NULL); } /* Parent process */ addjob(pid); /* Add the child to the job list */ } exit(0); } /* $end procmask1 */ ================================================ FILE: code/ecf/procmask2.c 
================================================ #include "csapp.h" void initjobs() { } void addjob(int pid) { } void deletejob(int pid) { } /* $begin procmask2 */ void handler(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) /* Reap a zombie child */ deletejob(pid); /* Delete the child from the job list */ if (errno != ECHILD) unix_error("waitpid error"); } int main(int argc, char **argv) { int pid; sigset_t mask; Signal(SIGCHLD, handler); initjobs(); /* Initialize the job list */ while (1) { Sigemptyset(&mask); Sigaddset(&mask, SIGCHLD); Sigprocmask(SIG_BLOCK, &mask, NULL); /* Block SIGCHLD */ /* Child process */ if ((pid = Fork()) == 0) { Sigprocmask(SIG_UNBLOCK, &mask, NULL); /* Unblock SIGCHLD */ Execve("/bin/date", argv, NULL); } /* Parent process */ addjob(pid); /* Add the child to the job list */ Sigprocmask(SIG_UNBLOCK, &mask, NULL); /* Unblock SIGCHLD */ } exit(0); } /* $end procmask2 */ ================================================ FILE: code/ecf/restart.c ================================================ /* $begin restart */ #include "csapp.h" sigjmp_buf buf; void handler(int sig) { siglongjmp(buf, 1); } int main() { Signal(SIGINT, handler); if (!sigsetjmp(buf, 1)) printf("starting\n"); else printf("restarting\n"); while(1) { Sleep(1); printf("processing...\n"); } exit(0); } /* $end restart */ ================================================ FILE: code/ecf/rfork.c ================================================ /* * rfork.c - Wrapper for fork() that introduces non-determinism * in the order that the parent and child are executed */ /* $begin rfork */ #include #include #include #include #include /* Sleep for a random period between [0, MAX_SLEEP] us */ #define MAX_SLEEP 100000 /* Macro that maps val into the range [0, RAND_MAX] */ #define CONVERT(val) (((double)val)/(double)RAND_MAX) pid_t Fork(void) { static struct timeval time; unsigned bool, secs; pid_t pid; /* Generate a different seed each time the function is called */ 
gettimeofday(&time, NULL); srand(time.tv_usec); /* Determine whether to sleep in parent of child and for how long */ bool = (unsigned)(CONVERT(rand()) + 0.5); secs = (unsigned)(CONVERT(rand()) * MAX_SLEEP); /* Call the real fork function */ if ((pid = fork()) < 0) return pid; /* Randomly decide to sleep in the parent or the child */ if (pid == 0) { /* Child */ if(bool) { usleep(secs); } } else { /* Parent */ if (!bool) { usleep(secs); } } /* Return the PID like a normal fork call */ return pid; } /* $end rfork */ ================================================ FILE: code/ecf/setjmp.c ================================================ /* $begin setjmp */ #include "csapp.h" jmp_buf buf; int error1 = 0; int error2 = 1; void foo(void), bar(void); int main() { int rc; rc = setjmp(buf); if (rc == 0) foo(); else if (rc == 1) printf("Detected an error1 condition in foo\n"); else if (rc == 2) printf("Detected an error2 condition in foo\n"); else printf("Unknown error condition in foo\n"); exit(0); } /* Deeply nested function foo */ void foo(void) { if (error1) longjmp(buf, 1); bar(); } void bar(void) { if (error2) longjmp(buf, 2); } /* $end setjmp */ ================================================ FILE: code/ecf/shellex.c ================================================ /* $begin shellmain */ #include "csapp.h" #define MAXARGS 128 /* function prototypes */ void eval(char *cmdline); int parseline(char *buf, char **argv); int builtin_command(char **argv); int main() { char cmdline[MAXLINE]; /* Command line */ while (1) { /* Read */ printf("> "); Fgets(cmdline, MAXLINE, stdin); if (feof(stdin)) exit(0); /* Evaluate */ eval(cmdline); } } /* $end shellmain */ /* $begin eval */ /* eval - Evaluate a command line */ void eval(char *cmdline) { char *argv[MAXARGS]; /* Argument list execve() */ char buf[MAXLINE]; /* Holds modified command line */ int bg; /* Should the job run in bg or fg? 
*/ pid_t pid; /* Process id */ strcpy(buf, cmdline); bg = parseline(buf, argv); if (argv[0] == NULL) return; /* Ignore empty lines */ if (!builtin_command(argv)) { if ((pid = Fork()) == 0) { /* Child runs user job */ if (execve(argv[0], argv, environ) < 0) { printf("%s: Command not found.\n", argv[0]); exit(0); } } /* Parent waits for foreground job to terminate */ if (!bg) { int status; if (waitpid(pid, &status, 0) < 0) unix_error("waitfg: waitpid error"); } else printf("%d %s", pid, cmdline); } return; } /* If first arg is a builtin command, run it and return true */ int builtin_command(char **argv) { if (!strcmp(argv[0], "quit")) /* quit command */ exit(0); if (!strcmp(argv[0], "&")) /* Ignore singleton & */ return 1; return 0; /* Not a builtin command */ } /* $end eval */ /* $begin parseline */ /* parseline - Parse the command line and build the argv array */ int parseline(char *buf, char **argv) { char *delim; /* Points to first space delimiter */ int argc; /* Number of args */ int bg; /* Background job? */ buf[strlen(buf)-1] = ' '; /* Replace trailing '\n' with space */ while (*buf && (*buf == ' ')) /* Ignore leading spaces */ buf++; /* Build the argv list */ argc = 0; while ((delim = strchr(buf, ' '))) { argv[argc++] = buf; *delim = '\0'; buf = delim + 1; while (*buf && (*buf == ' ')) /* Ignore spaces */ buf++; } argv[argc] = NULL; if (argc == 0) /* Ignore blank line */ return 1; /* Should the job run in the background? 
*/
    /* A last argument starting with '&' is removed from argv and
       makes the return value nonzero */
    if ((bg = (*argv[argc-1] == '&')) != 0)
        argv[--argc] = NULL;

    return bg;
}
/* $end parseline */

================================================
FILE: code/ecf/sigint1.c
================================================
/* $begin sigint1 */
#include "csapp.h"

void handler(int sig) /* SIGINT handler */   //line:ecf:sigint1:beginhandler
{
    printf("Caught SIGINT\n");               //line:ecf:sigint1:printhandler
    exit(0);                                 //line:ecf:sigint1:exithandler
}                                            //line:ecf:sigint1:endhandler

int main()
{
    /* Install the SIGINT handler */
    if (signal(SIGINT, handler) == SIG_ERR)  //line:ecf:sigint1:begininstall
        unix_error("signal error");          //line:ecf:sigint1:endinstall

    pause(); /* Wait for the receipt of a signal */  //line:ecf:sigint1:pause

    exit(0);
}
/* $end sigint1 */

================================================
FILE: code/ecf/signal1.c
================================================
/* $begin signal1 */
#include "csapp.h"

/* SIGCHLD handler: reaps exactly one child per invocation, prints
   its pid, then sleeps for two seconds */
void handler1(int sig)
{
    pid_t pid;

    if ((pid = waitpid(-1, NULL, 0)) < 0)
        unix_error("waitpid error");
    printf("Handler reaped child %d\n", (int)pid);
    Sleep(2);
    return;
}

/* Creates three children that each print a greeting, sleep one
   second and exit; the parent then does one read from standard
   input and spins forever */
int main()
{
    int i, n;
    char buf[MAXBUF];

    if (signal(SIGCHLD, handler1) == SIG_ERR)
        unix_error("signal error");

    /* Parent creates children */
    for (i = 0; i < 3; i++) {
        if (Fork() == 0) {
            printf("Hello from child %d\n", (int)getpid());
            Sleep(1);
            exit(0);
        }
    }

    /* Parent waits for terminal input and then processes it */
    if ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0)
        unix_error("read");

    printf("Parent processing input\n");
    while (1)
        ;

    exit(0);
}
/* $end signal1 */

================================================
FILE: code/ecf/signal2.c
================================================
/* $begin signal2 */
#include "csapp.h"

/* SIGCHLD handler: keeps calling waitpid until it stops returning a
   child pid, printing each reaped pid; a failure other than ECHILD
   is reported.  Sleeps two seconds before returning. */
void handler2(int sig)
{
    pid_t pid;

    while ((pid = waitpid(-1, NULL, 0)) > 0)
        printf("Handler reaped child %d\n", (int)pid);
    if (errno != ECHILD)
        unix_error("waitpid error");
    Sleep(2);
    return;
}

int main()
{
    int i, n;
    char buf[MAXBUF];

    if (signal(SIGCHLD, handler2) == SIG_ERR)
unix_error("signal error");

    /* Parent creates children */
    for (i = 0; i < 3; i++) {
        if (Fork() == 0) {
            printf("Hello from child %d\n", (int)getpid());
            Sleep(1);
            exit(0);
        }
    }

    /* Parent waits for terminal input and then processes it */
    if ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0)
        unix_error("read error");

    printf("Parent processing input\n");
    while (1)
        ;

    exit(0);
}
/* $end signal2 */

================================================
FILE: code/ecf/signal3.c
================================================
/* $begin signal3 */
#include "csapp.h"

/* SIGCHLD handler: keeps calling waitpid until it stops returning a
   child pid, printing each reaped pid; a failure other than ECHILD
   is reported.  Sleeps two seconds before returning. */
void handler2(int sig)
{
    pid_t pid;

    while ((pid = waitpid(-1, NULL, 0)) > 0)
        printf("Handler reaped child %d\n", (int)pid);
    if (errno != ECHILD)
        unix_error("waitpid error");
    Sleep(2);
    return;
}

/* Same structure as a plain fork-three-children-then-read program,
   except that a read failing with EINTR is retried */
int main()
{
    int i, n;
    char buf[MAXBUF];
    pid_t pid;

    if (signal(SIGCHLD, handler2) == SIG_ERR)
        unix_error("signal error");

    /* Parent creates children */
    for (i = 0; i < 3; i++) {
        pid = Fork();
        if (pid == 0) {
            printf("Hello from child %d\n", (int)getpid());
            Sleep(1);
            exit(0);
        }
    }

    /* Manually restart the read call if it is interrupted */
    while ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0)
        if (errno != EINTR)
            unix_error("read error");

    printf("Parent processing input\n");
    while (1)
        ;

    exit(0);
}
/* $end signal3 */

================================================
FILE: code/ecf/signal4.c
================================================
/* $begin signal4 */
#include "csapp.h"

/* SIGCHLD handler: keeps calling waitpid until it stops returning a
   child pid, printing each reaped pid; a failure other than ECHILD
   is reported.  Sleeps two seconds before returning. */
void handler2(int sig)
{
    pid_t pid;

    while ((pid = waitpid(-1, NULL, 0)) > 0)
        printf("Handler reaped child %d\n", (int)pid);
    if (errno != ECHILD)
        unix_error("waitpid error");
    Sleep(2);
    return;
}

/* Installs the handler through the Signal wrapper instead of calling
   signal directly, and does a single read with no EINTR retry */
int main()
{
    int i, n;
    char buf[MAXBUF];
    pid_t pid;

    Signal(SIGCHLD, handler2); /* sigaction error-handling wrapper */

    /* Parent creates children */
    for (i = 0; i < 3; i++) {
        pid = Fork();
        if (pid == 0) {
            printf("Hello from child %d\n", (int)getpid());
            Sleep(1);
            exit(0);
        }
    }

    /* Parent waits for terminal input and then processes it */
    if ((n = read(STDIN_FILENO, buf,
sizeof(buf))) < 0) unix_error("read error"); printf("Parent processing input\n"); while (1) ; exit(0); } /* $end signal4 */ ================================================ FILE: code/ecf/signalprob0.c ================================================ #include #include #include #include #include /* $begin signalprob0 */ pid_t pid; int counter = 2; void handler1(int sig) { counter = counter - 1; printf("%d", counter); fflush(stdout); exit(0); } int main() { signal(SIGUSR1, handler1); printf("%d", counter); fflush(stdout); if ((pid = fork()) == 0) { while(1) {}; } kill(pid, SIGUSR1); waitpid(-1, NULL, 0); counter = counter + 1; printf("%d", counter); exit(0); } /* $end signalprob0 */ ================================================ FILE: code/ecf/waitpid1.c ================================================ /* $begin waitpid1 */ #include "csapp.h" #define N 2 int main() { int status, i; pid_t pid; /* Parent creates N children */ for (i = 0; i < N; i++) //line:ecf:waitpid1:for if ((pid = Fork()) == 0) /* child */ //line:ecf:waitpid1:fork exit(100+i); //line:ecf:waitpid1:exit /* Parent reaps N children in no particular order */ while ((pid = waitpid(-1, &status, 0)) > 0) { //line:ecf:waitpid1:waitpid if (WIFEXITED(status)) //line:ecf:waitpid1:wifexited printf("child %d terminated normally with exit status=%d\n", pid, WEXITSTATUS(status)); //line:ecf:waitpid1:wexitstatus else printf("child %d terminated abnormally\n", pid); } /* The only normal termination is if there are no more children */ if (errno != ECHILD) //line:ecf:waitpid1:errno unix_error("waitpid error"); exit(0); } /* $end waitpid1 */ ================================================ FILE: code/ecf/waitpid2.c ================================================ /* $begin waitpid2 */ #include "csapp.h" #define N 2 int main() { int status, i; pid_t pid[N], retpid; /* Parent creates N children */ for (i = 0; i < N; i++) if ((pid[i] = Fork()) == 0) /* Child */ //line:ecf:waitpid2:fork exit(100+i); /* Parent reaps N 
children in order */ i = 0; while ((retpid = waitpid(pid[i++], &status, 0)) > 0) { //line:ecf:waitpid2:waitpid if (WIFEXITED(status)) printf("child %d terminated normally with exit status=%d\n", retpid, WEXITSTATUS(status)); else printf("child %d terminated abnormally\n", retpid); } /* The only normal termination is if there are no more children */ if (errno != ECHILD) unix_error("waitpid error"); exit(0); } /* $end waitpid2 */ ================================================ FILE: code/ecf/waitprob0.c ================================================ #include "csapp.h" /* $begin waitprob0 */ int main() { if (Fork() == 0) { printf("a"); } else { printf("b"); waitpid(-1, NULL, 0); } printf("c"); exit(0); } /* $end waitprob0 */ ================================================ FILE: code/ecf/waitprob1.c ================================================ #include "csapp.h" /* $begin waitprob1 */ int main() { int status; pid_t pid; printf("Hello\n"); pid = Fork(); printf("%d\n", !pid); if (pid != 0) { if (waitpid(-1, &status, 0) > 0) { if (WIFEXITED(status) != 0) printf("%d\n", WEXITSTATUS(status)); } } printf("Bye\n"); exit(2); } /* $end waitprob1 */ ================================================ FILE: code/ecf/waitprob3.c ================================================ #include "csapp.h" /* $begin waitprob3 */ int main() { if (fork() == 0) { printf("a"); exit(0); } else { printf("b"); waitpid(-1, NULL, 0); } printf("c"); exit(0); } /* $end waitprob3 */ ================================================ FILE: code/include/csapp.h ================================================ /* $begin csapp.h */ #ifndef __CSAPP_H__ #define __CSAPP_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Default file permissions are DEF_MODE & ~DEF_UMASK */ /* $begin createmasks */ #define DEF_MODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH 
#define DEF_UMASK S_IWGRP|S_IWOTH /* $end createmasks */ /* Simplifies calls to bind(), connect(), and accept() */ /* $begin sockaddrdef */ typedef struct sockaddr SA; /* $end sockaddrdef */ /* Persistent state for the robust I/O (Rio) package */ /* $begin rio_t */ #define RIO_BUFSIZE 8192 typedef struct { int rio_fd; /* descriptor for this internal buf */ int rio_cnt; /* unread bytes in internal buf */ char *rio_bufptr; /* next unread byte in internal buf */ char rio_buf[RIO_BUFSIZE]; /* internal buffer */ } rio_t; /* $end rio_t */ /* External variables */ extern int h_errno; /* defined by BIND for DNS errors */ extern char **environ; /* defined by libc */ /* Misc constants */ #define MAXLINE 8192 /* max text line length */ #define MAXBUF 8192 /* max I/O buffer size */ #define LISTENQ 1024 /* second argument to listen() */ /* Our own error-handling functions */ void unix_error(char *msg); void posix_error(int code, char *msg); void dns_error(char *msg); void app_error(char *msg); /* Process control wrappers */ pid_t Fork(void); void Execve(const char *filename, char *const argv[], char *const envp[]); pid_t Wait(int *status); pid_t Waitpid(pid_t pid, int *iptr, int options); void Kill(pid_t pid, int signum); unsigned int Sleep(unsigned int secs); void Pause(void); unsigned int Alarm(unsigned int seconds); void Setpgid(pid_t pid, pid_t pgid); pid_t Getpgrp(); /* Signal wrappers */ typedef void handler_t(int); handler_t *Signal(int signum, handler_t *handler); void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset); void Sigemptyset(sigset_t *set); void Sigfillset(sigset_t *set); void Sigaddset(sigset_t *set, int signum); void Sigdelset(sigset_t *set, int signum); int Sigismember(const sigset_t *set, int signum); /* Unix I/O wrappers */ int Open(const char *pathname, int flags, mode_t mode); ssize_t Read(int fd, void *buf, size_t count); ssize_t Write(int fd, const void *buf, size_t count); off_t Lseek(int fildes, off_t offset, int whence); void Close(int 
fd);
int Select(int n, fd_set *readfds, fd_set *writefds,
           fd_set *exceptfds, struct timeval *timeout);
int Dup2(int fd1, int fd2);
void Stat(const char *filename, struct stat *buf);
void Fstat(int fd, struct stat *buf) ;

/* NOTE(review): the capitalized names below are declared as wrappers
   around the same-named library calls; their definitions are not in
   this header, so how each one reports failure must be checked in
   the implementation file */

/* Memory mapping wrappers */
void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset);
void Munmap(void *start, size_t length);

/* Standard I/O wrappers */
void Fclose(FILE *fp);
FILE *Fdopen(int fd, const char *type);
char *Fgets(char *ptr, int n, FILE *stream);
FILE *Fopen(const char *filename, const char *mode);
void Fputs(const char *ptr, FILE *stream);
size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);

/* Dynamic storage allocation wrappers */
void *Malloc(size_t size);
void *Realloc(void *ptr, size_t size);
void *Calloc(size_t nmemb, size_t size);
void Free(void *ptr);

/* Sockets interface wrappers */
int Socket(int domain, int type, int protocol);
void Setsockopt(int s, int level, int optname, const void *optval, int optlen);
void Bind(int sockfd, struct sockaddr *my_addr, int addrlen);
void Listen(int s, int backlog);
int Accept(int s, struct sockaddr *addr, socklen_t *addrlen);
void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen);

/* DNS wrappers */
struct hostent *Gethostbyname(const char *name);
struct hostent *Gethostbyaddr(const char *addr, int len, int type);

/* Pthreads thread control wrappers */
void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp,
                    void * (*routine)(void *), void *argp);
void Pthread_join(pthread_t tid, void **thread_return);
void Pthread_cancel(pthread_t tid);
void Pthread_detach(pthread_t tid);
void Pthread_exit(void *retval);
pthread_t Pthread_self(void);
void Pthread_once(pthread_once_t *once_control, void (*init_function)());

/* POSIX semaphore wrappers */
void Sem_init(sem_t *sem, int pshared, unsigned int value);
/* NOTE(review): P and V presumably wrap sem_wait and sem_post - confirm */
void P(sem_t *sem);
void V(sem_t *sem);

/* Rio (Robust I/O) package */
ssize_t rio_readn(int fd, void *usrbuf, size_t n); ssize_t rio_writen(int fd, void *usrbuf, size_t n); void rio_readinitb(rio_t *rp, int fd); ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n); ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); /* Wrappers for Rio package */ ssize_t Rio_readn(int fd, void *usrbuf, size_t n); void Rio_writen(int fd, void *usrbuf, size_t n); void Rio_readinitb(rio_t *rp, int fd); ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n); ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); /* Client/server helper functions */ int open_clientfd(char *hostname, int portno); int open_listenfd(int portno); /* Wrappers for client/server helper functions */ int Open_clientfd(char *hostname, int port); int Open_listenfd(int port); #endif /* __CSAPP_H__ */ /* $end csapp.h */ ================================================ FILE: code/intro/hello.c ================================================ /* $begin hello */ #include int main() { printf("hello, world\n"); } /* $end hello */ ================================================ FILE: code/io/cpfile.c ================================================ /* $begin cpfile */ #include "csapp.h" int main(int argc, char **argv) { int n; rio_t rio; char buf[MAXLINE]; Rio_readinitb(&rio, STDIN_FILENO); while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) Rio_writen(STDOUT_FILENO, buf, n); /* $end cpfile */ exit(0); /* $begin cpfile */ } /* $end cpfile */ ================================================ FILE: code/io/cpstdin.c ================================================ /* $begin cpstdin */ #include "csapp.h" int main(void) { char c; while(Read(STDIN_FILENO, &c, 1) != 0) Write(STDOUT_FILENO, &c, 1); exit(0); } /* $end cpstdin */ ================================================ FILE: code/io/statcheck.c ================================================ /* $begin statcheck */ #include "csapp.h" int main (int argc, char **argv) { struct stat stat; char *type, *readok; /* $end 
statcheck */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } /* $begin statcheck */ Stat(argv[1], &stat); if (S_ISREG(stat.st_mode)) /* Determine file type */ type = "regular"; else if (S_ISDIR(stat.st_mode)) type = "directory"; else type = "other"; if ((stat.st_mode & S_IRUSR)) /* Check read access */ readok = "yes"; else readok = "no"; printf("type: %s, read: %s\n", type, readok); exit(0); } /* $end statcheck */ ================================================ FILE: code/link/addvec.c ================================================ /* addvec.c */ /* $begin addvec */ void addvec(int *x, int *y, int *z, int n) { int i; for (i = 0; i < n; i++) z[i] = x[i] + y[i]; } /* $end addvec */ ================================================ FILE: code/link/dll.c ================================================ /* $begin dll */ #include #include #include int x[2] = {1, 2}; int y[2] = {3, 4}; int z[2]; int main() { void *handle; void (*addvec)(int *, int *, int *, int); char *error; /* dynamically load the shared library that contains addvec() */ handle = dlopen("./libvector.so", RTLD_LAZY); if (!handle) { fprintf(stderr, "%s\n", dlerror()); exit(1); } /* get a pointer to the addvec() function we just loaded */ addvec = dlsym(handle, "addvec"); if ((error = dlerror()) != NULL) { fprintf(stderr, "%s\n", error); exit(1); } /* Now we can call addvec() it just like any other function */ addvec(x, y, z, 2); printf("z = [%d %d]\n", z[0], z[1]); /* unload the shared library */ if (dlclose(handle) < 0) { fprintf(stderr, "%s\n", dlerror()); exit(1); } return 0; } /* $end dll */ ================================================ FILE: code/link/elfstructs.c ================================================ /* $begin elfsymbol */ typedef struct { int name; /* string table offset */ int value; /* section offset, or VM address */ int size; /* object size in bytes */ char type:4, /* data, func, section, or src file name (4 bits) */ binding:4; /* local or global (4 bits) 
*/ char reserved; /* unused */ char section; /* section header index, ABS, UNDEF, */ /* or COMMON */ } Elf_Symbol; /* $end elfsymbol */ /* $begin elfrelo */ typedef struct { int offset; /* offset of the reference to relocate */ int symbol:24, /* symbol the reference should point to */ type:8; /* relocation type */ } Elf32_Rel; /* $end elfrelo */ ================================================ FILE: code/link/main.c ================================================ /* $begin main */ /* main.c */ void swap(); int buf[2] = {1, 2}; int main() { swap(); return 0; } /* $end main */ ================================================ FILE: code/link/main2.c ================================================ /* $begin main2 */ /* main2.c */ #include #include "vector.h" int x[2] = {1, 2}; int y[2] = {3, 4}; int z[2]; int main() { addvec(x, y, z, 2); printf("z = [%d %d]\n", z[0], z[1]); return 0; } /* $end main2 */ ================================================ FILE: code/link/multvec.c ================================================ /* multvec.c */ /* $begin multvec */ void multvec(int *x, int *y, int *z, int n) { int i; for (i = 0; i < n; i++) z[i] = x[i] * y[i]; } /* $end multvec */ ================================================ FILE: code/link/p-exe.d ================================================ p: file format elf32-i386 p architecture: i386, flags 0x00000112: EXEC_P, HAS_SYMS, D_PAGED start address 0x080482f0 Program Header: PHDR off 0x00000034 vaddr 0x08048034 paddr 0x08048034 align 2**2 filesz 0x000000c0 memsz 0x000000c0 flags r-x INTERP off 0x000000f4 vaddr 0x080480f4 paddr 0x080480f4 align 2**0 filesz 0x00000013 memsz 0x00000013 flags r-- LOAD off 0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12 filesz 0x00000448 memsz 0x00000448 flags r-x LOAD off 0x00000448 vaddr 0x08049448 paddr 0x08049448 align 2**12 filesz 0x000000e8 memsz 0x00000104 flags rw- DYNAMIC off 0x00000490 vaddr 0x08049490 paddr 0x08049490 align 2**2 filesz 0x000000a0 memsz 0x000000a0 
flags rw- NOTE off 0x00000108 vaddr 0x08048108 paddr 0x08048108 align 2**2 filesz 0x00000020 memsz 0x00000020 flags r-- Dynamic Section: NEEDED libc.so.6 INIT 0x8048274 FINI 0x8048424 HASH 0x8048128 STRTAB 0x80481b4 SYMTAB 0x8048154 STRSZ 0x69 SYMENT 0x10 DEBUG 0x0 PLTGOT 0x8049474 PLTRELSZ 0x18 PLTREL 0x11 JMPREL 0x804825c REL 0x8048254 RELSZ 0x8 RELENT 0x8 VERNEED 0x8048234 VERNEEDNUM 0x1 VERSYM 0x8048228 Version References: required from libc.so.6: 0x0d696910 0x00 02 GLIBC_2.0 Sections: Idx Name Size VMA LMA File off Algn 0 .interp 00000013 080480f4 080480f4 000000f4 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 1 .note.ABI-tag 00000020 08048108 08048108 00000108 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 2 .hash 0000002c 08048128 08048128 00000128 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 3 .dynsym 00000060 08048154 08048154 00000154 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 4 .dynstr 00000073 080481b4 080481b4 000001b4 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 5 .gnu.version 0000000c 08048228 08048228 00000228 2**1 CONTENTS, ALLOC, LOAD, READONLY, DATA 6 .gnu.version_r 00000020 08048234 08048234 00000234 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 7 .rel.got 00000008 08048254 08048254 00000254 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 8 .rel.plt 00000018 0804825c 0804825c 0000025c 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 9 .init 0000002f 08048274 08048274 00000274 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 10 .plt 00000040 080482a4 080482a4 000002a4 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 11 .text 00000134 080482f0 080482f0 000002f0 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE 12 .fini 0000001a 08048424 08048424 00000424 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 13 .rodata 00000008 08048440 08048440 00000440 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 14 .data 00000018 08049448 08049448 00000448 2**2 CONTENTS, ALLOC, LOAD, DATA 15 .eh_frame 00000004 08049460 08049460 00000460 2**2 CONTENTS, ALLOC, LOAD, DATA 16 .ctors 00000008 08049464 08049464 00000464 
2**2 CONTENTS, ALLOC, LOAD, DATA 17 .dtors 00000008 0804946c 0804946c 0000046c 2**2 CONTENTS, ALLOC, LOAD, DATA 18 .got 0000001c 08049474 08049474 00000474 2**2 CONTENTS, ALLOC, LOAD, DATA 19 .dynamic 000000a0 08049490 08049490 00000490 2**2 CONTENTS, ALLOC, LOAD, DATA 20 .bss 0000001c 08049530 08049530 00000530 2**2 ALLOC 21 .stab 00000750 00000000 00000000 00000530 2**2 CONTENTS, READONLY, DEBUGGING 22 .stabstr 0000134f 00000000 00000000 00000c80 2**0 CONTENTS, READONLY, DEBUGGING 23 .comment 0000014f 00000000 00000000 00001fcf 2**0 CONTENTS, READONLY 24 .note 0000008c 0804954c 0804954c 0000211e 2**0 CONTENTS, READONLY SYMBOL TABLE: 080480f4 l d .interp 00000000 08048108 l d .note.ABI-tag 00000000 08048128 l d .hash 00000000 08048154 l d .dynsym 00000000 080481b4 l d .dynstr 00000000 08048228 l d .gnu.version 00000000 08048234 l d .gnu.version_r 00000000 08048254 l d .rel.got 00000000 0804825c l d .rel.plt 00000000 08048274 l d .init 00000000 080482a4 l d .plt 00000000 080482f0 l d .text 00000000 08048424 l d .fini 00000000 08048440 l d .rodata 00000000 08049448 l d .data 00000000 08049460 l d .eh_frame 00000000 08049464 l d .ctors 00000000 0804946c l d .dtors 00000000 08049474 l d .got 00000000 08049490 l d .dynamic 00000000 08049530 l d .bss 00000000 00000000 l d .stab 00000000 00000000 l d .stabstr 00000000 00000000 l d .comment 00000000 0804954c l d .note 00000000 00000000 l d *ABS* 00000000 00000000 l d *ABS* 00000000 00000000 l d *ABS* 00000000 00000000 l df *ABS* 00000000 initfini.c 08048314 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 init.c 00000000 l df *ABS* 00000000 crtstuff.c 08048320 l .text 00000000 gcc2_compiled. 
0804944c l O .data 00000000 p.3 0804946c l O .dtors 00000000 __DTOR_LIST__ 08049450 l O .data 00000000 completed.4 08048320 l F .text 00000000 __do_global_dtors_aux 08049460 l O .eh_frame 00000000 __EH_FRAME_BEGIN__ 08048374 l F .text 00000000 fini_dummy 08049530 l O .bss 00000018 object.11 08048380 l F .text 00000000 frame_dummy 080483a8 l F .text 00000000 init_dummy 08049454 l O .data 00000000 force_to_data 08049464 l O .ctors 00000000 __CTOR_LIST__ 00000000 l df *ABS* 00000000 crtstuff.c 080483f0 l .text 00000000 gcc2_compiled. 080483f0 l F .text 00000000 __do_global_ctors_aux 08049468 l O .ctors 00000000 __CTOR_END__ 08048418 l F .text 00000000 init_dummy 08049460 l O .data 00000000 force_to_data 08049470 l O .dtors 00000000 __DTOR_END__ 08049460 l O .eh_frame 00000000 __FRAME_END__ 00000000 l df *ABS* 00000000 initfini.c 08048424 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 main.c 080483b4 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 swap.c 080483c8 l .text 00000000 gcc2_compiled. 
0804945c g O .data 00000004 bufp0 08049490 g O .dynamic 00000000 _DYNAMIC 08048424 g O *ABS* 00000000 _etext 080482b4 w F *UND* 00000074 __register_frame_info@@GLIBC_2.0 00000000 *UND* 00000004 _fp_hw 080483c8 g F .text 00000027 swap 08048274 g F .init 00000000 _init 080482c4 w F *UND* 000000a2 __deregister_frame_info@@GLIBC_2.0 080482f0 g .text 00000000 _start 08049530 g O *ABS* 00000000 __bss_start 080483b4 g F .text 00000011 main 080482d4 F *UND* 00000105 __libc_start_main@@GLIBC_2.0 08049448 w .data 00000000 data_start 08048424 g F .fini 00000000 _fini 08049454 g O .data 00000008 buf 08049530 g O *ABS* 00000000 _edata 08049474 g O .got 00000000 _GLOBAL_OFFSET_TABLE_ 0804954c g O *ABS* 00000000 _end 08048444 g O .rodata 00000004 _IO_stdin_used 08049448 g .data 00000000 __data_start 08049548 g O .bss 00000004 bufp1 00000000 w *UND* 00000000 __gmon_start__ Disassembly of section .init: 08048274 <_init>: 8048274: 55 push %ebp 8048275: 89 e5 mov %esp,%ebp 8048277: 53 push %ebx 8048278: e8 00 00 00 00 call 804827d <_init+0x9> 804827d: 5b pop %ebx 804827e: 81 c3 f7 11 00 00 add $0x11f7,%ebx 8048284: 83 bb 18 00 00 00 00 cmpl $0x0,0x18(%ebx) 804828b: 74 05 je 8048292 <_init+0x1e> 804828d: e8 6e 7d fb f7 call 0 <_init-0x8048274> 8048292: 89 f6 mov %esi,%esi 8048294: e8 e7 00 00 00 call 8048380 8048299: e8 52 01 00 00 call 80483f0 <__do_global_ctors_aux> 804829e: 8b 5d fc mov 0xfffffffc(%ebp),%ebx 80482a1: c9 leave 80482a2: c3 ret Disassembly of section .plt: 080482a4 <.plt>: 80482a4: ff 35 78 94 04 08 pushl 0x8049478 80482aa: ff 25 7c 94 04 08 jmp *0x804947c 80482b0: 00 00 add %al,(%eax) 80482b2: 00 00 add %al,(%eax) 80482b4: ff 25 80 94 04 08 jmp *0x8049480 80482ba: 68 00 00 00 00 push $0x0 80482bf: e9 e0 ff ff ff jmp 80482a4 <_init+0x30> 80482c4: ff 25 84 94 04 08 jmp *0x8049484 80482ca: 68 08 00 00 00 push $0x8 80482cf: e9 d0 ff ff ff jmp 80482a4 <_init+0x30> 80482d4: ff 25 88 94 04 08 jmp *0x8049488 80482da: 68 10 00 00 00 push $0x10 80482df: e9 c0 ff ff ff jmp 
80482a4 <_init+0x30> Disassembly of section .text: 080482f0 <_start>: 80482f0: 31 ed xor %ebp,%ebp 80482f2: 5e pop %esi 80482f3: 89 e1 mov %esp,%ecx 80482f5: 83 e4 f8 and $0xfffffff8,%esp 80482f8: 50 push %eax 80482f9: 54 push %esp 80482fa: 52 push %edx 80482fb: 68 24 84 04 08 push $0x8048424 8048300: 68 74 82 04 08 push $0x8048274 8048305: 51 push %ecx 8048306: 56 push %esi 8048307: 68 b4 83 04 08 push $0x80483b4 804830c: e8 c3 ff ff ff call 80482d4 <_init+0x60> 8048311: f4 hlt 8048312: 90 nop 8048313: 90 nop 08048314 : 8048314: 90 90 90 90 90 90 90 90 90 90 90 90 ............ 08048320 <__do_global_dtors_aux>: 8048320: 55 push %ebp 8048321: 89 e5 mov %esp,%ebp 8048323: 83 ec 08 sub $0x8,%esp 8048326: 83 3d 50 94 04 08 00 cmpl $0x0,0x8049450 804832d: 75 3e jne 804836d <__do_global_dtors_aux+0x4d> 804832f: eb 12 jmp 8048343 <__do_global_dtors_aux+0x23> 8048331: a1 4c 94 04 08 mov 0x804944c,%eax 8048336: 8d 50 04 lea 0x4(%eax),%edx 8048339: 89 15 4c 94 04 08 mov %edx,0x804944c 804833f: 8b 00 mov (%eax),%eax 8048341: ff d0 call *%eax 8048343: a1 4c 94 04 08 mov 0x804944c,%eax 8048348: 83 38 00 cmpl $0x0,(%eax) 804834b: 75 e4 jne 8048331 <__do_global_dtors_aux+0x11> 804834d: b8 c4 82 04 08 mov $0x80482c4,%eax 8048352: 85 c0 test %eax,%eax 8048354: 74 0d je 8048363 <__do_global_dtors_aux+0x43> 8048356: 83 c4 f4 add $0xfffffff4,%esp 8048359: 68 60 94 04 08 push $0x8049460 804835e: e8 61 ff ff ff call 80482c4 <_init+0x50> 8048363: c7 05 50 94 04 08 01 movl $0x1,0x8049450 804836a: 00 00 00 804836d: 89 ec mov %ebp,%esp 804836f: 5d pop %ebp 8048370: c3 ret 8048371: 8d 76 00 lea 0x0(%esi),%esi 08048374 : 8048374: 55 push %ebp 8048375: 89 e5 mov %esp,%ebp 8048377: 83 ec 08 sub $0x8,%esp 804837a: 89 ec mov %ebp,%esp 804837c: 5d pop %ebp 804837d: c3 ret 804837e: 89 f6 mov %esi,%esi 08048380 : 8048380: 55 push %ebp 8048381: 89 e5 mov %esp,%ebp 8048383: 83 ec 08 sub $0x8,%esp 8048386: b8 b4 82 04 08 mov $0x80482b4,%eax 804838b: 85 c0 test %eax,%eax 804838d: 74 12 je 80483a1 
804838f: 83 c4 f8 add $0xfffffff8,%esp 8048392: 68 30 95 04 08 push $0x8049530 8048397: 68 60 94 04 08 push $0x8049460 804839c: e8 13 ff ff ff call 80482b4 <_init+0x40> 80483a1: 89 ec mov %ebp,%esp 80483a3: 5d pop %ebp 80483a4: c3 ret 80483a5: 8d 76 00 lea 0x0(%esi),%esi 080483a8 : 80483a8: 55 push %ebp 80483a9: 89 e5 mov %esp,%ebp 80483ab: 83 ec 08 sub $0x8,%esp 80483ae: 89 ec mov %ebp,%esp 80483b0: 5d pop %ebp 80483b1: c3 ret 80483b2: 90 nop 80483b3: 90 nop 080483b4
: 80483b4: 55 push %ebp 80483b5: 89 e5 mov %esp,%ebp 80483b7: 83 ec 08 sub $0x8,%esp 80483ba: e8 09 00 00 00 call 80483c8 80483bf: 31 c0 xor %eax,%eax 80483c1: 89 ec mov %ebp,%esp 80483c3: 5d pop %ebp 80483c4: c3 ret 80483c5: 90 nop 80483c6: 90 nop 80483c7: 90 nop 080483c8 : 80483c8: 55 push %ebp 80483c9: 8b 15 5c 94 04 08 mov 0x804945c,%edx 80483cf: a1 58 94 04 08 mov 0x8049458,%eax 80483d4: 89 e5 mov %esp,%ebp 80483d6: c7 05 48 95 04 08 58 movl $0x8049458,0x8049548 80483dd: 94 04 08 80483e0: 89 ec mov %ebp,%esp 80483e2: 8b 0a mov (%edx),%ecx 80483e4: 89 02 mov %eax,(%edx) 80483e6: a1 48 95 04 08 mov 0x8049548,%eax 80483eb: 89 08 mov %ecx,(%eax) 80483ed: 5d pop %ebp 80483ee: c3 ret 80483ef: 90 nop 080483f0 <__do_global_ctors_aux>: 80483f0: 55 push %ebp 80483f1: 89 e5 mov %esp,%ebp 80483f3: 83 ec 14 sub $0x14,%esp 80483f6: 53 push %ebx 80483f7: bb 64 94 04 08 mov $0x8049464,%ebx 80483fc: 83 3d 64 94 04 08 ff cmpl $0xffffffff,0x8049464 8048403: 74 0c je 8048411 <__do_global_ctors_aux+0x21> 8048405: 8b 03 mov (%ebx),%eax 8048407: ff d0 call *%eax 8048409: 83 c3 fc add $0xfffffffc,%ebx 804840c: 83 3b ff cmpl $0xffffffff,(%ebx) 804840f: 75 f4 jne 8048405 <__do_global_ctors_aux+0x15> 8048411: 5b pop %ebx 8048412: 89 ec mov %ebp,%esp 8048414: 5d pop %ebp 8048415: c3 ret 8048416: 89 f6 mov %esi,%esi 08048418 : 8048418: 55 push %ebp 8048419: 89 e5 mov %esp,%ebp 804841b: 83 ec 08 sub $0x8,%esp 804841e: 89 ec mov %ebp,%esp 8048420: 5d pop %ebp 8048421: c3 ret 8048422: 90 nop 8048423: 90 nop Disassembly of section .fini: 08048424 <_fini>: 8048424: 55 push %ebp 8048425: 89 e5 mov %esp,%ebp 8048427: 53 push %ebx 8048428: e8 00 00 00 00 call 804842d <_etext+0x9> 804842d: 5b pop %ebx 804842e: 81 c3 47 10 00 00 add $0x1047,%ebx 8048434: e8 e7 fe ff ff call 8048320 <__do_global_dtors_aux> 8048439: 8b 5d fc mov 0xfffffffc(%ebp),%ebx 804843c: c9 leave 804843d: c3 ret ================================================ FILE: code/link/pdata-exe.d 
================================================ p: file format elf32-i386 p architecture: i386, flags 0x00000112: EXEC_P, HAS_SYMS, D_PAGED start address 0x080482f0 Program Header: PHDR off 0x00000034 vaddr 0x08048034 paddr 0x08048034 align 2**2 filesz 0x000000c0 memsz 0x000000c0 flags r-x INTERP off 0x000000f4 vaddr 0x080480f4 paddr 0x080480f4 align 2**0 filesz 0x00000013 memsz 0x00000013 flags r-- LOAD off 0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12 filesz 0x00000448 memsz 0x00000448 flags r-x LOAD off 0x00000448 vaddr 0x08049448 paddr 0x08049448 align 2**12 filesz 0x000000e8 memsz 0x00000104 flags rw- DYNAMIC off 0x00000490 vaddr 0x08049490 paddr 0x08049490 align 2**2 filesz 0x000000a0 memsz 0x000000a0 flags rw- NOTE off 0x00000108 vaddr 0x08048108 paddr 0x08048108 align 2**2 filesz 0x00000020 memsz 0x00000020 flags r-- Dynamic Section: NEEDED libc.so.6 INIT 0x8048274 FINI 0x8048424 HASH 0x8048128 STRTAB 0x80481b4 SYMTAB 0x8048154 STRSZ 0x69 SYMENT 0x10 DEBUG 0x0 PLTGOT 0x8049474 PLTRELSZ 0x18 PLTREL 0x11 JMPREL 0x804825c REL 0x8048254 RELSZ 0x8 RELENT 0x8 VERNEED 0x8048234 VERNEEDNUM 0x1 VERSYM 0x8048228 Version References: required from libc.so.6: 0x0d696910 0x00 02 GLIBC_2.0 Sections: Idx Name Size VMA LMA File off Algn 0 .interp 00000013 080480f4 080480f4 000000f4 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 1 .note.ABI-tag 00000020 08048108 08048108 00000108 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 2 .hash 0000002c 08048128 08048128 00000128 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 3 .dynsym 00000060 08048154 08048154 00000154 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 4 .dynstr 00000073 080481b4 080481b4 000001b4 2**0 CONTENTS, ALLOC, LOAD, READONLY, DATA 5 .gnu.version 0000000c 08048228 08048228 00000228 2**1 CONTENTS, ALLOC, LOAD, READONLY, DATA 6 .gnu.version_r 00000020 08048234 08048234 00000234 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 7 .rel.got 00000008 08048254 08048254 00000254 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 8 .rel.plt 
00000018 0804825c 0804825c 0000025c 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 9 .init 0000002f 08048274 08048274 00000274 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 10 .plt 00000040 080482a4 080482a4 000002a4 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 11 .text 00000134 080482f0 080482f0 000002f0 2**4 CONTENTS, ALLOC, LOAD, READONLY, CODE 12 .fini 0000001a 08048424 08048424 00000424 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 13 .rodata 00000008 08048440 08048440 00000440 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 14 .data 00000018 08049448 08049448 00000448 2**2 CONTENTS, ALLOC, LOAD, DATA 15 .eh_frame 00000004 08049460 08049460 00000460 2**2 CONTENTS, ALLOC, LOAD, DATA 16 .ctors 00000008 08049464 08049464 00000464 2**2 CONTENTS, ALLOC, LOAD, DATA 17 .dtors 00000008 0804946c 0804946c 0000046c 2**2 CONTENTS, ALLOC, LOAD, DATA 18 .got 0000001c 08049474 08049474 00000474 2**2 CONTENTS, ALLOC, LOAD, DATA 19 .dynamic 000000a0 08049490 08049490 00000490 2**2 CONTENTS, ALLOC, LOAD, DATA 20 .bss 0000001c 08049530 08049530 00000530 2**2 ALLOC 21 .stab 00000750 00000000 00000000 00000530 2**2 CONTENTS, READONLY, DEBUGGING 22 .stabstr 0000134f 00000000 00000000 00000c80 2**0 CONTENTS, READONLY, DEBUGGING 23 .comment 0000014f 00000000 00000000 00001fcf 2**0 CONTENTS, READONLY 24 .note 0000008c 0804954c 0804954c 0000211e 2**0 CONTENTS, READONLY SYMBOL TABLE: 080480f4 l d .interp 00000000 08048108 l d .note.ABI-tag 00000000 08048128 l d .hash 00000000 08048154 l d .dynsym 00000000 080481b4 l d .dynstr 00000000 08048228 l d .gnu.version 00000000 08048234 l d .gnu.version_r 00000000 08048254 l d .rel.got 00000000 0804825c l d .rel.plt 00000000 08048274 l d .init 00000000 080482a4 l d .plt 00000000 080482f0 l d .text 00000000 08048424 l d .fini 00000000 08048440 l d .rodata 00000000 08049448 l d .data 00000000 08049460 l d .eh_frame 00000000 08049464 l d .ctors 00000000 0804946c l d .dtors 00000000 08049474 l d .got 00000000 08049490 l d .dynamic 00000000 08049530 l d .bss 00000000 
00000000 l d .stab 00000000 00000000 l d .stabstr 00000000 00000000 l d .comment 00000000 0804954c l d .note 00000000 00000000 l d *ABS* 00000000 00000000 l d *ABS* 00000000 00000000 l d *ABS* 00000000 00000000 l df *ABS* 00000000 initfini.c 08048314 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 init.c 00000000 l df *ABS* 00000000 crtstuff.c 08048320 l .text 00000000 gcc2_compiled. 0804944c l O .data 00000000 p.3 0804946c l O .dtors 00000000 __DTOR_LIST__ 08049450 l O .data 00000000 completed.4 08048320 l F .text 00000000 __do_global_dtors_aux 08049460 l O .eh_frame 00000000 __EH_FRAME_BEGIN__ 08048374 l F .text 00000000 fini_dummy 08049530 l O .bss 00000018 object.11 08048380 l F .text 00000000 frame_dummy 080483a8 l F .text 00000000 init_dummy 08049454 l O .data 00000000 force_to_data 08049464 l O .ctors 00000000 __CTOR_LIST__ 00000000 l df *ABS* 00000000 crtstuff.c 080483f0 l .text 00000000 gcc2_compiled. 080483f0 l F .text 00000000 __do_global_ctors_aux 08049468 l O .ctors 00000000 __CTOR_END__ 08048418 l F .text 00000000 init_dummy 08049460 l O .data 00000000 force_to_data 08049470 l O .dtors 00000000 __DTOR_END__ 08049460 l O .eh_frame 00000000 __FRAME_END__ 00000000 l df *ABS* 00000000 initfini.c 08048424 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 main.c 080483b4 l .text 00000000 gcc2_compiled. 00000000 l df *ABS* 00000000 swap.c 080483c8 l .text 00000000 gcc2_compiled. 
0804945c g O .data 00000004 bufp0 08049490 g O .dynamic 00000000 _DYNAMIC 08048424 g O *ABS* 00000000 _etext 080482b4 w F *UND* 00000074 __register_frame_info@@GLIBC_2.0 00000000 *UND* 00000004 _fp_hw 080483c8 g F .text 00000027 swap 08048274 g F .init 00000000 _init 080482c4 w F *UND* 000000a2 __deregister_frame_info@@GLIBC_2.0 080482f0 g .text 00000000 _start 08049530 g O *ABS* 00000000 __bss_start 080483b4 g F .text 00000011 main 080482d4 F *UND* 00000105 __libc_start_main@@GLIBC_2.0 08049448 w .data 00000000 data_start 08048424 g F .fini 00000000 _fini 08049454 g O .data 00000008 buf 08049530 g O *ABS* 00000000 _edata 08049474 g O .got 00000000 _GLOBAL_OFFSET_TABLE_ 0804954c g O *ABS* 00000000 _end 08048444 g O .rodata 00000004 _IO_stdin_used 08049448 g .data 00000000 __data_start 08049548 g O .bss 00000004 bufp1 00000000 w *UND* 00000000 __gmon_start__ Disassembly of section .data: 08049448 <__data_start>: 8049448: 00 00 add %al,(%eax) ... 0804944c : 804944c: 70 94 04 08 p... 08049450 : 8049450: 00 00 00 00 .... 08049454 : 8049454: 01 00 00 00 02 00 00 00 ........ 0804945c : 804945c: 54 94 04 08 T... 
================================================ FILE: code/link/swap.c ================================================ /* $begin swap */ /* swap.c */ extern int buf[]; int *bufp0 = &buf[0]; int *bufp1; void swap() { int temp; bufp1 = &buf[1]; temp = *bufp0; *bufp0 = *bufp1; *bufp1 = temp; } /* $end swap */ ================================================ FILE: code/mem/matmult/mm.c ================================================ /* matrix multiply permutations */ #include #include #include "mm.h" #include "fcycmm.h" #include "clock.h" /* whether or not fcyc should clear the cache */ #define CLEARCACHE 0 /* global arrays */ array ga, gb, gc; /* check the result array for correctness */ void checkresult(array c, int n) { int i, j; for (i = 0; i < n; i++) for (j = 0; j < n; j++) if (c[i][j] != (double)n) { printf("Error: bad number (%f) in result matrix (%d,%d)\n", c[i][j], i, j); fflush(stdout); exit(0); } } /* Run f and return clocks per inner loop iteration */ double run(test_funct f, int n) { double cpi; cpi = fcyc(f, n, CLEARCACHE) / (n*n*n); checkresult(gc, n); return(cpi); } /* reset result array to zero */ void reset(array c, int n) { int i,j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { c[i][j] = 0.0; } } } /* initialize input arrays to 1 */ void init(array a, array b, int n) { int i,j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { a[i][j] = 1.0; b[i][j] = 1.0; } } } /* print an array (debug) */ void printarray(array a, int n) { int i, j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { printf("%5.1f ", a[i][j]); } printf("\n"); } } /*********************************************** * Six different versions of matrix multiply ***********************************************/ void ijk(array A, array B, array C, int n) { int i, j, k; double sum; /* $begin mm-ijk */ for (i = 0; i < n; i++) for (j = 0; j < n; j++) { sum = 0.0; for (k = 0; k < n; k++) sum += A[i][k]*B[k][j]; C[i][j] += sum; } /* $end mm-ijk */ } void jik(array A, array B, array C, 
int n) { int i, j, k; double sum; /* $begin mm-jik */ for (j = 0; j < n; j++) for (i = 0; i < n; i++) { sum = 0.0; for (k = 0; k < n; k++) sum += A[i][k]*B[k][j]; C[i][j] += sum; } /* $end mm-jik */ } void ikj(array A, array B, array C, int n) { int i, j, k; double r; /* $begin mm-ikj */ for (i = 0; i < n; i++) for (k = 0; k < n; k++) { r = A[i][k]; for (j = 0; j < n; j++) C[i][j] += r*B[k][j]; } /* $end mm-ikj */ } void kij(array A, array B, array C, int n) { int i, j, k; double r; /* $begin mm-kij */ for (k = 0; k < n; k++) for (i = 0; i < n; i++) { r = A[i][k]; for (j = 0; j < n; j++) C[i][j] += r*B[k][j]; } /* $end mm-kij */ } void kji(array A, array B, array C, int n) { int i, j, k; double r; /* $begin mm-kji */ for (k = 0; k < n; k++) for (j = 0; j < n; j++) { r = B[k][j]; for (i = 0; i < n; i++) C[i][j] += A[i][k]*r; } /* $end mm-kji */ } void jki(array A, array B, array C, int n) { int i, j, k; double r; /* $begin mm-jki */ for (j = 0; j < n; j++) for (k = 0; k < n; k++) { r = B[k][j]; for (i = 0; i < n; i++) C[i][j] += A[i][k]*r; } /* $end mm-jki */ } /* * Run the six versions of matrix multiply and display performance * as clock cycles per inner loop iteration. */ int main() { int n; init(ga, gb, MAXN); printf("matmult cycles/loop iteration\n"); printf("%3s%6s%6s%6s%6s%6s%6s\n", "n", "ijk", "jik", "jki", "kji", "kij", "ikj"); fflush(stdout); for (n = MINN; n <= MAXN; n += INCN) { printf("%3d ", n); printf("%5.2f ", run(ijk, n)); printf("%5.2f ", run(jik, n)); printf("%5.2f ", run(jki, n)); printf("%5.2f ", run(kji, n)); printf("%5.2f ", run(kij, n)); printf("%5.2f ", run(ikj, n)); printf("\n"); fflush(stdout); } exit(0); } ================================================ FILE: code/mem/mountain/mountain.c ================================================ /* mountain.c - Generate the memory mountain. 
*/ /* $begin mountainmain */ #include #include #include "fcyc2.h" /* measurement routines */ #include "clock.h" /* routines to access the cycle counter */ #define MINBYTES (1 << 11) /* Working set size ranges from 2 KB */ #define MAXBYTES (1 << 25) /* ... up to 64 MB */ #define MAXSTRIDE 64 /* Strides range from 1 to 64 elems */ #define MAXELEMS MAXBYTES/sizeof(double) /* $begin mountainfuns */ double data[MAXELEMS]; /* The global array we'll be traversing */ /* $end mountainfuns */ /* $end mountainmain */ void init_data(double *data, int n); void test(int elems, int stride); double run(int size, int stride, double Mhz); /* $begin mountainmain */ int main() { int size; /* Working set size (in bytes) */ int stride; /* Stride (in array elements) */ double Mhz; /* Clock frequency */ init_data(data, MAXELEMS); /* Initialize each element in data */ Mhz = mhz(0); /* Estimate the clock frequency */ /* $end mountainmain */ /* Not shown in the text */ printf("Clock frequency is approx. %.1f MHz\n", Mhz); printf("Memory mountain (MB/sec)\n"); printf("\t"); for (stride = 1; stride <= MAXSTRIDE; stride++) printf("s%d\t", stride); printf("\n"); /* $begin mountainmain */ for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { /* $end mountainmain */ /* Not shown in the text */ if (size > (1 << 20)) printf("%dm\t", size / (1 << 20)); else printf("%dk\t", size / 1024); /* $begin mountainmain */ for (stride = 1; stride <= MAXSTRIDE; stride++) { printf("%.1f\t", run(size, stride, Mhz)); } printf("\n"); } exit(0); } /* $end mountainmain */ /* init_data - initializes the array */ void init_data(double *data, int n) { int i; for (i = 0; i < n; i++) data[i] = i; } /* $begin mountainfuns */ /* * test - Iterate over first "elems" elements of array "data" * with stride of "stride". 
*/ void test(int elems, int stride) /* The test function */ { int i; double result = 0.0; volatile double sink; for (i = 0; i < elems; i += stride) { result += data[i]; } sink = result; /* So compiler doesn't optimize away the loop */ } /* * run - Run test(elems, stride) and return read throughput (MB/s). * "size" is in bytes, "stride" is in array elements, and * Mhz is CPU clock frequency in Mhz. */ double run(int size, int stride, double Mhz) { double cycles; int elems = size / sizeof(double); test(elems, stride); /* warm up the cache */ //line:mem:warmup cycles = fcyc2(test, elems, stride, 0); /* call test(elems,stride) */ //line:mem:fcyc return (size / stride) / (cycles / Mhz); /* convert cycles to MB/s */ //line:mem:bwcompute } /* $end mountainfuns */ ================================================ FILE: code/netp/echo.c ================================================ /* * echo - read and echo text lines until client closes connection */ /* $begin echo */ #include "csapp.h" void echo(int connfd) { size_t n; char buf[MAXLINE]; rio_t rio; Rio_readinitb(&rio, connfd); while((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) { //line:netp:echo:eof printf("server received %d bytes\n", n); Rio_writen(connfd, buf, n); } } /* $end echo */ ================================================ FILE: code/netp/echoclient.c ================================================ /* * echoclient.c - An echo client */ /* $begin echoclientmain */ #include "csapp.h" int main(int argc, char **argv) { int clientfd, port; char *host, buf[MAXLINE]; rio_t rio; if (argc != 3) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } host = argv[1]; port = atoi(argv[2]); clientfd = Open_clientfd(host, port); Rio_readinitb(&rio, clientfd); while (Fgets(buf, MAXLINE, stdin) != NULL) { Rio_writen(clientfd, buf, strlen(buf)); Rio_readlineb(&rio, buf, MAXLINE); Fputs(buf, stdout); } Close(clientfd); //line:netp:echoclient:close exit(0); } /* $end echoclientmain */ 
================================================ FILE: code/netp/echoserveri.c ================================================ /* * echoserveri.c - An iterative echo server */ /* $begin echoserverimain */ #include "csapp.h" void echo(int connfd); int main(int argc, char **argv) { int listenfd, connfd, port, clientlen; struct sockaddr_in clientaddr; struct hostent *hp; char *haddrp; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); /* determine the domain name and IP address of the client */ hp = Gethostbyaddr((const char *)&clientaddr.sin_addr.s_addr, sizeof(clientaddr.sin_addr.s_addr), AF_INET); haddrp = inet_ntoa(clientaddr.sin_addr); printf("server connected to %s (%s)\n", hp->h_name, haddrp); echo(connfd); Close(connfd); } exit(0); } /* $end echoserverimain */ ================================================ FILE: code/netp/hostinfo.c ================================================ /* $begin hostinfo */ #include "csapp.h" int main(int argc, char **argv) { char **pp; struct in_addr addr; struct hostent *hostp; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } if (inet_aton(argv[1], &addr) != 0) hostp = Gethostbyaddr((const char *)&addr, sizeof(addr), AF_INET); else hostp = Gethostbyname(argv[1]); printf("official hostname: %s\n", hostp->h_name); for (pp = hostp->h_aliases; *pp != NULL; pp++) printf("alias: %s\n", *pp); for (pp = hostp->h_addr_list; *pp != NULL; pp++) { addr.s_addr = ((struct in_addr *)*pp)->s_addr; printf("address: %s\n", inet_ntoa(addr)); } exit(0); } /* $end hostinfo */ ================================================ FILE: code/netp/tiny/cgi-bin/adder.c ================================================ /* * adder.c - a minimal CGI program that adds two numbers together */ /* $begin adder */ #include "csapp.h" int main(void) { char *buf, *p; char 
arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1=0, n2=0; /* Extract the two arguments */ if ((buf = getenv("QUERY_STRING")) != NULL) { p = strchr(buf, '&'); *p = '\0'; strcpy(arg1, buf); strcpy(arg2, p+1); n1 = atoi(arg1); n2 = atoi(arg2); } /* Make the response body */ sprintf(content, "Welcome to add.com: "); sprintf(content, "%sTHE Internet addition portal.\r\n

", content); sprintf(content, "%sThe answer is: %d + %d = %d\r\n

", content, n1, n2, n1 + n2); sprintf(content, "%sThanks for visiting!\r\n", content); /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } /* $end adder */ ================================================ FILE: code/netp/tiny/tiny.c ================================================ /* $begin tinymain */ /* * tiny.c - A simple, iterative HTTP/1.0 Web server that uses the * GET method to serve static and dynamic content. */ #include "csapp.h" void doit(int fd); void read_requesthdrs(rio_t *rp); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); int main(int argc, char **argv) { int listenfd, connfd, port, clientlen; struct sockaddr_in clientaddr; /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); //line:netp:tiny:accept doit(connfd); //line:netp:tiny:doit Close(connfd); //line:netp:tiny:close } } /* $end tinymain */ /* * doit - handle one HTTP request/response transaction */ /* $begin doit */ void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers */ Rio_readinitb(&rio, fd); Rio_readlineb(&rio, buf, MAXLINE); //line:netp:doit:readrequest sscanf(buf, "%s %s %s", method, uri, version); //line:netp:doit:parserequest if (strcasecmp(method, "GET")) { //line:netp:doit:beginrequesterr clienterror(fd, method, "501", "Not Implemented", "Tiny does 
not implement this method"); return; } //line:netp:doit:endrequesterr read_requesthdrs(&rio); //line:netp:doit:readrequesthdrs /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); //line:netp:doit:staticcheck if (stat(filename, &sbuf) < 0) { //line:netp:doit:beginnotfound clienterror(fd, filename, "404", "Not found", "Tiny couldn't find this file"); return; } //line:netp:doit:endnotfound if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { //line:netp:doit:readable clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the file"); return; } serve_static(fd, filename, sbuf.st_size); //line:netp:doit:servestatic } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { //line:netp:doit:executable clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); //line:netp:doit:servedynamic } } /* $end doit */ /* * read_requesthdrs - read and parse HTTP request headers */ /* $begin read_requesthdrs */ void read_requesthdrs(rio_t *rp) { char buf[MAXLINE]; Rio_readlineb(rp, buf, MAXLINE); while(strcmp(buf, "\r\n")) { //line:netp:readhdrs:checkterm Rio_readlineb(rp, buf, MAXLINE); printf("%s", buf); } return; } /* $end read_requesthdrs */ /* * parse_uri - parse URI into filename and CGI args * return 0 if dynamic content, 1 if static */ /* $begin parse_uri */ int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ //line:netp:parseuri:isstatic strcpy(cgiargs, ""); //line:netp:parseuri:clearcgi strcpy(filename, "."); //line:netp:parseuri:beginconvert1 strcat(filename, uri); //line:netp:parseuri:endconvert1 if (uri[strlen(uri)-1] == '/') //line:netp:parseuri:slashcheck strcat(filename, "home.html"); //line:netp:parseuri:appenddefault return 1; } else { /* Dynamic content */ //line:netp:parseuri:isdynamic ptr = 
index(uri, '?'); //line:netp:parseuri:beginextract if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else strcpy(cgiargs, ""); //line:netp:parseuri:endextract strcpy(filename, "."); //line:netp:parseuri:beginconvert2 strcat(filename, uri); //line:netp:parseuri:endconvert2 return 0; } } /* $end parse_uri */ /* * serve_static - copy a file back to the client */ /* $begin serve_static */ void serve_static(int fd, char *filename, int filesize) { int srcfd; char *srcp, filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); //line:netp:servestatic:getfiletype sprintf(buf, "HTTP/1.0 200 OK\r\n"); //line:netp:servestatic:beginserve sprintf(buf, "%sServer: Tiny Web Server\r\n", buf); sprintf(buf, "%sContent-length: %d\r\n", buf, filesize); sprintf(buf, "%sContent-type: %s\r\n\r\n", buf, filetype); Rio_writen(fd, buf, strlen(buf)); //line:netp:servestatic:endserve /* Send response body to client */ srcfd = Open(filename, O_RDONLY, 0); //line:netp:servestatic:open srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0);//line:netp:servestatic:mmap Close(srcfd); //line:netp:servestatic:close Rio_writen(fd, srcp, filesize); //line:netp:servestatic:write Munmap(srcp, filesize); //line:netp:servestatic:munmap } /* * get_filetype - derive file type from file name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else strcpy(filetype, "text/plain"); } /* $end serve_static */ /* * serve_dynamic - run a CGI program on behalf of the client */ /* $begin serve_dynamic */ void serve_dynamic(int fd, char *filename, char *cgiargs) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ sprintf(buf, "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Server: Tiny Web Server\r\n"); 
Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ //line:netp:servedynamic:fork /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); //line:netp:servedynamic:setenv Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ //line:netp:servedynamic:dup2 Execve(filename, emptylist, environ); /* Run CGI program */ //line:netp:servedynamic:execve } Wait(NULL); /* Parent waits for and reaps child */ //line:netp:servedynamic:wait } /* $end serve_dynamic */ /* * clienterror - returns an error message to the client */ /* $begin clienterror */ void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } /* $end clienterror */ ================================================ FILE: code/src/csapp.c ================================================ /* $begin csapp.c */ #include "csapp.h" /************************** * Error-handling functions **************************/ /* $begin errorfuns */ /* $begin unixerror */ void unix_error(char *msg) /* unix-style error */ { fprintf(stderr, "%s: %s\n", msg, strerror(errno)); exit(0); } /* $end unixerror */ void posix_error(int code, char *msg) /* posix-style error */ { fprintf(stderr, "%s: %s\n", msg, strerror(code)); exit(0); } void dns_error(char *msg) /* dns-style error */ { fprintf(stderr, "%s: DNS error %d\n", msg, h_errno); exit(0); } void app_error(char *msg) /* application error */ { fprintf(stderr, "%s\n", msg); exit(0); } /* $end errorfuns */ /********************************************* * Wrappers for Unix process control functions ********************************************/ /* $begin forkwrapper */ pid_t Fork(void) { pid_t pid; if ((pid = fork()) < 0) unix_error("Fork error"); return pid; } /* $end forkwrapper */ void Execve(const char *filename, char *const argv[], char *const envp[]) { if (execve(filename, argv, envp) < 0) unix_error("Execve error"); } /* $begin wait */ pid_t Wait(int *status) { pid_t pid; if ((pid = wait(status)) < 0) unix_error("Wait error"); return pid; } /* $end wait */ pid_t Waitpid(pid_t pid, int *iptr, int options) { pid_t retpid; if ((retpid = waitpid(pid, iptr, options)) < 0) unix_error("Waitpid error"); return(retpid); } /* $begin kill */ void Kill(pid_t pid, int signum) { int rc; if ((rc = kill(pid, signum)) < 0) 
unix_error("Kill error"); } /* $end kill */ void Pause() { (void)pause(); return; } unsigned int Sleep(unsigned int secs) { unsigned int rc; if ((rc = sleep(secs)) < 0) unix_error("Sleep error"); return rc; } unsigned int Alarm(unsigned int seconds) { return alarm(seconds); } void Setpgid(pid_t pid, pid_t pgid) { int rc; if ((rc = setpgid(pid, pgid)) < 0) unix_error("Setpgid error"); return; } pid_t Getpgrp(void) { return getpgrp(); } /************************************ * Wrappers for Unix signal functions ***********************************/ /* $begin sigaction */ handler_t *Signal(int signum, handler_t *handler) { struct sigaction action, old_action; action.sa_handler = handler; sigemptyset(&action.sa_mask); /* block sigs of type being handled */ action.sa_flags = SA_RESTART; /* restart syscalls if possible */ if (sigaction(signum, &action, &old_action) < 0) unix_error("Signal error"); return (old_action.sa_handler); } /* $end sigaction */ void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset) { if (sigprocmask(how, set, oldset) < 0) unix_error("Sigprocmask error"); return; } void Sigemptyset(sigset_t *set) { if (sigemptyset(set) < 0) unix_error("Sigemptyset error"); return; } void Sigfillset(sigset_t *set) { if (sigfillset(set) < 0) unix_error("Sigfillset error"); return; } void Sigaddset(sigset_t *set, int signum) { if (sigaddset(set, signum) < 0) unix_error("Sigaddset error"); return; } void Sigdelset(sigset_t *set, int signum) { if (sigdelset(set, signum) < 0) unix_error("Sigdelset error"); return; } int Sigismember(const sigset_t *set, int signum) { int rc; if ((rc = sigismember(set, signum)) < 0) unix_error("Sigismember error"); return rc; } /******************************** * Wrappers for Unix I/O routines ********************************/ int Open(const char *pathname, int flags, mode_t mode) { int rc; if ((rc = open(pathname, flags, mode)) < 0) unix_error("Open error"); return rc; } ssize_t Read(int fd, void *buf, size_t count) { ssize_t rc; 
if ((rc = read(fd, buf, count)) < 0) unix_error("Read error"); return rc; } ssize_t Write(int fd, const void *buf, size_t count) { ssize_t rc; if ((rc = write(fd, buf, count)) < 0) unix_error("Write error"); return rc; } off_t Lseek(int fildes, off_t offset, int whence) { off_t rc; if ((rc = lseek(fildes, offset, whence)) < 0) unix_error("Lseek error"); return rc; } void Close(int fd) { int rc; if ((rc = close(fd)) < 0) unix_error("Close error"); } int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { int rc; if ((rc = select(n, readfds, writefds, exceptfds, timeout)) < 0) unix_error("Select error"); return rc; } int Dup2(int fd1, int fd2) { int rc; if ((rc = dup2(fd1, fd2)) < 0) unix_error("Dup2 error"); return rc; } void Stat(const char *filename, struct stat *buf) { if (stat(filename, buf) < 0) unix_error("Stat error"); } void Fstat(int fd, struct stat *buf) { if (fstat(fd, buf) < 0) unix_error("Fstat error"); } /*************************************** * Wrappers for memory mapping functions ***************************************/ void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { void *ptr; if ((ptr = mmap(addr, len, prot, flags, fd, offset)) == ((void *) -1)) unix_error("mmap error"); return(ptr); } void Munmap(void *start, size_t length) { if (munmap(start, length) < 0) unix_error("munmap error"); } /*************************************************** * Wrappers for dynamic storage allocation functions ***************************************************/ void *Malloc(size_t size) { void *p; if ((p = malloc(size)) == NULL) unix_error("Malloc error"); return p; } void *Realloc(void *ptr, size_t size) { void *p; if ((p = realloc(ptr, size)) == NULL) unix_error("Realloc error"); return p; } void *Calloc(size_t nmemb, size_t size) { void *p; if ((p = calloc(nmemb, size)) == NULL) unix_error("Calloc error"); return p; } void Free(void *ptr) { free(ptr); } 
/******************************************
 * Wrappers for the Standard I/O functions.
 ******************************************/

/* Fclose - fclose() with failures reported through unix_error() */
void Fclose(FILE *fp)
{
    if (fclose(fp) != 0)
        unix_error("Fclose error");
}

/* Fdopen - returns the stream from fdopen(); NULL goes to unix_error() */
FILE *Fdopen(int fd, const char *type)
{
    FILE *fp;

    if ((fp = fdopen(fd, type)) == NULL)
        unix_error("Fdopen error");
    return fp;
}

/*
 * Fgets - returns fgets()'s result.  A NULL result is reported through
 * app_error() only when ferror(stream) is set; otherwise NULL is simply
 * returned to the caller.
 */
char *Fgets(char *ptr, int n, FILE *stream)
{
    char *rptr;

    if (((rptr = fgets(ptr, n, stream)) == NULL) && ferror(stream))
        app_error("Fgets error");
    return rptr;
}

/* Fopen - returns the stream from fopen(); NULL goes to unix_error() */
FILE *Fopen(const char *filename, const char *mode)
{
    FILE *fp;

    if ((fp = fopen(filename, mode)) == NULL)
        unix_error("Fopen error");
    return fp;
}

/* Fputs - fputs() with an EOF result reported through unix_error() */
void Fputs(const char *ptr, FILE *stream)
{
    if (fputs(ptr, stream) == EOF)
        unix_error("Fputs error");
}

/*
 * Fread - returns fread()'s item count.  A short count is reported
 * through unix_error() only when ferror(stream) is set.
 */
size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    size_t n;

    if (((n = fread(ptr, size, nmemb, stream)) < nmemb) && ferror(stream))
        unix_error("Fread error");
    return n;
}

/* Fwrite - fwrite() with a short count reported through unix_error() */
void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    if (fwrite(ptr, size, nmemb, stream) < nmemb)
        unix_error("Fwrite error");
}

/****************************
 * Sockets interface wrappers
 ****************************/

/* Socket - returns the descriptor from socket(); failures go to unix_error() */
int Socket(int domain, int type, int protocol)
{
    int rc;

    if ((rc = socket(domain, type, protocol)) < 0)
        unix_error("Socket error");
    return rc;
}

/* Setsockopt - setsockopt() with failures reported through unix_error() */
void Setsockopt(int s, int level, int optname, const void *optval, int optlen)
{
    if (setsockopt(s, level, optname, optval, optlen) < 0)
        unix_error("Setsockopt error");
}

/* Bind - bind() with failures reported through unix_error() */
void Bind(int sockfd, struct sockaddr *my_addr, int addrlen)
{
    if (bind(sockfd, my_addr, addrlen) < 0)
        unix_error("Bind error");
}

/* Listen - listen() with failures reported through unix_error() */
void Listen(int s, int backlog)
{
    if (listen(s, backlog) < 0)
        unix_error("Listen error");
}

/* Accept - returns the descriptor from accept(); failures go to unix_error() */
int Accept(int s, struct sockaddr *addr, socklen_t *addrlen)
{
    int rc;

    if ((rc = accept(s, addr, addrlen)) < 0)
        unix_error("Accept error");
    return rc;
}

/* Connect - connect() with failures reported through unix_error() */
void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen)
{
    if (connect(sockfd, serv_addr, addrlen) < 0)
        unix_error("Connect error");
}

/************************
 * DNS interface wrappers
 ***********************/

/* Gethostbyname - returns gethostbyname()'s entry; NULL goes to dns_error() */
/* $begin gethostbyname */
struct hostent *Gethostbyname(const char *name)
{
    struct hostent *p;

    if ((p = gethostbyname(name)) == NULL)
        dns_error("Gethostbyname error");
    return p;
}
/* $end gethostbyname */

/* Gethostbyaddr - returns gethostbyaddr()'s entry; NULL goes to dns_error() */
struct hostent *Gethostbyaddr(const char *addr, int len, int type)
{
    struct hostent *p;

    if ((p = gethostbyaddr(addr, len, type)) == NULL)
        dns_error("Gethostbyaddr error");
    return p;
}

/************************************************
 * Wrappers for Pthreads thread control functions
 ************************************************/

/* Pthread_create - pthread_create() with a nonzero code reported through posix_error() */
void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp,
                    void * (*routine)(void *), void *argp)
{
    int rc;

    if ((rc = pthread_create(tidp, attrp, routine, argp)) != 0)
        posix_error(rc, "Pthread_create error");
}

/* Pthread_cancel - pthread_cancel() with a nonzero code reported through posix_error() */
void Pthread_cancel(pthread_t tid)
{
    int rc;

    if ((rc = pthread_cancel(tid)) != 0)
        posix_error(rc, "Pthread_cancel error");
}

/* Pthread_join - pthread_join() with a nonzero code reported through posix_error() */
void Pthread_join(pthread_t tid, void **thread_return)
{
    int rc;

    if ((rc = pthread_join(tid, thread_return)) != 0)
        posix_error(rc, "Pthread_join error");
}

/* Pthread_detach - pthread_detach() with a nonzero code reported through posix_error() */
/* $begin detach */
void Pthread_detach(pthread_t tid)
{
    int rc;

    if ((rc = pthread_detach(tid)) != 0)
        posix_error(rc, "Pthread_detach error");
}
/* $end detach */

/* Pthread_exit - thin wrapper around pthread_exit() */
void Pthread_exit(void *retval)
{
    pthread_exit(retval);
}

/* Pthread_self - thin wrapper around pthread_self() */
pthread_t Pthread_self(void)
{
    return pthread_self();
}

/*
 * Pthread_once - pthread_once() with a nonzero code reported through
 * posix_error(), matching the other Pthread_* wrappers (the result used
 * to be silently discarded).
 */
void Pthread_once(pthread_once_t *once_control, void (*init_function)())
{
    int rc;

    if ((rc = pthread_once(once_control, init_function)) != 0)
        posix_error(rc, "Pthread_once error");
}

/*******************************
 * Wrappers for Posix semaphores
 *******************************/

/* Sem_init - sem_init() with failures reported through unix_error() */
void Sem_init(sem_t *sem, int pshared, unsigned int value)
{
    if (sem_init(sem, pshared, value) < 0)
        unix_error("Sem_init error");
}

/* P - sem_wait() with failures reported through unix_error() */
void P(sem_t *sem)
{
    if (sem_wait(sem) < 0)
        unix_error("P error");
}

/* V - sem_post() with failures reported through unix_error() */
void V(sem_t *sem)
{
    if (sem_post(sem) < 0)
        unix_error("V error");
}
/********************************************************************* * The Rio package - robust I/O functions **********************************************************************/ /* * rio_readn - robustly read n bytes (unbuffered) */ /* $begin rio_readn */ ssize_t rio_readn(int fd, void *usrbuf, size_t n) { size_t nleft = n; ssize_t nread; char *bufp = usrbuf; while (nleft > 0) { if ((nread = read(fd, bufp, nleft)) < 0) { if (errno == EINTR) /* interrupted by sig handler return */ nread = 0; /* and call read() again */ else return -1; /* errno set by read() */ } else if (nread == 0) break; /* EOF */ nleft -= nread; bufp += nread; } return (n - nleft); /* return >= 0 */ } /* $end rio_readn */ /* * rio_writen - robustly write n bytes (unbuffered) */ /* $begin rio_writen */ ssize_t rio_writen(int fd, void *usrbuf, size_t n) { size_t nleft = n; ssize_t nwritten; char *bufp = usrbuf; while (nleft > 0) { if ((nwritten = write(fd, bufp, nleft)) <= 0) { if (errno == EINTR) /* interrupted by sig handler return */ nwritten = 0; /* and call write() again */ else return -1; /* errno set by write() */ } nleft -= nwritten; bufp += nwritten; } return n; } /* $end rio_writen */ /* * rio_read - This is a wrapper for the Unix read() function that * transfers min(n, rio_cnt) bytes from an internal buffer to a user * buffer, where n is the number of bytes requested by the user and * rio_cnt is the number of unread bytes in the internal buffer. On * entry, rio_read() refills the internal buffer via a call to * read() if the internal buffer is empty. 
*/ /* $begin rio_read */ static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n) { int cnt; while (rp->rio_cnt <= 0) { /* refill if buf is empty */ rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf)); if (rp->rio_cnt < 0) { if (errno != EINTR) /* interrupted by sig handler return */ return -1; } else if (rp->rio_cnt == 0) /* EOF */ return 0; else rp->rio_bufptr = rp->rio_buf; /* reset buffer ptr */ } /* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */ cnt = n; if (rp->rio_cnt < n) cnt = rp->rio_cnt; memcpy(usrbuf, rp->rio_bufptr, cnt); rp->rio_bufptr += cnt; rp->rio_cnt -= cnt; return cnt; } /* $end rio_read */ /* * rio_readinitb - Associate a descriptor with a read buffer and reset buffer */ /* $begin rio_readinitb */ void rio_readinitb(rio_t *rp, int fd) { rp->rio_fd = fd; rp->rio_cnt = 0; rp->rio_bufptr = rp->rio_buf; } /* $end rio_readinitb */ /* * rio_readnb - Robustly read n bytes (buffered) */ /* $begin rio_readnb */ ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n) { size_t nleft = n; ssize_t nread; char *bufp = usrbuf; while (nleft > 0) { if ((nread = rio_read(rp, bufp, nleft)) < 0) { if (errno == EINTR) /* interrupted by sig handler return */ nread = 0; /* call read() again */ else return -1; /* errno set by read() */ } else if (nread == 0) break; /* EOF */ nleft -= nread; bufp += nread; } return (n - nleft); /* return >= 0 */ } /* $end rio_readnb */ /* * rio_readlineb - robustly read a text line (buffered) */ /* $begin rio_readlineb */ ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen) { int n, rc; char c, *bufp = usrbuf; for (n = 1; n < maxlen; n++) { if ((rc = rio_read(rp, &c, 1)) == 1) { *bufp++ = c; if (c == '\n') break; } else if (rc == 0) { if (n == 1) return 0; /* EOF, no data read */ else break; /* EOF, some data was read */ } else return -1; /* error */ } *bufp = 0; return n; } /* $end rio_readlineb */ /********************************** * Wrappers for robust I/O routines 
**********************************/ ssize_t Rio_readn(int fd, void *ptr, size_t nbytes) { ssize_t n; if ((n = rio_readn(fd, ptr, nbytes)) < 0) unix_error("Rio_readn error"); return n; } void Rio_writen(int fd, void *usrbuf, size_t n) { if (rio_writen(fd, usrbuf, n) != n) unix_error("Rio_writen error"); } void Rio_readinitb(rio_t *rp, int fd) { rio_readinitb(rp, fd); } ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n) { ssize_t rc; if ((rc = rio_readnb(rp, usrbuf, n)) < 0) unix_error("Rio_readnb error"); return rc; } ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen) { ssize_t rc; if ((rc = rio_readlineb(rp, usrbuf, maxlen)) < 0) unix_error("Rio_readlineb error"); return rc; } /******************************** * Client/server helper functions ********************************/ /* * open_clientfd - open connection to server at * and return a socket descriptor ready for reading and writing. * Returns -1 and sets errno on Unix error. * Returns -2 and sets h_errno on DNS (gethostbyname) error. */ /* $begin open_clientfd */ int open_clientfd(char *hostname, int port) { int clientfd; struct hostent *hp; struct sockaddr_in serveraddr; if ((clientfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) return -1; /* check errno for cause of error */ /* Fill in the server's IP address and port */ if ((hp = gethostbyname(hostname)) == NULL) return -2; /* check h_errno for cause of error */ bzero((char *) &serveraddr, sizeof(serveraddr)); serveraddr.sin_family = AF_INET; bcopy((char *)hp->h_addr_list[0], (char *)&serveraddr.sin_addr.s_addr, hp->h_length); serveraddr.sin_port = htons(port); /* Establish a connection with the server */ if (connect(clientfd, (SA *) &serveraddr, sizeof(serveraddr)) < 0) return -1; return clientfd; } /* $end open_clientfd */ /* * open_listenfd - open and return a listening socket on port * Returns -1 and sets errno on Unix error. 
*/ /* $begin open_listenfd */ int open_listenfd(int port) { int listenfd, optval=1; struct sockaddr_in serveraddr; /* Create a socket descriptor */ if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) return -1; /* Eliminates "Address already in use" error from bind. */ if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *)&optval , sizeof(int)) < 0) return -1; /* Listenfd will be an endpoint for all requests to port on any IP address for this host */ bzero((char *) &serveraddr, sizeof(serveraddr)); serveraddr.sin_family = AF_INET; serveraddr.sin_addr.s_addr = htonl(INADDR_ANY); serveraddr.sin_port = htons((unsigned short)port); if (bind(listenfd, (SA *)&serveraddr, sizeof(serveraddr)) < 0) return -1; /* Make it a listening socket ready to accept connection requests */ if (listen(listenfd, LISTENQ) < 0) return -1; return listenfd; } /* $end open_listenfd */ /****************************************** * Wrappers for the client/server helper routines ******************************************/ int Open_clientfd(char *hostname, int port) { int rc; if ((rc = open_clientfd(hostname, port)) < 0) { if (rc == -1) unix_error("Open_clientfd Unix error"); else dns_error("Open_clientfd DNS error"); } return rc; } int Open_listenfd(int port) { int rc; if ((rc = open_listenfd(port)) < 0) unix_error("Open_listenfd error"); return rc; } /* $end csapp.c */ ================================================ FILE: code/vm/malloc/memlib.c ================================================ /* * memlib.c - a module that simulates the memory system. Needed * because it allows us to interleave calls from the student's malloc * package with the system's malloc package in libc. 
* */ #include #include #include #include #include #include #include #include "csapp.h" #include "memlib.h" #include "config.h" /* $begin memlib */ /* Private global variables */ static char *mem_heap; /* Points to first byte of heap */ static char *mem_brk; /* Points to last byte of heap plus 1 */ static char *mem_max_addr; /* Max legal heap addr plus 1*/ /* * mem_init - Initialize the memory system model */ void mem_init(void) { mem_heap = (char *)Malloc(MAX_HEAP); mem_brk = (char *)mem_heap; mem_max_addr = (char *)(mem_heap + MAX_HEAP); } /* * mem_sbrk - Simple model of the sbrk function. Extends the heap * by incr bytes and returns the start address of the new area. In * this model, the heap cannot be shrunk. */ void *mem_sbrk(int incr) { char *old_brk = mem_brk; if ( (incr < 0) || ((mem_brk + incr) > mem_max_addr)) { errno = ENOMEM; fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n"); return (void *)-1; } mem_brk += incr; return (void *)old_brk; } /* $end memlib */ /* * mem_deinit - free the storage used by the memory system model */ void mem_deinit(void) { } /* * mem_reset_brk - reset the simulated brk pointer to make an empty heap */ void mem_reset_brk() { mem_brk = (char *)mem_heap; } /* * mem_heap_lo - return address of the first heap byte */ void *mem_heap_lo() { return (void *)mem_heap; } /* * mem_heap_hi - return address of last heap byte */ void *mem_heap_hi() { return (void *)(mem_brk - 1); } /* * mem_heapsize() - returns the heap size in bytes */ size_t mem_heapsize() { return (size_t)((void *)mem_brk - (void *)mem_heap); } /* * mem_pagesize() - returns the page size of the system */ size_t mem_pagesize() { return (size_t)getpagesize(); } ================================================ FILE: code/vm/malloc/mm.c ================================================ /* * Simple, 32-bit and 64-bit clean allocator based on implicit free * lists, first fit placement, and boundary tag coalescing, as described * in the CS:APP2e text. 
Blocks must be aligned to doubleword (8 byte) * boundaries. Minimum block size is 16 bytes. */ #include #include #include #include "mm.h" #include "memlib.h" /* * If NEXT_FIT defined use next fit search, else use first fit search */ #define NEXT_FITx /* $begin mallocmacros */ /* Basic constants and macros */ #define WSIZE 4 /* Word and header/footer size (bytes) */ //line:vm:mm:beginconst #define DSIZE 8 /* Doubleword size (bytes) */ #define CHUNKSIZE (1<<12) /* Extend heap by this amount (bytes) */ //line:vm:mm:endconst #define MAX(x, y) ((x) > (y)? (x) : (y)) /* Pack a size and allocated bit into a word */ #define PACK(size, alloc) ((size) | (alloc)) //line:vm:mm:pack /* Read and write a word at address p */ #define GET(p) (*(unsigned int *)(p)) //line:vm:mm:get #define PUT(p, val) (*(unsigned int *)(p) = (val)) //line:vm:mm:put /* Read the size and allocated fields from address p */ #define GET_SIZE(p) (GET(p) & ~0x7) //line:vm:mm:getsize #define GET_ALLOC(p) (GET(p) & 0x1) //line:vm:mm:getalloc /* Given block ptr bp, compute address of its header and footer */ #define HDRP(bp) ((char *)(bp) - WSIZE) //line:vm:mm:hdrp #define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) //line:vm:mm:ftrp /* Given block ptr bp, compute address of next and previous blocks */ #define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE))) //line:vm:mm:nextblkp #define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE))) //line:vm:mm:prevblkp /* $end mallocmacros */ /* Global variables */ static char *heap_listp = 0; /* Pointer to first block */ #ifdef NEXT_FIT static char *rover; /* Next fit rover */ #endif /* Function prototypes for internal helper routines */ static void *extend_heap(size_t words); static void place(void *bp, size_t asize); static void *find_fit(size_t asize); static void *coalesce(void *bp); static void printblock(void *bp); static void checkheap(int verbose); static void checkblock(void *bp); /* * mm_init - Initialize the memory 
manager */ /* $begin mminit */ int mm_init(void) { /* Create the initial empty heap */ if ((heap_listp = mem_sbrk(4*WSIZE)) == (void *)-1) //line:vm:mm:begininit return -1; PUT(heap_listp, 0); /* Alignment padding */ PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); /* Prologue header */ PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); /* Prologue footer */ PUT(heap_listp + (3*WSIZE), PACK(0, 1)); /* Epilogue header */ heap_listp += (2*WSIZE); //line:vm:mm:endinit /* $end mminit */ #ifdef NEXT_FIT rover = heap_listp; #endif /* $begin mminit */ /* Extend the empty heap with a free block of CHUNKSIZE bytes */ if (extend_heap(CHUNKSIZE/WSIZE) == NULL) return -1; return 0; } /* $end mminit */ /* * mm_malloc - Allocate a block with at least size bytes of payload */ /* $begin mmmalloc */ void *mm_malloc(size_t size) { size_t asize; /* Adjusted block size */ size_t extendsize; /* Amount to extend heap if no fit */ char *bp; /* $end mmmalloc */ if (heap_listp == 0){ mm_init(); } /* $begin mmmalloc */ /* Ignore spurious requests */ if (size == 0) return NULL; /* Adjust block size to include overhead and alignment reqs. */ if (size <= DSIZE) //line:vm:mm:sizeadjust1 asize = 2*DSIZE; //line:vm:mm:sizeadjust2 else asize = DSIZE * ((size + (DSIZE) + (DSIZE-1)) / DSIZE); //line:vm:mm:sizeadjust3 /* Search the free list for a fit */ if ((bp = find_fit(asize)) != NULL) { //line:vm:mm:findfitcall place(bp, asize); //line:vm:mm:findfitplace return bp; } /* No fit found. 
Get more memory and place the block */ extendsize = MAX(asize,CHUNKSIZE); //line:vm:mm:growheap1 if ((bp = extend_heap(extendsize/WSIZE)) == NULL) return NULL; //line:vm:mm:growheap2 place(bp, asize); //line:vm:mm:growheap3 return bp; } /* $end mmmalloc */ /* * mm_free - Free a block */ /* $begin mmfree */ void mm_free(void *bp) { /* $end mmfree */ if(bp == 0) return; /* $begin mmfree */ size_t size = GET_SIZE(HDRP(bp)); /* $end mmfree */ if (heap_listp == 0){ mm_init(); } /* $begin mmfree */ PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size, 0)); coalesce(bp); } /* $end mmfree */ /* * coalesce - Boundary tag coalescing. Return ptr to coalesced block */ /* $begin mmfree */ static void *coalesce(void *bp) { size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp))); size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp))); size_t size = GET_SIZE(HDRP(bp)); if (prev_alloc && next_alloc) { /* Case 1 */ return bp; } else if (prev_alloc && !next_alloc) { /* Case 2 */ size += GET_SIZE(HDRP(NEXT_BLKP(bp))); PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size,0)); } else if (!prev_alloc && next_alloc) { /* Case 3 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))); PUT(FTRP(bp), PACK(size, 0)); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } else { /* Case 4 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))) + GET_SIZE(FTRP(NEXT_BLKP(bp))); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } /* $end mmfree */ #ifdef NEXT_FIT /* Make sure the rover isn't pointing into the free block */ /* that we just coalesced */ if ((rover > (char *)bp) && (rover < NEXT_BLKP(bp))) rover = bp; #endif /* $begin mmfree */ return bp; } /* $end mmfree */ /* * mm_realloc - Naive implementation of realloc */ void *mm_realloc(void *ptr, size_t size) { size_t oldsize; void *newptr; /* If size == 0 then this is just free, and we return NULL. */ if(size == 0) { mm_free(ptr); return 0; } /* If oldptr is NULL, then this is just malloc. 
*/ if(ptr == NULL) { return mm_malloc(size); } newptr = mm_malloc(size); /* If realloc() fails the original block is left untouched */ if(!newptr) { return 0; } /* Copy the old data. */ oldsize = GET_SIZE(HDRP(ptr)); if(size < oldsize) oldsize = size; memcpy(newptr, ptr, oldsize); /* Free the old block. */ mm_free(ptr); return newptr; } /* * checkheap - We don't check anything right now. */ void mm_checkheap(int verbose) { } /* * The remaining routines are internal helper routines */ /* * extend_heap - Extend heap with free block and return its block pointer */ /* $begin mmextendheap */ static void *extend_heap(size_t words) { char *bp; size_t size; /* Allocate an even number of words to maintain alignment */ size = (words % 2) ? (words+1) * WSIZE : words * WSIZE; //line:vm:mm:beginextend if ((long)(bp = mem_sbrk(size)) == -1) return NULL; //line:vm:mm:endextend /* Initialize free block header/footer and the epilogue header */ PUT(HDRP(bp), PACK(size, 0)); /* Free block header */ //line:vm:mm:freeblockhdr PUT(FTRP(bp), PACK(size, 0)); /* Free block footer */ //line:vm:mm:freeblockftr PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1)); /* New epilogue header */ //line:vm:mm:newepihdr /* Coalesce if the previous block was free */ return coalesce(bp); //line:vm:mm:returnblock } /* $end mmextendheap */ /* * place - Place block of asize bytes at start of free block bp * and split if remainder would be at least minimum block size */ /* $begin mmplace */ /* $begin mmplace-proto */ static void place(void *bp, size_t asize) /* $end mmplace-proto */ { size_t csize = GET_SIZE(HDRP(bp)); if ((csize - asize) >= (2*DSIZE)) { PUT(HDRP(bp), PACK(asize, 1)); PUT(FTRP(bp), PACK(asize, 1)); bp = NEXT_BLKP(bp); PUT(HDRP(bp), PACK(csize-asize, 0)); PUT(FTRP(bp), PACK(csize-asize, 0)); } else { PUT(HDRP(bp), PACK(csize, 1)); PUT(FTRP(bp), PACK(csize, 1)); } } /* $end mmplace */ /* * find_fit - Find a fit for a block with asize bytes */ /* $begin mmfirstfit */ /* $begin mmfirstfit-proto */ static void 
*find_fit(size_t asize) /* $end mmfirstfit-proto */ { /* $end mmfirstfit */ #ifdef NEXT_FIT /* Next fit search */ char *oldrover = rover; /* Search from the rover to the end of list */ for ( ; GET_SIZE(HDRP(rover)) > 0; rover = NEXT_BLKP(rover)) if (!GET_ALLOC(HDRP(rover)) && (asize <= GET_SIZE(HDRP(rover)))) return rover; /* search from start of list to old rover */ for (rover = heap_listp; rover < oldrover; rover = NEXT_BLKP(rover)) if (!GET_ALLOC(HDRP(rover)) && (asize <= GET_SIZE(HDRP(rover)))) return rover; return NULL; /* no fit found */ #else /* $begin mmfirstfit */ /* First fit search */ void *bp; for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) { if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) { return bp; } } return NULL; /* No fit */ /* $end mmfirstfit */ #endif } static void printblock(void *bp) { size_t hsize, halloc, fsize, falloc; checkheap(0); hsize = GET_SIZE(HDRP(bp)); halloc = GET_ALLOC(HDRP(bp)); fsize = GET_SIZE(FTRP(bp)); falloc = GET_ALLOC(FTRP(bp)); if (hsize == 0) { printf("%p: EOL\n", bp); return; } /* printf("%p: header: [%p:%c] footer: [%p:%c]\n", bp, hsize, (halloc ? 'a' : 'f'), fsize, (falloc ? 
'a' : 'f')); */ } static void checkblock(void *bp) { if ((size_t)bp % 8) printf("Error: %p is not doubleword aligned\n", bp); if (GET(HDRP(bp)) != GET(FTRP(bp))) printf("Error: header does not match footer\n"); } /* * checkheap - Minimal check of the heap for consistency */ void checkheap(int verbose) { char *bp = heap_listp; if (verbose) printf("Heap (%p):\n", heap_listp); if ((GET_SIZE(HDRP(heap_listp)) != DSIZE) || !GET_ALLOC(HDRP(heap_listp))) printf("Bad prologue header\n"); checkblock(heap_listp); for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) { if (verbose) printblock(bp); checkblock(bp); } if (verbose) printblock(bp); if ((GET_SIZE(HDRP(bp)) != 0) || !(GET_ALLOC(HDRP(bp)))) printf("Bad epilogue header\n"); } ================================================ FILE: code/vm/memlib.h ================================================ void *mem_init(int size); void *mem_sbrk(int incr); ================================================ FILE: code/vm/mm.h ================================================ /* $begin mallocinterface */ int mm_init(void); void *mm_malloc(size_t size); void mm_free(void *bp); /* $end mallocinterface */ void mm_checkheap(int verbose); void *mm_realloc(void *ptr, size_t size); /* Unused. 
Just to keep us compatible with the 15-213 malloc driver */ typedef struct { char *team; char *name1, *email1; char *name2, *email2; } team_t; extern team_t team; ================================================ FILE: common/csapp.c ================================================ /* $begin csapp.c */ #include "csapp.h" /************************** * Error-handling functions **************************/ /* $begin errorfuns */ /* $begin unixerror */ void unix_error(char *msg) /* unix-style error */ { fprintf(stderr, "%s: %s\n", msg, strerror(errno)); exit(0); } /* $end unixerror */ void posix_error(int code, char *msg) /* posix-style error */ { fprintf(stderr, "%s: %s\n", msg, strerror(code)); exit(0); } void dns_error(char *msg) /* dns-style error */ { fprintf(stderr, "%s: DNS error %d\n", msg, h_errno); exit(0); } void app_error(char *msg) /* application error */ { fprintf(stderr, "%s\n", msg); exit(0); } /* $end errorfuns */ /********************************************* * Wrappers for Unix process control functions ********************************************/ /* $begin forkwrapper */ pid_t Fork(void) { pid_t pid; if ((pid = fork()) < 0) unix_error("Fork error"); return pid; } /* $end forkwrapper */ void Execve(const char *filename, char *const argv[], char *const envp[]) { if (execve(filename, argv, envp) < 0) unix_error("Execve error"); } /* $begin wait */ pid_t Wait(int *status) { pid_t pid; if ((pid = wait(status)) < 0) unix_error("Wait error"); return pid; } /* $end wait */ pid_t Waitpid(pid_t pid, int *iptr, int options) { pid_t retpid; if ((retpid = waitpid(pid, iptr, options)) < 0) unix_error("Waitpid error"); return(retpid); } /* $begin kill */ void Kill(pid_t pid, int signum) { int rc; if ((rc = kill(pid, signum)) < 0) unix_error("Kill error"); } /* $end kill */ void Pause() { (void)pause(); return; } unsigned int Sleep(unsigned int secs) { unsigned int rc; if ((rc = sleep(secs)) < 0) unix_error("Sleep error"); return rc; } unsigned int Alarm(unsigned 
int seconds) { return alarm(seconds); } void Setpgid(pid_t pid, pid_t pgid) { int rc; if ((rc = setpgid(pid, pgid)) < 0) unix_error("Setpgid error"); return; } pid_t Getpgrp(void) { return getpgrp(); } /************************************ * Wrappers for Unix signal functions ***********************************/ /* $begin sigaction */ handler_t *Signal(int signum, handler_t *handler) { struct sigaction action, old_action; action.sa_handler = handler; sigemptyset(&action.sa_mask); /* block sigs of type being handled */ action.sa_flags = SA_RESTART; /* restart syscalls if possible */ if (sigaction(signum, &action, &old_action) < 0) unix_error("Signal error"); return (old_action.sa_handler); } /* $end sigaction */ void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset) { if (sigprocmask(how, set, oldset) < 0) unix_error("Sigprocmask error"); return; } void Sigemptyset(sigset_t *set) { if (sigemptyset(set) < 0) unix_error("Sigemptyset error"); return; } void Sigfillset(sigset_t *set) { if (sigfillset(set) < 0) unix_error("Sigfillset error"); return; } void Sigaddset(sigset_t *set, int signum) { if (sigaddset(set, signum) < 0) unix_error("Sigaddset error"); return; } void Sigdelset(sigset_t *set, int signum) { if (sigdelset(set, signum) < 0) unix_error("Sigdelset error"); return; } int Sigismember(const sigset_t *set, int signum) { int rc; if ((rc = sigismember(set, signum)) < 0) unix_error("Sigismember error"); return rc; } /******************************** * Wrappers for Unix I/O routines ********************************/ int Open(const char *pathname, int flags, mode_t mode) { int rc; if ((rc = open(pathname, flags, mode)) < 0) unix_error("Open error"); return rc; } ssize_t Read(int fd, void *buf, size_t count) { ssize_t rc; if ((rc = read(fd, buf, count)) < 0) unix_error("Read error"); return rc; } ssize_t Write(int fd, const void *buf, size_t count) { ssize_t rc; if ((rc = write(fd, buf, count)) < 0) unix_error("Write error"); return rc; } off_t Lseek(int 
fildes, off_t offset, int whence) { off_t rc; if ((rc = lseek(fildes, offset, whence)) < 0) unix_error("Lseek error"); return rc; } void Close(int fd) { int rc; if ((rc = close(fd)) < 0) unix_error("Close error"); } int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { int rc; if ((rc = select(n, readfds, writefds, exceptfds, timeout)) < 0) unix_error("Select error"); return rc; } int Dup2(int fd1, int fd2) { int rc; if ((rc = dup2(fd1, fd2)) < 0) unix_error("Dup2 error"); return rc; } void Stat(const char *filename, struct stat *buf) { if (stat(filename, buf) < 0) unix_error("Stat error"); } void Fstat(int fd, struct stat *buf) { if (fstat(fd, buf) < 0) unix_error("Fstat error"); } /*************************************** * Wrappers for memory mapping functions ***************************************/ void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { void *ptr; if ((ptr = mmap(addr, len, prot, flags, fd, offset)) == ((void *) -1)) unix_error("mmap error"); return(ptr); } void Munmap(void *start, size_t length) { if (munmap(start, length) < 0) unix_error("munmap error"); } /*************************************************** * Wrappers for dynamic storage allocation functions ***************************************************/ void *Malloc(size_t size) { void *p; if ((p = malloc(size)) == NULL) unix_error("Malloc error"); return p; } void *Realloc(void *ptr, size_t size) { void *p; if ((p = realloc(ptr, size)) == NULL) unix_error("Realloc error"); return p; } void *Calloc(size_t nmemb, size_t size) { void *p; if ((p = calloc(nmemb, size)) == NULL) unix_error("Calloc error"); return p; } void Free(void *ptr) { free(ptr); } /****************************************** * Wrappers for the Standard I/O functions. 
******************************************/ void Fclose(FILE *fp) { if (fclose(fp) != 0) unix_error("Fclose error"); } FILE *Fdopen(int fd, const char *type) { FILE *fp; if ((fp = fdopen(fd, type)) == NULL) unix_error("Fdopen error"); return fp; } char *Fgets(char *ptr, int n, FILE *stream) { char *rptr; if (((rptr = fgets(ptr, n, stream)) == NULL) && ferror(stream)) app_error("Fgets error"); return rptr; } FILE *Fopen(const char *filename, const char *mode) { FILE *fp; if ((fp = fopen(filename, mode)) == NULL) unix_error("Fopen error"); return fp; } void Fputs(const char *ptr, FILE *stream) { if (fputs(ptr, stream) == EOF) unix_error("Fputs error"); } size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t n; if (((n = fread(ptr, size, nmemb, stream)) < nmemb) && ferror(stream)) unix_error("Fread error"); return n; } void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { if (fwrite(ptr, size, nmemb, stream) < nmemb) unix_error("Fwrite error"); } /**************************** * Sockets interface wrappers ****************************/ int Socket(int domain, int type, int protocol) { int rc; if ((rc = socket(domain, type, protocol)) < 0) unix_error("Socket error"); return rc; } void Setsockopt(int s, int level, int optname, const void *optval, int optlen) { int rc; if ((rc = setsockopt(s, level, optname, optval, optlen)) < 0) unix_error("Setsockopt error"); } void Bind(int sockfd, struct sockaddr *my_addr, int addrlen) { int rc; if ((rc = bind(sockfd, my_addr, addrlen)) < 0) unix_error("Bind error"); } void Listen(int s, int backlog) { int rc; if ((rc = listen(s, backlog)) < 0) unix_error("Listen error"); } int Accept(int s, struct sockaddr *addr, socklen_t *addrlen) { int rc; if ((rc = accept(s, addr, addrlen)) < 0) unix_error("Accept error"); return rc; } void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen) { int rc; if ((rc = connect(sockfd, serv_addr, addrlen)) < 0) unix_error("Connect error"); } 
/************************ * DNS interface wrappers ***********************/ /* $begin gethostbyname */ struct hostent *Gethostbyname(const char *name) { struct hostent *p; if ((p = gethostbyname(name)) == NULL) dns_error("Gethostbyname error"); return p; } /* $end gethostbyname */ struct hostent *Gethostbyaddr(const char *addr, int len, int type) { struct hostent *p; if ((p = gethostbyaddr(addr, len, type)) == NULL) dns_error("Gethostbyaddr error"); return p; } /************************************************ * Wrappers for Pthreads thread control functions ************************************************/ void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp, void * (*routine)(void *), void *argp) { int rc; if ((rc = pthread_create(tidp, attrp, routine, argp)) != 0) posix_error(rc, "Pthread_create error"); } void Pthread_cancel(pthread_t tid) { int rc; if ((rc = pthread_cancel(tid)) != 0) posix_error(rc, "Pthread_cancel error"); } void Pthread_join(pthread_t tid, void **thread_return) { int rc; if ((rc = pthread_join(tid, thread_return)) != 0) posix_error(rc, "Pthread_join error"); } /* $begin detach */ void Pthread_detach(pthread_t tid) { int rc; if ((rc = pthread_detach(tid)) != 0) posix_error(rc, "Pthread_detach error"); } /* $end detach */ void Pthread_exit(void *retval) { pthread_exit(retval); } pthread_t Pthread_self(void) { return pthread_self(); } void Pthread_once(pthread_once_t *once_control, void (*init_function)()) { pthread_once(once_control, init_function); } /******************************* * Wrappers for Posix semaphores *******************************/ void Sem_init(sem_t *sem, int pshared, unsigned int value) { if (sem_init(sem, pshared, value) < 0) unix_error("Sem_init error"); } void P(sem_t *sem) { if (sem_wait(sem) < 0) unix_error("P error"); } void V(sem_t *sem) { if (sem_post(sem) < 0) unix_error("V error"); } /********************************************************************* * The Rio package - robust I/O functions 
**********************************************************************/

/*
 * rio_readn - robustly read n bytes (unbuffered)
 *
 * Keeps calling read() until n bytes have arrived, read() reports EOF, or
 * read() fails with something other than EINTR.  Returns the number of
 * bytes placed in usrbuf (less than n only at EOF), or -1 on error.
 */
/* $begin rio_readn */
ssize_t rio_readn(int fd, void *usrbuf, size_t n)
{
    size_t remaining = n;
    char *dst = usrbuf;

    while (remaining > 0) {
        ssize_t got = read(fd, dst, remaining);

        if (got < 0) {
            if (errno != EINTR)
                return -1;      /* errno set by read() */
            continue;           /* interrupted by sig handler return: retry */
        }
        if (got == 0)
            break;              /* EOF */
        remaining -= got;
        dst += got;
    }
    return (n - remaining);     /* return >= 0 */
}
/* $end rio_readn */

/*
 * rio_writen - robustly write n bytes (unbuffered)
 *
 * Keeps calling write() until all n bytes are written.  A result <= 0 with
 * errno equal to EINTR is retried; any other result <= 0 returns -1.
 * Returns n on success.
 */
/* $begin rio_writen */
ssize_t rio_writen(int fd, void *usrbuf, size_t n)
{
    size_t remaining = n;
    char *src = usrbuf;

    while (remaining > 0) {
        ssize_t put = write(fd, src, remaining);

        if (put <= 0) {
            if (errno != EINTR)
                return -1;      /* errno set by write() */
            continue;           /* interrupted by sig handler return: retry */
        }
        remaining -= put;
        src += put;
    }
    return n;
}
/* $end rio_writen */

/*
 * rio_read - This is a wrapper for the Unix read() function that
 *    transfers min(n, rio_cnt) bytes from an internal buffer to a user
 *    buffer, where n is the number of bytes requested by the user and
 *    rio_cnt is the number of unread bytes in the internal buffer. On
 *    entry, rio_read() refills the internal buffer via a call to
 *    read() if the internal buffer is empty.
*/ /* $begin rio_read */ static ssize_t rio_read(rio_t *rp, char *usrbuf, size_t n) { int cnt; while (rp->rio_cnt <= 0) { /* refill if buf is empty */ rp->rio_cnt = read(rp->rio_fd, rp->rio_buf, sizeof(rp->rio_buf)); if (rp->rio_cnt < 0) { if (errno != EINTR) /* interrupted by sig handler return */ return -1; } else if (rp->rio_cnt == 0) /* EOF */ return 0; else rp->rio_bufptr = rp->rio_buf; /* reset buffer ptr */ } /* Copy min(n, rp->rio_cnt) bytes from internal buf to user buf */ cnt = n; if (rp->rio_cnt < n) cnt = rp->rio_cnt; memcpy(usrbuf, rp->rio_bufptr, cnt); rp->rio_bufptr += cnt; rp->rio_cnt -= cnt; return cnt; } /* $end rio_read */ /* * rio_readinitb - Associate a descriptor with a read buffer and reset buffer */ /* $begin rio_readinitb */ void rio_readinitb(rio_t *rp, int fd) { rp->rio_fd = fd; rp->rio_cnt = 0; rp->rio_bufptr = rp->rio_buf; } /* $end rio_readinitb */ /* * rio_readnb - Robustly read n bytes (buffered) */ /* $begin rio_readnb */ ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n) { size_t nleft = n; ssize_t nread; char *bufp = usrbuf; while (nleft > 0) { if ((nread = rio_read(rp, bufp, nleft)) < 0) { if (errno == EINTR) /* interrupted by sig handler return */ nread = 0; /* call read() again */ else return -1; /* errno set by read() */ } else if (nread == 0) break; /* EOF */ nleft -= nread; bufp += nread; } return (n - nleft); /* return >= 0 */ } /* $end rio_readnb */ /* * rio_readlineb - robustly read a text line (buffered) */ /* $begin rio_readlineb */ ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen) { int n, rc; char c, *bufp = usrbuf; for (n = 1; n < maxlen; n++) { if ((rc = rio_read(rp, &c, 1)) == 1) { *bufp++ = c; if (c == '\n') break; } else if (rc == 0) { if (n == 1) return 0; /* EOF, no data read */ else break; /* EOF, some data was read */ } else return -1; /* error */ } *bufp = 0; return n; } /* $end rio_readlineb */ /********************************** * Wrappers for robust I/O routines 
**********************************/ ssize_t Rio_readn(int fd, void *ptr, size_t nbytes) { ssize_t n; if ((n = rio_readn(fd, ptr, nbytes)) < 0) unix_error("Rio_readn error"); return n; } void Rio_writen(int fd, void *usrbuf, size_t n) { if (rio_writen(fd, usrbuf, n) != n) unix_error("Rio_writen error"); } void Rio_readinitb(rio_t *rp, int fd) { rio_readinitb(rp, fd); } ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n) { ssize_t rc; if ((rc = rio_readnb(rp, usrbuf, n)) < 0) unix_error("Rio_readnb error"); return rc; } ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen) { ssize_t rc; if ((rc = rio_readlineb(rp, usrbuf, maxlen)) < 0) unix_error("Rio_readlineb error"); return rc; } /******************************** * Client/server helper functions ********************************/ /* * open_clientfd - open connection to server at * and return a socket descriptor ready for reading and writing. * Returns -1 and sets errno on Unix error. * Returns -2 and sets h_errno on DNS (gethostbyname) error. */ /* $begin open_clientfd */ int open_clientfd(char *hostname, int port) { int clientfd; struct hostent *hp; struct sockaddr_in serveraddr; if ((clientfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) return -1; /* check errno for cause of error */ /* Fill in the server's IP address and port */ if ((hp = gethostbyname(hostname)) == NULL) return -2; /* check h_errno for cause of error */ bzero((char *) &serveraddr, sizeof(serveraddr)); serveraddr.sin_family = AF_INET; bcopy((char *)hp->h_addr_list[0], (char *)&serveraddr.sin_addr.s_addr, hp->h_length); serveraddr.sin_port = htons(port); /* Establish a connection with the server */ if (connect(clientfd, (SA *) &serveraddr, sizeof(serveraddr)) < 0) return -1; return clientfd; } /* $end open_clientfd */ /* * open_listenfd - open and return a listening socket on port * Returns -1 and sets errno on Unix error. 
*/ /* $begin open_listenfd */ int open_listenfd(int port) { int listenfd, optval=1; struct sockaddr_in serveraddr; /* Create a socket descriptor */ if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) return -1; /* Eliminates "Address already in use" error from bind. */ if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *)&optval , sizeof(int)) < 0) return -1; /* Listenfd will be an endpoint for all requests to port on any IP address for this host */ bzero((char *) &serveraddr, sizeof(serveraddr)); serveraddr.sin_family = AF_INET; serveraddr.sin_addr.s_addr = htonl(INADDR_ANY); serveraddr.sin_port = htons((unsigned short)port); if (bind(listenfd, (SA *)&serveraddr, sizeof(serveraddr)) < 0) return -1; /* Make it a listening socket ready to accept connection requests */ if (listen(listenfd, LISTENQ) < 0) return -1; return listenfd; } /* $end open_listenfd */ /****************************************** * Wrappers for the client/server helper routines ******************************************/ int Open_clientfd(char *hostname, int port) { int rc; if ((rc = open_clientfd(hostname, port)) < 0) { if (rc == -1) unix_error("Open_clientfd Unix error"); else dns_error("Open_clientfd DNS error"); } return rc; } int Open_listenfd(int port) { int rc; if ((rc = open_listenfd(port)) < 0) unix_error("Open_listenfd error"); return rc; } /* $end csapp.c */ ================================================ FILE: common/csapp.h ================================================ /* $begin csapp.h */ #ifndef __CSAPP_H__ #define __CSAPP_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Default file permissions are DEF_MODE & ~DEF_UMASK */ /* $begin createmasks */ #define DEF_MODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH #define DEF_UMASK S_IWGRP|S_IWOTH /* $end createmasks */ /* Simplifies calls to bind(), connect(), and 
accept() */ /* $begin sockaddrdef */ typedef struct sockaddr SA; /* $end sockaddrdef */ /* Persistent state for the robust I/O (Rio) package */ /* $begin rio_t */ #define RIO_BUFSIZE 8192 typedef struct { int rio_fd; /* descriptor for this internal buf */ int rio_cnt; /* unread bytes in internal buf */ char *rio_bufptr; /* next unread byte in internal buf */ char rio_buf[RIO_BUFSIZE]; /* internal buffer */ } rio_t; /* $end rio_t */ /* External variables */ extern int h_errno; /* defined by BIND for DNS errors */ extern char **environ; /* defined by libc */ /* Misc constants */ #define MAXLINE 8192 /* max text line length */ #define MAXBUF 8192 /* max I/O buffer size */ #define LISTENQ 1024 /* second argument to listen() */ /* Our own error-handling functions */ void unix_error(char *msg); void posix_error(int code, char *msg); void dns_error(char *msg); void app_error(char *msg); /* Process control wrappers */ pid_t Fork(void); void Execve(const char *filename, char *const argv[], char *const envp[]); pid_t Wait(int *status); pid_t Waitpid(pid_t pid, int *iptr, int options); void Kill(pid_t pid, int signum); unsigned int Sleep(unsigned int secs); void Pause(void); unsigned int Alarm(unsigned int seconds); void Setpgid(pid_t pid, pid_t pgid); pid_t Getpgrp(); /* Signal wrappers */ typedef void handler_t(int); handler_t *Signal(int signum, handler_t *handler); void Sigprocmask(int how, const sigset_t *set, sigset_t *oldset); void Sigemptyset(sigset_t *set); void Sigfillset(sigset_t *set); void Sigaddset(sigset_t *set, int signum); void Sigdelset(sigset_t *set, int signum); int Sigismember(const sigset_t *set, int signum); /* Unix I/O wrappers */ int Open(const char *pathname, int flags, mode_t mode); ssize_t Read(int fd, void *buf, size_t count); ssize_t Write(int fd, const void *buf, size_t count); off_t Lseek(int fildes, off_t offset, int whence); void Close(int fd); int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); 
int Dup2(int fd1, int fd2); void Stat(const char *filename, struct stat *buf); void Fstat(int fd, struct stat *buf) ; /* Memory mapping wrappers */ void *Mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset); void Munmap(void *start, size_t length); /* Standard I/O wrappers */ void Fclose(FILE *fp); FILE *Fdopen(int fd, const char *type); char *Fgets(char *ptr, int n, FILE *stream); FILE *Fopen(const char *filename, const char *mode); void Fputs(const char *ptr, FILE *stream); size_t Fread(void *ptr, size_t size, size_t nmemb, FILE *stream); void Fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); /* Dynamic storage allocation wrappers */ void *Malloc(size_t size); void *Realloc(void *ptr, size_t size); void *Calloc(size_t nmemb, size_t size); void Free(void *ptr); /* Sockets interface wrappers */ int Socket(int domain, int type, int protocol); void Setsockopt(int s, int level, int optname, const void *optval, int optlen); void Bind(int sockfd, struct sockaddr *my_addr, int addrlen); void Listen(int s, int backlog); int Accept(int s, struct sockaddr *addr, socklen_t *addrlen); void Connect(int sockfd, struct sockaddr *serv_addr, int addrlen); /* DNS wrappers */ struct hostent *Gethostbyname(const char *name); struct hostent *Gethostbyaddr(const char *addr, int len, int type); /* Pthreads thread control wrappers */ void Pthread_create(pthread_t *tidp, pthread_attr_t *attrp, void * (*routine)(void *), void *argp); void Pthread_join(pthread_t tid, void **thread_return); void Pthread_cancel(pthread_t tid); void Pthread_detach(pthread_t tid); void Pthread_exit(void *retval); pthread_t Pthread_self(void); void Pthread_once(pthread_once_t *once_control, void (*init_function)()); /* POSIX semaphore wrappers */ void Sem_init(sem_t *sem, int pshared, unsigned int value); void P(sem_t *sem); void V(sem_t *sem); /* Rio (Robust I/O) package */ ssize_t rio_readn(int fd, void *usrbuf, size_t n); ssize_t rio_writen(int fd, void *usrbuf, size_t n); 
void rio_readinitb(rio_t *rp, int fd); ssize_t rio_readnb(rio_t *rp, void *usrbuf, size_t n); ssize_t rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); /* Wrappers for Rio package */ ssize_t Rio_readn(int fd, void *usrbuf, size_t n); void Rio_writen(int fd, void *usrbuf, size_t n); void Rio_readinitb(rio_t *rp, int fd); ssize_t Rio_readnb(rio_t *rp, void *usrbuf, size_t n); ssize_t Rio_readlineb(rio_t *rp, void *usrbuf, size_t maxlen); /* Client/server helper functions */ int open_clientfd(char *hostname, int portno); int open_listenfd(int portno); /* Wrappers for client/server helper functions */ int Open_clientfd(char *hostname, int port); int Open_listenfd(int port); #endif /* __CSAPP_H__ */ /* $end csapp.h */ ================================================ FILE: exercise/00-topic.txt ================================================ ================================================================================ 这个文件收集了《深入理解计算机系统·第二版》的家庭作业的题目。关于这本书的详细信 息如下: 深入理解计算机系统·第二版 Randal E. Bryant David R. 
O'Hallaron 著 龚奕利 雷迎春 译 机械工业出版社 2012 年 7 月第 1 版第 8 次印刷 ================================================================================ 2.55 在你能够访问的不同机器上,使用 show_bytes(文件 show-bytes.c)编译并运行示 例代码。确定这些机器使用的字节顺序。 2.56 试着用不同的示例值来运行 show_bytes 的代码。 2.57 编写程序 show_short、show_long 和 show_double,它们分别打印类型为 short int、 long int 和 double 的 C 语言对象的字节表示。请试着在几种机器上运行。 2.58 编写过程 is_little_endian,当在小端法机器上编译和运行时返回 1,在大端法机器 上编译运行时返回 0.这个程序应该可以运行在任何机器上,无论机器的字长是多少。 2.59 编写一个 C 表达式,使它生成一个字,由 x 的最低有效字节和 y 中剩下的字节组成。 对于运算数 x=0x89ABCDEF 和 y=0x76543210,就得到 0x765432EF。 2.60 假设我们将一个 w 位的字中的字节从 0(最低位)到 w/8-1(最高位)编号。写出下 面的代码,它会返回一个无符号值,其中参数 x 的字节 i 被替换成字节 b: unsigned replace_byte(unsigned x, unsigned char b, int i); 以下的一些示例,说明了这个函数如何工作: replace_byte(0x12345678, 0xAB, 2) --> 0x12AB5678 replace_byte(0x12345678, 0xAB, 0) --> 0x123456AB 位级整数编码规则 在接下来的作业中,我们特意限制了你能使用的编程结构,来帮你更好地理解 C 语言 的位级、逻辑和算数运算。在回答这些问题时,你的代码必须遵守下面这些规则: o 假设 · 整数用补码形式表示。 · 有符号数的右移是算数右移。 · 数据类型 int 是 w 位长的。对于某些题目,会给定 w 的值,但是在其他情况下, 只要 w 是 8 的整数倍,你的代码就应该能工作。你可以用表达式 sizeof(int)<<3 来计算 w。 o 禁止使用 · 条件语句(if 或者 ?:)、循环、分支语句、函数调用和宏调用。 · 除法、模运算和乘法。 · 相对比较运算符(<、>、<=和>=)。 o 允许的运算 · 所有的位级和逻辑运算。 · 左移和右移,但是位移的数量只能在 0 和 w-1 之间。 · 加法和减法。 · 相等(==)和不相等(!=)测试。(在有些题目中。也不允许这些运算。) · 整形常数 INT_MIN 和 INT_MAX。 · 强制类型转换,无论是显式的还是隐式的。 即使有这些条件的限制,你仍然可以选择描述性的变量名,并且使用注释来描述你的 解决方案的逻辑,尽量提高代码的可读性。例如,下面这段代码从整数参数 x 中抽取 出最高有效字节: /* Get most significant byte from x */ int get_msb(int x) { /* Shift by w-8 */ int shift_val = (sizeof(int)-1) << 3; /* Arithmetic shift */ int xright = x >> shift_val; /* Zero all but LSB */ return xright & 0xFF; } 2.61 写一个 C 表达式,在下列描述的条件下产生 1,而在其他情况下得到 0。假设 x 是 int 类型。 A. x 的任何位都等于 1 B. x 的任何位都等于 0 C. x 的最高有效字节中的位都等于 1 D. 
x 的最低有效字节中的位都等于 0 代码应该遵循位级整数编码规则,另外还有一个限制,你不能使用相等(==)和不相 等(!=)测试。 2.62 编写一个函数 int_shifts_are_logical(),在对 int 类型的数使用算术右移的机器 上运行时,这个函数生成 1,而其他情况下生成 0。你的代码应该可以运行在任何字 长的机器上。在几种机器上测试你的代码。 2.63 将下面的 C 函数代码补充完整。函数 srl 用算术右移(由值 xsra 给出)来完成逻 辑右移,后面的其他操作不包括右移或者除法。函数 sra 用逻辑右移(由值 xsrl 给 出)来完成算术右移,后面的其他操作不包括右移或者除法。可以通过计算 8*sizeof(int) 来确定数据类型 int 中的位数 w。位移量 k 的取值范围为 0~w-1。 int sra(int x, int k) { /* Perform shift logically */ int xsrl = (unsigned) x >> k; . . . } unsigned srl(unsigned x, int k) { /* Perform shift arithmetically */ unsigned xsra = (int) x >> k; . . . } 2.64 写出代码实现如下函数: /* Return 1 when any even bit of x equals 1; 0 otherwise. Assume w=32 */ int any_even_one(unsigned x); 函数应该遵循位级整数编码规则,不过你可以假设数据类型 int 有 w=32 位。 2.65 写出代码实现如下函数: /* Return 1 when x contains an even number of 1s; 0 otherwise. Assume w=32 */ int even_ones(unsigned x); 函数应该遵循位级编码规则,不过你可以假设数据类型 int 有 w=32 位。 你的代码最多只能包含 12 个算术运算、位运算和逻辑运算。 2.66 写出代码实现如下的函数: /* * Generate mask indicating leftmost 1 in x. Assume w=32. * For example 0xFF00 -> 0x8000, and 0x6600 --> 0x4000. * If x = 0, then return 0. */ int leftmost_one(unsigned x); 函数应该遵循位级整数编码规则,不过你可以假设数据类型 int 有 w=32 位。 你的代码最多只能包含 15 个算术运算、位运算和逻辑运算。 提示:先将 x 转换成形如 [0...011...1] 的位向量。 2.67 给你一个任务,编写一个过程 int_size_is_32(),当在一个 int 是 32 位的机器上 运行时,该程序产生 1,而其他情况则产生 0。不允许使用 sizeof 运算符。下面是 开始时的尝试: /* The following code does not run properly on some machines */ int bad_int_size_is_32() { /* Set most significant bit (msb) of 32-bit machine */ int set_msb = 1 << 31; /* Shift past msb of 32-bit word */ int beyond_msb = 1 << 32; /* set_msb is nonzero when word size >= 32 beyond_msb is zero when word size <= 32 */ return set_msb && !beyond_msb; } 当在 SUN SPARC 这样的 32 位机器上编译并运行时,这个过程返回的却是 0。下面的 编译器信息给了我们一个问题的指示: warning: left shift count >= width of type A. 我们的代码在哪个方面没有遵守 C 语言标准? B. 修改代码,使得它在 int 至少为 32 位的任何机器上都能正确运行。 C.
修改代码,使得它在 int 至少为 16 位的任何机器上都能正确运行。 2.68 写出具有如下原型的函数的代码: /* * Mask with least significant n bits set to 1 * Examples: n=6 --> 0x3f, n=17 --> 0x1FFFF * Assume 1 <= n <= w */ int lower_bits(int x, int n); 函数应该遵循位级整数编码规则。要注意 n = w 的情况。 2.69 写出具有如下原型的函数的代码: /* * Do rotating right shift. Assume 0 <= n < w * Examples when x = 0x12345678 and w = 32: * n=4 -> 0x81234567, n=20 -> 0x45678123 */ unsigned rotate_right(unsigned x, int n); 函数应该遵循位级整数编码规则。要注意 n = 0 的情况。 2.70 写出具有如下原型的函数的代码: /* * Return 1 when x can be represented as an n-bit, 2's complement * number; 0 otherwise * Assume 1 <= n <= w */ int fits_bits(int x, int n); 函数应该遵循位级整数编码规则。 2.71 你刚刚开始在一家公司工作,他们要实现一组过程来操作一个数据结构,要将 4 个有 符号字节封装成一个 32 位 unsigned。一个字中的字节从 0(最低有效字节)编号到 3(最高有效字节)。分配给你的任务是:为一个使用补码运算和算术右移的机器编写 一个具有如下原型的函数: /* Declaration of data type where 4 bytes are packed into an unsigned */ typedef unsigned packed_t; /* Extract byte from word. Return as signed integer */ int xbyte(packed_t word, int bytenum); 也就是说,函数会抽取出指定的字节,再把它符号扩展为一个 32 位 int。你的前任 (因为水平不够高而被解雇了)编写了下面的代码: /* Failed attempt at xbyte */ int xbyte(packed_t word, int bytenum) { return (word >> (bytenum << 3)) & 0xFF; } A. 这段代码错在哪里? B. 给出函数的正确实现,只能使用左右移位和一个减法。 2.72 给你一个任务,写一个函数,将整数 val 复制到缓冲区 buf 中,但是只有当缓冲区中有足够可用的空间时,才执行复制。 你写的代码如下: /* Copy integer into buffer if space is available */ /* WARNING: The following code is buggy */ void copy_int(int val, void *buf, int maxbytes) { if (maxbytes-sizeof(val) >= 0) memcpy(buf, (void *)&val, sizeof(val)); } 这段代码使用了库函数 memcpy。虽然在这里用这个函数有点刻意,因为我们只是想复 制一个 int,但是说明了一种复制较大数据结构的常见方法。 你仔细地测试了这段代码后发现,哪怕 maxbytes 很小的时候,它也能把值复制到缓 冲区中。 A. 解释为什么代码中的条件测试总是成功。提示:sizeof 运算符返回类型为 size_t 的值。 B.
你该如何重写这个条件测试,使之工作正确。 2.73 写出具有如下原型的函数的代码: /* Addition that saturates to TMin or TMax */ int saturating_add(int x, int y); 同正常的补码溢出的方式不同,当正溢出时,saturating_add 返回 TMax,负溢出时, 返回 TMin。这种运算常常用在执行数字信号处理的程序中。 你的函数应该遵循位级整数编码规则。 2.74 写出具有如下原型的函数的代码: /* Determine whether subtracting arguments will cause overflow */ int tsub_ovf(int x, int y); 如果计算 x-y 导致溢出,这个函数就返回 1。 2.75 假设我们想要计算 x·y 的完整的 2w 位表示,其中,x 和 y 都是无符号数,并且运 行在数据类型 unsigned 是 w 位的机器上。乘积的低 w 位能够用表达式 x·y 计算, 所以,我们只需要一个具有下列原型的函数: unsigned unsigned_high_prod(unsigned x, unsigned y); 这个函数计算无符号变量 x·y 的高 w 位。 我们使用一个具有下面原型的库函数: int signed_high_prod(int x, int y); 它计算在 x 和 y 采用补码形式的情况下,x·y 的高 w 位。编写代码调用这个过程, 以实现用无符号数为参数的函数。验证你的解答的正确性。 提示:看看等式(2-18)的推导中,有符号乘积 x·y 和无符号乘积 x'·y' 之间的关 系。 2.76 假设我们有一个任务:生成一段代码,将整数变量 x 乘以不同的常数因子 K。为了提 高效率,我们想只使用 +、- 和 << 运算。对于下列的 K 的值,写出执行乘法运算的 C 表达式,每个表达式中最多使用 3 个运算。 A. K=5 B. K=9 C. K=30 D. K=-56 2.77 写出具有如下原型的函数的代码: /* Divide by power of two. Assume 0 <= k < w-1 */ int divide_power2(int x, int k); 该函数要用正确的舍入计算 x/2^k,并且应该遵循位级整数编码规则。 2.78 写出函数 mul5div8 的代码,对于整数参数 x,计算 5*x/8,但是要遵循位级整数编 码规则。你的代码计算 5*x 也会产生溢出。 2.79 写出函数 fiveeighths 的代码,对于整数参数 x,计算 5/8x 的值,向零舍入。它不 会溢出。函数应该遵循整数位级编码规则。 2.80 编写 C 表达式产生如下位模式,其中 a^n 表示符号 a 重复 n 次。假设一个 w 位的 数据类型。你的代码可以包含对参数 m 和 n 的引用,它们分别表示 m 和 n 的值,但 是不能使用表示 w 的参数。 A. 1^(w-n)0^n B. 0^(w-n-m)1^n0^m 2.81 我们在一个 int 类型值为 32 位的机器上运行程序。这些值以补码形式表示,而且它 们都是算术右移的。unsigned 类型的值也是 32 位的。 我们产生随机数 x 和 y,并且把它们转换成无符号数,显示如下: /* Create some arbitrary values */ int x = random(); int y = random(); /* Convert to unsigned */ unsigned ux = (unsigned) x; unsigned uy = (unsigned) y; 对于下列每个 C 表达式,你要指出表达式是否总是为 1。如果它总是为 1,那么请描 述其中的数学原理。否则,列举一个使它为 0 的参数示例。 A. (x > y) == (-x < -y) B. ((x + y) << 5) + x - y == 31 * y + 33 * x C. ~x + ~y == ~(x + y) D. (int)(ux - uy) == -(y - x) E. ((x >> 1) << 1) <= x 2.82 一些数字的二进制表示是由形如 0.yyyyyy... 的无穷串组成的,其中 y 是一个 k 位 的序列。例如,1/3 的二进制表示是 0.01010101...(y=01),而 1/5 的二进制表示 是0.001100110011(y=0011)。 A. 
设 Y=B2Uk(y),也就是说,这个数具有二进制表示 y。给出一个由 Y 和 k 组成的 公式表示这个无穷串的值。提示:请考虑将二进制小数点右移 k 位的结果。 B. 对于下列 y 的值,串的数值是多少? (a) 001 (b) 1001 (c) 000111 2.83 填写下列程序的返回值,这个程序是测试它的第一个参数是否大于或者等于第二个参 数。假定函数 f2u 返回一个无符号 32 位数字,其位表示与它的浮点参数相同。你可 以假设两个参数都不是 NaN。两种 0,+0 和 -0 都认为是相等的。 int float_ge(float x, float y) { unsigned ux = f2u(x); unsigned uy = f2u(y); /* Get the sign bits */ unsigned sx = ux >> 31; unsigned sy = uy >> 31; /* Give an expression using only ux, uy, sx, sy */ return ________; } 2.84 给定一个浮点格式,有 k 位指数和 n 位小数,对于下列数,写出阶码 E、尾数 M、 小数 f 和值 V 的公式。另外,请描述其位表示。 A. 数 5.0。 B. 能够被准确描述的最大奇整数。 C. 最小的规格化数的倒数。 2.85 与 Intel 兼容的处理器也支持“扩展精度”浮点形式,这种格式具有 80 位字长,被 分成 1 个符号位、k = 15 个阶码位、1 个单独的整数位和 n = 63 个小数位。整数 位是 IEEE 浮点表示中隐含位的显式副本。也就是说,对于规格化的值它等于 1,对 于非规格化的值它等于 0。填写下表,给出用这种格式表示的一些“有趣的”数字的 近似值。 | 描述 | 扩展精度 | | | | 值 | 十进制 | | 最小的正非规格化数 | | | | 最小的正规格化数 | | | | 最大的规格化数 | | | 2.86 考虑一个基于 IEEE 浮点格式的 16 位浮点表示,它具有 1 个符号位、7 个阶码位 (k=7)和 8 个小数位(n=8)。阶码偏置量是 2^(7-1)-1 = 63。 对于每个给定的数,填写下表,其中每一列具有如下指示说明: Hex: 描述编码形式的 4 个十六进制数字。 M: 尾数的值。这应该是一个形如 x 或 x/y 的数,其中 x 是一个整数,而 y 是 2 的整数幂。例如,0、67/64 和 1/256。 E: 阶码的整数值。 V: 所表示的数字值。使用 x 或者 x * 2^z 表示,其中 x 和 z 都是整数。 举一个例子,为了表示 7/2,我们有 s=0,M=7/4 和 E=1。因此这个数的阶码字段为 0x40(十进制值 63+1=64),尾数字段为 0xC0(二进制 1100 0000_2),得到一个十 六进制的表示 40C0。 标记为“--”的条目不用填写。 |--------------------------+-----+----+----+----| | 描述 | Hex | M | E | V | |--------------------------+-----+----+----+----| | -0 | | | | -- | | 最小的值 >1 | | | | | | 256 | | | | -- | | 最大的非规格化数 | | | | | | -oo | | -- | -- | -- | | 十六进制表示为 3AA0 的数 | | | | | |--------------------------+-----+----+----+----| 2.87 考虑下面两个基于 IEEE 浮点格式的 9 位浮点表示。 1. 格式 A o 有一个符号位 o 有 k=5 个阶码位。阶码偏置量是 15。 o 有 n=3 个小数位。 2.
格式 B o 有一个符号位 o 有 k=4 个阶码位。阶码偏置量是 7。 o 有 n=4 个小数位。 下面给出了一些格式 A 表示的位模式,你的任务是把它们转换成最接近的格式 B 表 示的值。如果需要舍入,你要向 +oo 舍入。另外,给出格式 A 和格式 B 表示的位模 式对应的值。要么是整数(例如,17),要么是小数(例如,17/64 或 17/2^6)。 |-------------+-------+-------------+-------| | 格式A | | 格式B | | |-------------+-------+-------------+-------| | 位 | 值 | 位 | 值 | |-------------+-------+-------------+-------| | 1 01110 001 | -9/16 | 1 0110 0010 | -9/16 | | 0 10110 101 | | | | | 1 00111 110 | | | | | 0 00000 101 | | | | | 1 11011 000 | | | | | 0 11000 100 | | | | |-------------+-------+-------------+-------| 2.88 我们在一个 int 类型为 32 位补码表示的机器上运行程序。float 类型的值使用 32 位 IEEE 格式,而 double 类型的值使用 64 位 IEEE 格式。 我们产生随机数 x、y 和 z,并且把它们转换成 double 类型的值: /* Create some arbitrary values */ int x = random(); int y = random(); int z = random(); /* Convert to double */ double dx = (double)x; double dy = (double)y; double dz = (double)z; 对于下列的每个 C 表达式,你要指出表达式是否总是为 1。如果它总是为 1,描述其 中的数学原理。否则,列举出使它为 0 的参数的例子。请注意,不能使用 IA32 机器 运行 GCC 来测试你的答案,因为对于 float 和 double,它使用的都是 80 位的扩展 精度表示。 A. (double)(float)x == dx B. dx + dy == (double)(x+y) C. dx + dy + dz == dz + dy + dx D. dx * dy * dz == dz * dy * dx E. dx / dx == dy / dy 2.89 分配给你一个任务,编写一个 C 函数来计算 2^x 的浮点表示。你意识到完成这个任 务的最好方法是直接创建结果的 IEEE 单精度表示。当 x 太小时,你的程序将返回 0.0。当 x 太大时,它会返回 +oo。填写下列代码的空白部分,以计算出正确的结果。 假设函数 u2f 返回的浮点值与它的无符号参数有相同的位表示。 2.90 大约在公元前 250 年,希腊数学家阿基米德证明了 223/71 < PI < 22/7。如果当时 有一台计算机和标准库 <math.h>,他就能够确定 PI 的单精度浮点近似值的十六进制 表示为 0x40490FDB。当然,所有的这些都只是近似值,因为 PI 不是有理数。 A. 这个浮点值表示的二进制小数是多少? B. 22/7 的二进制小数表示是什么?提示:参见家庭作业 2.82。 C. 这两个 PI 的近似值从哪一位(相对于二进制小数点)开始不同的?
位级浮点编码规则 在接下来的题目中,你要写的代码要实现浮点函数在浮点数的位级表示上直接运算。 你的代码应该完全遵循 IEEE 浮点运算的规则,包括当需要舍入时,要使用向偶数舍 入的方式。为此,我们定义数据类型 float_bits 等价于 unsigned: /* Access bit-level representation floating-point number */ typedef unsigned float_bits; 你的代码中不使用数据类型 float,而要使用 float_bits。你可以使用数据类型 int 和 unsigned,包括无符号和整数常数和运算。你不可以使用任何联合、结构和数组。 更重要的是,你不能使用任何浮点数据类型、运算或者常数。取而代之的是,你的代 码应该执行实现这些指定的浮点运算的位操作。 下面的函数说明了对这些规则的使用。对于参数 f,如果 f 是非规格化的,该函数返 回 ±0(保持 f 的符号),否则,返回 f。 /* If f is denorm, return 0. Otherwise, return f */ float_bits float_denorm_zero(float_bits f) { /* Decompose bit representation into parts */ unsigned sign = f>>31; unsigned exp = f>>23 & 0xFF; unsigned frac = f & 0x7FFFFF; if (exp == 0) { /* Denormalized. Set fraction to 0 */ frac = 0; } /* Reassemble bits */ return (sign << 31) | (exp << 23) | frac; } 2.91 遵循位级浮点编码规则,实现具有如下原型的函数: /* Compute |f|. If f is NaN, then return f. */ float_bits float_absval(float_bits f); 对于浮点数 f,这个函数计算 |f|。如果 f 是 NaN,你的函数应该简单地返回 f。测 试你的函数,对参数 f 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点运 算得到的结果相比较。 2.92 遵循位级浮点编码规则,实现具有如下原型的函数: /* Compute -f. If f is NaN, then return f. */ float_bits float_negate(float_bits f); 对于浮点数 f,这个函数计算 -f。如果 f 是 NaN,你的函数应该简单地返回 f。测试 你的函数,对参数 f 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点运算 得到的结果相比较。 2.93 遵循位级浮点编码规则,实现具有如下原型的函数: /* Compute 0.5*f. If f is NaN, then return f. */ float_bits float_half(float_bits f); 对于浮点数 f,这个函数计算 0.5*f。如果 f 是 NaN,你的函数应该简单返回 f。测 试你的函数,对参数 f 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点运 算得到的结果相比较。 2.94 遵循位级浮点编码规则,实现具有如下原型的函数: /* Compute 2*f. If f is NaN, then return f. */ float_bits float_twice(float_bits f); 对于浮点数 f,这个函数计算 2.0*f。如果 f 是 NaN,你的函数应该简单地返回 f。 测试你的函数,对参数 f 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点 运算得到的结果相比较。 2.95 遵循位级浮点编码规则,实现具有如下原型的函数: /* Compute (float) i */ float_bits float_i2f(int i); 对于参数 i,这个函数计算 (float)i 的位级表示。 测试你的函数,对参数 i 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点 运算得到的结果相比较。 2.96 遵循位级浮点编码规则,实现具有如下原型的函数: /* * Compute (int) f.
* If conversion causes overflow or f is NaN, return 0x80000000 */ int float_f2i(float_bits f); 对于浮点数 f,这个函数计算 (int)f。你的函数应该向零舍入。如果f 不能用整数表示 (例如,超出表示范围,或者它是一个 NaN),那么函数应该返回0x80000000。测试你 的函数,对参数 f 可以取的所有 2^32 个值求值,将结果与你使用机器的浮点运算得到 的结果相比较。 3.54 一个函数的原型为 int decode2(int x, int y, int z); 将这个函数编译成 IA32 汇编代码。代码体如下: # x at %ebp+8, y at %ebp+12, z at %ebp+16 1 movl 16(%ebp), %edx 2 subl 12(%ebp), %edx 3 movl %edx, %eax 4 sall $15, %eax 5 sarl $15, %eax 6 xorl 8(%ebp), %edx 7 imull %edx, %eax 参数 x、y 和 z 存放在存储器中相对于寄存器 %ebp 中地址偏移量为 8、12 和 16 的地方。代码将返回值存放在寄存器 %eax 中。 写出等价于我们汇编代码的 decode2 的 C 代码。 3.55 下面的代码计算 x 和 y 的乘积,并将结果存放在存储器中。数据类型 ll_t 被定义 为等价于 long long。 typedef long long ll_t; void store_prod(ll_t *dest, ll_t x, int y) { *dest = x * y; } GCC 生成下面的汇编代码实现计算: # dest at %ebp+8, x at %ebp+12, y at %ebp+20 1 movl 12(%ebp), %esi 2 movl 20(%ebp), %eax 3 movl %eax, %edx 4 sarl $31, %edx 5 movl %edx, %ecx 6 imull %esi, %ecx 7 movl 16(%ebp), %ebx 8 imull %eax, %ebx 9 addl %ebx, %ecx 10 mull %esi 11 leal (%ecx, %edx), %edx 12 movl 8(%ebp), %ecx 13 movl %eax, (%ecx) 14 movl %edx, 4(%ecx) 这段代码用了三个乘法来实现多精度运算,这个多精度运算是在 32 位机器上实现 64 位运算所需要的。描述用来计算这个乘积的算法,并对汇编代码添加注释,说明它是 如何实现你的算法的。 提示:参考练习题 3.12 及其答案。 3.56 考虑下面的汇编代码: # x at %ebp+8, n at %ebp+12 1 movl 8(%ebp), %esi 2 movl 12(%ebp), %ebx 3 movl $1431655765, %edi 4 movl $-2147483648, %edx 5 .L2: 6 movl %edx, %eax 7 andl %esi, %eax 8 xorl %eax, %edi 9 movl %ebx, %ecx 10 shrl %cl, %edx 11 testl %edx, %edx 12 jne .L2 13 movl %edi, %eax 以上代码是以下整体形式的 C 代码编译产生的: 1 int loop(int x, int n) 2 { 3 int result = ____; 4 int mask; 5 for (mask = ____; mask ____; mask = ____) { 6 result ^= ____; 7 } 8 return result; 9 } 你的任务是填写这个 C 代码中缺失的部分,得到一个程序等价于产生的汇编代码。回 想一下,这个函数的结果是在寄存器 %eax 中返回的。你会发现以下工作很有帮助: 检查循环之前、之中和之后的汇编代码,形成一个寄存器和程序变量之间一致的映射。 A. 哪个寄存器保存着程序值 x、n、result 和 mask? B. result 和 mask 的初始值是什么? C. mask 的测试条件是什么? D. mask 是如何被修改的? E. result 是如何被修改的? F. 填写这段 C 代码中所有缺失的部分。 3.57 在 3.6.6 节,我们查看了下面的代码,作为使用条件数据传输的一种选择: int cread(int *xp) { return (xp ? 
*xp : 0); } 我们给出了使用条件传送指令的一个尝试实现,但是认为它是不合法的,因为它试图 从一个空地址读数据。 写一个 C 函数 cread_alt,它与 cread 有一样的行为,除了它可以被编译成使用条 件数据传送。当用命令行选项 '-march=i686' 来编译时,产生的代码应该使用条件传 送指令而不是某种跳转指令。 3.58 下面的代码是在一个开关语句中根据枚举类型值进行分支选择的例子。回忆一下,C 语言中枚举类型只是一种引入一组与整数值相对应的名字的方法。默认情况下,值是 从 0 向上依次赋给名字的。在我们的代码中,省略了与各种情况标号相对应的动作。 /* Enumerated type creates set of constants numbered 0 and upward */ typedef enum { MODE_A, MODE_B, MODE_C, MODE_D, MODE_E } mode_t; int switch3(int *p1, int *p2, mode_t action) { int result = 0; switch (action) { case MODE_A: case MODE_B: case MODE_C: case MODE_D: case MODE_E: default: } return result; } 产生的实现各个动作的汇编代码部分如下所示。注释指明了参数位置,寄存器值,以 及各个跳转目的的情况标号。寄存器 %edx 对应于程序变量 result,并被初始化为 -1。填写 C 代码中缺失的部分。注意那些会落入其他情况中的情况。 # Arguments: p1 at %ebp+8, p2 at %ebp+12, action at %ebp+16 # Registers: result in %edx (initialized to -1) # The jump targets: 1 .L17: # MODE_E 2 movl $17, %edx 3 jmp .L19 4 .L13: # MODE_A 5 movl 8(%ebp), %eax 6 movl (%eax), %edx 7 movl 12(%ebp), %ecx 8 movl (%ecx), %eax 9 movl 8(%ebp), %ecx 10 movl %eax, (%ecx) 11 jmp .L19 12 .L14: # MODE_B 13 movl 12(%ebp), %edx 14 movl (%edx), %eax 15 movl %eax, %edx 16 movl 8(%ebp), %ecx 17 addl (%ecx), %edx 18 movl 12(%ebp), %eax 19 movl %edx, (%eax) 20 jmp .L19 21 .L15: # MODE_C 22 movl 12(%ebp), %edx 23 movl $15, (%edx) 24 movl 8(%ebp), %ecx 25 movl (%ecx), %edx 26 jmp .L19 27 .L16: # MODE_D 28 movl 8(%ebp), %edx 29 movl (%edx), %eax 30 movl 12(%ebp), %ecx 31 movl %eax, (%ecx) 32 movl $17, %edx 33 .L19: # default 34 movl %edx, %eax # Set return value 上面的代码实现了 switch 语句的各个分支 3.59 这个程序给你一个机会,逆向工程一个 switch 语句。在下面这个过程中,去掉了 switch 语句的主体: int switch_prob(int x, int n) { int result = x; switch (n) { /* Fill in code here */ } return result; } 下面给出了这个过程的反汇编机器代码。我们可以看到,在第 4 行,参数 n 被加载 到寄存器 %eax 中。 1 08048420 : 2 8048420: 55 push %ebp 3 8048421: 89 e5 mov %esp, %ebp 4 8048423: 8b 45 0c mov 0xc(%ebp), %eax 5 8048426: 83 e8 28 sub $0x28, %eax 6 8048429: 83 f8 05 cmp $0x5, %eax 7 804842c: 77 07 ja 8048435 8 804842e: ff 24 85 f0 85 04 08 
jmp *0x80485f0(,%eax,4) 9 8048435: 8b 45 08 mov 0x8(%ebp), %eax 10 8048438: eb 24 jmp 804845e 11 804843a: 8b 45 08 mov 0x8(%ebp), %eax 12 804843d: 8d 76 00 lea 0x0(%esi), %esi 13 8048440: eb 19 jmp 804845b 14 8048442: 8b 45 08 mov 0x8(%ebp), %eax 15 8048445: c1 e0 03 shl $0x3, %eax 16 8048448: eb 17 jmp 8048461 17 804844a: 8b 45 08 mov 0x8(%ebp), %eax 18 804844d: c1 f8 03 sar $0x3, %eax 19 8048450: eb 0f jmp 8048461 20 8048452: 8b 45 08 mov 0x8(%ebp), %eax 21 8048455: c1 e0 03 shl $0x3, %eax 22 8048458: 2b 45 08 sub 0x8(%ebp), %eax 23 804845b: 0f af c0 imul %eax, %eax 24 804845e: 83 c0 11 add $0x11, %eax 25 8048461: 5d pop %ebp 26 8048462: c3 ret 跳转表驻留在另一个存储器区域中。可以从第 8 行的间接跳转看出,跳转表的起始地 址为 0x80485f0。用调试器 GDB,我们可以用命令 x/6w 0x80485f0 来检查存储器中 的 6 个 4 字节的字。GDB 打印下面的内容: (gdb) x/6w 0x80485f0 0x80485f0: 0x08048442 0x08048435 0x08048442 0x0804844a 0x8048600: 0x08048452 0x0804843a 用 C 代码填写开关语句的主体,使它的行为与机器代码一致。 3.60 考虑下面的源代码,这里 R、S 和 T 都是用 #define 声明的常数: int A[R][S][T]; int store_ele(int i, int j, int k, int *dest) { *dest = A[i][j][k]; return sizeof(A); } 编译这个程序,GCC 产生下面的汇编代码: ; i at %ebp+8, j at %ebp+12, k at %ebp+16, dest at %ebp+20 1 movl 8(%ebp), %ecx 2 movl 12(%ebp), %eax 3 leal (%eax,%eax,8), %eax 4 movl %ecx, %edx 5 sall $6, %edx 6 subl %ecx, %edx 7 addl %edx, %eax 8 addl 16(%ebp), %eax 9 movl A(,%eax,4), %edx 10 movl 20(%ebp), %eax 11 movl %edx, (%eax) 12 movl $2772, %eax A. 将等式(3-1)从二维扩展到三维,提供数组元素 A[i][j][k] 的位置的公式。 B. 
运用你的逆向工程技术,根据汇编代码,确定 R、S 和 T 的值。 3.61 C 编译器为 var_prod_ele 产生的代码不能将它在循环中使用的所有值都放进寄存器中, 因此它必须在每次循环时都从存储器中读出 n 的值。写出这个函数的 C 代码,使用类 似于 GCC 执行的那些优化,但是它的编译代码不会让循环值溢出到存储器中。 /* Compute i,k of variable matrix product */ int var_prod_ele(int n, int A[n][n], int B[n][n], int i, int k) { int j; int result = 0; for (j = 0; j < n; j++) result += A[i][j] * B[j][k]; return result; } 回忆一下,处理器只有 6 个寄存器可用来保存临时数据,因为寄存器 %ebp 和 %esp 不能用于此目的。其中一个寄存器还必须用来保存乘法指令的结果。因此,你必须把 循环中的值的数量从 6 个(result、Arow、Bcol、j、n 和 4*n)减少到 5 个。 需要找到一个对你那种编译器行之有效的策略。不断尝试各种不同的策略,直到有一 种能工作。 3.62 下面的代码转置一个 MxM 矩阵的元素,这里 M 是一个用 #define 定义的常数: void transpose(int A[M][M]) { int i,j; for (i = 0; i < M; i++) for (j = 0; j < i; j++) { int t = A[i][j]; A[i][j] = A[j][i]; A[j][i] = t; } } 当用优化等级 -O2 编译时,GCC 为这个函数的内部循环产生下面的代码: 1 .L3: 2 movl (%ebx), %eax 3 movl (%esi,%ecx,4), %edx 4 movl %eax, (%esi,%ecx,4) 5 addl $1, %ecx 6 movl %edx, (%ebx) 7 addl $76, %ebx 8 cmpl %edi, %ecx 9 jl .L3 A. M 的值是多少? B. 哪个寄存器保持着程序值 i 和 j? C. 写 transpose 的一个 C 代码版本,使用在这个循环中出现的优化。在你的代码中, 使用参数 M,而不要使用常数值。 3.63 考虑下面的源代码,这里 E1 和 E2 是用 #define 声明的宏表达式,计算用参数 n 表示的矩阵 A 的维度。这段代码计算矩阵的第 j 列的元素之和。 int sum_col(int n, int A[E1(n)][E2(n)], int j) { int i; int result = 0; for (i = 0; i < E1(n); i++) result += A[i][j]; return result; } 编译这个程序,GCC 产生下面的汇编代码: ; n at %ebp+8, A at %ebp+12, j at %ebp+16 1 movl 8(%ebp), %edx 2 leal (%edx,%edx), %eax 3 leal -1(%eax), %ecx 4 leal (%eax,%edx), %esi 5 movl $0, %eax 6 testl %esi, %esi 7 jle .L3 8 leal 0(,%ecx,4), %ebx 9 movl 16(%ebp), %eax 10 movl 12(%ebp), %edx 11 leal (%edx,%eax,4), %ecx 12 movl $0, %edx 13 movl $0, %eax 14 .L4: 15 addl (%ecx), %eax 16 addl $1, %edx 17 addl %ebx, %ecx 18 cmpl %esi, %edx 19 jl .L4 20 .L3: 运用你的逆向工程技术,确定 E1 和 E2 的定义。 3.64 这个作业要查看 GCC 为参数和返回值中有结构的函数产生的代码,由此可以看到通常 这些语句特性是如何实现的。 下面的 C 代码是函数 word_sum,它用结构作为参数和返回值,还有一个函数 diff 调用 word_sum。 typedef struct { int *p; int v; } str1; typedef struct { int prod; int sum; } str2; str2 word_sum(str1 s1) { str2 result; result.prod = *s1.p * s1.v; result.sum 
= *s1.p + s1.v; return result; } int diff(int x, int y) { str1 s1; str2 s2; s1.p = &x; s1.v = y; s2 = word_sum(s1); return s2.prod - s2.sum; } GCC 为这两个函数产生下面的代码: 1 word_sum: 1 diff: 2 pushl %ebp 2 pushl %ebp 3 movl %esp, %ebp 3 movl %esp, %bp 4 pushl %ebx 4 subl $20, %esp 5 movl 8(%ebp), %eax 5 leal -8(%ebp), %edx 6 movl 16(%ebp), %ebx 6 leal 8(%ebp), %eax 7 movl 12(%ebp), %edx 7 movl %eax, 4(%esp) 8 movl (%edx), %edx 8 movl 12(%ebp), %eax 9 leal (%edx,%ebx), %ecx 9 movl %eax, 8(%esp) 10 movl %ecx, 4(%eax) 10 movl %edx, (%esp) 11 imull %ebx, %edx 11 call word_sum 12 movl %edx, (%eax) 12 subl $4, %esp 13 popl %ebx 13 movl -8(%ebp), %eax 14 popl %ebp 14 subl -4(%ebp), %eax 15 ret $4 15 leave 16 16 ret 指令 ret $4 很像普通的返回指令,但是它将栈指针增加了 8(4 个是为了返回地址, 加上 4 的加法),而不是 4。 A. 从 word_sum 代码的第 5~7 行我们可以看到,虽然函数只有一个参数,但是看上 去好像从栈中取出了 3 个值。描述这三个值分别是什么。 B. 从 diff 代码的第 4 行我们可以看到,栈桢中分配了 20 个字节。把他们当作 5 个字段来使用,每个字段 4 个字节。描述每个字段都是怎么用的。 C. 你要如何描述向函数传递结构参数的通用策略? D. 你要如何描述处理从函数返回结构值的通用策略? 3.65 在下面的代码中,A 和 B 是用 #define 定义的常数: typedef struct { short x[A][B]; /* Unknown constants A and B */ int y; } str1; typedef struct { char array[B]; int t; short s[B]; int u; } str2; void setVal(str1 *p, str2 *q) { int v1 = q->t; int v2 = q->u; p->y = v1 + v2; } GCC 为 setVal 的主体产生下面的代码: movl 12(%ebp), %eax movl 28(%eax), %edx addl 8(%eax), %edx movl 8(%ebp), %eax movl %edx, 44(%eax) A 和 B 的值是多少?(答案是唯一的。) 3.66 你负责维护一个大型的 C 程序时,遇到下面这样的代码: 1 typedef struct { 2 int left; 3 a_struct a[CNT]; 4 int right; 5 } b_struct; 6 7 void test(int i, b_struct *bp) 8 { 9 int n = bp->left + bp->right; 10 a_struct *ap = &bp->a[i]; 11 ap->x[ap->idx] = n; 12 } 编译时常数 CNT 和结构 a_struct 的声明在一个你没有访问权限的文件中。幸好,你有代 码的'.o'版本,可以用 objdump 程序来反汇编这些文件,得到下面的反汇编代码: 1 00000000 : 2 0: 55 push %ebp 3 1: 89 e5 mov %esp, %ebp 4 3: 53 push %ebx 5 4: 8b 45 08 mov 0x8(%ebp), %eax 6 7: 8b 4d 0c mov 0xc(%ebp), %ecx 7 a: 6b d8 1c imul $0x1c, %eax, %ebx 8 d: 8d 14 c5 00 00 00 00 lea 0x0(,%eax,8), %edx 9 14: 29 c2 sub %eax, %edx 10 16: 03 54 19 04 add 
0x4(%ecx,%ebx,1), %edx 11 1a: 8b 81 c8 00 00 00 mov 0xc8(%ecx), %eax 12 20: 03 01 add (%ecx), %eax 13 22: 89 44 91 08 mov %eax, 0x8(%ecx,%edx,4) 14 26: 5b pop %ebx 15 27: 5d pop %ebp 16 28: c3 ret 运用你的逆向工程技术,推断下列的内容: A. CNT 的值 B. 结构 a_struct 的完整声明。假设这个结构只有字段 idx 和 x。 3.67 考虑下面的联合声明: union ele { struct { int *p; int x; } e1; struct { int y; union ele *next; } e2; }; 这个声明说明联合中可以嵌套结构。 下面的过程(省略了一些表达式)对一个链表进行操作,链表是以上述联合作为元素的: void proc(union ele *up) { up->____ = *(up->____) - up->____; } A. 下列字段的偏移量是多少(以字节为单位): e1.p: ____ e1.x: ____ e2.y: ____ e2.next: ____ B. 这个结构总共需要多少个字节? C. 编译器为 proc 的主体产生下面的汇编代码: ; up at %ebp+8 1 movl 8(%ebp), %edx 2 movl 4(%edx), %ecx 3 movl (%ecx), %eax 4 movl (%eax), %eax 5 subl (%edx), %eax 6 movl %eax, 4(%ecx) 在这些信息的基础上,填写 proc 代码中缺失的表达式。提示:有些联合引用的解释可以有 歧义。当你清楚引用指引到哪里的时候,就能够澄清这些歧义。只有一个答案,不需要进行 强制类型转换,且不违反任何类型限制。 3.68 写一个函数 good_echo,它从标准输入读取一行,再把它写到标准输出。你的实现应 该对任意长度的输入行都能工作。可以使用库函数 fgets,但是你必须确保即使当输 入行要求比你应该为缓冲区分配的更多的空间时,你的函数也能正确地工作。你的代 码还应该检查错误条件,并在遇到错误时返回。参考标准 I/O 函数的定义文档[48, 58]。 3.69 下面的声明定义了一类结构,用来构建二叉树: 1 typedef struct ELE *tree_ptr; 2 3 struct ELE { 4 tree_ptr left; 5 tree_ptr right; 6 long val; 7 }; 对于具有如下原型的函数: long trace(tree_ptr tp); GCC 产生下面的 x86-64 代码: 1 trace: ; tp in %rdi 2 movl $0, %eax 3 testq %rdi, %rdi 4 je .L3 5 .L5: 6 movq 16(%rdi), %rax 7 movq (%rdi), %rdi 8 testq %rdi, %rdi 9 jne .L5 10 .L3: 11 rep 12 ret A. 给出一个该函数的 C 版本,使用 while 循环。 B. 
用自然语言解释这个函数计算的是什么。 3.70 用家庭作业 3.69 中的树结构,以及一个具有以下原型的函数 long traverse(tree_ptr tp) GCC 产生下面的 x86-64 代码: 1 traverse: ; tp in %rdi 2 movq %rbx, -24(%rsp) 3 movq %rbp, -16(%rsp) 4 movq %r12, -8(%rsp) 5 subq $24, %rsp 6 movq %rdi, %rbp 7 movabsq $9223372036854775807, %rax 8 test %rdi, %rdi 9 je .L9 10 movq 16(%rdi), %rbx 11 movq (%rdi), %rdi 12 call traverse 13 movq %rax, %r12 14 movq 8(%rbp), %rdi 15 call traverse 16 cmpq %rax, %r12 17 cmovle %r12, %rax 18 cmpq %rbx, %rax 19 cmovg %rbx, %rax 20 .L9: 21 movq (%rsp), %rbx 22 movq 8(%rsp), %rbp 23 movq 16(%rsp), %r12 24 addq $24, %rsp 25 ret A. 生成这个函数的 C 版本。 B. 用自然语言解释这个函数计算的是什么。 7.6 考虑下面的 swap.c 函数版本,它计算自己被调用的次数: 1 extern int buf[]; 2 3 int *bufp0 = &buf[0]; 4 static int *bufp1; 5 6 static void incr() 7 { 8 static int count = 0; 9 10 count++; 11 } 12 13 void swap() 14 { 15 int temp; 16 17 incr(); 18 bufp1 = &buf[1]; 19 temp = *bufp0; 20 *bufp0 = *bufp1; 21 *bufp1 = temp; 22 } 对于每个 swap.o 中定义和引用的符号,请指出它是否在模块 swap.o 的 .symtab 节中有 符号表条目。如果是这样,请指出定义该符号的模块(swap.o 或 main.o)、符号类型(本 地、全局或外部)以及它在模块中所处的节(.text、.data 或 .bss)。 |-------+----------------------+----------+----------------+----| | 符号 | .swap.o.symtab条目? 
| 符号类型 | 定义符号的模块 | 节 | |-------+----------------------+----------+----------------+----| | buf | | | | | |-------+----------------------+----------+----------------+----| | bufp0 | | | | | |-------+----------------------+----------+----------------+----| | bufp1 | | | | | |-------+----------------------+----------+----------------+----| | swap | | | | | |-------+----------------------+----------+----------------+----| | temp | | | | | |-------+----------------------+----------+----------------+----| | incr | | | | | |-------+----------------------+----------+----------------+----| | count | | | | | |-------+----------------------+----------+----------------+----| 7.7 不改变任何变量名字,修改 7.6.1 节中的 bar5.c,使得 foo5.c 输出 x 和 y 的正确 值(也就是整数 15213 和 15212 的十六进制表示)。 7.8 在此题中,REF(x,i) --> DEF(x,k) 表示链接器将任意对模块 i 中符号 x 的引用与模 块 k 中符号 x 的定义相关联。在下面每个例子中,用这种符号来说明链接器是如何解 析在每个模块中有多重定义的引用的。如果出现链接时错误(规则 1),写“ERROR”。 如果链接器从定义中任意选择一个(规则 3),那么写“UNKNOWN”。 A. /* Module 1*/ /* Module 2 */ int main() static int main = 1; { int p2() } { } a) REF(main.1) --> DEF(____.____) b) REF(main.2) --> DEF(____.____) B. /* Module 1 */ /* Module 2 */ int x; double x; void main() int p2() { { } } a) REF(x.1) --> DEF(____.____) b) REF(x.2) --> DEF(____.____) C. /* Module 1 */ /* Module 2 */ int x = 1; double x = 1.0; void main() int p2() { { } } a) REF(x.1) --> DEF(____.____) b) REF(x.2) --> DEF(____.____) 7.9 考虑下面的程序,它由两个目标模块组成: 1 /* foo6.c */ 1 /* bar6.c */ 2 void p2(void); 2 #include 3 3 4 int main() 4 char main; 5 { 5 6 p2(); 6 void p2() 7 return 0; 7 { 8 } 8 printf("0x%x\n", main); 9 } 当在 Linux 系统中编译和执行这个程序时,即使 p2 不初始化变量 main,它也能打印字符 串“0x55\n”并正常终止。你能解释这一点吗? 7.10 a 和 b 表示当前路径中的目标模块或静态库,而 a->b 表示 a 依赖于 b,也就是说a 引用了一个 b 定义的符号。对于下面的每个场景,给出使得静态链接器能够解析所有 符号引用的最小的命令行(含有最少数量的目标文件和库参数的命令): A. p.o -> libx.a -> p.o B. p.o -> libx.a -> liby.a 和 liby.a -> libx.a C. p.o -> libx.a -> liby.a 和 liby.a -> libx.a -> libz.a 7.11 图 7-12 中的段头部表明数据段占用了存储器中 0x104 个字节。然后,只有开始的 0xe8 字节来自可执行文件的节。引起这种差异的原因是什么? 
7.12 图 7-10 中的 swap 程序包含 5 个重定位的引用。对于每个重定位的引用,给出它在 图 7-10 中的行号、运行时存储器地址和值。swap.o 模块中的原始代码和重定位条目 如图7-19 所示。 1 0000000 <swap>: 2 0: 55 push %ebp 3 1: 8b 15 00 00 00 00 mov 0x0,%edx ; Get *bufp0 = &buf[0] 4 3: R_386_32 bufp0 ; Relocation entry 5 7: a1 04 00 00 00 mov 0x4,%eax ; Get buf[1] 6 8: R_386_32 buf ; Relocation entry 7 c: 89 e5 mov %esp, %ebp 8 e: c7 05 00 00 00 00 04 movl $0x4, 0x0 ; bufp1 = &buf[1]; 9 15: 00 00 00 10 10: R_386_32 bufp1 ; Relocation entry 11 14: R_386_32 buf ; Relocation entry 12 18: 89 ec mov %ebp, %esp 13 1a: 8b 01 mov (%edx), %ecx ; temp = buf[0]; 14 1c: 89 02 mov %eax, (%edx) ; buf[0] = buf[1]; 15 1e: a1 00 00 00 00 mov 0x0, %eax ; Get *bufp1 = &buf[1] 16 1f: R_386_32 bufp1 ; Relocation entry 17 23: 89 08 mov %ecx, (%eax) ; buf[1] = temp; 18 25: 5d pop %ebp 19 26: c3 ret |------------------+------+----| | 图 7-10 中的符号 | 地址 | 值 | |------------------+------+----| | | | | |------------------+------+----| | | | | |------------------+------+----| | | | | |------------------+------+----| | | | | |------------------+------+----| | | | | |------------------+------+----| 图 7-19 家庭作业 7.12 的代码和重定位条目 7.13 考虑图 7-20 中的 C 代码和相应的可重定位目标模块。 A. 确定当模块被重定位时,链接器将修改 .text 中的哪些指令。对于每条这样的指令, 列出它的重定位条目中的信息:节偏移、重定位类型和符号名字。 B. 
确定当模块被重定位时,链接器将修改 .data 中的哪些数据目标。对于每条这样的 指令,列出它的重定位条目中的信息:节偏移、重定位类型和符号名字。 可以随意使用诸如 objdump 之类的工具来帮助你解答这个题目。 --------------------------------------------------------------------------- 1 extern int p3(void); 2 int x = 1; 3 int *xp = &x; 4 5 void p2(int y) { 6 } 7 8 void p1() { 9 p2(*xp + p3()); 10 } --------------------------------------------------------------------------- a) C 代码 --------------------------------------------------------------------------- 1 00000000 : 2 0: 55 push %ebp 3 1: 89 e5 mov %esp, %ebp 4 3: 89 ec mov %ebp, %esp 5 5: 5d pop %ebp 6 6: c3 ret 7 00000008 : 8 8: 55 push %ebp 9 9: 89 e5 mov %esp, %ebp 10 b: 83 ec 08 sub $0x8, %esp 11 e: 83 c4 f4 add $0xfffffff4, %esp 12 11: e8 fc ff ff ff call 12 13 16: 89 c3 mov %eax, %edx 14 18: a1 00 00 00 00 mov 0x0, %eax 15 1d: 03 10 add (%eax), %edx 16 1f: 52 push %edx 17 20: e8 fc ff ff ff call 21 18 25: 89 ec mov %ebp, %esp 19 27: 5d pop %ebp 20 28: c3 ret --------------------------------------------------------------------------- b) 可重定位目标文件的 .text 节 --------------------------------------------------------------------------- 1 00000000 : 2 0: 01 00 00 00 3 00000004 : 4 4: 00 00 00 00 --------------------------------------------------------------------------- c) 可重定位目标文件的 .data 节 图 7-20 家庭作业 7.13 的示例代码 7.14 考虑图 7-21 中的 C 代码和相应的可重定位目标模块。 A. 确定当模块被重定位时,链接器将修改 .text 中的哪些指令。对于每条这样的指令, 列出它的重定位条目中的信息:节偏移、重定位类型和符号名字。 B. 
确定当模块被重定位时,链接器将修改 .rodata 中的哪些数据。对于每条这样的指 令,列出它的重定位条目中的信息:节偏移、重定位类型和符号名字。 可以随意使用诸如 objdump 之类的工具来帮助你解答这个题目。 ---------------------------------------------------------------------------- 1 int relo3(int val) { 2 switch (val) { 3 case 100: 4 return(val); 5 case 101: 6 return(val+1); 7 case 103: case 104: 8 return(val+3); 9 case 105: 10 return(val+5); 11 default: 12 return(val+6); 13 } 14 } ---------------------------------------------------------------------------- a) C 代码 ---------------------------------------------------------------------------- 1 00000000 <relo3>: 2 0: 55 push %ebp 3 1: 89 e5 mov %esp, %ebp 4 3: 8b 45 08 mov 0x8(%ebp), %eax 5 6: 8d 50 9c lea 0xffffff9c(%eax), %edx 6 9: 83 fa 05 cmp $0x5, %edx 7 c: 77 17 ja 25 8 e: ff 24 95 00 00 00 00 jmp *0x0(,%edx,4) 9 15: 40 inc %eax 10 16: eb 10 jmp 28 11 18: 83 c0 03 add $0x3, %eax 12 1b: eb 0b jmp 28 13 1d: 8d 76 00 lea 0x0(%esi), %esi 14 20: 83 c0 05 add $0x5, %eax 15 23: eb 03 jmp 28 16 25: 83 c0 06 add $0x6, %eax 17 28: 89 ec mov %ebp, %esp 18 2a: 5d pop %ebp 19 2b: c3 ret ---------------------------------------------------------------------------- b) 可重定位目标文件的 .text 节 ---------------------------------------------------------------------------- This is the jump table for the switch statement 1 0000 28000000 15000000 25000000 18000000 4 words at offsets 0x0,0x4,0x8,and 0xc 2 0010 18000000 20000000 2 words at offsets 0x10 and 0x14 ---------------------------------------------------------------------------- c) 可重定位目标文件的 .rodata 节 图 7-21 家庭作业 7.14 的示例代码 7.15 完成下面的任务将帮助你更熟悉处理目标文件的各种工具。 A. 在你的系统上,libc.a 和 libm.a 的版本中包含多少目标文件? B. gcc -O2 产生的可执行代码与 gcc -O2 -g 产生的不同吗? C. 在你的系统上,GCC 驱动程序使用的是什么共享库? 8.9 考虑四个具有如下开始和结束时间的进程: |------+----------+----------| | 进程 | 开始时间 | 结束时间 | |------+----------+----------| | A | 5 | 7 | | B | 2 | 4 | | C | 3 | 6 | | D | 1 | 8 | |------+----------+----------| 对于每对进程,指明它们是否是并发地运行的: |--------+----------| | 进程对 | 并发地? 
| |--------+----------| | AB | | |--------+----------| | AC | | |--------+----------| | AD | | |--------+----------| | BC | | |--------+----------| | BD | | |--------+----------| | CD | | |--------+----------| 8.10 在这一章里,我们介绍了一些具有不寻常的调用和返回行为的函数:setjmp、longjmp、 execve 和 fork。找到下列行为中和每个函数相匹配的一种: A. 调用一次,返回两次。 B. 调用一次,从不返回。 C. 调用一次,返回一次或者多次。 8.11 这个程序会输出多少个“hello”输出行? int main() { int i; for (i = 0; i < 2; i++) Fork(); printf("hello\n"); exit(0); } 8.12 这个程序会输出多少个“hello”输出行? #include "csapp.h" void doit() { Fork(); Fork(); printf("hello\n"); return; } int main() { doit(); printf("hello\n"); return; } 8.13 下面程序的一种可能的输出是什么? #include "csapp.h" int main() { int x = 3; if (Fork() != 0) printf("x=%d\n", ++x); printf("x=%d\n", --x); exit(0); } 8.14 下面这个程序会输出多少个“hello”输出行? #include "csapp.h" void doit() { if (Fork() == 0) { Fork(); printf("hello\n"); exit(0); } return; } int main() { doit(); printf("hello\n"); exit(0); } 8.15 下面这个程序会输出多少个“hello”输出行? #include "csapp.h" void doit() { if (Fork() == 0) { Fork(); printf("hello\n"); return; } return; } int main() { doit(); printf("hello\n"); exit(0); } 8.16 下面这个程序的输出是什么? #include "csapp.h" int counter = 1; int main() { if (fork() == 0) { counter--; exit(0); } else { Wait(NULL); printf("counter = %d\n", ++counter); } exit(0); } 8.17 列举练习题 8.4 中程序所有可能的输出。 8.18 考虑下面的程序: #include "csapp.h" void end(void) { printf("2"); } int main() { if (Fork() == 0) atexit(end); if (Fork() == 0) printf("0"); else printf("1"); exit(0); } 判断下面哪个输出是可能的。注意:atexit 函数以一个指向函数的指针为输入,并将 它添加到函数列表中(初始为空),当 exit 函数被调用时,会调用该列表中的函数。 A. 112002 B. 211020 C. 102120 D. 122001 E. 
100212 8.19 下面的函数打印多少行输出?给出一个答案为 n 的函数。假设 n >= 1。 void foo(int n) { int i; for (i = 0; i < n; i++) Fork(); printf("hello\n"); exit(0); } 8.20 使用 execve 编写一个叫做 myls 的程序,该程序的行为和 /bin/ls 程序的一样。你 的程序应该接受相同的命令行参数,解释同样的环境变量,并产生相同的输出。 ls 程序从 COLUMNS 环境变量中获得屏幕的宽度。如果没有设置 COLUMNS,那么 ls 会 假设屏幕宽 80 列。因此,你可以通过把 COLUMNS 环境设置小于 80,来检查你对环境 变量的处理: unix> cc ex8-20.c -o myls unix> setenv COLUMNS 40 ...output is 40 column wide unix> ./myls unix> unsetenv COLUMNS unix> ./myls ...output is now 80 columns wide 8.21 下面程序的可能的输出序列是多少? int main() { if (fork() == 0) { printf("a"); exit(0); } else { printf("b"); waitpid(-1, NULL, 0); } printf("c"); exit(0); } 8.22 编写 UNIX system 函数的你自己的版本 int mysystem(char *command); mysystem 函数通过调用“/bin/sh -c command”来执行 command,然后在 command 完 成后返回。如果 command(通过调用 exit 函数或者执行一条 return 语句)正常退出, 那么 mysystem 返回 command 退出状态。例如,如果 command 通过调用 exit(8) 终 止,那么 mysystem 返回值 8。否则,如果 command 是异常终止的,那么 mysystem 就返回外壳返回的状态。 8.23 你的一个同事想要使用信号来让一个父进程对发生在一个子进程中的事件计数。其思 想是每次发生一个事件时,通过向父进程发送一个信号来通知它,并且让父进程的信 号处理程序对一个全局变量 counter 加一,在子进程终止之后,父进程就可以检查这 个变量。然而,当他在系统上运行下面测试代码时,发现父进程调用 printf 时, counter 的值总是 2,即使子进程向父进程发送了 5 个信号。他很困惑,向你寻求帮 助。你能解释这个程序有什么错误吗? 
#include "csapp.h" int counter = 0; void handler(int sig) { counter++; sleep(1); /* Do some work in the handler */ return; } int main() { int i; Signal(SIGUSR2, handler); if (Fork() == 0) { /* Child */ for (i = 0; i < 5; i++) { Kill(getppid(), SIGUSR2); printf("sent SIGUSR2 to parent\n"); } exit(0); } Wait(NULL); printf("counter=%d\n", counter); exit(0); } 8.24 修改下面的程序,以满足下面两个条件: #include "csapp.h" #define N 2 int main(void) { int status, i; pid_t pid; /* Parent creates N children */ for (i = 0; i < N; i++) if ((pid = Fork()) == 0) /* Child */ exit(100+i); /* Parent reaps N children in no particular order */ while ((pid = waitpid(-1, &status, 0)) > 0) { if (WIFEXITED(status)) printf("child %d terminated normally with exit status=%d\n", pid, WEXITSTATUS(status)); else printf("child %d terminated abnormally\n", pid); } /* The normal termination is if there are no more children */ if (errno != ECHILD) unix_error("waitpid error"); exit(0); } 1. 每个子进程在试图写一个只读文本段中的位置时会异常终止。 2. 父进程打印和下面所示相同(除了 PID)的输出: child 12255 terminated by signal 11: Segmentation fault child 12254 terminated by signal 11: Segmentation fault 提示:请参考 psignal(3) 的 man 页。 8.25 编写 fgets 函数的一个版本,叫做 tfgets,它 5 秒中后会超时。tfgets 函数接收和 fgets 相同的输入。如果用户在 5 秒内不键入一个输入行,tfgets 返回 NULL。否则, 它返回一个指向输入行的指针。 8.26 以图 8-22(sample/ch08/shellex.c)中的示例作为开始点,编写一个支持作业控制的 外壳程序。外壳必须具有以下特征: o 用户输入的命令行由一个 name、零个或者多个参数组成,它们都是由一个或者多个 空格分隔开的。如果 name 是一个内置命令,那么外壳就立刻处理它,并等待下一个 命令行。否则,外壳就假设 name 是一个可执行的文件,在一个初始的子进程(作业) 的上下文中加载并运行它。作业的进程组 ID 与子进程的 PID 相同。 o 每个作业是由一个进程 ID(PID)或者一个作业 ID(JID)来标识的,它是由一个外 壳分配的任意的小正整数。JID 在命令行上用前缀“%”来表示。比如,“%5”表示 JID 5,而“5”表示 PID 5。 o 如果命令行以 & 结束,那么外壳程序就在后台运行这个作业。否则,外壳就在前台 运行这个作业。 o 输入 ctrl-c(ctrl-z),使得外壳发送一个 SIGINT(SIGTSTP)信号给前台进程组 中的每个进程。 o 内置命令 jobs 列出所有的后台作业。 o 内置命令 bg 通过发送一个 SIGCONT 信号重启 ,然后在后台运行它。 参数可以是一个 PID,也可以是一个 JID。 o 内置命令 fg 通过发送一个 SIGCONT 信号重启 ,然后在前台运行它。 o 外壳回收它所有的僵死子进程。如果任何作业因为它收到一个未捕获的信号而终止, 那么外壳就输出一条信息到终端,包含该作业的 PID 和对违规信号的描述。 下面展示了一个示例的外壳会话: unix> ./shell # Run your shell program > bogus bogus: 
Command not found. # Execve can't find executable > foo 10 Job 5035 terminated by signal: Interrupt # User types ctrl-c > foo 100 & [1] 5036 foo 100 & > foo 200 & [2] 5037 foo 200 & > fg %1 Job [1] 5036 stopped by signal: Stopped # User types ctrl-z > jobs [1] 5036 Stopped foo 100 & [2] 5037 Running foo 200 & > bg 5035 5035: No such process > bg 5036 [1] 5036 foo 100 & > /bin/kill 5036 Job 5036 terminated by signal: Terminated > fg %2 # Wait for fg job to finish. > quit unix> # Back to the UNIX shell 9.14 假设有一个输入文件 hello.txt,由字符串“hello, world!\n”组成,编写一个 C 程 序,使用 mmap 将 hello.txt 的内容改为“Jello, world!\n”。 9.17 开发 9.9.12 节中的分配器的一个版本,执行下一次适配搜索,而不是首次适配搜索。 9.18 9.9.12 节中的分配器要求每个块既有头部也有脚部,以实现常数时间的合并。修改分 配器,使得空闲块需要头部和脚部,而已分配块只需要头部。 9.20 编写你自己的 malloc 和 free 版本,将它的运行时间和空间利用率与标准 C 库提供 的 malloc 版本进行比较。 10.6 下面程序的输出是什么? #include "csapp.h" int main() { int fd1, fd2; fd1 = Open("foo.txt", O_RDONLY, 0); fd2 = Open("bar.txt", O_RDONLY, 0); Close(fd2); fd2 = Open("baz.txt", O_RDONLY, 0); printf("fd2 = %d\n", fd2); exit(0); } 10.7 修改图 10-4 中所示的 cpfile 程序,使得它用 RIO 函数从标准输入拷贝到标准输出, 一次 MAXBUF 个字节。 10.8 编写图 10-10 中的 statcheck 程序的一个版本,叫做 fstatcheck,它从命令行取得 一个描述符数字而不是文件名。 10.9 考虑下面对家庭作业题 10.8 中的 fstatcheck 程序的调用 unix> fstatcheck 3 < foo.txt 你可能会预想这个对 fstatcheck 的调用将提取和显示文件 foo.txt 的元数据。然而, 当我们在系统上运行它时,它将失败,返回“坏的文件描述符”。根据这种情况,填 写出外壳在 fork 和 execve 调用之间必须执行的伪代码: if (Fork() == 0) { /* Child */ /* What code is the shell executing right here? */ Execve("fstatcheck", argv, envp); } 10.10 修改图 10-4 中的 cpfile 程序,使得它有一个可选的命令行参数 infile。如果给定 了 infile,那么拷贝 infile 到标准输出,否则像以前那样拷贝标准输入到标准输出。 一个要求是对于两种情况,你的解答都必须使用原来的拷贝循环(第 9~11 行)。只 允许你插入代码,而不允许更改任何已经存在的代码。 11.6 A. 修改 TINY 使得它会原样返回每个请求行和请求报头。 B. 使用你喜欢的浏览器向 TINY 发送一个对静态内容的请求。把 TINY 的输出记录到一个文件中。 C. 检查 TINY 的输出,确定你的浏览器使用的 HTTP 的版本。 D. 
参考 RFC2616 中的 HTTP/1.1 标准,确定你的浏览器的 HTTP 请求中每个报头的 含义。你可以从 www.rfc-editor.org/rfc.html 获得 RFC2616。 11.7 扩展 TINY,使得它可以提供 MPG 视频文件。用一个真正的浏览器来检验你的工作。 11.8 修改 TINY,使得它在 SIGCHLD 处理程序中回收操作系统分配给 CGI 子进程的资源, 而不是显式地等待它们终止。 11.9 修改 TINY,使得当它服务静态内容时,使用 malloc、rio_readn 和 rio_writen,而 不是 mmap 和 rio_writen,来拷贝被请求文件到已连接描述符。 11.10 A. 写出图 11-26 中 CGI adder 函数的 HTML 表单。你的表单应该包括两个文本框, 用户将需要相加的两个数字填在这两个文本框中。你的表单应该使用 GET 方法请 求内容。 B. 用这样的方法来检查你的程序:使用一个真正的浏览器向 TINY 请求表单,向 TINY 提交填写好的表单,然后显示 adder 生成的动态内容。 11.11 扩展 TINY,以支持 HTTP HEAD 方法。使用 TELNET 作为 Web 客户端来验证你的工 作。 11.12 扩展 TINY,使得它服务以 HTTP POST 方式请求的动态内容。用你喜欢的 Web 浏览 器来验证你的工作。 11.13 修改 TINY,使得它可以干净地处理(而不是终止)在 write 函数试图写一个过早关 闭的连接时发生的 SIGPIPE 信号和 EPIPE 错误。 12.16 编写 hello.c(见图 12-13)的一个版本,它创建和回收 n 个可结合的对等线程,其 中 n 是一个命令行参数。 12.22 检查一下你对 select 函数的理解,请修改图 12-6 中(select.c)的服务器,使得 它在主服务器的每次迭代中最多回送一个文本行。 12.23 图 12-8 中(echoservers.c)的事件驱动并发 echo 服务器是有缺陷的,因为一个恶 意的客户端能够通过发送部分的文本行,使服务器拒绝为其他客户端服务。编写一个 改进的服务器版本,使之能够非阻塞地处理这些部分文本行。 12.24 RIO/IO 包中的函数(见 10.4 节)都是线程安全的。它们也都是可重入函数吗? 
12.31 实现标准 I/O 函数 fgets 的一个版本,叫做 tfgets,加入它在 5 秒之内没有从标 准输入上接收到一个输入行,那么就超时,并返回一个 NULL 指针。你的函数应该实 现在一个叫做 tfgets-proc.c 的包中,使用进程、信号和非本地跳转。它不应该使用 UNIX 的 alarm 函数。使用下面的驱动程序测试你的结果。 #include "csapp.h" char *tfgets(char *s, int size, FILE *stream); int main() { char buf[MAXLINE]; if (tfgets(buf, MAXLINE, stdin) == NULL) printf("BOOM!\n"); else printf("%s", buf); exit(0); } 12.32 使用 select 函数来实现练习题 12.31 中 tfgets 函数的一个版本。你的函数应该在 一个叫做 tfgets-select.c 的包中实现。用练习题 12.31 中的驱动程序测试你的结 果。你可以假定标准输入被赋值为描述符 0。 12.33 实现练习题 12.31 中 tfgets 函数的一个线程化的版本。你的函数应该在一个叫做 tfgets-thread.c 的包中实现。用练习题 12.31 中的驱动程序测试你的结果。 ================================================ FILE: exercise/README ================================================ 这个文件对这个文件夹下面的文件做出说明。 00-topic.txt: 这个文件收集了这本书的所有的家庭作业的题目 ex*: 其他的所有文件都是题目的解答 t*: 以 t 为前缀的文件是对题目的测试 mofaph 2012/11/24 ================================================ FILE: exercise/ex10-10.c ================================================ /* * 10.10 * * mofaph@gmail.com * 2013-5-11 * * 感谢 @oymy,指出了这道题应该使用 dup2()。 * * $ cc -I../common ../common/csapp.c ex10-10.c -lpthread */ #include "csapp.h" int main(int argc, char **argv) { int n; rio_t rio; char buf[MAXLINE]; if (argc > 2) { fprintf(stderr, "usage: %s [filename]\n", argv[0]); return -1; } /* 这里将标准输入的文件描述符,重定位到输入文件的描述符 */ if (argc == 2) { char *filename = argv[1]; int fd = Open(filename, O_RDONLY, 0); int ret = dup2(fd, STDIN_FILENO); /* 将标准输入重定位到 fd */ if (ret < 0) { perror("dup2"); return -1; } } /* 没有输入文件名,将标准输入拷贝到标准输出 */ Rio_readinitb(&rio, STDIN_FILENO); while ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) Rio_writen(STDOUT_FILENO, buf, n); return 0; } ================================================ FILE: exercise/ex10-2.c ================================================ /* * 10.2 * * mofaph@gmail.com * * 假设磁盘文件 foobar.txt 由 6 个 ASCII 码字符“foobar”组成。那么,下列程序的输 * 出是什么? 
* * $ echo "foobar" > foobar.txt * $ cc -I../common ../common/csapp.c ex10-2.c -lpthread */ #include "csapp.h" int main() { int fd1, fd2; char c; fd1 = Open("foobar.txt", O_RDONLY, 0); fd2 = Open("foobar.txt", O_RDONLY, 0); Read(fd1, &c, 1); Read(fd2, &c, 1); printf("c = %c\n", c); exit(0); } ================================================ FILE: exercise/ex10-3.c ================================================ /* * 10.3 * * 就像前面 10.2 那样,假设磁盘文件 foobar.txt 由 6 个 ASCII 码字符“foobar”组成。 * 那么下列程序的输出是什么? * * $ echo "foobar" > foobar.txt * $ cc -I../common ../common/csapp.c ex10-3.c -lpthread */ #include "csapp.h" int main() { int fd; char c; fd = Open("foobar.txt", O_RDONLY, 0); if (Fork() == 0) { /* child */ Read(fd, &c, 1); exit(0); } /* parent */ Wait(NULL); Read(fd, &c, 1); printf("c = %c\n", c); exit(0); } ================================================ FILE: exercise/ex10-5.c ================================================ /* * 10.5 * * mofaph@gmail.com * * 假设磁盘文件 foobar.txt 由 6 个 ASCII 码字符“foobar”组成,那么下列程序的输出 * 是什么? * * $ echo "foobar" > foobar.txt * $ cc -I../common ../common/csapp.c ex10-5.c -lpthread */ #include "csapp.h" int main() { int fd1, fd2; char c; fd1 = Open("foobar.txt", O_RDONLY, 0); fd2 = Open("foobar.txt", O_RDONLY, 0); Read(fd2, &c, 1); Dup2(fd2, fd1); Read(fd1, &c, 1); printf("c = %c\n", c); exit(0); } ================================================ FILE: exercise/ex10-6.c ================================================ /* * 10.6 * * mofaph@gmail.com * 2013/5/11 * * 下面程序的输出是什么? 
* * $ touch {foo,bar,baz}.txt * $ cc -I../common ../common/csapp.c ex10-6.c -lpthread */ #include "csapp.h" int main(void) { int fd1, fd2; fd1 = Open("foo.txt", O_RDONLY, 0); /* fd1: 3 */ fd2 = Open("bar.txt", O_RDONLY, 0); /* fd2: 4 */ Close(fd2); fd2 = Open("baz.txt", O_RDONLY, 0); /* fd2: 4 */ printf("fd2 = %d\n", fd2); exit(0); } ================================================ FILE: exercise/ex10-7.c ================================================ /* * 10.7 * * mofaph@gmail.com * 2013-5-11 * * $ cc -I../common ../common/csapp.c ex10-7.c -lpthread * # 注意,上面链接到 pthread 库不是代码必须的,是由于 csapp.c 中需要 */ #include "csapp.h" #undef MAXBUF #define MAXBUF 6 #undef MAXLINE #define MAXLINE 10 int main(int argc, char **argv) { int n; rio_t rio; char buf[MAXLINE]; int count = 0; /* 记录 buf 中已经读入的字符数 */ Rio_readinitb(&rio, STDIN_FILENO); n = Rio_readnb(&rio, buf, MAXLINE); while (n > 0) { count += n; if (count >= MAXBUF) { Rio_writen(STDOUT_FILENO, buf, MAXBUF); count -= MAXBUF; if (count > 0) memmove(buf, buf+MAXBUF, count); } n = Rio_readnb(&rio, buf+count, MAXLINE-count); } return 0; } ================================================ FILE: exercise/ex10-8.c ================================================ /* * 10.8 * * mofaph@gmail.com * 2013-5-11 * * 编写图 10-10 中的 statcheck 程序的一个版本,叫做 fstatcheck,它从命令行取得一 * 个描述符数字而不是文件名。 * * $ cc -I../common ../common/csapp.c ex10-8.c -lpthread */ #include #include #include #include #include int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "usage: %s fd\n", argv[0]); return -1; } char *c = argv[1]; for (;;) { if (isalpha(*c)) { fprintf(stderr, "%s: not a number\n", argv[1]); return -1; } if (*c == '\0') break; c += 1; } int fd = atoi(argv[1]); struct stat stat; int ret = fstat(fd, &stat); if (ret) { perror("fstat"); return -1; } char *type; if (S_ISREG(stat.st_mode)) /* Determine file type */ type = "regular"; else if (S_ISDIR(stat.st_mode)) type = "directory"; else type = "other"; char *readok; if ((stat.st_mode & S_IRUSR)) 
/* Check read access */ readok = "yes"; else readok = "no"; printf("type: %s, read: %s\n", type, readok); exit(0); } ================================================ FILE: exercise/ex10-9.txt ================================================ 10.9 bskim45@gmail.com 2013-12-17 -------------------------------------------------------------------------------- Before the call to execve, the child process opens foo.txt as descriptor 3, redirects stdin to foo.txt, and then (here is the kicker) closes descriptor 3: if (Fork() == 0) { /* Child */ /* The shell presumably executes code like this */ fd = Open("foo.txt", O_RDONLY, 0); /* fd == 3 */ Dup2(fd, STDIN_FILENO); Close(fd); Execve("fstatcheck", argv, envp); } When fstatcheck begins running in the child, there are exactly three open files, corresponding to descriptors 0, 1, and 2, with descriptor 0 redirected to foo.txt. ================================================ FILE: exercise/ex11-10/cgi-bin/adder.c ================================================ /* * 11.10 * * mofaph@gmail.com * 2013-5-31 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; buf = getenv("QUERY_STRING"); if (buf == NULL || strlen(buf) == 0) { snprintf(content, sizeof(content), "
" "" "+" "" "
" "" "
"); } else { p = strchr(buf, '&'); *p = '\0'; char *index = strchr(buf, '='); if (index == NULL) { strcpy(arg1, buf); } else { strcpy(arg1, index+1); } n1 = atoi(arg1); index = strchr(p+1, '='); if (index == NULL) { strcpy(arg2, p+1); } else { strcpy(arg2, index+1); } n2 = atoi(arg2); /* Make the response body */ snprintf(content, sizeof(content), "Welcome to add.com: " "THE Internet addition portal.\r\n

" "The answer is: %d + %d = %d\r\n

" "Thanks for visiting!\r\n", n1, n2, n1+n2); } /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-10/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-10/tiny.c ================================================ /* * 11.10 * * mofaph@gmail.com * 2013-5-31 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } listenfd = Open_listenfd(port); while (1) { clientlen 
= sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers, put them into a file */ char *request = NULL; Rio_readinitb(&rio, fd); for (;;) { Rio_readlineb(&rio, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); free(request); return; } fprintf(out, "%s", request); fflush(out); fclose(out); free(request); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void 
clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = strchr(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else { strcpy(cgiargs, ""); } strcpy(filename, "."); strcat(filename, uri); return 0; } } void serve_static(int fd, char *filename, int filesize) { char filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Server: Tiny Web Server\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-length: %d\r\n", filesize); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-type: %s\r\n\r\n", filetype); Rio_writen(fd, buf, strlen(buf)); /* * Send response body to client * * If the file is big enough, we read many times, and each time we * double increase buffer. 
*/ int srcfd = Open(filename, O_RDONLY, 0); char *body = NULL; const int min_size = (1<<10); /* 1KB */ int max_size = (unsigned)INT_MIN >> 1; /* 16bit: 16KB, 32bit: 1GB */ size_t size = min_size; for (;;) { if (size != max_size) { char *old_body = body; body = realloc(old_body, size); if (body == NULL) { if (size == min_size) { free(old_body); perror("realloc"); break; } else { /* size > min_size */ max_size = size >> 1; size = max_size; body = old_body; } } } int n = Rio_readn(srcfd, body, size); if (n > 0) /* read something */ Rio_writen(fd, body, n); if (n != size) /* EOF or read all the content */ break; if (size != max_size) size <<= 1; /* increase buffer, read more next time */ } /* Clean */ free(body); Close(srcfd); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char *filename, char *cgiargs) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ sprintf(buf, "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } if (is_wait) Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-11/cgi-bin/adder.c ================================================ /* * 11.11 * * mofaph@gmail.com * 2013-6-8 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], 
arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; char *method = getenv("REQUEST_METHOD"); int is_response = !strcasecmp(method, "GET"); buf = getenv("QUERY_STRING"); if (buf == NULL || strlen(buf) == 0) { snprintf(content, sizeof(content), "
" "" "+" "" "
" "" "
"); } else { p = strchr(buf, '&'); *p = '\0'; char *index = strchr(buf, '='); if (index == NULL) { strcpy(arg1, buf); } else { strcpy(arg1, index+1); } n1 = atoi(arg1); index = strchr(p+1, '='); if (index == NULL) { strcpy(arg2, p+1); } else { strcpy(arg2, index+1); } n2 = atoi(arg2); /* Make the response body */ if (is_response) { snprintf(content, sizeof(content), "Welcome to add.com: " "THE Internet addition portal.\r\n

" "The answer is: %d + %d = %d\r\n

" "Thanks for visiting!\r\n", n1, n2, n1+n2); } } /* Generate the HTTP is_response */ printf("Content-length: %d\r\n", is_response ? (int)strlen(content) : 0); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-11/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-11/tiny.c ================================================ /* * 11.11 * * mofaph@gmail.com * 2013-6-4 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize, char *method); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs, char *method); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } 
listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } /* Return a pointer to request, the caller should free the buffer */ char *read_request(rio_t *rp) { char buf[MAXLINE]; char *request = NULL; for (;;) { Rio_readlineb(rp, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return NULL; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return NULL; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } return request; } void write_to_file(char *request) { if (request == NULL) return; FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); } else { fprintf(out, "%s", request); fflush(out); fclose(out); } } void doit(int fd) { rio_t rio; Rio_readinitb(&rio, fd); char *request = read_request(&rio); if (request == NULL) return; write_to_file(request); char method[MAXLINE], uri[MAXLINE], version[MAXLINE]; sscanf(request, "%s %s %s", method, uri, version); free(request); if (strcasecmp(method, "GET") && strcasecmp(method, "HEAD")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } char filename[MAXLINE], cgiargs[MAXLINE]; int is_static = parse_uri(uri, filename, cgiargs); struct stat sbuf; if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, 
sbuf.st_size, method); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs, method); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ snprintf(body, sizeof(body), "Tiny Error"); snprintf(body, sizeof(body), "%s\r\n", body); snprintf(body, sizeof(body), "%s%s: %s\r\n", body, errnum, shortmsg); snprintf(body, sizeof(body), "%s

%s: %s\r\n", body, longmsg, cause); snprintf(body, sizeof(body), "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ snprintf(buf, sizeof(buf), "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = strchr(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else { strcpy(cgiargs, ""); } strcpy(filename, "."); strcat(filename, uri); return 0; } } void send_response_body(int fd, char *filename) { /* * Send response body to client * * If the file is big enough, we read many times, and each time we * double increase buffer. 
*/ int srcfd = Open(filename, O_RDONLY, 0); char *body = NULL; const int min_size = (1<<10); /* 1KB */ int max_size = (unsigned)INT_MIN >> 1; /* 16bit: 16KB, 32bit: 1GB */ size_t size = min_size; for (;;) { if (size != max_size) { char *old_body = body; body = realloc(old_body, size); if (body == NULL) { if (size == min_size) { free(old_body); perror("realloc"); break; } else { /* size > min_size */ max_size = size >> 1; size = max_size; body = old_body; } } } int n = Rio_readn(srcfd, body, size); if (n > 0) /* read something */ Rio_writen(fd, body, n); if (n != size) /* EOF or read all the content */ break; if (size != max_size) size <<= 1; /* increase buffer, read more next time */ } /* Clean */ free(body); Close(srcfd); } void serve_static(int fd, char *filename, int filesize, char *method) { char filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Server: Tiny Web Server\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-length: %d\r\n", filesize); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-type: %s\r\n\r\n", filetype); Rio_writen(fd, buf, strlen(buf)); if (!strcasecmp(method, "HEAD")) return; send_response_body(fd, filename); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else if (strstr(filename, ".mpg")) strcpy(filetype, "video/mpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char *filename, char *cgiargs, char *method) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, 
sizeof(buf), "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); setenv("REQUEST_METHOD", method, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } if (is_wait) Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-12/cgi-bin/adder.c ================================================ /* * 11.12 * * mofaph@gmail.com * 2013-6-11 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; char *method = getenv("REQUEST_METHOD"); int is_response = !strcasecmp(method, "GET") || !strcasecmp(method, "POST"); buf = getenv("QUERY_STRING"); if (buf == NULL || strlen(buf) == 0) { snprintf(content, sizeof(content), "
" "" "+" "" "
" "" "
"); } else { p = strchr(buf, '&'); *p = '\0'; char *index = strchr(buf, '='); if (index == NULL) { strcpy(arg1, buf); } else { strcpy(arg1, index+1); } n1 = atoi(arg1); index = strchr(p+1, '='); if (index == NULL) { strcpy(arg2, p+1); } else { strcpy(arg2, index+1); } n2 = atoi(arg2); /* Make the response body */ if (is_response) { snprintf(content, sizeof(content), "Welcome to add.com: " "THE Internet addition portal.\r\n

" "The answer is: %d + %d = %d\r\n

" "Thanks for visiting!\r\n", n1, n2, n1+n2); } } /* Generate the HTTP is_response */ printf("Content-length: %d\r\n", is_response ? (int)strlen(content) : 0); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-12/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-12/tiny.c ================================================ /* * 11.12 * * mofaph@gmail.com * 2013-6-11 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize, char *method); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs, char *method); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } 
listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } /* Return a pointer to request, the caller should free the buffer */ char *read_request(rio_t *rp) { char buf[MAXLINE]; char *request = NULL; for (;;) { Rio_readlineb(rp, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return NULL; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return NULL; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } return request; } void write_to_file(char *request) { if (request == NULL) return; FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); } else { fprintf(out, "%s", request); fflush(out); fclose(out); } } /* store the result to entity */ void get_post_entity(char *request, char *entity, int len) { int i = 0, j = 0; for (;;) { /* Read a line */ for (;;) { if (request[i] == '\0') return; if (request[i] == '\r') { i += 2; break; } entity[j] = request[i]; i += 1, j += 1; if (j+1 == len) /* overflow? */ break; } entity[j] = '\0'; j = 0; /* Is something like "23&45"? 
*/ if (strchr(entity, '&')) break; } } void doit(int fd) { rio_t rio; Rio_readinitb(&rio, fd); char *request = read_request(&rio); if (request == NULL) return; write_to_file(request); char method[MAXLINE], uri[MAXLINE], version[MAXLINE]; sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET") && strcasecmp(method, "HEAD") && strcasecmp(method, "POST")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); free(request); return; } char filename[MAXLINE], cgiargs[MAXLINE]; int is_static = parse_uri(uri, filename, cgiargs); if (!strcasecmp(method, "POST")) get_post_entity(request, cgiargs, sizeof(cgiargs)/sizeof(cgiargs[0])); free(request); struct stat sbuf; if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size, method); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs, method); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ snprintf(body, sizeof(body), "Tiny Error"); snprintf(body, sizeof(body), "%s\r\n", body); snprintf(body, sizeof(body), "%s%s: %s\r\n", body, errnum, shortmsg); snprintf(body, sizeof(body), "%s

%s: %s\r\n", body, longmsg, cause); snprintf(body, sizeof(body), "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ snprintf(buf, sizeof(buf), "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, sizeof(buf), "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = strchr(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else { strcpy(cgiargs, ""); } strcpy(filename, "."); strcat(filename, uri); return 0; } } void send_response_body(int fd, char *filename) { /* * Send response body to client * * If the file is big enough, we read many times, and each time we * double increase buffer. 
*/ int srcfd = Open(filename, O_RDONLY, 0); char *body = NULL; const int min_size = (1<<10); /* 1KB */ int max_size = (unsigned)INT_MIN >> 1; /* 16bit: 16KB, 32bit: 1GB */ size_t size = min_size; for (;;) { if (size != max_size) { char *old_body = body; body = realloc(old_body, size); if (body == NULL) { if (size == min_size) { free(old_body); perror("realloc"); break; } else { /* size > min_size */ max_size = size >> 1; size = max_size; body = old_body; } } } int n = Rio_readn(srcfd, body, size); if (n > 0) /* read something */ Rio_writen(fd, body, n); if (n != size) /* EOF or read all the content */ break; if (size != max_size) size <<= 1; /* increase buffer, read more next time */ } /* Clean */ free(body); Close(srcfd); } void serve_static(int fd, char *filename, int filesize, char *method) { char filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Server: Tiny Web Server\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-length: %d\r\n", filesize); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-type: %s\r\n\r\n", filetype); Rio_writen(fd, buf, strlen(buf)); if (!strcasecmp(method, "HEAD")) return; send_response_body(fd, filename); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else if (strstr(filename, ".mpg")) strcpy(filetype, "video/mpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char *filename, char *cgiargs, char *method) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); snprintf(buf, 
sizeof(buf), "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); setenv("REQUEST_METHOD", method, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } if (is_wait) Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-13/cgi-bin/adder.c ================================================ /* * 11.12 * * mofaph@gmail.com * 2013-6-11 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; char *method = getenv("REQUEST_METHOD"); int is_response = !strcasecmp(method, "GET") || !strcasecmp(method, "POST"); buf = getenv("QUERY_STRING"); if (buf == NULL || strlen(buf) == 0) { snprintf(content, sizeof(content), "
" "" "+" "" "
" "" "
"); } else { p = strchr(buf, '&'); *p = '\0'; char *index = strchr(buf, '='); if (index == NULL) { strcpy(arg1, buf); } else { strcpy(arg1, index+1); } n1 = atoi(arg1); index = strchr(p+1, '='); if (index == NULL) { strcpy(arg2, p+1); } else { strcpy(arg2, index+1); } n2 = atoi(arg2); /* Make the response body */ if (is_response) { snprintf(content, sizeof(content), "Welcome to add.com: " "THE Internet addition portal.\r\n

" "The answer is: %d + %d = %d\r\n

" "Thanks for visiting!\r\n", n1, n2, n1+n2); } } /* Generate the HTTP is_response */ printf("Content-length: %d\r\n", is_response ? (int)strlen(content) : 0); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-13/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-13/tiny.c ================================================ /* * 11.13 * * mofaph@gmail.com * 2013-6-22 * * 如果一个服务器写一个已经被客户端关闭了的连接(比如,在浏览器上单击了“Stop”按 * 钮),那么第一次这样的写会正常返回,但是第二次写就会引起发送 SIGPIPE 信号,这 * 个信号的默认行为就是终止这个进程。 * * 如果捕获或者忽略 SIGPIPE 信号,那么第二次写操作会返回值 -1,并将 errno 设置为 * EPIPE。 * * 总的来说,一个健壮的服务器必须捕获这些 SIGPIPE 信号,并且检查 write() 是否有 * EPIPE 错误。 * * csapp, p645 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize, char *method); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs, char *method); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } /* child process call this function */ void client_offline(int signo, siginfo_t *info, void *context) { /* If client is no longer alive, we just log something useful and exit */ openlog("tiny", LOG_CONS, LOG_WARNING); syslog(LOG_WARNING, "pid %d: caught signal %d, exit with status code -1\n", info->si_pid, info->si_signo); closelog(); exit(-1); } static int is_wait; /* Waits for child when failed to install 
handler */

/*
 * main - concurrent Tiny server: one child process per connection
 *
 * argv[1] is the port to listen on.  A SIGCHLD handler reaps finished
 * children; if it cannot be installed, is_wait is set and the parent
 * waits for each child instead.  A SIGPIPE handler is installed so that a
 * child writing to a connection the client has closed logs the event and
 * exits.  The parent loops forever accepting connections; each child
 * serves one request and exits.
 */
int main(int argc, char **argv)
{
    int listenfd, connfd, port;
    struct sockaddr_in clientaddr;
    socklen_t clientlen;    /* avoid compiler warning */

    /* Check command line args */
    if (argc != 2) {
        fprintf(stderr, "usage: %s \n", argv[0]);
        exit(1);
    }
    port = atoi(argv[1]);

    /* Install SIGCHLD handler */
    struct sigaction action, old_action;
    memset(&action, 0, sizeof(action));
    action.sa_handler = reap_child;
    sigemptyset(&action.sa_mask);
    action.sa_flags = SA_RESTART;
    int ret = sigaction(SIGCHLD, &action, &old_action);
    is_wait = ret < 0;
    if (is_wait) {
        fprintf(stderr, "sigaction: %s, we will wait child to terminated\n",
                strerror(errno));
    }

    /* Install SIGPIPE handler */
    struct sigaction old2_action;
    memset(&action, 0, sizeof(action));
    action.sa_sigaction = client_offline;
    sigemptyset(&action.sa_mask);
    action.sa_flags = SA_SIGINFO;
    ret = sigaction(SIGPIPE, &action, &old2_action);
    if (ret < 0) {
        fprintf(stderr, "sigaction: failed to install SIGPIPE handler, %s",
                strerror(errno));
    }

    listenfd = Open_listenfd(port);
    while (1) {
        clientlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        int pid = Fork(); /* fork() wrapper, already handles errors */
        if (pid == 0)
            break;

        /*
         * Parent: the child owns the connection now.  Close our copy of
         * the descriptor; otherwise one descriptor leaks per connection
         * and the connection stays open after the child has finished.
         */
        Close(connfd);
        if (is_wait)
            Wait(NULL); /* Parent waits for and reaps child */
    }

    /* Child serves the client; it has no use for the listening socket */
    Close(listenfd);
    doit(connfd);
    Close(connfd);
    exit(0);
}

/* Return a pointer to request, the caller should free the buffer */
char *read_request(rio_t *rp)
{
    char buf[MAXLINE];
    char *request = NULL;

    for (;;) {
        /*
         * Stop when the peer closes the connection before the blank line.
         * Without this check buf keeps its previous contents and the loop
         * never terminates.
         * NOTE(review): assumes the csapp Rio_readlineb wrapper returns 0
         * at EOF - confirm against common/csapp.c.
         */
        if (Rio_readlineb(rp, buf, MAXLINE) <= 0) {
            free(request);
            return NULL;
        }

        /*
         * Firefox may send a request starting with \x16\x3
         * (\x16: Synchronous Idle, \x3: End of Text); give up on it.
         */
        if (!strncmp(buf, "\x16\x3", 2)) {
            free(request);
            return NULL;
        }

        int buflen = strlen(buf);
        char *old_request = request;
        int old_reqlen = old_request==NULL ?
0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return NULL; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } return request; } void write_to_file(char *request) { if (request == NULL) return; FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); } else { fprintf(out, "%s", request); fflush(out); fclose(out); } } /* store the result to entity */ void get_post_entity(char *request, char *entity, int len) { int i = 0, j = 0; for (;;) { /* Read a line */ for (;;) { if (request[i] == '\0') return; if (request[i] == '\r') { i += 2; break; } entity[j] = request[i]; i += 1, j += 1; if (j+1 == len) /* overflow? */ break; } entity[j] = '\0'; j = 0; /* Is something like "23&45"? */ if (strchr(entity, '&')) break; } } void doit(int fd) { rio_t rio; Rio_readinitb(&rio, fd); char *request = read_request(&rio); if (request == NULL) return; write_to_file(request); char method[MAXLINE], uri[MAXLINE], version[MAXLINE]; sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET") && strcasecmp(method, "HEAD") && strcasecmp(method, "POST")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); free(request); return; } char filename[MAXLINE], cgiargs[MAXLINE]; int is_static = parse_uri(uri, filename, cgiargs); if (!strcasecmp(method, "POST")) get_post_entity(request, cgiargs, sizeof(cgiargs)/sizeof(cgiargs[0])); free(request); struct stat sbuf; if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size, method); } else { /* Serve 
/*
 * clienterror - send an HTTP/1.0 error response to the client.
 *
 * fd:       connected descriptor the response is written to
 * cause:    what triggered the error (method or file name)
 * errnum:   status code as text, e.g. "404"
 * shortmsg: reason phrase, e.g. "Not found"
 * longmsg:  longer explanation shown in the page body
 *
 * The body is built with a single snprintf.  Passing body as both the
 * destination and a "%s" argument, as the chained calls did, is undefined
 * because the source and destination overlap.
 *
 * NOTE(review): the HTML tags were missing from the copy of this file under
 * review; they are written here as in the stock Tiny error page - confirm
 * against the working tree.
 */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg)
{
    char buf[MAXLINE], body[MAXBUF];

    /* Build the HTTP response body */
    snprintf(body, sizeof(body),
             "<html><title>Tiny Error</title>"
             "<body bgcolor=\"ffffff\">\r\n"
             "%s: %s\r\n"
             "<p>%s: %s\r\n"
             "<hr><em>The Tiny Web server</em>\r\n",
             errnum, shortmsg, longmsg, cause);

    /* Print the HTTP response */
    snprintf(buf, sizeof(buf), "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-length: %d\r\n\r\n", (int)strlen(body));
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, strlen(body));
}
/* filename_has_suffix - nonzero when name ends with suffix */
static int filename_has_suffix(const char *name, const char *suffix)
{
    size_t name_len = strlen(name);
    size_t suffix_len = strlen(suffix);

    return name_len >= suffix_len &&
           strcmp(name + name_len - suffix_len, suffix) == 0;
}

/*
 * get_filetype - derive the MIME type from the file name's extension.
 *
 * filename: path of the file being served
 * filetype: output buffer; receives one of "text/html", "image/gif",
 *           "image/jpeg", "video/mpeg" or, by default, "text/plain"
 *
 * The extension must be the end of the name.  Searching for it anywhere in
 * the path would classify "./pics.gif/readme" as a GIF.
 */
void get_filetype(char *filename, char *filetype)
{
    static const struct {
        const char *ext;
        const char *mime;
    } known[] = {
        { ".html", "text/html"  },
        { ".gif",  "image/gif"  },
        { ".jpg",  "image/jpeg" },
        { ".mpg",  "video/mpeg" },
    };

    for (size_t k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (filename_has_suffix(filename, known[k].ext)) {
            strcpy(filetype, known[k].mime);
            return;
        }
    }
    strcpy(filetype, "text/plain");
}
/*
 * main - hex2dd: print the dotted-decimal form of a 32-bit address given as
 * a number (strtoul base 0, so "0x8002c2f2" works).
 *
 * Returns 0 on success and -1 on a usage error or when the argument is not
 * a number that fits in 32 bits.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "usage: %s <hex>\n", argv[0]);
        return -1;
    }

    /* 1. Convert the string argument to an integer */
    char *end = NULL;
    errno = 0;          /* strtoul only sets errno on failure */
    unsigned long value = strtoul(argv[1], &end, 0);
    if (errno == ERANGE) {
        perror("strtoul");
        return -1;
    }

    /* strtoul accepts a leading '-' and negates; an address has no sign */
    int has_sign = 0;
    for (const char *c = argv[1]; *c != '\0'; c++) {
        if (*c == '-')
            has_sign = 1;
    }
    if (end == argv[1] || *end != '\0' || has_sign || value > 0xFFFFFFFFUL) {
        fprintf(stderr, "%s: not a 32-bit unsigned number\n", argv[1]);
        return -1;
    }

    /* 2. Convert the number to network byte order */
    unsigned int net = htonl((unsigned int)value);

    /* 3. Convert the integer to dotted-decimal */
    struct in_addr in;
    in.s_addr = net;
    char *decimal = inet_ntoa(in);
    printf("%s\n", decimal);

    return 0;
}
打印结果 */ printf("0x%x\n", host); return 0; } ================================================ FILE: exercise/ex11-6/cgi-bin/adder.c ================================================ /* * p638 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; /* Extract the two arguments */ if ((buf = getenv("QUERY_STRING")) != NULL) { p = strchr(buf, '&'); *p = '\0'; strcpy(arg1, buf); strcpy(arg2, p+1); n1 = atoi(arg1); n2 = atoi(arg2); } /* Make the response body */ sprintf(content, "Welcome to add.com: "); sprintf(content, "%sTHE Internet addition portal.\r\n

", content); sprintf(content, "%sThe answer is: %d + %d = %d\r\n

", content, n1, n2, n1+n2); sprintf(content, "%sThanks for visiting!\r\n", content); /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-6/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-6/tiny.c ================================================ /* * 11.6 * * mofaph@gmail.com * 2013-5-25 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers, put them into a file */ char *request = NULL; Rio_readinitb(&rio, fd); for (;;) { Rio_readlineb(&rio, buf, MAXLINE); /* * Firefox will sent \x16\x3. 
* \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); free(request); return; } fprintf(out, "%s", request); fflush(out); fclose(out); free(request); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


/*
 * parse_uri - split a request URI into a file name and CGI arguments.
 *
 * uri:      request URI; in the dynamic case it is cut at the '?'
 * filename: output; "." followed by the URI path, plus "home.html" when a
 *           static URI ends in '/'
 * cgiargs:  output; text after '?' for dynamic content, otherwise ""
 *
 * Returns 1 for static content and 0 for dynamic content (any URI that
 * contains "cgi-bin").
 *
 * The output buffers are filled with strcpy/strcat, so the caller must make
 * them large enough: filename needs strlen(uri) + 11 bytes.
 */
int parse_uri(char *uri, char *filename, char *cgiargs)
{
    if (strstr(uri, "cgi-bin") == NULL) {       /* Static content */
        size_t uri_len = strlen(uri);

        cgiargs[0] = '\0';
        strcpy(filename, ".");
        strcat(filename, uri);
        /* uri_len > 0 keeps an empty URI from reading uri[-1] */
        if (uri_len > 0 && uri[uri_len - 1] == '/')
            strcat(filename, "home.html");
        return 1;
    }

    /* Dynamic content */
    char *query = strchr(uri, '?');
    if (query != NULL) {
        strcpy(cgiargs, query + 1);
        *query = '\0';                          /* keep only the path */
    } else {
        cgiargs[0] = '\0';
    }
    strcpy(filename, ".");
    strcat(filename, uri);
    return 0;
}
sprintf(buf, "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-7/cgi-bin/adder.c ================================================ /* * 11.7 * * mofaph@gmail.com * 2013-6-2 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; buf = getenv("QUERY_STRING"); if (buf == NULL || strlen(buf) == 0) { snprintf(content, sizeof(content), "
" "" "+" "" "
" "" "
"); } else { p = strchr(buf, '&'); *p = '\0'; char *index = strchr(buf, '='); if (index == NULL) { strcpy(arg1, buf); } else { strcpy(arg1, index+1); } n1 = atoi(arg1); index = strchr(p+1, '='); if (index == NULL) { strcpy(arg2, p+1); } else { strcpy(arg2, index+1); } n2 = atoi(arg2); /* Make the response body */ snprintf(content, sizeof(content), "Welcome to add.com: " "THE Internet addition portal.\r\n

" "The answer is: %d + %d = %d\r\n

" "Thanks for visiting!\r\n", n1, n2, n1+n2); } /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-7/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-7/tiny.c ================================================ /* * 11.7 * * mofaph@gmail.com * 2013-6-2 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } listenfd = Open_listenfd(port); while (1) { clientlen = 
sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers, put them into a file */ char *request = NULL; Rio_readinitb(&rio, fd); for (;;) { Rio_readlineb(&rio, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); free(request); return; } fprintf(out, "%s", request); fflush(out); fclose(out); free(request); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void 
clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = strchr(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else { strcpy(cgiargs, ""); } strcpy(filename, "."); strcat(filename, uri); return 0; } } void serve_static(int fd, char *filename, int filesize) { char filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); snprintf(buf, sizeof(buf), "HTTP/1.0 200 OK\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Server: Tiny Web Server\r\n"); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-length: %d\r\n", filesize); snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "Content-type: %s\r\n\r\n", filetype); Rio_writen(fd, buf, strlen(buf)); /* * Send response body to client * * If the file is big enough, we read many times, and each time we * double increase buffer. 
*/ int srcfd = Open(filename, O_RDONLY, 0); char *body = NULL; const int min_size = (1<<10); /* 1KB */ int max_size = (unsigned)INT_MIN >> 1; /* 16bit: 16KB, 32bit: 1GB */ size_t size = min_size; for (;;) { if (size != max_size) { char *old_body = body; body = realloc(old_body, size); if (body == NULL) { if (size == min_size) { free(old_body); perror("realloc"); break; } else { /* size > min_size */ max_size = size >> 1; size = max_size; body = old_body; } } } int n = Rio_readn(srcfd, body, size); if (n > 0) /* read something */ Rio_writen(fd, body, n); if (n != size) /* EOF or read all the content */ break; if (size != max_size) size <<= 1; /* increase buffer, read more next time */ } /* Clean */ free(body); Close(srcfd); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else if (strstr(filename, ".mpg")) strcpy(filetype, "video/mpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char *filename, char *cgiargs) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ sprintf(buf, "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } if (is_wait) Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-8/cgi-bin/adder.c ================================================ /* * p638 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { 
char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; sleep(5); /* Just to test */ /* Extract the two arguments */ if ((buf = getenv("QUERY_STRING")) != NULL) { p = strchr(buf, '&'); *p = '\0'; strcpy(arg1, buf); strcpy(arg2, p+1); n1 = atoi(arg1); n2 = atoi(arg2); } /* Make the response body */ sprintf(content, "Welcome to add.com: "); sprintf(content, "%sTHE Internet addition portal.\r\n

", content); sprintf(content, "%sThe answer is: %d + %d = %d\r\n

", content, n1, n2, n1+n2); sprintf(content, "%sThanks for visiting!\r\n", content); /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-8/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-8/tiny.c ================================================ /* * 11.8 * * mofaph@gmail.com * 2013-5-28 * * 如果要使用信号处理程序来回收终止的子进程,需要注意的问题是“待处理信号”的问题。 * 由于 UNIX 系统在同一个时刻中,只会保留一个待处理信号,其余的将会丢弃。 * * 也就是说,当生成的子进程中,有 N 个同时终止(同时发送 SIGCHLD 信号),那么内核 * 将会将其中的 N-1 个子进程发送的信号丢弃掉。 * * 因此,我们在接收到一个信号的时候,应该尽可能多地回收子进程。 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = 
sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers, put them into a file */ char *request = NULL; Rio_readinitb(&rio, fd); for (;;) { Rio_readlineb(&rio, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); free(request); return; } fprintf(out, "%s", request); fflush(out); fclose(out); free(request); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic 
content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = index(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); return 0; } } void serve_static(int fd, char *filename, int filesize) { int srcfd; char *srcp, filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); sprintf(buf, "HTTP/1.0 200 OK\r\n"); sprintf(buf, "%sServer: Tiny Web Server\r\n", buf); sprintf(buf, "%sContent-length: %d\r\n", buf, filesize); sprintf(buf, "%sContent-type: %s\r\n\r\n", buf, filetype); Rio_writen(fd, buf, strlen(buf)); /* Send response body to client */ srcfd = Open(filename, O_RDONLY, 0); srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0); Close(srcfd); Rio_writen(fd, srcp, filesize); Munmap(srcp, filesize); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char *filename, char *cgiargs) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ sprintf(buf, "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); 
sprintf(buf, "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } if (is_wait) Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: exercise/ex11-9/cgi-bin/adder.c ================================================ /* * p638 * * unix> cc -I../../../common ../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; sleep(5); /* Just to test */ /* Extract the two arguments */ if ((buf = getenv("QUERY_STRING")) != NULL) { p = strchr(buf, '&'); *p = '\0'; strcpy(arg1, buf); strcpy(arg2, p+1); n1 = atoi(arg1); n2 = atoi(arg2); } /* Make the response body */ sprintf(content, "Welcome to add.com: "); sprintf(content, "%sTHE Internet addition portal.\r\n

", content); sprintf(content, "%sThe answer is: %d + %d = %d\r\n

", content, n1, n2, n1+n2); sprintf(content, "%sThanks for visiting!\r\n", content); /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: exercise/ex11-9/home.html ================================================ Welcome to add.com ================================================ FILE: exercise/ex11-9/tiny.c ================================================ /* * 11.9 * * mofaph@gmail.com * 2013-5-29 * * 1. 打开文件 * 2. 将文件内容读入缓冲区 * 3. 将缓冲区内容传送到客户端 * 4. 判断文件是否还有内容 * 5. 无内容,退出 * 6. 有内容,增大缓冲区 * 7. 能够增大缓冲区,返回第 2 步 * 8. 不能增大缓冲区 * 8.1 请求的大小等于最小值,报错退出 * 8.2 请求的大小大于最小值,保留能够分配的最大值,返回第 2 步 * * unix> cc -Wall -I../../common ../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" #include /* INT_MIN */ void doit(int fd); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); void reap_child(int signo) { for (;;) { /* We don't care child's status */ int ret = waitpid(-1, NULL, WNOHANG); if (ret == 0) /* child didn't terminated, return immediately */ break; else if (ret < 0 && errno == ECHILD) /* no child */ break; else /* continue */; } } static int is_wait; /* Waits for child when failed to install handler */ int main(int argc, char **argv) { int listenfd, connfd, port; struct sockaddr_in clientaddr; socklen_t clientlen; /* avoid compiler warning */ /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); /* Install SIGCHILD handler */ struct sigaction action, old_action; action.sa_handler = reap_child; sigemptyset(&action.sa_mask); action.sa_flags = SA_RESTART; int ret = 
sigaction(SIGCHLD, &action, &old_action); is_wait = ret < 0; if (is_wait) { fprintf(stderr, "sigaction: %s, we will wait child to terminated\n", strerror(errno)); } listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers, put them into a file */ char *request = NULL; Rio_readinitb(&rio, fd); for (;;) { Rio_readlineb(&rio, buf, MAXLINE); /* * Firefox will sent \x16\x3. * \x16: Synchronous Idle, \x3: End of Text */ if (!strncmp(buf, "\x16\x3", 2)) { free(request); return; } int buflen = strlen(buf); char *old_request = request; int old_reqlen = old_request==NULL ? 0 : strlen(old_request); request = realloc(old_request, buflen + old_reqlen + 1); if (request == NULL) { fprintf(stderr, "realloc: run out of memory\n"); free(old_request); return; } memmove(request+old_reqlen, buf, buflen+1); if (!strcmp(buf, "\r\n")) /* Copy before stop */ break; } sscanf(request, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, method, "501", "Not implemented", "Tiny does not implement this method"); return; } FILE *out = fopen("tiny-request.txt", "w+"); if (out == NULL) { perror("fopen"); free(request); return; } fprintf(out, "%s", request); fflush(out); fclose(out); free(request); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic 
content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


/*
 * parse_uri - split a request URI into a file name and a CGI argument string.
 *
 * uri:      NUL-terminated request URI; for dynamic content it is truncated
 *           in place at the first '?'.
 * filename: output buffer, receives "." followed by the URI path (plus
 *           "home.html" when a static URI ends in '/').
 * cgiargs:  output buffer, receives the text after '?' for dynamic content,
 *           or the empty string.
 *
 * Returns 1 for static content, 0 for dynamic content (any URI that
 * contains "cgi-bin").
 *
 * The output buffers are filled with strcpy/strcat, so the caller must make
 * them large enough for the URI plus the added prefix/suffix.
 */
int parse_uri(char *uri, char *filename, char *cgiargs)
{
        if (strstr(uri, "cgi-bin") == NULL) {   /* Static content */
                size_t len = strlen(uri);

                strcpy(cgiargs, "");
                strcpy(filename, ".");
                strcat(filename, uri);
                /* len > 0 guards against reading uri[-1] on an empty URI */
                if (len > 0 && uri[len - 1] == '/')
                        strcat(filename, "home.html");
                return 1;
        }

        /* Dynamic content: everything after the first '?' is the arguments */
        char *query = strchr(uri, '?');
        if (query != NULL) {
                strcpy(cgiargs, query + 1);
                *query = '\0';
        } else {
                strcpy(cgiargs, "");
        }
        strcpy(filename, ".");
        strcat(filename, uri);
        return 0;
}
/*
 * get_filetype - derive the MIME type from a file name.
 *
 * filename: NUL-terminated file name to inspect.
 * filetype: output buffer, receives the MIME type string.
 *
 * The name is searched (anywhere, not only at the end) for ".html", ".gif"
 * and ".jpg", in that order; the first hit decides the type.  When none of
 * them occurs the type is "text/plain".
 */
void get_filetype(char *filename, char *filetype)
{
        static const struct {
                const char *needle;
                const char *mime;
        } known[] = {
                { ".html", "text/html"  },
                { ".gif",  "image/gif"  },
                { ".jpg",  "image/jpeg" },
        };
        const char *mime = "text/plain";
        size_t i;

        for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
                if (strstr(filename, known[i].needle) != NULL) {
                        mime = known[i].mime;
                        break;
                }
        }
        strcpy(filetype, mime);
}
#include #include #include #include #include #include #include #include #define MAXLINE 4096 #define LISTENQ 1024 /* second argument to listen() */ void echo(int connfd) { char buf[MAXLINE]; int n = read(connfd, buf, sizeof(buf)); if (n < 0) { perror("read"); exit(EXIT_FAILURE); } if (n > 0) { printf("server received %d bytes\n", (int)n); int out = write(connfd, buf, n); if (out != n) { fprintf(stderr, "write error\n"); exit(EXIT_FAILURE); } } } void command(void) { char buf[MAXLINE]; char *ret = fgets(buf, MAXLINE, stdin); if (ret == NULL) { if (feof(stdin)) { /* EOF */ exit(EXIT_SUCCESS); } if (ferror(stdin)) { perror("fgets"); exit(EXIT_FAILURE); } } printf("%s", buf); /* Process the input command */ } /* token from common/csapp.c */ int open_listenfd(int port) { int listenfd, optval=1; struct sockaddr_in serveraddr; /* Create a socket descriptor */ if ((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) return -1; /* Eliminates "Address already in use" error from bind. */ if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (const void *)&optval , sizeof(int)) < 0) return -1; /* Listenfd will be an endpoint for all requests to port on any IP address for this host */ memset((char *) &serveraddr, 0, sizeof(serveraddr)); serveraddr.sin_family = AF_INET; serveraddr.sin_addr.s_addr = htonl(INADDR_ANY); serveraddr.sin_port = htons((unsigned short)port); if (bind(listenfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr)) < 0) return -1; /* Make it a listening socket ready to accept connection requests */ if (listen(listenfd, LISTENQ) < 0) return -1; return listenfd; } /* token from common/csapp.c */ int Open_listenfd(int port) { int rc = open_listenfd(port); if (rc < 0) { perror("Open_listenfd error"); exit(EXIT_FAILURE); } return rc; } /* token from common/csapp.c */ int Select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) { int rc = select(n, readfds, writefds, exceptfds, timeout); if (rc < 0) { perror("Select error"); 
/*
 * tmult_ok - determine whether x * y can be computed without overflow.
 *
 * Returns 1 when the product fits in an int, 0 when it overflows.
 *
 * The exact product is computed in long long.  The truncated w-bit product
 * is computed with unsigned arithmetic, because multiplying two ints that
 * overflow is undefined behaviour while unsigned multiplication wraps.
 * The two agree exactly when no overflow happened.
 */
int tmult_ok(int x, int y)
{
        long long exact = (long long)x * (long long)y;
        int truncated = (int)((unsigned)x * (unsigned)y);

        return exact == truncated;
}
/*
 * replace_byte - return x with byte number i replaced by b.
 *
 * Bytes are numbered from 0 (least significant).  i is reduced modulo
 * sizeof(unsigned) by masking, so it always selects a valid byte.
 *
 *   replace_byte(0x12345678, 0xAB, 2) --> 0x12AB5678
 *   replace_byte(0x12345678, 0xAB, 0) --> 0x123456AB
 *
 * The byte mask is built from the unsigned constant 0xFFu: shifting the int
 * constant 0xFF left by 24 would overflow a 32-bit int.
 */
unsigned replace_byte(unsigned x, unsigned char b, int i)
{
        int index_mask = sizeof(unsigned) - 1;  /* valid indexes: 0..3 */
        int shift = (i & index_mask) << 3;      /* byte index -> bit offset */

        /* 1. Clear the target byte: 0x12345678 -> 0x12005678 */
        unsigned cleared = x & ~(0xFFu << shift);

        /* 2. Move the new byte into place: 0xAB -> 0x00AB0000 */
        unsigned inserted = (unsigned)b << shift;

        /* 3. Combine them */
        return cleared | inserted;
}
/*
 * any_even_one - return 1 when any even-numbered bit of x equals 1,
 * 0 otherwise.  Bits are numbered from 0 (least significant); w = 32 is
 * assumed.
 *
 * 0x55555555 has exactly the even-numbered bits set.  The previous version
 * masked with 0x5555 and then with 1, so it only ever looked at bit 0.
 */
int any_even_one(unsigned x)
{
        return (x & 0x55555555u) != 0;
}
/*
 * even_ones - return 1 when x contains an even number of 1 bits,
 * 0 otherwise.  Assumes w = 32.
 *
 * XOR-ing the upper half of the word into the lower half cancels 1 bits in
 * pairs, so the parity of the lower half equals the parity of the whole.
 * Halving the distance each round (16, 8, 4, 2, 1) leaves the parity of the
 * whole word in bit 0.
 */
int even_ones(unsigned x)
{
        unsigned distance;

        for (distance = 16; distance != 0; distance >>= 1)
                x ^= x >> distance;

        /* Only bit 0 is meaningful; the higher bits may hold anything */
        return (x & 1) == 0;
}
/*
 * leftmost_one - generate a mask indicating the leftmost 1 in x.
 * Assumes w = 32.  For example 0xFF00 -> 0x8000 and 0x6000 -> 0x4000;
 * x = 0 gives 0.
 *
 * The word is first smeared into the form [0...011...1] by OR-ing it with
 * itself shifted right by 1, 2, 4, 8 and 16.  Removing every bit that has a
 * set neighbour to its left then leaves only the top bit of that run.
 */
int leftmost_one(unsigned x)
{
        unsigned step;

        for (step = 1; step < 32; step <<= 1)
                x |= x >> step;

        /* x is now [0...01...1]; keep only its highest set bit */
        return x & ~(x >> 1);
}
/*
 * rotate_right - rotate x right by n bit positions, 0 <= n < w.
 *
 *   x = 0x12345678: n = 4 -> 0x81234567, n = 20 -> 0x45678123
 *
 * The bits that fall off the right end are brought back in at the left.
 * That left shift is w - n in total; it is split into (w - n - 1) followed
 * by 1 so that n = 0 never asks for a shift by the full word width.
 */
unsigned rotate_right(unsigned x, int n)
{
        int width = sizeof(unsigned) * 8;
        unsigned wrapped = x << (width - n - 1);

        wrapped <<= 1;
        return wrapped | (x >> n);
}
/* Four signed bytes packed into one 32-bit unsigned word */
typedef unsigned packed_t;

/*
 * xbyte - extract byte number bytenum (0 = least significant, 3 = most
 * significant) from word and return it sign-extended to an int.
 *
 * The wanted byte is first shifted up into the most significant byte, then
 * shifted back down by 24.  The value is converted to int before the right
 * shift: the exercise assumes arithmetic right shifts of signed values, and
 * a right shift of the unsigned word would fill with zeros and lose the
 * sign.
 */
int xbyte(packed_t word, int bytenum)
{
        int shift_left = (3 - bytenum) << 3;

        return (int)(word << shift_left) >> 24;
}
/*
 * tsub_ovf - determine whether computing x - y overflows.
 *
 * Returns 1 when x - y cannot be represented in an int, 0 otherwise.
 *
 * The difference is formed with unsigned arithmetic, which wraps, because
 * an overflowing int subtraction is undefined behaviour.  Overflow can only
 * happen when x and y have different signs, and it did happen when the
 * sign of the wrapped difference differs from the sign of x.
 */
int tsub_ovf(int x, int y)
{
        unsigned sign_bit = (unsigned)INT_MIN;
        unsigned diff = (unsigned)x - (unsigned)y;

        int x_neg = ((unsigned)x & sign_bit) != 0;
        int y_neg = ((unsigned)y & sign_bit) != 0;
        int d_neg = (diff & sign_bit) != 0;

        int pos_overflow = !x_neg && y_neg && d_neg;    /* x >= 0, y < 0 */
        int neg_overflow = x_neg && !y_neg && !d_neg;   /* x < 0, y >= 0 */

        return pos_overflow || neg_overflow;
}
/*
 * unsigned_high_prod - high w bits of the 2w-bit product of the unsigned
 * values x and y, built on signed_high_prod().
 *
 * signed_high_prod() is handed the same bit patterns and its result is
 * corrected by adding x when the most significant bit of y is set and
 * adding y when the most significant bit of x is set.  All additions wrap
 * modulo 2^w.
 */
unsigned unsigned_high_prod(unsigned x, unsigned y)
{
        unsigned msb = (unsigned)INT_MIN;       /* only the top bit set */
        unsigned high = (unsigned)signed_high_prod(x, y);

        if (y & msb)
                high += x;
        if (x & msb)
                high += y;
        return high;
}
/*
 * mul5div8 - compute 5*x/8 rounding toward zero, reproducing the overflow
 * of the intermediate product 5*x.
 *
 * 5*x is formed as (x << 2) + x in unsigned arithmetic so that wraparound
 * is well defined.  The rounding bias (7 for a negative dividend, 0
 * otherwise) must follow the sign of that wrapped product, not the sign of
 * x: after an overflow the two can differ, and the product is what is
 * being divided.
 */
int mul5div8(int x)
{
        int prod = (int)(((unsigned)x << 2) + (unsigned)x);
        int negative = (prod & INT_MIN) == INT_MIN;
        int bias = (7 + !negative) & 7;         /* 7 if negative, else 0 */

        return (prod + bias) >> 3;
}
/*
 * B. Bit pattern 0^(w-n-m) 1^n 0^m: n ones sitting above m zeros.
 *
 *    ((1 << n) - 1) << m  =  (1 << (m + n)) - (1 << m)
 *
 * n: length of the run of ones.
 * m: number of zero bits below that run.
 */
int nlowestone_minus_mlowestone(int n, int m)
{
        int ones = (1 << n) - 1;        /* n low-order ones */

        return ones << m;               /* lift them above m zeros */
}
(int)(ux - uy) == -(y - x) * * 这个表达式求值为 1 * * 对于任意的数字 a,有如下的等式成立: * * -a = ~a + 1 = 2^w - a * * -(y - x) = 2^w - (y - x) = 2^w + x - y * 对于 w 位补码,算术运算的结果截断为 w 位。 * * ux - uy == 2^w + x - y */ printf("(int)(ux - uy) == -(y - x): %d\n", (int)(ux - uy) == -(y - x)); /* * E. ((x >> 1) << 1) <= x * * 这个表达式值为 1 * * 因为右移会将最低位置零,如果最低位是 1 的话,那么左移回来就会小于移位 * 之前的数;如果最低位是 0 的话,那么左移回来的数就会等于移位之前的数。 */ printf("((x >> 1) << 1) <= x: %d\n", ((x >> 1) << 1) <= x); return 0; } ================================================ FILE: exercise/ex2-82.txt ================================================ A. 对于串 Y,如果我们将小数点右移 k 位,得到的数字将是无穷串的开始 k 位。如果我们将 Y 右移 2k 位,得到的数字将是无穷串的 k+1~2k 位。依次类推,我们可以得到下列的公式: S = Y * (2^-k + 2^-2k + ... + 2^-nk) 我们可以发现,括号内是一个等比数列。关于等比数列的性质和求值公式,请参考维基 百科的资料:http://zh.wikipedia.org/wiki/%E7%AD%89%E6%AF%94%E6%95%B0%E5%88%97 因为这个等比数列的公比是 0 <= q < 1,因此,根据维基百科的资料,我们的公式最 后可得: S = Y * (2^-k / (1 - 2^-k)) B. (a) 001 => 1/7 (b) 1001 => 3/5 (c) 000111 => 1/9 ================================================ FILE: exercise/ex2-83.c ================================================ /* * 家庭作业 2.83 * * 填写下列程序的返回值,这个程序是测试它的第一个参数是否大于或者等于第二个参 * 数。假定函数 f2u 返回一个无符号 32 位数字,其位表示与它的浮点参数相同。你可 * 以假设两个参数都不是 NaN。两种 0,+0 和 -0 都认为是相等的。 * * int float_ge(float x, float y) { * unsigned ux = f2u(x); * unsigned uy = f2u(y); * * // Get the sign bits * unsigned sx = ux >> 31; * unsigned sy = uy >> 31; * * // Given an expression using only ux, uy, sx, sy * return ________; * } */ /* * 我们用 8 位表示浮点数字,1 位符号位,3 位阶码,4 位小数位。 * * 0 000 0000 +0 * 0 000 0001 正数最小 * ..... 递增 * 0 110 1111 正数最大 * 0 111 0000 +oo * 0 111 0001 NaN * ..... * 1 000 0000 -0 * 1 000 0001 负数最大 * ..... 递减 * 1 110 1111 负数最小 * 1 111 0000 -oo * 1 111 0001 NaN * ..... 
/*
 * float_ge - return 1 when x >= y, 0 otherwise.
 *
 * Neither argument may be NaN.  +0 and -0 compare equal.  f2u() returns
 * the bit pattern of its float argument as an unsigned 32-bit number.
 *
 * Cases, in order:
 *   - both arguments are zero (bit patterns 00...0 or 10...0): equal -> 1.
 *     The previous version returned ux + uy here, which is 0 for
 *     (+0, +0) and for (-0, -0);
 *   - the signs differ: x >= y exactly when x is the non-negative one;
 *   - both non-negative: a larger bit pattern is a larger value;
 *   - both negative: a larger bit pattern is a smaller value.
 */
int float_ge(float x, float y)
{
        unsigned ux = f2u(x);
        unsigned uy = f2u(y);

        /* Get the sign bits */
        unsigned sx = ux >> 31;
        unsigned sy = uy >> 31;

        /* Shifting out the sign bit leaves 0 only for +0 and -0 */
        if (ux << 1 == 0 && uy << 1 == 0)
                return 1;

        return sx ^ sy ? !sx :
               !sx     ? ux >= uy :
                         ux <= uy;
}
最小的规格化数的倒数 E = 1-k, M = 1/(2-§), V = -1 x M x 2^1-k 小数部分的最高 k-1 是 1,小数部分是 1/(2-§) ================================================ FILE: exercise/ex2-85.txt ================================================ ================================================================================ 家庭作业 2.85 的解答,题目请看 00-topic.txt ================================================================================ 0x0000 0000 0001 2^(-16445) 0x0001 8000 0000 2^(-16382) 0x7FFE FFFF FFFF 2^(16384) ================================================ FILE: exercise/ex2-86.txt ================================================ ================================================================================ 家庭作业 2.86 的解答,题目请看 00-topic.txt ================================================================================ 0x8000 0 -63 - 0x3F01 257/256 0 257×2^(-8) 0x4700 1 8 - 0x00FF 255/256 -62 255×2^(-70) 0xFF00 - - - 0x3AA0 13/8 -5 13×2^(-8) ================================================ FILE: exercise/ex2-87.txt ================================================ ================================================================================ 家庭作业 2.87 的解答,题目请看 00-topic.txt ================================================================================ 向 +oo 舍入的舍入方向为数轴正向,即舍入值>=原始值。向 +oo 舍入又称向上舍入。 208 0 1110 1010 208 -7/(2^10) 1 0000 0111 -7/(2^10) 5/(2^17) 0 0000 0001 1/(2^10) -4096 1 1110 1111 -248 768 0 1111 0000 +∞ ================================================ FILE: exercise/ex2-88.txt ================================================ ================================================================================ 家庭作业 2.88 的解答,题目请看 00-topic.txt ================================================================================ A. 0x20001 B. 1 C. dx:1, dy: 1e10, dz: -1e10 (浮点运算无结合性) D. INT_MAX E. 
/* Return the bit pattern of f as an unsigned value. */
unsigned f2u(float f)
{
        union {
                float f;
                unsigned u;
        } a;
        a.f = f;
        return a.u;
}

/*
 * Return the float whose bit pattern is x.
 * Assumes unsigned and float have the same width.
 */
float u2f(unsigned x)
{
        union {
                unsigned u;
                float f;
        } a;
        a.u = x;
        return a.f;
}

/*
 * Compute 2^x by building the IEEE single-precision bit pattern directly.
 *
 *   x < -149          too small, result is +0.0
 *   -149 <= x < -126  denormalized: exp field 0, a single fraction bit set
 *   -126 <= x < 128   normalized: exp field x + 127, fraction 0
 *   x >= 128          too big, result is +oo
 */
float fpwr2(int x)
{
        /* Result exponent and fraction */
        unsigned exp, frac;
        unsigned u;

        if (x < -149) {
                /* Too small.  Return 0.0 */
                exp = 0;
                frac = 0;
        } else if (x < -126) {
                /* Denormalized result: 2^x = 2^(x+149) * 2^-149 */
                exp = 0;
                frac = 1u << (x + 149);
        } else if (x < 128) {
                /*
                 * Normalized result.  The largest normalized exponent is 127;
                 * x == 128 would need exp field 0xFF, which is not a
                 * normalized number, so it belongs to the branch below.
                 */
                exp = (unsigned)(x + 127);
                frac = 0;
        } else {
                /* Too big.  Return +oo */
                exp = 0xFF;
                frac = 0;
        }

        /* Pack exp and frac into 32 bits */
        u = exp << 23 | frac;

        /* Return as float */
        return u2f(u);
}
/*
 * Return the bit pattern of f as an unsigned value.
 *
 * Kept file-local (static): this program is linked together with ex2-89.c
 * (see "How to build"), and that file defines an external f2u as well.  Two
 * external definitions of the same function make the link step fail.
 */
static unsigned f2u(float f)
{
        union {
                float f;
                unsigned u;
        } a;
        a.f = f;
        return a.u;
}
typedef unsigned float_bits;

/*
 * Compute -f on the bit-level representation.
 * A NaN argument is returned unchanged; every other value, including the
 * infinities and both zeros, gets its sign bit flipped.
 */
float_bits float_negate(float_bits f)
{
        const unsigned sign_bit = 0x80000000u;
        const unsigned pos_inf = 0x7F800000u;

        /*
         * With the sign removed, a NaN (exponent all ones, fraction non-zero)
         * is exactly a pattern that is larger than +oo.
         */
        if ((f & ~sign_bit) > pos_inf)
                return f;

        return f ^ sign_bit;
}
typedef unsigned float_bits;

/*
 * Compute 2*f on the bit-level representation.
 * NaN and the infinities are returned unchanged.
 */
float_bits float_twice(float_bits f)
{
        const unsigned sign_field = f & 0x80000000u;
        unsigned e = (f >> 23) & 0xFF;
        unsigned m = f & 0x7FFFFF;

        /* NaN, +oo, -oo */
        if (e == 0xFF)
                return f;

        /*
         * Denormalized (or zero): doubling is a left shift of the fraction.
         * When the top fraction bit is set it moves into bit 23, which is
         * exactly the encoding of exponent field 1.
         */
        if (e == 0)
                return sign_field | (m << 1);

        /* Normalized: bump the exponent; reaching 0xFF means overflow to oo */
        e += 1;
        if (e == 0xFF)
                m = 0;

        return sign_field | (e << 23) | m;
}
typedef unsigned float_bits;

/*
 * Compute the bit-level representation of (float) i.
 *
 * int and float are assumed to be 32 bits wide.  Every non-zero integer
 * converts to a normalized float, so the most significant 1 bit of the
 * magnitude becomes the implicit leading 1 and the bits below it form the
 * fraction.  Magnitudes that need more than 24 bits are rounded to nearest,
 * ties to even.
 *
 * All work is done on the unsigned magnitude.  Negating a signed int
 * overflows for INT_MIN (undefined behaviour), whereas 0u - x is well
 * defined and yields 2^31 for that input.
 */
float_bits float_i2f(int i)
{
        unsigned sign, exp, frac;
        unsigned mag, top;

        if (i == 0)
                return 0;

        sign = (unsigned)i >> 31;
        mag = sign ? 0u - (unsigned)i : (unsigned)i;

        /* Index of the most significant 1 bit; mag != 0, so the loop stops */
        for (top = 31; ((mag >> top) & 1u) == 0; top--)
                ;
        exp = top + 127;

        if (top > 23) {
                /* Low bits fall off the end: round to nearest, ties to even */
                unsigned shift = top - 23;
                unsigned dropped = mag & ((1u << shift) - 1);
                unsigned half = 1u << (shift - 1);
                unsigned odd = (mag >> shift) & 1u;
                unsigned round = dropped > half || (dropped == half && odd);

                frac = (mag >> shift) & 0x7FFFFF;
                if (frac == 0x7FFFFF && round) {
                        /* Rounding carries out of the fraction: 1.11..1 -> 10.0 */
                        frac = 0;
                        exp++;
                } else {
                        frac += round;
                }
        } else {
                /* Everything fits: left-align below the implicit leading 1 */
                frac = (mag << (23 - top)) & 0x7FFFFF;
        }

        return (sign << 31) | (exp << 23) | frac;
}
/*
 * Return 1 when x * y fits in an int, otherwise return 0.
 *
 * The product is formed in long long, so it never overflows.  Multiplying in
 * int first and dividing afterwards is undefined on overflow, and an
 * optimizing compiler may fold "x * y / x == y" to true.
 */
int tmul_ok_32(int x, int y)
{
        const long long int_max = (long long)(~0u >> 1);
        long long p = (long long)x * y;

        return p >= -int_max - 1 && p <= int_max;
}

/*
 * A solution: the largest n such that n! fits in an int.
 */
int largest_factorial_32(void)
{
        int result = 1;
        int n = 1;

        /* Stop at the first n whose product would overflow */
        while (tmul_ok_32(result, n + 1)) {
                n++;
                result *= n;
        }

        return n;
}

/*
 * Return 1 when x * y fits in a long long, otherwise return 0.
 *
 * No wider type is available, so the check divides the representable limit
 * by one operand before anything is multiplied.
 */
int tmul_ok_64(long long int x, long long int y)
{
        const long long max = (long long)(~0ULL >> 1);
        const long long min = -max - 1;

        if (x == 0 || y == 0)
                return 1;

        if (x > 0)
                return y > 0 ? x <= max / y     /* positive product */
                             : y >= min / x;    /* negative product */

        return y > 0 ? x >= min / y             /* negative product */
                     : y >= max / x;            /* positive product */
}

/*
 * B solution: the largest n such that n! fits in a long long.
 */
int largest_factorial_64(void)
{
        long long int result = 1;
        int n = 1;

        while (tmul_ok_64(result, n + 1)) {
                n++;
                result *= n;
        }

        return n;
}
http://stackoverflow.com/questions/11680720/implement-64-bit-arithmetic-on-a-32-bit-machine ;; dest at %ebp+8, x at %ebp+12, y at %ebp+20 movl 12(%ebp), %esi ; x_low -> esi movl 20(%ebp), %eax ; y -> eax movl %eax, %edx ; y -> edx sarl $31, %edx ; y>>31 -> edx movl %edx, %ecx ; y>>31 -> ecx imull %esi, %ecx ; x_low * (y>>31) -> ecx movl 16(%ebp), %ebx ; x_high -> ebx imull %eax, %ebx ; y * x_high -> ebx addl %ebx, %ecx ; y * x_high + (y>>31) * x_low -> ecx mull %esi ; x_low * y -> edx:eax leal (%ecx, %edx), %edx ; y * x_high + (y>>31)* x_low + edx -> edx movl 8(%ebp), %ecx ; dest -> ecx movl %eax, (%ecx) ; (x*y)_low -> *dest movl %edx, 4(%ecx) ; (x*y)_high -> *(dest+4) ;; 现在描述实现以上运算的算法: ;; 由于两个 64 位数字相乘,它的结果的 64 位表示,对于有符号和无符号都是相同的 ;; 1. a = x_low * y_low ;; 2. b = x_low * y_high ;; 3. c = x_high * y_low ;; 4. result = a + b + c ================================================ FILE: exercise/ex3-56.txt ================================================ ==== 3.56 ==== A. esi: x ebx: n edi: result edx: mask B. result: 0x55555555 mask: 0x80000000 C. mask != 0 D. mask = (unsigned)mask >> (n&0xFF) E. result ^= x & mask ================================================================================ int loop(int x, int n) { int result = 0x55555555; int mask; for (mask = 0x80000000; mask != 0; mask = (unsigned)mask & (n&0xFF)) result ^= x & mask; return result; } ================================================================================ ================================================ FILE: exercise/ex3-57.c ================================================ /* * 3.57 * * 参考链接: * * http://book.douban.com/annotation/20427266/ */ /* * 在条件传送中,既然不能对数值为 0 的地址取值。通过思维转换,那么就取数值 0 的地 * 址和数值非 0 的地址。然后再通过这个指针取值 */ int cread_alt(int *xp) { int t = 0; return *(xp ? 
/*
 * Compute element (i, k) of the product of the n x n matrices A and B,
 * i.e. the dot product of row i of A with column k of B.
 *
 * The row is walked with a unit stride and the column with a stride of n
 * elements, so the loop needs no index multiplication.  The column pointer
 * now advances on every iteration; before, the same element B[1][k] was
 * used for every term.
 */
int var_prod_ele(int n, int A[n][n], int B[n][n], int i, int k)
{
        int j;
        int result = 0;

        const int *arow = &A[i][0];     /* A[i][j], next element each step */
        const int *bcol = &B[0][k];     /* B[j][k], next row each step */

        for (j = 0; j < n; j++) {
                result += arow[j] * *bcol;
                bcol += n;
        }

        return result;
}
/*
 * Transpose the M x M matrix A in place.
 *
 * For every row i, the elements left of the diagonal, A[i][j] with j < i,
 * are exchanged with their mirror elements A[j][i].  col walks down column i
 * one row (M ints) at a time, so it always points at A[j][i].
 *
 * Both halves of the exchange are performed.  Before, t was read but never
 * stored back, so the upper triangle kept its old values.
 */
void transpose(int M, int A[M][M])
{
        int i, j;

        for (i = 0; i < M; i++) {
                int *col = &A[0][i];
                for (j = 0; j < i; j++) {
                        int t = A[i][j];
                        A[i][j] = *col;
                        *col = t;
                        col += M;
                }
        }
}
填写的缺失代码如下 */ void proc(union ele *up) { up->next->x = *(up->next->p) - up->y; } ================================================ FILE: exercise/ex3-68.c ================================================ /* * 3.68 * * mofaph@gmail.com */ #include #define MAX_INPUT 5 /* 以小一点的数字,使问题可以方便地出现(如果有的话) */ int good_echo(void) { char s[MAX_INPUT]; char *ret; ret = fgets(s, sizeof(s), stdin); if (ret == NULL) { if (!feof(stdin)) /* 输入错误 */ return -1; goto done; /* 输入了 EOF */ } int out; out = fputs(s, stdout); if (out == EOF) return -1; /* 输出错误 */ done: return 0; } ================================================ FILE: exercise/ex3-69.c ================================================ /* * 3.69 * * $ cc -m64 -S ex3-69.c # 使用 -m64 生成 64 位代码 * * mofaph@gmail.com */ typedef struct ELE *tree_ptr; struct ELE { tree_ptr left; tree_ptr right; long val; }; /* * 这个函数用于找到二叉树的左子树的第一个叶子节点的 val * * o * / \ * o o * / \ / \ * o o o o * ^ * | * 找到这个节点的值 */ long trace(tree_ptr tp) { long node = 0; while (tp) { node = tp->val; tp = tp->left; } return node; } ================================================ FILE: exercise/ex3-70.c ================================================ /* * 3.70 * * $ gcc -m64 -S ex3-70.c # 生成 64 位的汇编代码 * * mofaph@gmail.com */ typedef struct ELE *tree_ptr; struct ELE { tree_ptr left; tree_ptr right; long val; }; /* * 递归计算二叉树中所有节点中的最小值 */ long traverse(tree_ptr tp) { if (!tp) return -1; long v = tp->val; long vleft = traverse(tp->left); long vright = traverse(tp->right); if (vright > vleft) vright = vleft; if (v > vright) v = vright; return v; } ================================================ FILE: exercise/ex7-10.txt ================================================ 7.10 mofaph@gmail.com -------------------------------------------------------------------------------- A. gcc -static p.o libx.a B. gcc -static p.o libx.a liby.a libx.a C. 
x -> y -> z \--> x -> z gcc -static p.o libx.a liby.a libx.a libz.a ================================================ FILE: exercise/ex7-11.txt ================================================ 7.11 mofaph@gmail.com -------------------------------------------------------------------------------- .bss ================================================ FILE: exercise/ex7-12.txt ================================================ 7.12 mofaph@gmail.com -------------------------------------------------------------------------------- |------------------+-----------+-----------| | 图 7-10 中的符号 | 地址 | 值 | |------------------+-----------+-----------| | 15 | 0x80483c8 | 0x804945c | |------------------+-----------+-----------| | 16 | 0x80483d0 | 0x8049458 | |------------------+-----------+-----------| | 18 | 0x80483d8 | 0x8049548 | |------------------+-----------+-----------| | 18 | 0x80483dc | 0x8049458 | |------------------+-----------+-----------| | 23 | 0x80483e7 | 0x8049548 | |------------------+-----------+-----------| ================================================ FILE: exercise/ex7-13.txt ================================================ 7.13 mofaph@gmail.com -------------------------------------------------------------------------------- A. 0xc R_386_PC32 p3 0x13 R_386_32 xp 0x15 R_386_PC32 p2 B. 0x4 R_386_32 x 我们也可以使用 objdump 来帮助我们完成这道题目,假设有一个目标文件是 a.o,那么我 们可以这样查看它的重定位条目的信息: $ objdump -rs a.o ================================================ FILE: exercise/ex7-14.txt ================================================ 7.14 mofaph@gmail.com -------------------------------------------------------------------------------- A. .text: 0x25 R_386_32 .text 0x11 R_386_32 .text 0x28 R_386_32 .text 0x28 R_386_32 .text 0x28 R_386_32 .text B. 
A.

# Count the members of every libc.a / libm.a found under /lib and /usr/lib.
# The loop must iterate over "libs", the variable assigned on the first line;
# "$lib" is still empty at that point, so the loop body never ran.
$ libs=$(find /{,usr/}lib -type f -name "libc.a" -o -name "libm.a")
$ for lib in $libs; do printf "$lib: "; ar t $lib | wc -l; done
*/ double x; void f() { return; x = -0.0; } /* * Solution 3 */ /* double x; */ void f() { /* x = -0.0; */ } ================================================ FILE: exercise/ex7-8.txt ================================================ 7.8 mofaph@gmail.com A. a) REF(main.1) --> DEF(main.1) b) REF(main.2) --> DEF(main.2) B. a) REF(x.1) --> DEF(x.UNKNOWN) b) REF(x.2) --> DEF(x.UNKNOWN) C. a) REF(x.1) --> DEF(x.ERROR) b) REF(x.2) --> DEF(x.ERROR) ================================================ FILE: exercise/ex7-9.txt ================================================ 7.9 mofaph@gmail.com -------------------------------------------------------------------------------- 这道习题我没能解决,因为我没有注意到 foo6.c 中的 main 是一个强符号。不过,幸好我 看了第一版的参考答案,幸好第一版也有这道题目,幸好我手里有这份参考答案。 :-) 要理解这道题目,我们需要知道 UNIX 链接器处理多重符号的 3 个规则: 1. 不允许有多个强符号 2. 如果同时有一个强符号和多个弱符号,选择强符号 3. 如果有多个弱符号,那么从这些弱符号中任选一个 在这道题中,foo6.c 中的符号 main 是强符号,bar6.c 中的 main 是弱符号。 在 bar6.c 中打印 main 时,链接器选择了强符号,也就是在 foo6.c 中的 main 函数。在 目标文件中,符号 main 的地址就是 main 函数的起始地址。 因此,在 bar6.c 中的打印语句中,将会打印 main 函数的第一个字节的值。在 IA32 中, 函数的第一条指令通常是压栈指令: pushl %ebp 这条指令的机器码就是 0x55。 PS: 可以在下面给出的地址找到参考答案 [1] resource/im/im/im.pdf [2] ftp://202.120.40.101/Courses/Computer_Architecture/csapp.cs.cmu.edu/im/im/im.pdf ================================================ FILE: exercise/ex8-10.txt ================================================ 8.10 mofaph@gmail.com -------------------------------------------------------------------------------- A. 调用一次,返回两次。 fork B. 调用一次,从不返回。 longjmp execve C. 调用一次,返回一次或者多次。 setjmp ================================================ FILE: exercise/ex8-11.c ================================================ /* * 8.11 * * 这个程序会输出多少个“hello”输出行? 
/*
 * ex8-12: fork twice and print one line in every resulting process.
 */
void doit()
{
        Fork();
        Fork();
        printf("hello\n");
        return;
}

/*
 * Every process that returns from doit() prints a second line.
 *
 * main is declared to return int, so it has to return a value; a bare
 * "return;" is not allowed in a non-void function and leaves the exit
 * status of the process unspecified.
 */
int main()
{
        doit();
        printf("hello\n");
        return 0;
}
* * unix> gcc -I../common ex8-15.c ../common/csapp.c -lpthread * * 第一个 Fork 失败: 0 * 第一个 Fork 成功,第二个 Fork 失败: 1 * 第一个 Fork 成功,第二个 Fork 成功: 5 */ #include "csapp.h" void doit() { if (Fork() == 0) { Fork(); printf("hello\n"); return; } return; } int main() { doit(); printf("hello\n"); exit(0); } ================================================ FILE: exercise/ex8-16.c ================================================ /* * 8.16 * * unix> gcc -I../common ex8-16.c ../common/csapp.c -lpthread * counter = 2 */ #include "csapp.h" int counter = 1; int main() { if (fork() == 0) { counter--; exit(0); } else { Wait(NULL); printf("counter = %d\n", ++counter); } exit(0); } ================================================ FILE: exercise/ex8-17.txt ================================================ 8.17 mofaph@gmail.com -------------------------------------------------------------------------------- fork() 失败: Hello 0 Bye fork() 成功: 1. 父进程先运行 Hello 0 1 Bye 2 Bye 2. 子进程先运行 Hello 1 Bye 0 2 Bye 3. 子进程先运行,在它运行期间,内核调度父进程运行 Hello 1 0 Bye 2 Bye ================================================ FILE: exercise/ex8-18.c ================================================ /* * 8.18 * * mofaph@gmail.com * * unix> cc -I../common ex8-18.c ../common/csapp.c -lpthread * * 判断下面哪个输出是可能的。注意:atexit 函数以一个指向函数的指针为输入,并将它 * 添加到函数列表中(初始为空),当 exit 函数被调用时,会调用该列表中的函数。 * * A. 112002 * B. 211020 * C. 102120 * D. 122001 * E. 
/*
 * The environment of the process.  environ is a pointer object
 * (char **), not an array.  Declaring it as "char *environ[]" makes the
 * expression `environ` evaluate to the address of that pointer instead of
 * its value, and execve() then receives garbage as the environment, so
 * variables such as COLUMNS never reach /bin/ls.
 */
extern char **environ;

/*
 * myls (ex8-20): run /bin/ls with the caller's arguments and environment in
 * a child process and wait for it to finish.
 *
 * Exit status: 1 when fork fails, 3 when waitpid fails, 0 otherwise.  The
 * child exits with 2 when /bin/ls cannot be executed.
 */
int main(int argc, char *argv[])
{
        pid_t pid;

        pid = fork();
        if (pid < 0) {
                perror("fork");
                exit(1);
        }

        /* child */
        else if (pid == 0) {
                execve("/bin/ls", argv, environ);

                /* Only reached when execve failed */
                perror("execve");
                exit(2);
        }

        /* parent */
        else {
                int status, ret;
                ret = waitpid(pid, &status, 0);

                /* error */
                if (ret < 0) {
                        perror("waitpid");
                        exit(3);
                }

                /* nothing */
        }

        return 0;
}
/*
 * The environment of the process.  environ is a pointer object
 * (char **), not an array.  With the declaration "char *environ[]" the
 * expression `environ` is the address of that pointer, so execve() was
 * handed a bogus environment.  That is the likely reason programs such as
 * emacs, nano or top complained that TERM was not set.
 */
extern char **environ;

/*
 * Run "command" through "/bin/sh -c command" and wait for it to finish.
 *
 * Returns
 *   - the exit status of the shell when it terminated normally,
 *   - the raw wait status when it terminated abnormally (for example, was
 *     killed by a signal), so that the caller can tell this from success,
 *   - -1 when fork or waitpid failed.
 *
 * A NULL command makes the child print a message and exit with status -1.
 * When /bin/sh cannot be executed the child exits with status 127.
 */
int mysystem(char *command)
{
        pid_t pid;

        pid = fork();

        /* error */
        if (pid < 0) {
                perror("fork");
                return -1;
        }

        /* child */
        else if (pid == 0) {
                char *argv[4];

                if (!command) {
                        fprintf(stderr, "mysystem: command is NULL\n");
                        exit(-1);
                }

                /* Give full-screen programs a usable default terminal type */
                if (!getenv("TERM")) {
                        int err = setenv("TERM", "xterm", 0);
                        if (err)
                                fprintf(stderr, "TERM not set\n");
                }

                argv[0] = "sh";
                argv[1] = "-c";
                argv[2] = command;
                argv[3] = NULL;
                execve("/bin/sh", argv, environ);

                /*
                 * Something went wrong.  The child must terminate here:
                 * returning would let a second copy of the caller continue
                 * to run.  _exit() skips the atexit handlers and stdio
                 * buffers inherited from the parent.
                 */
                perror("execve");
                _exit(127);
        }

        /* parent */
        else {
                int status, ret;
                ret = waitpid(pid, &status, 0);

                /* waitpid error */
                if (ret < 0) {
                        perror("waitpid");
                        return -1;
                }

                /* The child process terminated normally */
                if (WIFEXITED(status))
                        return WEXITSTATUS(status);

                /* Abnormal termination: hand back the status as reported */
                return status;
        }
}
signal_handler; sigemptyset(&action.sa_mask); /* block sigs of type being handled */ action.sa_flags = SA_RESTART; /* restart syscalls if possible */ if (sigaction(SIGALRM, &action, &old_action) < 0) { perror("sigaction"); return NULL; } unsigned int max_waiting = 5; /* seconds */ alarm(max_waiting); int rc; rc = sigsetjmp(env, 1); /* saving signal mask in env */ if (rc == 0) { return fgets(s, size, stream); } else { /* time out */ return NULL; } } ================================================ FILE: exercise/ex8-26/Makefile ================================================ # Makefile for ex8-26 # mofaph@gmail.com # 2013-7-7 CC = gcc CFLAGS = -Wall PROGRAM += t PROGRAM += t-job OBJS += ex8-26.o OBJS += random_fork.o OBJS += job.o TOBJS += t-job.o PHONY += all PHONY += clean PHONY += TAGS .PHONY: $(PHONY) all: $(PROGRAM) t: $(OBJS) $(CC) $(CFLAGS) $^ -o $@ t-job: t-job.o job.o $(CC) $(CFLAGS) $^ -o $@ ex8-26.o: shellex.c $(CC) $(CFLAGS) -c $< -o $@ t-job.o: t-job.c job.h job.o: job.c job.h TAGS: find . 
-type f -name "*.[ch]" -print | xargs etags - clean: rm -f $(PROGRAM) $(OBJS) $(TOBJS) ================================================ FILE: exercise/ex8-26/job.c ================================================ /* * 8.26 * * mofaph@gmail.com * 2013-7-14 * * 这个文件包含了用来处理作业控制的代码 */ #include #include #include #include "job.h" /* * 作业控制列表从 1 开始计数,第 0 个位置是前台进程组 */ static const int beg = 1; static int max, end; static struct job **job; /* empty: 1, otherwise: 0 */ int is_empty(struct job *job_list[], int len) { int i; for (i = beg; i < len; i++) { if (job_list[i]) return 0; } return 1; } void init_job(void) { /* Background */ max = 2; end = 1; job = malloc(sizeof(struct job *) * max); if (!job) { fprintf(stderr, "init_job: malloc error\n"); exit(-1); } /* Foreground */ job[0] = malloc(sizeof(struct job)); if (!job[0]) { fprintf(stderr, "init_job: init foreground process failed!\n"); free_job(); exit(-1); } job[0]->command = NULL; } void free_job(void) { int i; for (i = 0; i < end; i++) free(job[i]); free(job); } int add_job(pid_t pgid, int state, char *command, int len) { struct job *j = malloc(sizeof(struct job)); if (!j) goto malloc_failed; j->pgid = pgid; j->state = state; j->command = malloc(len); if (!j->command) goto malloc_failed; memmove(j->command, command, len); if (end != beg && is_empty(job, end)) end = beg; if (end+1 == max) { struct job **old_job = job; int new_max = max * 3 / 2; job = realloc(old_job, new_max*sizeof(struct job *)); if (!job) { job = old_job; free(j->command); goto malloc_failed; } max = new_max; } job[end] = j; end += 1; printf("add_job: job list %d long now\n", end); return 0; malloc_failed: perror("malloc"); fprintf(stderr, "add_job: malloc failed!\n"); free(j); /* 释放 NULL 是无害的 */ return -1; } int find_job(pid_t pgid) { int i; for (i = beg; i < end; i++) if (job[i] && job[i]->pgid == pgid) return i; return -1; } void delete_job(pid_t pgid) { int jid = find_job(pgid); if (jid >= beg) { free(job[jid]); job[jid] = NULL; } } void 
update_job(pid_t pgid, int state) { int jid = find_job(pgid); if (beg <= jid && jid < end) { job[jid]->state = state; printf("[%d] %d %s\n", jid, (int)pgid, job[jid]->command); } else { printf("%d: No such process\n", (int)pgid); } } pid_t get_pgid(int jid) { if (jid < end && job[jid]) return job[jid]->pgid; return -1; } void print_job(int i) { printf("[%d] %d", i, (int)job[i]->pgid); char *job_state; if (job[i]->state == JOB_RUNNING) job_state = "Running"; else if (job[i]->state == JOB_STOPPED) job_state = "Stopped"; else if (job[i]->state == JOB_DONE) job_state = "Done"; else job_state = "Unkown"; printf(" %s %s\n", job_state, job[i]->command); } void print_job_by_pgid(pid_t pgid) { int jid = find_job(pgid); print_job(jid); } void print_all_job(void) { int i; for (i = beg; i < end; i++) { if (!job[i]) continue; print_job(i); if (job[i]->state == JOB_DONE) delete_job(job[i]->pgid); } } void print_foreground(void) { if (job[0]->command) printf("%s\n", job[0]->command); } void set_foreground(pid_t pgid, char *command, int len) { job[0]->pgid = pgid; job[0]->state = JOB_RUNNING; job[0]->command = malloc(len); if (!job[0]->command) { perror("malloc"); exit(-1); } memmove(job[0]->command, command, len); } pid_t foreground_pgid(void) { return job[0]->pgid; } void get_foreground_command(char *command) { int len = strlen(job[0]->command) + 1; memmove(command, job[0]->command, len); } void move_to_background(pid_t pgid, int state, char *command, int len) { int jid = find_job(pgid); if (jid < 0) add_job(pgid, state, command, len); else update_job(pgid, state); } void move_to_foreground(pid_t pgid) { int jid = find_job(pgid); if (jid >= beg) *job[0] = *job[jid]; else printf("%d: No such process\n", (int)pgid); } ================================================ FILE: exercise/ex8-26/job.h ================================================ /* * 8.26 * * mofaph@gmail.com * 2013-7-14 */ #ifndef __job_h #define __job_h #include #define JOB_RUNNING 0 #define JOB_STOPPED 1 #define 
/* One job tracked by the shell's job-control list. */
struct job {
        pid_t pgid;             /* process group ID */
        int state;              /* job state (JOB_RUNNING, JOB_STOPPED, JOB_DONE) */
        char *command;          /* command line that started the job */
};
/*
 * reap_child - SIGCHLD handler: reap every child that has already
 * terminated, without blocking, and drop it from the job list.
 *
 * errno is saved and restored so that code interrupted by this handler
 * (for example the errno test after setpgid() in eval()) does not see a
 * value left behind by waitpid().  The loop stops as soon as waitpid()
 * reports "nothing to reap" (0) or any error, so an unexpected error can
 * no longer spin forever.
 *
 * NOTE(review): printf() and delete_job() (which calls free()) are not
 * async-signal-safe; they are kept to preserve the existing output.
 */
void reap_child(int signo)
{
        int saved_errno = errno;
        int status;             /* only inspected for "killed by signal" */
        pid_t pid;

        (void)signo;

        while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
                if (WIFSIGNALED(status))
                        printf("Job %d terminated by signal: %s\n",
                               (int)pid, strsignal(WTERMSIG(status)));
                delete_job(pid);
        }

        errno = saved_errno;
}
strsignal(WTERMSIG(status))); delete_job(pgid); } else { delete_job(pgid); } } /* If first arg is a builtin command, run it and return true */ int builtin_command(char **argv) { if (!strcmp(argv[0], "quit")) { exit(0); } else if (!strcmp(argv[0], "bg") || !strcmp(argv[0], "fg")) { int jid; pid_t pgid; char *p = argv[1]; if (*p == '%') { p += 1; jid = atoi(p); pgid = get_pgid(jid); if (pgid < 0) { fprintf(stderr, "%d: No such job\n", jid); return 1; } } else { pgid = atoi(p); } kill(-pgid, SIGCONT); if (!strcmp(argv[0], "fg")) { move_to_foreground(pgid); print_foreground(); wait_for_job(pgid); } else { update_job(pgid, JOB_RUNNING); } return 1; } else if (!strcmp(argv[0], "jobs")) { print_all_job(); return 1; } else if (!strcmp(argv[0], "&")) { /* Ignore singleton & */ return 1; } else { return 0; /* Not a builtin command */ } } /* parseline - Parse the command line and build the argv array */ int parseline(char *buf, char **argv) { char *delim; /* Points to first space delimiter */ int argc; /* Number of args */ int bg; /* Background job? */ buf[strlen(buf)-1] = ' '; /* Replace trailing '\n' with space */ while (*buf && (*buf == ' ')) /* Ignore leading spaces */ buf++; /* Build the argv list */ argc = 0; while ((delim = strchr(buf, ' '))) { argv[argc++] = buf; *delim = '\0'; buf = delim + 1; while (*buf && (*buf == ' ')) /* Ignore spaces */ buf++; } argv[argc] = NULL; if (argc == 0) /* Ignore blank line */ return 1; /* Should the job run in the background? 
*/ if ((bg = (*argv[argc-1] == '&')) != 0) argv[--argc] = NULL; return bg; } void run_the_job(char *filename, char *argv[], char *env[]) { int ret = setpgid(0, 0); if (ret < 0) { fprintf(stderr, "Failed to set group process id\n"); exit(-1); } if (execve(argv[0], argv, env) < 0) { printf("%s: Command not found.\n", argv[0]); exit(0); } } /* eval - Evaluate a command line */ void eval(char *cmdline) { char buf[MAXLINE]; /* Holds modified command line */ strcpy(buf, cmdline); char *argv[MAXARGS]; /* Argument list execve() */ int bg = parseline(buf, argv); /* Should the job run in bg or fg? */ if (argv[0] == NULL) return; /* Ignore empty lines */ if (builtin_command(argv)) return; pid_t pid = random_fork(); if (pid < 0) { perror("random_fork"); exit(0); } /* Child runs user job */ if (pid == 0) run_the_job(argv[0], argv, environ); /* Parent waits for foreground job to terminate */ int ret = setpgid(pid, pid); if (ret < 0 && errno != EPERM && errno != EACCES) return; char buf2[MAXLINE]; strcpy(buf2, cmdline); int len2 = strlen(buf2); buf2[len2] = '\0'; if (!bg) { set_foreground(pid, buf2, len2); wait_for_job(pid); } else { printf("%d %s", pid, cmdline); pid_t ret = waitpid(pid, NULL, WNOHANG | WUNTRACED); if (ret == 0) move_to_background(pid, JOB_RUNNING, buf2, len2); else if (ret == pid) move_to_background(pid, JOB_STOPPED, buf2, len2); } } void stop_handler(int signo) { pid_t pgid = foreground_pgid(); char command[MAXLINE]; get_foreground_command(command); int len = strlen(command) + 1; kill(-(int)pgid, SIGSTOP); if (pgid > 0) move_to_background(pgid, JOB_STOPPED, command, len); fprintf(stderr, "move job %d to background\n", pgid); } void interrupt_handler(int signo) { pid_t pgid = foreground_pgid(); kill(-(int)pgid, SIGINT); } void install_signal_handler(void) { int ret; /* Install SIGTSTP handler */ struct sigaction new, old; new.sa_handler = stop_handler; sigemptyset(&new.sa_mask); new.sa_flags = SA_RESTART; ret = sigaction(SIGTSTP, &new, &old); if (ret < 0) { 
fprintf(stderr, "Warning: Failed to install SIGTSTP handler, " "Leave to default handler\n"); } /* Install SIGINT handler */ memset(&new, 0, sizeof(new)); new.sa_handler = interrupt_handler; sigemptyset(&new.sa_mask); new.sa_flags = SA_RESTART; ret = sigaction(SIGINT, &new, &old); if (ret < 0) { fprintf(stderr, "Warning: Failed to install SIGINT handler, " "Leave to default handler\n"); } /* Install SIGCHLD handler */ memset(&new, 0, sizeof(new)); new.sa_handler = reap_child; sigemptyset(&new.sa_mask); new.sa_flags = SA_RESTART; ret = sigaction(SIGCHLD, &new, &old); if (ret < 0) { fprintf(stderr, "Warning: Failed to install SIGCHLD handler, " "Leave to default handler"); } } int main(void) { init_job(); install_signal_handler(); for (;;) { /* Read */ printf("> "); char cmdline[MAXLINE]; /* Command line */ char *ret = fgets(cmdline, MAXLINE, stdin); if (ret == NULL) { if (ferror(stdin)) { perror("fgets"); free_job(); return -1; } } if (feof(stdin)) { free_job(); return 0; } /* Evaluate */ eval(cmdline); } } ================================================ FILE: exercise/ex8-26/t-job.c ================================================ /* * 8.26 * * mofaph@gmail.com * 2013-7-14 * * 测试 job.c * * unix> cc -Wall t-job.c job.c -o t-job */ #include #include #include "job.h" #define MAXLINE 1024 int main(void) { char buf[MAXLINE]; init_job(); pid_t pgid; int state; char command[MAXLINE]; do { printf("[A]dd/[D]elete/[P]rint/[Q]uit/[S]et_fg/[F]g/[B]g/[p]rint_fg?\n"); fgets(buf, sizeof(buf), stdin); switch (buf[0]) { case 'A': printf("pgid state command: "); fflush(stdin); scanf("%d %d %s", &pgid, &state, command); add_job(pgid, state, command, MAXLINE); printf("Add job %d to job list\n", (int)pgid); break; case 'D': print_all_job(); printf("JID? 
"); scanf("%d", &pgid); delete_job((int)pgid); break; case 'P': print_all_job(); break; case 'S': printf("pgid command: "); fflush(stdin); scanf("%d %s", &pgid, command); set_foreground(pgid, command, MAXLINE); break; case 'p': print_foreground(); break; case 'F': print_all_job(); printf("JID? "); scanf("%d", &pgid); move_to_foreground(pgid); break; case 'B': printf("pgid state command: "); fflush(stdin); scanf("%d %d %s", &pgid, &state, command); int len = strlen(command) + 1; move_to_background(pgid, state, command, len); break; default: break; } } while (buf[0] != 'Q'); free_job(); return 0; } ================================================ FILE: exercise/ex8-3.c ================================================ /* * 8.3 -- code/ecf/waitprob0.c * * 列出下面程序所有可能的输出序列 * * $ gcc -iquote../common ../common/csapp.c ex8-3.c -lpthread */ #include #include #include "csapp.h" int main(void) { if (Fork() == 0) { /* child */ printf("a"); } else { /* parent */ printf("b"); waitpid(-1, NULL, 0); } printf("c"); exit(0); } ================================================ FILE: exercise/ex8-4.c ================================================ /* * 8.4 -- code/ecf/waitprob1.c * * A. 这个程序会产生多少输出行? * B. 这些输出行的一种可能的顺序是什么? * * 个人解答在文件末尾 * * 编译运行: * * $ gcc -iquote../../common ex8-4.c ../../common/csapp.c -lpthread */ #include #include "csapp.h" int main() { int status; pid_t pid; printf("Hello\n"); pid = Fork(); printf("%d\n", !pid); if (pid != 0) if (waitpid(-1, &status, 0) > 0) if (WIFEXITED(status) != 0) printf("%d\n", WEXITSTATUS(status)); printf("Bye\n"); exit(2); } /* * A. 6 * * B. Hello/0/1/Bye/2/Bye * Hello/1/Bye/0/2/Bye * Hello/1/0/Bye/2/Bye */ ================================================ FILE: exercise/ex8-5.c ================================================ /* * 8.5 * * 编写一个 sleep 的包装函数,叫做 snooze,带有下面的接口 * * unsigned int snooze(unsigned int secs); * * 除了 snooze 函数会打印出一条信息来描述进程实际休眠了多长时间外,它和 sleep 函 * 数的行为完全一样: * * Slept for 4 of 5 secs. 
*/ #include #include unsigned int snooze(unsigned int secs) { unsigned sleep_secs; sleep_secs = sleep(secs); printf("Slept for %u of %u secs\n", secs - sleep_secs, secs); return sleep_secs; } int main(void) { snooze(3); return 0; } ================================================ FILE: exercise/ex8-6.c ================================================ /* * 8.6 -- p502 * * 编写一个叫做 myecho 的程序,它打印出它的命令行参数和环境变量 * * unix> cc ex8-6.c -o myecho */ #include int main(int argc, char *argv[]) { int i; printf("Command line arguments:\n"); for (i = 0; i < argc; i++) printf("\targv[%d]: %s\n", i, argv[i]); printf("\n"); printf("Environment variables:\n"); extern char **environ; for (i = 0; environ[i] != NULL; i++) printf("\tenvp[%2d]: %s\n", i, environ[i]); return 0; } ================================================ FILE: exercise/ex8-7.c ================================================ /* * 练习题 8.7 编写一个叫做 snooze 的程序 * * 有一个命令行参数,用这个参数调用练习题中的 snooze 函数,然后终止。 * 编写程序,使得用户可以通过在键盘上输入 ctrl-c 中断 snooze 函数。 * * unix> cc ex8-7.c * unix> ./a.out 5 * Slept for 3 of 5 secs. User hits ctrl-c after 3 seconds */ #include #include #include #include #include #include void handler(int sig) { return; } unsigned int snooze(unsigned int secs) { unsigned sleep_secs, reserve_secs; reserve_secs = sleep(secs); sleep_secs = secs - reserve_secs; printf("Slept for %u of %u secs\t\t" "User hits ctrl-c after %u seconds\n", sleep_secs, secs, sleep_secs); return reserve_secs; } int main(int argc, char *argv[]) { unsigned secs; if (argc != 2) { fprintf(stderr, "usage: %s seconds\n", argv[0]); exit(1); } if (signal(SIGINT, handler) == SIG_ERR) { fprintf(stderr, "signal error: %s\n", strerror(errno)); exit(0); } secs = atoi(argv[1]); snooze((unsigned)secs); return 0; } ================================================ FILE: exercise/ex8-8.c ================================================ /* * 练习题 8.8 下面这个程序的输出是什么? 
* * 这个程序打印“213”,这是卡尔基梅隆大学 CS:APP 课程的缩写名 */ #include /* printf() fflush() */ #include /* exit() */ #include /* pid_t */ #include /* waitpid() */ #include /* kill() signal() */ #include /* fork() */ pid_t pid; int counter = 2; void handler1(int sig) { counter = counter - 1; printf("%d", counter); fflush(stdout); exit(0); } int main() { signal(SIGUSR1, handler1); printf("%d", counter); fflush(stdout); if ((pid = fork()) == 0) { /* child */ while (1) {}; } kill(pid, SIGUSR1); waitpid(-1, NULL, 0); counter = counter + 1; printf("%d", counter); exit(0); } ================================================ FILE: exercise/ex8-9.txt ================================================ 8.9 mofaph@gmail.com -------------------------------------------------------------------------------- |------+----------+----------| | 进程 | 开始时间 | 结束时间 | |------+----------+----------| | A | 5 | 7 | | B | 2 | 4 | | C | 3 | 6 | | D | 1 | 8 | |------+----------+----------| 对于每对进程,指明它们是否是并发地运行的: |--------+----------| | 进程对 | 并发地? 
/*
 * modify_file_using_mmap - overwrite the first byte of the file behind
 * fd with 'J' through a shared memory mapping.
 *
 * fd must be open for reading and writing (required for a writable
 * MAP_SHARED mapping).  The mapping is released before returning.
 *
 * Returns 0 on success, -1 on failure (fstat/mmap/munmap error, or an
 * empty file, which has no first byte to change).
 */
int modify_file_using_mmap(int fd)
{
        struct stat st;

        if (fstat(fd, &st) != 0) {
                perror("fstat");
                return -1;
        }
        if (st.st_size <= 0) {
                fprintf(stderr, "modify_file_using_mmap: file is empty\n");
                return -1;
        }

        char *buf = mmap(NULL, st.st_size, PROT_WRITE, MAP_SHARED, fd, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return -1;
        }

        buf[0] = 'J';

        if (munmap(buf, st.st_size) != 0) {
                perror("munmap");
                return -1;
        }
        return 0;
}
(x) : (y)) /* Pack a size and allocated bit into a word */ #define PACK(size, alloc) ((size) | (alloc)) /* Read and write a word at address p */ #define GET(p) (*(unsigned int *)(p)) #define PUT(p, val) (*(unsigned int *)(p) = (val)) /* Read the size and allocated fields from address p */ #define GET_SIZE(p) (GET(p) & ~0x7) #define GET_ALLOC(p) (GET(p) & 0x1) /* Given block ptr bp, compute address of its header and footer */ #define HDRP(bp) ((char *)(bp) - WSIZE) #define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) /* Given block ptr bp, compute address of next and previous blocks */ #define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE))) #define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE))) static char *heap_listp; static char *fit; /* next free block */ static void *extend_heap(size_t words); static void *coalesce(void *bp); static void *find_fit(size_t asize); static void place(void *bp, size_t asize); /* Extern functions from memlib.c */ extern void mem_init(void); extern void *mem_sbrk(int incr); int mm_init(void) { /* Create the initial empty heap */ if ((heap_listp = mem_sbrk(4*WSIZE)) == (void *)-1) return -1; PUT(heap_listp, 0); /* Alignment padding */ PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); /* Prologue header */ PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); /* Prologue footer */ PUT(heap_listp + (3*WSIZE), PACK(0, 1)); /* Epilogue header */ heap_listp += (2*WSIZE); fit = heap_listp; /* Extend the empty heap with a free block of CHUNKSIZE bytes */ if (extend_heap(CHUNKSIZE/WSIZE) == NULL) return -1; return 0; } static void *extend_heap(size_t words) { char *bp; size_t size; /* Allocate an even number of words to maintain aligment */ size = (words % 2) ? 
(words+1) * WSIZE : words * WSIZE; if ((long)(bp = mem_sbrk(size)) == -1) return NULL; /* Initialize free block header/footer and the epilogue header */ PUT(HDRP(bp), PACK(size, 0)); /* Free block header */ PUT(FTRP(bp), PACK(size, 0)); /* Free block footer */ PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1)); /* New epilogue header */ /* Coalesce if the previous block was free */ return coalesce(bp); } void mm_free(void *bp) { size_t size = GET_SIZE(HDRP(bp)); PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size, 0)); coalesce(bp); } static void *coalesce(void *bp) { size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp))); size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp))); size_t size = GET_SIZE(HDRP(bp)); if (prev_alloc && next_alloc) /* Case 1 */ return bp; else if (prev_alloc && !next_alloc) { /* Case 2 */ size += GET_SIZE(HDRP(NEXT_BLKP(bp))); PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size, 0)); } else if (!prev_alloc && next_alloc) { /* Case 3 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))); PUT(FTRP(bp), PACK(size, 0)); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } else { /* Case 4 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))) + GET_SIZE(FTRP(NEXT_BLKP(bp))); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } return bp; } void *mm_malloc(size_t size) { size_t asize; /* Adjusted block size */ size_t extendsize; /* Amount to extend heap if no fit */ char *bp; /* Ignore spurious requests */ if (size == 0) return NULL; /* Adjust block size to include overhead and alignment reqs. */ if (size <= DSIZE) asize = 2*DSIZE; else asize = DSIZE * ((size + (DSIZE) + (DSIZE-1)) / DSIZE); /* Search the free list for a fit */ if ((bp = find_fit(asize)) != NULL) { place(bp, asize); return bp; } /* No fit found. 
Get more memory and place the block */ extendsize = MAX(asize,CHUNKSIZE); if ((bp = extend_heap(extendsize/WSIZE)) == NULL) return NULL; place(bp, asize); return bp; } /* * 9.8 */ static void *find_fit(size_t asize) { /* Next fit search */ void *bp; for (bp = fit; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) goto found; for (bp = heap_listp; bp != fit; bp = NEXT_BLKP(bp)) if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) goto found; return NULL; /* No fit */ found: fit = NEXT_BLKP(bp); return bp; } /* * 9.9 */ static void place(void *bp, size_t asize) { size_t csize = GET_SIZE(HDRP(bp)); if ((csize - asize) >= (2*DSIZE)) { PUT(HDRP(bp), PACK(asize, 1)); PUT(FTRP(bp), PACK(asize, 1)); bp = NEXT_BLKP(bp); fit = bp; PUT(HDRP(bp), PACK(csize-asize, 0)); PUT(HDRP(bp), PACK(csize-asize, 0)); } else { PUT(HDRP(bp), PACK(csize, 1)); PUT(FTRP(bp), PACK(csize, 1)); } } ================================================ FILE: exercise/ex9-18.c ================================================ /* * 9.18 * * mofaph@gmail.com * * 在书本的 569 页的最后一段,作者提到了一种非常聪明的边界标记的优化方法,能够使 * 得在已分配块中不再需要脚部。 * * 这个程序是对边界优化的一种尝试。以 8 个字节对齐,最小块也是 8 个字节。因此,可 * 以使用最低的一位表示当前的块是否已分配,使用最低的第二位表示前一个块是否已分配。 * * 这个程序其实就是书本的家庭作业 9.18,题目是: * * 9.9.12 节中的分配器要求每个块既有头部也有脚部,以实现常数时间的合并。修改分配 * 器,使得空闲块需要头部和脚部,而已分配块只需要头部。 * * unix> gcc -I../common ../common/csapp.c ../sample/ch09/memlib.c ex9-18.c t9-17.c -lpthread */ #include /* Basic constants and macros */ #define WSIZE 4 /* Word and header/footer size (bytes) */ #define DSIZE 8 /* Double word size (bytes) */ #define CHUNKSIZE (1<<12) /* Extend heap by this amount (bytes) */ #define MAX(x, y) ((x) > (y) ? 
(x) : (y)) /* Pack a size and allocated bit into a word */ #define PACK(size, alloc) ((size) | (alloc)) /* Read and write a word at address p */ #define GET(p) (*(unsigned int *)(p)) #define PUT(p, val) (*(unsigned int *)(p) = (val)) /* Read the size and allocated fields from address p */ #define GET_SIZE(p) (GET(p) & ~0x7) #define GET_ALLOC(p) (GET(p) & 0x1) #define GET_ALLOC_PREV(p) (GET(p) & 0x2) /* Given block ptr bp, compute address of its header and footer */ #define HDRP(bp) ((char *)(bp) - WSIZE) #define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) /* Given block ptr bp, compute address of next and previous blocks */ #define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE))) #define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE))) static char *heap_listp; static void *extend_heap(size_t words); static void *coalesce(void *bp); static void *find_fit(size_t asize); static void place(void *bp, size_t asize); /* Extern functions from memlib.c */ extern void mem_init(void); extern void *mem_sbrk(int incr); int mm_init(void) { /* Create the initial empty heap */ heap_listp = mem_sbrk(2*WSIZE); if (heap_listp == (void *)-1) return -1; PUT(heap_listp + (0*WSIZE), PACK(WSIZE, 1)); /* Prologue header */ PUT(heap_listp + (1*WSIZE), PACK(0, 3)); /* Epilogue header */ heap_listp += WSIZE; /* Extend the empty heap with a free block of CHUNKSIZE bytes */ if (extend_heap(CHUNKSIZE/WSIZE) == NULL) return -1; return 0; } static void *extend_heap(size_t words) { char *bp; size_t size; /* Allocate an even number of words to maintain aligment */ size = (words % 2) ? 
/*
 * coalesce - merge the free block bp with whichever neighbours are free
 * and return the payload pointer of the resulting block.
 *
 * Whether the previous block is allocated comes from the "previous
 * allocated" bit in bp's own header; a free previous block is located
 * through its footer.  The merged block keeps the "previous allocated"
 * bit of its first constituent block.
 */
static void *coalesce(void *bp)
{
    size_t prev_bits = GET_ALLOC_PREV(HDRP(bp));
    int prev_free = !prev_bits;
    int next_free = !GET_ALLOC(HDRP(NEXT_BLKP(bp)));
    size_t merged = GET_SIZE(HDRP(bp));

    /* Both neighbours in use: nothing to merge */
    if (!prev_free && !next_free)
        return bp;

    /* Absorb the following block (measured from the original bp) */
    if (next_free)
        merged += GET_SIZE(HDRP(NEXT_BLKP(bp)));

    /* Absorb the preceding block; the merged block starts there */
    if (prev_free) {
        bp = PREV_BLKP(bp);
        merged += GET_SIZE(HDRP(bp));
        prev_bits = GET_ALLOC_PREV(HDRP(bp));
    }

    PUT(HDRP(bp), PACK(merged, prev_bits));
    PUT(FTRP(bp), PACK(merged, prev_bits));
    return bp;
}
include overhead and alignment reqs. */ asize = DSIZE * ((size + (WSIZE) + (DSIZE-1)) / DSIZE); /* Search the free list for a fit */ bp = find_fit(asize); /* No fit found. Get more memory and place the block */ if (bp == NULL) { extendsize = MAX(asize, CHUNKSIZE); bp = extend_heap(extendsize/WSIZE); if (bp == NULL) return NULL; } /* Here, we found a fit. Just place info into it. */ place(bp, asize); return bp; } /* * 9.8 */ static void *find_fit(size_t asize) { /* First fit search */ void *bp; for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) { if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) { return bp; } } return NULL; /* No fit */ } /* * place: 分割空闲块 * * 只有当剩余部分的大小等于或者超出最小块的大小时,才进行分割 */ static void place(void *bp, size_t size) { size_t block_size = GET_SIZE(HDRP(bp)); size_t reserve_size = block_size - size; int prev_alloc = GET_ALLOC_PREV(HDRP(bp)); char *next_block; size_t next_block_size; int next_alloc; if (reserve_size < DSIZE) { PUT(HDRP(bp), PACK(block_size, prev_alloc | 1)); next_block = NEXT_BLKP(bp); next_block_size = GET_SIZE(HDRP(next_block)); next_alloc = GET_ALLOC(HDRP(next_block)); PUT(HDRP(next_block), PACK(next_block_size, next_alloc | 0x2)); } /* 有足够的空间分割空闲块 */ else if (DSIZE <= reserve_size && reserve_size < block_size) { PUT(HDRP(bp), PACK(size, prev_alloc | 1)); next_block = NEXT_BLKP(bp); next_alloc = GET_ALLOC(HDRP(next_block)); PUT(HDRP(next_block), PACK(reserve_size, next_alloc | 0x2)); PUT(FTRP(next_block), PACK(reserve_size, next_alloc | 0x2)); } } ================================================ FILE: exercise/ex9-20/Makefile ================================================ # Makefile for ex9-20 # mofaph@gmail.com # 2013-6-30 CC = gcc CFLAGS = -Wall OBJS += ex9-20.o OBJS += t9-20.o OBJS += t-malloc.o PHONY += clean PHONY += all PHONY += TAGS PHONY += ex9-20 PHONY += tm .PHONY: $(PHONY) all: ex9-20 tm TAGS: rm -f TAGS find . 
 * 当向这个分配器请求一个内存块时,分配器将会执行首次适配算法,返回第一个找到的内
 * 存块。值得注意的是,这时内存中的空闲块数据结构是按升序排列的。当找到这样的一个
 * 内存块时,可能需要分割这个内存块(只有分割后空闲块的大小大于最小块时才分割),
 * 然后将分割后的空闲块放到合适的位置。如果不需要分割,则从空闲块结构中删除这个
 * 空闲块。
 *
 * 当分配器没有更多的可用内存时,它首先会尝试合并空闲块。具体的实现是,使用快速排
 * 序先将空闲块按地址升序排序,然后查看空闲块的地址和它的大小相加是否等于下一个空
 * 闲块的地址,如果相等则合并,不相等则不用合并。当合并完成之后,再使用快速排序对
 * 空闲块排序,这一次按照大小升序排序。
 *
 * 如果合并之后,仍然没有合适的空闲块,这时就向操作系统申请更多的堆空间。如果申请
 * 成功,则需要将空闲块的数据结构移动到新的位置。
(n) : (m)) static struct free_block *start, *end; static int min_block(void) { int header = header_size; if (header == ALIGN) return ALIGN << 1; if (header < ALIGN) return ALIGN; return (header + 7) & ~7; } static void swap(struct free_block *b1, struct free_block *b2) { struct free_block tmp; tmp = *b1; *b1 = *b2; *b2 = tmp; } #define SORT_BY_ADDRESS 001 #define SORT_BY_SIZE 002 static void block_quick_sort(struct free_block *start, struct free_block *end, int type) { if (start >= end) return; int k = (end - start) >> 1; swap(start, start+k); struct free_block *last = start; int i; for (i = 1; start+i < end; i++) { struct free_block *block = start+i; if ((type == SORT_BY_ADDRESS && block->addr < start->addr) || (type == SORT_BY_SIZE && block->size < start->size)) { last += 1; swap(last, block); } } swap(start, last); block_quick_sort(start, last, type); block_quick_sort(last+1, end, type); } /* bs: block start, be: block end */ static long block_binary_search(struct free_block *bs, struct free_block *be, long size) { long first = 0; long last = be - bs; long mid = 0; while (last > first) { mid = (last + first) >> 1; struct free_block *ptr = bs + mid; if (ptr->size < size) { first = mid + 1; mid += 1; } else if (ptr->size > size) { last = mid; } else /* ptr->size == size */ { return mid; } } return -mid; } /* * 在这里,空闲块集合中是有序的,按块大小的升序排列(从小到大) */ static int insert_free_block(struct free_block *block) { static int max_nblocks = BLOCK / sizeof(struct free_block); int block_nr = BLOCK / sizeof(struct free_block); long index = block_binary_search(start, end, block->size); if (index < 0) index = -index; int all = end - start + 1; if (all > max_nblocks) return -1; if (all == max_nblocks) { void *old_heap = sbrk(BLOCK); if (old_heap == (void *)-1) return -1; max_nblocks += block_nr; } int nblocks = all - index; size_t size = free_block_size(nblocks); memmove(start+index+1, start+index, size); *(start+index) = *block; end += 1; return 0; } static void 
remove_free_block(struct free_block *block) { long index = block_binary_search(start, end, block->size); if (index <= 0 || start + index == end) return; int nblocks = end - start - index; size_t size = free_block_size(nblocks); memmove(start+index, start+index+1, size); end -= 1; } /* * 这个函数用于分割块之后,把余下的空闲块放到合适的位置 */ static void adjust_free_block(struct free_block *block) { long index = block_binary_search(start, block, block->size); if (index < 0) index = -index; if (start + index != block) { struct free_block tmp = *block; int nblocks = block - start - index; size_t size = free_block_size(nblocks); memmove(start+index+1, start+index, size); *(start+index) = tmp; } } void coalesce(void) { if (start + 2 >= end) return; block_quick_sort(start, end, SORT_BY_ADDRESS); struct free_block *curr, *next; curr = start + 1; next = curr + 1; while (next != end) { if (curr->addr + curr->size == next->addr) { curr->size += next->size; int nblocks = end - next; long size = free_block_size(nblocks); memmove(next, next+1, size); end -= 1; } else { curr += 1; next = curr + 1; } } block_quick_sort(start, end, SORT_BY_SIZE); } /* 首次适配 */ struct free_block *find_fit(size_t size) { int len = end - start; int i; for (i = 1; i < len; i++) { struct free_block *bp = start + i; if (bp->size >= size) return bp; } return NULL; } void place(struct free_block *bp, long asize) { long *header = (long *)bp->addr; long reserve = bp->size - asize; long min = min_block(); if (reserve >= min) { *header = asize; bp->addr += asize; bp->size -= asize; adjust_free_block(bp); } else { *header = bp->size; remove_free_block(bp); } } int mo_init(void) { char *orig = sbrk(0); char *align = (char *)(((unsigned long)orig + 7) & ~7); int unused = align - orig; void *addr = sbrk(CHUNK+BLOCK+unused); if (addr == (void *)-1) { /* 当失败时,sbrk 已经设置了 errno */ return -1; } struct free_block padding = { 0, NULL }; start = (struct free_block *)((char *)addr + unused + CHUNK); *start = padding; end = start + 2; *end = 
padding; struct free_block *ptr = start + 1; int align_padding = min_block() == 8 ? 4 : 0; ptr->addr = addr + unused + align_padding; ptr->size = CHUNK - align_padding; return 0; } void *mo_malloc(size_t size) { size_t asize = size + offsetof(struct free_block, addr); asize = ((asize+ALIGN-1) / ALIGN) * ALIGN; /* 字节对齐 */ struct free_block *fit; try_again: fit = find_fit(asize); if (fit == NULL) { coalesce(); fit = find_fit(asize); if (fit == NULL) { char *old_start = (char *)start; size_t alloc_size = MAX(asize, CHUNK); struct free_block block; block.addr = old_start; block.size = alloc_size; int is_ok = insert_free_block(&block); if (is_ok == -1) return NULL; void *old_heap = sbrk(alloc_size); if (old_heap == (void *)-1) { int save_errno = errno; remove_free_block(&block); errno = save_errno; return NULL; } int nblocks = end - start + 1; long block_size = free_block_size(nblocks); memmove((char *)start+alloc_size, (char *)start, block_size); start = (struct free_block *)((char *)start + alloc_size); end = (struct free_block *)((char *)end + alloc_size); goto try_again; } } /* * 找到一个空闲块,在空闲块头部填入大小,可能会分割空闲块。因此,就有可能 * 改变 fit 所指结构的内容,所以在调用 place() 之前,需要保存空闲块的有效 * 地址 */ char *addr = fit->addr + sizeof(fit->size); place(fit, asize); return addr; } void mo_free(void *ptr) { int offset = offsetof(struct free_block, addr); char *addr = (char *)ptr - offset; long size = *((long *)addr); struct free_block block; block.size = size; block.addr = addr; insert_free_block(&block); } ================================================ FILE: exercise/ex9-20/t-block-operate.c ================================================ #include #include struct free_block { char *addr; /* 空闲块的地址 */ long size; /* 空闲块的大小 */ }; #define free_block_size(n) ((n) * sizeof(struct free_block)) static struct free_block *start, *end; static long block_binary_search(long size) { long first = 0; long last = end - start; long mid = 0; while (last > first) { mid = (last + first) >> 1; struct free_block *ptr = 
start + mid; if (ptr->size < size) { first = mid + 1; mid += 1; } else if (ptr->size > size) { last = mid; } else /* ptr->size == size */ { return mid; } } return -mid; } /* * 在这里,空闲块集合中是有序的,按块大小的升序排列(从小到大) */ static void insert_free_block(struct free_block *block) { long index = block_binary_search(block->size); if (index < 0) index = -index; end += 1; int nblocks = end - start - index; size_t size = free_block_size(nblocks); memmove(start+index+1, start+index, size); *(start+index) = *block; } static void remove_free_block(struct free_block *block) { long index = block_binary_search(block->size); if (index < 0) return; int nblocks = end - start - index; size_t size = free_block_size(nblocks); memmove(start+index, start+index+1, size); end -= 1; } /* * 这个函数用于分割块之后,把余下的空闲块放到合适的位置 */ static void adjust_free_block(struct free_block *block) { long index = block_binary_search(block->size); if (index < 0) index = -index; struct free_block tmp = *block; int nblocks = block - start - index; size_t size = free_block_size(nblocks); memmove(start+index+1, start+index, size); *(start+index) = tmp; } #define M 5 #define N 10 static struct free_block block[N]; static void init_blocks(void) { int i; for (i = 0; i < M; i++) { block[i].addr = (char *)0x100 + i; block[i].size = 100 + i; } for (i = M; i < N; i++) { block[i].addr = (char *)0; block[i].size = 300 + i; } start = &block[0]; end = &block[M]; struct free_block padding = { NULL, 0 }; *start = padding; *end = padding; } void print_blocks(void) { int i; for (i = 0; i < N; i++) { printf("%02d: %p, %ld\n", i, block[i].addr, block[i].size); } printf("\n"); } int main(void) { init_blocks(); print_blocks(); char c[10]; struct free_block b; do { printf("Your choice: "); scanf("%s", c); if (c[0] == 'i') { printf("[addr size]: "); scanf("%p %ld", &b.addr, &b.size); insert_free_block(&b); print_blocks(); } else if (c[0] == 'd') { printf("[addr size]: "); scanf("%p %ld", &b.addr, &b.size); remove_free_block(&b); print_blocks(); } else 
if (c[0] == 'p') { print_blocks(); } else if (c[0] == 'a') { printf("[oldsize newsize]: "); long oldsize, newsize; scanf("%ld %ld", &oldsize, &newsize); int index = block_binary_search(oldsize); block[index].size = newsize; adjust_free_block(&block[index]); print_blocks(); } } while (c[0] != 'q'); return 0; } ================================================ FILE: exercise/ex9-20/t-block-quick-sort.c ================================================ #include struct free_block { char *addr; /* 空闲块的地址 */ long size; /* 空闲块的大小 */ }; static struct free_block *start, *end; static void swap(struct free_block *b1, struct free_block *b2) { struct free_block tmp; tmp = *b1; *b1 = *b2; *b2 = tmp; } #define SORT_BY_ADDRESS 001 #define SORT_BY_SIZE 002 static void block_quick_sort(struct free_block *start, struct free_block *end, int type) { if (start >= end) return; int k = (end - start) >> 1; swap(start, start+k); struct free_block *last = start; int i; for (i = 1; start+i < end; i++) { struct free_block *block = start+i; if ((type == SORT_BY_ADDRESS && block->addr < start->addr) || (type == SORT_BY_SIZE && block->size < start->size)) { last += 1; swap(last, block); } } swap(start, last); block_quick_sort(start, last, type); block_quick_sort(last+1, end, type); } #define N 15 static struct free_block block[N]; static void init_blocks(void) { int i; for (i = 0; i < N; i++) { block[i].addr = (char *)0x100 + random() % 100; block[i].size = 100 + random() % 100; } start = &block[0]; end = &block[N-1]; } void print_blocks(void) { int i; for (i = 0; i < N; i++) { printf("%02d: %p, %ld\n", i, block[i].addr, block[i].size); } printf("\n"); } int main(void) { init_blocks(); print_blocks(); block_quick_sort(start, end+1, SORT_BY_ADDRESS); print_blocks(); return 0; } ================================================ FILE: exercise/ex9-20/t-malloc.c ================================================ #include #ifndef MAX_ITEM # define MAX_ITEM (1<<17) #endif int main(void) { int i; char 
*buf[MAX_ITEM]; for (i = 1; i < MAX_ITEM; i++) { buf[i] = malloc(i); } for (i = 1; i < MAX_ITEM; i++) { free(buf[i]); } return 0; } ================================================ FILE: exercise/ex9-20/t2.c ================================================ #include #include int main(void) { struct free_block { char *addr; long size; }; struct free_block *s, *e; long s1 = 0x8040a0a0; long s2 = s1 + 9*sizeof(struct free_block); s = (struct free_block *)s1; e = (struct free_block *)s2; printf("e: %p, s: %p, e-s: %d\n", e, s, e-s); return 0; } ================================================ FILE: exercise/ex9-20/t3.c ================================================ #include int main(void) { char s[10]; do { printf("Input: "); scanf("%s", s); printf("Output: %s\n", s); } while (s[0] != 'q'); return 0; } ================================================ FILE: exercise/ex9-20/t9-20.c ================================================ /* * 9.20 -- test driver * * mofaph@gmail.com * 2013-6-28 * * unix> cc -Wall ex9-20.c t9-20.c -o ex9-20 * unix> ./ex9-20 */ #include extern int mo_init(void); extern void mo_free(void *bp); extern void *mo_malloc(size_t size); #ifndef MAX_ITEM # define MAX_ITEM (1<<17) #endif int main(void) { if (mo_init() < 0) return -1; int i; char *buf[MAX_ITEM]; for (i = 1; i < MAX_ITEM; i++) { buf[i] = mo_malloc(i); } for (i = 1; i < MAX_ITEM; i++) { mo_free(buf[i]); } return 0; } ================================================ FILE: exercise/ex9-5.c ================================================ /* * 9.5 * * 使用 mmap() 将一个任意大小的磁盘文件拷贝到 stdout。 * 输入文件的名字必须作为一个命令行参数来传递。 * * unix> cc ex9-5.c -o mmapcopy * * #include * #include * * void *mmap(void *start, size_t length, int prot, int flags, * int fd, off_t offset); */ #include #include #include #include #include #include #include int main(int argc, char *argv[]) { int n, fd; void *map; struct stat statbuf; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } fd = open(argv[1], O_RDONLY); if 
(fd < 0) { perror("open"); exit(1); } if (fstat(fd, &statbuf) != 0) { perror("fstat"); exit(1); } map = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (map == MAP_FAILED) { perror("mmap"); exit(1); } if (close(fd) < 0) { perror("close"); exit(1); } n = write(STDOUT_FILENO, map, statbuf.st_size); if (n != statbuf.st_size) { perror("write"); exit(1); } return 0; } ================================================ FILE: exercise/ex9-8.c ================================================ /* * 9.8 * * 为 9.9.12 节中描述的简单分配器实现一个 find_fit 函数 * * static void *find_fit(size_t asize); * * 你的解答应该对隐式空闲链表执行首次适配搜索。 */ static void *find_fit(size_t asize) { char *fit; for (fit = NEXT_BLKP(heap_listp); GET_SIZE(fit) > 0; fit = NEXT_BLKP(fit)) { if (GET_ALLOC(fit) && GET_SIZE(fit) >= asize) return (void *)fit; } return NULL; } ================================================ FILE: exercise/ex9-9.c ================================================ /* * 9.9 * * 为示例的分配器编写一个 place 函数。 * * static void place(void *bp, size_t asize); * * 你的解答应该将请求块放置在空闲块的位置,只有当剩余部分的大小等于或者超出最小块 * 的大小时,才进行分割。 */ static void place(void *bp, size_t size) { size_t block_size = GET_SIZE(bp); size_t reserve_size = block_size - size; if (reserve_size < WSIZE) { PUT(HDRP(bp), PACK(block_size, 1)); PUT(FTRP(bp), PACK(block_size, 1)); } else if (WSIZE <= reserve_size && reserve_size < block_size) { PUT(HDRP(bp), PACK(size, 1)); PUT(FTRP(bp), PACK(size, 1)); PUT(HDRP(NEXT_BLKP(bp)), PACK(reserve_size, 0)); PUT(FTRP(NEXT_BLKP(bp)), PACK(reserve_size, 0)); } } ================================================ FILE: exercise/t2-59.c ================================================ /* * 2.59 * * mofaph@gmail.com * 2013-12-17 * * unix> cc -Wall ex2-59.c t2-59.c -o ex2-59 */ #include extern int combine_word(unsigned x, unsigned y); int main(void) { int x = 0x89ABCDEF; int y = 0x76543210; int z = combine_word(x, y); printf("x=0x%X, y=0x%X, combine_word=0x%X\n", x, y, z); return 0; } 
================================================ FILE: exercise/t2-95.c ================================================ /* * 用于测试 ex2-95.c 的代码 * * 编译: * $ cc ex2-89.c ex2-95.c t2-95.c * * 测试方法: * $ ./a.out * $ time ./a.out > t2-95.test * * mofaph@gmail.com */ #include /* printf() */ #include /* INT_MIN INT_MAX */ typedef unsigned float_bits; extern unsigned f2u(float f); /* ex2-89.c */ extern float_bits float_i2f(int i); /* ex2-95.c */ int main(void) { float f; unsigned u, t; int i = INT_MIN; do { f = (float) i; u = f2u(f); t = float_i2f(i); if (u != t) printf("BAD: 0x%X, 0x%X, 0x%X\n", t, i, u); } while (i++ != INT_MAX); return 0; } ================================================ FILE: exercise/t2-96.c ================================================ /* * 测试家庭作业 2.96 * * 编译: * $ cc ex2-89.c ex2-96.c t2-96.c * * 测试: * $ ./a.out * * mofaph@gmail.com */ #include #include typedef unsigned float_bits; extern float u2f(unsigned u); extern int float_f2i(float_bits f); /* (int)f */ int main(void) { float f; unsigned u, t; int i = INT_MIN; do { u = float_f2i(i); f = u2f(i); t = (int)f; if (u != t) printf("0x%08X: [GOOD: 0x%08X] [BAD: 0x%08X]\n", i, t, u); } while (i++ != INT_MAX); return 0; } ================================================ FILE: exercise/t3-68.sh ================================================ #!/bin/sh # 测试家庭作业 3.68 # mofaph@gmail.com extern_file=ex3-68 test_file=t3-68 suffix=c # 自动生成一个测试文件 cat <${test_file}.${suffix} /* NOTE!!! This is auto generate by a shell script */ extern int good_echo(void); int main(void) { int ret; ret = good_echo(); return ret; } EOF # 编译 cc ${test_file}.${suffix} ${extern_file}.${suffix} -o ${test_file} # 产生数据以测试 printf "hel\n" | ./${test_file} # 正常输入 printf "\0" | ./${test_file} # 正常输入,但只有一个文件结束符 printf "Barcelona" | ./${test_file} # 非法输入 exit $? 
================================================ FILE: exercise/t8-22.c ================================================ /* * 8.22 * * unix> cc ex8-22.c t8-22.c */ #include extern int mysystem(char *command); /* ex8-22.c */ int main(int argc, char *argv[]) { return mysystem(argv[1]); } ================================================ FILE: exercise/t8-25.c ================================================ /* * test ex8-25.c * * unix> cc ex8-25.c t8-25.c */ #include /* define in ex8-25.c */ extern char *tfgets(char *s, int size, FILE *stream); int main(void) { char buf[BUFSIZ]; char *bufp; bufp = tfgets(buf, sizeof(buf), stdin); printf("bufp: %p\n", bufp); return 0; } ================================================ FILE: exercise/t9-14.c ================================================ /* * test ex9-14.c * * unix> gcc -Wall ex9-14.c t9-14.c * * mofaph@gmail.com */ #include #include #include #include #include #include extern int modify_file_using_mmap(int fd); /* ex9-14.c */ #define HELLO_FILE "t9-14.txt" int main(void) { int fd = open(HELLO_FILE, O_RDWR); if (fd < 0) { perror("open"); return -1; } int ret = modify_file_using_mmap(fd); close(fd); return ret; } ================================================ FILE: exercise/t9-14.sh ================================================ #!/bin/sh # mofaph@gmail.com hello_file="hello.txt" t_914="t9-14.txt" out="t914" # Create test file printf "Hello, world\n" > $hello_file cp $hello_file $t_914 # Compile and run cc -Wall ex9-14.c t9-14.c -o $out ./$out # Test the output if cmp -s $hello_file $t_914; then printf "FAILED: 9.14\n" fi # Test done, delete all the generated files rm -f $hello_file $t_914 $out exit $? 
================================================ FILE: exercise/t9-17.c ================================================ /* * test 9.17 * * mofaph@gmail.com * * unix> gcc -I../common ex9-17.c t9-17.c ../sample/ch09/memlib.c ../common/csapp.c -lpthread */ #include extern void mem_init(void); extern int mm_init(void); extern void mm_free(void *bp); extern void *mm_malloc(size_t size); int main(void) { mem_init(); if (mm_init() < 0) return -1; char *buf = mm_malloc(1); printf("buf : %p\n", buf); if (buf) mm_free(buf); char *buf2 = mm_malloc(4); printf("buf2: %p\n", buf2); if (buf2) mm_free(buf2); char *buf3 = mm_malloc(8); printf("buf3: %p\n", buf3); char *buf4 = mm_malloc(12); printf("buf4: %p\n", buf4); if (buf3) mm_free(buf3); if (buf4) mm_free(buf4); buf = mm_malloc(4); printf("buf : %p\n", buf); buf2 = mm_malloc(9); printf("buf2: %p\n", buf2); if (buf) mm_free(buf); if (buf2) mm_free(buf2); return 0; } ================================================ FILE: hard-task.txt ================================================ 这个文件列出了一些我认为比较困难的习题。一般是书本中的四星或者三星的习题。 o 2.65 o 2.95 o 2.96 o 6.46 o 6.47 o 8.26 o 9.20 o 12.21 o 12.38 o 12.39 以上提到的习题,可以在 exercise/00-topic.txt 中查看。 mofaph 2013/5/19 ================================================ FILE: missing.c ================================================ /* * 给出还没有完成的家庭作业 * * mofaph@gmail.com * 2013-5-12 * * 基本的想法就是,将所有的题目看作一个题目组成的集合。在初始化的时候,将所有的题 * 目都设置为没有完成的标志,如果一道题目已经完成了,就设置一个完成的标志。 * * 因为我将所有完成的题目都放在 exercise/ 这个目录下,并且以同一种风格命名:ex章 * 节号-题目号。因此,扫描这个目录下的所有文件,得到章节号和题目号,然后映射到所 * 有题目的集合中。 * * 当需要输出结果的时候,只需要扫描题目集合,收集所有的标记为未完成的题目号。然后, * 将这些题目号映射为章节号和题目号就大功告成了。 * * unix> cc missing.c -o missing * unix> ./missing */ #include #include #include #include #include #include #include struct homework { int beg; /* 起始题号 */ int end; /* 结束题号 */ }; #define MAX_CHAPTER 13 struct homework homework_topic[MAX_CHAPTER] = { /* * 由于计算题目的个数是用 end-beg+1 的方式,所以为了确保没有题目的章节的 * 题目数是零。因此,人为地设置起始题号是 1,结束题号是 0,就可以统一使用 * end-beg+1 的方式计算题目数 */ { 1, 0}, /* 0 
*/ { 1, 0}, {55, 90}, {54, 70}, {43, 58}, /* 1-4 */ {15, 22}, {23, 47}, { 6, 15}, { 9, 26}, /* 5-8 */ {11, 20}, { 6, 10}, { 6, 13}, {16, 39}, /* 9-12 */ }; int total_topic[1024] = {0}; int init_total_topic(void) { memset(total_topic, 0, sizeof(total_topic)); int i, all; for (i = 0, all = 0; i < MAX_CHAPTER; i++) all += homework_topic[i].end - homework_topic[i].beg + 1; total_topic[all] = -1; return all; } /* 根据章节号和题号,给出总题号 */ int index_total(int chapter_num, int topic_num) { if (chapter_num < 0 || chapter_num > MAX_CHAPTER) return -1; if (topic_num < homework_topic[chapter_num].beg || topic_num > homework_topic[chapter_num].end) return -1; int i, index = 0; for (i = 0; i < chapter_num; i++) index += homework_topic[i].end-homework_topic[i].beg+1; index += topic_num-homework_topic[chapter_num].beg; return index; } /* 根据总题号,给出章节号和题号 */ void get_chapter_topic_from_index(int index, int *chapter_num, int *topic_num) { int i, n; n = index; for (i = 0; i < MAX_CHAPTER; i++) { int all = homework_topic[i].end - homework_topic[i].beg + 1; if (n < all) { *chapter_num = i; *topic_num = homework_topic[i].beg + n; break; } else { n -= all; } } } /* 从文件名中,获取章节号和题号 */ void get_chapter_topic_from_filename(char *filename, int *chapter_num, int *topic_num) { int ret; regex_t preg; char *regex = "^ex([0-9]+)-([0-9]+)"; ret = regcomp(&preg, regex, REG_EXTENDED); if (ret != 0) { perror("regcomp"); return; } regmatch_t pmatch[3]; ret = regexec(&preg, filename, 3, pmatch, 0); if (ret == REG_NOMATCH) { regfree(&preg); return; } char buf[10]; memset(buf, 0, sizeof(buf)); memcpy(buf, filename+pmatch[1].rm_so, pmatch[1].rm_eo-pmatch[1].rm_so); *chapter_num = atoi(buf); memset(buf, 0, sizeof(buf)); memcpy(buf, filename+pmatch[2].rm_so, pmatch[2].rm_eo-pmatch[2].rm_so); *topic_num = atoi(buf); regfree(&preg); } /* 如果题目已经完成了,就在题目集合中设置完成标志 */ void set_total_topic(char *dirname) { DIR *dp = opendir(dirname); if (dp == NULL) { perror("opendir"); return; } for (;;) { struct dirent *dirp; dirp = 
readdir(dp); if (dirp == NULL) break; char *filename = dirp->d_name; int chapter, topic, index; get_chapter_topic_from_filename(filename, &chapter, &topic); index = index_total(chapter, topic); if (index >= 0) total_topic[index] = 1; } closedir(dp); } int main(void) { int len = init_total_topic(); set_total_topic("exercise"); int i; int old_chapter = 0, new_chapter = 0, topic; int unfinish = 0; for (i = 0; i < len; i++) { if (total_topic[i] == -1) break; if (total_topic[i] == 0) { unfinish += 1; get_chapter_topic_from_index(i, &new_chapter, &topic); static int count = 0; if (old_chapter != new_chapter) { if (old_chapter != 0) printf("\n"); old_chapter = new_chapter; count = 0; } else { count += 1; if (count % 8 == 0) printf("\n "); } printf(" %d.%d", new_chapter, topic); } } printf("\n\n"); int finish = len - unfinish; printf("total: %d, finish: %d(%.2f%%), unfinish: %d(%.2f%%)\n", len, finish, (double)finish/len*100, unfinish, (double)unfinish/len*100); return 0; } ================================================ FILE: notes/.gitignore ================================================ ================================================ FILE: notes/api.txt ================================================ * 进程控制 ** getpid/getppid 获取进程 ID #include #include pid_t getpid(void); pid_t getppid(void); 返回:getpid() 返回进程的 PID,getppid() 返回父进程的 PID ** exit 终止进程 #include void exit(int status); 返回:该函数无返回值 ** fork 创建进程 #include #include pid_t fork(void); 返回:子进程返回 0,父进程返回子进程的 PID,如果出错,则为 -1 ** waitpid/wait 等待子进程的终止或者停止 #include #include pid_t waitpid(pid_t pid, int *status, int options); 返回:如果成功,则为子进程的 PID,如果 WNOHANG,则为 0,如果其他错误,则为 -1 wait 函数是 waitpid 函数的简单版本 #include #include pid_t wait(int *status); 返回:如果成功,则为子进程的 PID,如果出错,则为 -1 ** sleep/pause 让进程休眠 sleep() 将一个进程挂起一段指定的时间。 #include unsigned int sleep(unsigned int secs); 返回:还要休眠的秒数 pause() 让调用函数休眠,直到该进程收到一个信号。 #include int pause(void); 返回:总是返回 -1 ** execve 加载并运行程序 #include int execve(const char *filename, const char *argv[], const 
char *envp[]); 返回:如果成功,则不返回;如果错误,则返回 -1 ** getenv/setenv/unsetenv 操作环境数组 #include char *getenv(const char *name); int setenv(const char *name, const char *newvalue, int overwrite); void unsetenv(const char *name); getenv:若存在则为指向 name 的指针,若无匹配,则为 NULL setenv:若成功返回 0,若错误返回 -1 unsetenv:无 * 信号处理 ** getpgrp/setpgid 获取/设置 进程组 ID #include pid_t getpgrp(void); int setpgid(pid_t pid, pid_t pgid); getpgrp: 返回调用进程的进程组 ID setpgid: 改变自己或其他进程的进程组 ID。成功返回 0,失败返回 -1 ** kill/alarm 发送信号 #include #include int kill(pid_t pid, int sig); kill: 发送信号给其它进程(包括它自己)。成功返回 0,失败返回 -1 #include unsigned int alarm(unsigned int secs); alarm: 向自己发送 SIGALRM 信号。返回前一次闹钟剩余的秒数,若以前没有设定闹钟, 则为 0 ** signal 接收信号 #include typedef void (*sighandler_t)(int); sighandler_t signal(int signum, sighandler_t handler); signal: 修改和信号相关联的默认行为。成功则为指向前次处理程序的指针,错误则为 SIG_ERR(不设置 errno) ** sigaction/sigprocmask 可移植的信号处理 #include int sigaction(int signum, struct sigaction *act, struct sigaction *oldact); int sigprocmask(int how, const sigset_t *set, sigset_t *oldset); int sigemptyset(sigset_t *set); int sigfillset(sigset_t *set); int sigaddset(sigset_t *set, int signum); int sigdelset(sigset_t *set, int signum); int sigismember(const sigset_t *set, int signum); sigaction: 同下 sigprocmask: 同下 sigemptyset: 同下 sigfillset: 同下 sigaddset: 同下 sigdelset: 成功返回 0,出错返回 -1 sigismember: 若 signum 是 set 的成员返回 1,如果不是返回 0,出错返回 -1 ** setjmp/sigsetjmp/longjmp/siglongjmp 非本地跳转 #include int setjmp(jmp_buf env); int sigsetjmp(sigjmp_buf env, int savesigs); void longjmp(jmp_buf env, int retval); void siglongjmp(sigjmp_buf env, int retval); setjmp: 直接调用时返回 0,从 longjmp 返回时为非零 * 文件操作 ** open/close #include #include #include int open(char *filename, int flags, mode_t mode); open: 若成功则为新文件描述符,若出错为 -1 #include int close(int fd); close: 若成功返回 0,若出错则为 -1 ** read/write #include ssize_t read(int fd, void *buf, size_t n); ssize_t write(int fd, const void *buf, size_t n); read: 若成功返回读的字节数,若EOF则为0,若出错则为 -1 write: 成功返回写的字节数,出错则为 -1 ** stat/fstat #include #include int stat(const char 
*filename, struct stat *buf); int fstat(int fd, struct stat *buf); stat: 同下 fstat: 成功返回 0,出错返回 -1 ** dup/dup2 #include int dup2(int oldfd, int newfd); 返回:若成功则为非负的描述符,若出错则为 -1 * 网络编程 ** htonl/htons ntohl/ntohs #include unsigned long int htonl(unsigned long int hostlong); unsigned short int htons(unsigned short int hostshort); unsigned long int ntohl(unsigned long int netlong); unsigned short int ntohs(unsigned short int netshort); htonl/htons: 返回按照网络字节顺序的值 ntohl/ntohs: 返回按照主机字节顺序的值 ** inet_aton/inet_ntoa #include #include int inet_aton(const char *cp, struct in_addr *inp); char *inet_ntoa(struct in_addr in); inet_aton: 将一个点分十进制串转换为一个网络字节顺序的 IP 地址 成功返回 1,出错则为 0 inet_ntoa: 将一个网络字节顺序的 IP 地址转换为它所对应的点分十进制串 返回指向点分十进制字符串的指针 ** gethostbyname/gethostbyaddr #include struct hostent *gethostbyname(const char *name); struct hostent *gethostbyaddr(const char *addr, int len, 0); gethostbyname/gethostbyaddr: 成功返回 非NULL 指针,出错则为 NULL,同时设置 h_errno ** socket/connect/bind/listen/accept #include #include int socket(int domain, int type, int protocol); socket: 成功返回非负描述符,出错返回 -1 #include int connect(int sockfd, struct sockaddr *serv_addr, int addrlen); connect: 成功返回 0,出错返回 -1 #include int bind(int sockfd, struct sockaddr *my_addr, int addrlen); int listen(int sockfd, int backlog); int accept(int listenfd, struct sockaddr *addr, int *addrlen); bind/listen: 成功返回 0,出错返回 -1 accept: 成功返回非负连接描述符,出错返回 -1 * 线程控制 ** pthread_create 创建线程 #include typedef void *(func)(void *); int pthread_create(pthread_t *tid, pthread_attr_t *attr, func *f, void *arg); 这个函数创建一个新的线程,并带着一个输入变量 arg,在新线程的上下文中运行线程例 程 f。能用 attr 参数来改变新创建线程的默认属性。详情请参考《UNIX环境高级编程·第2 版》的第 11 章和第 12 章。 返回值:成功返回 0,出错则为非零 pthread_t pthread_self(void); 返回值:调用者的线程 ID ** pthread_exit/pthread_cancel 终止线程 #include void pthread_exit(void *thread_return); int pthread_cancel(pthread_t tid); 返回值:成功返回 0,出错返回非零 ** pthread_join 回收已终止线程的资源 #include int pthread_join(pthread_t tid, void **thread_return); 返回值:成功返回 0,出错返回非零 这个函数等待其他线程终止。它会阻塞,直到线程 tid 终止,将线程例程返回的 
(void *) 指针赋值为 thread_return 指向的位置,然后回收已终止线程占用的所有存储器 资源。 pthread_join() 只能等待一个指定的线程终止,没有办法让 pthread_join() 等待任意一个 线程终止。 ** pthread_detach 分离线程 在任何一个时间点上,线程是可结合的或者是可分离的。一个分离的线程是不能被其他线程 回收或杀死的,它的存储器资源在它终止时由系统自动释放。 默认情况下,线程被创建成可结合的。一个可结合的线程能够被其他线程回收其资源和杀死; 在被其他线程回收之前,它的存储器资源是没有被释放的。 #include int pthread_detach(pthread_t pid); 返回值:成功返回 0,出错返回非零 ** pthread_once 初始化线程 #include pthread_once_t once_control = PTHREAD_ONCE_INIT; int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)); 返回值:总是返回 0 当第一次用参数 once_control 调用 pthread_once() 时,它调用 init_routine,这是一 个没有任何参数也不返回任何值的函数。 当需要动态初始化多个线程共享的全局变量时,pthread_once() 是很有用的。 * 信号量 #include int sem_init(sem_t *sem, 0, unsigned int value); int sem_wait(sem_t *s); /* P(s) */ int sem_post(sem_t *s); /* V(s) */ 返回值:若成功返回 0,若出错返回 -1 ================================================ FILE: notes/ch03.txt ================================================ 第 3 章 程序的机器级表示 超线程(hyperthreading):在一个处理器上同时运行两个程序。 p104 IA32(Intel Architecture 32-bit):Intel 32位体系结构 p104 计算机系统使用了多种不同形式的抽象,利用更简单的抽象模型来隐藏实现的细节。对于机 器级编程来说,其中两种抽象尤为重要。 第一种是机器级程序的格式和行为,定义为指令集体系结构(Instruction set architecture, ISA),它定义了处理器状态、指令的格式,以及每条指令对状态的影响。 第二种抽象是,机器级程序使用的存储器地址是虚拟地址,提供的存储器模型看上去是一个 非常大的字节数组。 p106 虽然 C 语言提供了一种模型,可以在存储器中声明和分配各种数据类型的对象,但是机器 代码只是简单地将存储器看成是一个很大的、按字节寻址的数组。C 语言中的聚合数据类型, 例如数组和结构,在机器代码中用连续的一组字节来表示。即使是标量数据类型,汇编代码 也不区分有符号或无符号整数,不区分各种类型的指针,甚至不区分指针和整数。 p106~p107 (gdb) x/17xb sum 这条命令告诉 GDB 检查(简写为“x”)17 个十六进制格式(也简写为“x”)的字节(简 写为“b”)。 p108 unix> gcc -O1 -S -masm=intel code.c p110 由于是从 16 位体系结构扩展成 32 位的,Intel 用术语“字”(word)表示 16 位数据类型。 因此,称 32 位数为“双字”(double words),称 64 位数位“四字”(quad words)。我们 后面遇到的大多数指令都是对字节或双字操作的。 p111 大多数指令有一个或多个操作数(operand),指示出执行一个操作中要引用的源数据值,以 及放置结果的目标位置。IA32 支持多种操作数格式。源数据值可以以常数形式给出,或是从 寄存器或存储器中读出。结果可以存放在寄存器或存储器中。因此,各种不同的操作数的可 能性被分为三种类型。 第一种类型是立即数(immediate),也就是常数值。第二种类型是寄存器(register),它 表示某个寄存器的内容。第三种操作数是存储器(memory)引用,它会根据计算出来的地址 (通常称为有效地址)访问某个存储器位置。 p112~p113 mov 类中的指令将源操作数的值复制到目的操作数中。源操作数指定的值是一个立即数,存 
储在寄存器中或者存储器中。目的操作数指定一个位置,要么是一个寄存器,要么是一个存 储器地址。IA32 加了一条限制,传送指令的两个操作数不能都指向存储器位置。 p114 C 语言中所谓的“指针”其实就是地址,局部变量通常是保存在寄存器中,而不是在存储器 中。寄存器访问比存储器访问要快得多。 p117 机器代码提供两种基本的低级机制来实现有条件的行为:测试数据值,然后根据测试的结果 来改变控制流或者数据流。 数据相关的控制流是实现有条件行为的更通用和更常见的方法。 p123 除了整数寄存器,CPU 还维护着一组单个位的条件码(condition code)寄存器,它们描述 了最近的算术或逻辑操作的属性。 CF: 进位标志。最近的操作使最高位产生了进位。可以用来检查无符号操作数的溢出。 ZF: 零标志。最近的操作得出的结果为 0。 SF: 符号标志。最近的操作得到的结果为负数。 OF: 溢出标志。最近的操作数导致一个补码溢出--正溢出或负溢出。 p124 条件码通常不会直接读取,常用的使用方法有三种: 1. 可以根据条件码的某个组合,将一个字节设置为 0 或者 1 2. 可以条件跳转到程序的某个其他的部分 3. 可以有条件地传送数据 p125 在产生目标代码文件时,汇编器会确定所有带标号指令的地址,并将跳转目标(目的指令的 地址)编码为跳转指令的一部分。 p127 jmp 指令是无条件跳转,它可以是直接跳转,即跳转目标是作为指令的一部分编码的;也可 以是间接跳转,即跳转目标是从寄存器或存储器位置中读出的。 在 ATT 汇编语言中,直接跳转是给出一个标号作为跳转目标的,例如:标号“.L1”。间接跳 转的写法是“*”后面跟一个操作数指示符,例如:“jmp *%eax”。 p127 理解跳转指令的目标如何编码,对研究链接非常重要。 p127 跳转指令有几种不同的编码,但是最常用的都是 PC(Program Counter)相关的 (PC-relative)。它们会将目标指令的地址与紧跟在跳转指令后面那条指令的地址之间的 差作为编码。 p127 当执行与 PC 相关的寻址时,程序计数器的值是跳转指令后面的那条指令的地址,而不是跳 转指令本身的地址。这种惯例可以追溯到早期实现,当时的处理器会将更新程序计数器作为 执行一条指令的第一步。 p129 C 语言中三种形式的所有循环 -- do-while、while 和 for -- 都可以用一种简单的策略来 翻译,产生包含一个或多个条件分支的代码。控制的条件转移为循环翻译成机器代码提供了 基本机制。 p138 数据的条件转移是一种替代的策略。这种方法先计算一个条件操作的两种结果,然后再根据 条件是否满足从而选取一个。只有在一些受限制的情况下,这种策略才可行,但是如果可行, 就可以用一条简单的条件传送指令来实现它。条件传送指令更好地匹配了现代处理器的性能 特性。 p139 从 1995 年的 PentiumPro 开始,近代 IA32 处理器都拥有条件传送指令,这些指令会根据 条件码的值,选择要么什么都不做,要么将一个值复制到一个寄存器。 p140 基于条件数据传送的代码比基于条件控制转移的代码性能好,其中的原因,是因为现代处理 器通过使用流水线(pipelining)来获得高性能。 在流水线中,一条指令的处理要经过一系列的阶段,每个阶段执行所需要操作的一小部分。 这种方法通过重叠连续指令的步骤来获得高性能。例如,在取一条指令的时候,执行它前面 一条指令的算术运算。要做到这一点,要求能够事先确定要执行指令的序列,这样才能保持 流水线中充满了待执行的指令。 错误预测一个跳转要求处理器丢掉它为该跳转后所有指令已经做了的工作,然后再开始用从 正确位置处起始的指令区填充流水线。这样一个错误预测会招致很严重的惩罚。大约 20~40 时钟周期的浪费,导致程序性能的严重下降。 p141 T_arg(p) = (1-p)*T_ok + p*(T_ok+T_mp) = T_ok + p*T_mp p141 使用条件传送也不是总会改进代码的效率。编译器必须考虑浪费的计算和由于分支预测错误 所造成的性能处罚之间的相对性能。说实话,编译器并不具有足够的信息来做出可靠的决定。 p141 总的来说,条件数据传送提供了一种用条件控制转移来实现条件操作的替代策略。它们只能 用于很受限制的情况,但是这些情况还是相当常见的,而且充分利用了现代处理器的运行方 式。 p141 跳转表是一个数组,表项 i 是一个代码段的地址,这个代码段实现当开关索引值等于 i 时 程序应该采取的动作。程序代码用开关索引值来执行一个跳转表内的数组引用,确定跳转指 令的目标。 和使用一组很长的 if-else 
语句相比,使用跳转表的优点是执行开关语句的时间与开关情 况的数量无关。 p144 执行 switch 语句的关键步骤是通过跳转表来访问代码位置。 p146 跳转表是一种非常高效的实现多重分支的方法。 p147 动态的数组分配版本必须用乘法指令对 i 伸展 n 倍,而不能用一系列的移位和加法。 p163 寄存器溢出(register spiling):没有足够多的寄存器来保存需要的临时数据,因此编译 器必须把一些局部变量放在存储器中。 p164 编译器维护关于每个结构类型的信息,指示每个字段(field)的字节偏移。它以这些偏移 作为存储器引用指令的位移,从而产生对结构元素的引用。 p164 许多计算机系统对基本数据类型合法地址做出了一些限制,要求某种类型对象的地址必须是 某个值 K(通常是 2、4 或 8)的倍数。这种对齐限制简化了形成处理器和存储器系统之间 接口的硬件设计。 p170 对于大多数 IA32 指令来说,保持数据对齐能够提高效率,但是它不会影响程序的行为。 另一方面,如果数据未对齐,有些实现多媒体操作的 SSE 指令就无法正确地工作。这些指 令对 16 字节数据块进行操作,在 SSE 单元和存储器之间传送数据的指令要求存储器地址 必须是 16 的倍数。任何视图以不满足对齐要求的地址来访问存储器都会导致异常 (exception),默认的行为是程序终止。 因此,IA32 的一个惯例是,确保每个栈帧的长度都是 16 字节的整数倍。编译器就可以在 栈帧中以每个块的存储都是 16 字节对齐的方式来分配存储。 p170~p171 确保每种数据类型都按照指定方式来组织和分配,即每种类型的对象都满足它的对齐限制, 就可保证实施对齐。 p171 今天使用的 IA32 格式中的大部分是在 1985 年随着 i386 微处理器的出现所定义的,当时 是将原来 8086 的 16 位指令集扩展到了 32 位。虽然后续的处理器系列引入了新的指令类 型和格式,但是为了保持向后兼容性,许多编译器,包括 GCC,都避免使用这些特性。 条件传送指令,是 Intel 在 1995 年引入的,比起更传统的条件分支,能够产生显著的性 能提升,但是在大多数的 GCC 配置中,都不会产生这样的指令。 p183 4GHz 时钟,每秒运行大约 50 亿条指令。 p184 基于微处理器的系统已经变得很普遍。即使今天的超级计算机也是基于利用许多微处理器并 行计算的能力。 p184 x86-64 是 Intel 指令集到 64 位的一个演化。它保持了与 IA32 完全的后向兼容性,并且 又增加了新的数据格式,以及其他一些特性,使得能力更强,性能更高。 p185 在编译器方面,GCC 的开发者坚定地保持与 i386 的二进制兼容性,即使是 IA32 指令集中 添加了有用的特性,包括条件传送和更现代的浮点指令集。只有以特殊的命令行选项设置编 译时,才会使用这些特性。 p185 x86-64 与 IA32 的一些区别: o 寄存器的数量翻倍至 16 个 o 所有的寄存器都是 64 位长。IA32 寄存器的 64 位扩展分别为 %rax、%rcx、%rdx、 %rbx、%rsi、%rdi、%rsp 和 %rbp。新增加的寄存器命名为 %r8~%r15。 o 可以直接访问每个寄存器的低 32 位。这就给了我们 IA32 中熟悉的那些寄存器: %eax、%ecx、%edx、%ebx、%esi、%edi、%esp和 %ebp,以及 8 个新 32 位寄存器: %r8d~%r15d。 o 可以直接访问每个寄存器的低 16 位。新寄存器的字大小版本命名为 %r8w~%r15w。 o 可以直接访问每个寄存器的低 8 位。新寄存器的字节大小版本命名为 %r8b~%r15b。 o 为了后向兼容性,具有单字节操作数的指令可以直接访问 %rax、%rcx、%rdx 和 %rbx 的第二个字节 寄存器 %rsp 有特殊的状态,它会保存指向栈顶元素的指针。与 IA32 不同的是,没有帧指 针寄存器;可以用寄存器 %rbp 作为通用寄存器。 此外,有一些算术指令对寄存器 %rax 和 %rdx 有特殊的用法。 p188~p189 ATT 格式指令 cqto 在 Intel 和 AMD 文档中被称为 cqo。 ATT 格式指令 cltq 在 Intel 和 AMD 文档中被称为 cdqe。 cltq: 将 %eax 转换成四字 cqto: 转换成八字 p192 x86-64 最多可以有 6 个整型(整数和指针)参数可以通过寄存器进行传递。寄存器按照指 定的顺序来使用,使用的寄存器名对应于所传递的数据的大小 
|----+------+------+------+------+------+------| | | 1 | 2 | 3 | 4 | 5 | 6 | |----+------+------+------+------+------+------| | 64 | %rdi | %rsi | %rdx | %rcx | %r8 | %r9 | | 32 | %edi | %esi | %edx | %ecx | %r8d | %r9d | | 16 | %di | %si | %dx | %cx | %r8w | %r9w | | 8 | %dil | %sil | %dl | %cl | %r8b | %r9b | |----+------+------+------+------+------+------| p195 如果所有的局部变量都能保存在寄存器中,而且这个函数也不会调用其他函数(参考过程调 用的树结构,有时称之为叶子过程(leaf procedure)),那么需要栈的唯一原因就是用来 保存返回地址。 p196 另一方面,使得函数可能需要栈帧的原因如下: o 局部变量太多,不能都放在寄存器中 o 有些局部变量是数组或者结构 o 函数用取地址操作符(&)来计算一个局部变量的地址 o 函数必须将栈上的某些参数传递到另一个函数 o 在修改一个被调用者保存寄存器之前,函数需要保存它的状态 p196 x86-64 过程的栈帧通常有固定的大小,在过程开始时通过减小栈指针(寄存器 %rsp)来设 置。在调用过程中,栈指针保持在固定的位置,使得可以用相对于栈指针的偏移量来访问数 据。因此,就不再需要 IA32 代码中可见的帧指针(寄存器 %ebp)了。 p196 每当一个函数(调用者)要调用另一个函数(被调用者)时,返回地址会被压入栈中。通常, 我们认为这是调用者栈帧的一部分,它编码的是某种调用者的状态。但是,当控制返回到调 用者时,会把这个信息从栈中弹出来,所以它不会影响调用者访问栈帧中值所使用的偏移量。 p196 寄存器保存惯例: 调用者保存:函数可以自由地覆盖这些寄存器的值; 被调用者保存:函数在写这些寄存器之前,必须在栈上保存它们的值 在 x86-64 中,指定为被调用者保存的寄存器有: o %rbx o %rbp o %r12 o %r13 o %r14 o %r15 p197 在 x86-64 的 16 个通用目的寄存器中,我们看到有 6 个是用来传递参数,6 个是由被调 用者保存的临时寄存器,1 个(%rax)保存函数的返回值,还有 1 个(%rsp)作为栈指针, 只剩下 %r10 和 %r11 是作为调用者保存的临时寄存器。当然,当参数少于 6 个或者当函 数用完了参数参数时,就可以使用参数寄存器了,而在产生处最终的结果之前,%rax 可以 重复利用。 p198 x86-64 的一个不同寻常的特性是能够访问栈指针之外的存储器。它要求虚拟存储器管理系 统为这段区域分配存储器。x86-64 ABI 指明程序可以使用当前栈指针之外 128 字节的范围 (即低于当前栈指针的值)。ABI 将这个区域称为红色地带(red zone)。必须保持当栈指 针移动时,红色地带可读可写。 p199 x86-64 遵循一组更严格的对齐要求。对于任何需要 K 字节的标量数据类型来说,它的起始 地址必须是 K 的倍数。 p200 将 x86 处理器带入新纪元的功臣是 AMD 和 GCC 的作者。x86-64 硬件和编程规则的形成改 变了处理器,过去它严重依赖于栈来保存程序的状态,现在则是将最常使用的状态部分保存 在更快并扩展了的寄存器组中。x86 终于赶上了 20 世纪 80 年代早期 RISC 处理器提出的 理念! 
既能运行 IA32 代码又能运行 x86-64 代码的处理器变得越来越常见。现在许多桌面电脑和 笔记本系统都还是运行着它们操作系统的 32 位版本,这也限制了这些机器只能运行 32 位 应用。运行 64 位操作系统的机器,由于其能够运行 32 位和 64 位应用,已经成为高端机 器的普遍选择,例如,数据库服务器和科学计算。将应用从 32 位转换成 64 位最大的缺陷 是指针变量的大小翻倍了,由于许多数据结构都包含指针,这也意味着总的存储器需求也几 乎翻倍了。只有对内存需求超过 IA32 的 4GB 地址空间限制的应用才执行这种 32 位到 64 位的转换。历史表明应用总是能变得充分使用所有可得的处理能力和存储器大小,因此我们 可以很放心地预测,运行 64 位操作系统和应用的 64 位处理器会逐渐变得更普遍。 p200~p201 我们把存储模型、指令和传递规则的组合称为机器的浮点体系结构。 p201 由于 x86 处理器有很长的发展演变历史,它提供了多种浮点体系结构,目前有两种还在使 用。第一种,称为“x87”,可以追溯到早期的 Intel 微处理器,直到现在都还是标准的实 现。第二种,称为“SSE”,是基于较新的对 x86 处理器增加多媒体应用的支持。 p201 在原来的 Intel 机器中,浮点计算是由一个独立的协处理器完成的。 协处理器是一个具有自己的寄存器和执行一组指令的处理能力的单元。这个协处理器用一个 独立的芯片实现,称为 8087、80287 和 i387,分别同处理芯片 8086、80286 和 i386 配 套,因而俗称“x87”。 p201 ================================================ FILE: notes/ch07.txt ================================================ 第 7 章 链接 链接(linking)是将各种代码和数据部分收集起来并组合成为一个单一文件的过程,这个 文件可被加载(或被拷贝)到存储器并执行。 链接可以执行于编译时(compile time),也就是在源代码被翻译成机器代码时;也可以执 行于加载时(load time),也就是在程序被加载器(loader)加载到存储器并执行时;甚 至执行于运行时(run time),由应用程序来执行。 在早期的计算机系统中,链接是手动执行的。在现代系统中,链接是由叫做链接器(linker) 的程序自动执行的。 p448 链接器在软件开发中扮演着一个关键的角色,因为它们使得分离编译(separate compilation)成为可能。我们不用将一个大型的应用程序组织为一个巨大的源文件,而是 可以把它分解为更小、更好管理的模块,可以独立地修改和编译这些模块。当我们改变这些 模块中的一个时,只需简单地重新编译它,并重新链接应用,而不必重新编译其他文件。 p448 为什么要这么麻烦地学习关于链接的知识呢? o 理解链接器将帮助你构造大型程序 构造大型程序的程序员经常会遇到由于缺少模块、缺少库或者不兼容的库版本引起的 链接器错误。除非你理解链接器是如何解析引用、什么是库以及链接器是如何使用库 来解析引用的,否则这类错误将令你感到迷惑和挫败。 o 理解链接器将帮助你避免一些危险的编程错误 UNIX 链接器解析符号引用时所做的决定可以不动声色地影响你程序的正确性。在默 认情况下,错误地定义多个全局变量的程序将通过链接器,而不产生任何警告信息。 由此得到的程序会产生令人迷惑的运行时行为,而且非常难以调试。我们将向你展示 这是如何发生的,以及该如何避免它。 o 理解链接器将帮助你理解语言的作用域规则是如何实现的 例如,全局和局部变量之间的区别是什么?当你定义一个具有 static 属性的变量或 函数时,到底实际意味着什么? 
o 理解链接器将帮助你理解其他重要的系统概念 链接器产生的可执行目标文件在重要的系统功能中扮演着关键角色,比如加载和运行 程序、虚拟存储器、分页和存储器映射。 o 理解链接器将使你能够利用共享库 多年以来,链接都被认为是相当简单和无趣的。然而,随着共享库和动态链接在现代 操作系统中重要性的日益加强,链接成为一个复杂的过程,它为知识丰富的程序员提 供了强大的能力。比如,许多软件产品在运行时使用共享库来升级压缩包装的 (shrink-wrapped)二进制程序。还有,大多数 Web 服务器都依赖于共享库的动态 链接来提供动态内容。 p448 无论是什么样的操作系统、ISA 或者目标文件格式,基本的链接概念是通用的,认识到这一 点是很重要的。 p448 大多数编译系统提供编译驱动程序(compiler driver),它代表用户在需要时调用语言处 理器、编译器、汇编器和链接器。比如,要用 GNU 编译系统构造示例程序,我们就要通过 在外壳中输入下列命令行来调用 GCC 驱动程序: unix> gcc -O2 -g -o p main.c swap.c 驱动程序在将示例程序从 ASCII 码源文件翻译成可执行目标文件时的行为: 1. 运行 C 预处理器(cpp),将 C 源程序 main.c 翻译成一个 ASCII 码的中间文件 main.i unix> cpp [other arguments] main.c /tmp/main.i 2. 运行 C 编译器(cc1),它将 main.i 翻译成一个 ASCII 汇编语言文件 main.s unix> cc1 /tmp/main.i -O2 [other arguments] -o /tmp/main.s 3. 运行汇编器(as),它将 main.s 翻译成一个可重定位目标文件(relocatable object file)main.o: unix> as [other arguments] -o /tmp/main.o /tmp/main.s 4. 运行链接器 ld,将 main.o 和 swap.o(驱动程序经过相同的过程生成 swap.o) 以及一 些必要的系统目标文件组合起来,创建一个可执行目标文件(executable object file) p: unix> ld -o p [system object files and args] /tmp/main.o /tmp/swap.o p449~p450 要运行可执行文件 p,我们在 UNIX 外壳的命令行上输入它的名字: unix> ./p 外壳调用操作系统中一个叫做加载器的函数,它拷贝可执行文件 p 中的代码和数据到存储 器,然后将控制转移到这个程序的开头。 p450 像 UNIX ld 程序这样的静态链接器(static linker)以一组可重定位目标文件和命令行参 数作为输入,生成一个完全链接的可以加载和运行的可执行目标文件作为输出。 输入的可重定位目标文件由各种不同的代码和数据节(section)组成。指令在一个节中, 初始化的全局变量在另一个节中,而未初始化的变量又在另外一个节中。 p450 为了构造可执行文件,链接器必须完成两个主要任务: o 符号解析(symbol resolution) 目标文件定义和引用符号。符号解析的目的是将每个符号引用刚好和一个符号定义联 系起来 o 重定位(relocation) 编译器和汇编器生成从地址 0 开始的代码和数据节。链接器通过把每个符号定义与 一个存储器位置联系起来,然后修改所有对这些符号的引用,使得他们指向这个存储 器位置,从而重定位这些节 要记住关于链接器的一些基本事实:目标文件纯粹是字节块的集合。 这些块中,有些包含程序代码,有些则包含程序数据,而其他的则包含指导链接器和加载器 的数据结构。链接器将这些块连接起来,确定被连接块的运行时位置,并且修改代码和数据 块中的各种位置。 链接器对目标机器了解甚少。产生目标文件的编译器和汇编器已经完成了大部分工作。 p450 目标文件有三种形式: o 可重定位目标文件 包含二进制代码和数据,其形式可以在编译时与其他可重定位目标文件合并起来,创 建一个可执行目标文件 o 可执行目标文件 包含二进制代码和数据,其形式可以被直接拷贝到存储器并执行 o 共享目标文件 一种特殊类型的可重定位目标文件,可以在加载或者运行时被动态地加载到存储器并 链接 p450 编译器和汇编器生成可重定位目标文件(包括共享目标文件)。链接器生成可执行目标文件。 从技术上来说,一个目标模块(object module)就是一个字节序列,而一个目标文件 (object file)就是一个存放在磁盘文件中的目标模块。 p450 ELF头(ELF
header)以一个 16 字节的序列开始,这个序列描述了生成该文件的系统的字的 大小和字节顺序。 ELF头 剩下的部分包含帮助链接器语法分析和解释目标文件的信息。其中包括 ELF头 的大小、 目标文件的类型(如可重定位、可执行或者是共享的)、机器类型(如 IA32)、节头部表 (section header table)的文件偏移,以及节头部表中的条目大小和数量。 不同节的位置和大小是由节头部表描述的,其中目标文件中每个节都有一个固定大小的条目 (entry)。夹在 ELF头 和节头部表之间的都是节。 p451 每个可重定位目标模块 m 都有一个符号表,它包含 m 所定义和引用的符号的信息。在链接 器的上下文中,有三种不同的符号: o 全局符号(global) 由 m 定义并能被其他模块引用的全局符号。全局链接器符号对应于非静态的 C 函数 以及被定义为不带 C static 属性的全局变量 o 外部符号(external) 由其他模块定义并被模块 m 引用的全局符号。这些符号称为外部符号,应对与定义 在其他模块中的 C 函数和变量 o 本地符号(local) 只被模块 m 定义和引用的本地符号。有的本地链接器符号对应与带 static 属性的 C 函数和全局变量。这些符号在模块 m 中随处可见,但是不能被其他模块引用。目 标文件中对应于模块 m 的节和相应的源文件的名字也能获得本地符号 p452 认识到本地链接器符号和本地程序变量的不同是很重要的。.symtab 中的符号表不包含对应 于本地非静态程序变量的任何符号。这些符号在运行时在栈中被管理,链接器对此类符号不 感兴趣。 p452 有趣的是,定义为带有 C static 属性的本地过程变量是不在栈中管理的。相反,编译器在 .data 和 .bss 中为每个定义分配空间,并在符号表中创建一个有唯一名字的本地链接器符 号。 p452 符号表是由汇编器构造的,使用编译器输出到汇编语言 .s 文件的符号。.symtab 节中包含 ELF 符号表。这张符号表包含一个条目的数组。 p453 每个符号都和目标文件的某个节相关联,由 section 字段表示,该字段也是一个到节头部 表的索引。有三个特殊的伪节(pseudo section),他们在节头部表中是没有条目的: o ABS 代表不该被重定位的符号 o UNDEF 代表未定义的符号,也就是在本目标模块中引用,但是却在其他地方定义的符号 o COMMON 表示还未被分配位置的未初始化的数据目标 p453 链接器解析符号引用的方法是将每个引用与它输入的可重定位目标文件的符号表中的一个确 定的符号定义联系起来。 p454 对 C++ 和 Java 中链接器符号的毁坏(mangling) C++ 和 Java 都允许重载方法,这些方法在源代码中有相同的名字,却有不同的参数列表。 那么链接器是如何区别这些不同的重载函数之间的差异呢?C++ 和 Java 中能使用重载函数, 是因为编译器将每个唯一的方法和参数列表组合编码成一个对链接器来说唯一的名字。 这种编码过程叫做毁坏(mangling),而相反的过程叫做恢复(demangling)。 幸运的是,C++ 和 Java 使用兼容的毁坏策略。一个被毁坏的类名字是由名字中字符的整数 数量,后面跟原始名字组成的。比如,类 Foo 被编码成 3Foo。方法被编码为原始方法名, 后面加上 __,加上被毁坏的类名,再加上每个参数的单个字母编码。比如,Foo::bar(int, long)被编码为 bar__3Fooil。毁坏全局变量和模板名字的策略是相似的。 p454~p455 实际上,所有的编译系统都提供一种机制,将所有相关的目标模块打包成为一个单独的文件, 称为静态库(static library)。它可以用作链接器的输入。当链接器构造一个输出的可执 行文件时,它只拷贝静态库里被应用程序引用的目标模块。 p457 在 UNIX 系统中,静态库以一种称为存档(archive)的特殊文件格式存放在磁盘中。存档 文件是一组连接起来的可重定位目标文件的集合,有一个头部用来描述每个成员目标文件的 大小和位置。存档文件名由后缀 .a 标识。 p458 在符号解析的阶段,链接器从左到右按照它们在编译器驱动程序命令行上出现的相同顺序来 扫描可重定位目标文件和存档文件。 在这次扫描中,链接器维持一个可重定位目标文件的集合 E(这个集合中的文件会被合并起 来形成可执行文件),一个未解析的符号(即引用了但是尚未定义的符号)集合 U,以及一 个在前面输入文件中已定义的符号集合 D。 初始时,E、U、D 都是空的。 o 对于命令行上的每个输入文件 f,链接器会判断 f 是一个目标文件还是一个存档文 件。如果 f 
是一个目标文件,那么链接器把 f 添加到 E,修改 U 和 D 来反映 f 中的符号定义和引用,并继续下一个输入文件。 o 如果 f 是一个存档文件,那么链接器就尝试匹配 U 中未解析的符号和由存档文件成 员定义的符号。如果某个存档文件成员 m,定义了一个符号来解析 U 中的一个引用, 那么就将 m 加到 E 中,并且链接器修改 U 和 D 来反映 m 中的符号定义和引用。 对存档文件中所有的成员目标文件都反复进行这个过程,直到 U 和 D 都不再发生变 化。在此时,任何不包含在 E 中的成员目标文件都简单地被丢弃,而链接器将继续 处理下一个输入文件。 o 如果当链接器完成对命令行上输入文件的扫描后,U 是非空的,那么链接器就会输出 一个错误并终止。否则,它会合并和重定位 E 中的目标文件,从而构建输出的可执 行文件 不幸的是,这种算法会导致一些令人困扰的链接时错误,因为命令行上的库和目标文件的顺 序非常重要。在命令行中,如果定义一个符号的库出现在引用这个符号的目标文件之前,那 么引用就不能被解析,链接会失败。 p460 关于库的一般准则是将它们放在命令行的结尾。 p460 一旦链接器完成了符号解析这一步,它就把代码中的每个符号引用和确定的一个符号定义 (即它的一个输入目标模块中的一个符号表条目)联系起来。在此时,链接器就知道它的输 入目标模块中的代码节和数据节的确切大小。现在就可以开始重定位了,在这个步骤中,将 合并输入模块,并为每个符号分配运行时地址。 重定位由两步组成: o 重定位节和符号定义 在这一步中,链接器将所有相同类型的节合并为同一类型的新的聚合节。例如,来自 输入模块的 .data 节被全部合并成一个节,这个节成为输出的可执行目标文件的 .data 节。然后,链接器将运行时存储器地址赋给新的聚合节,赋给输入模块定义的 每个节,以及赋给输入模块定义的每个符号。当这一步完成时,程序中的每个指令和 全局变量都有唯一的运行时存储器地址了。 o 重定位节中的符号引用 在这一步中,链接器修改代码节和数据节中对每个符号的引用,使得它们指向正确的 运行时地址。为了执行这一步,链接器依赖于称为重定位条目(relocation entry) 的可重定位目标模块中的数据结构。 p461 当汇编器生成一个目标模块时,它并不知道数据和代码最终将存放在存储器中的什么位置。 它也不知道这个模块引用的任何外部定义的函数或者全局变量的位置。所以,无论何时汇编 器遇到对最终位置未知的目标引用,它就会生成一个重定位条目,告诉链接器在将目标文件 合并成可执行文件时如何修改这个引用。 p461 代码的重定位条目放在 .rel.text 中。已初始化数据的重定位条目放在 .rel.data 中。 p461 重定位算法的伪代码: foreach section s { foreach relocation entry r { refptr = s + r.offset; /* ptr to reference to be relocated */ /* Relocate a PC-relative reference */ if (r.type == R_386_PC32) { refaddr = ADDR(s) + r.offset; /* ref's run-time address */ *refptr = (unsigned)(ADDR(r.symbol) + *refptr - refaddr); } /* Relocate an absolute reference */ if (r.type == R_386_32) *refptr = (unsigned)(ADDR(r.symbol) + *refptr); } } 假设当算法运行时,链接器已经为每个节(用 ADDR(s) 表示)和每个符号都选择了运行时地址 (用 ADDR(r.symbol) 表示)。 p462 通过调用某个驻留在存储器中称为加载器(loader)的操作系统代码来运行可执行文件。任 何 UNIX 程序都可以通过调用 execve 函数来调用加载器。 加载器将可执行目标文件中的代码和数据从磁盘拷贝到存储器中,然后通过跳转到程序的第 一条指令或入口点(entry point)来运行该程序。这个将程序拷贝到存储器并运行的过程 叫做加载(loading)。 p466 当加载器运行时,它创建一个存储器映像。在可执行文件中段头部表的指导下,加载器将可 执行文件的相关内容拷贝到代码和数据段。接下来,加载器跳转到程序的入口点,也就是符 号 _start 的地址。在 _start 地址处的启动代码(startup code)是在目标文件 crt1.o 中定义的,对所有的 C 程序都是一样的。
p466 加载器实际上是如何工作的? UNIX 系统中的每个程序都运行在一个进程上下文中,有自己的虚拟地址空间。当外壳运行 一个程序时,父外壳生成一个子进程,它是父进程的一个复制品。子进程通过 execve 系统 调用启动加载器。 加载器删除子进程现有的虚拟存储器段,并创建一组新的代码、数据、堆和栈段。新的栈和 堆段被初始化为零。通过将虚拟地址空间中的页映射到可执行文件的页大小的片(chunk), 新的代码和数据段被初始化为可执行文件的内容。 最后,加载器跳转到 _start 地址,它最终会调用应用程序的 main 函数。 除了一些头部信息,在加载过程中没有任何从磁盘到存储器的数据拷贝。直到 CPU 引用一 个被映射的虚拟页才会进行拷贝,此时,操作系统利用它的页面调度机制自动将页面从磁盘 传送到存储器。 p467 静态库有一些明显的缺点。 静态库和所有的软件一样,需要定期维护和更新。如果应用程序员想要使用一个库的最新版 本,他们必须以某种方式了解到该库的更新情况,然后显式地将他们的程序与更新了的库重 新链接。 另一个问题是几乎每个 C 程序都使用标准的 I/O 函数,如 printf 和 scanf。在运行时, 这些函数的代码会被复制到每个运行进程的文本段中。在一个运行 50~100 个进程的典型系 统上,这将是对稀缺的存储器系统资源的极大浪费。 p467 共享库(shared library)是致力于解决静态库缺陷的一个现代创新产物。共享库是一个目 标模块,在运行时,可以加载到任意的存储器地址,并和一个在存储器中的程序链接起来。 这个过程称为动态链接(dynamic linking),是由一个佳作动态链接器(dynamic linker) 的程序来执行的。 共享库也称为共享目标(shared object),在 UNIX 系统中通常用 .so 后缀来表示。微软 的操作系统大量地利用了共享库,它们称为 DLL(动态链接库)。 p467 共享库是以两种不同的方式来“共享”的。 o 在任何给定的文件系统中,对于一个库只有一个 .so 文件 所有引用该库的可执行目标文件共享这个 .so 文件中的代码和数据,而不是像静态 库的内容那样被拷贝和嵌入到引用它们的可执行文件中 o 在存储器中,一个共享库的 .text 节的一个副本可以被不同的正在运行的进程共享 p468 unix> gcc -shared -fPIC -o libvector.so addvec.c multvec.c unix> gcc -o p2 main2.c ./libvector.so 这样就创建了一个可执行文件 p2,而此文件的形式使得它在运行时可以和 libvector.so 链接。基本的思路是当创建可执行文件时,静态执行一些链接,然后在程序加载时,动态完 成链接过程。 认识到这一点很重要:此时,没有任何 libvector.so 的代码和数据真的被拷贝到可执行文 件 p2 中。反之,链接器拷贝了一些重定位和符号表信息,它们使得运行时可以解析对 libvector.so 中代码和数据的引用。 当加载器加载和运行可执行文件 p2 时,加载部分链接的可执行文件 p2。接着,它注意到 p2 包含一个 .interp 节,这个节包含动态链接器的路径名,动态链接器本身就是一个共享 目标(比如,在 Linux 系统上的 ld-linux.so)。加载器不再像它通常那样将控制传递给 应用,而是加载和运行这个动态链接器。 动态链接器通过执行下面的重定位完成链接任务: o 重定位 libc.so 的文本和数据到某个存储器段 o 重定位 libvector.so 的文本和数据到另一个存储器段 o 重定位 p2 中所有对 libc.so 和 libvector.so 定义的符号的引用 最后,动态链接器将控制传递到应用程序。从这个时刻开始,共享库的位置就固定了,并且 在程序执行的过程中都不会改变。 p468 编译库代码,使得不需要链接器修改库代码就可以在任何地址加载和执行这些代码。这样的 代码叫做与位置无关的代码(Position-Independent Code, PIC)。用户对 GCC 使用 -fPIC 选项指示 GNU 编译系统生成 PIC 代码。 p471 编译器通过运用以下这个有趣的事实来生成对全局变量的 PIC 引用:无论我们在存储器中 的何处加载一个目标模块(包括共享目标模块),数据段总是被分配成紧随在代码段后面。 因此,代码段中任何指令和数据段中任何变量之间的距离都是一个运行时常量,与代码段和 数据段的绝对存储器位置是无关的。 为了运用这个事实,编译器在数据段开始的地方创建了一个表,叫做全局偏移量表(Global Offset Table, GOT)。在 GOT 
中,每个被这个目标模块引用的全局数据对象都有一个条目。 编译器还为 GOT 中每个条目生成一个重定位记录。在加载时,动态链接器会重定位 GOT 中 的每个条目,使得它包含正确的绝对地址。每个引用全局数据的目标模块都有自己的 GOT。 p471 PIC 代码有性能缺陷。每个全局变量的引用需要额外的 4 条指令,对每个运行时过程调用 需要额外的 3 条指令。 p471~p472 延迟绑定(lazy binding),将过程地址的绑定推迟到第一次调用该过程时。 延迟绑定是通过两个数据结构之间简洁但又有些复杂的交互来实现的,这两个数据结构是: GOT 和 PLT(过程连接表,Procedure Linkage Table)。如果一个目标模块调用定义在共 享库中的任何函数,那么它就有自己的 GOT 和 PLT。 GOT 是 .data 节的一部分,PLT 是 .text 节的一部分。 p472 ================================================ FILE: notes/ch08.txt ================================================ 第 8 章 异常控制流 从给处理器加电开始,直到断电为止,程序计数器假设一个值的序列 a[0],a[1],...,a[n-1] 其中,每个 a[k] 是某个相应的指令 I[k] 的地址。每次从 a[k] 到 a[k+1] 的过渡称为控 制转移(control transfer)。这样的控制转移序列叫做处理器的控制流(flow of control 或 control flow)。 p480 现代系统通过使控制流发生突变来对系统状态做出反应。一般而言,我们把这些突变称为异 常控制流(Exceptional Control Flow, ECF)。异常控制流发生在计算机系统的各个层次。 p480 作为程序员,理解 ECF 很重要,这有很多原因: o 帮助你理解重要的系统概念。ECF 是操作系统用来实现 I/O、进程和虚拟存储器的基 本机制。在能够真正理解这些重要概念之前,你必须理解 ECF。 o 帮助你理解应用程序是如何与操作系统交互的。应用程序通过一个叫做陷阱(trap) 或者系统调用(system call)的 ECF 形式,向操作系统请求服务。比如,向磁盘写 数据、从网络读取数据、创建一个新进程,以及终止当前进程,都是通过应用程序调 用系统调用来实现的。理解基本的系统调用机制将帮助你理解是如何向应用出那个需 提供这些服务的。 o 帮助你编写有趣的新应用程序。操作系统为应用程序提供了强大的 ECF 机制,用来创 建进程、等待进程终止、通知其他进程系统中的异常事件,以及检测和响应这些事件。 如果你理解这些 ECF 机制,那么你就能用它们来编写诸如 UNIX 外壳和 Web 服务器 之类的有趣程序了。 o 帮助你理解并发。ECF 是计算机系统中实现并发的基本机制。中断应用程序、进程和 线程(它们的执行在时间上是重叠的)执行的异常处理程序和中断应用程序执行的信 号处理程序都是在运行中的并发的例子。理解 ECF 是理解并发的第一步。 o 帮助你理解软件异常如何工作。像 C++ 和 Java 这样的语言通过 try、catch 以及 throw 语句来提供软件异常机制。软件异常允许程序进行非本地跳转(违反通常的调 用/返回栈规则的跳转)来响应错误情况。非本地跳转是一种应用层 ECF,在 C 中是 通过setjmp() 和 longjmp() 函数提供的。理解这些低级函数将帮助你理解高级软件 异常如何得以实现。 p480 异常是异常控制流的一种形式,它一部分是由硬件实现的,一部分是由操作系统实现的。 p481 在任何情况下,当处理器检测到有事件发生时,它就会通过一张叫做异常表(exception table)的跳转表,进行一个间接过程调用(异常),到一个专门设计用来处理这类事件的 操作系统子程序(异常处理程序,exception handler)。 当异常处理程序完成处理后,根据引起异常的事件的类型,会发生以下情况中的一种: o 处理程序将控制返回给当前指令 I[curr],即当事件发生时正在执行的指令 o 处理程序将控制返回给 I[next],即如果没有发生异常将会执行的下一条指令 o 处理程序终止被中断的程序 p481 系统中可能的每种类型的异常都分配了一个唯一的非负整数的异常号(exception number)。 其中一些号码是由处理器的设计者分配的,其他号码是由操作系统内核(操作系统常驻存储 器部分)的设计者分配的。前者的示例包括被零除、缺页、存储器访问违例、断点以及算术 溢出。后者的示例包括系统调用和来自外部 
I/O 设备的信号。 p481~p482 在系统启动时(当计算机重启或者加电时),操作系统分配和初始化一张称为异常表的跳转 表,使得条目 k 包含异常 k 的处理程序的地址。 在运行时(当系统在执行某个程序时),处理器检测到发生了一个事件,并且确定了相应的 异常号 k。随后,处理器触发异常,方法是执行间接过程调用,通过异常表的条目 k 转到 相应的处理程序。异常号是到异常表的索引,异常表的起始地址放在一个叫做异常表基址寄 存器(exception table base register)的特殊 CPU 寄存器里。 p482 异常类似于过程调用,但是有一些重要的不同之处: o 过程调用时,在跳转到处理程序之前,处理器将返回地址压入栈中。然而,根据异常 的类型,返回地址要么是当前指令(当事件发生时正在执行的指令),要么是下一条 指令(如果事件不发生,将会在当前指令后执行的指令) o 处理器也把一些额外的处理器状态压到栈里,在处理程序返回时,重新开始被中断的 程序会需要这些状态。比如,一个 IA32 系统将包含当前条件码和其他内容的 EFLAGS 寄存器压入栈中。 o 如果控制从一个用户程序转移到内核,那么所有这些项目都被压到内核栈中,而不是 压到用户栈中。 o 异常处理程序运行在内核模式下,这意味着它们对所有的系统资源都有完全的访问权限 p482 一旦硬件触发了异常,剩下的工作就是由异常处理程序在软件中完成。在处理程序处理完事 件之后,它通过执行一条特殊的“从中断返回”指令,可选地返回到被中断的程序,该指令 将适当的状态弹回到处理器的控制和数据寄存器中,如果异常中断的是一个用户程序,就将 状态恢复为用户模式,然后将控制返回给被中断的程序。 p482 异常可以分为四类:中断(interrupt)、陷阱(trap)、故障(fault)和终止(abort)。 o 中断 中断是异步发生的,是来自处理器外部的 I/O 设备的信号的结果 o 陷阱 陷阱是有意的异常,是执行一条指令的结果。陷阱最重要的用途是在用户程序和 内核之间提供一个像过程一样的接口,叫做系统调用。 o 故障 故障是由错误情况引起的,它可能能够被故障处理程序修正。一个经典的故障 示例是缺页异常。 o 终止 终止是不可恢复的致命错误造成的结果,通常是一些硬件错误,比如 DRAM 或者 SRAM 位被损坏时发生的奇偶错误。终止处理程序从不将控制返回给应用程序。 p482~p484 各种异常类型的术语是根据系统的不同而有所不同的。处理器宏体系结构 (macroarchitecture)规范通常会区分异步的“中断”和同步的“异常”,但是并没有提 供描述这些非常相似的概念的概括性的术语。为了避免不断地提供“异常和中断”以及“异 常或者中断”,我们用“异常”作为通用的术语,而且只有在必要时才区别异步异常(中断) 和同步异常(陷阱、故障和终止)。正如我们提到过的,对于每个系统而言,基本的概念都 是相同的,但是你应该意识到一些制造厂商的手册会用“异常”仅仅表示同步事件引起的控 制流的改变。 p486 进程的经典定义就是:一个执行中的程序的实例。系统中的每个程序都是运行在某个进程的 上下文(context)中的。上下文是由程序正确运行所需的状态组成的。这个状态包括存放在 存储器中的程序的代码和数据,它的栈、通用目的寄存器的内容、程序计数器、环境变量以 及打开文件描述符的集合。 p487 进程提供给应用程序的关键抽象: o 一个独立的逻辑控制流,它提供一种假象,好像我们的程序独占地使用处理器 o 一个私有的地址空间,它提供一个假象,好像我们的程序独占地使用存储器系统 p487 操作系统使用一种称为上下文切换(context switch)的较高层形式的异常控制流来实现多 任务。上下文切换机制是建立在较低层异常机制之上的。 内核为每个进程维持一个上下文。上下文就是内核重新启动一个被抢占的进程所需的状态。 它由一些对象的值组成,这些对象包括通用目的寄存器、浮点寄存器、程序计数器、用户栈、 状态寄存器、内核栈和各种内核数据结构,比如描绘地址空间的页表、包含有关当前进程信 息的进程表,以及包含进程已打开文件的信息的文件表。 在进程执行的某些时刻,内核可以决定抢占当前进程,并重新开始一个先前被抢占的进程。 这种决定就叫做调度(schedule),是由内核中称为调度器(scheduler)的代码处理的。 当内核选择一个新的进程运行时,我们就说内核调度了这个进程。在内核调度了一个新的进 程运行后,它就抢占当前进程,并使用一种称为上下文切换的机制来将控制转移到新的进程。 上下文切换: o 保存当前进程的上下文 o 恢复某个先前被抢占的进程被保存的上下文 o 将控制传递给这个新恢复的进程 p489~p490
高速缓存污染(pollution)和异常控制流 一般而言,硬件高速缓存存储器不能和诸如中断和上下文切换这样的异常控制流很好地交互。 如果当前进程被一个中断暂时中断,那么对于中断处理程序来说高速缓存是冷的(cold) (译者注:“高速缓存是冷的”意思是程序所需要的数据都不在高速缓存中)。如果处理程 序从主存中访问了足够多的表项,那么当被中断的进程继续时,高速缓存对它来说也是冷的 了。在这种情况下,我们就说中断处理程序污染(pollute)了高速缓存。使用上下文切换 也会发生类似的现象。当一个进程在上下文切换后继续执行时,高速缓存对于应用程序而言 也是冷的,必须再次热身。 p490 一个信号就是一条小消息,它通知进程系统中发生了一个某种类型的事件。每种信号类型都 对应于某种系统事件。 p504~p505 传送一个信号到达目的进程是由两个不同步骤组成的: o 发送信号。内核通过更新目的进程上下文中的某个状态,发送(递送)一个信号给目 的进程。发送信号可以有如下原因:1)内核检测到一个系统事件,比如被零除错误或 者子进程终止。2)一个进程调用了 kill 函数,显式地要求内核发送一个信号给目的进 程。一个进程可以发送信号给它自己。 o 接收信号。当目的进程被内核强迫以某种方式对信号的发送做出反应时,目的进程就 接收了信号。进程可以忽略这个信号,终止或者通过执行一个称为信号处理程序 (signal handler)的用户层函数捕获这个信号。 一个只发出而没有被接收的信号叫做待处理信号(pending signal)。在任何时刻,一种类 型至多只会有一个待处理信号。如果一个进程有一个类型为 k 的待处理信号,那么任何接下来发送到这个进程的类型为 k 的 信号都不会排队等待,它们只是被简单地丢弃。 一个进程可以有选择性地阻塞接收某种信号。当一种信号被阻塞时,它仍可以被发送,但是 产生的待处理信号不会被接收,直到进程取消对这种信号的阻塞。 一个待处理信号最多只能被接收一次。内核为每个进程在 pending 位向量中维护着待处理 信号的集合,而在 blocked 位向量中维护着被阻塞的信号集合。只要传送了一个类型为 k 的信号,内核就会设置 pending 中的第 k 位,而只要接收了一个类型为 k 的信号,内 核就会清除 pending 中的第 k 位。 p505~p506 C 语言提供了一种用户级异常控制流形式,称为非本地跳转(nonlocal jump),它将控制 直接从一个函数转移到另一个当前正在执行的函数,而不需要经过正常的调用-返回序列。 非本地跳转是通过 setjmp() 和 longjmp() 函数来提供的。 非本地跳转的一个重要应用就是允许从一个深层嵌套的函数调用中立即返回,通常是由检测 到某个错误情况引起的。如果在一个深层嵌套的函数调用中发现了一个错误,我们可以使用 非本地跳转直接返回到一个普通的本地化的错误处理程序,而不是费力地解开调用栈。 非本地跳转的另一个重要应用是使一个信号处理程序分支到一个特殊的代码位置,而不是返 回到被信号到达中断了的指令的位置。 p521~p522 Linux 系统提供了大量的监控和操作进程的有用工具: o strace 打印一个正在运行的程序和它的子进程调用的每个系统调用的轨迹。对于好奇的学生 而言,这是一个令人着迷的工具。用 -static 编译你的程序,能得到一个更干净的、 不带有大量与共享库相关的输出的轨迹。 o ps 列出当前系统中的进程(包括僵死进程) o top 打印出关于当前进程资源使用的信息 o pmap 显示进程的存储器映射 p524 ================================================ FILE: notes/ch09.txt ================================================ 第 9 章 虚拟存储器 虚拟存储器是硬件异常、硬件地址翻译、主存、磁盘文件和内核软件的完美交互,它为每个 进程提供了一个大的、一致的和私有的地址空间。 通过一个很清晰的机制,虚拟存储器提供了三个重要的能力: o 它将主存看成是一个存储在磁盘上的地址空间的高速缓存,在主存中只保存活动区域, 并根据需要在磁盘和主存之间来回传送数据,通过这种方式,它高效地使用了主存 o 它为每个进程提供了一致的地址空间,从而简化了存储器管理 o 它保护了每个进程的地址空间不被其他进程破坏 p534 程序员需要学习虚拟存储器的原因: o 虚拟存储器是中心的。虚拟存储器遍及计算机系统的所有层面,在硬件异常、汇编器、 链接器、加载器、共享对象、文件和进程的设计中扮演着重要角色。理解虚拟存储器 将帮助你更好地理解系统通常是如何工作的。 o
虚拟存储器是强大的。虚拟存储器给予应用程序强大的能力,可以创建和销毁存储器 片(chunk)、将存储器片映射到磁盘文件的某个部分,以及与其他进程共享存储器。 理解虚拟存储器将帮助你利用它的强大功能在你的应用程序中添加动力。 o 虚拟存储器是危险的。每次应用程序引用一个变量、间接引用一个指针,或者调用一 个诸如 malloc 这样的动态分配程序时,它就会和虚拟存储器发生交互。如果虚拟存 储器使用不当,应用将遇到复杂危险的与存储器有关的错误。理解虚拟存储器以及诸 如malloc 之类的管理虚拟存储器的分配程序,可以帮助你避免这些错误。 p534 使用虚拟寻址(virtual addressing)时,CPU 通过生成一个虚拟地址(Virtual Address, VA)来访问主存,这个虚拟地址在被送到存储器之前先转换成适当的物理地址。将一个虚拟 地址转换为物理地址的任务叫做地址翻译(address translation)。就像异常处理一样, 地址翻译需要 CPU 硬件和操作系统之间的紧密合作。 CPU 芯片上叫做存储器管理单元(Memory Management Unit, MMU)的专用硬件,利用存放 在主存中的查询表来动态翻译虚拟地址,该表的内容是操作系统管理的。 p535 地址空间的概念是很重要的,因为它清楚地区分了数据对象(字节)和它们的属性(地址)。 一旦认识到这种区别,那么我们就可以将其推广,允许每个数据对象有多个独立的地址,其 中每个地址都选自一个不同的地址空间。这就是虚拟存储器的基本思想。主存中的每个字节 都有一个选自虚拟地址空间的虚拟地址和一个选自物理地址空间的物理地址。 p536 概念上而言,虚拟存储器(VM)被组织为一个由存放在磁盘上的 N 个连续的字节大小的单 元组成的数组。 在任意时刻,虚拟页面的集合都分为三个不相交的子集: o 未分配的: VM 系统还未分配(或者创建)的页。未分配的块没有任何数据和它们相 关联,因此也就不占用任何磁盘空间。 o 缓存的:当前缓存在物理存储器中的已分配页。 o 未缓存的:没有缓存在物理存储器中的已分配页。 p536 实际上,操作系统为每个进程提供了一个独立的页表,因而也就是一个独立的虚拟地址空间。 p540 注意,多个虚拟页面可以映射到同一个共享物理页面上。 p540 按需页面调度和独立的虚拟地址空间的结合,对系统中存储器的使用和管理造成了深远的影 响。特别地,VM 简化了链接和加载、代码和数据共享,以及应用程序的存储器分配。 p540 将一组连续的虚拟页映射到任意一个文件中的任意位置的表示法称做存储器映射(memory mapping)。UNIX 提供一个称为 mmap 的系统调用,允许应用程序自己做存储器映射。 541 操作系统通过将不同进程中适当的虚拟页面映射到相同的物理页面,从而安排多个进程共享 这部分代码的一个拷贝,而不是在每个进程中都包括单独的内核和 C 标准库的拷贝。 p541 地址翻译机制可以以一种自然地方式扩展到提供更好的访问控制。因为每次 CPU 生成一个 地址时,地址翻译硬件都会读一个页表条目(Page Table Entry,PTE),所以通过在 PTE 上添加一些额外的许可位来控制对一个虚拟页面内容的访问十分简单。 p541 ================================================ FILE: notes/ch11.txt ================================================ 第 11 章 网络编程 所有的网络应用都是基于相同的基本编程模型,有着相似的整体逻辑结构,并且依赖相同的 编程接口。 p614 认识到客户端和服务器是进程,而不是常常提到的机器或者主机,这是很重要的。 一台主机可以同时运行许多不同的客户端和服务器,而且一个客户端和服务器的事务可以在 同一台或是不同的主机上运行。 无论客户端和服务器是怎样映射到主机上的,客户端-服务器模型是相同的。 p614 客户端-服务器事务不是数据库事务,没有数据库事务的任何特性,例如原子性。在我们上 下文中,事务仅仅是客户端和服务器执行的一系列步骤。 p614 对于一个主机而言,网络只是又一种 I/O 设备,作为数据源和数据接收方。 p615 集线器不加分辨地将从一个端口上收到的每个位复制到其他所有的端口上。因此,每台主机 都能看到每个位。 p615 每个以太网适配器都有一个全球唯一的48位地址,它存储在这个适配器的非易失性存储器上。 p615 一台主机可以发送一段位,称为帧(frame),到这个网段内其他任何主机。每个帧包括一些 固定数量的头部(header)位,用来标识此帧的源和目的地址以及此帧的长度,此后紧随的 
就是数据位的有效载荷。每个主机适配器都能看到这个帧,但是只有目的主机实际读取它。 p615 在层次的更高级别中,多个不兼容的局域网可以通过叫做路由器(router)的特殊计算机连 接起来,组成一个互联网络(internet)。 我们总是用小写字母的 internet 描述一般概念,而用大写字母的 Internet 来描述一种具 体的实现,也就是所谓的全球 IP 因特网。 p616 每台路由器对于它所连接到的每个网络都有一个适配器(端口)。路由器也能连接高速点到 点电话连接,这是称为 WAN(Wide-Area Network,广域网)。 p616 互联网络至关重要的特性是,它能由采用完全不同和不兼容技术的各种局域网和广域网组成。 p617 ================================================ FILE: sample/.gitignore ================================================ ================================================ FILE: sample/ch02/.gitignore ================================================ ================================================ FILE: sample/ch02/show-bytes.c ================================================ /* * p28 -- code/data/show-bytes.c * * 打印程序对象的字节表示。这段代码使用强制类型转换来规避类型系统。 */ #include #include #include typedef unsigned char *byte_pointer; void show_bytes(byte_pointer start, int len) { int i; for (i = 0; i < len; i++) printf(" %.2x", start[i]); printf("\n"); } void show_int(int x) { show_bytes((byte_pointer) &x, sizeof(int)); } void show_float(float x) { show_bytes((byte_pointer) &x, sizeof(float)); } void show_pointer(void *x) { show_bytes((byte_pointer) &x, sizeof(void *)); } ================================================ FILE: sample/ch03/buf-overflow.c ================================================ #include /* Sample implementation of library function gets() */ char *our_gets(char *s) { int c; char *dest = s; int gotchar = 0; /* Has at least one character been read? */ while ((c = getchar()) != '\n' && c != EOF) { *dest++ = c; /* No bounds checking! 
*/ gotchar = 1; } *dest++ = '\0'; /* Terminate string */ if (c == EOF && !gotchar) return NULL; /* End of file or error */ return s; } /* Read input line and write it back */ void echo() { char buf[8]; /* Way too small */ our_gets(buf); puts(buf); } ================================================ FILE: sample/ch03/code.c ================================================ /* * p107 * * 使用 GCC 产生汇编代码: * unix> gcc -O1 -S code.c * * 使用 GCC 产生目标代码文件: * unix> gcc -O1 -c code.c */ int accum = 0; int sum(int x, int y) { int t = x + y; accum += t; return t; } ================================================ FILE: sample/ch03/main.c ================================================ /* * p109 * * unix> gcc -O1 -o prog code.o main.c */ int main() { return sum(1, 3); } ================================================ FILE: sample/ch03/simple.c ================================================ /* * p109 * * unix> gcc -S -O1 */ int simple(int *xp, int y) { int t = *xp + y; *xp = t; return t; } ================================================ FILE: sample/ch03/simple_1.c ================================================ /* * unix> gcc -O1 -S -m32 simple_1.c -o code32.s * * 或者 * * unix> gcc -O1 -S -m64 simple_1.c -o code64.s */ long int simple_1(long int *xp, long int y) { long int t = *xp + t; *xp = t; return t; } ================================================ FILE: sample/ch07/.gitignore ================================================ ================================================ FILE: sample/ch07/addvec.c ================================================ /* * 这个文件和 multvec.c 用于制作静态库 libvector.a * * unix> gcc -c addvec.c multvec.c * unix> ar rcs libvector.a addvec.o multvec.o */ void addvec(int *x, int *y, int *z, int n) { int i; for (i = 0; i < n; i++) z[i] = x[i] + y[i]; } ================================================ FILE: sample/ch07/bar1.c ================================================ /* * bar1.c -- p455 */ int main() { return 0; } 
================================================ FILE: sample/ch07/bar2.c ================================================ /* * bar2.c -- p455 */ int x = 15213; void f() { } ================================================ FILE: sample/ch07/bar3.c ================================================ /* * bar3.c -- p455 */ int x; void f() { x = 15212; } ================================================ FILE: sample/ch07/bar4.c ================================================ /* * bar4.c -- p456 */ int x; void f() { x = 15212; } ================================================ FILE: sample/ch07/bar5.c ================================================ /* * bar5.c -- p456 */ double x; void f() { x = -0.0; } ================================================ FILE: sample/ch07/dll.c ================================================ /* * 演示动态加载和链接共享库 * * unix> gcc -shared -fPIC -o libvector.so addvec.c multvec.c * unix> gcc -rdynamic -O2 -o p3 dll.c -ldl */ #include #include #include int x[2] = {1, 2}; int y[2] = {3, 4}; int z[2]; int main() { void *handle; void (*addvec)(int *, int *, int *, int); char *error; /* Dynamically load shared library that contains addvec() */ handle = dlopen("./libvector.so", RTLD_LAZY); if (!handle) { fprintf(stderr, "%s\n", dlerror()); exit(1); } /* Get a pointer to the addvec() function we just loaded */ addvec = dlsym(handle, "addvec"); if ((error = dlerror()) != NULL) { fprintf(stderr, "%s\n", error); exit(1); } /* Now we can call addvec() just like any other function */ addvec(x, y, z, 2); printf("z = [%d %d]\n", z[0], z[1]); /* Unload the shared library */ if (dlclose(handle) < 0) { fprintf(stderr, "%s\n", dlerror()); exit(1); } return 0; } ================================================ FILE: sample/ch07/foo1.c ================================================ /* * foo1.c -- p455 */ int main() { return 0; } ================================================ FILE: sample/ch07/foo2.c ================================================ /* * foo2.c -- 
p455 */ int x = 15213; int main() { return 0; } ================================================ FILE: sample/ch07/foo3.c ================================================ /* * foo3.c -- p455~p456 */ #include void f(void); int x = 15213; int main() { f(); printf("x = %d\n", x); return 0; } ================================================ FILE: sample/ch07/foo4.c ================================================ /* * foo4.c -- p456 */ #include void f(void); int x; int main() { x = 15213; f(); printf("x = %d\n", x); return 0; } ================================================ FILE: sample/ch07/foo5.c ================================================ /* * foo5.c -- p456 */ #include void f(void); /* * 在一台 IA32/Linux 机器上,double 类型是 8 个字节,而 int 类型是 4 个字节。因此, * bar5.c 的第 6 行中的赋值 x=-0.0 将用负零的双精度浮点表示覆盖存储器中 x 和 y 的位置! * * 当我们怀疑有此类错误的时候,使用像 GCC 的 -fno-common 这样的选项调用链接器,这 * 个选项会告诉链接器,在遇到多重定义的全局符号时,输出一条警告信息 */ int x = 15213; int y = 15212; int main() { f(); printf("x = 0x%x, y = 0x%x\n", x, y); return 0; } ================================================ FILE: sample/ch07/linkerror.c ================================================ /* * p454 */ void foo(void); int main(void) { foo(); return 0; } ================================================ FILE: sample/ch07/main.c ================================================ /* * p449 */ void swap(); int buf[2] = {1, 2}; int main() { swap(); return 0; } ================================================ FILE: sample/ch07/main2.c ================================================ /* * 这个文件用于演示静态库的使用 * * 在使用之前,我们需要生成 libvector.a * * unix> gcc -O2 -c main2.c * unix> gcc -static -o p2 main2.o ./libvector.a */ #include #include "vector.h" int x[2] = {1, 2}; int y[2] = {3, 4}; int z[2]; int main() { addvec(x, y, z, 2); printf("z = [%d %d]\n", z[0], z[1]); return 0; } ================================================ FILE: sample/ch07/multvec.c ================================================ /* * 这个文件用于制作 libvector.a 静态库,具体方法见 addvec.c */ void 
multvec(int *x, int *y, int *z, int n) { int i; for (i = 0; i < n; i++) z[i] = x[i] * y[i]; } ================================================ FILE: sample/ch07/swap.c ================================================ /* * p449 */ extern int buf[]; int *bufp0 = &buf[0]; int *bufp1; void swap() { int temp; bufp1 = &buf[1]; temp = *bufp0; *bufp0 = *bufp1; *bufp1 = temp; } ================================================ FILE: sample/ch07/vector.h ================================================ extern void multvec(int *x, int *y, int *z, int n); extern void addvec(int *x, int *y, int *z, int n); ================================================ FILE: sample/ch08/.gitignore ================================================ ================================================ FILE: sample/ch08/alarm.c ================================================ /* * p509 -- code/ecf/alarm.c * * 使用 alarm() 函数来调度周期性事件 * * 这个程序安排自己被 SIGALRM 信号在 5 秒内每秒中断一次。当传送第 6 个 SIGALRM 信 * 号时,它就终止。 * * 注意:这里设置了一个信号处理函数,只要进程收到一个 SIGALRM 信号,就异步地调用 * 该函数,中断 main 程序中的无限 while 循环 * * $ gcc -I../../common alarm.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler(int sig) { static int beeps = 0; printf("BEEP\n"); if (++beeps < 5) Alarm(1); /* Next SIGALRM will be delivered in 1 second */ else { printf("BOMP!\n"); exit(0); } } int main() { Signal(SIGALRM, handler); /* Install SIGALRM handler */ Alarm(1); /* Next SIGALRM will be delivered in 1s */ while (1) { ; /* Signal handler returns control here each time */ } exit(0); } ================================================ FILE: sample/ch08/fork.c ================================================ /* * 8.2 * * A. 子进程的输出是什么? * B. 父进程的输出是什么? 
*/ #include "csapp.h" int main() { pid_t pid; int x = 1; pid = Fork(); if (pid == 0) { /* child */ printf("child: x=%d\n", ++x); exit(0); } /* parent */ printf("parent: x=%d\n", --x); exit(0); } ================================================ FILE: sample/ch08/hello-asm.sa ================================================ ;; p486 ;; 下面的 C 代码等价于下面的汇编代码 ;; int main() ;; { ;; write(1, "hello world\n", 13); ;; exit(0); ;; } .section .data string: .ascii "hello world\n" string_end: .equ len, string_end - string .section .text .global main main: ;; First, call write(1, "hello world\n", 13) movl $4, %eax ; System call number 4 movl $1, %ebx ; stdout has descriptor 1 movl $string, %ecx ; hello world string movl $len, %edx ; String length int $0x80 ; System call code ;; Next, call exit(0) movl $1, %eax ; System call number movl $0, %ebx ; Argument is 0 int $0x80 ; System call code ================================================ FILE: sample/ch08/kill.c ================================================ /* * p508 -- code/ecf/kill.c * * 父进程用 kill() 发送 SIGKILL 信号给它的子进程 * * $ gcc -I../../common kill.c ../../common/csapp.c -lpthread */ #include "csapp.h" int main() { pid_t pid; /* Child sleeps until SIGKILL signal received, then dies */ if ((pid = Fork()) == 0) { Pause(); /* Wait for a signal to arrive */ printf("control should never reach here!\n"); exit(0); } /* Parent sends a SIGKILL signal to a child */ Kill(pid, SIGKILL); exit(0); } ================================================ FILE: sample/ch08/procmask1.c ================================================ /* * p519 -- code/ecf/procmask1.c * * 一个具有细微同步错误的外壳程序。如果子进程在父进程能够开始运行前就结束了,那么 * addjob() 和 deletejob() 会以错误的方式被调用 * * 这个程序希望父进程在一个作业列表中记录着它的当前子进程,每个作业条目。 * addjob() 和 deletejob() 分别想这个作业列表添加和从中删除作业。 * * 当父进程创建一个新的子进程时,它就把这个子进程添加到作业列表中。当父进程在 * SIGCHLD 处理程序中回收一个终止的(僵死)子进程时,它就从作业列表中删除这个子进 * 程。乍一看,这段代码是对的。不幸的是,可能发生下面的情况: * * 1. 父进程执行 fork(),内核调度新创建的子进程运行,而不是父进程 * * 2. 
在父进程能够再次运行之前,子进程就终止,并且变成一个僵死进程,使得内核传递 * 一个 SIGCHLD 信号给父进程 * * 3. 后来,当父进程再次变成可运行但又在它执行之前,内核注意到待处理的 SIGCHLD 信 * 号,并通过在父进程中运行处理程序接收这个信号 * * 4. 处理程序回收终止的子进程,并调用 deletejob(),这个函数什么都不做,因为父进 * 程还没有把该子进程添加到列表中 * * 5. 在处理程序运行结束后,内核运行父进程,父进程从 fork() 返回,通过调用 * addjob() 错误地把(不存在的)子进程添加到作业列表中 * * 因此,对于父进程的 main 函数流和信号处理流的某些交错,可能会在 addjob() 之前调 * 用 deletejob()。这导致作业列表中出现一个不正确的条目,对应于一个不再存在而且永 * 远不会被删除的作业。另一方面,也有一些交错,事件按照正确的顺序发生。例如,如果 * 在 fork() 调用返回时,内核刚好调度父进程而不是子进程运行,那么父进程就会正确地 * 把子进程添加到作业列表中,然后子进程终止,信号处理函数把该作业从列表中删除。 * * 这是一个称为竞争(race)的经典同步错误的示例。在这种情况下,main() 中调用 * addjob() 和处理程序中调用 deletejob() 之间存在竞争。如果 addjob() 赢得进展,那 * 么结果就是正确的。如果它没有,那么结果就是错误的。这样的错误非常难以调试,因为 * 几乎不可能测试所有的交错。你可能运行这段代码十亿次,也没有一次错误,但是下一次 * 测试却导致引发竞争的交错。 */ void handler(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) /* Reap a zombie child */ deletejob(pid); /* Delete the child from the job list */ if (errno != ECHILD) unix_error("waitpid error"); } int main(int argc, char **argv) { int pid; Signal(SIGCHLD, handler); initjobs(); /* Initialize the job list */ while (1) { /* Child process */ if ((pid = Fork()) == 0) { Execve("/bin/date", argv, NULL); } /* Parent process */ addjob(pid); /* Add the child to the job list */ } exit(0); } ================================================ FILE: sample/ch08/procmask2.c ================================================ /* * p520 -- code/ecf/procmask2.c * * 这个程序消除了 procmask1.c 中的竞争,用 sigprocmask() 来同步进程。 * * 通过在调用 fork() 之前,阻塞 SIGCHLD 信号,然后在我们调用了 addjob() 之后就取 * 消阻塞这些信号,我们保证了在子进程被添加到作业列表之后回收该子进程。 * * 注意,子进程继承了它们父进程的被阻塞集合,所以我们必须在调用 execve() 之前,小 * 心地解除子进程中阻塞的 SIGCHLD 信号。 * * 这样,父进程保证在相应的 deletejob() 之前执行 addjob()。 */ #include "csapp.h" void handler(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) /* Reap a zombie child */ deletejob(pid); /* Delete the child from the job list */ if (errno != ECHILD) unix_error("waitpid error"); } int main(int argc, char **argv) { int pid; sigset_t mask; Signal(SIGCHLD, handler); initjobs(); /* Initialize the job list 
*/ while (1) { Sigemptyset(&mask); Sigaddset(&mask, SIGCHLD); Sigprocmask(SIG_BLOCK, &mask, NULL); /* Block SIGCHLD */ /* Child process */ if ((pid = Fork()) == 0) { Sigprocmask(SIG_UNBLOCK, &mask, NULL); /* Unblock SIGCHLD */ Execve("/bin/date", argv, NULL); } /* Parent process */ addjob(pid); /* Add the child to the job list */ Sigprocmask(SIG_UNBLOCK, &mask, NULL); /* Unblock SIGCHLD */ } exit(0); } ================================================ FILE: sample/ch08/restart.c ================================================ /* * p523 -- code/ecf/restart.c * * 非本地跳转的另一个重要应用是使一个信号处理程序分支到一个特殊的代码位置,而不是 * 返回到被信号到达中断了的指令的位置。 * * 这个示例程序,在一个用户键入 ctrl-c 时,使用非本地跳转来重启到它自身 * * 在程序第一次启动时,对 sigsetjmp() 的初始调用保存调用环境和信号的上下文(包括 * 待处理的和被阻塞的信号向量)。随后,主函数进入一个无限处理循环。当用户键入 * ctrl-c 时,外壳发送一个 SIGINT 信号给这个进程,该进程捕获这个信号。不是从信号 * 处理程序返回,如果是这样信号处理程序会将控制返回给被中断的处理循环,反之,处理 * 程序执行一个非本地跳转,回到主函数的开始处。 * * unix> gcc -I../../common restart.c ../../common/csapp.c -lpthread */ #include "csapp.h" sigjmp_buf buf; void handler(int sig) { siglongjmp(buf, 1); } int main() { Signal(SIGINT, handler); if (!sigsetjmp(buf, 1)) printf("starting\n"); else printf("restarting\n"); while (1) { Sleep(1); printf("processing...\n"); } exit(0); } ================================================ FILE: sample/ch08/rfork.c ================================================ /* * p521 -- code/ecf/rfork.c * * 下面的代码包含了一个暴露竞争的简便技巧 * * 像 procmask2.c 那样的竞争难以发现,因为它们依赖于内核相关的调度决策。在一次 * fork() 调用之后,有些内核调度子进程先运行,而有些内核调度父进程先运行。如果你 * 要在后一种系统上运行 procmask1.c 的代码,它绝不会失败,无论你测试多少遍。但是 * 一旦在前一种系统上运行这段代码,那么竞争就会暴露出来,代码会失败。 * * 下面的代码是一个 fork() 的包装函数,它随机地决定父进程和子进程执行的顺序。父进 * 程和子进程扔一枚硬币来决定谁会休眠,因而给另一个进程被调度的机会。 * * 如果我们运行这个代码多次,那么我们就有极高的概率会测试到父子进程执行的两种顺序, * 无论这个特定内核的调度策略是什么样子的。 */ #include #include #include #include #include /* Sleep for a random period between [0, MAX_SLEEP] us. 
*/ #define MAX_SLEEP 100000 /* Macro that maps val into the range [0, RAND_MAX] */ #define CONVERT(val) (((double)val)/(double)RAND_MAX) pid_t Fork(void) { static struct timeval time; unsigned bool, secs; pid_t pid; /* Generate a different seed each time the function is called */ gettimeofday(&time, NULL); srand(time.tv_usec); /* Determine whether to sleep in parent of child and for how long */ bool = (unsigned)(CONVERT(rand()) + 0.5); secs = (unsigned)(CONVERT(rand()) * MAX_SLEEP); /* Call the real fork function */ if ((pid = fork()) < 0) return pid; /* Randomly decide to sleep in the parent or the child */ if (pid == 0) { /* Child */ if (bool) { usleep(secs); } } else { /* Parent */ if (!bool) { usleep(secs); } } /* Return the PID like a normal fork call */ return pid; } ================================================ FILE: sample/ch08/setjmp.c ================================================ /* * p523 -- code/ecf/setjmp.c * * 非本地跳转的一个重要应用就是允许从一个深层嵌套的函数调用中立即返回,通常是由检 * 测到某个错误情况引起的。如果在一个深层嵌套的函数调用中发现了一个错误,我们可以 * 使用非本地跳转直接返回到一个普通的本地化的错误处理程序,而不是费力地解开调用栈 * * 这个示例程序说明了非本地跳转将是如何工作的。main() 首先调用 setjmp() 以保存当 * 前的调用环境,然后调用 foo(),foo() 一次调用 bar()。如果 foo() 或 bar() 遇到一 * 个错误,它们立即通过一次 longjmp() 调用从 setjmp() 返回。setjmp() 的非零返回值 * 指明了错误类型,随后可以被解码,且在代码中的某个位置进行处理。 * * unix> gcc -I../../common setjmp.c ../../common/csapp.c -lpthread */ #include "csapp.h" jmp_buf buf; int error1 = 0; int error2 = 1; void foo(void), bar(void); int main() { int rc; rc = setjmp(buf); if (rc == 0) foo(); else if (rc == 1) printf("Detected an error1 condition in foo\n"); else if (rc == 2) printf("Detected an error2 condition in foo\n"); else printf("Unknown error condition in foo\n"); exit(0); } /* Deeply nested function foo */ void foo(void) { if (error1) longjmp(buf, 1); bar(); } void bar(void) { if (error2) longjmp(buf, 2); } ================================================ FILE: sample/ch08/shellex.c ================================================ /* * p502~p504 -- code/ecf/shellex.c * * unix> gcc 
-I../../common shellex.c ../../common/csapp.c -lpthread */ #include "csapp.h" #define MAXARGS 128 /* Function prototypes */ void eval(char *cmdline); int parseline(char *buf, char **argv); int builtin_command(char **argv); int main() { char cmdline[MAXLINE]; /* Command line */ while (1) { /* Read */ printf("> "); Fgets(cmdline, MAXLINE, stdin); if (feof(stdin)) exit(0); /* Evaluate */ eval(cmdline); } } /* eval - Evaluate a command line */ void eval(char *cmdline) { char *argv[MAXARGS]; /* Argument list execve() */ char buf[MAXLINE]; /* Holds modified command line */ int bg; /* Should the job run in bg or fg? */ pid_t pid; /* Process id */ strcpy(buf, cmdline); bg = parseline(buf, argv); if (argv[0] == NULL) return; /* Ignore empty lines */ if (!builtin_command(argv)) { if ((pid = Fork()) == 0) { /* Child runs user job */ if (execve(argv[0], argv, environ) < 0) { printf("%s: Command not found.\n", argv[0]); exit(0); } } /* Parent waits for foreground job to terminate */ if (!bg) { int status; if (waitpid(pid, &status, 0) < 0) unix_error("waitfg: waitpid error"); } else { printf("%d %s", pid, cmdline); } } return; } /* If first arg is a builtin command, run it and return true */ int builtin_command(char **argv) { if (!strcmp(argv[0], "quit")) /* quit command */ exit(0); if (!strcmp(argv[0], "&")) /* Ignore singleton & */ return 1; return 0; /* Not a builtin command */ } /* parseline - Parse the command line and build the argv array */ int parseline(char *buf, char **argv) { char *delim; /* Points to first space delimiter */ int argc; /* Number of args */ int bg; /* Background job? 
*/ buf[strlen(buf)-1] = ' '; /* Replace trailing '\n' with space */ while (*buf && (*buf == ' ')) /* Ignore leading spaces */ buf++; /* Build the argv list */ argc = 0; while ((delim = strchr(buf, ' '))) { argv[argc++] = buf; *delim = '\0'; buf = delim + 1; while (*buf && (*buf == ' ')) /* Ignore spaces */ buf++; } argv[argc] = NULL; if (argc == 0) /* Ignore blank line */ return 1; /* Should the job run in the background? */ if ((bg = (*argv[argc-1] == '&')) != 0) argv[--argc] = NULL; return bg; } ================================================ FILE: sample/ch08/sigint1.c ================================================ /* * p510 -- code/ecf/sigint1.c * * 这个程序捕获用户在键盘上输入 Ctrl-C 时外壳发送的 SIGINT 信号。SIGINT 的默认行 * 为是立刻终止该进程。在这个示例中,我们将默认行为修改为捕获信号,输出一条信息, * 然后终止该进程 * * $ gcc -I../../common sigint1.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler(int sig) /* SIGINT handler */ { printf("Caught SIGINT\n"); exit(0); } int main() { /* Install the SIGINT handler */ if (signal(SIGINT, handler) == SIG_ERR) unix_error("signal error"); pause(); /* Wait for the receipt of a signal */ exit(0); } ================================================ FILE: sample/ch08/signal1.c ================================================ /* * p512 -- code/ecf/signal1.c * * 这个程序的基本结构是一个父进程创建一些子进程,这些子进程独立运行一会儿,然后终 * 止。父进程必须回收子进程,以避免在系统中留下僵死进程。父进程在子进程运行时可以 * 自由地做其他工作。 * * 这个程序是有缺陷的,因为它无法处理信号阻塞、信号不排队等待和系统调用被中断这些 * 情况 * * 哪里出错了呢?问题在于我们的代码没有解决信号可以阻塞和不会排队等待这样的情况。 * 发生的情况是:父进程接受并捕获了第一个信号。当处理程序还在处理第一个信号时,第 * 二个信号就传送并添加到了待处理信号集合里。然而,因为 SIGCHLD 信号被 SIGCHLD 处 * 理程序阻塞了,所以第二个信号就不会被接收。此后不久,就在处理程序还在处理第一个 * 信号时,第三个信号到达了。因为已经有了一个待处理的 SIGCHLD,第三个 SIGCHLD 信 * 号会被丢弃。一段时间之后,处理程序返回,内核注意到有一个待处理的 SIGCHLD 信号, * 就迫使父进程接收这个信号。父进程捕获这个信号,并第二次执行处理程序。在处理程序 * 完成对第二个信号的处理之后,已经没有待处理的 SIGCHLD 信号了,而且也绝不会有, * 因为第三个 SIGCHLD 的所有信息都已经丢失了。 * * 由此得到的重要教训是,不可以用信号来对其他进程中发生的事件计数 * * $ gcc -I../../common signal1.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler1(int sig) { pid_t pid; if ((pid = waitpid(-1, NULL, 
0)) < 0) unix_error("waitpid error"); printf("Handler reaped child %d\n", (int)pid); Sleep(2); return; } int main() { int i, n; char buf[MAXBUF]; if (signal(SIGCHLD, handler1) == SIG_ERR) unix_error("signal error"); /* Parent creates children */ for (i = 0; i < 3; i++) { if (Fork() == 0) { printf("Hello from child %d\n", (int)getpid()); Sleep(1); exit(0); } } /* Parent waits for terminal input and then processes it */ if ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0) unix_error("read"); printf("Parent processing input\n"); while (1) ; exit(0); } ================================================ FILE: sample/ch08/signal2.c ================================================ /* * p512 -- code/ecf/signal2.c * * 这个程序是 signal1.c 的改进版本,它能够正确解决信号会阻塞和不会排队等待的情况。 * * 修复 signal1.c 的缺陷的一个思路是:存在一个待处理的信号只是暗示自进程最后一次 * 收到一个信号以来,至少已经有一个这种类型的信号被发送了。所以我们必须修改 * SIGCHLD 处理程序,使得每次 SIGCHLD 处理程序被调用时,回收尽可能多的僵死子进程。 * * 然而,它没有考虑系统调用被中断的可能性。 * * 在一个较老版本的 Solaris 系统中,在从键盘上进行输入之前,被阻塞的 read 系统调 * 用就提前返回一个错误。出现这个问题是因为在特定的 Solaris 系统上,诸如 read 这 * 样的慢速系统调用在被信号发送中断后,是不会自动重启的。相反,和 Linux 系统自动 * 重启被中断的系统调用不同,它们会提前返回给调用应用程序一个错误条件。 * * $ gcc -I../../common signal2.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler2(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) printf("Handler reaped child %d\n", (int)pid); if (errno != ECHILD) unix_error("waitpid error"); Sleep(2); return; } int main() { int i, n; char buf[MAXBUF]; if (signal(SIGCHLD, handler2) == SIG_ERR) unix_error("signal error"); /* Parent creates children */ for (i = 0; i < 3; i++) { if (Fork() == 0) { printf("Hello from child %d\n", (int)getpid()); Sleep(1); exit(0); } } /* Parent waits for terminal input and then processes it */ if ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0) unix_error("read"); printf("Parent processing input\n"); while (1) ; exit(0); } ================================================ FILE: sample/ch08/signal3.c ================================================ /* * p515 -- code/ecf/signal3.c * 
* 这个程序是 signal2.c 的改进版本,它正确地解决了系统调用可能被中断的可能性。 * * 为了编写可移植的信号处理代码,我们必须考虑系统调用过早返回的可能性,然后当它发 * 生时手动重启它们。 * * $ gcc -I../../common signal3.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler2(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) printf("Handler reaped child %d\n", (int)pid); if (errno != ECHILD) unix_error("waitpid error"); Sleep(2); return; } int main() { int i, n; char buf[MAXBUF]; pid_t pid; if (signal(SIGCHLD, handler2) == SIG_ERR) unix_error("signal error"); /* Parent creates children */ for (i = 0; i < 3; i++) { pid = Fork(); if (pid == 0) { printf("Hello from child %d\n", (int)getpid()); Sleep(1); exit(0); } } /* Manually restart the read call if it is interrupted */ while ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0) if (errno != EINTR) unix_error("read"); printf("Parent processing input\n"); while (1) ; exit(0); } ================================================ FILE: sample/ch08/signal4.c ================================================ /* * p518 -- code/ecf/signal4.c * * signal2.c 的一个版本,该版本通过使用 Signal 包装函数得到可移植的信号处理语义 * * $ gcc -I../../common signal4.c ../../common/csapp.c -lpthread */ #include "csapp.h" void handler2(int sig) { pid_t pid; while ((pid = waitpid(-1, NULL, 0)) > 0) printf("Handler reaped child %d\n", (int)pid); if (errno != ECHILD) unix_error("waitpid error"); Sleep(2); return; } int main() { int i, n; char buf[MAXBUF]; pid_t pid; Signal(SIGCHLD, handler2); /* sigaction error-handling wrapper */ /* Parent creates children */ for (i = 0; i < 3; i++) { pid = Fork(); if (pid == 0) { printf("Hello from child %d\n", (int)getpid()); Sleep(1); exit(0); } } /* Parent waits for terminal input and then processes it */ if ((n = read(STDIN_FILENO, buf, sizeof(buf))) < 0) unix_error("read"); printf("Parent processing input\n"); while (1) ; exit(0); } ================================================ FILE: sample/ch08/waitpid1.c ================================================ /* * p498 -- code/ecf/waitpid1.c * * 
使用 waitpid 函数不按照特定的顺序回收僵死子进程 */ #include "csapp.h" #define N 2 int main(void) { int status, i; pid_t pid; /* Parent creates N children */ for (i = 0; i < N; i++) if ((pid = Fork()) == 0) /* Child */ exit(100+i); /* Parent reaps N children in no particular order */ while ((pid = waitpid(-1, &status, 0)) > 0) { if (WIFEXITED(status)) printf("child %d terminated normally with exit status=%d\n", pid, WEXITSTATUS(status)); else printf("child %d terminated abnormally\n", pid); } /* The normal termination is if there are no more children */ if (errno != ECHILD) unix_error("waitpid error"); exit(0); } ================================================ FILE: sample/ch08/waitpid2.c ================================================ /* * p498 -- code/ecf/waitpid2.c * * 使用 waitpid 按照创建子进程的顺序来回收僵死子进程 */ #include "csapp.h" #define N 2 int main(void) { int status, i; pid_t pid[N], retpid; /* Parent creates N children */ for (i = 0; i < N; i++) if ((pid[i] = Fork()) == 0) /* Child */ exit(100+i); /* Parent reaps N children is order */ i = 0; while ((retpid = waitpid(pid[i++], &status, 0)) > 0) { if (WIFEXITED(status)) printf("child %d terminated normally with exit status=%d\n", retpid, WEXITSTATUS(status)); else printf("child %d terminated abnormally\n", retpid); } /* The only normal termination is if there are no more children */ if (errno != ECHILD) unix_error("waitpid error"); exit(0); } ================================================ FILE: sample/ch09/.gitignore ================================================ ================================================ FILE: sample/ch09/memlib.c ================================================ #include "csapp.h" #define MAX_HEAP (1<<20) /* Private global variables */ static char *mem_heap; /* Points to first byte of heap */ static char *mem_brk; /* Points to last byte of heap plus 1 */ static char *mem_max_addr; /* Max legal heap addr plus 1 */ /* * mem_init - Initialize the memory system model */ void mem_init(void) { mem_heap = (char 
*)Malloc(MAX_HEAP); mem_brk = (char *)mem_heap; mem_max_addr = (char *)(mem_heap + MAX_HEAP); } /* * mem_sbrk - Simple model of the sbrk function. Extends the heap * by incr bytes and returns the start address of the new area. In * this model, the heap cannot be shrunk. */ void *mem_sbrk(int incr) { char *old_brk = mem_brk; if ((incr < 0) || ((mem_brk + incr) > mem_max_addr)) { errno = ENOMEM; fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n"); return (void *)-1; } mem_brk += incr; return (void *)old_brk; } ================================================ FILE: sample/ch09/mm-test.c ================================================ #include extern void mem_init(void); extern int mm_init(void); extern void mm_free(void *bp); extern void *mm_malloc(size_t size); int main(void) { mem_init(); if (mm_init() < 0) return -1; char *buf = mm_malloc(1); printf("buf : %p\n", buf); if (buf) mm_free(buf); char *buf2 = mm_malloc(4); printf("buf2: %p\n", buf2); if (buf2) mm_free(buf2); char *buf3 = mm_malloc(8); printf("buf3: %p\n", buf3); char *buf4 = mm_malloc(12); printf("buf4: %p\n", buf4); if (buf3) mm_free(buf3); if (buf4) mm_free(buf4); buf = mm_malloc(4); printf("buf : %p\n", buf); buf2 = mm_malloc(9); printf("buf2: %p\n", buf2); if (buf) mm_free(buf); if (buf2) mm_free(buf2); return 0; } ================================================ FILE: sample/ch09/mm.c ================================================ #include /* Basic constants and macros */ #define WSIZE 4 /* Word and header/footer size (bytes) */ #define DSIZE 8 /* Double word size (bytes) */ #define CHUNKSIZE (1<<12) /* Extend heap by this amount (bytes) */ #define MAX(x, y) ((x) > (y) ? 
(x) : (y)) /* Pack a size and allocated bit into a word */ #define PACK(size, alloc) ((size) | (alloc)) /* Read and write a word at address p */ #define GET(p) (*(unsigned int *)(p)) #define PUT(p, val) (*(unsigned int *)(p) = (val)) /* Read the size and allocated fields from address p */ #define GET_SIZE(p) (GET(p) & ~0x7) #define GET_ALLOC(p) (GET(p) & 0x1) /* Given block ptr bp, compute address of its header and footer */ #define HDRP(bp) ((char *)(bp) - WSIZE) #define FTRP(bp) ((char *)(bp) + GET_SIZE(HDRP(bp)) - DSIZE) /* Given block ptr bp, compute address of next and previous blocks */ #define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char *)(bp) - WSIZE))) #define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char *)(bp) - DSIZE))) static char *heap_listp; static void *extend_heap(size_t words); static void *coalesce(void *bp); static void *find_fit(size_t asize); static void place(void *bp, size_t asize); /* Extern functions from memlib.c */ extern void mem_init(void); extern void *mem_sbrk(int incr); int mm_init(void) { /* Create the initial empty heap */ if ((heap_listp = mem_sbrk(4*WSIZE)) == (void *)-1) return -1; PUT(heap_listp, 0); /* Alignment padding */ PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); /* Prologue header */ PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); /* Prologue footer */ PUT(heap_listp + (3*WSIZE), PACK(0, 1)); /* Epilogue header */ heap_listp += (2*WSIZE); /* Extend the empty heap with a free block of CHUNKSIZE bytes */ if (extend_heap(CHUNKSIZE/WSIZE) == NULL) return -1; return 0; } static void *extend_heap(size_t words) { char *bp; size_t size; /* Allocate an even number of words to maintain aligment */ size = (words % 2) ? 
(words+1) * WSIZE : words * WSIZE; if ((long)(bp = mem_sbrk(size)) == -1) return NULL; /* Initialize free block header/footer and the epilogue header */ PUT(HDRP(bp), PACK(size, 0)); /* Free block header */ PUT(FTRP(bp), PACK(size, 0)); /* Free block footer */ PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1)); /* New epilogue header */ /* Coalesce if the previous block was free */ return coalesce(bp); } void mm_free(void *bp) { size_t size = GET_SIZE(HDRP(bp)); PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size, 0)); coalesce(bp); } static void *coalesce(void *bp) { size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp))); size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp))); size_t size = GET_SIZE(HDRP(bp)); if (prev_alloc && next_alloc) /* Case 1 */ return bp; else if (prev_alloc && !next_alloc) { /* Case 2 */ size += GET_SIZE(HDRP(NEXT_BLKP(bp))); PUT(HDRP(bp), PACK(size, 0)); PUT(FTRP(bp), PACK(size, 0)); } else if (!prev_alloc && next_alloc) { /* Case 3 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))); PUT(FTRP(bp), PACK(size, 0)); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } else { /* Case 4 */ size += GET_SIZE(HDRP(PREV_BLKP(bp))) + GET_SIZE(FTRP(NEXT_BLKP(bp))); PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0)); PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0)); bp = PREV_BLKP(bp); } return bp; } void *mm_malloc(size_t size) { size_t asize; /* Adjusted block size */ size_t extendsize; /* Amount to extend heap if no fit */ char *bp; /* Ignore spurious requests */ if (size == 0) return NULL; /* Adjust block size to include overhead and alignment reqs. */ if (size <= DSIZE) asize = 2*DSIZE; else asize = DSIZE * ((size + (DSIZE) + (DSIZE-1)) / DSIZE); /* Search the free list for a fit */ if ((bp = find_fit(asize)) != NULL) { place(bp, asize); return bp; } /* No fit found. 
Get more memory and place the block */ extendsize = MAX(asize,CHUNKSIZE); if ((bp = extend_heap(extendsize/WSIZE)) == NULL) return NULL; place(bp, asize); return bp; } /* * 9.8 */ static void *find_fit(size_t asize) { /* First fit search */ void *bp; for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(bp)) { if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) { return bp; } } return NULL; /* No fit */ } /* * 9.9 */ static void place(void *bp, size_t asize) { size_t csize = GET_SIZE(HDRP(bp)); if ((csize - asize) >= (2*DSIZE)) { PUT(HDRP(bp), PACK(asize, 1)); PUT(FTRP(bp), PACK(asize, 1)); bp = NEXT_BLKP(bp); PUT(HDRP(bp), PACK(csize-asize, 0)); PUT(HDRP(bp), PACK(csize-asize, 0)); } else { PUT(HDRP(bp), PACK(csize, 1)); PUT(FTRP(bp), PACK(csize, 1)); } } ================================================ FILE: sample/ch10/.gitignore ================================================ ================================================ FILE: sample/ch10/cpfile.c ================================================ /* * p602 * * 使用 RIO 函数一次一行地从标准输入拷贝一个文本文件到标准输出 * * unix> cc -I../../common ../../common/csapp.c cpfile.c -lpthread */ #include "csapp.h" int main(int argc, char *argv[]) { int n; rio_t rio; char buf[MAXLINE]; Rio_readinitb(&rio, STDIN_FILENO); while ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) Rio_writen(STDOUT_FILENO, buf, n); } ================================================ FILE: sample/ch10/cpstdin.c ================================================ /* * 使用 read/write 一次一个字节地从标准输入拷贝到标准输出 * * $ cc -I../../common cpstdin.c ../../common/csapp.c -lpthread */ #include "csapp.h" int main(void) { char c; while (Read(STDIN_FILENO, &c, 1) != 0) Write(STDOUT_FILENO, &c, 1); exit(0); } ================================================ FILE: sample/ch10/statcheck.c ================================================ /* * 查询和处理一个文件的 st_mode 位 * * $ cc -I../../common statcheck.c ../../common/csapp.c -lpthread */ #include "csapp.h" int main(int argc, char **argv) { 
struct stat stat; char *type, *readok; Stat(argv[1], &stat); if (S_ISREG(stat.st_mode)) /* Determine file type */ type = "regular"; else if (S_ISDIR(stat.st_mode)) type = "directory"; else type = "other"; if ((stat.st_mode & S_IRUSR)) /* Check read access */ readok = "yes"; else readok = "no"; printf("type: %s, read: %s\n", type, readok); exit(0); } ================================================ FILE: sample/ch11/echo.c ================================================ /* * p632 * * 读和回送文本行的 echo 函数 * * unix> cc -I../../common ../../common/csapp.c echoserveri.c echo.c -lpthread -o echod */ #include "csapp.h" void echo(int connfd) { size_t n; char buf[MAXLINE]; rio_t rio; Rio_readinitb(&rio, connfd); while ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) { printf("server received %d bytes\n", (int)n); Rio_writen(connfd, buf, n); } } ================================================ FILE: sample/ch11/echoclient.c ================================================ /* * p631 * * echo 客户端的主程序 * * unix> cc -I../../common ../../common/csapp.c echoclient.c -lpthread -o echoclient * unix> ./echoclient */ #include "csapp.h" int main(int argc, char **argv) { int clientfd, port; char *host, buf[MAXLINE]; rio_t rio; if (argc != 3) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } host = argv[1]; port = atoi(argv[2]); clientfd = Open_clientfd(host, port); Rio_readinitb(&rio, clientfd); while (Fgets(buf, MAXLINE, stdin) != NULL) { Rio_writen(clientfd, buf, strlen(buf)); Rio_readlineb(&rio, buf, MAXLINE); Fputs(buf, stdout); } Close(clientfd); exit(0); } ================================================ FILE: sample/ch11/echoserveri.c ================================================ /* * p632 * * 迭代 echo 服务器的主程序 * * unix> cc -I../../common ../../common/csapp.c echoserveri.c echo.c -lpthread -o echod */ #include "csapp.h" void echo(int connfd); int main(int argc, char **argv) { int listenfd, connfd, port, clientlen; struct sockaddr_in clientaddr; struct hostent *hp; char *haddrp; 
if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); /* Determine the domain name and IP address of the client */ hp = Gethostbyaddr((const char *)&clientaddr.sin_addr.s_addr, sizeof(clientaddr.sin_addr.s_addr), AF_INET); haddrp = inet_ntoa(clientaddr.sin_addr); printf("server connected to %s (%s)\n", hp->h_name, haddrp); echo(connfd); Close(connfd); } exit(0); } ================================================ FILE: sample/ch11/hostinfo.c ================================================ /* * p622 * * 检索并打印一个 DNS 主机条目 * * unix> cc -I../../common ../../common/csapp.c hostinfo.c -lpthread -o hostinfo * * unix> ./hostinfo localhost #1 * unix> ./hostname #2 * unix> ./hostinfo bluefish.ics.cs.cmu.edu * unix> ./hostinfo cs.mit.edu * unix> ./hostinfo google.com * unix> ./hostinfo edu */ #include "csapp.h" int main(int argc, char **argv) { char **pp; struct in_addr addr; struct hostent *hostp; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } if (inet_aton(argv[1], &addr) != 0) hostp = Gethostbyaddr((const char *)&addr, sizeof(addr), AF_INET); else hostp = Gethostbyname(argv[1]); printf("official hostname: %s\n", hostp->h_name); for (pp = hostp->h_aliases; *pp != NULL; pp++) printf("alias: %s\n", *pp); for (pp = hostp->h_addr_list; *pp != NULL; pp++) { addr.s_addr = ((struct in_addr *)*pp)->s_addr; printf("address: %s\n", inet_ntoa(addr)); } exit(0); } ================================================ FILE: sample/ch11/tiny/cgi-bin/adder.c ================================================ /* * p638 * * unix> cc -I../../../../common ../../../../common/csapp.c adder.c -lpthread -o adder */ #include "csapp.h" int main(void) { char *buf, *p; char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE]; int n1 = 0, n2 = 0; /* Extract the two arguments */ if ((buf = getenv("QUERY_STRING")) != NULL) { 
p = strchr(buf, '&'); *p = '\0'; strcpy(arg1, buf); strcpy(arg2, p+1); n1 = atoi(arg1); n2 = atoi(arg2); } /* Make the response body */ sprintf(content, "Welcome to add.com: "); sprintf(content, "%sTHE Internet addition portal.\r\n

", content); sprintf(content, "%sThe answer is: %d + %d = %d\r\n

", content, n1, n2, n1+n2); sprintf(content, "%sThanks for visiting!\r\n", content); /* Generate the HTTP response */ printf("Content-length: %d\r\n", (int)strlen(content)); printf("Content-type: text/html\r\n\r\n"); printf("%s", content); fflush(stdout); exit(0); } ================================================ FILE: sample/ch11/tiny/home.html ================================================ Welcome to add.com ================================================ FILE: sample/ch11/tiny/rfc2616.txt ================================================ Network Working Group R. Fielding Request for Comments: 2616 UC Irvine Obsoletes: 2068 J. Gettys Category: Standards Track Compaq/W3C J. Mogul Compaq H. Frystyk W3C/MIT L. Masinter Xerox P. Leach Microsoft T. Berners-Lee W3C/MIT June 1999 Hypertext Transfer Protocol -- HTTP/1.1 Status of this Memo This document specifies an Internet standards track protocol for the Internet community, and requests discussion and suggestions for improvements. Please refer to the current edition of the "Internet Official Protocol Standards" (STD 1) for the standardization state and status of this protocol. Distribution of this memo is unlimited. Copyright Notice Copyright (C) The Internet Society (1999). All Rights Reserved. Abstract The Hypertext Transfer Protocol (HTTP) is an application-level protocol for distributed, collaborative, hypermedia information systems. It is a generic, stateless, protocol which can be used for many tasks beyond its use for hypertext, such as name servers and distributed object management systems, through extension of its request methods, error codes and headers [47]. A feature of HTTP is the typing and negotiation of data representation, allowing systems to be built independently of the data being transferred. HTTP has been in use by the World-Wide Web global information initiative since 1990. This specification defines the protocol referred to as "HTTP/1.1", and is an update to RFC 2068 [33]. Fielding, et al. 
Standards Track [Page 1] RFC 2616 HTTP/1.1 June 1999 Table of Contents 1 Introduction ...................................................7 1.1 Purpose......................................................7 1.2 Requirements .................................................8 1.3 Terminology ..................................................8 1.4 Overall Operation ...........................................12 2 Notational Conventions and Generic Grammar ....................14 2.1 Augmented BNF ...............................................14 2.2 Basic Rules .................................................15 3 Protocol Parameters ...........................................17 3.1 HTTP Version ................................................17 3.2 Uniform Resource Identifiers ................................18 3.2.1 General Syntax ...........................................19 3.2.2 http URL .................................................19 3.2.3 URI Comparison ...........................................20 3.3 Date/Time Formats ...........................................20 3.3.1 Full Date ................................................20 3.3.2 Delta Seconds ............................................21 3.4 Character Sets ..............................................21 3.4.1 Missing Charset ..........................................22 3.5 Content Codings .............................................23 3.6 Transfer Codings ............................................24 3.6.1 Chunked Transfer Coding ..................................25 3.7 Media Types .................................................26 3.7.1 Canonicalization and Text Defaults .......................27 3.7.2 Multipart Types ..........................................27 3.8 Product Tokens ..............................................28 3.9 Quality Values ..............................................29 3.10 Language Tags ...............................................29 3.11 Entity Tags 
.................................................30 3.12 Range Units .................................................30 4 HTTP Message ..................................................31 4.1 Message Types ...............................................31 4.2 Message Headers .............................................31 4.3 Message Body ................................................32 4.4 Message Length ..............................................33 4.5 General Header Fields .......................................34 5 Request .......................................................35 5.1 Request-Line ................................................35 5.1.1 Method ...................................................36 5.1.2 Request-URI ..............................................36 5.2 The Resource Identified by a Request ........................38 5.3 Request Header Fields .......................................38 6 Response ......................................................39 6.1 Status-Line .................................................39 6.1.1 Status Code and Reason Phrase ............................39 6.2 Response Header Fields ......................................41 Fielding, et al. 
Standards Track [Page 2] RFC 2616 HTTP/1.1 June 1999 7 Entity ........................................................42 7.1 Entity Header Fields ........................................42 7.2 Entity Body .................................................43 7.2.1 Type .....................................................43 7.2.2 Entity Length ............................................43 8 Connections ...................................................44 8.1 Persistent Connections ......................................44 8.1.1 Purpose ..................................................44 8.1.2 Overall Operation ........................................45 8.1.3 Proxy Servers ............................................46 8.1.4 Practical Considerations .................................46 8.2 Message Transmission Requirements ...........................47 8.2.1 Persistent Connections and Flow Control ..................47 8.2.2 Monitoring Connections for Error Status Messages .........48 8.2.3 Use of the 100 (Continue) Status .........................48 8.2.4 Client Behavior if Server Prematurely Closes Connection ..50 9 Method Definitions ............................................51 9.1 Safe and Idempotent Methods .................................51 9.1.1 Safe Methods .............................................51 9.1.2 Idempotent Methods .......................................51 9.2 OPTIONS .....................................................52 9.3 GET .........................................................53 9.4 HEAD ........................................................54 9.5 POST ........................................................54 9.6 PUT .........................................................55 9.7 DELETE ......................................................56 9.8 TRACE .......................................................56 9.9 CONNECT .....................................................57 10 Status Code Definitions 
......................................57 10.1 Informational 1xx ...........................................57 10.1.1 100 Continue .............................................58 10.1.2 101 Switching Protocols ..................................58 10.2 Successful 2xx ..............................................58 10.2.1 200 OK ...................................................58 10.2.2 201 Created ..............................................59 10.2.3 202 Accepted .............................................59 10.2.4 203 Non-Authoritative Information ........................59 10.2.5 204 No Content ...........................................60 10.2.6 205 Reset Content ........................................60 10.2.7 206 Partial Content ......................................60 10.3 Redirection 3xx .............................................61 10.3.1 300 Multiple Choices .....................................61 10.3.2 301 Moved Permanently ....................................62 10.3.3 302 Found ................................................62 10.3.4 303 See Other ............................................63 10.3.5 304 Not Modified .........................................63 10.3.6 305 Use Proxy ............................................64 10.3.7 306 (Unused) .............................................64 Fielding, et al. 
Standards Track [Page 3] RFC 2616 HTTP/1.1 June 1999 10.3.8 307 Temporary Redirect ...................................65 10.4 Client Error 4xx ............................................65 10.4.1 400 Bad Request .........................................65 10.4.2 401 Unauthorized ........................................66 10.4.3 402 Payment Required ....................................66 10.4.4 403 Forbidden ...........................................66 10.4.5 404 Not Found ...........................................66 10.4.6 405 Method Not Allowed ..................................66 10.4.7 406 Not Acceptable ......................................67 10.4.8 407 Proxy Authentication Required .......................67 10.4.9 408 Request Timeout .....................................67 10.4.10 409 Conflict ............................................67 10.4.11 410 Gone ................................................68 10.4.12 411 Length Required .....................................68 10.4.13 412 Precondition Failed .................................68 10.4.14 413 Request Entity Too Large ............................69 10.4.15 414 Request-URI Too Long ................................69 10.4.16 415 Unsupported Media Type ..............................69 10.4.17 416 Requested Range Not Satisfiable .....................69 10.4.18 417 Expectation Failed ..................................70 10.5 Server Error 5xx ............................................70 10.5.1 500 Internal Server Error ................................70 10.5.2 501 Not Implemented ......................................70 10.5.3 502 Bad Gateway ..........................................70 10.5.4 503 Service Unavailable ..................................70 10.5.5 504 Gateway Timeout ......................................71 10.5.6 505 HTTP Version Not Supported ...........................71 11 Access Authentication ........................................71 12 Content Negotiation 
..........................................71 12.1 Server-driven Negotiation ...................................72 12.2 Agent-driven Negotiation ....................................73 12.3 Transparent Negotiation .....................................74 13 Caching in HTTP ..............................................74 13.1.1 Cache Correctness ........................................75 13.1.2 Warnings .................................................76 13.1.3 Cache-control Mechanisms .................................77 13.1.4 Explicit User Agent Warnings .............................78 13.1.5 Exceptions to the Rules and Warnings .....................78 13.1.6 Client-controlled Behavior ...............................79 13.2 Expiration Model ............................................79 13.2.1 Server-Specified Expiration ..............................79 13.2.2 Heuristic Expiration .....................................80 13.2.3 Age Calculations .........................................80 13.2.4 Expiration Calculations ..................................83 13.2.5 Disambiguating Expiration Values .........................84 13.2.6 Disambiguating Multiple Responses ........................84 13.3 Validation Model ............................................85 13.3.1 Last-Modified Dates ......................................86 Fielding, et al. 
Standards Track [Page 4] RFC 2616 HTTP/1.1 June 1999 13.3.2 Entity Tag Cache Validators ..............................86 13.3.3 Weak and Strong Validators ...............................86 13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates.89 13.3.5 Non-validating Conditionals ..............................90 13.4 Response Cacheability .......................................91 13.5 Constructing Responses From Caches ..........................92 13.5.1 End-to-end and Hop-by-hop Headers ........................92 13.5.2 Non-modifiable Headers ...................................92 13.5.3 Combining Headers ........................................94 13.5.4 Combining Byte Ranges ....................................95 13.6 Caching Negotiated Responses ................................95 13.7 Shared and Non-Shared Caches ................................96 13.8 Errors or Incomplete Response Cache Behavior ................97 13.9 Side Effects of GET and HEAD ................................97 13.10 Invalidation After Updates or Deletions ...................97 13.11 Write-Through Mandatory ...................................98 13.12 Cache Replacement .........................................99 13.13 History Lists .............................................99 14 Header Field Definitions ....................................100 14.1 Accept .....................................................100 14.2 Accept-Charset .............................................102 14.3 Accept-Encoding ............................................102 14.4 Accept-Language ............................................104 14.5 Accept-Ranges ..............................................105 14.6 Age ........................................................106 14.7 Allow ......................................................106 14.8 Authorization ..............................................107 14.9 Cache-Control ..............................................108 14.9.1 What is Cacheable 
.......................................109 14.9.2 What May be Stored by Caches ............................110 14.9.3 Modifications of the Basic Expiration Mechanism .........111 14.9.4 Cache Revalidation and Reload Controls ..................113 14.9.5 No-Transform Directive ..................................115 14.9.6 Cache Control Extensions ................................116 14.10 Connection ...............................................117 14.11 Content-Encoding .........................................118 14.12 Content-Language .........................................118 14.13 Content-Length ...........................................119 14.14 Content-Location .........................................120 14.15 Content-MD5 ..............................................121 14.16 Content-Range ............................................122 14.17 Content-Type .............................................124 14.18 Date .....................................................124 14.18.1 Clockless Origin Server Operation ......................125 14.19 ETag .....................................................126 14.20 Expect ...................................................126 14.21 Expires ..................................................127 14.22 From .....................................................128 Fielding, et al. 
Standards Track [Page 5] RFC 2616 HTTP/1.1 June 1999 14.23 Host .....................................................128 14.24 If-Match .................................................129 14.25 If-Modified-Since ........................................130 14.26 If-None-Match ............................................132 14.27 If-Range .................................................133 14.28 If-Unmodified-Since ......................................134 14.29 Last-Modified ............................................134 14.30 Location .................................................135 14.31 Max-Forwards .............................................136 14.32 Pragma ...................................................136 14.33 Proxy-Authenticate .......................................137 14.34 Proxy-Authorization ......................................137 14.35 Range ....................................................138 14.35.1 Byte Ranges ...........................................138 14.35.2 Range Retrieval Requests ..............................139 14.36 Referer ..................................................140 14.37 Retry-After ..............................................141 14.38 Server ...................................................141 14.39 TE .......................................................142 14.40 Trailer ..................................................143 14.41 Transfer-Encoding..........................................143 14.42 Upgrade ..................................................144 14.43 User-Agent ...............................................145 14.44 Vary .....................................................145 14.45 Via ......................................................146 14.46 Warning ..................................................148 14.47 WWW-Authenticate .........................................150 15 Security Considerations .......................................150 15.1 Personal 
Information....................................151 15.1.1 Abuse of Server Log Information .........................151 15.1.2 Transfer of Sensitive Information .......................151 15.1.3 Encoding Sensitive Information in URI's .................152 15.1.4 Privacy Issues Connected to Accept Headers ..............152 15.2 Attacks Based On File and Path Names .......................153 15.3 DNS Spoofing ...............................................154 15.4 Location Headers and Spoofing ..............................154 15.5 Content-Disposition Issues .................................154 15.6 Authentication Credentials and Idle Clients ................155 15.7 Proxies and Caching ........................................155 15.7.1 Denial of Service Attacks on Proxies....................156 16 Acknowledgments .............................................156 17 References ..................................................158 18 Authors' Addresses ..........................................162 19 Appendices ..................................................164 19.1 Internet Media Type message/http and application/http ......164 19.2 Internet Media Type multipart/byteranges ...................165 19.3 Tolerant Applications ......................................166 19.4 Differences Between HTTP Entities and RFC 2045 Entities ....167 Fielding, et al. 
Standards Track [Page 6] RFC 2616 HTTP/1.1 June 1999 19.4.1 MIME-Version ............................................167 19.4.2 Conversion to Canonical Form ............................167 19.4.3 Conversion of Date Formats ..............................168 19.4.4 Introduction of Content-Encoding ........................168 19.4.5 No Content-Transfer-Encoding ............................168 19.4.6 Introduction of Transfer-Encoding .......................169 19.4.7 MHTML and Line Length Limitations .......................169 19.5 Additional Features ........................................169 19.5.1 Content-Disposition .....................................170 19.6 Compatibility with Previous Versions .......................170 19.6.1 Changes from HTTP/1.0 ...................................171 19.6.2 Compatibility with HTTP/1.0 Persistent Connections ......172 19.6.3 Changes from RFC 2068 ...................................172 20 Index .......................................................175 21 Full Copyright Statement ....................................176 1 Introduction 1.1 Purpose The Hypertext Transfer Protocol (HTTP) is an application-level protocol for distributed, collaborative, hypermedia information systems. HTTP has been in use by the World-Wide Web global information initiative since 1990. The first version of HTTP, referred to as HTTP/0.9, was a simple protocol for raw data transfer across the Internet. HTTP/1.0, as defined by RFC 1945 [6], improved the protocol by allowing messages to be in the format of MIME-like messages, containing metainformation about the data transferred and modifiers on the request/response semantics. However, HTTP/1.0 does not sufficiently take into consideration the effects of hierarchical proxies, caching, the need for persistent connections, or virtual hosts. 
In addition, the proliferation of incompletely-implemented applications calling themselves "HTTP/1.0" has necessitated a protocol version change in order for two communicating applications to determine each other's true capabilities. This specification defines the protocol referred to as "HTTP/1.1". This protocol includes more stringent requirements than HTTP/1.0 in order to ensure reliable implementation of its features. Practical information systems require more functionality than simple retrieval, including search, front-end update, and annotation. HTTP allows an open-ended set of methods and headers that indicate the purpose of a request [47]. It builds on the discipline of reference provided by the Uniform Resource Identifier (URI) [3], as a location (URL) [4] or name (URN) [20], for indicating the resource to which a Fielding, et al. Standards Track [Page 7] RFC 2616 HTTP/1.1 June 1999 method is to be applied. Messages are passed in a format similar to that used by Internet mail [9] as defined by the Multipurpose Internet Mail Extensions (MIME) [7]. HTTP is also used as a generic protocol for communication between user agents and proxies/gateways to other Internet systems, including those supported by the SMTP [16], NNTP [13], FTP [18], Gopher [2], and WAIS [10] protocols. In this way, HTTP allows basic hypermedia access to resources available from diverse applications. 1.2 Requirements The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in RFC 2119 [34]. An implementation is not compliant if it fails to satisfy one or more of the MUST or REQUIRED level requirements for the protocols it implements. 
An implementation that satisfies all the MUST or REQUIRED level and all the SHOULD level requirements for its protocols is said to be "unconditionally compliant"; one that satisfies all the MUST level requirements but not all the SHOULD level requirements for its protocols is said to be "conditionally compliant." 1.3 Terminology This specification uses a number of terms to refer to the roles played by participants in, and objects of, the HTTP communication. connection A transport layer virtual circuit established between two programs for the purpose of communication. message The basic unit of HTTP communication, consisting of a structured sequence of octets matching the syntax defined in section 4 and transmitted via the connection. request An HTTP request message, as defined in section 5. response An HTTP response message, as defined in section 6. Fielding, et al. Standards Track [Page 8] RFC 2616 HTTP/1.1 June 1999 resource A network data object or service that can be identified by a URI, as defined in section 3.2. Resources may be available in multiple representations (e.g. multiple languages, data formats, size, and resolutions) or vary in other ways. entity The information transferred as the payload of a request or response. An entity consists of metainformation in the form of entity-header fields and content in the form of an entity-body, as described in section 7. representation An entity included with a response that is subject to content negotiation, as described in section 12. There may exist multiple representations associated with a particular response status. content negotiation The mechanism for selecting the appropriate representation when servicing a request, as described in section 12. The representation of entities in any response can be negotiated (including error responses). variant A resource may have one, or more than one, representation(s) associated with it at any given instant. Each of these representations is termed a `variant'. 
Use of the term `variant' does not necessarily imply that the resource is subject to content negotiation. client A program that establishes connections for the purpose of sending requests. user agent The client which initiates a request. These are often browsers, editors, spiders (web-traversing robots), or other end user tools. server An application program that accepts connections in order to service requests by sending back responses. Any given program may be capable of being both a client and a server; our use of these terms refers only to the role being performed by the program for a particular connection, rather than to the program's capabilities in general. Likewise, any server may act as an origin server, proxy, gateway, or tunnel, switching behavior based on the nature of each request. Fielding, et al. Standards Track [Page 9] RFC 2616 HTTP/1.1 June 1999 origin server The server on which a given resource resides or is to be created. proxy An intermediary program which acts as both a server and a client for the purpose of making requests on behalf of other clients. Requests are serviced internally or by passing them on, with possible translation, to other servers. A proxy MUST implement both the client and server requirements of this specification. A "transparent proxy" is a proxy that does not modify the request or response beyond what is required for proxy authentication and identification. A "non-transparent proxy" is a proxy that modifies the request or response in order to provide some added service to the user agent, such as group annotation services, media type transformation, protocol reduction, or anonymity filtering. Except where either transparent or non-transparent behavior is explicitly stated, the HTTP proxy requirements apply to both types of proxies. gateway A server which acts as an intermediary for some other server. 
Unlike a proxy, a gateway receives requests as if it were the origin server for the requested resource; the requesting client may not be aware that it is communicating with a gateway. tunnel An intermediary program which is acting as a blind relay between two connections. Once active, a tunnel is not considered a party to the HTTP communication, though the tunnel may have been initiated by an HTTP request. The tunnel ceases to exist when both ends of the relayed connections are closed. cache A program's local store of response messages and the subsystem that controls its message storage, retrieval, and deletion. A cache stores cacheable responses in order to reduce the response time and network bandwidth consumption on future, equivalent requests. Any client or server may include a cache, though a cache cannot be used by a server that is acting as a tunnel. cacheable A response is cacheable if a cache is allowed to store a copy of the response message for use in answering subsequent requests. The rules for determining the cacheability of HTTP responses are defined in section 13. Even if a resource is cacheable, there may be additional constraints on whether a cache can use the cached copy for a particular request. Fielding, et al. Standards Track [Page 10] RFC 2616 HTTP/1.1 June 1999 first-hand A response is first-hand if it comes directly and without unnecessary delay from the origin server, perhaps via one or more proxies. A response is also first-hand if its validity has just been checked directly with the origin server. explicit expiration time The time at which the origin server intends that an entity should no longer be returned by a cache without further validation. heuristic expiration time An expiration time assigned by a cache when no explicit expiration time is available. age The age of a response is the time since it was sent by, or successfully validated with, the origin server. 
freshness lifetime The length of time between the generation of a response and its expiration time. fresh A response is fresh if its age has not yet exceeded its freshness lifetime. stale A response is stale if its age has passed its freshness lifetime. semantically transparent A cache behaves in a "semantically transparent" manner, with respect to a particular response, when its use affects neither the requesting client nor the origin server, except to improve performance. When a cache is semantically transparent, the client receives exactly the same response (except for hop-by-hop headers) that it would have received had its request been handled directly by the origin server. validator A protocol element (e.g., an entity tag or a Last-Modified time) that is used to find out whether a cache entry is an equivalent copy of an entity. upstream/downstream Upstream and downstream describe the flow of a message: all messages flow from upstream to downstream. Fielding, et al. Standards Track [Page 11] RFC 2616 HTTP/1.1 June 1999 inbound/outbound Inbound and outbound refer to the request and response paths for messages: "inbound" means "traveling toward the origin server", and "outbound" means "traveling toward the user agent" 1.4 Overall Operation The HTTP protocol is a request/response protocol. A client sends a request to the server in the form of a request method, URI, and protocol version, followed by a MIME-like message containing request modifiers, client information, and possible body content over a connection with a server. The server responds with a status line, including the message's protocol version and a success or error code, followed by a MIME-like message containing server information, entity metainformation, and possible entity-body content. The relationship between HTTP and MIME is described in appendix 19.4. Most HTTP communication is initiated by a user agent and consists of a request to be applied to a resource on some origin server. 
In the simplest case, this may be accomplished via a single connection (v) between the user agent (UA) and the origin server (O). request chain ------------------------> UA -------------------v------------------- O <----------------------- response chain A more complicated situation occurs when one or more intermediaries are present in the request/response chain. There are three common forms of intermediary: proxy, gateway, and tunnel. A proxy is a forwarding agent, receiving requests for a URI in its absolute form, rewriting all or part of the message, and forwarding the reformatted request toward the server identified by the URI. A gateway is a receiving agent, acting as a layer above some other server(s) and, if necessary, translating the requests to the underlying server's protocol. A tunnel acts as a relay point between two connections without changing the messages; tunnels are used when the communication needs to pass through an intermediary (such as a firewall) even when the intermediary cannot understand the contents of the messages. request chain --------------------------------------> UA -----v----- A -----v----- B -----v----- C -----v----- O <------------------------------------- response chain The figure above shows three intermediaries (A, B, and C) between the user agent and origin server. A request or response message that travels the whole chain will pass through four separate connections. This distinction is important because some HTTP communication options Fielding, et al. Standards Track [Page 12] RFC 2616 HTTP/1.1 June 1999 may apply only to the connection with the nearest, non-tunnel neighbor, only to the end-points of the chain, or to all connections along the chain. Although the diagram is linear, each participant may be engaged in multiple, simultaneous communications. For example, B may be receiving requests from many clients other than A, and/or forwarding requests to servers other than C, at the same time that it is handling A's request. 
Any party to the communication which is not acting as a tunnel may employ an internal cache for handling requests. The effect of a cache is that the request/response chain is shortened if one of the participants along the chain has a cached response applicable to that request. The following illustrates the resulting chain if B has a cached copy of an earlier response from O (via C) for a request which has not been cached by UA or A. request chain ----------> UA -----v----- A -----v----- B - - - - - - C - - - - - - O <--------- response chain Not all responses are usefully cacheable, and some requests may contain modifiers which place special requirements on cache behavior. HTTP requirements for cache behavior and cacheable responses are defined in section 13. In fact, there are a wide variety of architectures and configurations of caches and proxies currently being experimented with or deployed across the World Wide Web. These systems include national hierarchies of proxy caches to save transoceanic bandwidth, systems that broadcast or multicast cache entries, organizations that distribute subsets of cached data via CD-ROM, and so on. HTTP systems are used in corporate intranets over high-bandwidth links, and for access via PDAs with low-power radio links and intermittent connectivity. The goal of HTTP/1.1 is to support the wide diversity of configurations already deployed while introducing protocol constructs that meet the needs of those who build web applications that require high reliability and, failing that, at least reliable indications of failure. HTTP communication usually takes place over TCP/IP connections. The default port is TCP 80 [19], but other ports can be used. This does not preclude HTTP from being implemented on top of any other protocol on the Internet, or on other networks. 
HTTP only presumes a reliable transport; any protocol that provides such guarantees can be used; the mapping of the HTTP/1.1 request and response structures onto the transport data units of the protocol in question is outside the scope of this specification. Fielding, et al. Standards Track [Page 13] RFC 2616 HTTP/1.1 June 1999 In HTTP/1.0, most implementations used a new connection for each request/response exchange. In HTTP/1.1, a connection may be used for one or more request/response exchanges, although connections may be closed for a variety of reasons (see section 8.1). 2 Notational Conventions and Generic Grammar 2.1 Augmented BNF All of the mechanisms specified in this document are described in both prose and an augmented Backus-Naur Form (BNF) similar to that used by RFC 822 [9]. Implementors will need to be familiar with the notation in order to understand this specification. The augmented BNF includes the following constructs: name = definition The name of a rule is simply the name itself (without any enclosing "<" and ">") and is separated from its definition by the equal "=" character. White space is only significant in that indentation of continuation lines is used to indicate a rule definition that spans more than one line. Certain basic rules are in uppercase, such as SP, LWS, HT, CRLF, DIGIT, ALPHA, etc. Angle brackets are used within definitions whenever their presence will facilitate discerning the use of rule names. "literal" Quotation marks surround literal text. Unless stated otherwise, the text is case-insensitive. rule1 | rule2 Elements separated by a bar ("|") are alternatives, e.g., "yes | no" will accept yes or no. (rule1 rule2) Elements enclosed in parentheses are treated as a single element. Thus, "(elem (foo | bar) elem)" allows the token sequences "elem foo elem" and "elem bar elem". *rule The character "*" preceding an element indicates repetition. The full form is "<n>*<m>element" indicating at least <n> and at most <m> occurrences of element. 
Default values are 0 and infinity so that "*(element)" allows any number, including zero; "1*element" requires at least one; and "1*2element" allows one or two. [rule] Square brackets enclose optional elements; "[foo bar]" is equivalent to "*1(foo bar)". Fielding, et al. Standards Track [Page 14] RFC 2616 HTTP/1.1 June 1999 N rule Specific repetition: "<n>(element)" is equivalent to "<n>*<n>(element)"; that is, exactly <n> occurrences of (element). Thus 2DIGIT is a 2-digit number, and 3ALPHA is a string of three alphabetic characters. #rule A construct "#" is defined, similar to "*", for defining lists of elements. The full form is "<n>#<m>element" indicating at least <n> and at most <m> elements, each separated by one or more commas (",") and OPTIONAL linear white space (LWS). This makes the usual form of lists very easy; a rule such as ( *LWS element *( *LWS "," *LWS element )) can be shown as 1#element Wherever this construct is used, null elements are allowed, but do not contribute to the count of elements present. That is, "(element), , (element) " is permitted, but counts as only two elements. Therefore, where at least one element is required, at least one non-null element MUST be present. Default values are 0 and infinity so that "#element" allows any number, including zero; "1#element" requires at least one; and "1#2element" allows one or two. ; comment A semi-colon, set off some distance to the right of rule text, starts a comment that continues to the end of line. This is a simple way of including useful notes in parallel with the specifications. implied *LWS The grammar described by this specification is word-based. Except where noted otherwise, linear white space (LWS) can be included between any two adjacent words (token or quoted-string), and between adjacent words and separators, without changing the interpretation of a field. 
At least one delimiter (LWS and/or separators) MUST exist between any two tokens (for the definition of "token" below), since they would otherwise be interpreted as a single token. 2.2 Basic Rules The following rules are used throughout this specification to describe basic parsing constructs. The US-ASCII coded character set is defined by ANSI X3.4-1986 [21]. Fielding, et al. Standards Track [Page 15] RFC 2616 HTTP/1.1 June 1999 OCTET = <any 8-bit sequence of data> CHAR = <any US-ASCII character (octets 0 - 127)> UPALPHA = <any US-ASCII uppercase letter "A".."Z"> LOALPHA = <any US-ASCII lowercase letter "a".."z"> ALPHA = UPALPHA | LOALPHA DIGIT = <any US-ASCII digit "0".."9"> CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)> CR = <US-ASCII CR, carriage return (13)> LF = <US-ASCII LF, linefeed (10)> SP = <US-ASCII SP, space (32)> HT = <US-ASCII HT, horizontal-tab (9)> <"> = <US-ASCII double-quote mark (34)> HTTP/1.1 defines the sequence CR LF as the end-of-line marker for all protocol elements except the entity-body (see appendix 19.3 for tolerant applications). The end-of-line marker within an entity-body is defined by its associated media type, as described in section 3.7. CRLF = CR LF HTTP/1.1 header field values can be folded onto multiple lines if the continuation line begins with a space or horizontal tab. All linear white space, including folding, has the same semantics as SP. A recipient MAY replace any linear white space with a single SP before interpreting the field value or forwarding the message downstream. LWS = [CRLF] 1*( SP | HT ) The TEXT rule is only used for descriptive field contents and values that are not intended to be interpreted by the message parser. Words of *TEXT MAY contain characters from character sets other than ISO- 8859-1 [22] only when encoded according to the rules of RFC 2047 [14]. TEXT = <any OCTET except CTLs, but including LWS> A CRLF is allowed in the definition of TEXT only as part of a header field continuation. It is expected that the folding LWS will be replaced with a single SP before interpretation of the TEXT value. Hexadecimal numeric characters are used in several protocol elements. HEX = "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f" | DIGIT Fielding, et al. Standards Track [Page 16] RFC 2616 HTTP/1.1 June 1999 Many HTTP/1.1 header field values consist of words separated by LWS or special characters. 
These special characters MUST be in a quoted string to be used within a parameter value (as defined in section 3.6). token = 1*<any CHAR except CTLs or separators> separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT Comments can be included in some HTTP header fields by surrounding the comment text with parentheses. Comments are only allowed in fields containing "comment" as part of their field value definition. In all other fields, parentheses are considered part of the field value. comment = "(" *( ctext | quoted-pair | comment ) ")" ctext = <any TEXT excluding "(" and ")"> A string of text is parsed as a single word if it is quoted using double-quote marks. quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) qdtext = <any TEXT except <">> The backslash character ("\") MAY be used as a single-character quoting mechanism only within quoted-string and comment constructs. quoted-pair = "\" CHAR 3 Protocol Parameters 3.1 HTTP Version HTTP uses a "<major>.<minor>" numbering scheme to indicate versions of the protocol. The protocol versioning policy is intended to allow the sender to indicate the format of a message and its capacity for understanding further HTTP communication, rather than the features obtained via that communication. No change is made to the version number for the addition of message components which do not affect communication behavior or which only add to extensible field values. The <minor> number is incremented when the changes made to the protocol add features which do not change the general message parsing algorithm, but which may add to the message semantics and imply additional capabilities of the sender. The <major> number is incremented when the format of a message within the protocol is changed. See RFC 2145 [36] for a fuller explanation. Fielding, et al. Standards Track [Page 17] RFC 2616 HTTP/1.1 June 1999 The version of an HTTP message is indicated by an HTTP-Version field in the first line of the message. HTTP-Version = "HTTP" "/" 1*DIGIT "." 
1*DIGIT Note that the major and minor numbers MUST be treated as separate integers and that each MAY be incremented higher than a single digit. Thus, HTTP/2.4 is a lower version than HTTP/2.13, which in turn is lower than HTTP/12.3. Leading zeros MUST be ignored by recipients and MUST NOT be sent. An application that sends a request or response message that includes HTTP-Version of "HTTP/1.1" MUST be at least conditionally compliant with this specification. Applications that are at least conditionally compliant with this specification SHOULD use an HTTP-Version of "HTTP/1.1" in their messages, and MUST do so for any message that is not compatible with HTTP/1.0. For more details on when to send specific HTTP-Version values, see RFC 2145 [36]. The HTTP version of an application is the highest HTTP version for which the application is at least conditionally compliant. Proxy and gateway applications need to be careful when forwarding messages in protocol versions different from that of the application. Since the protocol version indicates the protocol capability of the sender, a proxy/gateway MUST NOT send a message with a version indicator which is greater than its actual version. If a higher version request is received, the proxy/gateway MUST either downgrade the request version, or respond with an error, or switch to tunnel behavior. Due to interoperability problems with HTTP/1.0 proxies discovered since the publication of RFC 2068[33], caching proxies MUST, gateways MAY, and tunnels MUST NOT upgrade the request to the highest version they support. The proxy/gateway's response to that request MUST be in the same major version as the request. Note: Converting between versions of HTTP may involve modification of header fields required or forbidden by the versions involved. 
3.2 Uniform Resource Identifiers URIs have been known by many names: WWW addresses, Universal Document Identifiers, Universal Resource Identifiers [3], and finally the combination of Uniform Resource Locators (URL) [4] and Names (URN) [20]. As far as HTTP is concerned, Uniform Resource Identifiers are simply formatted strings which identify--via name, location, or any other characteristic--a resource. Fielding, et al. Standards Track [Page 18] RFC 2616 HTTP/1.1 June 1999 3.2.1 General Syntax URIs in HTTP can be represented in absolute form or relative to some known base URI [11], depending upon the context of their use. The two forms are differentiated by the fact that absolute URIs always begin with a scheme name followed by a colon. For definitive information on URL syntax and semantics, see "Uniform Resource Identifiers (URI): Generic Syntax and Semantics," RFC 2396 [42] (which replaces RFCs 1738 [4] and RFC 1808 [11]). This specification adopts the definitions of "URI-reference", "absoluteURI", "relativeURI", "port", "host","abs_path", "rel_path", and "authority" from that specification. The HTTP protocol does not place any a priori limit on the length of a URI. Servers MUST be able to handle the URI of any resource they serve, and SHOULD be able to handle URIs of unbounded length if they provide GET-based forms that could generate such URIs. A server SHOULD return 414 (Request-URI Too Long) status if a URI is longer than the server can handle (see section 10.4.15). Note: Servers ought to be cautious about depending on URI lengths above 255 bytes, because some older client or proxy implementations might not properly support these lengths. 3.2.2 http URL The "http" scheme is used to locate network resources via the HTTP protocol. This section defines the scheme-specific syntax and semantics for http URLs. http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] If the port is empty or not given, port 80 is assumed. 
The semantics are that the identified resource is located at the server listening for TCP connections on that port of that host, and the Request-URI for the resource is abs_path (section 5.1.2). The use of IP addresses in URLs SHOULD be avoided whenever possible (see RFC 1900 [24]). If the abs_path is not present in the URL, it MUST be given as "/" when used as a Request-URI for a resource (section 5.1.2). If a proxy receives a host name which is not a fully qualified domain name, it MAY add its domain to the host name it received. If a proxy receives a fully qualified domain name, the proxy MUST NOT change the host name. Fielding, et al. Standards Track [Page 19] RFC 2616 HTTP/1.1 June 1999 3.2.3 URI Comparison When comparing two URIs to decide if they match or not, a client SHOULD use a case-sensitive octet-by-octet comparison of the entire URIs, with these exceptions: - A port that is empty or not given is equivalent to the default port for that URI-reference; - Comparisons of host names MUST be case-insensitive; - Comparisons of scheme names MUST be case-insensitive; - An empty abs_path is equivalent to an abs_path of "/". Characters other than those in the "reserved" and "unsafe" sets (see RFC 2396 [42]) are equivalent to their ""%" HEX HEX" encoding. For example, the following three URIs are equivalent: http://abc.com:80/~smith/home.html http://ABC.com/%7Esmith/home.html http://ABC.com:/%7esmith/home.html 3.3 Date/Time Formats 3.3.1 Full Date HTTP applications have historically allowed three different formats for the representation of date/time stamps: Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format The first format is preferred as an Internet standard and represents a fixed-length subset of that defined by RFC 1123 [8] (an update to RFC 822 [9]). 
The second format is in common use, but is based on the obsolete RFC 850 [12] date format and lacks a four-digit year. HTTP/1.1 clients and servers that parse the date value MUST accept all three formats (for compatibility with HTTP/1.0), though they MUST only generate the RFC 1123 format for representing HTTP-date values in header fields. See section 19.3 for further information. Note: Recipients of date values are encouraged to be robust in accepting date values that may have been sent by non-HTTP applications, as is sometimes the case when retrieving or posting messages via proxies/gateways to SMTP or NNTP. Fielding, et al. Standards Track [Page 20] RFC 2616 HTTP/1.1 June 1999 All HTTP date/time stamps MUST be represented in Greenwich Mean Time (GMT), without exception. For the purposes of HTTP, GMT is exactly equal to UTC (Coordinated Universal Time). This is indicated in the first two formats by the inclusion of "GMT" as the three-letter abbreviation for time zone, and MUST be assumed when reading the asctime format. HTTP-date is case sensitive and MUST NOT include additional LWS beyond that specifically included as SP in the grammar. HTTP-date = rfc1123-date | rfc850-date | asctime-date rfc1123-date = wkday "," SP date1 SP time SP "GMT" rfc850-date = weekday "," SP date2 SP time SP "GMT" asctime-date = wkday SP date3 SP time SP 4DIGIT date1 = 2DIGIT SP month SP 4DIGIT ; day month year (e.g., 02 Jun 1982) date2 = 2DIGIT "-" month "-" 2DIGIT ; day-month-year (e.g., 02-Jun-82) date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) ; month day (e.g., Jun 2) time = 2DIGIT ":" 2DIGIT ":" 2DIGIT ; 00:00:00 - 23:59:59 wkday = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun" weekday = "Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday" month = "Jan" | "Feb" | "Mar" | "Apr" | "May" | "Jun" | "Jul" | "Aug" | "Sep" | "Oct" | "Nov" | "Dec" Note: HTTP requirements for the date/time stamp format apply only to their usage within the protocol stream. 
Clients and servers are not required to use these formats for user presentation, request logging, etc. 3.3.2 Delta Seconds Some HTTP header fields allow a time value to be specified as an integer number of seconds, represented in decimal, after the time that the message was received. delta-seconds = 1*DIGIT 3.4 Character Sets HTTP uses the same definition of the term "character set" as that described for MIME: Fielding, et al. Standards Track [Page 21] RFC 2616 HTTP/1.1 June 1999 The term "character set" is used in this document to refer to a method used with one or more tables to convert a sequence of octets into a sequence of characters. Note that unconditional conversion in the other direction is not required, in that not all characters may be available in a given character set and a character set may provide more than one sequence of octets to represent a particular character. This definition is intended to allow various kinds of character encoding, from simple single-table mappings such as US-ASCII to complex table switching methods such as those that use ISO-2022's techniques. However, the definition associated with a MIME character set name MUST fully specify the mapping to be performed from octets to characters. In particular, use of external profiling information to determine the exact mapping is not permitted. Note: This use of the term "character set" is more commonly referred to as a "character encoding." However, since HTTP and MIME share the same registry, it is important that the terminology also be shared. HTTP character sets are identified by case-insensitive tokens. The complete set of tokens is defined by the IANA Character Set registry [19]. charset = token Although HTTP allows an arbitrary token to be used as a charset value, any token that has a predefined value within the IANA Character Set registry [19] MUST represent the character set defined by that registry. 
Applications SHOULD limit their use of character sets to those defined by the IANA registry. Implementors should be aware of IETF character set requirements [38] [41]. 3.4.1 Missing Charset Some HTTP/1.0 software has interpreted a Content-Type header without charset parameter incorrectly to mean "recipient should guess." Senders wishing to defeat this behavior MAY include a charset parameter even when the charset is ISO-8859-1 and SHOULD do so when it is known that it will not confuse the recipient. Unfortunately, some older HTTP/1.0 clients did not deal properly with an explicit charset parameter. HTTP/1.1 recipients MUST respect the charset label provided by the sender; and those user agents that have a provision to "guess" a charset MUST use the charset from the Fielding, et al. Standards Track [Page 22] RFC 2616 HTTP/1.1 June 1999 content-type field if they support that charset, rather than the recipient's preference, when initially displaying a document. See section 3.7.1. 3.5 Content Codings Content coding values indicate an encoding transformation that has been or can be applied to an entity. Content codings are primarily used to allow a document to be compressed or otherwise usefully transformed without losing the identity of its underlying media type and without loss of information. Frequently, the entity is stored in coded form, transmitted directly, and only decoded by the recipient. content-coding = token All content-coding values are case-insensitive. HTTP/1.1 uses content-coding values in the Accept-Encoding (section 14.3) and Content-Encoding (section 14.11) header fields. Although the value describes the content-coding, what is more important is that it indicates what decoding mechanism will be required to remove the encoding. The Internet Assigned Numbers Authority (IANA) acts as a registry for content-coding value tokens. 
Initially, the registry contains the following tokens: gzip An encoding format produced by the file compression program "gzip" (GNU zip) as described in RFC 1952 [25]. This format is a Lempel-Ziv coding (LZ77) with a 32 bit CRC. compress The encoding format produced by the common UNIX file compression program "compress". This format is an adaptive Lempel-Ziv-Welch coding (LZW). Use of program names for the identification of encoding formats is not desirable and is discouraged for future encodings. Their use here is representative of historical practice, not good design. For compatibility with previous implementations of HTTP, applications SHOULD consider "x-gzip" and "x-compress" to be equivalent to "gzip" and "compress" respectively. deflate The "zlib" format defined in RFC 1950 [31] in combination with the "deflate" compression mechanism described in RFC 1951 [29]. Fielding, et al. Standards Track [Page 23] RFC 2616 HTTP/1.1 June 1999 identity The default (identity) encoding; the use of no transformation whatsoever. This content-coding is used only in the Accept- Encoding header, and SHOULD NOT be used in the Content-Encoding header. New content-coding value tokens SHOULD be registered; to allow interoperability between clients and servers, specifications of the content coding algorithms needed to implement a new value SHOULD be publicly available and adequate for independent implementation, and conform to the purpose of content coding defined in this section. 3.6 Transfer Codings Transfer-coding values are used to indicate an encoding transformation that has been, can be, or may need to be applied to an entity-body in order to ensure "safe transport" through the network. This differs from a content coding in that the transfer-coding is a property of the message, not of the original entity. transfer-coding = "chunked" | transfer-extension transfer-extension = token *( ";" parameter ) Parameters are in the form of attribute/value pairs. 
parameter = attribute "=" value attribute = token value = token | quoted-string All transfer-coding values are case-insensitive. HTTP/1.1 uses transfer-coding values in the TE header field (section 14.39) and in the Transfer-Encoding header field (section 14.41). Whenever a transfer-coding is applied to a message-body, the set of transfer-codings MUST include "chunked", unless the message is terminated by closing the connection. When the "chunked" transfer- coding is used, it MUST be the last transfer-coding applied to the message-body. The "chunked" transfer-coding MUST NOT be applied more than once to a message-body. These rules allow the recipient to determine the transfer-length of the message (section 4.4). Transfer-codings are analogous to the Content-Transfer-Encoding values of MIME [7], which were designed to enable safe transport of binary data over a 7-bit transport service. However, safe transport has a different focus for an 8bit-clean transfer protocol. In HTTP, the only unsafe characteristic of message-bodies is the difficulty in determining the exact body length (section 7.2.2), or the desire to encrypt data over a shared transport. Fielding, et al. Standards Track [Page 24] RFC 2616 HTTP/1.1 June 1999 The Internet Assigned Numbers Authority (IANA) acts as a registry for transfer-coding value tokens. Initially, the registry contains the following tokens: "chunked" (section 3.6.1), "identity" (section 3.6.2), "gzip" (section 3.5), "compress" (section 3.5), and "deflate" (section 3.5). New transfer-coding value tokens SHOULD be registered in the same way as new content-coding value tokens (section 3.5). A server which receives an entity-body with a transfer-coding it does not understand SHOULD return 501 (Unimplemented), and close the connection. A server MUST NOT send transfer-codings to an HTTP/1.0 client. 
3.6.1 Chunked Transfer Coding The chunked encoding modifies the body of a message in order to transfer it as a series of chunks, each with its own size indicator, followed by an OPTIONAL trailer containing entity-header fields. This allows dynamically produced content to be transferred along with the information necessary for the recipient to verify that it has received the full message. Chunked-Body = *chunk last-chunk trailer CRLF chunk = chunk-size [ chunk-extension ] CRLF chunk-data CRLF chunk-size = 1*HEX last-chunk = 1*("0") [ chunk-extension ] CRLF chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) chunk-ext-name = token chunk-ext-val = token | quoted-string chunk-data = chunk-size(OCTET) trailer = *(entity-header CRLF) The chunk-size field is a string of hex digits indicating the size of the chunk. The chunked encoding is ended by any chunk whose size is zero, followed by the trailer, which is terminated by an empty line. The trailer allows the sender to include additional HTTP header fields at the end of the message. The Trailer header field can be used to indicate which header fields are included in a trailer (see section 14.40). Fielding, et al. Standards Track [Page 25] RFC 2616 HTTP/1.1 June 1999 A server using chunked transfer-coding in a response MUST NOT use the trailer for any header fields unless at least one of the following is true: a)the request included a TE header field that indicates "trailers" is acceptable in the transfer-coding of the response, as described in section 14.39; or, b)the server is the origin server for the response, the trailer fields consist entirely of optional metadata, and the recipient could use the message (in a manner acceptable to the origin server) without receiving this metadata. In other words, the origin server is willing to accept the possibility that the trailer fields might be silently discarded along the path to the client. 
This requirement prevents an interoperability failure when the message is being received by an HTTP/1.1 (or later) proxy and forwarded to an HTTP/1.0 recipient. It avoids a situation where compliance with the protocol would have necessitated a possibly infinite buffer on the proxy. An example process for decoding a Chunked-Body is presented in appendix 19.4.6. All HTTP/1.1 applications MUST be able to receive and decode the "chunked" transfer-coding, and MUST ignore chunk-extension extensions they do not understand. 3.7 Media Types HTTP uses Internet Media Types [17] in the Content-Type (section 14.17) and Accept (section 14.1) header fields in order to provide open and extensible data typing and type negotiation. media-type = type "/" subtype *( ";" parameter ) type = token subtype = token Parameters MAY follow the type/subtype in the form of attribute/value pairs (as defined in section 3.6). The type, subtype, and parameter attribute names are case- insensitive. Parameter values might or might not be case-sensitive, depending on the semantics of the parameter name. Linear white space (LWS) MUST NOT be used between the type and subtype, nor between an attribute and its value. The presence or absence of a parameter might be significant to the processing of a media-type, depending on its definition within the media type registry. Fielding, et al. Standards Track [Page 26] RFC 2616 HTTP/1.1 June 1999 Note that some older HTTP applications do not recognize media type parameters. When sending data to older HTTP applications, implementations SHOULD only use media type parameters when they are required by that type/subtype definition. Media-type values are registered with the Internet Assigned Number Authority (IANA [19]). The media type registration process is outlined in RFC 1590 [17]. Use of non-registered media types is discouraged. 3.7.1 Canonicalization and Text Defaults Internet media types are registered with a canonical form. 
An entity-body transferred via HTTP messages MUST be represented in the appropriate canonical form prior to its transmission except for "text" types, as defined in the next paragraph. When in canonical form, media subtypes of the "text" type use CRLF as the text line break. HTTP relaxes this requirement and allows the transport of text media with plain CR or LF alone representing a line break when it is done consistently for an entire entity-body. HTTP applications MUST accept CRLF, bare CR, and bare LF as being representative of a line break in text media received via HTTP. In addition, if the text is represented in a character set that does not use octets 13 and 10 for CR and LF respectively, as is the case for some multi-byte character sets, HTTP allows the use of whatever octet sequences are defined by that character set to represent the equivalent of CR and LF for line breaks. This flexibility regarding line breaks applies only to text media in the entity-body; a bare CR or LF MUST NOT be substituted for CRLF within any of the HTTP control structures (such as header fields and multipart boundaries). If an entity-body is encoded with a content-coding, the underlying data MUST be in a form defined above prior to being encoded. The "charset" parameter is used with some media types to define the character set (section 3.4) of the data. When no explicit charset parameter is provided by the sender, media subtypes of the "text" type are defined to have a default charset value of "ISO-8859-1" when received via HTTP. Data in character sets other than "ISO-8859-1" or its subsets MUST be labeled with an appropriate charset value. See section 3.4.1 for compatibility problems. 3.7.2 Multipart Types MIME provides for a number of "multipart" types -- encapsulations of one or more entities within a single message-body. All multipart types share a common syntax, as defined in section 5.1.1 of RFC 2046 Fielding, et al. 
Standards Track [Page 27] RFC 2616 HTTP/1.1 June 1999 [40], and MUST include a boundary parameter as part of the media type value. The message body is itself a protocol element and MUST therefore use only CRLF to represent line breaks between body-parts. Unlike in RFC 2046, the epilogue of any multipart message MUST be empty; HTTP applications MUST NOT transmit the epilogue (even if the original multipart contains an epilogue). These restrictions exist in order to preserve the self-delimiting nature of a multipart message- body, wherein the "end" of the message-body is indicated by the ending multipart boundary. In general, HTTP treats a multipart message-body no differently than any other media type: strictly as payload. The one exception is the "multipart/byteranges" type (appendix 19.2) when it appears in a 206 (Partial Content) response, which will be interpreted by some HTTP caching mechanisms as described in sections 13.5.4 and 14.16. In all other cases, an HTTP user agent SHOULD follow the same or similar behavior as a MIME user agent would upon receipt of a multipart type. The MIME header fields within each body-part of a multipart message- body do not have any significance to HTTP beyond that defined by their MIME semantics. In general, an HTTP user agent SHOULD follow the same or similar behavior as a MIME user agent would upon receipt of a multipart type. If an application receives an unrecognized multipart subtype, the application MUST treat it as being equivalent to "multipart/mixed". Note: The "multipart/form-data" type has been specifically defined for carrying form data suitable for processing via the POST request method, as described in RFC 1867 [15]. 3.8 Product Tokens Product tokens are used to allow communicating applications to identify themselves by software name and version. Most fields using product tokens also allow sub-products which form a significant part of the application to be listed, separated by white space. 
By convention, the products are listed in order of their significance for identifying the application. product = token ["/" product-version] product-version = token Examples: User-Agent: CERN-LineMode/2.15 libwww/2.17b3 Server: Apache/0.8.4 Fielding, et al. Standards Track [Page 28] RFC 2616 HTTP/1.1 June 1999 Product tokens SHOULD be short and to the point. They MUST NOT be used for advertising or other non-essential information. Although any token character MAY appear in a product-version, this token SHOULD only be used for a version identifier (i.e., successive versions of the same product SHOULD only differ in the product-version portion of the product value). 3.9 Quality Values HTTP content negotiation (section 12) uses short "floating point" numbers to indicate the relative importance ("weight") of various negotiable parameters. A weight is normalized to a real number in the range 0 through 1, where 0 is the minimum and 1 the maximum value. If a parameter has a quality value of 0, then content with this parameter is `not acceptable' for the client. HTTP/1.1 applications MUST NOT generate more than three digits after the decimal point. User configuration of these values SHOULD also be limited in this fashion. qvalue = ( "0" [ "." 0*3DIGIT ] ) | ( "1" [ "." 0*3("0") ] ) "Quality values" is a misnomer, since these values merely represent relative degradation in desired quality. 3.10 Language Tags A language tag identifies a natural language spoken, written, or otherwise conveyed by human beings for communication of information to other human beings. Computer languages are explicitly excluded. HTTP uses language tags within the Accept-Language and Content- Language fields. The syntax and registry of HTTP language tags is the same as that defined by RFC 1766 [1]. 
In summary, a language tag is composed of 1 or more parts: A primary language tag and a possibly empty series of subtags: language-tag = primary-tag *( "-" subtag ) primary-tag = 1*8ALPHA subtag = 1*8ALPHA White space is not allowed within the tag and all tags are case- insensitive. The name space of language tags is administered by the IANA. Example tags include: en, en-US, en-cockney, i-cherokee, x-pig-latin Fielding, et al. Standards Track [Page 29] RFC 2616 HTTP/1.1 June 1999 where any two-letter primary-tag is an ISO-639 language abbreviation and any two-letter initial subtag is an ISO-3166 country code. (The last three tags above are not registered tags; all but the last are examples of tags which could be registered in future.) 3.11 Entity Tags Entity tags are used for comparing two or more entities from the same requested resource. HTTP/1.1 uses entity tags in the ETag (section 14.19), If-Match (section 14.24), If-None-Match (section 14.26), and If-Range (section 14.27) header fields. The definition of how they are used and compared as cache validators is in section 13.3.3. An entity tag consists of an opaque quoted string, possibly prefixed by a weakness indicator. entity-tag = [ weak ] opaque-tag weak = "W/" opaque-tag = quoted-string A "strong entity tag" MAY be shared by two entities of a resource only if they are equivalent by octet equality. A "weak entity tag," indicated by the "W/" prefix, MAY be shared by two entities of a resource only if the entities are equivalent and could be substituted for each other with no significant change in semantics. A weak entity tag can only be used for weak comparison. An entity tag MUST be unique across all versions of all entities associated with a particular resource. A given entity tag value MAY be used for entities obtained by requests on different URIs. The use of the same entity tag value in conjunction with entities obtained by requests on different URIs does not imply the equivalence of those entities. 
3.12 Range Units HTTP/1.1 allows a client to request that only part (a range of) the response entity be included within the response. HTTP/1.1 uses range units in the Range (section 14.35) and Content-Range (section 14.16) header fields. An entity can be broken down into subranges according to various structural units. range-unit = bytes-unit | other-range-unit bytes-unit = "bytes" other-range-unit = token The only range unit defined by HTTP/1.1 is "bytes". HTTP/1.1 implementations MAY ignore ranges specified using other units. Fielding, et al. Standards Track [Page 30] RFC 2616 HTTP/1.1 June 1999 HTTP/1.1 has been designed to allow implementations of applications that do not depend on knowledge of ranges. 4 HTTP Message 4.1 Message Types HTTP messages consist of requests from client to server and responses from server to client. HTTP-message = Request | Response ; HTTP/1.1 messages Request (section 5) and Response (section 6) messages use the generic message format of RFC 822 [9] for transferring entities (the payload of the message). Both types of message consist of a start-line, zero or more header fields (also known as "headers"), an empty line (i.e., a line with nothing preceding the CRLF) indicating the end of the header fields, and possibly a message-body. generic-message = start-line *(message-header CRLF) CRLF [ message-body ] start-line = Request-Line | Status-Line In the interest of robustness, servers SHOULD ignore any empty line(s) received where a Request-Line is expected. In other words, if the server is reading the protocol stream at the beginning of a message and receives a CRLF first, it should ignore the CRLF. Certain buggy HTTP/1.0 client implementations generate extra CRLF's after a POST request. To restate what is explicitly forbidden by the BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an extra CRLF. 
4.2 Message Headers HTTP header fields, which include general-header (section 4.5), request-header (section 5.3), response-header (section 6.2), and entity-header (section 7.1) fields, follow the same generic format as that given in Section 3.1 of RFC 822 [9]. Each header field consists of a name followed by a colon (":") and the field value. Field names are case-insensitive. The field value MAY be preceded by any amount of LWS, though a single SP is preferred. Header fields can be extended over multiple lines by preceding each extra line with at least one SP or HT. Applications ought to follow "common form", where one is known or indicated, when generating HTTP constructs, since there might exist some implementations that fail to accept anything Fielding, et al. Standards Track [Page 31] RFC 2616 HTTP/1.1 June 1999 beyond the common forms. message-header = field-name ":" [ field-value ] field-name = token field-value = *( field-content | LWS ) field-content = <the OCTETs making up the field-value and consisting of either *TEXT or combinations of token, separators, and quoted-string> The field-content does not include any leading or trailing LWS: linear white space occurring before the first non-whitespace character of the field-value or after the last non-whitespace character of the field-value. Such leading or trailing LWS MAY be removed without changing the semantics of the field value. Any LWS that occurs between field-content MAY be replaced with a single SP before interpreting the field value or forwarding the message downstream. The order in which header fields with differing field names are received is not significant. However, it is "good practice" to send general-header fields first, followed by request-header or response-header fields, and ending with the entity-header fields. Multiple message-header fields with the same field-name MAY be present in a message if and only if the entire field-value for that header field is defined as a comma-separated list [i.e., #(values)]. 
It MUST be possible to combine the multiple header fields into one "field-name: field-value" pair, without changing the semantics of the message, by appending each subsequent field-value to the first, each separated by a comma. The order in which header fields with the same field-name are received is therefore significant to the interpretation of the combined field value, and thus a proxy MUST NOT change the order of these field values when a message is forwarded. 4.3 Message Body The message-body (if any) of an HTTP message is used to carry the entity-body associated with the request or response. The message-body differs from the entity-body only when a transfer-coding has been applied, as indicated by the Transfer-Encoding header field (section 14.41). message-body = entity-body | <entity-body encoded as per Transfer-Encoding> Transfer-Encoding MUST be used to indicate any transfer-codings applied by an application to ensure safe and proper transfer of the message. Transfer-Encoding is a property of the message, not of the Fielding, et al. Standards Track [Page 32] RFC 2616 HTTP/1.1 June 1999 entity, and thus MAY be added or removed by any application along the request/response chain. (However, section 3.6 places restrictions on when certain transfer-codings may be used.) The rules for when a message-body is allowed in a message differ for requests and responses. The presence of a message-body in a request is signaled by the inclusion of a Content-Length or Transfer-Encoding header field in the request's message-headers. A message-body MUST NOT be included in a request if the specification of the request method (section 5.1.1) does not allow sending an entity-body in requests. A server SHOULD read and forward a message-body on any request; if the request method does not include defined semantics for an entity-body, then the message-body SHOULD be ignored when handling the request. 
For response messages, whether or not a message-body is included with a message is dependent on both the request method and the response status code (section 6.1.1). All responses to the HEAD request method MUST NOT include a message-body, even though the presence of entity- header fields might lead one to believe they do. All 1xx (informational), 204 (no content), and 304 (not modified) responses MUST NOT include a message-body. All other responses do include a message-body, although it MAY be of zero length. 4.4 Message Length The transfer-length of a message is the length of the message-body as it appears in the message; that is, after any transfer-codings have been applied. When a message-body is included with a message, the transfer-length of that body is determined by one of the following (in order of precedence): 1.Any response message which "MUST NOT" include a message-body (such as the 1xx, 204, and 304 responses and any response to a HEAD request) is always terminated by the first empty line after the header fields, regardless of the entity-header fields present in the message. 2.If a Transfer-Encoding header field (section 14.41) is present and has any value other than "identity", then the transfer-length is defined by use of the "chunked" transfer-coding (section 3.6), unless the message is terminated by closing the connection. 3.If a Content-Length header field (section 14.13) is present, its decimal value in OCTETs represents both the entity-length and the transfer-length. The Content-Length header field MUST NOT be sent if these two lengths are different (i.e., if a Transfer-Encoding Fielding, et al. Standards Track [Page 33] RFC 2616 HTTP/1.1 June 1999 header field is present). If a message is received with both a Transfer-Encoding header field and a Content-Length header field, the latter MUST be ignored. 
4.If the message uses the media type "multipart/byteranges", and the transfer-length is not otherwise specified, then this self-delimiting media type defines the transfer-length. This media type MUST NOT be used unless the sender knows that the recipient can parse it; the presence in a request of a Range header with multiple byte-range specifiers from a 1.1 client implies that the client can parse multipart/byteranges responses. A range header might be forwarded by a 1.0 proxy that does not understand multipart/byteranges; in this case the server MUST delimit the message using methods defined in items 1,3 or 5 of this section. 5.By the server closing the connection. (Closing the connection cannot be used to indicate the end of a request body, since that would leave no possibility for the server to send back a response.) For compatibility with HTTP/1.0 applications, HTTP/1.1 requests containing a message-body MUST include a valid Content-Length header field unless the server is known to be HTTP/1.1 compliant. If a request contains a message-body and a Content-Length is not given, the server SHOULD respond with 400 (bad request) if it cannot determine the length of the message, or with 411 (length required) if it wishes to insist on receiving a valid Content-Length. All HTTP/1.1 applications that receive entities MUST accept the "chunked" transfer-coding (section 3.6), thus allowing this mechanism to be used for messages when the message length cannot be determined in advance. Messages MUST NOT include both a Content-Length header field and a non-identity transfer-coding. If the message does include a non- identity transfer-coding, the Content-Length MUST be ignored. When a Content-Length is given in a message where a message-body is allowed, its field value MUST exactly match the number of OCTETs in the message-body. HTTP/1.1 user agents MUST notify the user when an invalid length is received and detected. 
4.5 General Header Fields There are a few header fields which have general applicability for both request and response messages, but which do not apply to the entity being transferred. These header fields apply only to the Fielding, et al. Standards Track [Page 34] RFC 2616 HTTP/1.1 June 1999 message being transmitted. general-header = Cache-Control ; Section 14.9 | Connection ; Section 14.10 | Date ; Section 14.18 | Pragma ; Section 14.32 | Trailer ; Section 14.40 | Transfer-Encoding ; Section 14.41 | Upgrade ; Section 14.42 | Via ; Section 14.45 | Warning ; Section 14.46 General-header field names can be extended reliably only in combination with a change in the protocol version. However, new or experimental header fields may be given the semantics of general header fields if all parties in the communication recognize them to be general-header fields. Unrecognized header fields are treated as entity-header fields. 5 Request A request message from a client to a server includes, within the first line of that message, the method to be applied to the resource, the identifier of the resource, and the protocol version in use. Request = Request-Line ; Section 5.1 *(( general-header ; Section 4.5 | request-header ; Section 5.3 | entity-header ) CRLF) ; Section 7.1 CRLF [ message-body ] ; Section 4.3 5.1 Request-Line The Request-Line begins with a method token, followed by the Request-URI and the protocol version, and ending with CRLF. The elements are separated by SP characters. No CR or LF is allowed except in the final CRLF sequence. Request-Line = Method SP Request-URI SP HTTP-Version CRLF Fielding, et al. Standards Track [Page 35] RFC 2616 HTTP/1.1 June 1999 5.1.1 Method The Method token indicates the method to be performed on the resource identified by the Request-URI. The method is case-sensitive. 
Method = "OPTIONS" ; Section 9.2 | "GET" ; Section 9.3 | "HEAD" ; Section 9.4 | "POST" ; Section 9.5 | "PUT" ; Section 9.6 | "DELETE" ; Section 9.7 | "TRACE" ; Section 9.8 | "CONNECT" ; Section 9.9 | extension-method extension-method = token The list of methods allowed by a resource can be specified in an Allow header field (section 14.7). The return code of the response always notifies the client whether a method is currently allowed on a resource, since the set of allowed methods can change dynamically. An origin server SHOULD return the status code 405 (Method Not Allowed) if the method is known by the origin server but not allowed for the requested resource, and 501 (Not Implemented) if the method is unrecognized or not implemented by the origin server. The methods GET and HEAD MUST be supported by all general-purpose servers. All other methods are OPTIONAL; however, if the above methods are implemented, they MUST be implemented with the same semantics as those specified in section 9. 5.1.2 Request-URI The Request-URI is a Uniform Resource Identifier (section 3.2) and identifies the resource upon which to apply the request. Request-URI = "*" | absoluteURI | abs_path | authority The four options for Request-URI are dependent on the nature of the request. The asterisk "*" means that the request does not apply to a particular resource, but to the server itself, and is only allowed when the method used does not necessarily apply to a resource. One example would be OPTIONS * HTTP/1.1 The absoluteURI form is REQUIRED when the request is being made to a proxy. The proxy is requested to forward the request or service it from a valid cache, and return the response. Note that the proxy MAY forward the request on to another proxy or directly to the server Fielding, et al. Standards Track [Page 36] RFC 2616 HTTP/1.1 June 1999 specified by the absoluteURI. 
In order to avoid request loops, a proxy MUST be able to recognize all of its server names, including any aliases, local variations, and the numeric IP address. An example Request-Line would be: GET http://www.w3.org/pub/WWW/TheProject.html HTTP/1.1 To allow for transition to absoluteURIs in all requests in future versions of HTTP, all HTTP/1.1 servers MUST accept the absoluteURI form in requests, even though HTTP/1.1 clients will only generate them in requests to proxies. The authority form is only used by the CONNECT method (section 9.9). The most common form of Request-URI is that used to identify a resource on an origin server or gateway. In this case the absolute path of the URI MUST be transmitted (see section 3.2.1, abs_path) as the Request-URI, and the network location of the URI (authority) MUST be transmitted in a Host header field. For example, a client wishing to retrieve the resource above directly from the origin server would create a TCP connection to port 80 of the host "www.w3.org" and send the lines: GET /pub/WWW/TheProject.html HTTP/1.1 Host: www.w3.org followed by the remainder of the Request. Note that the absolute path cannot be empty; if none is present in the original URI, it MUST be given as "/" (the server root). The Request-URI is transmitted in the format specified in section 3.2.1. If the Request-URI is encoded using the "% HEX HEX" encoding [42], the origin server MUST decode the Request-URI in order to properly interpret the request. Servers SHOULD respond to invalid Request-URIs with an appropriate status code. A transparent proxy MUST NOT rewrite the "abs_path" part of the received Request-URI when forwarding it to the next inbound server, except as noted above to replace a null abs_path with "/". Note: The "no rewrite" rule prevents the proxy from changing the meaning of the request when the origin server is improperly using a non-reserved URI character for a reserved purpose. 
Implementors should be aware that some pre-HTTP/1.1 proxies have been known to rewrite the Request-URI. Fielding, et al. Standards Track [Page 37] RFC 2616 HTTP/1.1 June 1999 5.2 The Resource Identified by a Request The exact resource identified by an Internet request is determined by examining both the Request-URI and the Host header field. An origin server that does not allow resources to differ by the requested host MAY ignore the Host header field value when determining the resource identified by an HTTP/1.1 request. (But see section 19.6.1.1 for other requirements on Host support in HTTP/1.1.) An origin server that does differentiate resources based on the host requested (sometimes referred to as virtual hosts or vanity host names) MUST use the following rules for determining the requested resource on an HTTP/1.1 request: 1. If Request-URI is an absoluteURI, the host is part of the Request-URI. Any Host header field value in the request MUST be ignored. 2. If the Request-URI is not an absoluteURI, and the request includes a Host header field, the host is determined by the Host header field value. 3. If the host as determined by rule 1 or 2 is not a valid host on the server, the response MUST be a 400 (Bad Request) error message. Recipients of an HTTP/1.0 request that lacks a Host header field MAY attempt to use heuristics (e.g., examination of the URI path for something unique to a particular host) in order to determine what exact resource is being requested. 5.3 Request Header Fields The request-header fields allow the client to pass additional information about the request, and about the client itself, to the server. These fields act as request modifiers, with semantics equivalent to the parameters on a programming language method invocation. 
request-header = Accept ; Section 14.1 | Accept-Charset ; Section 14.2 | Accept-Encoding ; Section 14.3 | Accept-Language ; Section 14.4 | Authorization ; Section 14.8 | Expect ; Section 14.20 | From ; Section 14.22 | Host ; Section 14.23 | If-Match ; Section 14.24 Fielding, et al. Standards Track [Page 38] RFC 2616 HTTP/1.1 June 1999 | If-Modified-Since ; Section 14.25 | If-None-Match ; Section 14.26 | If-Range ; Section 14.27 | If-Unmodified-Since ; Section 14.28 | Max-Forwards ; Section 14.31 | Proxy-Authorization ; Section 14.34 | Range ; Section 14.35 | Referer ; Section 14.36 | TE ; Section 14.39 | User-Agent ; Section 14.43 Request-header field names can be extended reliably only in combination with a change in the protocol version. However, new or experimental header fields MAY be given the semantics of request- header fields if all parties in the communication recognize them to be request-header fields. Unrecognized header fields are treated as entity-header fields. 6 Response After receiving and interpreting a request message, a server responds with an HTTP response message. Response = Status-Line ; Section 6.1 *(( general-header ; Section 4.5 | response-header ; Section 6.2 | entity-header ) CRLF) ; Section 7.1 CRLF [ message-body ] ; Section 7.2 6.1 Status-Line The first line of a Response message is the Status-Line, consisting of the protocol version followed by a numeric status code and its associated textual phrase, with each element separated by SP characters. No CR or LF is allowed except in the final CRLF sequence. Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF 6.1.1 Status Code and Reason Phrase The Status-Code element is a 3-digit integer result code of the attempt to understand and satisfy the request. These codes are fully defined in section 10. The Reason-Phrase is intended to give a short textual description of the Status-Code. 
The Status-Code is intended for use by automata and the Reason-Phrase is intended for the human user. The client is not required to examine or display the Reason- Phrase. Fielding, et al. Standards Track [Page 39] RFC 2616 HTTP/1.1 June 1999 The first digit of the Status-Code defines the class of response. The last two digits do not have any categorization role. There are 5 values for the first digit: - 1xx: Informational - Request received, continuing process - 2xx: Success - The action was successfully received, understood, and accepted - 3xx: Redirection - Further action must be taken in order to complete the request - 4xx: Client Error - The request contains bad syntax or cannot be fulfilled - 5xx: Server Error - The server failed to fulfill an apparently valid request The individual values of the numeric status codes defined for HTTP/1.1, and an example set of corresponding Reason-Phrase's, are presented below. The reason phrases listed here are only recommendations -- they MAY be replaced by local equivalents without affecting the protocol. Status-Code = "100" ; Section 10.1.1: Continue | "101" ; Section 10.1.2: Switching Protocols | "200" ; Section 10.2.1: OK | "201" ; Section 10.2.2: Created | "202" ; Section 10.2.3: Accepted | "203" ; Section 10.2.4: Non-Authoritative Information | "204" ; Section 10.2.5: No Content | "205" ; Section 10.2.6: Reset Content | "206" ; Section 10.2.7: Partial Content | "300" ; Section 10.3.1: Multiple Choices | "301" ; Section 10.3.2: Moved Permanently | "302" ; Section 10.3.3: Found | "303" ; Section 10.3.4: See Other | "304" ; Section 10.3.5: Not Modified | "305" ; Section 10.3.6: Use Proxy | "307" ; Section 10.3.8: Temporary Redirect | "400" ; Section 10.4.1: Bad Request | "401" ; Section 10.4.2: Unauthorized | "402" ; Section 10.4.3: Payment Required | "403" ; Section 10.4.4: Forbidden | "404" ; Section 10.4.5: Not Found | "405" ; Section 10.4.6: Method Not Allowed | "406" ; Section 10.4.7: Not Acceptable Fielding, et al. 
Standards Track [Page 40] RFC 2616 HTTP/1.1 June 1999 | "407" ; Section 10.4.8: Proxy Authentication Required | "408" ; Section 10.4.9: Request Time-out | "409" ; Section 10.4.10: Conflict | "410" ; Section 10.4.11: Gone | "411" ; Section 10.4.12: Length Required | "412" ; Section 10.4.13: Precondition Failed | "413" ; Section 10.4.14: Request Entity Too Large | "414" ; Section 10.4.15: Request-URI Too Large | "415" ; Section 10.4.16: Unsupported Media Type | "416" ; Section 10.4.17: Requested range not satisfiable | "417" ; Section 10.4.18: Expectation Failed | "500" ; Section 10.5.1: Internal Server Error | "501" ; Section 10.5.2: Not Implemented | "502" ; Section 10.5.3: Bad Gateway | "503" ; Section 10.5.4: Service Unavailable | "504" ; Section 10.5.5: Gateway Time-out | "505" ; Section 10.5.6: HTTP Version not supported | extension-code extension-code = 3DIGIT Reason-Phrase = *<TEXT, excluding CR, LF> HTTP status codes are extensible. HTTP applications are not required to understand the meaning of all registered status codes, though such understanding is obviously desirable. However, applications MUST understand the class of any status code, as indicated by the first digit, and treat any unrecognized response as being equivalent to the x00 status code of that class, with the exception that an unrecognized response MUST NOT be cached. For example, if an unrecognized status code of 431 is received by the client, it can safely assume that there was something wrong with its request and treat the response as if it had received a 400 status code. In such cases, user agents SHOULD present to the user the entity returned with the response, since that entity is likely to include human- readable information which will explain the unusual status. 6.2 Response Header Fields The response-header fields allow the server to pass additional information about the response which cannot be placed in the Status- Line. 
These header fields give information about the server and about further access to the resource identified by the Request-URI. response-header = Accept-Ranges ; Section 14.5 | Age ; Section 14.6 | ETag ; Section 14.19 | Location ; Section 14.30 | Proxy-Authenticate ; Section 14.33 Fielding, et al. Standards Track [Page 41] RFC 2616 HTTP/1.1 June 1999 | Retry-After ; Section 14.37 | Server ; Section 14.38 | Vary ; Section 14.44 | WWW-Authenticate ; Section 14.47 Response-header field names can be extended reliably only in combination with a change in the protocol version. However, new or experimental header fields MAY be given the semantics of response- header fields if all parties in the communication recognize them to be response-header fields. Unrecognized header fields are treated as entity-header fields. 7 Entity Request and Response messages MAY transfer an entity if not otherwise restricted by the request method or response status code. An entity consists of entity-header fields and an entity-body, although some responses will only include the entity-headers. In this section, both sender and recipient refer to either the client or the server, depending on who sends and who receives the entity. 7.1 Entity Header Fields Entity-header fields define metainformation about the entity-body or, if no body is present, about the resource identified by the request. Some of this metainformation is OPTIONAL; some might be REQUIRED by portions of this specification. 
entity-header = Allow ; Section 14.7 | Content-Encoding ; Section 14.11 | Content-Language ; Section 14.12 | Content-Length ; Section 14.13 | Content-Location ; Section 14.14 | Content-MD5 ; Section 14.15 | Content-Range ; Section 14.16 | Content-Type ; Section 14.17 | Expires ; Section 14.21 | Last-Modified ; Section 14.29 | extension-header extension-header = message-header The extension-header mechanism allows additional entity-header fields to be defined without changing the protocol, but these fields cannot be assumed to be recognizable by the recipient. Unrecognized header fields SHOULD be ignored by the recipient and MUST be forwarded by transparent proxies. Fielding, et al. Standards Track [Page 42] RFC 2616 HTTP/1.1 June 1999 7.2 Entity Body The entity-body (if any) sent with an HTTP request or response is in a format and encoding defined by the entity-header fields. entity-body = *OCTET An entity-body is only present in a message when a message-body is present, as described in section 4.3. The entity-body is obtained from the message-body by decoding any Transfer-Encoding that might have been applied to ensure safe and proper transfer of the message. 7.2.1 Type When an entity-body is included with a message, the data type of that body is determined via the header fields Content-Type and Content- Encoding. These define a two-layer, ordered encoding model: entity-body := Content-Encoding( Content-Type( data ) ) Content-Type specifies the media type of the underlying data. Content-Encoding may be used to indicate any additional content codings applied to the data, usually for the purpose of data compression, that are a property of the requested resource. There is no default encoding. Any HTTP/1.1 message containing an entity-body SHOULD include a Content-Type header field defining the media type of that body. 
If and only if the media type is not given by a Content-Type field, the recipient MAY attempt to guess the media type via inspection of its content and/or the name extension(s) of the URI used to identify the resource. If the media type remains unknown, the recipient SHOULD treat it as type "application/octet-stream". 7.2.2 Entity Length The entity-length of a message is the length of the message-body before any transfer-codings have been applied. Section 4.4 defines how the transfer-length of a message-body is determined. Fielding, et al. Standards Track [Page 43] RFC 2616 HTTP/1.1 June 1999 8 Connections 8.1 Persistent Connections 8.1.1 Purpose Prior to persistent connections, a separate TCP connection was established to fetch each URL, increasing the load on HTTP servers and causing congestion on the Internet. The use of inline images and other associated data often require a client to make multiple requests of the same server in a short amount of time. Analysis of these performance problems and results from a prototype implementation are available [26] [30]. Implementation experience and measurements of actual HTTP/1.1 (RFC 2068) implementations show good results [39]. Alternatives have also been explored, for example, T/TCP [27]. Persistent HTTP connections have a number of advantages: - By opening and closing fewer TCP connections, CPU time is saved in routers and hosts (clients, servers, proxies, gateways, tunnels, or caches), and memory used for TCP protocol control blocks can be saved in hosts. - HTTP requests and responses can be pipelined on a connection. Pipelining allows a client to make multiple requests without waiting for each response, allowing a single TCP connection to be used much more efficiently, with much lower elapsed time. - Network congestion is reduced by reducing the number of packets caused by TCP opens, and by allowing TCP sufficient time to determine the congestion state of the network. 
- Latency on subsequent requests is reduced since there is no time spent in TCP's connection opening handshake. - HTTP can evolve more gracefully, since errors can be reported without the penalty of closing the TCP connection. Clients using future versions of HTTP might optimistically try a new feature, but if communicating with an older server, retry with old semantics after an error is reported. HTTP implementations SHOULD implement persistent connections. Fielding, et al. Standards Track [Page 44] RFC 2616 HTTP/1.1 June 1999 8.1.2 Overall Operation A significant difference between HTTP/1.1 and earlier versions of HTTP is that persistent connections are the default behavior of any HTTP connection. That is, unless otherwise indicated, the client SHOULD assume that the server will maintain a persistent connection, even after error responses from the server. Persistent connections provide a mechanism by which a client and a server can signal the close of a TCP connection. This signaling takes place using the Connection header field (section 14.10). Once a close has been signaled, the client MUST NOT send any more requests on that connection. 8.1.2.1 Negotiation An HTTP/1.1 server MAY assume that a HTTP/1.1 client intends to maintain a persistent connection unless a Connection header including the connection-token "close" was sent in the request. If the server chooses to close the connection immediately after sending the response, it SHOULD send a Connection header including the connection-token close. An HTTP/1.1 client MAY expect a connection to remain open, but would decide to keep it open based on whether the response from a server contains a Connection header with the connection-token close. In case the client does not want to maintain a connection for more than that request, it SHOULD send a Connection header including the connection-token close. 
If either the client or the server sends the close token in the Connection header, that request becomes the last one for the connection. Clients and servers SHOULD NOT assume that a persistent connection is maintained for HTTP versions less than 1.1 unless it is explicitly signaled. See section 19.6.2 for more information on backward compatibility with HTTP/1.0 clients. In order to remain persistent, all messages on the connection MUST have a self-defined message length (i.e., one not defined by closure of the connection), as described in section 4.4. Fielding, et al. Standards Track [Page 45] RFC 2616 HTTP/1.1 June 1999 8.1.2.2 Pipelining A client that supports persistent connections MAY "pipeline" its requests (i.e., send multiple requests without waiting for each response). A server MUST send its responses to those requests in the same order that the requests were received. Clients which assume persistent connections and pipeline immediately after connection establishment SHOULD be prepared to retry their connection if the first pipelined attempt fails. If a client does such a retry, it MUST NOT pipeline before it knows the connection is persistent. Clients MUST also be prepared to resend their requests if the server closes the connection before sending all of the corresponding responses. Clients SHOULD NOT pipeline requests using non-idempotent methods or non-idempotent sequences of methods (see section 9.1.2). Otherwise, a premature termination of the transport connection could lead to indeterminate results. A client wishing to send a non-idempotent request SHOULD wait to send that request until it has received the response status for the previous request. 8.1.3 Proxy Servers It is especially important that proxies correctly implement the properties of the Connection header field as specified in section 14.10. The proxy server MUST signal persistent connections separately with its clients and the origin servers (or other proxy servers) that it connects to. 
Each persistent connection applies to only one transport link. A proxy server MUST NOT establish a HTTP/1.1 persistent connection with an HTTP/1.0 client (but see RFC 2068 [33] for information and discussion of the problems with the Keep-Alive header implemented by many HTTP/1.0 clients). 8.1.4 Practical Considerations Servers will usually have some time-out value beyond which they will no longer maintain an inactive connection. Proxy servers might make this a higher value since it is likely that the client will be making more connections through the same server. The use of persistent connections places no requirements on the length (or existence) of this time-out for either the client or the server. Fielding, et al. Standards Track [Page 46] RFC 2616 HTTP/1.1 June 1999 When a client or server wishes to time-out it SHOULD issue a graceful close on the transport connection. Clients and servers SHOULD both constantly watch for the other side of the transport close, and respond to it as appropriate. If a client or server does not detect the other side's close promptly it could cause unnecessary resource drain on the network. A client, server, or proxy MAY close the transport connection at any time. For example, a client might have started to send a new request at the same time that the server has decided to close the "idle" connection. From the server's point of view, the connection is being closed while it was idle, but from the client's point of view, a request is in progress. This means that clients, servers, and proxies MUST be able to recover from asynchronous close events. Client software SHOULD reopen the transport connection and retransmit the aborted sequence of requests without user interaction so long as the request sequence is idempotent (see section 9.1.2). Non-idempotent methods or sequences MUST NOT be automatically retried, although user agents MAY offer a human operator the choice of retrying the request(s). 
Confirmation by user-agent software with semantic understanding of the application MAY substitute for user confirmation. The automatic retry SHOULD NOT be repeated if the second sequence of requests fails. Servers SHOULD always respond to at least one request per connection, if at all possible. Servers SHOULD NOT close a connection in the middle of transmitting a response, unless a network or client failure is suspected. Clients that use persistent connections SHOULD limit the number of simultaneous connections that they maintain to a given server. A single-user client SHOULD NOT maintain more than 2 connections with any server or proxy. A proxy SHOULD use up to 2*N connections to another server or proxy, where N is the number of simultaneously active users. These guidelines are intended to improve HTTP response times and avoid congestion. 8.2 Message Transmission Requirements 8.2.1 Persistent Connections and Flow Control HTTP/1.1 servers SHOULD maintain persistent connections and use TCP's flow control mechanisms to resolve temporary overloads, rather than terminating connections with the expectation that clients will retry. The latter technique can exacerbate network congestion. Fielding, et al. Standards Track [Page 47] RFC 2616 HTTP/1.1 June 1999 8.2.2 Monitoring Connections for Error Status Messages An HTTP/1.1 (or later) client sending a message-body SHOULD monitor the network connection for an error status while it is transmitting the request. If the client sees an error status, it SHOULD immediately cease transmitting the body. If the body is being sent using a "chunked" encoding (section 3.6), a zero length chunk and empty trailer MAY be used to prematurely mark the end of the message. If the body was preceded by a Content-Length header, the client MUST close the connection. 
8.2.3 Use of the 100 (Continue) Status The purpose of the 100 (Continue) status (see section 10.1.1) is to allow a client that is sending a request message with a request body to determine if the origin server is willing to accept the request (based on the request headers) before the client sends the request body. In some cases, it might either be inappropriate or highly inefficient for the client to send the body if the server will reject the message without looking at the body. Requirements for HTTP/1.1 clients: - If a client will wait for a 100 (Continue) response before sending the request body, it MUST send an Expect request-header field (section 14.20) with the "100-continue" expectation. - A client MUST NOT send an Expect request-header field (section 14.20) with the "100-continue" expectation if it does not intend to send a request body. Because of the presence of older implementations, the protocol allows ambiguous situations in which a client may send "Expect: 100- continue" without receiving either a 417 (Expectation Failed) status or a 100 (Continue) status. Therefore, when a client sends this header field to an origin server (possibly via a proxy) from which it has never seen a 100 (Continue) status, the client SHOULD NOT wait for an indefinite period before sending the request body. Requirements for HTTP/1.1 origin servers: - Upon receiving a request which includes an Expect request-header field with the "100-continue" expectation, an origin server MUST either respond with 100 (Continue) status and continue to read from the input stream, or respond with a final status code. The origin server MUST NOT wait for the request body before sending the 100 (Continue) response. If it responds with a final status code, it MAY close the transport connection or it MAY continue Fielding, et al. Standards Track [Page 48] RFC 2616 HTTP/1.1 June 1999 to read and discard the rest of the request. 
It MUST NOT perform the requested method if it returns a final status code. - An origin server SHOULD NOT send a 100 (Continue) response if the request message does not include an Expect request-header field with the "100-continue" expectation, and MUST NOT send a 100 (Continue) response if such a request comes from an HTTP/1.0 (or earlier) client. There is an exception to this rule: for compatibility with RFC 2068, a server MAY send a 100 (Continue) status in response to an HTTP/1.1 PUT or POST request that does not include an Expect request-header field with the "100-continue" expectation. This exception, the purpose of which is to minimize any client processing delays associated with an undeclared wait for 100 (Continue) status, applies only to HTTP/1.1 requests, and not to requests with any other HTTP-version value. - An origin server MAY omit a 100 (Continue) response if it has already received some or all of the request body for the corresponding request. - An origin server that sends a 100 (Continue) response MUST ultimately send a final status code, once the request body is received and processed, unless it terminates the transport connection prematurely. - If an origin server receives a request that does not include an Expect request-header field with the "100-continue" expectation, the request includes a request body, and the server responds with a final status code before reading the entire request body from the transport connection, then the server SHOULD NOT close the transport connection until it has read the entire request, or until the client closes the connection. Otherwise, the client might not reliably receive the response message. However, this requirement is not to be construed as preventing a server from defending itself against denial-of-service attacks, or from badly broken client implementations.
Requirements for HTTP/1.1 proxies: - If a proxy receives a request that includes an Expect request-header field with the "100-continue" expectation, and the proxy either knows that the next-hop server complies with HTTP/1.1 or higher, or does not know the HTTP version of the next-hop server, it MUST forward the request, including the Expect header field. Fielding, et al. Standards Track [Page 49] RFC 2616 HTTP/1.1 June 1999 - If the proxy knows that the version of the next-hop server is HTTP/1.0 or lower, it MUST NOT forward the request, and it MUST respond with a 417 (Expectation Failed) status. - Proxies SHOULD maintain a cache recording the HTTP version numbers received from recently-referenced next-hop servers. - A proxy MUST NOT forward a 100 (Continue) response if the request message was received from an HTTP/1.0 (or earlier) client and did not include an Expect request-header field with the "100-continue" expectation. This requirement overrides the general rule for forwarding of 1xx responses (see section 10.1). 8.2.4 Client Behavior if Server Prematurely Closes Connection If an HTTP/1.1 client sends a request which includes a request body, but which does not include an Expect request-header field with the "100-continue" expectation, and if the client is not directly connected to an HTTP/1.1 origin server, and if the client sees the connection close before receiving any status from the server, the client SHOULD retry the request. If the client does retry this request, it MAY use the following "binary exponential backoff" algorithm to be assured of obtaining a reliable response: 1. Initiate a new connection to the server 2. Transmit the request-headers 3. Initialize a variable R to the estimated round-trip time to the server (e.g., based on the time it took to establish the connection), or to a constant value of 5 seconds if the round-trip time is not available. 4. Compute T = R * (2**N), where N is the number of previous retries of this request. 5.
Wait either for an error response from the server, or for T seconds (whichever comes first) 6. If no error response is received, after T seconds transmit the body of the request. 7. If client sees that the connection is closed prematurely, repeat from step 1 until the request is accepted, an error response is received, or the user becomes impatient and terminates the retry process. Fielding, et al. Standards Track [Page 50] RFC 2616 HTTP/1.1 June 1999 If at any point an error status is received, the client - SHOULD NOT continue and - SHOULD close the connection if it has not completed sending the request message. 9 Method Definitions The set of common methods for HTTP/1.1 is defined below. Although this set can be expanded, additional methods cannot be assumed to share the same semantics for separately extended clients and servers. The Host request-header field (section 14.23) MUST accompany all HTTP/1.1 requests. 9.1 Safe and Idempotent Methods 9.1.1 Safe Methods Implementors should be aware that the software represents the user in their interactions over the Internet, and should be careful to allow the user to be aware of any actions they might take which may have an unexpected significance to themselves or others. In particular, the convention has been established that the GET and HEAD methods SHOULD NOT have the significance of taking an action other than retrieval. These methods ought to be considered "safe". This allows user agents to represent other methods, such as POST, PUT and DELETE, in a special way, so that the user is made aware of the fact that a possibly unsafe action is being requested. Naturally, it is not possible to ensure that the server does not generate side-effects as a result of performing a GET request; in fact, some dynamic resources consider that a feature. The important distinction here is that the user did not request the side-effects, so therefore cannot be held accountable for them. 
9.1.2 Idempotent Methods Methods can also have the property of "idempotence" in that (aside from error or expiration issues) the side-effects of N > 0 identical requests is the same as for a single request. The methods GET, HEAD, PUT and DELETE share this property. Also, the methods OPTIONS and TRACE SHOULD NOT have side effects, and so are inherently idempotent. Fielding, et al. Standards Track [Page 51] RFC 2616 HTTP/1.1 June 1999 However, it is possible that a sequence of several requests is non-idempotent, even if all of the methods executed in that sequence are idempotent. (A sequence is idempotent if a single execution of the entire sequence always yields a result that is not changed by a reexecution of all, or part, of that sequence.) For example, a sequence is non-idempotent if its result depends on a value that is later modified in the same sequence. A sequence that never has side effects is idempotent, by definition (provided that no concurrent operations are being executed on the same set of resources). 9.2 OPTIONS The OPTIONS method represents a request for information about the communication options available on the request/response chain identified by the Request-URI. This method allows the client to determine the options and/or requirements associated with a resource, or the capabilities of a server, without implying a resource action or initiating a resource retrieval. Responses to this method are not cacheable. If the OPTIONS request includes an entity-body (as indicated by the presence of Content-Length or Transfer-Encoding), then the media type MUST be indicated by a Content-Type field. Although this specification does not define any use for such a body, future extensions to HTTP might use the OPTIONS body to make more detailed queries on the server. A server that does not support such an extension MAY discard the request body.
If the Request-URI is an asterisk ("*"), the OPTIONS request is intended to apply to the server in general rather than to a specific resource. Since a server's communication options typically depend on the resource, the "*" request is only useful as a "ping" or "no-op" type of method; it does nothing beyond allowing the client to test the capabilities of the server. For example, this can be used to test a proxy for HTTP/1.1 compliance (or lack thereof). If the Request-URI is not an asterisk, the OPTIONS request applies only to the options that are available when communicating with that resource. A 200 response SHOULD include any header fields that indicate optional features implemented by the server and applicable to that resource (e.g., Allow), possibly including extensions not defined by this specification. The response body, if any, SHOULD also include information about the communication options. The format for such a Fielding, et al. Standards Track [Page 52] RFC 2616 HTTP/1.1 June 1999 body is not defined by this specification, but might be defined by future extensions to HTTP. Content negotiation MAY be used to select the appropriate response format. If no response body is included, the response MUST include a Content-Length field with a field-value of "0". The Max-Forwards request-header field MAY be used to target a specific proxy in the request chain. When a proxy receives an OPTIONS request on an absoluteURI for which request forwarding is permitted, the proxy MUST check for a Max-Forwards field. If the Max-Forwards field-value is zero ("0"), the proxy MUST NOT forward the message; instead, the proxy SHOULD respond with its own communication options. If the Max-Forwards field-value is an integer greater than zero, the proxy MUST decrement the field-value when it forwards the request. If no Max-Forwards field is present in the request, then the forwarded request MUST NOT include a Max-Forwards field. 
9.3 GET The GET method means retrieve whatever information (in the form of an entity) is identified by the Request-URI. If the Request-URI refers to a data-producing process, it is the produced data which shall be returned as the entity in the response and not the source text of the process, unless that text happens to be the output of the process. The semantics of the GET method change to a "conditional GET" if the request message includes an If-Modified-Since, If-Unmodified-Since, If-Match, If-None-Match, or If-Range header field. A conditional GET method requests that the entity be transferred only under the circumstances described by the conditional header field(s). The conditional GET method is intended to reduce unnecessary network usage by allowing cached entities to be refreshed without requiring multiple requests or transferring data already held by the client. The semantics of the GET method change to a "partial GET" if the request message includes a Range header field. A partial GET requests that only part of the entity be transferred, as described in section 14.35. The partial GET method is intended to reduce unnecessary network usage by allowing partially-retrieved entities to be completed without transferring data already held by the client. The response to a GET request is cacheable if and only if it meets the requirements for HTTP caching described in section 13. See section 15.1.3 for security considerations when used for forms. Fielding, et al. Standards Track [Page 53] RFC 2616 HTTP/1.1 June 1999 9.4 HEAD The HEAD method is identical to GET except that the server MUST NOT return a message-body in the response. The metainformation contained in the HTTP headers in response to a HEAD request SHOULD be identical to the information sent in response to a GET request. This method can be used for obtaining metainformation about the entity implied by the request without transferring the entity-body itself. 
This method is often used for testing hypertext links for validity, accessibility, and recent modification. The response to a HEAD request MAY be cacheable in the sense that the information contained in the response MAY be used to update a previously cached entity from that resource. If the new field values indicate that the cached entity differs from the current entity (as would be indicated by a change in Content-Length, Content-MD5, ETag or Last-Modified), then the cache MUST treat the cache entry as stale. 9.5 POST The POST method is used to request that the origin server accept the entity enclosed in the request as a new subordinate of the resource identified by the Request-URI in the Request-Line. POST is designed to allow a uniform method to cover the following functions: - Annotation of existing resources; - Posting a message to a bulletin board, newsgroup, mailing list, or similar group of articles; - Providing a block of data, such as the result of submitting a form, to a data-handling process; - Extending a database through an append operation. The actual function performed by the POST method is determined by the server and is usually dependent on the Request-URI. The posted entity is subordinate to that URI in the same way that a file is subordinate to a directory containing it, a news article is subordinate to a newsgroup to which it is posted, or a record is subordinate to a database. The action performed by the POST method might not result in a resource that can be identified by a URI. In this case, either 200 (OK) or 204 (No Content) is the appropriate response status, depending on whether or not the response includes an entity that describes the result. Fielding, et al. Standards Track [Page 54] RFC 2616 HTTP/1.1 June 1999 If a resource has been created on the origin server, the response SHOULD be 201 (Created) and contain an entity which describes the status of the request and refers to the new resource, and a Location header (see section 14.30). 
Responses to this method are not cacheable, unless the response includes appropriate Cache-Control or Expires header fields. However, the 303 (See Other) response can be used to direct the user agent to retrieve a cacheable resource. POST requests MUST obey the message transmission requirements set out in section 8.2. See section 15.1.3 for security considerations. 9.6 PUT The PUT method requests that the enclosed entity be stored under the supplied Request-URI. If the Request-URI refers to an already existing resource, the enclosed entity SHOULD be considered as a modified version of the one residing on the origin server. If the Request-URI does not point to an existing resource, and that URI is capable of being defined as a new resource by the requesting user agent, the origin server can create the resource with that URI. If a new resource is created, the origin server MUST inform the user agent via the 201 (Created) response. If an existing resource is modified, either the 200 (OK) or 204 (No Content) response codes SHOULD be sent to indicate successful completion of the request. If the resource could not be created or modified with the Request-URI, an appropriate error response SHOULD be given that reflects the nature of the problem. The recipient of the entity MUST NOT ignore any Content-* (e.g. Content-Range) headers that it does not understand or implement and MUST return a 501 (Not Implemented) response in such cases. If the request passes through a cache and the Request-URI identifies one or more currently cached entities, those entries SHOULD be treated as stale. Responses to this method are not cacheable. The fundamental difference between the POST and PUT requests is reflected in the different meaning of the Request-URI. The URI in a POST request identifies the resource that will handle the enclosed entity. That resource might be a data-accepting process, a gateway to some other protocol, or a separate entity that accepts annotations. 
In contrast, the URI in a PUT request identifies the entity enclosed with the request -- the user agent knows what URI is intended and the server MUST NOT attempt to apply the request to some other resource. If the server desires that the request be applied to a different URI, Fielding, et al. Standards Track [Page 55] RFC 2616 HTTP/1.1 June 1999 it MUST send a 301 (Moved Permanently) response; the user agent MAY then make its own decision regarding whether or not to redirect the request. A single resource MAY be identified by many different URIs. For example, an article might have a URI for identifying "the current version" which is separate from the URI identifying each particular version. In this case, a PUT request on a general URI might result in several other URIs being defined by the origin server. HTTP/1.1 does not define how a PUT method affects the state of an origin server. PUT requests MUST obey the message transmission requirements set out in section 8.2. Unless otherwise specified for a particular entity-header, the entity-headers in the PUT request SHOULD be applied to the resource created or modified by the PUT. 9.7 DELETE The DELETE method requests that the origin server delete the resource identified by the Request-URI. This method MAY be overridden by human intervention (or other means) on the origin server. The client cannot be guaranteed that the operation has been carried out, even if the status code returned from the origin server indicates that the action has been completed successfully. However, the server SHOULD NOT indicate success unless, at the time the response is given, it intends to delete the resource or move it to an inaccessible location. A successful response SHOULD be 200 (OK) if the response includes an entity describing the status, 202 (Accepted) if the action has not yet been enacted, or 204 (No Content) if the action has been enacted but the response does not include an entity. 
If the request passes through a cache and the Request-URI identifies one or more currently cached entities, those entries SHOULD be treated as stale. Responses to this method are not cacheable. 9.8 TRACE The TRACE method is used to invoke a remote, application-layer loop-back of the request message. The final recipient of the request SHOULD reflect the message received back to the client as the entity-body of a 200 (OK) response. The final recipient is either the Fielding, et al. Standards Track [Page 56] RFC 2616 HTTP/1.1 June 1999 origin server or the first proxy or gateway to receive a Max-Forwards value of zero (0) in the request (see section 14.31). A TRACE request MUST NOT include an entity. TRACE allows the client to see what is being received at the other end of the request chain and use that data for testing or diagnostic information. The value of the Via header field (section 14.45) is of particular interest, since it acts as a trace of the request chain. Use of the Max-Forwards header field allows the client to limit the length of the request chain, which is useful for testing a chain of proxies forwarding messages in an infinite loop. If the request is valid, the response SHOULD contain the entire request message in the entity-body, with a Content-Type of "message/http". Responses to this method MUST NOT be cached. 9.9 CONNECT This specification reserves the method name CONNECT for use with a proxy that can dynamically switch to being a tunnel (e.g. SSL tunneling [44]). 10 Status Code Definitions Each Status-Code is described below, including a description of which method(s) it can follow and any metainformation required in the response. 10.1 Informational 1xx This class of status code indicates a provisional response, consisting only of the Status-Line and optional headers, and is terminated by an empty line. There are no required headers for this class of status code.
Since HTTP/1.0 did not define any 1xx status codes, servers MUST NOT send a 1xx response to an HTTP/1.0 client except under experimental conditions. A client MUST be prepared to accept one or more 1xx status responses prior to a regular response, even if the client does not expect a 100 (Continue) status message. Unexpected 1xx status responses MAY be ignored by a user agent. Proxies MUST forward 1xx responses, unless the connection between the proxy and its client has been closed, or unless the proxy itself requested the generation of the 1xx response. (For example, if a Fielding, et al. Standards Track [Page 57] RFC 2616 HTTP/1.1 June 1999 proxy adds an "Expect: 100-continue" field when it forwards a request, then it need not forward the corresponding 100 (Continue) response(s).) 10.1.1 100 Continue The client SHOULD continue with its request. This interim response is used to inform the client that the initial part of the request has been received and has not yet been rejected by the server. The client SHOULD continue by sending the remainder of the request or, if the request has already been completed, ignore this response. The server MUST send a final response after the request has been completed. See section 8.2.3 for detailed discussion of the use and handling of this status code. 10.1.2 101 Switching Protocols The server understands and is willing to comply with the client's request, via the Upgrade message header field (section 14.42), for a change in the application protocol being used on this connection. The server will switch protocols to those defined by the response's Upgrade header field immediately after the empty line which terminates the 101 response. The protocol SHOULD be switched only when it is advantageous to do so. For example, switching to a newer version of HTTP is advantageous over older versions, and switching to a real-time, synchronous protocol might be advantageous when delivering resources that use such features.
10.2 Successful 2xx This class of status code indicates that the client's request was successfully received, understood, and accepted. 10.2.1 200 OK The request has succeeded. The information returned with the response is dependent on the method used in the request, for example: GET an entity corresponding to the requested resource is sent in the response; HEAD the entity-header fields corresponding to the requested resource are sent in the response without any message-body; POST an entity describing or containing the result of the action; Fielding, et al. Standards Track [Page 58] RFC 2616 HTTP/1.1 June 1999 TRACE an entity containing the request message as received by the end server. 10.2.2 201 Created The request has been fulfilled and resulted in a new resource being created. The newly created resource can be referenced by the URI(s) returned in the entity of the response, with the most specific URI for the resource given by a Location header field. The response SHOULD include an entity containing a list of resource characteristics and location(s) from which the user or user agent can choose the one most appropriate. The entity format is specified by the media type given in the Content-Type header field. The origin server MUST create the resource before returning the 201 status code. If the action cannot be carried out immediately, the server SHOULD respond with 202 (Accepted) response instead. A 201 response MAY contain an ETag response header field indicating the current value of the entity tag for the requested variant just created, see section 14.19. 10.2.3 202 Accepted The request has been accepted for processing, but the processing has not been completed. The request might or might not eventually be acted upon, as it might be disallowed when processing actually takes place. There is no facility for re-sending a status code from an asynchronous operation such as this. The 202 response is intentionally non-committal. 
Its purpose is to allow a server to accept a request for some other process (perhaps a batch-oriented process that is only run once per day) without requiring that the user agent's connection to the server persist until the process is completed. The entity returned with this response SHOULD include an indication of the request's current status and either a pointer to a status monitor or some estimate of when the user can expect the request to be fulfilled. 10.2.4 203 Non-Authoritative Information The returned metainformation in the entity-header is not the definitive set as available from the origin server, but is gathered from a local or a third-party copy. The set presented MAY be a subset or superset of the original version. For example, including local annotation information about the resource might result in a superset of the metainformation known by the origin server. Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK). Fielding, et al. Standards Track [Page 59] RFC 2616 HTTP/1.1 June 1999 10.2.5 204 No Content The server has fulfilled the request but does not need to return an entity-body, and might want to return updated metainformation. The response MAY include new or updated metainformation in the form of entity-headers, which if present SHOULD be associated with the requested variant. If the client is a user agent, it SHOULD NOT change its document view from that which caused the request to be sent. This response is primarily intended to allow input for actions to take place without causing a change to the user agent's active document view, although any new or updated metainformation SHOULD be applied to the document currently in the user agent's active view. The 204 response MUST NOT include a message-body, and thus is always terminated by the first empty line after the header fields. 
10.2.6 205 Reset Content The server has fulfilled the request and the user agent SHOULD reset the document view which caused the request to be sent. This response is primarily intended to allow input for actions to take place via user input, followed by a clearing of the form in which the input is given so that the user can easily initiate another input action. The response MUST NOT include an entity. 10.2.7 206 Partial Content The server has fulfilled the partial GET request for the resource. The request MUST have included a Range header field (section 14.35) indicating the desired range, and MAY have included an If-Range header field (section 14.27) to make the request conditional. The response MUST include the following header fields: - Either a Content-Range header field (section 14.16) indicating the range included with this response, or a multipart/byteranges Content-Type including Content-Range fields for each part. If a Content-Length header field is present in the response, its value MUST match the actual number of OCTETs transmitted in the message-body. - Date - ETag and/or Content-Location, if the header would have been sent in a 200 response to the same request Fielding, et al. Standards Track [Page 60] RFC 2616 HTTP/1.1 June 1999 - Expires, Cache-Control, and/or Vary, if the field-value might differ from that sent in any previous response for the same variant If the 206 response is the result of an If-Range request that used a strong cache validator (see section 13.3.3), the response SHOULD NOT include other entity-headers. If the response is the result of an If-Range request that used a weak validator, the response MUST NOT include other entity-headers; this prevents inconsistencies between cached entity-bodies and updated headers. Otherwise, the response MUST include all of the entity-headers that would have been returned with a 200 (OK) response to the same request. 
A cache MUST NOT combine a 206 response with other previously cached content if the ETag or Last-Modified headers do not match exactly, see 13.5.4. A cache that does not support the Range and Content-Range headers MUST NOT cache 206 (Partial) responses. 10.3 Redirection 3xx This class of status code indicates that further action needs to be taken by the user agent in order to fulfill the request. The action required MAY be carried out by the user agent without interaction with the user if and only if the method used in the second request is GET or HEAD. A client SHOULD detect infinite redirection loops, since such loops generate network traffic for each redirection. Note: previous versions of this specification recommended a maximum of five redirections. Content developers should be aware that there might be clients that implement such a fixed limitation. 10.3.1 300 Multiple Choices The requested resource corresponds to any one of a set of representations, each with its own specific location, and agent-driven negotiation information (section 12) is being provided so that the user (or user agent) can select a preferred representation and redirect its request to that location. Unless it was a HEAD request, the response SHOULD include an entity containing a list of resource characteristics and location(s) from which the user or user agent can choose the one most appropriate. The entity format is specified by the media type given in the Content-Type header field. Depending upon the format and the capabilities of Fielding, et al. Standards Track [Page 61] RFC 2616 HTTP/1.1 June 1999 the user agent, selection of the most appropriate choice MAY be performed automatically. However, this specification does not define any standard for such automatic selection. If the server has a preferred choice of representation, it SHOULD include the specific URI for that representation in the Location field; user agents MAY use the Location field value for automatic redirection.
This response is cacheable unless indicated otherwise. 10.3.2 301 Moved Permanently The requested resource has been assigned a new permanent URI and any future references to this resource SHOULD use one of the returned URIs. Clients with link editing capabilities ought to automatically re-link references to the Request-URI to one or more of the new references returned by the server, where possible. This response is cacheable unless indicated otherwise. The new permanent URI SHOULD be given by the Location field in the response. Unless the request method was HEAD, the entity of the response SHOULD contain a short hypertext note with a hyperlink to the new URI(s). If the 301 status code is received in response to a request other than GET or HEAD, the user agent MUST NOT automatically redirect the request unless it can be confirmed by the user, since this might change the conditions under which the request was issued. Note: When automatically redirecting a POST request after receiving a 301 status code, some existing HTTP/1.0 user agents will erroneously change it into a GET request. 10.3.3 302 Found The requested resource resides temporarily under a different URI. Since the redirection might be altered on occasion, the client SHOULD continue to use the Request-URI for future requests. This response is only cacheable if indicated by a Cache-Control or Expires header field. The temporary URI SHOULD be given by the Location field in the response. Unless the request method was HEAD, the entity of the response SHOULD contain a short hypertext note with a hyperlink to the new URI(s). Fielding, et al. Standards Track [Page 62] RFC 2616 HTTP/1.1 June 1999 If the 302 status code is received in response to a request other than GET or HEAD, the user agent MUST NOT automatically redirect the request unless it can be confirmed by the user, since this might change the conditions under which the request was issued. 
Note: RFC 1945 and RFC 2068 specify that the client is not allowed to change the method on the redirected request. However, most existing user agent implementations treat 302 as if it were a 303 response, performing a GET on the Location field-value regardless of the original request method. The status codes 303 and 307 have been added for servers that wish to make unambiguously clear which kind of reaction is expected of the client. 10.3.4 303 See Other The response to the request can be found under a different URI and SHOULD be retrieved using a GET method on that resource. This method exists primarily to allow the output of a POST-activated script to redirect the user agent to a selected resource. The new URI is not a substitute reference for the originally requested resource. The 303 response MUST NOT be cached, but the response to the second (redirected) request might be cacheable. The different URI SHOULD be given by the Location field in the response. Unless the request method was HEAD, the entity of the response SHOULD contain a short hypertext note with a hyperlink to the new URI(s). Note: Many pre-HTTP/1.1 user agents do not understand the 303 status. When interoperability with such clients is a concern, the 302 status code may be used instead, since most user agents react to a 302 response as described here for 303. 10.3.5 304 Not Modified If the client has performed a conditional GET request and access is allowed, but the document has not been modified, the server SHOULD respond with this status code. The 304 response MUST NOT contain a message-body, and thus is always terminated by the first empty line after the header fields. The response MUST include the following header fields: - Date, unless its omission is required by section 14.18.1 Fielding, et al. 
Standards Track [Page 63] RFC 2616 HTTP/1.1 June 1999 If a clockless origin server obeys these rules, and proxies and clients add their own Date to any response received without one (as already specified by [RFC 2068], section 14.19), caches will operate correctly. - ETag and/or Content-Location, if the header would have been sent in a 200 response to the same request - Expires, Cache-Control, and/or Vary, if the field-value might differ from that sent in any previous response for the same variant If the conditional GET used a strong cache validator (see section 13.3.3), the response SHOULD NOT include other entity-headers. Otherwise (i.e., the conditional GET used a weak validator), the response MUST NOT include other entity-headers; this prevents inconsistencies between cached entity-bodies and updated headers. If a 304 response indicates an entity not currently cached, then the cache MUST disregard the response and repeat the request without the conditional. If a cache uses a received 304 response to update a cache entry, the cache MUST update the entry to reflect any new field values given in the response. 10.3.6 305 Use Proxy The requested resource MUST be accessed through the proxy given by the Location field. The Location field gives the URI of the proxy. The recipient is expected to repeat this single request via the proxy. 305 responses MUST only be generated by origin servers. Note: RFC 2068 was not clear that 305 was intended to redirect a single request, and to be generated by origin servers only. Not observing these limitations has significant security consequences. 10.3.7 306 (Unused) The 306 status code was used in a previous version of the specification, is no longer used, and the code is reserved. Fielding, et al. Standards Track [Page 64] RFC 2616 HTTP/1.1 June 1999 10.3.8 307 Temporary Redirect The requested resource resides temporarily under a different URI. 
Since the redirection MAY be altered on occasion, the client SHOULD continue to use the Request-URI for future requests. This response is only cacheable if indicated by a Cache-Control or Expires header field. The temporary URI SHOULD be given by the Location field in the response. Unless the request method was HEAD, the entity of the response SHOULD contain a short hypertext note with a hyperlink to the new URI(s), since many pre-HTTP/1.1 user agents do not understand the 307 status. Therefore, the note SHOULD contain the information necessary for a user to repeat the original request on the new URI. If the 307 status code is received in response to a request other than GET or HEAD, the user agent MUST NOT automatically redirect the request unless it can be confirmed by the user, since this might change the conditions under which the request was issued. 10.4 Client Error 4xx The 4xx class of status code is intended for cases in which the client seems to have erred. Except when responding to a HEAD request, the server SHOULD include an entity containing an explanation of the error situation, and whether it is a temporary or permanent condition. These status codes are applicable to any request method. User agents SHOULD display any included entity to the user. If the client is sending data, a server implementation using TCP SHOULD be careful to ensure that the client acknowledges receipt of the packet(s) containing the response, before the server closes the input connection. If the client continues sending data to the server after the close, the server's TCP stack will send a reset packet to the client, which may erase the client's unacknowledged input buffers before they can be read and interpreted by the HTTP application. 10.4.1 400 Bad Request The request could not be understood by the server due to malformed syntax. The client SHOULD NOT repeat the request without modifications. Fielding, et al. 
Standards Track [Page 65] RFC 2616 HTTP/1.1 June 1999 10.4.2 401 Unauthorized The request requires user authentication. The response MUST include a WWW-Authenticate header field (section 14.47) containing a challenge applicable to the requested resource. The client MAY repeat the request with a suitable Authorization header field (section 14.8). If the request already included Authorization credentials, then the 401 response indicates that authorization has been refused for those credentials. If the 401 response contains the same challenge as the prior response, and the user agent has already attempted authentication at least once, then the user SHOULD be presented the entity that was given in the response, since that entity might include relevant diagnostic information. HTTP access authentication is explained in "HTTP Authentication: Basic and Digest Access Authentication" [43]. 10.4.3 402 Payment Required This code is reserved for future use. 10.4.4 403 Forbidden The server understood the request, but is refusing to fulfill it. Authorization will not help and the request SHOULD NOT be repeated. If the request method was not HEAD and the server wishes to make public why the request has not been fulfilled, it SHOULD describe the reason for the refusal in the entity. If the server does not wish to make this information available to the client, the status code 404 (Not Found) can be used instead. 10.4.5 404 Not Found The server has not found anything matching the Request-URI. No indication is given of whether the condition is temporary or permanent. The 410 (Gone) status code SHOULD be used if the server knows, through some internally configurable mechanism, that an old resource is permanently unavailable and has no forwarding address. This status code is commonly used when the server does not wish to reveal exactly why the request has been refused, or when no other response is applicable. 
10.4.6 405 Method Not Allowed The method specified in the Request-Line is not allowed for the resource identified by the Request-URI. The response MUST include an Allow header containing a list of valid methods for the requested resource. Fielding, et al. Standards Track [Page 66] RFC 2616 HTTP/1.1 June 1999 10.4.7 406 Not Acceptable The resource identified by the request is only capable of generating response entities which have content characteristics not acceptable according to the accept headers sent in the request. Unless it was a HEAD request, the response SHOULD include an entity containing a list of available entity characteristics and location(s) from which the user or user agent can choose the one most appropriate. The entity format is specified by the media type given in the Content-Type header field. Depending upon the format and the capabilities of the user agent, selection of the most appropriate choice MAY be performed automatically. However, this specification does not define any standard for such automatic selection. Note: HTTP/1.1 servers are allowed to return responses which are not acceptable according to the accept headers sent in the request. In some cases, this may even be preferable to sending a 406 response. User agents are encouraged to inspect the headers of an incoming response to determine if it is acceptable. If the response could be unacceptable, a user agent SHOULD temporarily stop receipt of more data and query the user for a decision on further actions. 10.4.8 407 Proxy Authentication Required This code is similar to 401 (Unauthorized), but indicates that the client must first authenticate itself with the proxy. The proxy MUST return a Proxy-Authenticate header field (section 14.33) containing a challenge applicable to the proxy for the requested resource. The client MAY repeat the request with a suitable Proxy-Authorization header field (section 14.34). 
HTTP access authentication is explained in "HTTP Authentication: Basic and Digest Access Authentication" [43]. 10.4.9 408 Request Timeout The client did not produce a request within the time that the server was prepared to wait. The client MAY repeat the request without modifications at any later time. 10.4.10 409 Conflict The request could not be completed due to a conflict with the current state of the resource. This code is only allowed in situations where it is expected that the user might be able to resolve the conflict and resubmit the request. The response body SHOULD include enough Fielding, et al. Standards Track [Page 67] RFC 2616 HTTP/1.1 June 1999 information for the user to recognize the source of the conflict. Ideally, the response entity would include enough information for the user or user agent to fix the problem; however, that might not be possible and is not required. Conflicts are most likely to occur in response to a PUT request. For example, if versioning were being used and the entity being PUT included changes to a resource which conflict with those made by an earlier (third-party) request, the server might use the 409 response to indicate that it can't complete the request. In this case, the response entity would likely contain a list of the differences between the two versions in a format defined by the response Content-Type. 10.4.11 410 Gone The requested resource is no longer available at the server and no forwarding address is known. This condition is expected to be considered permanent. Clients with link editing capabilities SHOULD delete references to the Request-URI after user approval. If the server does not know, or has no facility to determine, whether or not the condition is permanent, the status code 404 (Not Found) SHOULD be used instead. This response is cacheable unless indicated otherwise. 
The 410 response is primarily intended to assist the task of web maintenance by notifying the recipient that the resource is intentionally unavailable and that the server owners desire that remote links to that resource be removed. Such an event is common for limited-time, promotional services and for resources belonging to individuals no longer working at the server's site. It is not necessary to mark all permanently unavailable resources as "gone" or to keep the mark for any length of time -- that is left to the discretion of the server owner. 10.4.12 411 Length Required The server refuses to accept the request without a defined Content-Length. The client MAY repeat the request if it adds a valid Content-Length header field containing the length of the message-body in the request message. 10.4.13 412 Precondition Failed The precondition given in one or more of the request-header fields evaluated to false when it was tested on the server. This response code allows the client to place preconditions on the current resource metainformation (header field data) and thus prevent the requested method from being applied to a resource other than the one intended. Fielding, et al. Standards Track [Page 68] RFC 2616 HTTP/1.1 June 1999 10.4.14 413 Request Entity Too Large The server is refusing to process a request because the request entity is larger than the server is willing or able to process. The server MAY close the connection to prevent the client from continuing the request. If the condition is temporary, the server SHOULD include a Retry-After header field to indicate that it is temporary and after what time the client MAY try again. 10.4.15 414 Request-URI Too Long The server is refusing to service the request because the Request-URI is longer than the server is willing to interpret. 
This rare condition is only likely to occur when a client has improperly converted a POST request to a GET request with long query information, when the client has descended into a URI "black hole" of redirection (e.g., a redirected URI prefix that points to a suffix of itself), or when the server is under attack by a client attempting to exploit security holes present in some servers using fixed-length buffers for reading or manipulating the Request-URI. 10.4.16 415 Unsupported Media Type The server is refusing to service the request because the entity of the request is in a format not supported by the requested resource for the requested method. 10.4.17 416 Requested Range Not Satisfiable A server SHOULD return a response with this status code if a request included a Range request-header field (section 14.35), and none of the range-specifier values in this field overlap the current extent of the selected resource, and the request did not include an If-Range request-header field. (For byte-ranges, this means that the first-byte-pos of all of the byte-range-spec values were greater than the current length of the selected resource.) When this status code is returned for a byte-range request, the response SHOULD include a Content-Range entity-header field specifying the current length of the selected resource (see section 14.16). This response MUST NOT use the multipart/byteranges content-type. Fielding, et al. Standards Track [Page 69] RFC 2616 HTTP/1.1 June 1999 10.4.18 417 Expectation Failed The expectation given in an Expect request-header field (see section 14.20) could not be met by this server, or, if the server is a proxy, the server has unambiguous evidence that the request could not be met by the next-hop server. 10.5 Server Error 5xx Response status codes beginning with the digit "5" indicate cases in which the server is aware that it has erred or is incapable of performing the request. 
Except when responding to a HEAD request, the server SHOULD include an entity containing an explanation of the error situation, and whether it is a temporary or permanent condition. User agents SHOULD display any included entity to the user. These response codes are applicable to any request method. 10.5.1 500 Internal Server Error The server encountered an unexpected condition which prevented it from fulfilling the request. 10.5.2 501 Not Implemented The server does not support the functionality required to fulfill the request. This is the appropriate response when the server does not recognize the request method and is not capable of supporting it for any resource. 10.5.3 502 Bad Gateway The server, while acting as a gateway or proxy, received an invalid response from the upstream server it accessed in attempting to fulfill the request. 10.5.4 503 Service Unavailable The server is currently unable to handle the request due to a temporary overloading or maintenance of the server. The implication is that this is a temporary condition which will be alleviated after some delay. If known, the length of the delay MAY be indicated in a Retry-After header. If no Retry-After is given, the client SHOULD handle the response as it would for a 500 response. Note: The existence of the 503 status code does not imply that a server must use it when becoming overloaded. Some servers may wish to simply refuse the connection. Fielding, et al. Standards Track [Page 70] RFC 2616 HTTP/1.1 June 1999 10.5.5 504 Gateway Timeout The server, while acting as a gateway or proxy, did not receive a timely response from the upstream server specified by the URI (e.g. HTTP, FTP, LDAP) or some other auxiliary server (e.g. DNS) it needed to access in attempting to complete the request. Note to implementors: some deployed proxies are known to return 400 or 500 when DNS lookups time out. 
10.5.6 505 HTTP Version Not Supported The server does not support, or refuses to support, the HTTP protocol version that was used in the request message. The server is indicating that it is unable or unwilling to complete the request using the same major version as the client, as described in section 3.1, other than with this error message. The response SHOULD contain an entity describing why that version is not supported and what other protocols are supported by that server. 11 Access Authentication HTTP provides several OPTIONAL challenge-response authentication mechanisms which can be used by a server to challenge a client request and by a client to provide authentication information. The general framework for access authentication, and the specification of "basic" and "digest" authentication, are specified in "HTTP Authentication: Basic and Digest Access Authentication" [43]. This specification adopts the definitions of "challenge" and "credentials" from that specification. 12 Content Negotiation Most HTTP responses include an entity which contains information for interpretation by a human user. Naturally, it is desirable to supply the user with the "best available" entity corresponding to the request. Unfortunately for servers and caches, not all users have the same preferences for what is "best," and not all user agents are equally capable of rendering all entity types. For that reason, HTTP has provisions for several mechanisms for "content negotiation" -- the process of selecting the best representation for a given response when there are multiple representations available. Note: This is not called "format negotiation" because the alternate representations may be of the same media type, but use different capabilities of that type, be in different languages, etc. Fielding, et al. Standards Track [Page 71] RFC 2616 HTTP/1.1 June 1999 Any response containing an entity-body MAY be subject to negotiation, including error responses. 
There are two kinds of content negotiation which are possible in HTTP: server-driven and agent-driven negotiation. These two kinds of negotiation are orthogonal and thus may be used separately or in combination. One method of combination, referred to as transparent negotiation, occurs when a cache uses the agent-driven negotiation information provided by the origin server in order to provide server-driven negotiation for subsequent requests. 12.1 Server-driven Negotiation If the selection of the best representation for a response is made by an algorithm located at the server, it is called server-driven negotiation. Selection is based on the available representations of the response (the dimensions over which it can vary; e.g. language, content-coding, etc.) and the contents of particular header fields in the request message or on other information pertaining to the request (such as the network address of the client). Server-driven negotiation is advantageous when the algorithm for selecting from among the available representations is difficult to describe to the user agent, or when the server desires to send its "best guess" to the client along with the first response (hoping to avoid the round-trip delay of a subsequent request if the "best guess" is good enough for the user). In order to improve the server's guess, the user agent MAY include request header fields (Accept, Accept-Language, Accept-Encoding, etc.) which describe its preferences for such a response. Server-driven negotiation has disadvantages: 1. It is impossible for the server to accurately determine what might be "best" for any given user, since that would require complete knowledge of both the capabilities of the user agent and the intended use for the response (e.g., does the user want to view it on screen or print it on paper?). 2. 
Having the user agent describe its capabilities in every request can be both very inefficient (given that only a small percentage of responses have multiple representations) and a potential violation of the user's privacy. 3. It complicates the implementation of an origin server and the algorithms for generating responses to a request. Fielding, et al. Standards Track [Page 72] RFC 2616 HTTP/1.1 June 1999 4. It may limit a public cache's ability to use the same response for multiple users' requests. HTTP/1.1 includes the following request-header fields for enabling server-driven negotiation through description of user agent capabilities and user preferences: Accept (section 14.1), Accept-Charset (section 14.2), Accept-Encoding (section 14.3), Accept-Language (section 14.4), and User-Agent (section 14.43). However, an origin server is not limited to these dimensions and MAY vary the response based on any aspect of the request, including information outside the request-header fields or within extension header fields not defined by this specification. The Vary header field can be used to express the parameters the server uses to select a representation that is subject to server-driven negotiation. See section 13.6 for use of the Vary header field by caches and section 14.44 for use of the Vary header field by servers. 12.2 Agent-driven Negotiation With agent-driven negotiation, selection of the best representation for a response is performed by the user agent after receiving an initial response from the origin server. Selection is based on a list of the available representations of the response included within the header fields or entity-body of the initial response, with each representation identified by its own URI. Selection from among the representations may be performed automatically (if the user agent is capable of doing so) or manually by the user selecting from a generated (possibly hypertext) menu. 
Agent-driven negotiation is advantageous when the response would vary over commonly-used dimensions (such as type, language, or encoding), when the origin server is unable to determine a user agent's capabilities from examining the request, and generally when public caches are used to distribute server load and reduce network usage. Agent-driven negotiation suffers from the disadvantage of needing a second request to obtain the best alternate representation. This second request is only efficient when caching is used. In addition, this specification does not define any mechanism for supporting automatic selection, though it also does not prevent any such mechanism from being developed as an extension and used within HTTP/1.1. Fielding, et al. Standards Track [Page 73] RFC 2616 HTTP/1.1 June 1999 HTTP/1.1 defines the 300 (Multiple Choices) and 406 (Not Acceptable) status codes for enabling agent-driven negotiation when the server is unwilling or unable to provide a varying response using server-driven negotiation. 12.3 Transparent Negotiation Transparent negotiation is a combination of both server-driven and agent-driven negotiation. When a cache is supplied with a form of the list of available representations of the response (as in agent-driven negotiation) and the dimensions of variance are completely understood by the cache, then the cache becomes capable of performing server-driven negotiation on behalf of the origin server for subsequent requests on that resource. Transparent negotiation has the advantage of distributing the negotiation work that would otherwise be required of the origin server and also removing the second request delay of agent-driven negotiation when the cache is able to correctly guess the right response. This specification does not define any mechanism for transparent negotiation, though it also does not prevent any such mechanism from being developed as an extension that could be used within HTTP/1.1. 
13 Caching in HTTP HTTP is typically used for distributed information systems, where performance can be improved by the use of response caches. The HTTP/1.1 protocol includes a number of elements intended to make caching work as well as possible. Because these elements are inextricable from other aspects of the protocol, and because they interact with each other, it is useful to describe the basic caching design of HTTP separately from the detailed descriptions of methods, headers, response codes, etc. Caching would be useless if it did not significantly improve performance. The goal of caching in HTTP/1.1 is to eliminate the need to send requests in many cases, and to eliminate the need to send full responses in many other cases. The former reduces the number of network round-trips required for many operations; we use an "expiration" mechanism for this purpose (see section 13.2). The latter reduces network bandwidth requirements; we use a "validation" mechanism for this purpose (see section 13.3). Requirements for performance, availability, and disconnected operation require us to be able to relax the goal of semantic transparency. The HTTP/1.1 protocol allows origin servers, caches, Fielding, et al. Standards Track [Page 74] RFC 2616 HTTP/1.1 June 1999 and clients to explicitly reduce transparency when necessary. However, because non-transparent operation may confuse non-expert users, and might be incompatible with certain server applications (such as those for ordering merchandise), the protocol requires that transparency be relaxed - only by an explicit protocol-level request when relaxed by client or origin server - only with an explicit warning to the end user when relaxed by cache or client Therefore, the HTTP/1.1 protocol provides these important elements: 1. Protocol features that provide full semantic transparency when this is required by all parties. 2. 
Protocol features that allow an origin server or user agent to explicitly request and control non-transparent operation. 3. Protocol features that allow a cache to attach warnings to responses that do not preserve the requested approximation of semantic transparency. A basic principle is that it must be possible for the clients to detect any potential relaxation of semantic transparency. Note: The server, cache, or client implementor might be faced with design decisions not explicitly discussed in this specification. If a decision might affect semantic transparency, the implementor ought to err on the side of maintaining transparency unless a careful and complete analysis shows significant benefits in breaking transparency. 13.1.1 Cache Correctness A correct cache MUST respond to a request with the most up-to-date response held by the cache that is appropriate to the request (see sections 13.2.5, 13.2.6, and 13.12) which meets one of the following conditions: 1. It has been checked for equivalence with what the origin server would have returned by revalidating the response with the origin server (section 13.3); Fielding, et al. Standards Track [Page 75] RFC 2616 HTTP/1.1 June 1999 2. It is "fresh enough" (see section 13.2). In the default case, this means it meets the least restrictive freshness requirement of the client, origin server, and cache (see section 14.9); if the origin server so specifies, it is the freshness requirement of the origin server alone. If a stored response is not "fresh enough" by the most restrictive freshness requirement of both the client and the origin server, in carefully considered circumstances the cache MAY still return the response with the appropriate Warning header (see sections 13.1.5 and 14.46), unless such a response is prohibited (e.g., by a "no-store" cache-directive, or by a "no-cache" cache-request-directive; see section 14.9). 3. 
It is an appropriate 304 (Not Modified), 305 (Use Proxy), or error (4xx or 5xx) response message. If the cache can not communicate with the origin server, then a correct cache SHOULD respond as above if the response can be correctly served from the cache; if not it MUST return an error or warning indicating that there was a communication failure. If a cache receives a response (either an entire response, or a 304 (Not Modified) response) that it would normally forward to the requesting client, and the received response is no longer fresh, the cache SHOULD forward it to the requesting client without adding a new Warning (but without removing any existing Warning headers). A cache SHOULD NOT attempt to revalidate a response simply because that response became stale in transit; this might lead to an infinite loop. A user agent that receives a stale response without a Warning MAY display a warning indication to the user. 13.1.2 Warnings Whenever a cache returns a response that is neither first-hand nor "fresh enough" (in the sense of condition 2 in section 13.1.1), it MUST attach a warning to that effect, using a Warning general-header. The Warning header and the currently defined warnings are described in section 14.46. The warning allows clients to take appropriate action. Warnings MAY be used for other purposes, both cache-related and otherwise. The use of a warning, rather than an error status code, distinguishes these responses from true failures. Warnings are assigned three digit warn-codes. The first digit indicates whether the Warning MUST or MUST NOT be deleted from a stored cache entry after a successful revalidation: Fielding, et al. Standards Track [Page 76] RFC 2616 HTTP/1.1 June 1999 1xx Warnings that describe the freshness or revalidation status of the response, and so MUST be deleted after a successful revalidation. 1XX warn-codes MAY be generated by a cache only when validating a cached entry. It MUST NOT be generated by clients. 
2xx Warnings that describe some aspect of the entity body or entity headers that is not rectified by a revalidation (for example, a lossy compression of the entity bodies) and which MUST NOT be deleted after a successful revalidation. See section 14.46 for the definitions of the codes themselves. HTTP/1.0 caches will cache all Warnings in responses, without deleting the ones in the first category. Warnings in responses that are passed to HTTP/1.0 caches carry an extra warning-date field, which prevents a future HTTP/1.1 recipient from believing an erroneously cached Warning. Warnings also carry a warning text. The text MAY be in any appropriate natural language (perhaps based on the client's Accept headers), and include an OPTIONAL indication of what character set is used. Multiple warnings MAY be attached to a response (either by the origin server or by a cache), including multiple warnings with the same code number. For example, a server might provide the same warning with texts in both English and Basque. When multiple warnings are attached to a response, it might not be practical or reasonable to display all of them to the user. This version of HTTP does not specify strict priority rules for deciding which warnings to display and in what order, but does suggest some heuristics. 13.1.3 Cache-control Mechanisms The basic cache mechanisms in HTTP/1.1 (server-specified expiration times and validators) are implicit directives to caches. In some cases, a server or client might need to provide explicit directives to the HTTP caches. We use the Cache-Control header for this purpose. The Cache-Control header allows a client or server to transmit a variety of directives in either requests or responses. These directives typically override the default caching algorithms. As a general rule, if there is any apparent conflict between header values, the most restrictive interpretation is applied (that is, the one that is most likely to preserve semantic transparency). 
However, Fielding, et al. Standards Track [Page 77] RFC 2616 HTTP/1.1 June 1999 in some cases, cache-control directives are explicitly specified as weakening the approximation of semantic transparency (for example, "max-stale" or "public"). The cache-control directives are described in detail in section 14.9. 13.1.4 Explicit User Agent Warnings Many user agents make it possible for users to override the basic caching mechanisms. For example, the user agent might allow the user to specify that cached entities (even explicitly stale ones) are never validated. Or the user agent might habitually add "Cache-Control: max-stale=3600" to every request. The user agent SHOULD NOT default to either non-transparent behavior, or behavior that results in abnormally ineffective caching, but MAY be explicitly configured to do so by an explicit action of the user. If the user has overridden the basic caching mechanisms, the user agent SHOULD explicitly indicate to the user whenever this results in the display of information that might not meet the server's transparency requirements (in particular, if the displayed entity is known to be stale). Since the protocol normally allows the user agent to determine if responses are stale or not, this indication need only be displayed when this actually happens. The indication need not be a dialog box; it could be an icon (for example, a picture of a rotting fish) or some other indicator. If the user has overridden the caching mechanisms in a way that would abnormally reduce the effectiveness of caches, the user agent SHOULD continually indicate this state to the user (for example, by a display of a picture of currency in flames) so that the user does not inadvertently consume excess resources or suffer from excessive latency. 13.1.5 Exceptions to the Rules and Warnings In some cases, the operator of a cache MAY choose to configure it to return stale responses even when not requested by clients. 
This decision ought not be made lightly, but may be necessary for reasons of availability or performance, especially when the cache is poorly connected to the origin server. Whenever a cache returns a stale response, it MUST mark it as such (using a Warning header) enabling the client software to alert the user that there might be a potential problem. Fielding, et al. Standards Track [Page 78] RFC 2616 HTTP/1.1 June 1999 It also allows the user agent to take steps to obtain a first-hand or fresh response. For this reason, a cache SHOULD NOT return a stale response if the client explicitly requests a first-hand or fresh one, unless it is impossible to comply for technical or policy reasons. 13.1.6 Client-controlled Behavior While the origin server (and to a lesser extent, intermediate caches, by their contribution to the age of a response) are the primary source of expiration information, in some cases the client might need to control a cache's decision about whether to return a cached response without validating it. Clients do this using several directives of the Cache-Control header. A client's request MAY specify the maximum age it is willing to accept of an unvalidated response; specifying a value of zero forces the cache(s) to revalidate all responses. A client MAY also specify the minimum time remaining before a response expires. Both of these options increase constraints on the behavior of caches, and so cannot further relax the cache's approximation of semantic transparency. A client MAY also specify that it will accept stale responses, up to some maximum amount of staleness. This loosens the constraints on the caches, and so might violate the origin server's specified constraints on semantic transparency, but might be necessary to support disconnected operation, or high availability in the face of poor connectivity. 
13.2 Expiration Model 13.2.1 Server-Specified Expiration HTTP caching works best when caches can entirely avoid making requests to the origin server. The primary mechanism for avoiding requests is for an origin server to provide an explicit expiration time in the future, indicating that a response MAY be used to satisfy subsequent requests. In other words, a cache can return a fresh response without first contacting the server. Our expectation is that servers will assign future explicit expiration times to responses in the belief that the entity is not likely to change, in a semantically significant way, before the expiration time is reached. This normally preserves semantic transparency, as long as the server's expiration times are carefully chosen. Fielding, et al. Standards Track [Page 79] RFC 2616 HTTP/1.1 June 1999 The expiration mechanism applies only to responses taken from a cache and not to first-hand responses forwarded immediately to the requesting client. If an origin server wishes to force a semantically transparent cache to validate every request, it MAY assign an explicit expiration time in the past. This means that the response is always stale, and so the cache SHOULD validate it before using it for subsequent requests. See section 14.9.4 for a more restrictive way to force revalidation. If an origin server wishes to force any HTTP/1.1 cache, no matter how it is configured, to validate every request, it SHOULD use the "must-revalidate" cache-control directive (see section 14.9). Servers specify explicit expiration times using either the Expires header, or the max-age directive of the Cache-Control header. An expiration time cannot be used to force a user agent to refresh its display or reload a resource; its semantics apply only to caching mechanisms, and such mechanisms need only check a resource's expiration status when a new request for that resource is initiated. 
See section 13.13 for an explanation of the difference between caches and history mechanisms. 13.2.2 Heuristic Expiration Since origin servers do not always provide explicit expiration times, HTTP caches typically assign heuristic expiration times, employing algorithms that use other header values (such as the Last-Modified time) to estimate a plausible expiration time. The HTTP/1.1 specification does not provide specific algorithms, but does impose worst-case constraints on their results. Since heuristic expiration times might compromise semantic transparency, they ought to be used cautiously, and we encourage origin servers to provide explicit expiration times as much as possible. 13.2.3 Age Calculations In order to know if a cached entry is fresh, a cache needs to know if its age exceeds its freshness lifetime. We discuss how to calculate the latter in section 13.2.4; this section describes how to calculate the age of a response or cache entry. In this discussion, we use the term "now" to mean "the current value of the clock at the host performing the calculation." Hosts that use HTTP, but especially hosts running origin servers and caches, SHOULD use NTP [28] or some similar protocol to synchronize their clocks to a globally accurate time standard. Fielding, et al. Standards Track [Page 80] RFC 2616 HTTP/1.1 June 1999 HTTP/1.1 requires origin servers to send a Date header, if possible, with every response, giving the time at which the response was generated (see section 14.18). We use the term "date_value" to denote the value of the Date header, in a form appropriate for arithmetic operations. HTTP/1.1 uses the Age response-header to convey the estimated age of the response message when obtained from a cache. The Age field value is the cache's estimate of the amount of time since the response was generated or revalidated by the origin server. 
In essence, the Age value is the sum of the time that the response has been resident in each of the caches along the path from the origin server, plus the amount of time it has been in transit along network paths. We use the term "age_value" to denote the value of the Age header, in a form appropriate for arithmetic operations. A response's age can be calculated in two entirely independent ways: 1. now minus date_value, if the local clock is reasonably well synchronized to the origin server's clock. If the result is negative, the result is replaced by zero. 2. age_value, if all of the caches along the response path implement HTTP/1.1. Given that we have two independent ways to compute the age of a response when it is received, we can combine these as corrected_received_age = max(now - date_value, age_value) and as long as we have either nearly synchronized clocks or all-HTTP/1.1 paths, one gets a reliable (conservative) result. Because of network-imposed delays, some significant interval might pass between the time that a server generates a response and the time it is received at the next outbound cache or client. If uncorrected, this delay could result in improperly low ages. Because the request that resulted in the returned Age value must have been initiated prior to that Age value's generation, we can correct for delays imposed by the network by recording the time at which the request was initiated. Then, when an Age value is received, it MUST be interpreted relative to the time the request was initiated, not the time that the response was received. This algorithm results in conservative behavior no matter how much delay is experienced. So, we compute: corrected_initial_age = corrected_received_age + (now - request_time) where "request_time" is the time (according to the local clock) when the request that elicited this response was sent. 
Summary of age calculation algorithm, when a cache receives a response:

      /*
       * age_value
       *      is the value of Age: header received by the cache with
       *      this response.
       * date_value
       *      is the value of the origin server's Date: header
       * request_time
       *      is the (local) time when the cache made the request
       *      that resulted in this cached response
       * response_time
       *      is the (local) time when the cache received the
       *      response
       * now
       *      is the current (local) time
       */

      apparent_age = max(0, response_time - date_value);
      corrected_received_age = max(apparent_age, age_value);
      response_delay = response_time - request_time;
      corrected_initial_age = corrected_received_age + response_delay;
      resident_time = now - response_time;
      current_age = corrected_initial_age + resident_time;

The current_age of a cache entry is calculated by adding the amount of time (in seconds) since the cache entry was last validated by the origin server to the corrected_initial_age. When a response is generated from a cache entry, the cache MUST include a single Age header field in the response with a value equal to the cache entry's current_age. The presence of an Age header field in a response implies that a response is not first-hand. However, the converse is not true, since the lack of an Age header field in a response does not imply that the response is first-hand unless all caches along the request path are compliant with HTTP/1.1 (i.e., older HTTP caches did not implement the Age header field). 13.2.4 Expiration Calculations In order to decide whether a response is fresh or stale, we need to compare its freshness lifetime to its age. The age is calculated as described in section 13.2.3; this section describes how to calculate the freshness lifetime, and to determine if a response has expired. In the discussion below, the values can be represented in any form appropriate for arithmetic operations. 
We use the term "expires_value" to denote the value of the Expires header. We use the term "max_age_value" to denote an appropriate value of the number of seconds carried by the "max-age" directive of the Cache-Control header in a response (see section 14.9.3). The max-age directive takes priority over Expires, so if max-age is present in a response, the calculation is simply: freshness_lifetime = max_age_value Otherwise, if Expires is present in the response, the calculation is: freshness_lifetime = expires_value - date_value Note that neither of these calculations is vulnerable to clock skew, since all of the information comes from the origin server. If none of Expires, Cache-Control: max-age, or Cache-Control: s-maxage (see section 14.9.3) appears in the response, and the response does not include other restrictions on caching, the cache MAY compute a freshness lifetime using a heuristic. The cache MUST attach Warning 113 to any response whose age is more than 24 hours if such warning has not already been added. Also, if the response does have a Last-Modified time, the heuristic expiration value SHOULD be no more than some fraction of the interval since that time. A typical setting of this fraction might be 10%. The calculation to determine if a response has expired is quite simple: response_is_fresh = (freshness_lifetime > current_age) Fielding, et al. Standards Track [Page 83] RFC 2616 HTTP/1.1 June 1999 13.2.5 Disambiguating Expiration Values Because expiration values are assigned optimistically, it is possible for two caches to contain fresh values for the same resource that are different. If a client performing a retrieval receives a non-first-hand response for a request that was already fresh in its own cache, and the Date header in its existing cache entry is newer than the Date on the new response, then the client MAY ignore the response. 
If so, it MAY retry the request with a "Cache-Control: max-age=0" directive (see section 14.9), to force a check with the origin server. If a cache has two fresh responses for the same representation with different validators, it MUST use the one with the more recent Date header. This situation might arise because the cache is pooling responses from other caches, or because a client has asked for a reload or a revalidation of an apparently fresh cache entry. 13.2.6 Disambiguating Multiple Responses Because a client might be receiving responses via multiple paths, so that some responses flow through one set of caches and other responses flow through a different set of caches, a client might receive responses in an order different from that in which the origin server sent them. We would like the client to use the most recently generated response, even if older responses are still apparently fresh. Neither the entity tag nor the expiration value can impose an ordering on responses, since it is possible that a later response intentionally carries an earlier expiration time. The Date values are ordered to a granularity of one second. When a client tries to revalidate a cache entry, and the response it receives contains a Date header that appears to be older than the one for the existing entry, then the client SHOULD repeat the request unconditionally, and include Cache-Control: max-age=0 to force any intermediate caches to validate their copies directly with the origin server, or Cache-Control: no-cache to force any intermediate caches to obtain a new copy from the origin server. Fielding, et al. Standards Track [Page 84] RFC 2616 HTTP/1.1 June 1999 If the Date values are equal, then the client MAY use either response (or MAY, if it is being extremely prudent, request a new response). Servers MUST NOT depend on clients being able to choose deterministically between responses generated during the same second, if their expiration times overlap. 
13.3 Validation Model When a cache has a stale entry that it would like to use as a response to a client's request, it first has to check with the origin server (or possibly an intermediate cache with a fresh response) to see if its cached entry is still usable. We call this "validating" the cache entry. Since we do not want to have to pay the overhead of retransmitting the full response if the cached entry is good, and we do not want to pay the overhead of an extra round trip if the cached entry is invalid, the HTTP/1.1 protocol supports the use of conditional methods. The key protocol features for supporting conditional methods are those concerned with "cache validators." When an origin server generates a full response, it attaches some sort of validator to it, which is kept with the cache entry. When a client (user agent or proxy cache) makes a conditional request for a resource for which it has a cache entry, it includes the associated validator in the request. The server then checks that validator against the current validator for the entity, and, if they match (see section 13.3.3), it responds with a special status code (usually, 304 (Not Modified)) and no entity-body. Otherwise, it returns a full response (including entity-body). Thus, we avoid transmitting the full response if the validator matches, and we avoid an extra round trip if it does not match. In HTTP/1.1, a conditional request looks exactly the same as a normal request for the same resource, except that it carries a special header (which includes the validator) that implicitly turns the method (usually, GET) into a conditional. The protocol includes both positive and negative senses of cache-validating conditions. That is, it is possible to request either that a method be performed if and only if a validator matches or if and only if no validators match. Fielding, et al. 
Standards Track [Page 85] RFC 2616 HTTP/1.1 June 1999 Note: a response that lacks a validator may still be cached, and served from cache until it expires, unless this is explicitly prohibited by a cache-control directive. However, a cache cannot do a conditional retrieval if it does not have a validator for the entity, which means it will not be refreshable after it expires. 13.3.1 Last-Modified Dates The Last-Modified entity-header field value is often used as a cache validator. In simple terms, a cache entry is considered to be valid if the entity has not been modified since the Last-Modified value. 13.3.2 Entity Tag Cache Validators The ETag response-header field value, an entity tag, provides for an "opaque" cache validator. This might allow more reliable validation in situations where it is inconvenient to store modification dates, where the one-second resolution of HTTP date values is not sufficient, or where the origin server wishes to avoid certain paradoxes that might arise from the use of modification dates. Entity Tags are described in section 3.11. The headers used with entity tags are described in sections 14.19, 14.24, 14.26 and 14.44. 13.3.3 Weak and Strong Validators Since both origin servers and caches will compare two validators to decide if they represent the same or different entities, one normally would expect that if the entity (the entity-body or any entity-headers) changes in any way, then the associated validator would change as well. If this is true, then we call this validator a "strong validator." However, there might be cases when a server prefers to change the validator only on semantically significant changes, and not when insignificant aspects of the entity change. A validator that does not always change when the resource changes is a "weak validator." Entity tags are normally "strong validators," but the protocol provides a mechanism to tag an entity tag as "weak." 
One can think of a strong validator as one that changes whenever the bits of an entity change, while a weak value changes whenever the meaning of an entity changes. Alternatively, one can think of a strong validator as part of an identifier for a specific entity, while a weak validator is part of an identifier for a set of semantically equivalent entities. Note: One example of a strong validator is an integer that is incremented in stable storage every time an entity is changed. Fielding, et al. Standards Track [Page 86] RFC 2616 HTTP/1.1 June 1999 An entity's modification time, if represented with one-second resolution, could be a weak validator, since it is possible that the resource might be modified twice during a single second. Support for weak validators is optional. However, weak validators allow for more efficient caching of equivalent objects; for example, a hit counter on a site is probably good enough if it is updated every few days or weeks, and any value during that period is likely "good enough" to be equivalent. A "use" of a validator is either when a client generates a request and includes the validator in a validating header field, or when a server compares two validators. Strong validators are usable in any context. Weak validators are only usable in contexts that do not depend on exact equality of an entity. For example, either kind is usable for a conditional GET of a full entity. However, only a strong validator is usable for a sub-range retrieval, since otherwise the client might end up with an internally inconsistent entity. Clients MAY issue simple (non-subrange) GET requests with either weak validators or strong validators. Clients MUST NOT use weak validators in other forms of request. The only function that the HTTP/1.1 protocol defines on validators is comparison. 
There are two validator comparison functions, depending on whether the comparison context allows the use of weak validators or not: - The strong comparison function: in order to be considered equal, both validators MUST be identical in every way, and both MUST NOT be weak. - The weak comparison function: in order to be considered equal, both validators MUST be identical in every way, but either or both of them MAY be tagged as "weak" without affecting the result. An entity tag is strong unless it is explicitly tagged as weak. Section 3.11 gives the syntax for entity tags. A Last-Modified time, when used as a validator in a request, is implicitly weak unless it is possible to deduce that it is strong, using the following rules: - The validator is being compared by an origin server to the actual current validator for the entity and, - That origin server reliably knows that the associated entity did not change twice during the second covered by the presented validator. or - The validator is about to be used by a client in an If-Modified-Since or If-Unmodified-Since header, because the client has a cache entry for the associated entity, and - That cache entry includes a Date value, which gives the time when the origin server sent the original response, and - The presented Last-Modified time is at least 60 seconds before the Date value. or - The validator is being compared by an intermediate cache to the validator stored in its cache entry for the entity, and - That cache entry includes a Date value, which gives the time when the origin server sent the original response, and - The presented Last-Modified time is at least 60 seconds before the Date value. 
This method relies on the fact that if two different responses were sent by the origin server during the same second, but both had the same Last-Modified time, then at least one of those responses would have a Date value equal to its Last-Modified time. The arbitrary 60-second limit guards against the possibility that the Date and Last-Modified values are generated from different clocks, or at somewhat different times during the preparation of the response. An implementation MAY use a value larger than 60 seconds, if it is believed that 60 seconds is too short. If a client wishes to perform a sub-range retrieval on a value for which it has only a Last-Modified time and no opaque validator, it MAY do this only if the Last-Modified time is strong in the sense described here. A cache or origin server receiving a conditional request, other than a full-body GET request, MUST use the strong comparison function to evaluate the condition. These rules allow HTTP/1.1 caches and clients to safely perform sub-range retrievals on values that have been obtained from HTTP/1.0 servers. 13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates We adopt a set of rules and recommendations for origin servers, clients, and caches regarding when various validator types ought to be used, and for what purposes. HTTP/1.1 origin servers: - SHOULD send an entity tag validator unless it is not feasible to generate one. - MAY send a weak entity tag instead of a strong entity tag, if performance considerations support the use of weak entity tags, or if it is unfeasible to send a strong entity tag. - SHOULD send a Last-Modified value if it is feasible to send one, unless the risk of a breakdown in semantic transparency that could result from using this date in an If-Modified-Since header would lead to serious problems. 
In other words, the preferred behavior for an HTTP/1.1 origin server is to send both a strong entity tag and a Last-Modified value. In order to be legal, a strong entity tag MUST change whenever the associated entity value changes in any way. A weak entity tag SHOULD change whenever the associated entity changes in a semantically significant way. Note: in order to provide semantically transparent caching, an origin server must avoid reusing a specific strong entity tag value for two different entities, or reusing a specific weak entity tag value for two semantically different entities. Cache entries might persist for arbitrarily long periods, regardless of expiration times, so it might be inappropriate to expect that a cache will never again attempt to validate an entry using a validator that it obtained at some point in the past. HTTP/1.1 clients: - If an entity tag has been provided by the origin server, MUST use that entity tag in any cache-conditional request (using If-Match or If-None-Match). - If only a Last-Modified value has been provided by the origin server, SHOULD use that value in non-subrange cache-conditional requests (using If-Modified-Since). Fielding, et al. Standards Track [Page 89] RFC 2616 HTTP/1.1 June 1999 - If only a Last-Modified value has been provided by an HTTP/1.0 origin server, MAY use that value in subrange cache-conditional requests (using If-Unmodified-Since:). The user agent SHOULD provide a way to disable this, in case of difficulty. - If both an entity tag and a Last-Modified value have been provided by the origin server, SHOULD use both validators in cache-conditional requests. This allows both HTTP/1.0 and HTTP/1.1 caches to respond appropriately. 
An HTTP/1.1 origin server, upon receiving a conditional request that includes both a Last-Modified date (e.g., in an If-Modified-Since or If-Unmodified-Since header field) and one or more entity tags (e.g., in an If-Match, If-None-Match, or If-Range header field) as cache validators, MUST NOT return a response status of 304 (Not Modified) unless doing so is consistent with all of the conditional header fields in the request. An HTTP/1.1 caching proxy, upon receiving a conditional request that includes both a Last-Modified date and one or more entity tags as cache validators, MUST NOT return a locally cached response to the client unless that cached response is consistent with all of the conditional header fields in the request. Note: The general principle behind these rules is that HTTP/1.1 servers and clients should transmit as much non-redundant information as is available in their responses and requests. HTTP/1.1 systems receiving this information will make the most conservative assumptions about the validators they receive. HTTP/1.0 clients and caches will ignore entity tags. Generally, last-modified values received or used by these systems will support transparent and efficient caching, and so HTTP/1.1 origin servers should provide Last-Modified values. In those rare cases where the use of a Last-Modified value as a validator by an HTTP/1.0 system could result in a serious problem, then HTTP/1.1 origin servers should not provide one. 13.3.5 Non-validating Conditionals The principle behind entity tags is that only the service author knows the semantics of a resource well enough to select an appropriate cache validation mechanism, and the specification of any validator comparison function more complex than byte-equality would open up a can of worms. Thus, comparisons of any other headers (except Last-Modified, for compatibility with HTTP/1.0) are never used for purposes of validating a cache entry. Fielding, et al. 
Standards Track [Page 90] RFC 2616 HTTP/1.1 June 1999 13.4 Response Cacheability Unless specifically constrained by a cache-control (section 14.9) directive, a caching system MAY always store a successful response (see section 13.8) as a cache entry, MAY return it without validation if it is fresh, and MAY return it after successful validation. If there is neither a cache validator nor an explicit expiration time associated with a response, we do not expect it to be cached, but certain caches MAY violate this expectation (for example, when little or no network connectivity is available). A client can usually detect that such a response was taken from a cache by comparing the Date header to the current time. Note: some HTTP/1.0 caches are known to violate this expectation without providing any Warning. However, in some cases it might be inappropriate for a cache to retain an entity, or to return it in response to a subsequent request. This might be because absolute semantic transparency is deemed necessary by the service author, or because of security or privacy considerations. Certain cache-control directives are therefore provided so that the server can indicate that certain resource entities, or portions thereof, are not to be cached regardless of other considerations. Note that section 14.8 normally prevents a shared cache from saving and returning a response to a previous request if that request included an Authorization header. A response received with a status code of 200, 203, 206, 300, 301 or 410 MAY be stored by a cache and used in reply to a subsequent request, subject to the expiration mechanism, unless a cache-control directive prohibits caching. However, a cache that does not support the Range and Content-Range headers MUST NOT cache 206 (Partial Content) responses. A response received with any other status code (e.g. 
status codes 302 and 307) MUST NOT be returned in a reply to a subsequent request unless there are cache-control directives or another header(s) that explicitly allow it. For example, these include the following: an Expires header (section 14.21); a "max-age", "s-maxage", "must-revalidate", "proxy-revalidate", "public" or "private" cache-control directive (section 14.9). Fielding, et al. Standards Track [Page 91] RFC 2616 HTTP/1.1 June 1999 13.5 Constructing Responses From Caches The purpose of an HTTP cache is to store information received in response to requests for use in responding to future requests. In many cases, a cache simply returns the appropriate parts of a response to the requester. However, if the cache holds a cache entry based on a previous response, it might have to combine parts of a new response with what is held in the cache entry. 13.5.1 End-to-end and Hop-by-hop Headers For the purpose of defining the behavior of caches and non-caching proxies, we divide HTTP headers into two categories: - End-to-end headers, which are transmitted to the ultimate recipient of a request or response. End-to-end headers in responses MUST be stored as part of a cache entry and MUST be transmitted in any response formed from a cache entry. - Hop-by-hop headers, which are meaningful only for a single transport-level connection, and are not stored by caches or forwarded by proxies. The following HTTP/1.1 headers are hop-by-hop headers: - Connection - Keep-Alive - Proxy-Authenticate - Proxy-Authorization - TE - Trailers - Transfer-Encoding - Upgrade All other headers defined by HTTP/1.1 are end-to-end headers. Other hop-by-hop headers MUST be listed in a Connection header, (section 14.10) to be introduced into HTTP/1.1 (or later). 13.5.2 Non-modifiable Headers Some features of the HTTP/1.1 protocol, such as Digest Authentication, depend on the value of certain end-to-end headers. 
A transparent proxy SHOULD NOT modify an end-to-end header unless the definition of that header requires or specifically allows that. Fielding, et al. Standards Track [Page 92] RFC 2616 HTTP/1.1 June 1999 A transparent proxy MUST NOT modify any of the following fields in a request or response, and it MUST NOT add any of these fields if not already present: - Content-Location - Content-MD5 - ETag - Last-Modified A transparent proxy MUST NOT modify any of the following fields in a response: - Expires but it MAY add any of these fields if not already present. If an Expires header is added, it MUST be given a field-value identical to that of the Date header in that response. A proxy MUST NOT modify or add any of the following fields in a message that contains the no-transform cache-control directive, or in any request: - Content-Encoding - Content-Range - Content-Type A non-transparent proxy MAY modify or add these fields to a message that does not include no-transform, but if it does so, it MUST add a Warning 214 (Transformation applied) if one does not already appear in the message (see section 14.46). Warning: unnecessary modification of end-to-end headers might cause authentication failures if stronger authentication mechanisms are introduced in later versions of HTTP. Such authentication mechanisms MAY rely on the values of header fields not listed here. The Content-Length field of a request or response is added or deleted according to the rules in section 4.4. A transparent proxy MUST preserve the entity-length (section 7.2.2) of the entity-body, although it MAY change the transfer-length (section 4.4). Fielding, et al. Standards Track [Page 93] RFC 2616 HTTP/1.1 June 1999 13.5.3 Combining Headers When a cache makes a validating request to a server, and the server provides a 304 (Not Modified) response or a 206 (Partial Content) response, the cache then constructs a response to send to the requesting client. 
If the status code is 304 (Not Modified), the cache uses the entity-body stored in the cache entry as the entity-body of this outgoing response. If the status code is 206 (Partial Content) and the ETag or Last-Modified headers match exactly, the cache MAY combine the contents stored in the cache entry with the new contents received in the response and use the result as the entity-body of this outgoing response, (see 13.5.4). The end-to-end headers stored in the cache entry are used for the constructed response, except that - any stored Warning headers with warn-code 1xx (see section 14.46) MUST be deleted from the cache entry and the forwarded response. - any stored Warning headers with warn-code 2xx MUST be retained in the cache entry and the forwarded response. - any end-to-end headers provided in the 304 or 206 response MUST replace the corresponding headers from the cache entry. Unless the cache decides to remove the cache entry, it MUST also replace the end-to-end headers stored with the cache entry with corresponding headers received in the incoming response, except for Warning headers as described immediately above. If a header field-name in the incoming response matches more than one header in the cache entry, all such old headers MUST be replaced. In other words, the set of end-to-end headers received in the incoming response overrides all corresponding end-to-end headers stored with the cache entry (except for stored Warning headers with warn-code 1xx, which are deleted even if not overridden). Note: this rule allows an origin server to use a 304 (Not Modified) or a 206 (Partial Content) response to update any header associated with a previous response for the same entity or sub-ranges thereof, although it might not always be meaningful or correct to do so. This rule does not allow an origin server to use a 304 (Not Modified) or a 206 (Partial Content) response to entirely delete a header that it had provided with a previous response. Fielding, et al.
Standards Track [Page 94] RFC 2616 HTTP/1.1 June 1999 13.5.4 Combining Byte Ranges A response might transfer only a subrange of the bytes of an entity-body, either because the request included one or more Range specifications, or because a connection was broken prematurely. After several such transfers, a cache might have received several ranges of the same entity-body. If a cache has a stored non-empty set of subranges for an entity, and an incoming response transfers another subrange, the cache MAY combine the new subrange with the existing set if both the following conditions are met: - Both the incoming response and the cache entry have a cache validator. - The two cache validators match using the strong comparison function (see section 13.3.3). If either requirement is not met, the cache MUST use only the most recent partial response (based on the Date values transmitted with every response, and using the incoming response if these values are equal or missing), and MUST discard the other partial information. 13.6 Caching Negotiated Responses Use of server-driven content negotiation (section 12.1), as indicated by the presence of a Vary header field in a response, alters the conditions and procedure by which a cache can use the response for subsequent requests. See section 14.44 for use of the Vary header field by servers. A server SHOULD use the Vary header field to inform a cache of what request-header fields were used to select among multiple representations of a cacheable response subject to server-driven negotiation. The set of header fields named by the Vary field value is known as the "selecting" request-headers.
When the cache receives a subsequent request whose Request-URI specifies one or more cache entries including a Vary header field, the cache MUST NOT use such a cache entry to construct a response to the new request unless all of the selecting request-headers present in the new request match the corresponding stored request-headers in the original request. The selecting request-headers from two requests are defined to match if and only if the selecting request-headers in the first request can be transformed to the selecting request-headers in the second request Fielding, et al. Standards Track [Page 95] RFC 2616 HTTP/1.1 June 1999 by adding or removing linear white space (LWS) at places where this is allowed by the corresponding BNF, and/or combining multiple message-header fields with the same field name following the rules about message headers in section 4.2. A Vary header field-value of "*" always fails to match and subsequent requests on that resource can only be properly interpreted by the origin server. If the selecting request header fields for the cached entry do not match the selecting request header fields of the new request, then the cache MUST NOT use a cached entry to satisfy the request unless it first relays the new request to the origin server in a conditional request and the server responds with 304 (Not Modified), including an entity tag or Content-Location that indicates the entity to be used. If an entity tag was assigned to a cached representation, the forwarded request SHOULD be conditional and include the entity tags in an If-None-Match header field from all its cache entries for the resource. This conveys to the server the set of entities currently held by the cache, so that if any one of these entities matches the requested entity, the server can use the ETag header field in its 304 (Not Modified) response to tell the cache which entry is appropriate. 
If the entity-tag of the new response matches that of an existing entry, the new response SHOULD be used to update the header fields of the existing entry, and the result MUST be returned to the client. If any of the existing cache entries contains only partial content for the associated entity, its entity-tag SHOULD NOT be included in the If-None-Match header field unless the request is for a range that would be fully satisfied by that entry. If a cache receives a successful response whose Content-Location field matches that of an existing cache entry for the same Request-URI, whose entity-tag differs from that of the existing entry, and whose Date is more recent than that of the existing entry, the existing entry SHOULD NOT be returned in response to future requests and SHOULD be deleted from the cache. 13.7 Shared and Non-Shared Caches For reasons of security and privacy, it is necessary to make a distinction between "shared" and "non-shared" caches. A non-shared cache is one that is accessible only to a single user. Accessibility in this case SHOULD be enforced by appropriate security mechanisms. All other caches are considered to be "shared." Other sections of Fielding, et al. Standards Track [Page 96] RFC 2616 HTTP/1.1 June 1999 this specification place certain constraints on the operation of shared caches in order to prevent loss of privacy or failure of access controls. 13.8 Errors or Incomplete Response Cache Behavior A cache that receives an incomplete response (for example, with fewer bytes of data than specified in a Content-Length header) MAY store the response. However, the cache MUST treat this as a partial response. Partial responses MAY be combined as described in section 13.5.4; the result might be a full response or might still be partial. A cache MUST NOT return a partial response to a client without explicitly marking it as such, using the 206 (Partial Content) status code.
A cache MUST NOT return a partial response using a status code of 200 (OK). If a cache receives a 5xx response while attempting to revalidate an entry, it MAY either forward this response to the requesting client, or act as if the server failed to respond. In the latter case, it MAY return a previously received response unless the cached entry includes the "must-revalidate" cache-control directive (see section 14.9). 13.9 Side Effects of GET and HEAD Unless the origin server explicitly prohibits the caching of their responses, the application of GET and HEAD methods to any resources SHOULD NOT have side effects that would lead to erroneous behavior if these responses are taken from a cache. They MAY still have side effects, but a cache is not required to consider such side effects in its caching decisions. Caches are always expected to observe an origin server's explicit restrictions on caching. We note one exception to this rule: since some applications have traditionally used GETs and HEADs with query URLs (those containing a "?" in the rel_path part) to perform operations with significant side effects, caches MUST NOT treat responses to such URIs as fresh unless the server provides an explicit expiration time. This specifically means that responses from HTTP/1.0 servers for such URIs SHOULD NOT be taken from a cache. See section 9.1.1 for related information. 13.10 Invalidation After Updates or Deletions The effect of certain methods performed on a resource at the origin server might cause one or more existing cache entries to become non-transparently invalid. That is, although they might continue to be "fresh," they do not accurately reflect what the origin server would return for a new request on that resource. Fielding, et al. Standards Track [Page 97] RFC 2616 HTTP/1.1 June 1999 There is no way for the HTTP protocol to guarantee that all such cache entries are marked invalid.
For example, the request that caused the change at the origin server might not have gone through the proxy where a cache entry is stored. However, several rules help reduce the likelihood of erroneous behavior. In this section, the phrase "invalidate an entity" means that the cache will either remove all instances of that entity from its storage, or will mark these as "invalid" and in need of a mandatory revalidation before they can be returned in response to a subsequent request. Some HTTP methods MUST cause a cache to invalidate an entity. This is either the entity referred to by the Request-URI, or by the Location or Content-Location headers (if present). These methods are: - PUT - DELETE - POST In order to prevent denial of service attacks, an invalidation based on the URI in a Location or Content-Location header MUST only be performed if the host part is the same as in the Request-URI. A cache that passes through requests for methods it does not understand SHOULD invalidate any entities referred to by the Request-URI. 13.11 Write-Through Mandatory All methods that might be expected to cause modifications to the origin server's resources MUST be written through to the origin server. This currently includes all methods except for GET and HEAD. A cache MUST NOT reply to such a request from a client before having transmitted the request to the inbound server, and having received a corresponding response from the inbound server. This does not prevent a proxy cache from sending a 100 (Continue) response before the inbound server has sent its final reply. The alternative (known as "write-back" or "copy-back" caching) is not allowed in HTTP/1.1, due to the difficulty of providing consistent updates and the problems arising from server, cache, or network failure prior to write-back. Fielding, et al. 
Standards Track [Page 98] RFC 2616 HTTP/1.1 June 1999 13.12 Cache Replacement If a new cacheable (see sections 14.9.2, 13.2.5, 13.2.6 and 13.8) response is received from a resource while any existing responses for the same resource are cached, the cache SHOULD use the new response to reply to the current request. It MAY insert it into cache storage and MAY, if it meets all other requirements, use it to respond to any future requests that would previously have caused the old response to be returned. If it inserts the new response into cache storage the rules in section 13.5.3 apply. Note: a new response that has an older Date header value than existing cached responses is not cacheable. 13.13 History Lists User agents often have history mechanisms, such as "Back" buttons and history lists, which can be used to redisplay an entity retrieved earlier in a session. History mechanisms and caches are different. In particular history mechanisms SHOULD NOT try to show a semantically transparent view of the current state of a resource. Rather, a history mechanism is meant to show exactly what the user saw at the time when the resource was retrieved. By default, an expiration time does not apply to history mechanisms. If the entity is still in storage, a history mechanism SHOULD display it even if the entity has expired, unless the user has specifically configured the agent to refresh expired history documents. This is not to be construed to prohibit the history mechanism from telling the user that a view might be stale. Note: if history list mechanisms unnecessarily prevent users from viewing stale resources, this will tend to force service authors to avoid using HTTP expiration controls and cache controls when they would otherwise like to. Service authors may consider it important that users not be presented with error messages or warning messages when they use navigation controls (such as BACK) to view previously fetched resources. 
Even though sometimes such resources ought not to be cached, or ought to expire quickly, user interface considerations may force service authors to resort to other means of preventing caching (e.g. "once-only" URLs) in order not to suffer the effects of improperly functioning history mechanisms. Fielding, et al. Standards Track [Page 99] RFC 2616 HTTP/1.1 June 1999 14 Header Field Definitions This section defines the syntax and semantics of all standard HTTP/1.1 header fields. For entity-header fields, both sender and recipient refer to either the client or the server, depending on who sends and who receives the entity. 14.1 Accept The Accept request-header field can be used to specify certain media types which are acceptable for the response. Accept headers can be used to indicate that the request is specifically limited to a small set of desired types, as in the case of a request for an in-line image. Accept = "Accept" ":" #( media-range [ accept-params ] ) media-range = ( "*/*" | ( type "/" "*" ) | ( type "/" subtype ) ) *( ";" parameter ) accept-params = ";" "q" "=" qvalue *( accept-extension ) accept-extension = ";" token [ "=" ( token | quoted-string ) ] The asterisk "*" character is used to group media types into ranges, with "*/*" indicating all media types and "type/*" indicating all subtypes of that type. The media-range MAY include media type parameters that are applicable to that range. Each media-range MAY be followed by one or more accept-params, beginning with the "q" parameter for indicating a relative quality factor. The first "q" parameter (if any) separates the media-range parameter(s) from the accept-params. Quality factors allow the user or user agent to indicate the relative degree of preference for that media-range, using the qvalue scale from 0 to 1 (section 3.9). The default value is q=1. Note: Use of the "q" parameter name to separate media type parameters from Accept extension parameters is due to historical practice.
Although this prevents any media type parameter named "q" from being used with a media range, such an event is believed to be unlikely given the lack of any "q" parameters in the IANA media type registry and the rare usage of any media type parameters in Accept. Future media types are discouraged from registering any parameter named "q". Fielding, et al. Standards Track [Page 100] RFC 2616 HTTP/1.1 June 1999 The example Accept: audio/*; q=0.2, audio/basic SHOULD be interpreted as "I prefer audio/basic, but send me any audio type if it is the best available after an 80% mark-down in quality." If no Accept header field is present, then it is assumed that the client accepts all media types. If an Accept header field is present, and if the server cannot send a response which is acceptable according to the combined Accept field value, then the server SHOULD send a 406 (not acceptable) response. A more elaborate example is Accept: text/plain; q=0.5, text/html, text/x-dvi; q=0.8, text/x-c Verbally, this would be interpreted as "text/html and text/x-c are the preferred media types, but if they do not exist, then send the text/x-dvi entity, and if that does not exist, send the text/plain entity." Media ranges can be overridden by more specific media ranges or specific media types. If more than one media range applies to a given type, the most specific reference has precedence. For example, Accept: text/*, text/html, text/html;level=1, */* have the following precedence: 1) text/html;level=1 2) text/html 3) text/* 4) */* The media type quality factor associated with a given type is determined by finding the media range with the highest precedence which matches that type. For example, Accept: text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5 would cause the following values to be associated: text/html;level=1 = 1 text/html = 0.7 text/plain = 0.3 Fielding, et al. 
Standards Track [Page 101] RFC 2616 HTTP/1.1 June 1999 image/jpeg = 0.5 text/html;level=2 = 0.4 text/html;level=3 = 0.7 Note: A user agent might be provided with a default set of quality values for certain media ranges. However, unless the user agent is a closed system which cannot interact with other rendering agents, this default set ought to be configurable by the user. 14.2 Accept-Charset The Accept-Charset request-header field can be used to indicate what character sets are acceptable for the response. This field allows clients capable of understanding more comprehensive or special-purpose character sets to signal that capability to a server which is capable of representing documents in those character sets. Accept-Charset = "Accept-Charset" ":" 1#( ( charset | "*" )[ ";" "q" "=" qvalue ] ) Character set values are described in section 3.4. Each charset MAY be given an associated quality value which represents the user's preference for that charset. The default value is q=1. An example is Accept-Charset: iso-8859-5, unicode-1-1;q=0.8 The special value "*", if present in the Accept-Charset field, matches every character set (including ISO-8859-1) which is not mentioned elsewhere in the Accept-Charset field. If no "*" is present in an Accept-Charset field, then all character sets not explicitly mentioned get a quality value of 0, except for ISO-8859-1, which gets a quality value of 1 if not explicitly mentioned. If no Accept-Charset header is present, the default is that any character set is acceptable. If an Accept-Charset header is present, and if the server cannot send a response which is acceptable according to the Accept-Charset header, then the server SHOULD send an error response with the 406 (not acceptable) status code, though the sending of an unacceptable response is also allowed. 14.3 Accept-Encoding The Accept-Encoding request-header field is similar to Accept, but restricts the content-codings (section 3.5) that are acceptable in the response.
Accept-Encoding = "Accept-Encoding" ":" Fielding, et al. Standards Track [Page 102] RFC 2616 HTTP/1.1 June 1999 1#( codings [ ";" "q" "=" qvalue ] ) codings = ( content-coding | "*" ) Examples of its use are: Accept-Encoding: compress, gzip Accept-Encoding: Accept-Encoding: * Accept-Encoding: compress;q=0.5, gzip;q=1.0 Accept-Encoding: gzip;q=1.0, identity; q=0.5, *;q=0 A server tests whether a content-coding is acceptable, according to an Accept-Encoding field, using these rules: 1. If the content-coding is one of the content-codings listed in the Accept-Encoding field, then it is acceptable, unless it is accompanied by a qvalue of 0. (As defined in section 3.9, a qvalue of 0 means "not acceptable.") 2. The special "*" symbol in an Accept-Encoding field matches any available content-coding not explicitly listed in the header field. 3. If multiple content-codings are acceptable, then the acceptable content-coding with the highest non-zero qvalue is preferred. 4. The "identity" content-coding is always acceptable, unless specifically refused because the Accept-Encoding field includes "identity;q=0", or because the field includes "*;q=0" and does not explicitly include the "identity" content-coding. If the Accept-Encoding field-value is empty, then only the "identity" encoding is acceptable. If an Accept-Encoding field is present in a request, and if the server cannot send a response which is acceptable according to the Accept-Encoding header, then the server SHOULD send an error response with the 406 (Not Acceptable) status code. If no Accept-Encoding field is present in a request, the server MAY assume that the client will accept any content coding. In this case, if "identity" is one of the available content-codings, then the server SHOULD use the "identity" content-coding, unless it has additional information that a different content-coding is meaningful to the client. 
Note: If the request does not include an Accept-Encoding field, and if the "identity" content-coding is unavailable, then content-codings commonly understood by HTTP/1.0 clients (i.e., Fielding, et al. Standards Track [Page 103] RFC 2616 HTTP/1.1 June 1999 "gzip" and "compress") are preferred; some older clients improperly display messages sent with other content-codings. The server might also make this decision based on information about the particular user-agent or client. Note: Most HTTP/1.0 applications do not recognize or obey qvalues associated with content-codings. This means that qvalues will not work and are not permitted with x-gzip or x-compress. 14.4 Accept-Language The Accept-Language request-header field is similar to Accept, but restricts the set of natural languages that are preferred as a response to the request. Language tags are defined in section 3.10. Accept-Language = "Accept-Language" ":" 1#( language-range [ ";" "q" "=" qvalue ] ) language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) Each language-range MAY be given an associated quality value which represents an estimate of the user's preference for the languages specified by that range. The quality value defaults to "q=1". For example, Accept-Language: da, en-gb;q=0.8, en;q=0.7 would mean: "I prefer Danish, but will accept British English and other types of English." A language-range matches a language-tag if it exactly equals the tag, or if it exactly equals a prefix of the tag such that the first tag character following the prefix is "-". The special range "*", if present in the Accept-Language field, matches every tag not matched by any other range present in the Accept-Language field. Note: This use of a prefix matching rule does not imply that language tags are assigned to languages in such a way that it is always true that if a user understands a language with a certain tag, then this user will also understand all languages with tags for which this tag is a prefix. 
The prefix rule simply allows the use of prefix tags if this is the case. The language quality factor assigned to a language-tag by the Accept-Language field is the quality value of the longest language-range in the field that matches the language-tag. If no language-range in the field matches the tag, the language quality factor assigned is 0. If no Accept-Language header is present in the request, the server Fielding, et al. Standards Track [Page 104] RFC 2616 HTTP/1.1 June 1999 SHOULD assume that all languages are equally acceptable. If an Accept-Language header is present, then all languages which are assigned a quality factor greater than 0 are acceptable. It might be contrary to the privacy expectations of the user to send an Accept-Language header with the complete linguistic preferences of the user in every request. For a discussion of this issue, see section 15.1.4. As intelligibility is highly dependent on the individual user, it is recommended that client applications make the choice of linguistic preference available to the user. If the choice is not made available, then the Accept-Language header field MUST NOT be given in the request. Note: When making the choice of linguistic preference available to the user, we remind implementors of the fact that users are not familiar with the details of language matching as described above, and should provide appropriate guidance. As an example, users might assume that on selecting "en-gb", they will be served any kind of English document if British English is not available. A user agent might suggest in such a case to add "en" to get the best matching behavior. 14.5 Accept-Ranges The Accept-Ranges response-header field allows the server to indicate its acceptance of range requests for a resource: Accept-Ranges = "Accept-Ranges" ":" acceptable-ranges acceptable-ranges = 1#range-unit | "none" Origin servers that accept byte-range requests MAY send Accept-Ranges: bytes but are not required to do so.
Clients MAY generate byte-range requests without having received this header for the resource involved. Range units are defined in section 3.12. Servers that do not accept any kind of range request for a resource MAY send Accept-Ranges: none to advise the client not to attempt a range request. Fielding, et al. Standards Track [Page 105] RFC 2616 HTTP/1.1 June 1999 14.6 Age The Age response-header field conveys the sender's estimate of the amount of time since the response (or its revalidation) was generated at the origin server. A cached response is "fresh" if its age does not exceed its freshness lifetime. Age values are calculated as specified in section 13.2.3. Age = "Age" ":" age-value age-value = delta-seconds Age values are non-negative decimal integers, representing time in seconds. If a cache receives a value larger than the largest positive integer it can represent, or if any of its age calculations overflows, it MUST transmit an Age header with a value of 2147483648 (2^31). An HTTP/1.1 server that includes a cache MUST include an Age header field in every response generated from its own cache. Caches SHOULD use an arithmetic type of at least 31 bits of range. 14.7 Allow The Allow entity-header field lists the set of methods supported by the resource identified by the Request-URI. The purpose of this field is strictly to inform the recipient of valid methods associated with the resource. An Allow header field MUST be present in a 405 (Method Not Allowed) response. Allow = "Allow" ":" #Method Example of use: Allow: GET, HEAD, PUT This field cannot prevent a client from trying other methods. However, the indications given by the Allow header field value SHOULD be followed. The actual set of allowed methods is defined by the origin server at the time of each request. The Allow header field MAY be provided with a PUT request to recommend the methods to be supported by the new or modified resource. 
The server is not required to support these methods and SHOULD include an Allow header in the response giving the actual supported methods. Fielding, et al. Standards Track [Page 106] RFC 2616 HTTP/1.1 June 1999 A proxy MUST NOT modify the Allow header field even if it does not understand all the methods specified, since the user agent might have other means of communicating with the origin server. 14.8 Authorization A user agent that wishes to authenticate itself with a server-- usually, but not necessarily, after receiving a 401 response--does so by including an Authorization request-header field with the request. The Authorization field value consists of credentials containing the authentication information of the user agent for the realm of the resource being requested. Authorization = "Authorization" ":" credentials HTTP access authentication is described in "HTTP Authentication: Basic and Digest Access Authentication" [43]. If a request is authenticated and a realm specified, the same credentials SHOULD be valid for all other requests within this realm (assuming that the authentication scheme itself does not require otherwise, such as credentials that vary according to a challenge value or using synchronized clocks). When a shared cache (see section 13.7) receives a request containing an Authorization field, it MUST NOT return the corresponding response as a reply to any other request, unless one of the following specific exceptions holds: 1. If the response includes the "s-maxage" cache-control directive, the cache MAY use that response in replying to a subsequent request. But (if the specified maximum age has passed) a proxy cache MUST first revalidate it with the origin server, using the request-headers from the new request to allow the origin server to authenticate the new request. (This is the defined behavior for s-maxage.) If the response includes "s-maxage=0", the proxy MUST always revalidate it before re-using it. 2.
If the response includes the "must-revalidate" cache-control directive, the cache MAY use that response in replying to a subsequent request. But if the response is stale, all caches MUST first revalidate it with the origin server, using the request-headers from the new request to allow the origin server to authenticate the new request. 3. If the response includes the "public" cache-control directive, it MAY be returned in reply to any subsequent request. Fielding, et al. Standards Track [Page 107] RFC 2616 HTTP/1.1 June 1999 14.9 Cache-Control The Cache-Control general-header field is used to specify directives that MUST be obeyed by all caching mechanisms along the request/response chain. The directives specify behavior intended to prevent caches from adversely interfering with the request or response. These directives typically override the default caching algorithms. Cache directives are unidirectional in that the presence of a directive in a request does not imply that the same directive is to be given in the response. Note that HTTP/1.0 caches might not implement Cache-Control and might only implement Pragma: no-cache (see section 14.32). Cache directives MUST be passed through by a proxy or gateway application, regardless of their significance to that application, since the directives might be applicable to all recipients along the request/response chain. It is not possible to specify a cache-directive for a specific cache.
Cache-Control = "Cache-Control" ":" 1#cache-directive cache-directive = cache-request-directive | cache-response-directive cache-request-directive = "no-cache" ; Section 14.9.1 | "no-store" ; Section 14.9.2 | "max-age" "=" delta-seconds ; Section 14.9.3, 14.9.4 | "max-stale" [ "=" delta-seconds ] ; Section 14.9.3 | "min-fresh" "=" delta-seconds ; Section 14.9.3 | "no-transform" ; Section 14.9.5 | "only-if-cached" ; Section 14.9.4 | cache-extension ; Section 14.9.6 cache-response-directive = "public" ; Section 14.9.1 | "private" [ "=" <"> 1#field-name <"> ] ; Section 14.9.1 | "no-cache" [ "=" <"> 1#field-name <"> ]; Section 14.9.1 | "no-store" ; Section 14.9.2 | "no-transform" ; Section 14.9.5 | "must-revalidate" ; Section 14.9.4 | "proxy-revalidate" ; Section 14.9.4 | "max-age" "=" delta-seconds ; Section 14.9.3 | "s-maxage" "=" delta-seconds ; Section 14.9.3 | cache-extension ; Section 14.9.6 cache-extension = token [ "=" ( token | quoted-string ) ] Fielding, et al. Standards Track [Page 108] RFC 2616 HTTP/1.1 June 1999 When a directive appears without any 1#field-name parameter, the directive applies to the entire request or response. When such a directive appears with a 1#field-name parameter, it applies only to the named field or fields, and not to the rest of the request or response. This mechanism supports extensibility; implementations of future versions of the HTTP protocol might apply these directives to header fields not defined in HTTP/1.1. The cache-control directives can be broken down into these general categories: - Restrictions on what are cacheable; these may only be imposed by the origin server. - Restrictions on what may be stored by a cache; these may be imposed by either the origin server or the user agent. - Modifications of the basic expiration mechanism; these may be imposed by either the origin server or the user agent. - Controls over cache revalidation and reload; these may only be imposed by a user agent. 
- Control over transformation of entities. - Extensions to the caching system. 14.9.1 What is Cacheable By default, a response is cacheable if the requirements of the request method, request header fields, and the response status indicate that it is cacheable. Section 13.4 summarizes these defaults for cacheability. The following Cache-Control response directives allow an origin server to override the default cacheability of a response: public Indicates that the response MAY be cached by any cache, even if it would normally be non-cacheable or cacheable only within a non-shared cache. (See also Authorization, section 14.8, for additional details.) private Indicates that all or part of the response message is intended for a single user and MUST NOT be cached by a shared cache. This allows an origin server to state that the specified parts of the Fielding, et al. Standards Track [Page 109] RFC 2616 HTTP/1.1 June 1999 response are intended for only one user and are not a valid response for requests by other users. A private (non-shared) cache MAY cache the response. Note: This usage of the word private only controls where the response may be cached, and cannot ensure the privacy of the message content. no-cache If the no-cache directive does not specify a field-name, then a cache MUST NOT use the response to satisfy a subsequent request without successful revalidation with the origin server. This allows an origin server to prevent caching even by caches that have been configured to return stale responses to client requests. If the no-cache directive does specify one or more field-names, then a cache MAY use the response to satisfy a subsequent request, subject to any other restrictions on caching. However, the specified field-name(s) MUST NOT be sent in the response to a subsequent request without successful revalidation with the origin server.
This allows an origin server to prevent the re-use of certain header fields in a response, while still allowing caching of the rest of the response. Note: Most HTTP/1.0 caches will not recognize or obey this directive. 14.9.2 What May be Stored by Caches no-store The purpose of the no-store directive is to prevent the inadvertent release or retention of sensitive information (for example, on backup tapes). The no-store directive applies to the entire message, and MAY be sent either in a response or in a request. If sent in a request, a cache MUST NOT store any part of either this request or any response to it. If sent in a response, a cache MUST NOT store any part of either this response or the request that elicited it. This directive applies to both non-shared and shared caches. "MUST NOT store" in this context means that the cache MUST NOT intentionally store the information in non-volatile storage, and MUST make a best-effort attempt to remove the information from volatile storage as promptly as possible after forwarding it. Even when this directive is associated with a response, users might explicitly store such a response outside of the caching system (e.g., with a "Save As" dialog). History buffers MAY store such responses as part of their normal operation. Fielding, et al. Standards Track [Page 110] RFC 2616 HTTP/1.1 June 1999 The purpose of this directive is to meet the stated requirements of certain users and service authors who are concerned about accidental releases of information via unanticipated accesses to cache data structures. While the use of this directive might improve privacy in some cases, we caution that it is NOT in any way a reliable or sufficient mechanism for ensuring privacy. In particular, malicious or compromised caches might not recognize or obey this directive, and communications networks might be vulnerable to eavesdropping.
14.9.3 Modifications of the Basic Expiration Mechanism The expiration time of an entity MAY be specified by the origin server using the Expires header (see section 14.21). Alternatively, it MAY be specified using the max-age directive in a response. When the max-age cache-control directive is present in a cached response, the response is stale if its current age is greater than the age value given (in seconds) at the time of a new request for that resource. The max-age directive on a response implies that the response is cacheable (i.e., "public") unless some other, more restrictive cache directive is also present. If a response includes both an Expires header and a max-age directive, the max-age directive overrides the Expires header, even if the Expires header is more restrictive. This rule allows an origin server to provide, for a given response, a longer expiration time to an HTTP/1.1 (or later) cache than to an HTTP/1.0 cache. This might be useful if certain HTTP/1.0 caches improperly calculate ages or expiration times, perhaps due to desynchronized clocks. Many HTTP/1.0 cache implementations will treat an Expires value that is less than or equal to the response Date value as being equivalent to the Cache-Control response directive "no-cache". If an HTTP/1.1 cache receives such a response, and the response does not include a Cache-Control header field, it SHOULD consider the response to be non-cacheable in order to retain compatibility with HTTP/1.0 servers. Note: An origin server might wish to use a relatively new HTTP cache control feature, such as the "private" directive, on a network including older caches that do not understand that feature. The origin server will need to combine the new feature with an Expires field whose value is less than or equal to the Date value. This will prevent older caches from improperly caching the response. Fielding, et al. 
Standards Track [Page 111] RFC 2616 HTTP/1.1 June 1999 s-maxage If a response includes an s-maxage directive, then for a shared cache (but not for a private cache), the maximum age specified by this directive overrides the maximum age specified by either the max-age directive or the Expires header. The s-maxage directive also implies the semantics of the proxy-revalidate directive (see section 14.9.4), i.e., that the shared cache must not use the entry after it becomes stale to respond to a subsequent request without first revalidating it with the origin server. The s-maxage directive is always ignored by a private cache. Note that most older caches, not compliant with this specification, do not implement any cache-control directives. An origin server wishing to use a cache-control directive that restricts, but does not prevent, caching by an HTTP/1.1-compliant cache MAY exploit the requirement that the max-age directive overrides the Expires header, and the fact that pre-HTTP/1.1-compliant caches do not observe the max-age directive. Other directives allow a user agent to modify the basic expiration mechanism. These directives MAY be specified on a request: max-age Indicates that the client is willing to accept a response whose age is no greater than the specified time in seconds. Unless max-stale directive is also included, the client is not willing to accept a stale response. min-fresh Indicates that the client is willing to accept a response whose freshness lifetime is no less than its current age plus the specified time in seconds. That is, the client wants a response that will still be fresh for at least the specified number of seconds. max-stale Indicates that the client is willing to accept a response that has exceeded its expiration time. If max-stale is assigned a value, then the client is willing to accept a response that has exceeded its expiration time by no more than the specified number of seconds.
If no value is assigned to max-stale, then the client is willing to accept a stale response of any age. If a cache returns a stale response, either because of a max-stale directive on a request, or because the cache is configured to override the expiration time of a response, the cache MUST attach a Warning header to the stale response, using Warning 110 (Response is stale). Fielding, et al. Standards Track [Page 112] RFC 2616 HTTP/1.1 June 1999 A cache MAY be configured to return stale responses without validation, but only if this does not conflict with any "MUST"-level requirements concerning cache validation (e.g., a "must-revalidate" cache-control directive). If both the new request and the cached entry include "max-age" directives, then the lesser of the two values is used for determining the freshness of the cached entry for that request. 14.9.4 Cache Revalidation and Reload Controls Sometimes a user agent might want or need to insist that a cache revalidate its cache entry with the origin server (and not just with the next cache along the path to the origin server), or to reload its cache entry from the origin server. End-to-end revalidation might be necessary if either the cache or the origin server has overestimated the expiration time of the cached response. End-to-end reload may be necessary if the cache entry has become corrupted for some reason. End-to-end revalidation may be requested either when the client does not have its own local cached copy, in which case we call it "unspecified end-to-end revalidation", or when the client does have a local cached copy, in which case we call it "specific end-to-end revalidation." The client can specify these three kinds of action using Cache-Control request directives: End-to-end reload The request includes a "no-cache" cache-control directive or, for compatibility with HTTP/1.0 clients, "Pragma: no-cache". Field names MUST NOT be included with the no-cache directive in a request.
The server MUST NOT use a cached copy when responding to such a request. Specific end-to-end revalidation The request includes a "max-age=0" cache-control directive, which forces each cache along the path to the origin server to revalidate its own entry, if any, with the next cache or server. The initial request includes a cache-validating conditional with the client's current validator. Unspecified end-to-end revalidation The request includes a "max-age=0" cache-control directive, which forces each cache along the path to the origin server to revalidate its own entry, if any, with the next cache or server. The initial request does not include a cache-validating Fielding, et al. Standards Track [Page 113] RFC 2616 HTTP/1.1 June 1999 conditional; the first cache along the path (if any) that holds a cache entry for this resource includes a cache-validating conditional with its current validator. max-age When an intermediate cache is forced, by means of a max-age=0 directive, to revalidate its own cache entry, and the client has supplied its own validator in the request, the supplied validator might differ from the validator currently stored with the cache entry. In this case, the cache MAY use either validator in making its own request without affecting semantic transparency. However, the choice of validator might affect performance. The best approach is for the intermediate cache to use its own validator when making its request. If the server replies with 304 (Not Modified), then the cache can return its now validated copy to the client with a 200 (OK) response. If the server replies with a new entity and cache validator, however, the intermediate cache can compare the returned validator with the one provided in the client's request, using the strong comparison function. If the client's validator is equal to the origin server's, then the intermediate cache simply returns 304 (Not Modified). Otherwise, it returns the new entity with a 200 (OK) response.
If a request includes the no-cache directive, it SHOULD NOT include min-fresh, max-stale, or max-age. only-if-cached In some cases, such as times of extremely poor network connectivity, a client may want a cache to return only those responses that it currently has stored, and not to reload or revalidate with the origin server. To do this, the client may include the only-if-cached directive in a request. If it receives this directive, a cache SHOULD either respond using a cached entry that is consistent with the other constraints of the request, or respond with a 504 (Gateway Timeout) status. However, if a group of caches is being operated as a unified system with good internal connectivity, such a request MAY be forwarded within that group of caches. must-revalidate Because a cache MAY be configured to ignore a server's specified expiration time, and because a client request MAY include a max-stale directive (which has a similar effect), the protocol also includes a mechanism for the origin server to require revalidation of a cache entry on any subsequent use. When the must-revalidate directive is present in a response received by a cache, that cache MUST NOT use the entry after it becomes stale to respond to a Fielding, et al. Standards Track [Page 114] RFC 2616 HTTP/1.1 June 1999 subsequent request without first revalidating it with the origin server. (I.e., the cache MUST do an end-to-end revalidation every time, if, based solely on the origin server's Expires or max-age value, the cached response is stale.) The must-revalidate directive is necessary to support reliable operation for certain protocol features. In all circumstances an HTTP/1.1 cache MUST obey the must-revalidate directive; in particular, if the cache cannot reach the origin server for any reason, it MUST generate a 504 (Gateway Timeout) response.
Servers SHOULD send the must-revalidate directive if and only if failure to revalidate a request on the entity could result in incorrect operation, such as a silently unexecuted financial transaction. Recipients MUST NOT take any automated action that violates this directive, and MUST NOT automatically provide an unvalidated copy of the entity if revalidation fails. Although this is not recommended, user agents operating under severe connectivity constraints MAY violate this directive but, if so, MUST explicitly warn the user that an unvalidated response has been provided. The warning MUST be provided on each unvalidated access, and SHOULD require explicit user confirmation. proxy-revalidate The proxy-revalidate directive has the same meaning as the must-revalidate directive, except that it does not apply to non-shared user agent caches. It can be used on a response to an authenticated request to permit the user's cache to store and later return the response without needing to revalidate it (since it has already been authenticated once by that user), while still requiring proxies that service many users to revalidate each time (in order to make sure that each user has been authenticated). Note that such authenticated responses also need the public cache control directive in order to allow them to be cached at all. 14.9.5 No-Transform Directive no-transform Implementors of intermediate caches (proxies) have found it useful to convert the media type of certain entity bodies. A non-transparent proxy might, for example, convert between image formats in order to save cache space or to reduce the amount of traffic on a slow link. Serious operational problems occur, however, when these transformations are applied to entity bodies intended for certain kinds of applications. For example, applications for medical Fielding, et al.
Standards Track [Page 115] RFC 2616 HTTP/1.1 June 1999 imaging, scientific data analysis and those using end-to-end authentication, all depend on receiving an entity body that is bit for bit identical to the original entity-body. Therefore, if a message includes the no-transform directive, an intermediate cache or proxy MUST NOT change those headers that are listed in section 13.5.2 as being subject to the no-transform directive. This implies that the cache or proxy MUST NOT change any aspect of the entity-body that is specified by these headers, including the value of the entity-body itself. 14.9.6 Cache Control Extensions The Cache-Control header field can be extended through the use of one or more cache-extension tokens, each with an optional assigned value. Informational extensions (those which do not require a change in cache behavior) MAY be added without changing the semantics of other directives. Behavioral extensions are designed to work by acting as modifiers to the existing base of cache directives. Both the new directive and the standard directive are supplied, such that applications which do not understand the new directive will default to the behavior specified by the standard directive, and those that understand the new directive will recognize it as modifying the requirements associated with the standard directive. In this way, extensions to the cache-control directives can be made without requiring changes to the base protocol. This extension mechanism depends on an HTTP cache obeying all of the cache-control directives defined for its native HTTP-version, obeying certain extensions, and ignoring all directives that it does not understand. For example, consider a hypothetical new response directive called community which acts as a modifier to the private directive. We define this new directive to mean that, in addition to any non-shared cache, any cache which is shared only by members of the community named within its value may cache the response. 
An origin server wishing to allow the UCI community to use an otherwise private response in their shared cache(s) could do so by including Cache-Control: private, community="UCI" A cache seeing this header field will act correctly even if the cache does not understand the community cache-extension, since it will also see and understand the private directive and thus default to the safe behavior. Fielding, et al. Standards Track [Page 116] RFC 2616 HTTP/1.1 June 1999 Unrecognized cache-directives MUST be ignored; it is assumed that any cache-directive likely to be unrecognized by an HTTP/1.1 cache will be combined with standard directives (or the response's default cacheability) such that the cache behavior will remain minimally correct even if the cache does not understand the extension(s). 14.10 Connection The Connection general-header field allows the sender to specify options that are desired for that particular connection and MUST NOT be communicated by proxies over further connections. The Connection header has the following grammar: Connection = "Connection" ":" 1#(connection-token) connection-token = token HTTP/1.1 proxies MUST parse the Connection header field before a message is forwarded and, for each connection-token in this field, remove any header field(s) from the message with the same name as the connection-token. Connection options are signaled by the presence of a connection-token in the Connection header field, not by any corresponding additional header field(s), since the additional header field may not be sent if there are no parameters associated with that connection option. Message headers listed in the Connection header MUST NOT include end-to-end headers, such as Cache-Control. HTTP/1.1 defines the "close" connection option for the sender to signal that the connection will be closed after completion of the response. 
For example, Connection: close in either the request or the response header fields indicates that the connection SHOULD NOT be considered `persistent' (section 8.1) after the current request/response is complete. HTTP/1.1 applications that do not support persistent connections MUST include the "close" connection option in every message. A system receiving an HTTP/1.0 (or lower-version) message that includes a Connection header MUST, for each connection-token in this field, remove and ignore any header field(s) from the message with the same name as the connection-token. This protects against mistaken forwarding of such header fields by pre-HTTP/1.1 proxies. See section 19.6.2. Fielding, et al. Standards Track [Page 117] RFC 2616 HTTP/1.1 June 1999 14.11 Content-Encoding The Content-Encoding entity-header field is used as a modifier to the media-type. When present, its value indicates what additional content codings have been applied to the entity-body, and thus what decoding mechanisms must be applied in order to obtain the media-type referenced by the Content-Type header field. Content-Encoding is primarily used to allow a document to be compressed without losing the identity of its underlying media type. Content-Encoding = "Content-Encoding" ":" 1#content-coding Content codings are defined in section 3.5. An example of its use is Content-Encoding: gzip The content-coding is a characteristic of the entity identified by the Request-URI. Typically, the entity-body is stored with this encoding and is only decoded before rendering or analogous usage. However, a non-transparent proxy MAY modify the content-coding if the new coding is known to be acceptable to the recipient, unless the "no-transform" cache-control directive is present in the message. If the content-coding of an entity is not "identity", then the response MUST include a Content-Encoding entity-header (section 14.11) that lists the non-identity content-coding(s) used. 
If the content-coding of an entity in a request message is not acceptable to the origin server, the server SHOULD respond with a status code of 415 (Unsupported Media Type). If multiple encodings have been applied to an entity, the content codings MUST be listed in the order in which they were applied. Additional information about the encoding parameters MAY be provided by other entity-header fields not defined by this specification. 14.12 Content-Language The Content-Language entity-header field describes the natural language(s) of the intended audience for the enclosed entity. Note that this might not be equivalent to all the languages used within the entity-body. Content-Language = "Content-Language" ":" 1#language-tag Fielding, et al. Standards Track [Page 118] RFC 2616 HTTP/1.1 June 1999 Language tags are defined in section 3.10. The primary purpose of Content-Language is to allow a user to identify and differentiate entities according to the user's own preferred language. Thus, if the body content is intended only for a Danish-literate audience, the appropriate field is Content-Language: da If no Content-Language is specified, the default is that the content is intended for all language audiences. This might mean that the sender does not consider it to be specific to any natural language, or that the sender does not know for which language it is intended. Multiple languages MAY be listed for content that is intended for multiple audiences. For example, a rendition of the "Treaty of Waitangi," presented simultaneously in the original Maori and English versions, would call for Content-Language: mi, en However, just because multiple languages are present within an entity does not mean that it is intended for multiple linguistic audiences. An example would be a beginner's language primer, such as "A First Lesson in Latin," which is clearly intended to be used by an English-literate audience. In this case, the Content-Language would properly only include "en". 
Content-Language MAY be applied to any media type -- it is not limited to textual documents. 14.13 Content-Length The Content-Length entity-header field indicates the size of the entity-body, in decimal number of OCTETs, sent to the recipient or, in the case of the HEAD method, the size of the entity-body that would have been sent had the request been a GET. Content-Length = "Content-Length" ":" 1*DIGIT An example is Content-Length: 3495 Applications SHOULD use this field to indicate the transfer-length of the message-body, unless this is prohibited by the rules in section 4.4. Fielding, et al. Standards Track [Page 119] RFC 2616 HTTP/1.1 June 1999 Any Content-Length greater than or equal to zero is a valid value. Section 4.4 describes how to determine the length of a message-body if a Content-Length is not given. Note that the meaning of this field is significantly different from the corresponding definition in MIME, where it is an optional field used within the "message/external-body" content-type. In HTTP, it SHOULD be sent whenever the message's length can be determined prior to being transferred, unless this is prohibited by the rules in section 4.4. 14.14 Content-Location The Content-Location entity-header field MAY be used to supply the resource location for the entity enclosed in the message when that entity is accessible from a location separate from the requested resource's URI. A server SHOULD provide a Content-Location for the variant corresponding to the response entity; especially in the case where a resource has multiple entities associated with it, and those entities actually have separate locations by which they might be individually accessed, the server SHOULD provide a Content-Location for the particular variant which is returned. Content-Location = "Content-Location" ":" ( absoluteURI | relativeURI ) The value of Content-Location also defines the base URI for the entity. 
The Content-Location value is not a replacement for the original requested URI; it is only a statement of the location of the resource corresponding to this particular entity at the time of the request. Future requests MAY specify the Content-Location URI as the request-URI if the desire is to identify the source of that particular entity. A cache cannot assume that an entity with a Content-Location different from the URI used to retrieve it can be used to respond to later requests on that Content-Location URI. However, the Content-Location can be used to differentiate between multiple entities retrieved from a single requested resource, as described in section 13.6. If the Content-Location is a relative URI, the relative URI is interpreted relative to the Request-URI. The meaning of the Content-Location header in PUT or POST requests is undefined; servers are free to ignore it in those cases. Fielding, et al. Standards Track [Page 120] RFC 2616 HTTP/1.1 June 1999 14.15 Content-MD5 The Content-MD5 entity-header field, as defined in RFC 1864 [23], is an MD5 digest of the entity-body for the purpose of providing an end-to-end message integrity check (MIC) of the entity-body. (Note: a MIC is good for detecting accidental modification of the entity-body in transit, but is not proof against malicious attacks.) Content-MD5 = "Content-MD5" ":" md5-digest md5-digest = <base64 of 128 bit MD5 digest as per RFC 1864> The Content-MD5 header field MAY be generated by an origin server or client to function as an integrity check of the entity-body. Only origin servers or clients MAY generate the Content-MD5 header field; proxies and gateways MUST NOT generate it, as this would defeat its value as an end-to-end integrity check. Any recipient of the entity-body, including gateways and proxies, MAY check that the digest value in this header field matches that of the entity-body as received.
The MD5 digest is computed based on the content of the entity-body, including any content-coding that has been applied, but not including any transfer-encoding applied to the message-body. If the message is received with a transfer-encoding, that encoding MUST be removed prior to checking the Content-MD5 value against the received entity. This has the result that the digest is computed on the octets of the entity-body exactly as, and in the order that, they would be sent if no transfer-encoding were being applied. HTTP extends RFC 1864 to permit the digest to be computed for MIME composite media-types (e.g., multipart/* and message/rfc822), but this does not change how the digest is computed as defined in the preceding paragraph. There are several consequences of this. The entity-body for composite types MAY contain many body-parts, each with its own MIME and HTTP headers (including Content-MD5, Content-Transfer-Encoding, and Content-Encoding headers). If a body-part has a Content-Transfer-Encoding or Content-Encoding header, it is assumed that the content of the body-part has had the encoding applied, and the body-part is included in the Content-MD5 digest as is -- i.e., after the application. The Transfer-Encoding header field is not allowed within body-parts. Conversion of all line breaks to CRLF MUST NOT be done before computing or checking the digest: the line break convention used in the text actually transmitted MUST be left unaltered when computing the digest. Fielding, et al. Standards Track [Page 121] RFC 2616 HTTP/1.1 June 1999 Note: while the definition of Content-MD5 is exactly the same for HTTP as in RFC 1864 for MIME entity-bodies, there are several ways in which the application of Content-MD5 to HTTP entity-bodies differs from its application to MIME entity-bodies. One is that HTTP, unlike MIME, does not use Content-Transfer-Encoding, and does use Transfer-Encoding and Content-Encoding.
Another is that HTTP more frequently uses binary content types than MIME, so it is worth noting that, in such cases, the byte order used to compute the digest is the transmission byte order defined for the type. Lastly, HTTP allows transmission of text types with any of several line break conventions and not just the canonical form using CRLF. 14.16 Content-Range The Content-Range entity-header is sent with a partial entity-body to specify where in the full entity-body the partial body should be applied. Range units are defined in section 3.12. Content-Range = "Content-Range" ":" content-range-spec content-range-spec = byte-content-range-spec byte-content-range-spec = bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" ) byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*" instance-length = 1*DIGIT The header SHOULD indicate the total length of the full entity-body, unless this length is unknown or difficult to determine. The asterisk "*" character means that the instance-length is unknown at the time when the response was generated. Unlike byte-ranges-specifier values (see section 14.35.1), a byte-range-resp-spec MUST only specify one range, and MUST contain absolute byte positions for both the first and last byte of the range. A byte-content-range-spec with a byte-range-resp-spec whose last-byte-pos value is less than its first-byte-pos value, or whose instance-length value is less than or equal to its last-byte-pos value, is invalid. The recipient of an invalid byte-content-range-spec MUST ignore it and any content transferred along with it. A server sending a response with status code 416 (Requested range not satisfiable) SHOULD include a Content-Range field with a byte-range-resp-spec of "*". The instance-length specifies the current length of Fielding, et al. Standards Track [Page 122] RFC 2616 HTTP/1.1 June 1999 the selected resource.
A response with status code 206 (Partial Content) MUST NOT include a Content-Range field with a byte-range-resp-spec of "*". Examples of byte-content-range-spec values, assuming that the entity contains a total of 1234 bytes: . The first 500 bytes: bytes 0-499/1234 . The second 500 bytes: bytes 500-999/1234 . All except for the first 500 bytes: bytes 500-1233/1234 . The last 500 bytes: bytes 734-1233/1234 When an HTTP message includes the content of a single range (for example, a response to a request for a single range, or to a request for a set of ranges that overlap without any holes), this content is transmitted with a Content-Range header, and a Content-Length header showing the number of bytes actually transferred. For example, HTTP/1.1 206 Partial content Date: Wed, 15 Nov 1995 06:25:24 GMT Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT Content-Range: bytes 21010-47021/47022 Content-Length: 26012 Content-Type: image/gif When an HTTP message includes the content of multiple ranges (for example, a response to a request for multiple non-overlapping ranges), these are transmitted as a multipart message. The multipart media type used for this purpose is "multipart/byteranges" as defined in appendix 19.2. See appendix 19.6.3 for a compatibility issue. A response to a request for a single range MUST NOT be sent using the multipart/byteranges media type. A response to a request for multiple ranges, whose result is a single range, MAY be sent as a multipart/byteranges media type with one part. A client that cannot decode a multipart/byteranges message MUST NOT ask for multiple byte-ranges in a single request. When a client requests multiple byte-ranges in one request, the server SHOULD return them in the order that they appeared in the request. Fielding, et al.
Standards Track [Page 123] RFC 2616 HTTP/1.1 June 1999 If the server ignores a byte-range-spec because it is syntactically invalid, the server SHOULD treat the request as if the invalid Range header field did not exist. (Normally, this means return a 200 response containing the full entity). If the server receives a request (other than one including an If- Range request-header field) with an unsatisfiable Range request- header field (that is, all of whose byte-range-spec values have a first-byte-pos value greater than the current length of the selected resource), it SHOULD return a response code of 416 (Requested range not satisfiable) (section 10.4.17). Note: clients cannot depend on servers to send a 416 (Requested range not satisfiable) response instead of a 200 (OK) response for an unsatisfiable Range request-header, since not all servers implement this request-header. 14.17 Content-Type The Content-Type entity-header field indicates the media type of the entity-body sent to the recipient or, in the case of the HEAD method, the media type that would have been sent had the request been a GET. Content-Type = "Content-Type" ":" media-type Media types are defined in section 3.7. An example of the field is Content-Type: text/html; charset=ISO-8859-4 Further discussion of methods for identifying the media type of an entity is provided in section 7.2.1. 14.18 Date The Date general-header field represents the date and time at which the message was originated, having the same semantics as orig-date in RFC 822. The field value is an HTTP-date, as described in section 3.3.1; it MUST be sent in RFC 1123 [8]-date format. Date = "Date" ":" HTTP-date An example is Date: Tue, 15 Nov 1994 08:12:31 GMT Origin servers MUST include a Date header field in all responses, except in these cases: Fielding, et al. Standards Track [Page 124] RFC 2616 HTTP/1.1 June 1999 1. 
If the response status code is 100 (Continue) or 101 (Switching Protocols), the response MAY include a Date header field, at the server's option. 2. If the response status code conveys a server error, e.g. 500 (Internal Server Error) or 503 (Service Unavailable), and it is inconvenient or impossible to generate a valid Date. 3. If the server does not have a clock that can provide a reasonable approximation of the current time, its responses MUST NOT include a Date header field. In this case, the rules in section 14.18.1 MUST be followed. A received message that does not have a Date header field MUST be assigned one by the recipient if the message will be cached by that recipient or gatewayed via a protocol which requires a Date. An HTTP implementation without a clock MUST NOT cache responses without revalidating them on every use. An HTTP cache, especially a shared cache, SHOULD use a mechanism, such as NTP [28], to synchronize its clock with a reliable external standard. Clients SHOULD only send a Date header field in messages that include an entity-body, as in the case of the PUT and POST requests, and even then it is optional. A client without a clock MUST NOT send a Date header field in a request. The HTTP-date sent in a Date header SHOULD NOT represent a date and time subsequent to the generation of the message. It SHOULD represent the best available approximation of the date and time of message generation, unless the implementation has no means of generating a reasonably accurate date and time. In theory, the date ought to represent the moment just before the entity is generated. In practice, the date can be generated at any time during the message origination without affecting its semantic value. 14.18.1 Clockless Origin Server Operation Some origin server implementations might not have a clock available. 
An origin server without a clock MUST NOT assign Expires or Last- Modified values to a response, unless these values were associated with the resource by a system or user with a reliable clock. It MAY assign an Expires value that is known, at or before server configuration time, to be in the past (this allows "pre-expiration" of responses without storing separate Expires values for each resource). Fielding, et al. Standards Track [Page 125] RFC 2616 HTTP/1.1 June 1999 14.19 ETag The ETag response-header field provides the current value of the entity tag for the requested variant. The headers used with entity tags are described in sections 14.24, 14.26 and 14.44. The entity tag MAY be used for comparison with other entities from the same resource (see section 13.3.3). ETag = "ETag" ":" entity-tag Examples: ETag: "xyzzy" ETag: W/"xyzzy" ETag: "" 14.20 Expect The Expect request-header field is used to indicate that particular server behaviors are required by the client. Expect = "Expect" ":" 1#expectation expectation = "100-continue" | expectation-extension expectation-extension = token [ "=" ( token | quoted-string ) *expect-params ] expect-params = ";" token [ "=" ( token | quoted-string ) ] A server that does not understand or is unable to comply with any of the expectation values in the Expect field of a request MUST respond with appropriate error status. The server MUST respond with a 417 (Expectation Failed) status if any of the expectations cannot be met or, if there are other problems with the request, some other 4xx status. This header field is defined with extensible syntax to allow for future extensions. If a server receives a request containing an Expect field that includes an expectation-extension that it does not support, it MUST respond with a 417 (Expectation Failed) status. Comparison of expectation values is case-insensitive for unquoted tokens (including the 100-continue token), and is case-sensitive for quoted-string expectation-extensions. 
Fielding, et al. Standards Track [Page 126] RFC 2616 HTTP/1.1 June 1999 The Expect mechanism is hop-by-hop: that is, an HTTP/1.1 proxy MUST return a 417 (Expectation Failed) status if it receives a request with an expectation that it cannot meet. However, the Expect request-header itself is end-to-end; it MUST be forwarded if the request is forwarded. Many older HTTP/1.0 and HTTP/1.1 applications do not understand the Expect header. See section 8.2.3 for the use of the 100 (continue) status. 14.21 Expires The Expires entity-header field gives the date/time after which the response is considered stale. A stale cache entry may not normally be returned by a cache (either a proxy cache or a user agent cache) unless it is first validated with the origin server (or with an intermediate cache that has a fresh copy of the entity). See section 13.2 for further discussion of the expiration model. The presence of an Expires field does not imply that the original resource will change or cease to exist at, before, or after that time. The format is an absolute date and time as defined by HTTP-date in section 3.3.1; it MUST be in RFC 1123 date format: Expires = "Expires" ":" HTTP-date An example of its use is Expires: Thu, 01 Dec 1994 16:00:00 GMT Note: if a response includes a Cache-Control field with the max- age directive (see section 14.9.3), that directive overrides the Expires field. HTTP/1.1 clients and caches MUST treat other invalid date formats, especially including the value "0", as in the past (i.e., "already expired"). To mark a response as "already expired," an origin server sends an Expires date that is equal to the Date header value. (See the rules for expiration calculations in section 13.2.4.) Fielding, et al. Standards Track [Page 127] RFC 2616 HTTP/1.1 June 1999 To mark a response as "never expires," an origin server sends an Expires date approximately one year from the time the response is sent. 
HTTP/1.1 servers SHOULD NOT send Expires dates more than one year in the future. The presence of an Expires header field with a date value of some time in the future on a response that otherwise would by default be non-cacheable indicates that the response is cacheable, unless indicated otherwise by a Cache-Control header field (section 14.9). 14.22 From The From request-header field, if given, SHOULD contain an Internet e-mail address for the human user who controls the requesting user agent. The address SHOULD be machine-usable, as defined by "mailbox" in RFC 822 [9] as updated by RFC 1123 [8]: From = "From" ":" mailbox An example is: From: webmaster@w3.org This header field MAY be used for logging purposes and as a means for identifying the source of invalid or unwanted requests. It SHOULD NOT be used as an insecure form of access protection. The interpretation of this field is that the request is being performed on behalf of the person given, who accepts responsibility for the method performed. In particular, robot agents SHOULD include this header so that the person responsible for running the robot can be contacted if problems occur on the receiving end. The Internet e-mail address in this field MAY be separate from the Internet host which issued the request. For example, when a request is passed through a proxy the original issuer's address SHOULD be used. The client SHOULD NOT send the From header field without the user's approval, as it might conflict with the user's privacy interests or their site's security policy. It is strongly recommended that the user be able to disable, enable, and modify the value of this field at any time prior to a request. 14.23 Host The Host request-header field specifies the Internet host and port number of the resource being requested, as obtained from the original URI given by the user or referring resource (generally an HTTP URL, Fielding, et al. 
Standards Track [Page 128] RFC 2616 HTTP/1.1 June 1999 as described in section 3.2.2). The Host field value MUST represent the naming authority of the origin server or gateway given by the original URL. This allows the origin server or gateway to differentiate between internally-ambiguous URLs, such as the root "/" URL of a server for multiple host names on a single IP address. Host = "Host" ":" host [ ":" port ] ; Section 3.2.2 A "host" without any trailing port information implies the default port for the service requested (e.g., "80" for an HTTP URL). For example, a request on the origin server for http://www.w3.org/pub/WWW/ would properly include: GET /pub/WWW/ HTTP/1.1 Host: www.w3.org A client MUST include a Host header field in all HTTP/1.1 request messages. If the requested URI does not include an Internet host name for the service being requested, then the Host header field MUST be given with an empty value. An HTTP/1.1 proxy MUST ensure that any request message it forwards does contain an appropriate Host header field that identifies the service being requested by the proxy. All Internet-based HTTP/1.1 servers MUST respond with a 400 (Bad Request) status code to any HTTP/1.1 request message which lacks a Host header field. See sections 5.2 and 19.6.1.1 for other requirements relating to Host. 14.24 If-Match The If-Match request-header field is used with a method to make it conditional. A client that has one or more entities previously obtained from the resource can verify that one of those entities is current by including a list of their associated entity tags in the If-Match header field. Entity tags are defined in section 3.11. The purpose of this feature is to allow efficient updates of cached information with a minimum amount of transaction overhead. It is also used, on updating requests, to prevent inadvertent modification of the wrong version of a resource. As a special case, the value "*" matches any current entity of the resource.
If-Match = "If-Match" ":" ( "*" | 1#entity-tag ) If any of the entity tags match the entity tag of the entity that would have been returned in the response to a similar GET request (without the If-Match header) on that resource, or if "*" is given Fielding, et al. Standards Track [Page 129] RFC 2616 HTTP/1.1 June 1999 and any current entity exists for that resource, then the server MAY perform the requested method as if the If-Match header field did not exist. A server MUST use the strong comparison function (see section 13.3.3) to compare the entity tags in If-Match. If none of the entity tags match, or if "*" is given and no current entity exists, the server MUST NOT perform the requested method, and MUST return a 412 (Precondition Failed) response. This behavior is most useful when the client wants to prevent an updating method, such as PUT, from modifying a resource that has changed since the client last retrieved it. If the request would, without the If-Match header field, result in anything other than a 2xx or 412 status, then the If-Match header MUST be ignored. The meaning of "If-Match: *" is that the method SHOULD be performed if the representation selected by the origin server (or by a cache, possibly using the Vary mechanism, see section 14.44) exists, and MUST NOT be performed if the representation does not exist. A request intended to update a resource (e.g., a PUT) MAY include an If-Match header field to signal that the request method MUST NOT be applied if the entity corresponding to the If-Match value (a single entity tag) is no longer a representation of that resource. This allows the user to indicate that they do not wish the request to be successful if the resource has been changed without their knowledge. 
Examples: If-Match: "xyzzy" If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" If-Match: * The result of a request having both an If-Match header field and either an If-None-Match or an If-Modified-Since header fields is undefined by this specification. 14.25 If-Modified-Since The If-Modified-Since request-header field is used with a method to make it conditional: if the requested variant has not been modified since the time specified in this field, an entity will not be returned from the server; instead, a 304 (not modified) response will be returned without any message-body. If-Modified-Since = "If-Modified-Since" ":" HTTP-date Fielding, et al. Standards Track [Page 130] RFC 2616 HTTP/1.1 June 1999 An example of the field is: If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT A GET method with an If-Modified-Since header and no Range header requests that the identified entity be transferred only if it has been modified since the date given by the If-Modified-Since header. The algorithm for determining this includes the following cases: a) If the request would normally result in anything other than a 200 (OK) status, or if the passed If-Modified-Since date is invalid, the response is exactly the same as for a normal GET. A date which is later than the server's current time is invalid. b) If the variant has been modified since the If-Modified-Since date, the response is exactly the same as for a normal GET. c) If the variant has not been modified since a valid If- Modified-Since date, the server SHOULD return a 304 (Not Modified) response. The purpose of this feature is to allow efficient updates of cached information with a minimum amount of transaction overhead. Note: The Range request-header field modifies the meaning of If- Modified-Since; see section 14.35 for full details. Note: If-Modified-Since times are interpreted by the server, whose clock might not be synchronized with the client. 
Note: When handling an If-Modified-Since header field, some servers will use an exact date comparison function, rather than a less-than function, for deciding whether to send a 304 (Not Modified) response. To get best results when sending an If- Modified-Since header field for cache validation, clients are advised to use the exact date string received in a previous Last- Modified header field whenever possible. Note: If a client uses an arbitrary date in the If-Modified-Since header instead of a date taken from the Last-Modified header for the same request, the client should be aware of the fact that this date is interpreted in the server's understanding of time. The client should consider unsynchronized clocks and rounding problems due to the different encodings of time between the client and server. This includes the possibility of race conditions if the document has changed between the time it was first requested and the If-Modified-Since date of a subsequent request, and the Fielding, et al. Standards Track [Page 131] RFC 2616 HTTP/1.1 June 1999 possibility of clock-skew-related problems if the If-Modified- Since date is derived from the client's clock without correction to the server's clock. Corrections for different time bases between client and server are at best approximate due to network latency. The result of a request having both an If-Modified-Since header field and either an If-Match or an If-Unmodified-Since header fields is undefined by this specification. 14.26 If-None-Match The If-None-Match request-header field is used with a method to make it conditional. A client that has one or more entities previously obtained from the resource can verify that none of those entities is current by including a list of their associated entity tags in the If-None-Match header field. The purpose of this feature is to allow efficient updates of cached information with a minimum amount of transaction overhead. It is also used to prevent a method (e.g. 
PUT) from inadvertently modifying an existing resource when the client believes that the resource does not exist. As a special case, the value "*" matches any current entity of the resource. If-None-Match = "If-None-Match" ":" ( "*" | 1#entity-tag ) If any of the entity tags match the entity tag of the entity that would have been returned in the response to a similar GET request (without the If-None-Match header) on that resource, or if "*" is given and any current entity exists for that resource, then the server MUST NOT perform the requested method, unless required to do so because the resource's modification date fails to match that supplied in an If-Modified-Since header field in the request. Instead, if the request method was GET or HEAD, the server SHOULD respond with a 304 (Not Modified) response, including the cache-related header fields (particularly ETag) of one of the entities that matched. For all other request methods, the server MUST respond with a status of 412 (Precondition Failed). See section 13.3.3 for rules on how to determine if two entity tags match. The weak comparison function can only be used with GET or HEAD requests. Fielding, et al. Standards Track [Page 132] RFC 2616 HTTP/1.1 June 1999 If none of the entity tags match, then the server MAY perform the requested method as if the If-None-Match header field did not exist, but MUST also ignore any If-Modified-Since header field(s) in the request. That is, if no entity tags match, then the server MUST NOT return a 304 (Not Modified) response. If the request would, without the If-None-Match header field, result in anything other than a 2xx or 304 status, then the If-None-Match header MUST be ignored. (See section 13.3.4 for a discussion of server behavior when both If-Modified-Since and If-None-Match appear in the same request.)
The meaning of "If-None-Match: *" is that the method MUST NOT be performed if the representation selected by the origin server (or by a cache, possibly using the Vary mechanism, see section 14.44) exists, and SHOULD be performed if the representation does not exist. This feature is intended to be useful in preventing races between PUT operations. Examples: If-None-Match: "xyzzy" If-None-Match: W/"xyzzy" If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" If-None-Match: W/"xyzzy", W/"r2d2xxxx", W/"c3piozzzz" If-None-Match: * The result of a request having both an If-None-Match header field and either an If-Match or an If-Unmodified-Since header fields is undefined by this specification. 14.27 If-Range If a client has a partial copy of an entity in its cache, and wishes to have an up-to-date copy of the entire entity in its cache, it could use the Range request-header with a conditional GET (using either or both of If-Unmodified-Since and If-Match.) However, if the condition fails because the entity has been modified, the client would then have to make a second request to obtain the entire current entity-body. The If-Range header allows a client to "short-circuit" the second request. Informally, its meaning is `if the entity is unchanged, send me the part(s) that I am missing; otherwise, send me the entire new entity'. If-Range = "If-Range" ":" ( entity-tag | HTTP-date ) Fielding, et al. Standards Track [Page 133] RFC 2616 HTTP/1.1 June 1999 If the client has no entity tag for an entity, but does have a Last- Modified date, it MAY use that date in an If-Range header. (The server can distinguish between a valid HTTP-date and any form of entity-tag by examining no more than two characters.) The If-Range header SHOULD only be used together with a Range header, and MUST be ignored if the request does not include a Range header, or if the server does not support the sub-range operation. 
If the entity tag given in the If-Range header matches the current entity tag for the entity, then the server SHOULD provide the specified sub-range of the entity using a 206 (Partial content) response. If the entity tag does not match, then the server SHOULD return the entire entity using a 200 (OK) response. 14.28 If-Unmodified-Since The If-Unmodified-Since request-header field is used with a method to make it conditional. If the requested resource has not been modified since the time specified in this field, the server SHOULD perform the requested operation as if the If-Unmodified-Since header were not present. If the requested variant has been modified since the specified time, the server MUST NOT perform the requested operation, and MUST return a 412 (Precondition Failed). If-Unmodified-Since = "If-Unmodified-Since" ":" HTTP-date An example of the field is: If-Unmodified-Since: Sat, 29 Oct 1994 19:43:31 GMT If the request normally (i.e., without the If-Unmodified-Since header) would result in anything other than a 2xx or 412 status, the If-Unmodified-Since header SHOULD be ignored. If the specified date is invalid, the header is ignored. The result of a request having both an If-Unmodified-Since header field and either an If-None-Match or an If-Modified-Since header fields is undefined by this specification. 14.29 Last-Modified The Last-Modified entity-header field indicates the date and time at which the origin server believes the variant was last modified. Last-Modified = "Last-Modified" ":" HTTP-date Fielding, et al. Standards Track [Page 134] RFC 2616 HTTP/1.1 June 1999 An example of its use is Last-Modified: Tue, 15 Nov 1994 12:45:26 GMT The exact meaning of this header field depends on the implementation of the origin server and the nature of the original resource. For files, it may be just the file system last-modified time. For entities with dynamically included parts, it may be the most recent of the set of last-modify times for its component parts. 
For database gateways, it may be the last-update time stamp of the record. For virtual objects, it may be the last time the internal state changed. An origin server MUST NOT send a Last-Modified date which is later than the server's time of message origination. In such cases, where the resource's last modification would indicate some time in the future, the server MUST replace that date with the message origination date. An origin server SHOULD obtain the Last-Modified value of the entity as close as possible to the time that it generates the Date value of its response. This allows a recipient to make an accurate assessment of the entity's modification time, especially if the entity changes near the time that the response is generated. HTTP/1.1 servers SHOULD send Last-Modified whenever feasible. 14.30 Location The Location response-header field is used to redirect the recipient to a location other than the Request-URI for completion of the request or identification of a new resource. For 201 (Created) responses, the Location is that of the new resource which was created by the request. For 3xx responses, the location SHOULD indicate the server's preferred URI for automatic redirection to the resource. The field value consists of a single absolute URI. Location = "Location" ":" absoluteURI An example is: Location: http://www.w3.org/pub/WWW/People.html Note: The Content-Location header field (section 14.14) differs from Location in that the Content-Location identifies the original location of the entity enclosed in the request. It is therefore possible for a response to contain header fields for both Location and Content-Location. Also see section 13.10 for cache requirements of some methods. Fielding, et al. 
Standards Track [Page 135] RFC 2616 HTTP/1.1 June 1999 14.31 Max-Forwards The Max-Forwards request-header field provides a mechanism with the TRACE (section 9.8) and OPTIONS (section 9.2) methods to limit the number of proxies or gateways that can forward the request to the next inbound server. This can be useful when the client is attempting to trace a request chain which appears to be failing or looping in mid-chain. Max-Forwards = "Max-Forwards" ":" 1*DIGIT The Max-Forwards value is a decimal integer indicating the remaining number of times this request message may be forwarded. Each proxy or gateway recipient of a TRACE or OPTIONS request containing a Max-Forwards header field MUST check and update its value prior to forwarding the request. If the received value is zero (0), the recipient MUST NOT forward the request; instead, it MUST respond as the final recipient. If the received Max-Forwards value is greater than zero, then the forwarded message MUST contain an updated Max-Forwards field with a value decremented by one (1). The Max-Forwards header field MAY be ignored for all other methods defined by this specification and for any extension methods for which it is not explicitly referred to as part of that method definition. 14.32 Pragma The Pragma general-header field is used to include implementation- specific directives that might apply to any recipient along the request/response chain. All pragma directives specify optional behavior from the viewpoint of the protocol; however, some systems MAY require that behavior be consistent with the directives. Pragma = "Pragma" ":" 1#pragma-directive pragma-directive = "no-cache" | extension-pragma extension-pragma = token [ "=" ( token | quoted-string ) ] When the no-cache directive is present in a request message, an application SHOULD forward the request toward the origin server even if it has a cached copy of what is being requested. 
This pragma directive has the same semantics as the no-cache cache-directive (see section 14.9) and is defined here for backward compatibility with HTTP/1.0. Clients SHOULD include both header fields when a no-cache request is sent to a server not known to be HTTP/1.1 compliant. Fielding, et al. Standards Track [Page 136] RFC 2616 HTTP/1.1 June 1999 Pragma directives MUST be passed through by a proxy or gateway application, regardless of their significance to that application, since the directives might be applicable to all recipients along the request/response chain. It is not possible to specify a pragma for a specific recipient; however, any pragma directive not relevant to a recipient SHOULD be ignored by that recipient. HTTP/1.1 caches SHOULD treat "Pragma: no-cache" as if the client had sent "Cache-Control: no-cache". No new Pragma directives will be defined in HTTP. Note: because the meaning of "Pragma: no-cache" as a response header field is not actually specified, it does not provide a reliable replacement for "Cache-Control: no-cache" in a response. 14.33 Proxy-Authenticate The Proxy-Authenticate response-header field MUST be included as part of a 407 (Proxy Authentication Required) response. The field value consists of a challenge that indicates the authentication scheme and parameters applicable to the proxy for this Request-URI. Proxy-Authenticate = "Proxy-Authenticate" ":" 1#challenge The HTTP access authentication process is described in "HTTP Authentication: Basic and Digest Access Authentication" [43]. Unlike WWW-Authenticate, the Proxy-Authenticate header field applies only to the current connection and SHOULD NOT be passed on to downstream clients. However, an intermediate proxy might need to obtain its own credentials by requesting them from the downstream client, which in some circumstances will appear as if the proxy is forwarding the Proxy-Authenticate header field.
14.34 Proxy-Authorization The Proxy-Authorization request-header field allows the client to identify itself (or its user) to a proxy which requires authentication. The Proxy-Authorization field value consists of credentials containing the authentication information of the user agent for the proxy and/or realm of the resource being requested. Proxy-Authorization = "Proxy-Authorization" ":" credentials The HTTP access authentication process is described in "HTTP Authentication: Basic and Digest Access Authentication" [43]. Unlike Authorization, the Proxy-Authorization header field applies only to the next outbound proxy that demanded authentication using the Proxy-Authenticate field. When multiple proxies are used in a chain, the Fielding, et al. Standards Track [Page 137] RFC 2616 HTTP/1.1 June 1999 Proxy-Authorization header field is consumed by the first outbound proxy that was expecting to receive credentials. A proxy MAY relay the credentials from the client request to the next proxy if that is the mechanism by which the proxies cooperatively authenticate a given request. 14.35 Range 14.35.1 Byte Ranges Since all HTTP entities are represented in HTTP messages as sequences of bytes, the concept of a byte range is meaningful for any HTTP entity. (However, not all clients and servers need to support byte- range operations.) Byte range specifications in HTTP apply to the sequence of bytes in the entity-body (not necessarily the same as the message-body). A byte range operation MAY specify a single range of bytes, or a set of ranges within a single entity. ranges-specifier = byte-ranges-specifier byte-ranges-specifier = bytes-unit "=" byte-range-set byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) byte-range-spec = first-byte-pos "-" [last-byte-pos] first-byte-pos = 1*DIGIT last-byte-pos = 1*DIGIT The first-byte-pos value in a byte-range-spec gives the byte-offset of the first byte in a range.
The last-byte-pos value gives the byte-offset of the last byte in the range; that is, the byte positions specified are inclusive. Byte offsets start at zero. If the last-byte-pos value is present, it MUST be greater than or equal to the first-byte-pos in that byte-range-spec, or the byte- range-spec is syntactically invalid. The recipient of a byte-range- set that includes one or more syntactically invalid byte-range-spec values MUST ignore the header field that includes that byte-range- set. If the last-byte-pos value is absent, or if the value is greater than or equal to the current length of the entity-body, last-byte-pos is taken to be equal to one less than the current length of the entity- body in bytes. By its choice of last-byte-pos, a client can limit the number of bytes retrieved without knowing the size of the entity. Fielding, et al. Standards Track [Page 138] RFC 2616 HTTP/1.1 June 1999 suffix-byte-range-spec = "-" suffix-length suffix-length = 1*DIGIT A suffix-byte-range-spec is used to specify the suffix of the entity-body, of a length given by the suffix-length value. (That is, this form specifies the last N bytes of an entity-body.) If the entity is shorter than the specified suffix-length, the entire entity-body is used. If a syntactically valid byte-range-set includes at least one byte- range-spec whose first-byte-pos is less than the current length of the entity-body, or at least one suffix-byte-range-spec with a non- zero suffix-length, then the byte-range-set is satisfiable. Otherwise, the byte-range-set is unsatisfiable. If the byte-range-set is unsatisfiable, the server SHOULD return a response with a status of 416 (Requested range not satisfiable). Otherwise, the server SHOULD return a response with a status of 206 (Partial Content) containing the satisfiable ranges of the entity-body. 
Examples of byte-ranges-specifier values (assuming an entity-body of length 10000): - The first 500 bytes (byte offsets 0-499, inclusive): bytes=0-499 - The second 500 bytes (byte offsets 500-999, inclusive): bytes=500-999 - The final 500 bytes (byte offsets 9500-9999, inclusive): bytes=-500 - Or bytes=9500- - The first and last bytes only (bytes 0 and 9999): bytes=0-0,-1 - Several legal but not canonical specifications of the second 500 bytes (byte offsets 500-999, inclusive): bytes=500-600,601-999 bytes=500-700,601-999 14.35.2 Range Retrieval Requests HTTP retrieval requests using conditional or unconditional GET methods MAY request one or more sub-ranges of the entity, instead of the entire entity, using the Range request header, which applies to the entity returned as the result of the request: Range = "Range" ":" ranges-specifier Fielding, et al. Standards Track [Page 139] RFC 2616 HTTP/1.1 June 1999 A server MAY ignore the Range header. However, HTTP/1.1 origin servers and intermediate caches ought to support byte ranges when possible, since Range supports efficient recovery from partially failed transfers, and supports efficient partial retrieval of large entities. If the server supports the Range header and the specified range or ranges are appropriate for the entity: - The presence of a Range header in an unconditional GET modifies what is returned if the GET is otherwise successful. In other words, the response carries a status code of 206 (Partial Content) instead of 200 (OK). - The presence of a Range header in a conditional GET (a request using one or both of If-Modified-Since and If-None-Match, or one or both of If-Unmodified-Since and If-Match) modifies what is returned if the GET is otherwise successful and the condition is true. It does not affect the 304 (Not Modified) response returned if the conditional is false. In some cases, it might be more appropriate to use the If-Range header (see section 14.27) in addition to the Range header. 
If a proxy that supports ranges receives a Range request, forwards the request to an inbound server, and receives an entire entity in reply, it SHOULD only return the requested range to its client. It SHOULD store the entire received response in its cache if that is consistent with its cache allocation policies. 14.36 Referer The Referer[sic] request-header field allows the client to specify, for the server's benefit, the address (URI) of the resource from which the Request-URI was obtained (the "referrer", although the header field is misspelled.) The Referer request-header allows a server to generate lists of back-links to resources for interest, logging, optimized caching, etc. It also allows obsolete or mistyped links to be traced for maintenance. The Referer field MUST NOT be sent if the Request-URI was obtained from a source that does not have its own URI, such as input from the user keyboard. Referer = "Referer" ":" ( absoluteURI | relativeURI ) Example: Referer: http://www.w3.org/hypertext/DataSources/Overview.html Fielding, et al. Standards Track [Page 140] RFC 2616 HTTP/1.1 June 1999 If the field value is a relative URI, it SHOULD be interpreted relative to the Request-URI. The URI MUST NOT include a fragment. See section 15.1.3 for security considerations. 14.37 Retry-After The Retry-After response-header field can be used with a 503 (Service Unavailable) response to indicate how long the service is expected to be unavailable to the requesting client. This field MAY also be used with any 3xx (Redirection) response to indicate the minimum time the user-agent is asked to wait before issuing the redirected request. The value of this field can be either an HTTP-date or an integer number of seconds (in decimal) after the time of the response. Retry-After = "Retry-After" ":" ( HTTP-date | delta-seconds ) Two examples of its use are Retry-After: Fri, 31 Dec 1999 23:59:59 GMT Retry-After: 120 In the latter example, the delay is 2 minutes. 
14.38 Server The Server response-header field contains information about the software used by the origin server to handle the request. The field can contain multiple product tokens (section 3.8) and comments identifying the server and any significant subproducts. The product tokens are listed in order of their significance for identifying the application. Server = "Server" ":" 1*( product | comment ) Example: Server: CERN/3.0 libwww/2.17 If the response is being forwarded through a proxy, the proxy application MUST NOT modify the Server response-header. Instead, it SHOULD include a Via field (as described in section 14.45). Note: Revealing the specific software version of the server might allow the server machine to become more vulnerable to attacks against software that is known to contain security holes. Server implementors are encouraged to make this field a configurable option. Fielding, et al. Standards Track [Page 141] RFC 2616 HTTP/1.1 June 1999 14.39 TE The TE request-header field indicates what extension transfer-codings it is willing to accept in the response and whether or not it is willing to accept trailer fields in a chunked transfer-coding. Its value may consist of the keyword "trailers" and/or a comma-separated list of extension transfer-coding names with optional accept parameters (as described in section 3.6). TE = "TE" ":" #( t-codings ) t-codings = "trailers" | ( transfer-extension [ accept-params ] ) The presence of the keyword "trailers" indicates that the client is willing to accept trailer fields in a chunked transfer-coding, as defined in section 3.6.1. This keyword is reserved for use with transfer-coding values even though it does not itself represent a transfer-coding. Examples of its use are: TE: deflate TE: TE: trailers, deflate;q=0.5 The TE header field only applies to the immediate connection. Therefore, the keyword MUST be supplied within a Connection header field (section 14.10) whenever TE is present in an HTTP/1.1 message. 
A server tests whether a transfer-coding is acceptable, according to a TE field, using these rules: 1. The "chunked" transfer-coding is always acceptable. If the keyword "trailers" is listed, the client indicates that it is willing to accept trailer fields in the chunked response on behalf of itself and any downstream clients. The implication is that, if given, the client is stating that either all downstream clients are willing to accept trailer fields in the forwarded response, or that it will attempt to buffer the response on behalf of downstream recipients. Note: HTTP/1.1 does not define any means to limit the size of a chunked response such that a client can be assured of buffering the entire response. 2. If the transfer-coding being tested is one of the transfer-codings listed in the TE field, then it is acceptable unless it is accompanied by a qvalue of 0. (As defined in section 3.9, a qvalue of 0 means "not acceptable.") Fielding, et al. Standards Track [Page 142] RFC 2616 HTTP/1.1 June 1999 3. If multiple transfer-codings are acceptable, then the acceptable transfer-coding with the highest non-zero qvalue is preferred. The "chunked" transfer-coding always has a qvalue of 1. If the TE field-value is empty or if no TE field is present, the only transfer-coding is "chunked". A message with no transfer-coding is always acceptable. 14.40 Trailer The Trailer general field value indicates that the given set of header fields is present in the trailer of a message encoded with chunked transfer-coding. Trailer = "Trailer" ":" 1#field-name An HTTP/1.1 message SHOULD include a Trailer header field in a message using chunked transfer-coding with a non-empty trailer. Doing so allows the recipient to know which header fields to expect in the trailer. If no Trailer header field is present, the trailer SHOULD NOT include any header fields. See section 3.6.1 for restrictions on the use of trailer fields in a "chunked" transfer-coding. 
Message header fields listed in the Trailer header field MUST NOT include the following header fields: . Transfer-Encoding . Content-Length . Trailer 14.41 Transfer-Encoding The Transfer-Encoding general-header field indicates what (if any) type of transformation has been applied to the message body in order to safely transfer it between the sender and the recipient. This differs from the content-coding in that the transfer-coding is a property of the message, not of the entity. Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer-coding Transfer-codings are defined in section 3.6. An example is: Transfer-Encoding: chunked Fielding, et al. Standards Track [Page 143] RFC 2616 HTTP/1.1 June 1999 If multiple encodings have been applied to an entity, the transfer-codings MUST be listed in the order in which they were applied. Additional information about the encoding parameters MAY be provided by other entity-header fields not defined by this specification. Many older HTTP/1.0 applications do not understand the Transfer-Encoding header. 14.42 Upgrade The Upgrade general-header allows the client to specify what additional communication protocols it supports and would like to use if the server finds it appropriate to switch protocols. The server MUST use the Upgrade header field within a 101 (Switching Protocols) response to indicate which protocol(s) are being switched. Upgrade = "Upgrade" ":" 1#product For example, Upgrade: HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11 The Upgrade header field is intended to provide a simple mechanism for transition from HTTP/1.1 to some other, incompatible protocol. It does so by allowing the client to advertise its desire to use another protocol, such as a later version of HTTP with a higher major version number, even though the current request has been made using HTTP/1.1. 
This eases the difficult transition between incompatible protocols by allowing the client to initiate a request in the more commonly supported protocol while indicating to the server that it would like to use a "better" protocol if available (where "better" is determined by the server, possibly according to the nature of the method and/or resource being requested). The Upgrade header field only applies to switching application-layer protocols upon the existing transport-layer connection. Upgrade cannot be used to insist on a protocol change; its acceptance and use by the server is optional. The capabilities and nature of the application-layer communication after the protocol change is entirely dependent upon the new protocol chosen, although the first action after changing the protocol MUST be a response to the initial HTTP request containing the Upgrade header field. The Upgrade header field only applies to the immediate connection. Therefore, the upgrade keyword MUST be supplied within a Connection header field (section 14.10) whenever Upgrade is present in an HTTP/1.1 message. Fielding, et al. Standards Track [Page 144] RFC 2616 HTTP/1.1 June 1999 The Upgrade header field cannot be used to indicate a switch to a protocol on a different connection. For that purpose, it is more appropriate to use a 301, 302, 303, or 305 redirection response. This specification only defines the protocol name "HTTP" for use by the family of Hypertext Transfer Protocols, as defined by the HTTP version rules of section 3.1 and future updates to this specification. Any token can be used as a protocol name; however, it will only be useful if both the client and server associate the name with the same protocol. 14.43 User-Agent The User-Agent request-header field contains information about the user agent originating the request. 
This is for statistical purposes, the tracing of protocol violations, and automated recognition of user agents for the sake of tailoring responses to avoid particular user agent limitations. User agents SHOULD include this field with requests. The field can contain multiple product tokens (section 3.8) and comments identifying the agent and any subproducts which form a significant part of the user agent. By convention, the product tokens are listed in order of their significance for identifying the application. User-Agent = "User-Agent" ":" 1*( product | comment ) Example: User-Agent: CERN-LineMode/2.15 libwww/2.17b3 14.44 Vary The Vary field value indicates the set of request-header fields that fully determines, while the response is fresh, whether a cache is permitted to use the response to reply to a subsequent request without revalidation. For uncacheable or stale responses, the Vary field value advises the user agent about the criteria that were used to select the representation. A Vary field value of "*" implies that a cache cannot determine from the request headers of a subsequent request whether this response is the appropriate representation. See section 13.6 for use of the Vary header field by caches. Vary = "Vary" ":" ( "*" | 1#field-name ) An HTTP/1.1 server SHOULD include a Vary header field with any cacheable response that is subject to server-driven negotiation. Doing so allows a cache to properly interpret future requests on that resource and informs the user agent about the presence of negotiation Fielding, et al. Standards Track [Page 145] RFC 2616 HTTP/1.1 June 1999 on that resource. A server MAY include a Vary header field with a non-cacheable response that is subject to server-driven negotiation, since this might provide the user agent with useful information about the dimensions over which the response varies at the time of the response. 
A Vary field value consisting of a list of field-names signals that the representation selected for the response is based on a selection algorithm which considers ONLY the listed request-header field values in selecting the most appropriate representation. A cache MAY assume that the same selection will be made for future requests with the same values for the listed field names, for the duration of time for which the response is fresh. The field-names given are not limited to the set of standard request-header fields defined by this specification. Field names are case-insensitive. A Vary field value of "*" signals that unspecified parameters not limited to the request-headers (e.g., the network address of the client), play a role in the selection of the response representation. The "*" value MUST NOT be generated by a proxy server; it may only be generated by an origin server. 14.45 Via The Via general-header field MUST be used by gateways and proxies to indicate the intermediate protocols and recipients between the user agent and the server on requests, and between the origin server and the client on responses. It is analogous to the "Received" field of RFC 822 [9] and is intended to be used for tracking message forwards, avoiding request loops, and identifying the protocol capabilities of all senders along the request/response chain. Via = "Via" ":" 1#( received-protocol received-by [ comment ] ) received-protocol = [ protocol-name "/" ] protocol-version protocol-name = token protocol-version = token received-by = ( host [ ":" port ] ) | pseudonym pseudonym = token The received-protocol indicates the protocol version of the message received by the server or client along each segment of the request/response chain. The received-protocol version is appended to the Via field value when the message is forwarded so that information about the protocol capabilities of upstream applications remains visible to all recipients. Fielding, et al. 
Standards Track [Page 146] RFC 2616 HTTP/1.1 June 1999 The protocol-name is optional if and only if it would be "HTTP". The received-by field is normally the host and optional port number of a recipient server or client that subsequently forwarded the message. However, if the real host is considered to be sensitive information, it MAY be replaced by a pseudonym. If the port is not given, it MAY be assumed to be the default port of the received-protocol. Multiple Via field values represent each proxy or gateway that has forwarded the message. Each recipient MUST append its information such that the end result is ordered according to the sequence of forwarding applications. Comments MAY be used in the Via header field to identify the software of the recipient proxy or gateway, analogous to the User-Agent and Server header fields. However, all comments in the Via field are optional and MAY be removed by any recipient prior to forwarding the message. For example, a request message could be sent from an HTTP/1.0 user agent to an internal proxy code-named "fred", which uses HTTP/1.1 to forward the request to a public proxy at nowhere.com, which completes the request by forwarding it to the origin server at www.ics.uci.edu. The request received by www.ics.uci.edu would then have the following Via header field: Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) Proxies and gateways used as a portal through a network firewall SHOULD NOT, by default, forward the names and ports of hosts within the firewall region. This information SHOULD only be propagated if explicitly enabled. If not enabled, the received-by host of any host behind the firewall SHOULD be replaced by an appropriate pseudonym for that host. For organizations that have strong privacy requirements for hiding internal structures, a proxy MAY combine an ordered subsequence of Via header field entries with identical received-protocol values into a single such entry. 
For example, Via: 1.0 ricky, 1.1 ethel, 1.1 fred, 1.0 lucy could be collapsed to Via: 1.0 ricky, 1.1 mertz, 1.0 lucy Fielding, et al. Standards Track [Page 147] RFC 2616 HTTP/1.1 June 1999 Applications SHOULD NOT combine multiple entries unless they are all under the same organizational control and the hosts have already been replaced by pseudonyms. Applications MUST NOT combine entries which have different received-protocol values. 14.46 Warning The Warning general-header field is used to carry additional information about the status or transformation of a message which might not be reflected in the message. This information is typically used to warn about a possible lack of semantic transparency from caching operations or transformations applied to the entity body of the message. Warning headers are sent with responses using: Warning = "Warning" ":" 1#warning-value warning-value = warn-code SP warn-agent SP warn-text [SP warn-date] warn-code = 3DIGIT warn-agent = ( host [ ":" port ] ) | pseudonym ; the name or pseudonym of the server adding ; the Warning header, for use in debugging warn-text = quoted-string warn-date = <"> HTTP-date <"> A response MAY carry more than one Warning header. The warn-text SHOULD be in a natural language and character set that is most likely to be intelligible to the human user receiving the response. This decision MAY be based on any available knowledge, such as the location of the cache or user, the Accept-Language field in a request, the Content-Language field in a response, etc. The default language is English and the default character set is ISO-8859-1. If a character set other than ISO-8859-1 is used, it MUST be encoded in the warn-text using the method described in RFC 2047 [14]. Warning headers can in general be applied to any message, however some specific warn-codes are specific to caches and can only be applied to response messages. New Warning headers SHOULD be added after any existing Warning headers. 
A cache MUST NOT delete any Warning header that it received with a message. However, if a cache successfully validates a cache entry, it SHOULD remove any Warning headers previously attached to that entry except as specified for Fielding, et al. Standards Track [Page 148] RFC 2616 HTTP/1.1 June 1999 specific Warning codes. It MUST then add any Warning headers received in the validating response. In other words, Warning headers are those that would be attached to the most recent relevant response. When multiple Warning headers are attached to a response, the user agent ought to inform the user of as many of them as possible, in the order that they appear in the response. If it is not possible to inform the user of all of the warnings, the user agent SHOULD follow these heuristics: - Warnings that appear early in the response take priority over those appearing later in the response. - Warnings in the user's preferred character set take priority over warnings in other character sets but with identical warn-codes and warn-agents. Systems that generate multiple Warning headers SHOULD order them with this user agent behavior in mind. Requirements for the behavior of caches with respect to Warnings are stated in section 13.1.2. This is a list of the currently-defined warn-codes, each with a recommended warn-text in English, and a description of its meaning. 110 Response is stale MUST be included whenever the returned response is stale. 111 Revalidation failed MUST be included if a cache returns a stale response because an attempt to revalidate the response failed, due to an inability to reach the server. 112 Disconnected operation SHOULD be included if the cache is intentionally disconnected from the rest of the network for a period of time. 113 Heuristic expiration MUST be included if the cache heuristically chose a freshness lifetime greater than 24 hours and the response's age is greater than 24 hours. 
199 Miscellaneous warning The warning text MAY include arbitrary information to be presented to a human user, or logged. A system receiving this warning MUST NOT take any automated action, besides presenting the warning to the user. Fielding, et al. Standards Track [Page 149] RFC 2616 HTTP/1.1 June 1999 214 Transformation applied MUST be added by an intermediate cache or proxy if it applies any transformation changing the content-coding (as specified in the Content-Encoding header) or media-type (as specified in the Content-Type header) of the response, or the entity-body of the response, unless this Warning code already appears in the response. 299 Miscellaneous persistent warning The warning text MAY include arbitrary information to be presented to a human user, or logged. A system receiving this warning MUST NOT take any automated action. If an implementation sends a message with one or more Warning headers whose version is HTTP/1.0 or lower, then the sender MUST include in each warning-value a warn-date that matches the date in the response. If an implementation receives a message with a warning-value that includes a warn-date, and that warn-date is different from the Date value in the response, then that warning-value MUST be deleted from the message before storing, forwarding, or using it. (This prevents bad consequences of naive caching of Warning header fields.) If all of the warning-values are deleted for this reason, the Warning header MUST be deleted as well. 14.47 WWW-Authenticate The WWW-Authenticate response-header field MUST be included in 401 (Unauthorized) response messages. The field value consists of at least one challenge that indicates the authentication scheme(s) and parameters applicable to the Request-URI. WWW-Authenticate = "WWW-Authenticate" ":" 1#challenge The HTTP access authentication process is described in "HTTP Authentication: Basic and Digest Access Authentication" [43]. 
User agents are advised to take special care in parsing the WWW-Authenticate field value as it might contain more than one challenge, or if more than one WWW-Authenticate header field is provided, the contents of a challenge itself can contain a comma-separated list of authentication parameters. 15 Security Considerations This section is meant to inform application developers, information providers, and users of the security limitations in HTTP/1.1 as described by this document. The discussion does not include definitive solutions to the problems revealed, though it does make some suggestions for reducing security risks. Fielding, et al. Standards Track [Page 150] RFC 2616 HTTP/1.1 June 1999 15.1 Personal Information HTTP clients are often privy to large amounts of personal information (e.g. the user's name, location, mail address, passwords, encryption keys, etc.), and SHOULD be very careful to prevent unintentional leakage of this information via the HTTP protocol to other sources. We very strongly recommend that a convenient interface be provided for the user to control dissemination of such information, and that designers and implementors be particularly careful in this area. History shows that errors in this area often create serious security and/or privacy problems and generate highly adverse publicity for the implementor's company. 15.1.1 Abuse of Server Log Information A server is in the position to save personal data about a user's requests which might identify their reading patterns or subjects of interest. This information is clearly confidential in nature and its handling can be constrained by law in certain countries. People using the HTTP protocol to provide data are responsible for ensuring that such material is not distributed without the permission of any individuals that are identifiable by the published results. 
15.1.2 Transfer of Sensitive Information Like any generic data transfer protocol, HTTP cannot regulate the content of the data that is transferred, nor is there any a priori method of determining the sensitivity of any particular piece of information within the context of any given request. Therefore, applications SHOULD supply as much control over this information as possible to the provider of that information. Four header fields are worth special mention in this context: Server, Via, Referer and From. Revealing the specific software version of the server might allow the server machine to become more vulnerable to attacks against software that is known to contain security holes. Implementors SHOULD make the Server header field a configurable option. Proxies which serve as a portal through a network firewall SHOULD take special precautions regarding the transfer of header information that identifies the hosts behind the firewall. In particular, they SHOULD remove, or replace with sanitized versions, any Via fields generated behind the firewall. The Referer header allows reading patterns to be studied and reverse links drawn. Although it can be very useful, its power can be abused if user details are not separated from the information contained in Fielding, et al. Standards Track [Page 151] RFC 2616 HTTP/1.1 June 1999 the Referer. Even when the personal information has been removed, the Referer header might indicate a private document's URI whose publication would be inappropriate. The information sent in the From field might conflict with the user's privacy interests or their site's security policy, and hence it SHOULD NOT be transmitted without the user being able to disable, enable, and modify the contents of the field. The user MUST be able to set the contents of this field within a user preference or application defaults configuration. 
We suggest, though do not require, that a convenient toggle interface be provided for the user to enable or disable the sending of From and Referer information. The User-Agent (section 14.43) or Server (section 14.38) header fields can sometimes be used to determine that a specific client or server has a particular security hole which might be exploited. Unfortunately, this same information is often used for other valuable purposes for which HTTP currently has no better mechanism. 15.1.3 Encoding Sensitive Information in URI's Because the source of a link might be private information or might reveal an otherwise private information source, it is strongly recommended that the user be able to select whether or not the Referer field is sent. For example, a browser client could have a toggle switch for browsing openly/anonymously, which would respectively enable/disable the sending of Referer and From information. Clients SHOULD NOT include a Referer header field in a (non-secure) HTTP request if the referring page was transferred with a secure protocol. Authors of services which use the HTTP protocol SHOULD NOT use GET based forms for the submission of sensitive data, because this will cause this data to be encoded in the Request-URI. Many existing servers, proxies, and user agents will log the request URI in some place where it might be visible to third parties. Servers can use POST-based form submission instead. 15.1.4 Privacy Issues Connected to Accept Headers Accept request-headers can reveal information about the user to all servers which are accessed. The Accept-Language header in particular can reveal information the user would consider to be of a private nature, because the understanding of particular languages is often Fielding, et al. Standards Track [Page 152] RFC 2616 HTTP/1.1 June 1999 strongly correlated to the membership of a particular ethnic group. 
User agents which offer the option to configure the contents of an Accept-Language header to be sent in every request are strongly encouraged to let the configuration process include a message which makes the user aware of the loss of privacy involved. An approach that limits the loss of privacy would be for a user agent to omit the sending of Accept-Language headers by default, and to ask the user whether or not to start sending Accept-Language headers to a server if it detects, by looking for any Vary response-header fields generated by the server, that such sending could improve the quality of service. Elaborate user-customized accept header fields sent in every request, in particular if these include quality values, can be used by servers as relatively reliable and long-lived user identifiers. Such user identifiers would allow content providers to do click-trail tracking, and would allow collaborating content providers to match cross-server click-trails or form submissions of individual users. Note that for many users not behind a proxy, the network address of the host running the user agent will also serve as a long-lived user identifier. In environments where proxies are used to enhance privacy, user agents ought to be conservative in offering accept header configuration options to end users. As an extreme privacy measure, proxies could filter the accept headers in relayed requests. General purpose user agents which provide a high degree of header configurability SHOULD warn users about the loss of privacy which can be involved. 15.2 Attacks Based On File and Path Names Implementations of HTTP origin servers SHOULD be careful to restrict the documents returned by HTTP requests to be only those that were intended by the server administrators. If an HTTP server translates HTTP URIs directly into file system calls, the server MUST take special care not to serve files that were not intended to be delivered to HTTP clients. 
For example, UNIX, Microsoft Windows, and other operating systems use ".." as a path component to indicate a directory level above the current one. On such a system, an HTTP server MUST disallow any such construct in the Request-URI if it would otherwise allow access to a resource outside those intended to be accessible via the HTTP server. Similarly, files intended for reference only internally to the server (such as access control files, configuration files, and script code) MUST be protected from inappropriate retrieval, since they might contain sensitive information. Experience has shown that minor bugs in such HTTP server implementations have turned into security risks. Fielding, et al. Standards Track [Page 153] RFC 2616 HTTP/1.1 June 1999 15.3 DNS Spoofing Clients using HTTP rely heavily on the Domain Name Service, and are thus generally prone to security attacks based on the deliberate mis-association of IP addresses and DNS names. Clients need to be cautious in assuming the continuing validity of an IP number/DNS name association. In particular, HTTP clients SHOULD rely on their name resolver for confirmation of an IP number/DNS name association, rather than caching the result of previous host name lookups. Many platforms already can cache host name lookups locally when appropriate, and they SHOULD be configured to do so. It is proper for these lookups to be cached, however, only when the TTL (Time To Live) information reported by the name server makes it likely that the cached information will remain useful. If HTTP clients cache the results of host name lookups in order to achieve a performance improvement, they MUST observe the TTL information reported by DNS. If HTTP clients do not observe this rule, they could be spoofed when a previously-accessed server's IP address changes. As network renumbering is expected to become increasingly common [24], the possibility of this form of attack will grow. 
Observing this requirement thus reduces this potential security vulnerability. This requirement also improves the load-balancing behavior of clients for replicated servers using the same DNS name and reduces the likelihood of a user's experiencing failure in accessing sites which use that strategy. 15.4 Location Headers and Spoofing If a single server supports multiple organizations that do not trust one another, then it MUST check the values of Location and Content-Location headers in responses that are generated under control of said organizations to make sure that they do not attempt to invalidate resources over which they have no authority. 15.5 Content-Disposition Issues RFC 1806 [35], from which the often implemented Content-Disposition (see section 19.5.1) header in HTTP is derived, has a number of very serious security considerations. Content-Disposition is not part of the HTTP standard, but since it is widely implemented, we are documenting its use and risks for implementors. See RFC 2183 [49] (which updates RFC 1806) for details. Fielding, et al. Standards Track [Page 154] RFC 2616 HTTP/1.1 June 1999 15.6 Authentication Credentials and Idle Clients Existing HTTP clients and user agents typically retain authentication information indefinitely. HTTP/1.1 does not provide a method for a server to direct clients to discard these cached credentials. This is a significant defect that requires further extensions to HTTP. Circumstances under which credential caching can interfere with the application's security model include but are not limited to: - Clients which have been idle for an extended period following which the server might wish to cause the client to reprompt the user for credentials. - Applications which include a session termination indication (such as a `logout' or `commit' button on a page) after which the server side of the application `knows' that there is no further reason for the client to retain the credentials.
This is currently under separate study. There are a number of work-arounds to parts of this problem, and we encourage the use of password protection in screen savers, idle time-outs, and other methods which mitigate the security problems inherent in this problem. In particular, user agents which cache credentials are encouraged to provide a readily accessible mechanism for discarding cached credentials under user control. 15.7 Proxies and Caching By their very nature, HTTP proxies are men-in-the-middle, and represent an opportunity for man-in-the-middle attacks. Compromise of the systems on which the proxies run can result in serious security and privacy problems. Proxies have access to security-related information, personal information about individual users and organizations, and proprietary information belonging to users and content providers. A compromised proxy, or a proxy implemented or configured without regard to security and privacy considerations, might be used in the commission of a wide range of potential attacks. Proxy operators should protect the systems on which proxies run as they would protect any system that contains or transports sensitive information. In particular, log information gathered at proxies often contains highly sensitive personal information, and/or information about organizations. Log information should be carefully guarded, and appropriate guidelines for use developed and followed. (Section 15.1.1). Fielding, et al. Standards Track [Page 155] RFC 2616 HTTP/1.1 June 1999 Caching proxies provide additional potential vulnerabilities, since the contents of the cache represent an attractive target for malicious exploitation. Because cache contents persist after an HTTP request is complete, an attack on the cache can reveal information long after a user believes that the information has been removed from the network. Therefore, cache contents should be protected as sensitive information.
Proxy implementors should consider the privacy and security implications of their design and coding decisions, and of the configuration options they provide to proxy operators (especially the default configuration). Users of a proxy need to be aware that they are no trustworthier than the people who run the proxy; HTTP itself cannot solve this problem. The judicious use of cryptography, when appropriate, may suffice to protect against a broad range of security and privacy attacks. Such cryptography is beyond the scope of the HTTP/1.1 specification. 15.7.1 Denial of Service Attacks on Proxies They exist. They are hard to defend against. Research continues. Beware. 16 Acknowledgments This specification makes heavy use of the augmented BNF and generic constructs defined by David H. Crocker for RFC 822 [9]. Similarly, it reuses many of the definitions provided by Nathaniel Borenstein and Ned Freed for MIME [7]. We hope that their inclusion in this specification will help reduce past confusion over the relationship between HTTP and Internet mail message formats. The HTTP protocol has evolved considerably over the years. It has benefited from a large and active developer community--the many people who have participated on the www-talk mailing list--and it is that community which has been most responsible for the success of HTTP and of the World-Wide Web in general. Marc Andreessen, Robert Cailliau, Daniel W. Connolly, Bob Denny, John Franks, Jean-Francois Groff, Phillip M. Hallam-Baker, Hakon W. Lie, Ari Luotonen, Rob McCool, Lou Montulli, Dave Raggett, Tony Sanders, and Marc VanHeyningen deserve special recognition for their efforts in defining early aspects of the protocol. This document has benefited greatly from the comments of all those participating in the HTTP-WG. In addition to those already mentioned, the following individuals have contributed to this specification: Fielding, et al. 
Standards Track [Page 156] RFC 2616 HTTP/1.1 June 1999 Gary Adams Ross Patterson Harald Tveit Alvestrand Albert Lunde Keith Ball John C. Mallery Brian Behlendorf Jean-Philippe Martin-Flatin Paul Burchard Mitra Maurizio Codogno David Morris Mike Cowlishaw Gavin Nicol Roman Czyborra Bill Perry Michael A. Dolan Jeffrey Perry David J. Fiander Scott Powers Alan Freier Owen Rees Marc Hedlund Luigi Rizzo Greg Herlihy David Robinson Koen Holtman Marc Salomon Alex Hopmann Rich Salz Bob Jernigan Allan M. Schiffman Shel Kaphan Jim Seidman Rohit Khare Chuck Shotton John Klensin Eric W. Sink Martijn Koster Simon E. Spero Alexei Kosut Richard N. Taylor David M. Kristol Robert S. Thau Daniel LaLiberte Bill (BearHeart) Weinman Ben Laurie Francois Yergeau Paul J. Leach Mary Ellen Zurko Daniel DuBois Josh Cohen Much of the content and presentation of the caching design is due to suggestions and comments from individuals including: Shel Kaphan, Paul Leach, Koen Holtman, David Morris, and Larry Masinter. Most of the specification of ranges is based on work originally done by Ari Luotonen and John Franks, with additional input from Steve Zilles. Thanks to the "cave men" of Palo Alto. You know who you are. Jim Gettys (the current editor of this document) wishes particularly to thank Roy Fielding, the previous editor of this document, along with John Klensin, Jeff Mogul, Paul Leach, Dave Kristol, Koen Holtman, John Franks, Josh Cohen, Alex Hopmann, Scott Lawrence, and Larry Masinter for their help. And thanks go particularly to Jeff Mogul and Scott Lawrence for performing the "MUST/MAY/SHOULD" audit. Fielding, et al. Standards Track [Page 157] RFC 2616 HTTP/1.1 June 1999 The Apache Group, Anselm Baird-Smith, author of Jigsaw, and Henrik Frystyk implemented RFC 2068 early, and we wish to thank them for the discovery of many of the problems that this document attempts to rectify. 17 References [1] Alvestrand, H., "Tags for the Identification of Languages", RFC 1766, March 1995. 
[2] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D., Torrey, D. and B. Alberti, "The Internet Gopher Protocol (a distributed document search and retrieval protocol)", RFC 1436, March 1993. [3] Berners-Lee, T., "Universal Resource Identifiers in WWW", RFC 1630, June 1994. [4] Berners-Lee, T., Masinter, L. and M. McCahill, "Uniform Resource Locators (URL)", RFC 1738, December 1994. [5] Berners-Lee, T. and D. Connolly, "Hypertext Markup Language - 2.0", RFC 1866, November 1995. [6] Berners-Lee, T., Fielding, R. and H. Frystyk, "Hypertext Transfer Protocol -- HTTP/1.0", RFC 1945, May 1996. [7] Freed, N. and N. Borenstein, "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies", RFC 2045, November 1996. [8] Braden, R., "Requirements for Internet Hosts -- Communication Layers", STD 3, RFC 1123, October 1989. [9] Crocker, D., "Standard for The Format of ARPA Internet Text Messages", STD 11, RFC 822, August 1982. [10] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R., Sui, J., and M. Grinbaum, "WAIS Interface Protocol Prototype Functional Specification," (v1.5), Thinking Machines Corporation, April 1990. [11] Fielding, R., "Relative Uniform Resource Locators", RFC 1808, June 1995. [12] Horton, M. and R. Adams, "Standard for Interchange of USENET Messages", RFC 1036, December 1987. Fielding, et al. Standards Track [Page 158] RFC 2616 HTTP/1.1 June 1999 [13] Kantor, B. and P. Lapsley, "Network News Transfer Protocol", RFC 977, February 1986. [14] Moore, K., "MIME (Multipurpose Internet Mail Extensions) Part Three: Message Header Extensions for Non-ASCII Text", RFC 2047, November 1996. [15] Nebel, E. and L. Masinter, "Form-based File Upload in HTML", RFC 1867, November 1995. [16] Postel, J., "Simple Mail Transfer Protocol", STD 10, RFC 821, August 1982. [17] Postel, J., "Media Type Registration Procedure", RFC 1590, November 1996. [18] Postel, J. and J. Reynolds, "File Transfer Protocol", STD 9, RFC 959, October 1985. 
[19] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC 1700, October 1994. [20] Sollins, K. and L. Masinter, "Functional Requirements for Uniform Resource Names", RFC 1737, December 1994. [21] US-ASCII. Coded Character Set - 7-Bit American Standard Code for Information Interchange. Standard ANSI X3.4-1986, ANSI, 1986. [22] ISO-8859. International Standard -- Information Processing -- 8-bit Single-Byte Coded Graphic Character Sets -- Part 1: Latin alphabet No. 1, ISO-8859-1:1987. Part 2: Latin alphabet No. 2, ISO-8859-2, 1987. Part 3: Latin alphabet No. 3, ISO-8859-3, 1988. Part 4: Latin alphabet No. 4, ISO-8859-4, 1988. Part 5: Latin/Cyrillic alphabet, ISO-8859-5, 1988. Part 6: Latin/Arabic alphabet, ISO-8859-6, 1987. Part 7: Latin/Greek alphabet, ISO-8859-7, 1987. Part 8: Latin/Hebrew alphabet, ISO-8859-8, 1988. Part 9: Latin alphabet No. 5, ISO-8859-9, 1990. [23] Meyers, J. and M. Rose, "The Content-MD5 Header Field", RFC 1864, October 1995. [24] Carpenter, B. and Y. Rekhter, "Renumbering Needs Work", RFC 1900, February 1996. [25] Deutsch, P., "GZIP file format specification version 4.3", RFC 1952, May 1996. Fielding, et al. Standards Track [Page 159] RFC 2616 HTTP/1.1 June 1999 [26] Venkata N. Padmanabhan, and Jeffrey C. Mogul. "Improving HTTP Latency", Computer Networks and ISDN Systems, v. 28, pp. 25-35, Dec. 1995. Slightly revised version of paper in Proc. 2nd International WWW Conference '94: Mosaic and the Web, Oct. 1994, which is available at http://www.ncsa.uiuc.edu/SDG/IT94/Proceedings/DDay/mogul/HTTPLatency.html. [27] Joe Touch, John Heidemann, and Katia Obraczka. "Analysis of HTTP Performance", http://www.isi.edu/touch/pubs/http-perf96/, ISI Research Report ISI/RR-98-463, (original report dated Aug. 1996), USC/Information Sciences Institute, August 1998. [28] Mills, D., "Network Time Protocol (Version 3) Specification, Implementation and Analysis", RFC 1305, March 1992. [29] Deutsch, P., "DEFLATE Compressed Data Format Specification version 1.3", RFC 1951, May 1996. [30] S.
Spero, "Analysis of HTTP Performance Problems," http://sunsite.unc.edu/mdma-release/http-prob.html. [31] Deutsch, P. and J. Gailly, "ZLIB Compressed Data Format Specification version 3.3", RFC 1950, May 1996. [32] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, P., Luotonen, A., Sink, E. and L. Stewart, "An Extension to HTTP: Digest Access Authentication", RFC 2069, January 1997. [33] Fielding, R., Gettys, J., Mogul, J., Frystyk, H. and T. Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", RFC 2068, January 1997. [34] Bradner, S., "Key words for use in RFCs to Indicate Requirement Levels", BCP 14, RFC 2119, March 1997. [35] Troost, R. and Dorner, S., "Communicating Presentation Information in Internet Messages: The Content-Disposition Header", RFC 1806, June 1995. [36] Mogul, J., Fielding, R., Gettys, J. and H. Frystyk, "Use and Interpretation of HTTP Version Numbers", RFC 2145, May 1997. [jg639] [37] Palme, J., "Common Internet Message Headers", RFC 2076, February 1997. [jg640] Fielding, et al. Standards Track [Page 160] RFC 2616 HTTP/1.1 June 1999 [38] Yergeau, F., "UTF-8, a transformation format of Unicode and ISO-10646", RFC 2279, January 1998. [jg641] [39] Nielsen, H.F., Gettys, J., Baird-Smith, A., Prud'hommeaux, E., Lie, H., and C. Lilley. "Network Performance Effects of HTTP/1.1, CSS1, and PNG," Proceedings of ACM SIGCOMM '97, Cannes France, September 1997.[jg642] [40] Freed, N. and N. Borenstein, "Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types", RFC 2046, November 1996. [jg643] [41] Alvestrand, H., "IETF Policy on Character Sets and Languages", BCP 18, RFC 2277, January 1998. [jg644] [42] Berners-Lee, T., Fielding, R. and L. Masinter, "Uniform Resource Identifiers (URI): Generic Syntax and Semantics", RFC 2396, August 1998. [jg645] [43] Franks, J., Hallam-Baker, P., Hostetler, J., Lawrence, S., Leach, P., Luotonen, A., Sink, E. and L. Stewart, "HTTP Authentication: Basic and Digest Access Authentication", RFC 2617, June 1999. 
[jg646] [44] Luotonen, A., "Tunneling TCP based protocols through Web proxy servers," Work in Progress. [jg647] [45] Palme, J. and A. Hopmann, "MIME E-mail Encapsulation of Aggregate Documents, such as HTML (MHTML)", RFC 2110, March 1997. [46] Bradner, S., "The Internet Standards Process -- Revision 3", BCP 9, RFC 2026, October 1996. [47] Masinter, L., "Hyper Text Coffee Pot Control Protocol (HTCPCP/1.0)", RFC 2324, 1 April 1998. [48] Freed, N. and N. Borenstein, "Multipurpose Internet Mail Extensions (MIME) Part Five: Conformance Criteria and Examples", RFC 2049, November 1996. [49] Troost, R., Dorner, S. and K. Moore, "Communicating Presentation Information in Internet Messages: The Content-Disposition Header Field", RFC 2183, August 1997. Fielding, et al. Standards Track [Page 161] RFC 2616 HTTP/1.1 June 1999 18 Authors' Addresses Roy T. Fielding Information and Computer Science University of California, Irvine Irvine, CA 92697-3425, USA Fax: +1 (949) 824-1715 EMail: fielding@ics.uci.edu James Gettys World Wide Web Consortium MIT Laboratory for Computer Science 545 Technology Square Cambridge, MA 02139, USA Fax: +1 (617) 258 8682 EMail: jg@w3.org Jeffrey C. Mogul Western Research Laboratory Compaq Computer Corporation 250 University Avenue Palo Alto, California, 94305, USA EMail: mogul@wrl.dec.com Henrik Frystyk Nielsen World Wide Web Consortium MIT Laboratory for Computer Science 545 Technology Square Cambridge, MA 02139, USA Fax: +1 (617) 258 8682 EMail: frystyk@w3.org Larry Masinter Xerox Corporation 3333 Coyote Hill Road Palo Alto, CA 94034, USA EMail: masinter@parc.xerox.com Fielding, et al. Standards Track [Page 162] RFC 2616 HTTP/1.1 June 1999 Paul J. Leach Microsoft Corporation 1 Microsoft Way Redmond, WA 98052, USA EMail: paulle@microsoft.com Tim Berners-Lee Director, World Wide Web Consortium MIT Laboratory for Computer Science 545 Technology Square Cambridge, MA 02139, USA Fax: +1 (617) 258 8682 EMail: timbl@w3.org Fielding, et al. 
Standards Track [Page 163] RFC 2616 HTTP/1.1 June 1999 19 Appendices 19.1 Internet Media Type message/http and application/http In addition to defining the HTTP/1.1 protocol, this document serves as the specification for the Internet media type "message/http" and "application/http". The message/http type can be used to enclose a single HTTP request or response message, provided that it obeys the MIME restrictions for all "message" types regarding line length and encodings. The application/http type can be used to enclose a pipeline of one or more HTTP request or response messages (not intermixed). The following is to be registered with IANA [17]. Media Type name: message Media subtype name: http Required parameters: none Optional parameters: version, msgtype version: The HTTP-Version number of the enclosed message (e.g., "1.1"). If not present, the version can be determined from the first line of the body. msgtype: The message type -- "request" or "response". If not present, the type can be determined from the first line of the body. Encoding considerations: only "7bit", "8bit", or "binary" are permitted Security considerations: none Media Type name: application Media subtype name: http Required parameters: none Optional parameters: version, msgtype version: The HTTP-Version number of the enclosed messages (e.g., "1.1"). If not present, the version can be determined from the first line of the body. msgtype: The message type -- "request" or "response". If not present, the type can be determined from the first line of the body. Encoding considerations: HTTP messages enclosed by this type are in "binary" format; use of an appropriate Content-Transfer-Encoding is required when transmitted via E-mail. Security considerations: none Fielding, et al. 
Standards Track [Page 164] RFC 2616 HTTP/1.1 June 1999 19.2 Internet Media Type multipart/byteranges When an HTTP 206 (Partial Content) response message includes the content of multiple ranges (a response to a request for multiple non-overlapping ranges), these are transmitted as a multipart message-body. The media type for this purpose is called "multipart/byteranges". The multipart/byteranges media type includes two or more parts, each with its own Content-Type and Content-Range fields. The required boundary parameter specifies the boundary string used to separate each body-part. Media Type name: multipart Media subtype name: byteranges Required parameters: boundary Optional parameters: none Encoding considerations: only "7bit", "8bit", or "binary" are permitted Security considerations: none For example: HTTP/1.1 206 Partial Content Date: Wed, 15 Nov 1995 06:25:24 GMT Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES --THIS_STRING_SEPARATES Content-type: application/pdf Content-range: bytes 500-999/8000 ...the first range... --THIS_STRING_SEPARATES Content-type: application/pdf Content-range: bytes 7000-7999/8000 ...the second range --THIS_STRING_SEPARATES-- Notes: 1) Additional CRLFs may precede the first boundary string in the entity. Fielding, et al. Standards Track [Page 165] RFC 2616 HTTP/1.1 June 1999 2) Although RFC 2046 [40] permits the boundary string to be quoted, some existing implementations handle a quoted boundary string incorrectly. 3) A number of browsers and servers were coded to an early draft of the byteranges specification to use a media type of multipart/x-byteranges, which is almost, but not quite compatible with the version documented in HTTP/1.1. 19.3 Tolerant Applications Although this document specifies the requirements for the generation of HTTP/1.1 messages, not all applications will be correct in their implementation. 
We therefore recommend that operational applications be tolerant of deviations whenever those deviations can be interpreted unambiguously. Clients SHOULD be tolerant in parsing the Status-Line and servers tolerant when parsing the Request-Line. In particular, they SHOULD accept any amount of SP or HT characters between fields, even though only a single SP is required. The line terminator for message-header fields is the sequence CRLF. However, we recommend that applications, when parsing such headers, recognize a single LF as a line terminator and ignore the leading CR. The character set of an entity-body SHOULD be labeled as the lowest common denominator of the character codes used within that body, with the exception that not labeling the entity is preferred over labeling the entity with the labels US-ASCII or ISO-8859-1. See section 3.7.1 and 3.4.1. Additional rules for requirements on parsing and encoding of dates and other potential problems with date encodings include: - HTTP/1.1 clients and caches SHOULD assume that an RFC-850 date which appears to be more than 50 years in the future is in fact in the past (this helps solve the "year 2000" problem). - An HTTP/1.1 implementation MAY internally represent a parsed Expires date as earlier than the proper value, but MUST NOT internally represent a parsed Expires date as later than the proper value. - All expiration-related calculations MUST be done in GMT. The local time zone MUST NOT influence the calculation or comparison of an age or expiration time. Fielding, et al. Standards Track [Page 166] RFC 2616 HTTP/1.1 June 1999 - If an HTTP header incorrectly carries a date value with a time zone other than GMT, it MUST be converted into GMT using the most conservative possible conversion. 
19.4 Differences Between HTTP Entities and RFC 2045 Entities HTTP/1.1 uses many of the constructs defined for Internet Mail (RFC 822 [9]) and the Multipurpose Internet Mail Extensions (MIME [7]) to allow entities to be transmitted in an open variety of representations and with extensible mechanisms. However, RFC 2045 discusses mail, and HTTP has a few features that are different from those described in RFC 2045. These differences were carefully chosen to optimize performance over binary connections, to allow greater freedom in the use of new media types, to make date comparisons easier, and to acknowledge the practice of some early HTTP servers and clients. This appendix describes specific areas where HTTP differs from RFC 2045. Proxies and gateways to strict MIME environments SHOULD be aware of these differences and provide the appropriate conversions where necessary. Proxies and gateways from MIME environments to HTTP also need to be aware of the differences because some conversions might be required. 19.4.1 MIME-Version HTTP is not a MIME-compliant protocol. However, HTTP/1.1 messages MAY include a single MIME-Version general-header field to indicate what version of the MIME protocol was used to construct the message. Use of the MIME-Version header field indicates that the message is in full compliance with the MIME protocol (as defined in RFC 2045[7]). Proxies/gateways are responsible for ensuring full compliance (where possible) when exporting HTTP messages to strict MIME environments. MIME-Version = "MIME-Version" ":" 1*DIGIT "." 1*DIGIT MIME version "1.0" is the default for use in HTTP/1.1. However, HTTP/1.1 message parsing and semantics are defined by this document and not the MIME specification. 19.4.2 Conversion to Canonical Form RFC 2045 [7] requires that an Internet mail entity be converted to canonical form prior to being transferred, as described in section 4 of RFC 2049 [48]. 
Section 3.7.1 of this document describes the forms allowed for subtypes of the "text" media type when transmitted over HTTP. RFC 2046 requires that content with a type of "text" represent line breaks as CRLF and forbids the use of CR or LF outside of line Fielding, et al. Standards Track [Page 167] RFC 2616 HTTP/1.1 June 1999 break sequences. HTTP allows CRLF, bare CR, and bare LF to indicate a line break within text content when a message is transmitted over HTTP. Where it is possible, a proxy or gateway from HTTP to a strict MIME environment SHOULD translate all line breaks within the text media types described in section 3.7.1 of this document to the RFC 2049 canonical form of CRLF. Note, however, that this might be complicated by the presence of a Content-Encoding and by the fact that HTTP allows the use of some character sets which do not use octets 13 and 10 to represent CR and LF, as is the case for some multi-byte character sets. Implementors should note that conversion will break any cryptographic checksums applied to the original content unless the original content is already in canonical form. Therefore, the canonical form is recommended for any content that uses such checksums in HTTP. 19.4.3 Conversion of Date Formats HTTP/1.1 uses a restricted set of date formats (section 3.3.1) to simplify the process of date comparison. Proxies and gateways from other protocols SHOULD ensure that any Date header field present in a message conforms to one of the HTTP/1.1 formats and rewrite the date if necessary. 19.4.4 Introduction of Content-Encoding RFC 2045 does not include any concept equivalent to HTTP/1.1's Content-Encoding header field. Since this acts as a modifier on the media type, proxies and gateways from HTTP to MIME-compliant protocols MUST either change the value of the Content-Type header field or decode the entity-body before forwarding the message. 
(Some experimental applications of Content-Type for Internet mail have used a media-type parameter of ";conversions=" to perform a function equivalent to Content-Encoding. However, this parameter is not part of RFC 2045.) 19.4.5 No Content-Transfer-Encoding HTTP does not use the Content-Transfer-Encoding (CTE) field of RFC 2045. Proxies and gateways from MIME-compliant protocols to HTTP MUST remove any non-identity CTE ("quoted-printable" or "base64") encoding prior to delivering the response message to an HTTP client. Proxies and gateways from HTTP to MIME-compliant protocols are responsible for ensuring that the message is in the correct format and encoding for safe transport on that protocol, where "safe Fielding, et al. Standards Track [Page 168] RFC 2616 HTTP/1.1 June 1999 transport" is defined by the limitations of the protocol being used. Such a proxy or gateway SHOULD label the data with an appropriate Content-Transfer-Encoding if doing so will improve the likelihood of safe transport over the destination protocol. 19.4.6 Introduction of Transfer-Encoding HTTP/1.1 introduces the Transfer-Encoding header field (section 14.41). Proxies/gateways MUST remove any transfer-coding prior to forwarding a message via a MIME-compliant protocol. A process for decoding the "chunked" transfer-coding (section 3.6) can be represented in pseudo-code as: length := 0 read chunk-size, chunk-extension (if any) and CRLF while (chunk-size > 0) { read chunk-data and CRLF append chunk-data to entity-body length := length + chunk-size read chunk-size and CRLF } read entity-header while (entity-header not empty) { append entity-header to existing header fields read entity-header } Content-Length := length Remove "chunked" from Transfer-Encoding 19.4.7 MHTML and Line Length Limitations HTTP implementations which share code with MHTML [45] implementations need to be aware of MIME line length limitations. Since HTTP does not have this limitation, HTTP does not fold long lines. 
MHTML messages being transported by HTTP follow all conventions of MHTML, including line length limitations and folding, canonicalization, etc., since HTTP transports all message-bodies as payload (see section 3.7.2) and does not interpret the content or any MIME header lines that might be contained therein. 19.5 Additional Features RFC 1945 and RFC 2068 document protocol elements used by some existing HTTP implementations, but not consistently and correctly across most HTTP/1.1 applications. Implementors are advised to be aware of these features, but cannot rely upon their presence in, or interoperability with, other HTTP/1.1 applications. Some of these Fielding, et al. Standards Track [Page 169] RFC 2616 HTTP/1.1 June 1999 describe proposed experimental features, and some describe features that experimental deployment found lacking that are now addressed in the base HTTP/1.1 specification. A number of other headers, such as Content-Disposition and Title, from SMTP and MIME are also often implemented (see RFC 2076 [37]). 19.5.1 Content-Disposition The Content-Disposition response-header field has been proposed as a means for the origin server to suggest a default filename if the user requests that the content is saved to a file. This usage is derived from the definition of Content-Disposition in RFC 1806 [35]. content-disposition = "Content-Disposition" ":" disposition-type *( ";" disposition-parm ) disposition-type = "attachment" | disp-extension-token disposition-parm = filename-parm | disp-extension-parm filename-parm = "filename" "=" quoted-string disp-extension-token = token disp-extension-parm = token "=" ( token | quoted-string ) An example is Content-Disposition: attachment; filename="fname.ext" The receiving user agent SHOULD NOT respect any directory path information present in the filename-parm parameter, which is the only parameter believed to apply to HTTP implementations at this time. The filename SHOULD be treated as a terminal component only. 
If this header is used in a response with the application/octet-stream content-type, the implied suggestion is that the user agent should not display the response, but directly enter a `save response as...' dialog. See section 15.5 for Content-Disposition security issues. 19.6 Compatibility with Previous Versions It is beyond the scope of a protocol specification to mandate compliance with previous versions. HTTP/1.1 was deliberately designed, however, to make supporting previous versions easy. It is worth noting that, at the time of composing this specification (1996), we would expect commercial HTTP/1.1 servers to: - recognize the format of the Request-Line for HTTP/0.9, 1.0, and 1.1 requests; Fielding, et al. Standards Track [Page 170] RFC 2616 HTTP/1.1 June 1999 - understand any valid request in the format of HTTP/0.9, 1.0, or 1.1; - respond appropriately with a message in the same major version used by the client. And we would expect HTTP/1.1 clients to: - recognize the format of the Status-Line for HTTP/1.0 and 1.1 responses; - understand any valid response in the format of HTTP/0.9, 1.0, or 1.1. For most implementations of HTTP/1.0, each connection is established by the client prior to the request and closed by the server after sending the response. Some implementations implement the Keep-Alive version of persistent connections described in section 19.7.1 of RFC 2068 [33]. 19.6.1 Changes from HTTP/1.0 This section summarizes major differences between versions HTTP/1.0 and HTTP/1.1. 19.6.1.1 Changes to Simplify Multi-homed Web Servers and Conserve IP Addresses The requirements that clients and servers support the Host request-header, report an error if the Host request-header (section 14.23) is missing from an HTTP/1.1 request, and accept absolute URIs (section 5.1.2) are among the most important changes defined by this specification.
Older HTTP/1.0 clients assumed a one-to-one relationship of IP addresses and servers; there was no other established mechanism for distinguishing the intended server of a request than the IP address to which that request was directed. The changes outlined above will allow the Internet, once older HTTP clients are no longer common, to support multiple Web sites from a single IP address, greatly simplifying large operational Web servers, where allocation of many IP addresses to a single host has created serious problems. The Internet will also be able to recover the IP addresses that have been allocated for the sole purpose of allowing special-purpose domain names to be used in root-level HTTP URLs. Given the rate of growth of the Web, and the number of servers already deployed, it is extremely Fielding, et al. Standards Track [Page 171] RFC 2616 HTTP/1.1 June 1999 important that all implementations of HTTP (including updates to existing HTTP/1.0 applications) correctly implement these requirements: - Both clients and servers MUST support the Host request-header. - A client that sends an HTTP/1.1 request MUST send a Host header. - Servers MUST report a 400 (Bad Request) error if an HTTP/1.1 request does not include a Host request-header. - Servers MUST accept absolute URIs. 19.6.2 Compatibility with HTTP/1.0 Persistent Connections Some clients and servers might wish to be compatible with some previous implementations of persistent connections in HTTP/1.0 clients and servers. Persistent connections in HTTP/1.0 are explicitly negotiated as they are not the default behavior. HTTP/1.0 experimental implementations of persistent connections are faulty, and the new facilities in HTTP/1.1 are designed to rectify these problems. 
The problem was that some existing 1.0 clients may be sending Keep-Alive to a proxy server that doesn't understand Connection, which would then erroneously forward it to the next inbound server, which would establish the Keep-Alive connection and result in a hung HTTP/1.0 proxy waiting for the close on the response. The result is that HTTP/1.0 clients must be prevented from using Keep-Alive when talking to proxies. However, talking to proxies is the most important use of persistent connections, so that prohibition is clearly unacceptable. Therefore, we need some other mechanism for indicating a persistent connection is desired, which is safe to use even when talking to an old proxy that ignores Connection. Persistent connections are the default for HTTP/1.1 messages; we introduce a new keyword (Connection: close) for declaring non-persistence. See section 14.10. The original HTTP/1.0 form of persistent connections (the Connection: Keep-Alive and Keep-Alive header) is documented in RFC 2068. [33] 19.6.3 Changes from RFC 2068 This specification has been carefully audited to correct and disambiguate key word usage; RFC 2068 had many problems in respect to the conventions laid out in RFC 2119 [34]. Clarified which error code should be used for inbound server failures (e.g. DNS failures). (Section 10.5.5). Fielding, et al. Standards Track [Page 172] RFC 2616 HTTP/1.1 June 1999 CREATE had a race that required an Etag be sent when a resource is first created. (Section 10.2.2). Content-Base was deleted from the specification: it was not implemented widely, and there is no simple, safe way to introduce it without a robust extension mechanism. In addition, it is used in a similar, but not identical fashion in MHTML [45]. Transfer-coding and message lengths all interact in ways that required fixing exactly when chunked encoding is used (to allow for transfer encoding that may not be self delimiting); it was important to straighten out exactly how message lengths are computed. 
(Sections 3.6, 4.4, 7.2.2, 13.5.2, 14.13, 14.16) A content-coding of "identity" was introduced, to solve problems discovered in caching. (section 3.5) Quality Values of zero should indicate that "I don't want something" to allow clients to refuse a representation. (Section 3.9) The use and interpretation of HTTP version numbers has been clarified by RFC 2145. Require proxies to upgrade requests to highest protocol version they support to deal with problems discovered in HTTP/1.0 implementations (Section 3.1) Charset wildcarding is introduced to avoid explosion of character set names in accept headers. (Section 14.2) A case was missed in the Cache-Control model of HTTP/1.1; s-maxage was introduced to add this missing case. (Sections 13.4, 14.8, 14.9, 14.9.3) The Cache-Control: max-age directive was not properly defined for responses. (Section 14.9.3) There are situations where a server (especially a proxy) does not know the full length of a response but is capable of serving a byterange request. We therefore need a mechanism to allow byteranges with a content-range not indicating the full length of the message. (Section 14.16) Range request responses would become very verbose if all meta-data were always returned; by allowing the server to only send needed headers in a 206 response, this problem can be avoided. (Section 10.2.7, 13.5.3, and 14.27) Fielding, et al. Standards Track [Page 173] RFC 2616 HTTP/1.1 June 1999 Fix problem with unsatisfiable range requests; there are two cases: syntactic problems, and range doesn't exist in the document. The 416 status code was needed to resolve this ambiguity needed to indicate an error for a byte range request that falls outside of the actual contents of a document. (Section 10.4.17, 14.16) Rewrite of message transmission requirements to make it much harder for implementors to get it wrong, as the consequences of errors here can have significant impact on the Internet, and to deal with the following problems: 1. 
Changing "HTTP/1.1 or later" to "HTTP/1.1", in contexts where this was incorrectly placing a requirement on the behavior of an implementation of a future version of HTTP/1.x 2. Made it clear that user-agents should retry requests, not "clients" in general. 3. Converted requirements for clients to ignore unexpected 100 (Continue) responses, and for proxies to forward 100 responses, into a general requirement for 1xx responses. 4. Modified some TCP-specific language, to make it clearer that non-TCP transports are possible for HTTP. 5. Require that the origin server MUST NOT wait for the request body before it sends a required 100 (Continue) response. 6. Allow, rather than require, a server to omit 100 (Continue) if it has already seen some of the request body. 7. Allow servers to defend against denial-of-service attacks and broken clients. This change adds the Expect header and 417 status code. The message transmission requirements fixes are in sections 8.2, 10.4.18, 8.1.2.2, 13.11, and 14.20. Proxies should be able to add Content-Length when appropriate. (Section 13.5.2) Clean up confusion between 403 and 404 responses. (Section 10.4.4, 10.4.5, and 10.4.11) Warnings could be cached incorrectly, or not updated appropriately. (Section 13.1.2, 13.2.4, 13.5.2, 13.5.3, 14.9.3, and 14.46) Warning also needed to be a general header, as PUT or other methods may have need for it in requests. Fielding, et al. Standards Track [Page 174] RFC 2616 HTTP/1.1 June 1999 Transfer-coding had significant problems, particularly with interactions with chunked encoding. The solution is that transfer-codings become as full fledged as content-codings. This involves adding an IANA registry for transfer-codings (separate from content codings), a new header field (TE) and enabling trailer headers in the future. Transfer encoding is a major performance benefit, so it was worth fixing [39]. 
TE also solves another, obscure, downward interoperability problem that could have occurred due to interactions between authentication trailers, chunked encoding and HTTP/1.0 clients.(Section 3.6, 3.6.1, and 14.39) The PATCH, LINK, UNLINK methods were defined but not commonly implemented in previous versions of this specification. See RFC 2068 [33]. The Alternates, Content-Version, Derived-From, Link, URI, Public and Content-Base header fields were defined in previous versions of this specification, but not commonly implemented. See RFC 2068 [33]. 20 Index Please see the PostScript version of this RFC for the INDEX. Fielding, et al. Standards Track [Page 175] RFC 2616 HTTP/1.1 June 1999 21. Full Copyright Statement Copyright (C) The Internet Society (1999). All Rights Reserved. This document and translations of it may be copied and furnished to others, and derivative works that comment on or otherwise explain it or assist in its implementation may be prepared, copied, published and distributed, in whole or in part, without restriction of any kind, provided that the above copyright notice and this paragraph are included on all such copies and derivative works. However, this document itself may not be modified in any way, such as by removing the copyright notice or references to the Internet Society or other Internet organizations, except as needed for the purpose of developing Internet standards in which case the procedures for copyrights defined in the Internet Standards process must be followed, or as required to translate it into languages other than English. The limited permissions granted above are perpetual and will not be revoked by the Internet Society or its successors or assigns. 
This document and the information contained herein is provided on an "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Acknowledgement Funding for the RFC Editor function is currently provided by the Internet Society. Fielding, et al. Standards Track [Page 176] ================================================ FILE: sample/ch11/tiny/tiny.c ================================================ /* * tiny.c - A simple, iterative HTTP/1.0 Web server that uses the * GET method to serve static and dynamic content. * * unix> cc -I../../../common ../../../common/csapp.c tiny.c -lpthread -o tiny */ #include "csapp.h" void doit(int fd); void read_requesthdrs(rio_t *rp); int parse_uri(char *uri, char *filename, char *cgiargs); void serve_static(int fd, char *filename, int filesize); void get_filetype(char *filename, char *filetype); void serve_dynamic(int fd, char *filename, char *cgiargs); void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg); int main(int argc, char **argv) { int listenfd, connfd, port, clientlen; struct sockaddr_in clientaddr; /* Check command line args */ if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { clientlen = sizeof(clientaddr); connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); doit(connfd); Close(connfd); } } void doit(int fd) { int is_static; struct stat sbuf; char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE]; char filename[MAXLINE], cgiargs[MAXLINE]; rio_t rio; /* Read request line and headers */ Rio_readinitb(&rio, fd); Rio_readlineb(&rio, buf, MAXLINE); sscanf(buf, "%s %s %s", method, uri, version); if (strcasecmp(method, "GET")) { clienterror(fd, 
method, "501", "Not implemented", "Tiny does not implement this method"); return; } read_requesthdrs(&rio); /* Parse URI from GET request */ is_static = parse_uri(uri, filename, cgiargs); if (stat(filename, &sbuf) < 0) { clienterror(fd, filename, "404", "Not found", "Tiny couldn't read the file"); return; } if (is_static) { /* Serve static content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't read the filetype"); return; } serve_static(fd, filename, sbuf.st_size); } else { /* Serve dynamic content */ if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) { clienterror(fd, filename, "403", "Forbidden", "Tiny couldn't run the CGI program"); return; } serve_dynamic(fd, filename, cgiargs); } } void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg) { char buf[MAXLINE], body[MAXBUF]; /* Build the HTTP response body */ sprintf(body, "Tiny Error"); sprintf(body, "%s\r\n", body); sprintf(body, "%s%s: %s\r\n", body, errnum, shortmsg); sprintf(body, "%s

%s: %s\r\n", body, longmsg, cause); sprintf(body, "%s


The Tiny Web server\r\n", body); /* Print the HTTP response */ sprintf(buf, "HTTP/1.0 %s %s\r\n", errnum, shortmsg); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-type: text/html\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Content-length: %d\r\n\r\n", (int)strlen(body)); Rio_writen(fd, buf, strlen(buf)); Rio_writen(fd, body, strlen(body)); } void read_requesthdrs(rio_t *rp) { char buf[MAXLINE]; Rio_readlineb(rp, buf, MAXLINE); while (strcmp(buf, "\r\n")) { Rio_readlineb(rp, buf, MAXLINE); printf("%s", buf); } return; } int parse_uri(char *uri, char *filename, char *cgiargs) { char *ptr; if (!strstr(uri, "cgi-bin")) { /* Static content */ strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); if (uri[strlen(uri)-1] == '/') strcat(filename, "home.html"); return 1; } else { /* Dynamic content */ ptr = index(uri, '?'); if (ptr) { strcpy(cgiargs, ptr+1); *ptr = '\0'; } else strcpy(cgiargs, ""); strcpy(filename, "."); strcat(filename, uri); return 0; } } void serve_static(int fd, char *filename, int filesize) { int srcfd; char *srcp, filetype[MAXLINE], buf[MAXBUF]; /* Send response headers to client */ get_filetype(filename, filetype); sprintf(buf, "HTTP/1.0 200 OK\r\n"); sprintf(buf, "%sServer: Tiny Web Server\r\n", buf); sprintf(buf, "%sContent-length: %d\r\n", buf, filesize); sprintf(buf, "%sContent-type: %s\r\n\r\n", buf, filetype); Rio_writen(fd, buf, strlen(buf)); /* Send response body to client */ srcfd = Open(filename, O_RDONLY, 0); srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0); Close(srcfd); Rio_writen(fd, srcp, filesize); Munmap(srcp, filesize); } /* * get_filetype - derive file type from name */ void get_filetype(char *filename, char *filetype) { if (strstr(filename, ".html")) strcpy(filetype, "text/html"); else if (strstr(filename, ".gif")) strcpy(filetype, "image/gif"); else if (strstr(filename, ".jpg")) strcpy(filetype, "image/jpeg"); else strcpy(filetype, "text/plain"); } void serve_dynamic(int fd, char 
*filename, char *cgiargs) { char buf[MAXLINE], *emptylist[] = { NULL }; /* Return first part of HTTP response */ sprintf(buf, "HTTP/1.0 200 OK\r\n"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Server: Tiny Web Server\r\n"); Rio_writen(fd, buf, strlen(buf)); if (Fork() == 0) { /* child */ /* Real server would set all CGI vars here */ setenv("QUERY_STRING", cgiargs, 1); Dup2(fd, STDOUT_FILENO); /* Redirect stdout to client */ Execve(filename, emptylist, environ); /* Run CGI program */ } Wait(NULL); /* Parent waits for and reaps child */ } ================================================ FILE: sample/ch12/Makefile ================================================ # Makefile for csapp chapter 12 # mofaph@gmail.com # 2013-8-10 CC = gcc CFLAGS = -Wall INCLUDES += -I../../common LIBS += -lpthread OBJS += csapp.o OBJS += hello.o OBJS += echoservert.o OBJS += echo.o OBJS += sharing.o OBJS += badcnt.o OBJS += goodcnt.o OBJS += sbuf.o OBJS += echoservert_pre.o OBJS += echo_cnt.o OBJS += echoserverp.o OBJS += select.o OBJS += echoservers.o PROGRAMS += hello PROGRAMS += echoservert PROGRAMS += sharing PROGRAMS += badcnt PROGRAMS += goodcnt PROGRAMS += echoservert_pre PROGRAMS += echoserverp PROGRAMS += select PROGRAMS += echoservers PHONY += all PHONY += clean csapp_h = ../../common/csapp.h csapp_c = ../../common/csapp.c .PHONY: $(PHONY) all: $(PROGRAMS) hello: hello.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ hello.o: hello.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ csapp.o: $(csapp_c) $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echoservert: echoservert.o csapp.o echo.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ echoservert.o: echoservert.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echo.o: ../ch11/echo.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ sharing: sharing.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ sharing.o: sharing.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ badcnt: badcnt.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ badcnt.o: 
badcnt.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ goodcnt: goodcnt.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ goodcnt.o: goodcnt.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echoservert_pre: echoservert_pre.o echo_cnt.o sbuf.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ echoservert_pre.o: echoservert_pre.c $(csapp_h) sbuf.h $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echo_cnt.o: echo_cnt.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ sbuf.o: sbuf.c sbuf.h $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echoserverp: echoserverp.o echo.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ echoserverp.o: echoserverp.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ select: select.o echo.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ select.o: select.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ echoservers: echoservers.o csapp.o $(CC) $(CFLAGS) $^ $(LIBS) -o $@ echoservers.o: echoservers.c $(csapp_h) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ clean: rm -f $(OBJS) $(PROGRAMS) ================================================ FILE: sample/ch12/badcnt.c ================================================ /* * p664 -- code/conc/badcnt.c * * 下面的代码展示了多线程程序中的共享变量的同步错误问题。它创建两个线程,每个线程 * 都对共享计数变量 cnt 加 1.因为每个线程都对计数器增加了 niters 次,我们预计它的 * 最终值是 2×niters。这看上去简单而直接。然后,在 Linux 系统上运行这个程序时, * 我们不仅得到错误的答案,而且每次得到的答案都还不相同。 * * 在类 UNIX 系统中编译和运行: * * unix> make badcnt * unix> ./badcnt */ #include "csapp.h" void *thread(void *vargp); /* Thread routine prototype */ /* Global shared variable */ volatile int cnt = 0; /* Counter */ int main(int argc, char **argv) { int niters; pthread_t tid1, tid2; /* Check input argument */ if (argc != 2) { printf("usage: %s \n", argv[0]); exit(0); } niters = atoi(argv[1]); /* Created threads and wait for them to finish */ Pthread_create(&tid1, NULL, thread, &niters); Pthread_create(&tid2, NULL, thread, &niters); Pthread_join(tid1, NULL); Pthread_join(tid2, NULL); /* Check result */ if (cnt != (2 * niters)) printf("BOOM! 
cnt=%d\n", cnt); else printf("OK cnt=%d\n", cnt); exit(0); } /* Thread routine */ void *thread(void *vargp) { int i, niters = *((int *)vargp); for (i = 0; i < niters; i++) cnt++; return NULL; } ================================================ FILE: sample/ch12/echo_cnt.c ================================================ /* * p675 -- code/conc/echo_cnt.c * * 从客户端接收的所有字节计数 * * 下面的代码展示了一个从线程例程调用的初始化程序包的一般技术。 */ #include "csapp.h" static int byte_cnt; /* Byte counter */ static sem_t mutex; /* and the mutex that protects it */ static void init_echo_cnt(void) { Sem_init(&mutex, 0, 1); byte_cnt = 0; } void echo_cnt(int connfd) { int n; char buf[MAXLINE]; rio_t rio; static pthread_once_t once = PTHREAD_ONCE_INIT; Pthread_once(&once, init_echo_cnt); Rio_readinitb(&rio, connfd); while ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) { P(&mutex); byte_cnt += n; printf("thread %d receive %d (%d total) bytes on fd %d\n", (int)pthread_self(), n, byte_cnt, connfd); V(&mutex); Rio_writen(connfd, buf, n); } } ================================================ FILE: sample/ch12/echoserverp.c ================================================ /* * p650 -- code/conc/echoserverp.c * * 基于进程的并发 echo 服务器。父进程派生一个子进程来处理每个新的连接请求 * * 在 UNIX 系统下编译和运行这个程序: * * unix> make echoserverp * unix> ./echoserverp 16384 */ #include "csapp.h" void echo(int connfd); void sigchld_handler(int sig) { while (waitpid(-1, 0, WNOHANG) > 0) ; return; } int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); Signal(SIGCHLD, sigchld_handler); listenfd = Open_listenfd(port); while (1) { connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen); if (Fork() == 0) { Close(listenfd); /* Child closes its listening socket */ echo(connfd); /* Child services client */ Close(connfd); /* Child closes connection with client */ exit(0); } Close(connfd); /* 
Parent closes connected socket (important!) */ } return 0; /* Avoid compiler warning */ } ================================================ FILE: sample/ch12/echoservers.c ================================================ /* * p654~p656 * * 基于 I/O 多路复用的并发 echo 服务器。每次服务器迭代都回送来自每个准备好的描述 * 符的文本行 * * unix> make echoservers * unix> ./echoservers */ #include "csapp.h" typedef struct { /* Represents a pool of connected descriptors */ int maxfd; /* Largest descriptor in read_set */ fd_set read_set; /* Set of all active descriptors */ fd_set ready_set; /* Subset of descriptors ready for reading */ int nready; /* Number of ready descriptors from select */ int maxi; /* Highwater index into client array */ int clientfd[FD_SETSIZE]; /* Set of active descriptors */ rio_t clientrio[FD_SETSIZE]; /* Set of active read buffers */ } pool; int byte_cnt = 0; /* Counts total bytes received by server */ void init_pool(int listenfd, pool *p) { /* Initially, there are no connected descriptors */ int i; p->maxi = -1; for (i = 0; i < FD_SETSIZE; i++) p->clientfd[i] = -1; /* Initially, listenfd is only member of select read set */ p->maxfd = listenfd; FD_ZERO(&p->read_set); FD_SET(listenfd, &p->read_set); } void add_client(int connfd, pool *p) { int i; p->nready--; for (i = 0; i < FD_SETSIZE; i++) /* Find an available slot */ if (p->clientfd[i] < 0) { /* Add connected descriptor to the pool */ p->clientfd[i] = connfd; Rio_readinitb(&p->clientrio[i], connfd); /* Add the descriptor to descriptor set */ FD_SET(connfd, &p->read_set); /* Update max descriptor and pool highwater mark */ if (connfd > p->maxfd) p->maxfd = connfd; if (i > p->maxi) p->maxi = i; break; } if (i == FD_SETSIZE) /* Couldn't find an empty slot */ app_error("add_client error: Too many clients"); } void check_client(pool *p) { int i, connfd, n; char buf[MAXLINE]; rio_t rio; for (i = 0; (i <= p->maxi) && (p->nready > 0); i++) { connfd = p->clientfd[i]; rio = p->clientrio[i]; /* If the descriptor is ready, echo a text line from it 
*/ if ((connfd > 0) && (FD_ISSET(connfd, &p->ready_set))) { p->nready--; if ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) { byte_cnt += n; printf("Server received %d (%d total) bytes on fd %d\n", n, byte_cnt, connfd); Rio_writen(connfd, buf, n); } /* EOF detected, remove descriptor from pool */ else { Close(connfd); FD_CLR(connfd, &p->read_set); p->clientfd[i] = -1; } } } } int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; static pool pool; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); init_pool(listenfd, &pool); while (1) { /* Wait for listening/connected descriptor(s) to become ready */ pool.ready_set = pool.read_set; pool.nready = Select(pool.maxfd+1, &pool.ready_set, NULL, NULL, NULL); /* If listening descriptor ready, add new client to pool */ if (FD_ISSET(listenfd, &pool.ready_set)) { connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); add_client(connfd, &pool); } /* Echo a text line from each ready connected descriptor */ check_client(&pool); } } ================================================ FILE: sample/ch12/echoservert.c ================================================ /* * p661 -- code/conc/echoservert.c * * 在 UNIX 系统下编译和运行这段代码: * * unix> make echoservert * unix> ./echoservert 8192 */ #include "csapp.h" void echo(int connfd); void *thread(void *vargp); int main(int argc, char **argv) { int listenfd, *connfdp, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; pthread_t tid; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); while (1) { connfdp = Malloc(sizeof(int)); *connfdp = Accept(listenfd, (SA *) &clientaddr, &clientlen); Pthread_create(&tid, NULL, thread, connfdp); } } /* Thread routine */ void *thread(void *vargp) { int connfd = *((int *)vargp); 
Pthread_detach(pthread_self()); Free(vargp); echo(connfd); Close(connfd); return NULL; } ================================================ FILE: sample/ch12/echoservert_pre.c ================================================ /* * p674 -- code/conc/echoservert_pre.c * * 基于预线程化的并发服务器 * * 服务器是由一个主线程和一组工作者线程构成的。主线程不断地接受来自客户端的连接请 * 求,并将得到的连接描述福放在一个有限缓冲区中。每一个工作者线程反复地从共享缓冲 * 区中取出描述符,为客户端服务,然后等待下一个描述符。 * * 在类 UNIX 系统下编译和运行这个程序: * * unix> make echoserver_pre * unix> ./echoserver_pre 8192 # server * * # client 1 * unix> telnet 127.0.0.1 8192 * unix> hello # press return, and see what server output */ #include "csapp.h" #include "sbuf.h" #define NTHREADS 4 #define SBUFSIZE 16 void echo_cnt(int connfd); void *thread(void *vargp); sbuf_t sbuf; /* Shared buffer of connected descriptors */ int main(int argc, char **argv) { int i, listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; pthread_t tid; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); sbuf_init(&sbuf, SBUFSIZE); listenfd = Open_listenfd(port); for (i = 0; i < NTHREADS; i++) /* Create worker threads */ Pthread_create(&tid, NULL, thread, NULL); while (1) { connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen); sbuf_insert(&sbuf, connfd); /* Insert connfd in buffer */ } return 0; /* fixed compiler warning */ } void *thread(void *vargp) { Pthread_detach(pthread_self()); while (1) { int connfd = sbuf_remove(&sbuf); /* Remove connfd from buffer */ echo_cnt(connfd); /* Service client */ Close(connfd); } } ================================================ FILE: sample/ch12/goodcnt.c ================================================ /* * p669 * * 使用信号量来实现互斥 * * 在类 UNIX 系统中编译和运行: * * unix> make goodcnt * unix> ./goodcnt */ #include "csapp.h" void *thread(void *vargp); /* Thread routine prototype */ /* Global shared variable */ volatile int cnt = 0; /* Counter */ sem_t mutex; /* Semaphore that protects counter */ int main(int argc, char 
**argv) { int niters; pthread_t tid1, tid2; /* Check input argument */ if (argc != 2) { printf("usage: %s \n", argv[0]); exit(0); } niters = atoi(argv[1]); Sem_init(&mutex, 0, 1); /* mutex = 1 */ /* Created threads and wait for them to finish */ Pthread_create(&tid1, NULL, thread, &niters); Pthread_create(&tid2, NULL, thread, &niters); Pthread_join(tid1, NULL); Pthread_join(tid2, NULL); /* Check result */ if (cnt != (2 * niters)) printf("BOOM! cnt=%d\n", cnt); else printf("OK cnt=%d\n", cnt); exit(0); } /* Thread routine */ void *thread(void *vargp) { int i, niters = *((int *)vargp); for (i = 0; i < niters; i++) { P(&mutex); cnt++; V(&mutex); } return NULL; } ================================================ FILE: sample/ch12/hello.c ================================================ /* * p658 -- code/conc/hello.c * * 下面的代码展示了一个简单的 Pthreads 程序。主线程创建一个对等线程,然后等待它的 * 终止。对等线程输出“Hello, world\n”并且终止。当主线程检测到对等线程终止后,它 * 就通过调用 exit() 终止该进程。 * * unix> cc -Wall -I../../common hello.c ../../common/csapp.c -lpthread -o hello * unix> ./hello */ #include "csapp.h" void *thread(void *argvp); int main() { pthread_t tid; Pthread_create(&tid, NULL, thread, NULL); Pthread_join(tid, NULL); exit(0); } void *thread(void *argvp) /* Thread routine */ { printf("Hello, world!\n"); return NULL; } ================================================ FILE: sample/ch12/sbuf.c ================================================ /* * p671~p672 -- code/conc/sbuf.c */ #include "csapp.h" #include "sbuf.h" /* Create an empty, bounded, shared FIFO buffer with n slots */ void sbuf_init(sbuf_t *sp, int n) { sp->buf = Calloc(n, sizeof(int)); sp->n = n; /* Buffer holds max of n items */ sp->front = sp->rear = 0; /* Empty buffer if front == rear */ Sem_init(&sp->mutex, 0, 1); /* Binary semaphore for locking */ Sem_init(&sp->slots, 0, n); /* Initially, buf has n empty slots */ Sem_init(&sp->items, 0, 0); /* Initially, buf has zero data items */ } /* Clean up buffer sp */ void sbuf_deinit(sbuf_t *sp) { Free(sp->buf); } 
/* Insert item onto the rear of shared buffer sp */ void sbuf_insert(sbuf_t *sp, int item) { P(&sp->slots); /* Wait for available slot */ P(&sp->mutex); /* Lock the buffer */ sp->buf[(++sp->rear) % (sp->n)] = item; /* Insert the item */ V(&sp->mutex); /* Unlock the buffer */ V(&sp->items); /* Announce available item */ } /* Remove and return the first item from buffer sp */ int sbuf_remove(sbuf_t *sp) { int item; P(&sp->items); /* Wait for available item */ P(&sp->mutex); /* Lock the buffer */ item = sp->buf[(++sp->front) % (sp->n)]; /* Remove the item */ V(&sp->mutex); /* Unlock the buffer */ V(&sp->slots); /* Announce available slot */ return item; } ================================================ FILE: sample/ch12/sbuf.h ================================================ /* * p671 -- code/conc/sbuf.h */ #ifndef __SBUF_H__ #define __SBUF_H__ #include typedef struct { int *buf; /* Buffer array */ int n; /* Maximum number of slots */ int front; /* buf[(front+1)%n] is first item */ int rear; /* buf[rear%n] is last item */ sem_t mutex; /* Protects accesses to buf */ sem_t slots; /* Counts available slots */ sem_t items; /* Counts available items */ } sbuf_t; void sbuf_init(sbuf_t *sp, int n); void sbuf_deinit(sbuf_t *sp); void sbuf_insert(sbuf_t *sp, int item); int sbuf_remove(sbuf_t *sp); #endif /* __SBUF_H__ */ ================================================ FILE: sample/ch12/select.c ================================================ /* * p652 -- code/conc/select.c * * 使用 I/O 多路复用的 echo 服务器。服务器使用 select 等待监听描述符上的连接请求 * 和标准输入上的命令。 * * 在类 UNIX 系统下编译和运行: * * unix> make select * unix> ./select 16384 */ #include "csapp.h" void echo(int connfd); void command(void); int main(int argc, char **argv) { int listenfd, connfd, port; socklen_t clientlen = sizeof(struct sockaddr_in); struct sockaddr_in clientaddr; fd_set read_set, ready_set; if (argc != 2) { fprintf(stderr, "usage: %s \n", argv[0]); exit(0); } port = atoi(argv[1]); listenfd = Open_listenfd(port); 
FD_ZERO(&read_set); /* Clear read set */ FD_SET(STDIN_FILENO, &read_set); /* Add stdin to read set */ FD_SET(listenfd, &read_set); while (1) { ready_set = read_set; Select(listenfd+1, &ready_set, NULL, NULL, NULL); if (FD_ISSET(STDIN_FILENO, &ready_set)) command(); /* Read command line from stdin */ if (FD_ISSET(listenfd, &ready_set)) { connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen); echo(connfd); /* Echo client input until EOF */ Close(connfd); } } return 0; /* Avoid compiler warning */ } void command(void) { char buf[MAXLINE]; if (!Fgets(buf, MAXLINE, stdin)) exit(0); /* EOF */ printf("%s", buf); /* Process the input command */ } ================================================ FILE: sample/ch12/sharing.c ================================================ /* * p662 -- code/conc/sharing.c * * 这段代码用来说明关于多线程程序中的共享变量。下面的代码由一个创建了两个对等线程 * 的主线程组成。主线程传递一个唯一的 ID 给每个对等线程,每个对等线程利用这个 ID * 输出一条个性化的信息,以及调用该线程例程的总次数。 * * 在 UNIX 系统下编译和运行: * * unix> make sharing # 编译时可能会有警告,可以忽略 * unix> ./sharing */ #include "csapp.h" #define N 2 void *thread(void *vargp); char **ptr; /* Global variable */ int main() { int i; pthread_t tid; char *msgs[N] = { "Hello from foo", "Hello from bar" }; ptr = msgs; for (i = 0; i < N; i++) Pthread_create(&tid, NULL, thread, (void *)i); Pthread_exit(NULL); return 0; } void *thread(void *vargp) { int myid = (int)vargp; static int cnt = 0; printf("[%d]: %s (cnt=%d)\n", myid, ptr[myid], ++cnt); return NULL; }