tinyhttpd源码详解
tinyhttpd是一輕量級的web 服務器,最近幾天終于抽出時間來研究研究了。其源碼百度即可下載,500多行,確實是學習linux編程的好材料。很多網友都寫了關于tinyhttpd的博文,但是我還是覺得不夠深入,嚴格說是寫得不夠深入,往往就是把500多行代碼一扔,分析下主要過程,畫個流程圖就完事了。我怎么覺得還有很多東西可以挖一挖呢,也許還可再調整一下代碼,雖然目前也不清楚可調整多少,待我細細道來。
我分析的過程就按主要路線走,也就是這樣一個主干道流程:服務器創建socket并監聽某一端口->瀏覽器輸入url發出請求->服務器收到請求,創建線程處理請求,主線程繼續等待->新線程讀取http請求,并解析相關字段,讀取文件內容或者執行CGI程序并返回給瀏覽器->關閉客戶端套接字,新線程退出
咱們先來看看main函數
/*
 * Thread entry point with the signature pthread_create() requires.
 *
 * The listing originally passed accept_request(int) and the raw int socket
 * straight to pthread_create(), which does not match the required
 * void *(*)(void *) start routine and only compiles with a warning.  This
 * trampoline unpacks the descriptor and forwards it instead.
 *
 * NOTE(review): relies on a prototype for accept_request(int) being visible
 * before this point -- confirm against the full file.
 */
static void *accept_request_thread(void *arg)
{
    /* The descriptor was packed into the pointer value by main(). */
    accept_request((int)(long)arg);
    return NULL;
}

/*
 * Program entry point: obtain a listening socket from startup(), then loop
 * forever accepting connections and handing each one to a new thread.
 *
 * Returns 0, although the accept loop never terminates normally.
 */
int main(void)
{
    int server_sock = -1;
    u_short port = 0;               /* 0 asks startup() to pick a port dynamically */
    int client_sock = -1;
    struct sockaddr_in client_name;
    socklen_t client_name_len;      /* accept() takes a socklen_t *, not an int * */
    pthread_t newthread;
    int rc;

    server_sock = startup(&port);
    printf("httpd running on port %d\n", port);

    while (1) {
        /* Value-result argument: must be reset before every accept(). */
        client_name_len = sizeof(client_name);
        client_sock = accept(server_sock,
                             (struct sockaddr *)&client_name,
                             &client_name_len);
        if (client_sock == -1)
            error_die("accept");

        rc = pthread_create(&newthread, NULL, accept_request_thread,
                            (void *)(long)client_sock);
        if (rc != 0) {
            /* pthread_create() returns the error number; it does not set errno. */
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            close(client_sock);     /* nobody else will close it */
        } else {
            /* Never joined, so detach to release thread resources on exit. */
            pthread_detach(newthread);
        }
    }

    close(server_sock);
    return 0;
}
這段代碼,只要是稍微了解linux的網絡編程就很好懂,創建服務端socket,綁定、監聽、等待客戶端連接。只不過作者把這些步驟都放在了一個叫startup的函數里。那來看startup
/*
 * Create the listening socket of the server.
 *
 * Nothing unusual here: this is the standard socket() -> bind() -> listen()
 * sequence.
 *
 * Parameters:
 *   port - in/out.  On entry, the port to bind to; 0 requests a dynamically
 *          allocated port.  When 0 was passed, *port is overwritten with the
 *          port number the system actually assigned.
 *
 * Returns: the listening socket descriptor.
 *
 * Every failing call is reported through error_die().
 * NOTE(review): error_die() is defined elsewhere; presumably it does not
 * return -- confirm.
 */
int startup(u_short *port)
{
    int httpd = 0;
    struct sockaddr_in name;

    httpd = socket(PF_INET, SOCK_STREAM, 0);
    if (httpd == -1)
        error_die("socket");

    memset(&name, 0, sizeof(name));             /* bzero() would work as well */
    name.sin_family = AF_INET;
    name.sin_port = htons(*port);
    name.sin_addr.s_addr = htonl(INADDR_ANY);   /* any local network interface */
    if (bind(httpd, (struct sockaddr *)&name, sizeof(name)) < 0)
        error_die("bind");

    if (*port == 0) {   /* if dynamically allocating a port */
        /* getsockname() takes a socklen_t *, not an int *. */
        socklen_t namelen = sizeof(name);

        if (getsockname(httpd, (struct sockaddr *)&name, &namelen) == -1)
            error_die("getsockname");
        *port = ntohs(name.sin_port);           /* port chosen by the system */
    }

    if (listen(httpd, 5) < 0)
        error_die("listen");

    return httpd;       /* listening socket descriptor */
}
此后,服務端就accept等待連接,作者其實沒有關心客戶端來自哪里,那accept的第二、第三參數完全可以為NULL。接著就創建線程把客戶端套接字作為參數傳過去了,由新線程處理請求,這是服務器編程的常用手段,提高并發性。注意這里的線程函數并不完全合法,至少在linux上就不符合線程函數的原型定義,編譯時編譯器也只是警告而未報錯。
接下來重點就在線程函數accept_request上了
void accept_request(int client) {char buf[1024];int numchars;char method[255];char url[255];char path[512];size_t i, j;struct stat st;int cgi = 0; /* becomes true if server decides this is a CGI* program */char *query_string = NULL;numchars = get_line(client, buf, sizeof(buf));i = 0; j = 0;while (!ISspace(buf[j]) && (i < sizeof(method) - 1)){method[i] = buf[j];i++; j++;}method[i] = '\0';if (strcasecmp(method, "GET") && strcasecmp(method, "POST")){unimplemented(client);return;}if (strcasecmp(method, "POST") == 0)cgi = 1;i = 0;while (ISspace(buf[j]) && (j < sizeof(buf)))j++;while (!ISspace(buf[j]) && (i < sizeof(url) - 1) && (j < sizeof(buf))){url[i] = buf[j];i++; j++;}url[i] = '\0';if (strcasecmp(method, "GET") == 0){query_string = url;while ((*query_string != '?') && (*query_string != '\0'))query_string++;if (*query_string == '?'){cgi = 1;*query_string = '\0';query_string++;}}sprintf(path, "htdocs%s", url);if (path[strlen(path) - 1] == '/')strcat(path, "index.html");if (stat(path, &st) == -1) {while ((numchars > 0) && strcmp("\n", buf)) /* read & discard headers */numchars = get_line(client, buf, sizeof(buf));not_found(client);}else{if ((st.st_mode & S_IFMT) == S_IFDIR)strcat(path, "/index.html");if ((st.st_mode & S_IXUSR) ||(st.st_mode & S_IXGRP) ||(st.st_mode & S_IXOTH) )cgi = 1;if (!cgi)serve_file(client, path);elseexecute_cgi(client, path, method, query_string);}close(client); }
首先很關鍵一點要理解get_line的意思。我們要知道當在瀏覽器中輸入url后enter之后,它發給服務器是文本型的字符串,遵循http請求格式,類似下面的:
GET / HTTP/1.1
HOST:www.abc.com
Content-type:text/html
...
get_line干的事就是讀取一行,并且不管原來是以\n還是\r\n結束,均轉化為以\n再加\0字符結束。其實現如下:
/*
 * Read one line from a socket, one byte at a time.
 *
 * A line may end in "\n", "\r\n" or a lone "\r"; in all three cases the
 * terminator is stored in buf as a single '\n'.  The result is always
 * NUL-terminated.  If the buffer fills up, or recv() reports end-of-stream
 * or an error before a terminator is seen, the bytes read so far are
 * returned without a trailing '\n'.
 *
 * Parameters:
 *   sock - socket descriptor to read from
 *   buf  - destination buffer
 *   size - size of buf in bytes
 *
 * Returns: the number of bytes stored, not counting the terminating NUL.
 *          Returns 0 without touching buf when buf is NULL or size <= 0.
 */
int get_line(int sock, char *buf, int size)
{
    int i = 0;
    char c = '\0';
    int n;

    /* With no room even for the terminator, writing buf[0] would be out of
     * bounds. */
    if (buf == NULL || size <= 0)
        return 0;

    while ((i < size - 1) && (c != '\n')) {
        n = recv(sock, &c, 1, 0);       /* read a single byte per call */
        if (n <= 0)
            break;                      /* end of stream or error: stop here */

        if (c == '\r') {
            /* A carriage return is usually followed by '\n': peek at the
             * next byte and consume it only if it really is a newline. */
            n = recv(sock, &c, 1, MSG_PEEK);
            if ((n > 0) && (c == '\n'))
                recv(sock, &c, 1, 0);   /* c stays '\n', which ends the loop */
            else
                c = '\n';               /* lone '\r' is treated as end of line */
        }
        buf[i] = c;
        i++;
    }
    buf[i] = '\0';

    return i;
}
get_line完后,就是開始解析第一行,判斷是GET方法還是POST方法,目前只支持這兩種。如果是POST,就把cgi置1,表明要運行CGI程序;如果是GET方法且附帶以?開頭的參數時,也認為是執行CGI程序
接著是獲取要訪問的url,可以是很常見的/,/index.html等等。該程序默認為根目錄是在htdocs下的,且默認文件是index.html。另外還判斷了給定文件是否有可執行權限,如果有,則認為是CGI程序。最后根據變量cgi的值來進行相應選擇:讀取靜態文件或者執行CGI程序返回結果。
我們首先看看最簡單的靜態文件情況,調用函數serve_file
/*
 * Send a regular file to the client.
 *
 * The remaining request headers are read and discarded first, then the file
 * is opened.  If it cannot be opened, not_found() is called; otherwise
 * headers() and cat() are called to send the response headers and the file
 * contents.
 *
 * Parameters:
 *   client   - socket connected to the client
 *   filename - path of the file to serve
 */
void serve_file(int client, const char *filename)
{
    FILE *resource = NULL;
    int numchars = 1;
    char buf[1024];

    /* Seed buf with something other than "\n" so the loop below runs. */
    buf[0] = 'A'; buf[1] = '\0';

    /* Read & discard headers.  The article's author notes that the client's
     * headers must be consumed completely, otherwise the data sent
     * afterwards is not displayed properly in the browser. */
    while ((numchars > 0) && strcmp("\n", buf))
        numchars = get_line(client, buf, sizeof(buf));

    resource = fopen(filename, "r");
    if (resource == NULL) {
        not_found(client);
        return;         /* nothing to close: fclose(NULL) is undefined */
    }

    headers(client, filename);
    cat(client, resource);
    fclose(resource);
}
將文件名作為參數,首先讀完客戶端的頭部,然后打開創建文件流。為了模擬http響應,首先向客戶端發送頭部,頭部信息至少包含以下幾點:
HTTP/1.0 200 OK
Server:
Content-Type:
\r\n(一個空白行,標識頭部結束)
最后發送數據體部分,即文件內容,在cat方法中,fgets每讀入一行,就send,直到末尾。headers和cat函數就不在這里列出了。下面,我們來看看一個具體測試例子,緊接著在gdb中調試
我在根目錄下的htdocs下建立一個新文件index2.html,內容如下:
<a href="http://10.108.222.96:54205/test.sh">Display Date</a>
我在這里放了一個鏈接,href部分是關于cgi的,先不管,就只看文本部分能否顯示在瀏覽器中。
首先編譯之后直接運行./httpd,程序打印"httpd running on port 53079"
我們在瀏覽器中訪問index2.html文件,如下圖所示:
文本能正確顯示了。那如何在gdb中調試觀察呢?
xiaoqiang@ljq-Lenovo:~/chenshi/tinyhttpd-0.1.0$ gdb attach 7029 【通過ps查看httpd進程的PID,然后gdb attach之】 Attaching to process 7029 Reading symbols from /home/xiaoqiang/chenshi/tinyhttpd-0.1.0/httpd...done. Reading symbols from /lib/i386-linux-gnu/libpthread.so.0...(no debugging symbols found)...done. [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/i386-linux-gnu/libthread_db.so.1". Loaded symbols for /lib/i386-linux-gnu/libpthread.so.0 Reading symbols from /lib/i386-linux-gnu/libc.so.6...(no debugging symbols found)...done. Loaded symbols for /lib/i386-linux-gnu/libc.so.6 Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done. Loaded symbols for /lib/ld-linux.so.2 0xb7750424 in __kernel_vsyscall () (gdb) bt #0 0xb7750424 in __kernel_vsyscall () #1 0xb772dc08 in accept () from /lib/i386-linux-gnu/libpthread.so.0 #2 0x0804a8d6 in main () at httpd.c:516 (gdb) l accept_request warning: Source file is more recent than executable. 47 /* A request has caused a call to accept() on the server port to 48 * return. Process the request appropriately. 49 * Parameters: the socket connected to the client */ 50 /**********************************************************************/ 51 void accept_request(int client) 52 { 53 char buf[1024]; 54 int numchars; 55 char method[255]; 56 char url[255]; (gdb) l 57 char path[512]; 58 size_t i, j; 59 struct stat st; 60 int cgi = 0; /* becomes true if server decides this is a CGI 61 * program */ 62 char *query_string = NULL; 63 64 numchars = get_line(client, buf, sizeof(buf));//從套接字中讀取一行 65 i = 0; j = 0; 66 while (!ISspace(buf[j]) && (i < sizeof(method) - 1)) (gdb) b 64 【在64行設置斷點,觀察讀到的是什么】 Breakpoint 1 at 0x8048b3f: file httpd.c, line 64. (gdb) c Continuing. 
【直到在瀏覽器中發起了請求,后面的才會打印出來】 [New Thread 0xb63feb40 (LWP 7655)] [Switching to Thread 0xb63feb40 (LWP 7655)]Breakpoint 1, accept_request (client=4) at httpd.c:64 64 numchars = get_line(client, buf, sizeof(buf));//從套接字中讀取一行 (gdb) n 65 i = 0; j = 0; (gdb) p buf 【打印讀到的一行】 $1 = "GET /index2.html HTTP/1.1\n", '\000' <repeats 997 times> 【果真是HTTP GET請求的第一行】 (gdb) l 60 int cgi = 0; /* becomes true if server decides this is a CGI 61 * program */ 62 char *query_string = NULL; 63 64 numchars = get_line(client, buf, sizeof(buf));//從套接字中讀取一行 65 i = 0; j = 0; 66 while (!ISspace(buf[j]) && (i < sizeof(method) - 1)) 67 { 68 method[i] = buf[j]; 69 i++; j++; (gdb) l 70 } 71 method[i] = '\0';//獲取到了HTTP方法 72 73 if (strcasecmp(method, "GET") && strcasecmp(method, "POST")) 74 { 75 //忽略大小寫比較 76 unimplemented(client); 77 return;//尚未支持的請求方法,線程返回 78 } 79 (gdb) l serve_file 【其它的細節調試就不在這里演示了,直接跳到serve_file里】 412 * Parameters: a pointer to a file structure produced from the socket 413 * file descriptor 414 * the name of the file to serve */ 415 /**********************************************************************/ 416 void serve_file(int client, const char *filename) 417 { 418 FILE *resource = NULL; 419 int numchars = 1; 420 char buf[1024]; 421 (gdb) l 422 buf[0] = 'A'; buf[1] = '\0'; 423 while ((numchars > 0) && strcmp("\n", buf)) /* read & discard headers */ 424 numchars = get_line(client, buf, sizeof(buf)); 425 426 resource = fopen(filename, "r"); 427 if (resource == NULL) 428 not_found(client); 429 else 430 { 431 headers(client, filename); (gdb) b 426 【在426行設置斷點】 Breakpoint 2 at 0x804a247: file httpd.c, line 426. 
(gdb) c Continuing.Breakpoint 2, serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:426 426 resource = fopen(filename, "r"); (gdb) p filename $2 = 0xb63fdf4e "htdocs/index2.html" (gdb) n 427 if (resource == NULL) (gdb) n 431 headers(client, filename); (gdb) n 432 cat(client, resource); (gdb) s 【進入cat里面看看】 cat (client=4, resource=0xb6c00468) at httpd.c:170 170 { (gdb) l 165 * easier just to do something like pipe, fork, and exec("cat"). 166 * Parameters: the client socket descriptor 167 * FILE pointer for the file to cat */ 168 /**********************************************************************/ 169 void cat(int client, FILE *resource) 170 { 171 char buf[1024]; 172 173 fgets(buf, sizeof(buf), resource); 174 while (!feof(resource)) (gdb) n 173 fgets(buf, sizeof(buf), resource); (gdb) n 174 while (!feof(resource)) (gdb) p buf 【講到了index2.html的一行,然后send】 $3 = "<a href=\"http://10.108.222.96:54205/test.sh\">Display Date</a>\n", '\000' <repeats 306 times>, "\"\225^\267\000\000\000\000 \312q\267\000\320t\267 \000\000\000 \312q\267\304Re\267 \000\000\000El^\267\001\000\000\000\000\320t\267 \000\000\000\364\277q\267\360\331?\266V\003_\267\364\277q\267 \000\000\000 \312q\267\000\320t\267\000\000\000\000$k^\267 \312q\267\000\320t\267 ", '\000' <repeats 15 times>, "A\252\004\b\364\277q\267 \000\000\000\377\377\377\377\000\000\000\000\236\201^\267 ", '\000' <repeats 23 times>, " \312q\267U\205^\267 \312q\267\000\320t\267 ", '\000' <repeats 19 times>"\364, \277q\267\001\000\000\000R\252\004\b\000\000\000\000\343v^\267"... 
(gdb) n 176 send(client, buf, strlen(buf), 0); (gdb) n 177 fgets(buf, sizeof(buf), resource); (gdb) n 174 while (!feof(resource)) (gdb) n 179 } (gdb) n serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:434 434 fclose(resource); (gdb) bt #0 serve_file (client=4, filename=0xb63fdf4e "htdocs/index2.html") at httpd.c:434 #1 0x08048f83 in accept_request (client=4) at httpd.c:130 #2 0xb7726d4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0 #3 0xb7665b8e in clone () from /lib/i386-linux-gnu/libc.so.6 (gdb) n 435 } (gdb) s accept_request (client=4) at httpd.c:139 139 close(client); <span style="background-color: rgb(255, 255, 255);">【直到運行在這里,瀏覽器的請求才會真正停止,意味著標簽欄那個不斷旋轉的標志就停了】</span> (gdb) s 140 } (gdb) s 0xb7726d4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0 (gdb) s Single stepping until exit from function start_thread, which has no line number information. [New Thread 0xb5bfdb40 (LWP 7656)] [Switching to Thread 0xb5bfdb40 (LWP 7656)]Breakpoint 1, accept_request (client=4) at httpd.c:64 64 numchars = get_line(client, buf, sizeof(buf));//從套接字中讀取一行 (gdb) n [Thread 0xb63feb40 (LWP 7655) exited] 65 i = 0; j = 0; (gdb) p buf $4 = "GET /favicon.ico HTTP/1.1\n", '\000' <repeats 997 times> 【再讀一行時,竟讀到favicon.ico,目前沒弄明白這怎么回事】 (gdb)
前面已說過,tinyhttpd目前就支持兩種請求形式,純get請求或者帶?的get和直接POST請求。了解到源碼htdocs目錄下的cgi都是perl寫的,不知讀者你懂不懂,反正博主我不懂,所以就改一改,改成自己的需求,用shell寫。正如index2.html所示:
<a href="http://10.108.222.96:54205/test.sh">Display Date</a>
test.sh腳本如下:
#!/bin/sh
# Minimal CGI script: prints an empty line first, then a small HTML page
# showing the server's current date and time.
# A Content-type header could be emitted before the empty line, e.g.:
#   echo "Content-type: text/html"
echo
echo "<html><head><meta charset=\"utf-8\"><title>MyTitle</title></head><body>"
now=$(date)
echo "<p>Server Time:$now</p>"
echo "</body></html>"
即包括服務器響應給客戶的字符數據,順便把服務器時間傳過去。注意要給test.sh添加執行權限,才會被視為執行cgi程序,且href中的端口號要改為你具體的端口號,這里只是個示例。來看當在瀏覽器中點擊“Display Date”時,服務器作出的響應:
(gdb) l execute_cgi 【為了節省空間,以下內容我刪除了無關內容】 warning: Source file is more recent than executable. 214 * Parameters: client socket descriptor 215 * path to the CGI script */ 216 /**********************************************************************/ 217 void execute_cgi(int client, const char *path, 218 const char *method, const char *query_string) 219 { 220 char buf[1024]; 229 230 buf[0] = 'A'; buf[1] = '\0'; 231 if (strcasecmp(method, "GET") == 0) (gdb) b 231 【在execute_cgi處設置斷點】 Breakpoint 1 at 0x8049555: file httpd.c, line 231. (gdb) c Continuing. 【當在瀏覽器發起請求時,serve_file被調用,但此時斷點在execute_cgi處,所以此處沒有反應直到鼠標點擊鏈接】 [New Thread 0xb7567b40 (LWP 7708)] [Thread 0xb7567b40 (LWP 7708) exited] [New Thread 0xb6bffb40 (LWP 7709)] [Thread 0xb6bffb40 (LWP 7709) exited] [New Thread 0xb63feb40 (LWP 7710)] [Switching to Thread 0xb63feb40 (LWP 7710)]Breakpoint 1, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET", query_string=0xb63fe255 "") at httpd.c:231 231 if (strcasecmp(method, "GET") == 0) (gdb) info args 【查看此函數調用參數值】 client = 4 path = 0xb63fdf4e "htdocs/test.sh" 【文件為test.sh腳本】 method = 0xb63fe14e "GET" query_string = 0xb63fe255 "" 257 258 if (pipe(cgi_output) < 0) { 259 cannot_execute(client); 260 return; 261 } 262 if (pipe(cgi_input) < 0) { 263 cannot_execute(client); 264 return; 265 } 266 (gdb) b 258 【在創建管道處設置斷點】 Breakpoint 2 at 0x804973e: file httpd.c, line 258. 
(gdb) c Continuing.Breakpoint 2, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET", query_string=0xb63fe255 "") at httpd.c:258 258 if (pipe(cgi_output) < 0) { (gdb) n 262 if (pipe(cgi_input) < 0) { (gdb) n 267 if ( (pid = fork()) < 0 ) { (gdb) l 262 if (pipe(cgi_input) < 0) { 263 cannot_execute(client); 264 return; 265 } 266 267 if ( (pid = fork()) < 0 ) { 268 cannot_execute(client); 269 return; 270 } 271 if (pid == 0) /* child: CGI script */ (gdb) l 272 { 273 char meth_env[255]; 274 char query_env[255]; 275 char length_env[255]; 276 277 dup2(cgi_output[1], 1); 278 dup2(cgi_input[0], 0); 279 close(cgi_output[0]); 280 close(cgi_input[1]); 281 sprintf(meth_env, "REQUEST_METHOD=%s", method); (gdb) l 282 putenv(meth_env); 283 if (strcasecmp(method, "GET") == 0) { 【我的測試例子雖說是get請求,但不需要設置什么環境變量】 284 sprintf(query_env, "QUERY_STRING=%s", query_string); 285 putenv(query_env); 286 } 287 else { /* POST */ 288 sprintf(length_env, "CONTENT_LENGTH=%d", content_length); 289 putenv(length_env); 290 } 291 execl(path, path, NULL); 【子進程執行test.sh】 (gdb) l 292 exit(0); 293 } 294 295 else { /* parent */ 296 close(cgi_output[1]); 297 close(cgi_input[0]); 298 if (strcasecmp(method, "POST") == 0) 299 for (i = 0; i < content_length; i++) { 300 recv(client, &c, 1, 0); 301 write(cgi_input[1], &c, 1); (gdb) b 298 【由于子進程執行test.sh,父進程發送響應給瀏覽器,所以先進入父進程,看發的是什么】 Breakpoint 3 at 0x80498ec: file httpd.c, line 298. 
(gdb) c Continuing.Breakpoint 3, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET", query_string=0xb63fe255 "") at httpd.c:298 298 if (strcasecmp(method, "POST") == 0) (gdb) n 304 while (read(cgi_output[0], &c, 1) > 0) (gdb) l 299 for (i = 0; i < content_length; i++) { 【如果是POST,則還要繼續從cgi_input中讀取數據體,它被導入到標準輸入,從而經由管道進入cgi_output[1]】 300 recv(client, &c, 1, 0); 301 write(cgi_input[1], &c, 1); 302 } 303 304 while (read(cgi_output[0], &c, 1) > 0) 305 send(client, &c, 1, 0); 306 307 close(cgi_output[0]); 308 close(cgi_input[1]); (gdb) s 【單步從cgi_output[0]中讀】 305 send(client, &c, 1, 0); (gdb) p c $1 = 10 '\n' (gdb) s 305 send(client, &c, 1, 0); (gdb) p c 【以下部分剛好讀到的是test腳本的"<html"】 $2 = 60 '<' (gdb) s 305 send(client, &c, 1, 0); (gdb) p c $3 = 104 'h' (gdb) s 305 send(client, &c, 1, 0); (gdb) p c $4 = 116 't' (gdb) s 305 send(client, &c, 1, 0); (gdb) p c $5 = 109 'm' (gdb) s 305 send(client, &c, 1, 0); (gdb) p c $6 = 108 'l' (gdb) l 300 recv(client, &c, 1, 0); 301 write(cgi_input[1], &c, 1); 302 } 303 304 while (read(cgi_output[0], &c, 1) > 0) 305 send(client, &c, 1, 0); 306 307 close(cgi_output[0]); 308 close(cgi_input[1]); 309 waitpid(pid, &status, 0); (gdb) b 307 Breakpoint 4 at 0x80499be: file httpd.c, line 307. (gdb) c Continuing.Breakpoint 4, execute_cgi (client=4, path=0xb63fdf4e "htdocs/test.sh", method=0xb63fe14e "GET", query_string=0xb63fe255 "") at httpd.c:307 307 close(cgi_output[0]); (gdb) n 308 close(cgi_input[1]); (gdb) n 309 waitpid(pid, &status, 0); (gdb) n 311 } (gdb) p status $7 = 0 (gdb) n accept_request (client=4) at httpd.c:139 139 close(client); <span style="background-color: rgb(255, 255, 255);">【直到這里,瀏覽器才顯示了返回結果】</span> (gdb) n 140 } (gdb) 結果顯示: 當然我在這里只是演示了其中的一種情況,至于情況如get請求帶?查詢的,POST請求帶數據體的,只有靠讀者自己去嘗試了,博主暫時拋磚引玉于此。
呃,感覺講解至此結束了呢。貌似還有一點點細節博主還得繼續研究下,總之通過這個例子確實對Linux編程了解了更多了,感謝開源,哈哈!
參考鏈接
1 http://blog.csdn.net/jcjc918/article/details/42129311
2 http://blog.sina.com.cn/s/blog_a5191b5c0102v9yr.html
3 CGI介紹:http://www.jdon.com/idea/cgi.htm
4 http://www.scholat.com/vpost.html?pid=7337
超強干貨來襲 云風專訪:近40年碼齡,通宵達旦的技術人生總結
以上是生活随笔為你收集整理的tinyhttpd源码详解的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: diy一下devise的验证
- 下一篇: [MySQL]-->查询5天之内过生