pthread_cancel 退出线程引起死锁的问题和解决方法
Posix 的線程終止有兩種情況:正常終止和非正常終止。線程主動調用 pthread_exit() 或者從線程函數中 return 都將使線程正常退出,這是可預見的退出方式;非正常終止是線程在其他線程的干預下(比如 pthread_cancel),或者由于自身運行出錯(比如訪問非法地址)而退出,這種退出方式是不可預見的。不論是可預見的線程終止還是異常終止,都會存在資源釋放的問題,在不考慮因運行出錯而退出的前提下,如何保證線程終止時能順利地釋放掉自己所占用的資源,特別是鎖資源,就是一個必須考慮解決的問題。
最經常出現的情形是資源獨占鎖的使用:線程為了訪問臨界資源而為其加上鎖,但在訪問過程中被外界取消,如果線程處于響應取消狀態,且采用異步方式響應,或者在打開獨占鎖以前的運行路徑上存在取消點,則該臨界資源將永遠處于鎖定狀態得不到釋放。外界取消操作是不可預見的,因此的確需要一個機制來簡化用于資源釋放的編程。
POSIX 中以下線程相關函數都是 cancellation 點:
    pthread_join
    pthread_cond_wait
    pthread_cond_timedwait
    pthread_testcancel
    sem_wait
    sigwait
下面的這些系統函數也是 cancellation 點:
    accept
    fcntl
    open
    read
    write
    lseek
    close
    send
    sendmsg
    sendto
    connect
    recv
    recvfrom
    recvmsg
    system
    tcdrain
    fsync
    msync
    pause
    wait
    waitpid
    nanosleep
當其他線程調用 pthread_cancel 後,本線程會在執行到上述某個函數(cancellation 點)時退出。
默認測試代碼如下:
/*
 * mylock.c - demo of a deadlock caused by pthread_cancel() on a thread that
 * holds a mutex, plus three ways to avoid it.
 *
 * Usage: ./mylock [mode]
 *   0 (default)  cancel thread 2 while it holds mutexA; the lock is never
 *                released and thread 1 blocks forever.
 *   1            same, but the cleanup handler registered with
 *                pthread_cleanup_push() unlocks mutexA on cancellation.
 *   2            "safe mode": no pthread_cancel(); main sets is_safe_exit and
 *                thread 2 unlocks and exits by itself.
 *   3            thread 2 disables cancellation while it holds the lock,
 *                unlocks, re-enables cancellation and then hits an explicit
 *                cancellation point.
 *
 * NOTE: the program reads glibc-internal mutex fields (__data.__owner etc.)
 * for diagnostics, so it is glibc/Linux specific. The flags below are plain
 * ints shared between threads; that is adequate for this timing-based demo,
 * production code should use atomics or protect them with a lock.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* syscall() and usleep() declarations */
#endif

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/syscall.h>

pthread_mutex_t mutexA;
int thStop = 0;       /* set by main: thread 2 should park while holding mutexA */
int is_safemode = 0;  /* mode 2 */
int is_safe_exit = 0; /* set by main in mode 2: thread 2 may exit now */
int is_setcancle = 0; /* mode 3 */
int is_notify = 0;    /* mode 1 */

/*
 * Worker 1: repeatedly takes mutexA for one second, then releases it for one
 * second. Never returns. It is the thread that visibly stalls when mutexA is
 * leaked by the cancelled worker 2.
 */
void *thread_function1(void *arg)
{
    pthread_t threadId = pthread_self();
    long int pid = getpid();
    long int lwpId = syscall(SYS_gettid);

    (void)arg;
    printf("thread[0x%lx][%ld][%ld] in function1\n", (unsigned long)threadId, lwpId, pid);
    while (1) {
        printf("function1 owner:%ld waiting lock owner:%d ...\n", lwpId, mutexA.__data.__owner);
        pthread_mutex_lock(&mutexA);
        printf("function1 mutex:owner::%d;count::%d;lock:%d\n",
               mutexA.__data.__owner, mutexA.__data.__count, mutexA.__data.__lock);
        printf("I an thread[0x%lx][%ld] function1\n", (unsigned long)threadId, lwpId);
        sleep(1);
        pthread_mutex_unlock(&mutexA);
        sleep(1);
    }
}

/*
 * Cancellation cleanup handler for worker 2.
 * arg points to the worker's kernel thread id (int). Only active in mode 1
 * (is_notify); it unlocks mutexA if the dying thread is still the owner.
 */
void clean_function2_res(void *arg)
{
    int lwpid = *(int *)arg;

    if (!is_notify) {
        return;
    }
    printf("clean function2 res lwpid:%d\n", lwpid);
    if (mutexA.__data.__owner == lwpid) {
        pthread_mutex_unlock(&mutexA);
        printf("clean function2 res lock\n");
    }
}

/*
 * Worker 2: behaves like worker 1 until main sets thStop; then it parks
 * inside the critical section (mutexA held) so that main can cancel it or
 * ask it to exit, depending on the selected mode.
 * Returns NULL when it leaves through the normal path.
 */
void *thread_function2(void *arg)
{
    int oldstate = 0;
    int waitCount = 0;
    pthread_t threadId = pthread_self();
    long int pid = getpid();
    int lwpId = (int)syscall(SYS_gettid);

    (void)arg;
    printf("thread[0x%lx][%d][%ld] in function2\n", (unsigned long)threadId, lwpId, pid);
    pthread_cleanup_push(clean_function2_res, (void *)&lwpId);
    while (1) {
        printf("function2 owner:%d waiting lock owner:%d ...\n", lwpId, mutexA.__data.__owner);
        pthread_mutex_lock(&mutexA);
        printf("function2 mutex:owner::%d;count::%d;lock:%d\n",
               mutexA.__data.__owner, mutexA.__data.__count, mutexA.__data.__lock);
        if (thStop) {
            /* Park here with mutexA held until cancelled or told to exit. */
            while (1) {
                if ((is_safemode) && (is_safe_exit)) {
                    break;
                }
                /* printf/usleep are cancellation points: in modes 0 and 1 the
                 * thread dies here while still owning mutexA. */
                printf("waiting thread[0x%lx] cancel...\n", (unsigned long)threadId);
                usleep(500000);
                if (is_setcancle) {
                    waitCount++;
                    /* Hold any cancel request pending while the lock is owned. */
                    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
                    printf("pthread cancel oldstatue:%d;[%d]:[%d]\n",
                           oldstate, PTHREAD_CANCEL_DISABLE, PTHREAD_CANCEL_ENABLE);
                    if (waitCount > 10) {
                        printf("it will into cancel pthread point\n");
                        /* Release the lock first, only then allow cancellation. */
                        pthread_mutex_unlock(&mutexA);
                        sleep(1);
                        pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
                        printf("waiting cancel testcancel point\n");
                        pthread_testcancel();
                        printf("test cancel point\n");
                        while (1) {
                            printf("waiting cancel pthread...\n");
                            usleep(500000);
                        }
                    }
                }
            }
        } else {
            printf("I an thread[0x%lx][%d] function2\n", (unsigned long)threadId, lwpId);
            sleep(1);
        }
        pthread_mutex_unlock(&mutexA);
        sleep(1);
        if ((is_safemode) && (is_safe_exit)) {
            break;
        }
    }
    if (is_safemode) {
        printf("exit pthread by safe mode\n");
        /* pthread_exit() runs the pushed cleanup handler (a no-op unless is_notify). */
        pthread_exit(NULL);
    }
    pthread_cleanup_pop(0);
    return NULL;
}

/*
 * Entry point: selects the demo mode from argv[1], starts both workers, lets
 * them run for 10 seconds, then stops worker 2 (by cancellation or by the
 * safe-exit flag) and keeps printing so the effect on worker 1 can be seen.
 */
int main(int avgc, char **pp_argv)
{
    pthread_t th1;
    pthread_t th2;
    int count = 0;
    void *ret = NULL; /* pthread_join stores a void *, not an int */
    int mode = 0;
    int err;

    if (avgc >= 2) {
        mode = atoi(pp_argv[1]);
    }
    switch (mode) {
    case 1:
        is_notify = 1;
        break;
    case 2:
        is_safemode = 1;
        break;
    case 3:
        is_setcancle = 1;
        break;
    case 0:
    default:
        break;
    }
    printf("notify clean mode:%d\n", is_notify);
    printf("safe mode:%d\n", is_safemode);
    printf("set cancle mode:%d\n", is_setcancle);

    is_safe_exit = 0;
    thStop = 0;
    pthread_mutex_init(&mutexA, NULL);

    err = pthread_create(&th1, NULL, thread_function1, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        return 1;
    }
    printf("create thread:0x%lx\n", (unsigned long)th1);

    err = pthread_create(&th2, NULL, thread_function2, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create: %s\n", strerror(err));
        return 1;
    }
    printf("create thread:0x%lx\n", (unsigned long)th2);

    do {
        sleep(1);
        count++;
        printf("main thread count:%d...\n", count);
    } while (count < 10);

    thStop = 1;
    sleep(3); /* give worker 2 time to reach its parked state */
    if (is_safemode) {
        is_safe_exit = 1;
    } else {
        pthread_cancel(th2);
    }
    pthread_join(th2, &ret);

    while (1) {
        printf("main thread function...\n");
        sleep(1);
    }

    /* Not reached: the loop above runs until the process is killed. */
    pthread_mutex_destroy(&mutexA);
    return 0;
}
/* Build: gcc -g mylock.c -lpthread -o mylock */
復現問題:./mylock 0  強制進入死鎖環境;
主線程設置 thStop = 1,讓 thread_function2 停留在持有鎖的狀態,然后調用 pthread_cancel(mthid) 終止線程 thread_function2;thread_function2 退出時沒有釋放互斥鎖,導致 thread_function1 永遠無法獲取互斥鎖,發生死鎖而停止運行。
解決方案1,注冊線程清理回調(diào)
void pthread_cleanup_push(void (*routine) (void *), void *arg)
void pthread_cleanup_pop(int execute)
pthread_cleanup_push()/pthread_cleanup_pop() 采用先入后出的棧結構管理,void routine(void *arg) 函數在調用 pthread_cleanup_push() 時壓入清理函數棧,多次對 pthread_cleanup_push() 的調用將在清理函數棧中形成一個函數鏈;從 pthread_cleanup_push 的調用點到 pthread_cleanup_pop 之間的程序段中的終止動作(包括調用 pthread_exit()、pthread_cancel 和異常終止,不包括 return)都將執行 pthread_cleanup_push() 所指定的清理函數。
運行結果參考 ./mylock 1
解決方案2,線程安全退出:外部線程不要采用 pthread_cancel 結束線程,而是采用通知方法,由本線程接收到消息或參數后釋放資源,再安全退出。
運行結果參考 ./mylock 2
解決方案3,在訪問公共資源期間暫時取消線程對 pthread_cancel 的響應。
用 pthread_setcancelstate(state, old_state) 設置本線程對 Cancel 信號的反應,state 有兩種值:PTHREAD_CANCEL_ENABLE(缺省)和 PTHREAD_CANCEL_DISABLE,分別表示收到信號后設為 CANCELED 狀態和暫不響應 CANCEL 信號繼續運行(請求會被掛起,直到重新啟用取消);old_state 如果不為 NULL 則存入原來的 Cancel 狀態以便恢復。
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,&oldstate);
/*** free resource:安全執行完代碼 ***/
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE,NULL);
然后用 pthread_testcancel 設置取消點。
運行結果參考 ./mylock 3
?
gdb 調試 mutexA 數據:
#gdb ./mylock
(gdb) b thread_function1
(gdb) n
(gdb) ptype pthread_mutex_t
(gdb) p &mutexA
$4 = (pthread_mutex_t *) 0x602100 <mutexA>
(gdb) p {pthread_mutex_t} 0x602100
$5 = {__data = {__lock = 1, __count = 0, __owner = 10237, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0,
      __next = 0x0}}, __size = "\001\000\000\000\000\000\000\000\375'\000\000\001", '\000' <repeats 26 times>, __align = 1}
(gdb) p mutexA
(gdb) b thread_function2
?
?
?
?
總結
以上是生活随笔為你收集整理的 pthread_cancel 退出线程引起死锁的问题和解决方法 的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: flink如何设置以每天零点到第二天零点
- 下一篇: linux用户(user)和用户组(gr