Dalvik解释器源码到VMP分析
前言
學習這塊的主要目的是想知道vmp是如何實現的、如何與系統本身的虛擬機配合工作,所以簡單地學習了Dalvik的源碼,并對比分析了數字公司的解釋器。筆記結構如下:
dalvik解釋器分析
dalvik解釋器解釋指令前的準備工作
從外部進入解釋器的調用鏈如下:
dvmCallMethod -> dvmCallMethodV -> dvmInterpret
?
這三個函數在解釋器取指令、選分支之前被調用,主要負責一些準備工作,包括分配虛擬寄存器、放入參數、初始化解釋器參數等。其中dvmCallMethod直接調用了dvmCallMethodV,下面分析后兩個函數。
dvmCallMethodV
dalvik虛擬機是基于寄存器架構的,可想而知,在具體執行函數之前,首先要做的就是分配好虛擬寄存器空間,并且將函數所需的參數,放入虛擬寄存器中。主要流程:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | void dvmCallMethodV(Thread*?self, const Method*?method,?Object*?obj,?bool?fromJni, JValue*?pResult, va_list args) { ????//取出方法的簡要聲明 ????const char*?desc?=?&(method->shorty[1]);?//?[0]?is?the?return?type. ????int?verifyCount?=?0; ????ClassObject*?clazz; ????u4*?ins; ????//訪問權限檢查,以及分配函數調用棧,在棧中維護了一份虛擬寄存器列表。 ????clazz?=?callPrep(self, method, obj, false); ????if?(clazz?==?NULL) ????????return; ? ????/*?"ins"?for?new frame start at frame pointer plus?locals?*/ ????//指向第一個參數 ????ins?=?((u4*)self->interpSave.curFrame)?+?(method->registersSize?-?method->insSize); ? ????//放入this指針,到第一個參數。 ????/*?put?"this"?pointer into in0?if?appropriate?*/ ????if?(!dvmIsStaticMethod(method)) { ????????*ins++?=?(u4) obj; ????????verifyCount++; ????} ????//根據后續參數的類型,放入后續參數 ????while?(*desc !=?'\0') { ????????switch (*(desc++)) { ????????????case?'D': case?'J': { ????????????????u8 val?=?va_arg(args, u8); ????????????????memcpy(ins, &val,?8);???????//?EABI prevents direct store ????????????????ins?+=?2; ????????????????verifyCount?+=?2; ????????????????break; ????????????} ????????????case?'F': { ????????????????/*?floats were normalized to doubles; convert back?*/ ????????????????float?f?=?(float) va_arg(args, double); ????????????????*ins++?=?dvmFloatToU4(f); ????????????????verifyCount++; ????????????????break; ????????????} ????????????case?'L': {?????/*?'shorty'?descr uses L?for?all?refs, incl array?*/ ????????????????void*?arg?=?va_arg(args, void*); ????????????????assert(obj?==?NULL || dvmIsHeapAddress(obj)); ????????????????jobject argObj?=?reinterpret_cast<jobject>(arg); ????????????????if?(fromJni) ????????????????????*ins++?=?(u4) dvmDecodeIndirectRef(self, argObj); ????????????????else ????????????????????*ins++?=?(u4) argObj; ????????????????verifyCount++; 
????????????????break; ????????????} ????????????default: { ????????????????/*?Z B C S I?--?all?passed as?32-bit integers?*/ ????????????????*ins++?=?va_arg(args, u4); ????????????????verifyCount++; ????????????????break; ????????????} ????????} ????} ????//如果是本地方法,就直接跳轉到本地方法,若是java方法,進入解釋器,解釋執行。 ????if?(dvmIsNativeMethod(method)) { ????????TRACE_METHOD_ENTER(self, method); ????????/* ?????????*?Because we leave no space?for?local variables,?"curFrame"?points ?????????*?directly at the method arguments. ?????????*/ ????????(*method->nativeFunc)((u4*)self->interpSave.curFrame, pResult, ??????????????????????????????method,?self); ????????TRACE_METHOD_EXIT(self, method); ????}?else?{ ????????dvmInterpret(self, method, pResult); ????} ????dvmPopFrame(self); } |
dvmInterpret
dvmInterpret作為解釋器的入口,主要做了如下工作:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | void dvmInterpret(Thread*?self, const Method*?method, JValue*?pResult) { ????//解釋器的狀態 ????InterpSaveState interpSaveState; ????ExecutionSubModes savedSubModes; ? #if defined(WITH_JIT) ????double calleeSave[JIT_CALLEE_SAVE_DOUBLE_COUNT]; #endif ? ????//保存之前的解釋器狀態,并將新的狀態和之前的狀態連接起來(鏈表) ????interpSaveState?=?self->interpSave; ????self->interpSave.prev?=?&interpSaveState; ????/* ?????*?Strip out?and?save?any?flags that should?not?be inherited by ?????*?nested interpreter activation. ?????*/ ????savedSubModes?=?(ExecutionSubModes)( ??????????????self->interpBreak.ctl.subMode & LOCAL_SUBMODE); ????if?(savedSubModes !=?kSubModeNormal) { ????????dvmDisableSubMode(self, savedSubModes); ????} #if defined(WITH_JIT) ????dvmJitCalleeSave(calleeSave); #endif ? #if defined(WITH_TRACKREF_CHECKS) ????self->interpSave.debugTrackedRefStart?= ????????dvmReferenceTableEntries(&self->internalLocalRefTable); #endif ????self->debugIsMethodEntry?=?true; #if defined(WITH_JIT) ????/*?Initialize the state to kJitNot?*/ ????self->jitState?=?kJitNot; #endif ? ????/初始化解釋器的執行環境 ? 
????self->interpSave.method?=?method;??//初始化執行的方法 ????self->interpSave.curFrame?=?(u4*)?self->interpSave.curFrame;?//初始化函數調用棧 ????self->interpSave.pc?=?method->insns;??//初始化程序計數器 ????//檢查方法是否為本地方法 ????assert(!dvmIsNativeMethod(method)); ????//方法的類是否初始化 ????if?(method->clazz->status < CLASS_INITIALIZING || method->clazz->status?==?CLASS_ERROR) ????{ ????????ALOGE("ERROR: tried to execute code in unprepared class '%s' (%d)", ????????????method->clazz->descriptor, method->clazz->status); ????????dvmDumpThread(self, false); ????????dvmAbort(); ????} ????//?選擇解釋器 ????typedef void (*Interpreter)(Thread*); ????Interpreter stdInterp; ????if?(gDvm.executionMode?==?kExecutionModeInterpFast) ????????stdInterp?=?dvmMterpStd; #if defined(WITH_JIT) ????else?if?(gDvm.executionMode?==?kExecutionModeJit || ?????????????gDvm.executionMode?==?kExecutionModeNcgO0 || ?????????????gDvm.executionMode?==?kExecutionModeNcgO1) ????????stdInterp?=?dvmMterpStd; #endif ????else ????????stdInterp?=?dvmInterpretPortable; ? ????//?Call the interpreter ????(*stdInterp)(self); ? ????*pResult?=?self->interpSave.retval; ? ????/*?Restore interpreter state?from?previous activation?*/ ????self->interpSave?=?interpSaveState; #if defined(WITH_JIT) ????dvmJitCalleeRestore(calleeSave); #endif ????if?(savedSubModes !=?kSubModeNormal) { ????????dvmEnableSubMode(self, savedSubModes); ????} } |
dalvik解釋器流程分析
dalvik解釋器有兩種:Fast解釋器,Portable解釋器。選擇分析Portable解釋器,因為Portable解釋器的可讀性更好。在分析前,先看下Portable解釋器的模型。
Threaded Code技術
實現解釋器的一個常見思路如下面的代碼所示:循環取指令,然后判斷指令類型,去相應分支執行,執行完成后,再返回到switch執行下條指令。
| 1 2 3 4 5 6 7 8 9 | while?(*ins) { ????switch (*ins) { ????????case NOP: ????????????break; ????????case MOV: ????????????break; ????????...... ????} } |
但是每執行一條指令,都需要重新判斷下條指令的類型,然后選擇switch分支,這是個昂貴的開銷。Dalvik為了解決這個問題,引入了Threaded Code技術。簡單地說,就是在執行函數之前建立一個分發表GOTO_TABLE,每條指令在表中有一個對應條目,條目里存放的就是處理該條指令的handler地址。比如invoke-super指令,它的opcode為0x6f,那么處理該條指令的handler地址就是GOTO_TABLE[0x6f]。這樣在每條指令的解釋程序末尾,都可以加上取指動作,然后直接goto到下條指令的handler。
dvmInterpretPortable源碼分析
dvmInterpretPortable是Portable型解釋器的具體實現,流程如下:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | void dvmInterpretPortable(Thread*?self) { ? ????DvmDex*?methodClassDex;?????//?curMethod->clazz->pDvmDex ????JValue retval; ? ????//一些核心的狀態 ????const Method*?curMethod;????//?要執行的方法 ????const u2*?pc;???????????????//?指令計數器 ????u4*?fp;?????????????????????//?函數棧指針 ????u2 inst;????????????????????//?當前指令 ????/*?instruction decoding?*/ ????u4 ref;?????????????????????//?用來表示類的引用 ????u2 vsrc1, vsrc2, vdst;??????//?寄存器索引 ????/*?method call setup?*/ ????const Method*?methodToCall; ????bool?methodCallRange; ? ????//建立分發表 ????DEFINE_GOTO_TABLE(handlerTable); ? ????//初始化上面定義的變量 ????curMethod?=?self->interpSave.method; ????pc?=?self->interpSave.pc; ????fp?=?self->interpSave.curFrame; ????retval?=?self->interpSave.retval;???/*?only need?for?kInterpEntryReturn??*/ ????methodClassDex?=?curMethod->clazz->pDvmDex; ? ????if?(self->interpBreak.ctl.subMode !=?0) { ????????TRACE_METHOD_ENTER(self, curMethod); ????????self->debugIsMethodEntry?=?true;???//?Always true on startup ????} ? ????methodToCall?=?(const Method*)?-1; ? ????//取出第一條指令,并且執行 ????FINISH(0);??????????????????/*?fetch?and?execute first instruction?*/ ? //下面就是定義了每條指令的處理分支。 //NOP指令的處理程序:什么都不做,然后處理下條指令 HANDLE_OPCODE(OP_NOP) ????FINISH(1); OP_END ..... |
invoke-super指令實例分析
invoke-super這條指令的handler如下:
| 1 2 3 4 5 6 7 8 9 | #define GOTO_invoke(_target, _methodCallRange)????????????????????????????? \ ????do {??????????????????????????????????????????????????????????????????? \ ????????methodCallRange?=?_methodCallRange;???????????????????????????????? \ ????????goto _target;?????????????????????????????????????????????????????? \ ????}?while(false) ? HANDLE_OPCODE(OP_INVOKE_SUPER?/*vB, {vD, vE, vF, vG, vA}, meth@CCCC*/) ????GOTO_invoke(invokeSuper, false); OP_END |
invokeSuper這個標簽定義如下:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | //invoke-super位描述符如下:A|G|op BBBB F|E|D|C //methodCallRange depending on whether this?is?a?"/range"?instruction. GOTO_TARGET(invokeSuper,?bool?methodCallRange) ????{ ????????Method*?baseMethod; ????????u2 thisReg; ? ????????EXPORT_PC(); ????????//取出AG的值 ????????vsrc1?=?INST_AA(inst);? ????????//要調用的method索引 ????????ref?=?FETCH(1); ????????//要作為參數的寄存器的索引 ????????vdst?=?FETCH(2);???????? ? ????????//取出this寄存器的索引,比如thisReg為3的話,表示第三個寄存器,放的是this參數。 ????????if?(methodCallRange) { ????????????ILOGV("|invoke-super-range args=%d @0x%04x {regs=v%d-v%d}", ????????????????vsrc1, ref, vdst, vdst+vsrc1-1); ????????????thisReg?=?vdst; ????????}?else?{ ????????????ILOGV("|invoke-super args=%d @0x%04x {regs=0x%04x %x}", ????????????????vsrc1 >>?4, ref, vdst, vsrc1 &?0x0f); ????????????thisReg?=?vdst &?0x0f; ????????} ? ????????//檢查this 是否為空 ????????if?(!checkForNull((Object*) GET_REGISTER(thisReg))) ????????????GOTO_exceptionThrown(); ? ????????//解析要調用的方法 ????????baseMethod?=?dvmDexGetResolvedMethod(methodClassDex, ref); ????????if?(baseMethod?==?NULL) { ????????????baseMethod?=?dvmResolveMethod(curMethod->clazz, ref,METHOD_VIRTUAL); ????????????if?(baseMethod?==?NULL) { ????????????????ILOGV("+ unknown method or access denied"); ????????????????GOTO_exceptionThrown(); ????????????} ????????} ? ????????if?(baseMethod->methodIndex >=?curMethod->clazz->super->vtableCount) { ????????????/* ?????????????*?Method does?not?exist?in?the superclass.? Could happen?if ?????????????*?superclass gets updated. ?????????????*/ ????????????dvmThrowNoSuchMethodError(baseMethod->name); ????????????GOTO_exceptionThrown(); ????????} ????????methodToCall?=?curMethod->clazz->super->vtable[baseMethod->methodIndex]; ? 
#if 0 ????????if?(dvmIsAbstractMethod(methodToCall)) { ????????????dvmThrowAbstractMethodError("abstract method not implemented"); ????????????GOTO_exceptionThrown(); ????????} #else ????????assert(!dvmIsAbstractMethod(methodToCall) || ????????????methodToCall->nativeFunc !=?NULL); #endif ????????LOGVV("+++ base=%s.%s super-virtual=%s.%s", ????????????baseMethod->clazz->descriptor, baseMethod->name, ????????????methodToCall->clazz->descriptor, methodToCall->name); ????????assert(methodToCall !=?NULL); ????????//調用方法 ????????GOTO_invokeMethod(methodCallRange, methodToCall, vsrc1, vdst); ????} GOTO_TARGET_END |
解析完要調用的方法后,跳轉到invokeMethod標簽來執行函數調用。invokeMethod為要調用的函數創建虛擬寄存器棧,新的寄存器棧和之前的棧是有重疊的。然后重新設置解釋器執行環境的參數,調用FINISH(0)執行函數。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | GOTO_TARGET(invokeMethod,?bool?methodCallRange, const Method*?_methodToCall, u2 count, u2 regs) {?????? ????????//節選 ????????if?(!dvmIsNativeMethod(methodToCall)) { ????????????/* ?????????????*?"Call"?interpreted code.? Reposition the PC, update the ?????????????*?frame pointer?and?other local state,?and?continue. ?????????????*/ ????????????curMethod?=?methodToCall;?????//設置要調用的方法 ????????????self->interpSave.method?=?curMethod;? ????????????methodClassDex?=?curMethod->clazz->pDvmDex;?? ????????????pc?=?methodToCall->insns;?????//重置pc到要調用的方法 ????????????fp?=?newFp; ????????????self->interpSave.curFrame?=?fp; #ifdef EASY_GDB ????????????debugSaveArea?=?SAVEAREA_FROM_FP(newFp); #endif ????????????self->debugIsMethodEntry?=?true;????????//?profiling, debugging ????????????ILOGD("> pc <-- %s.%s %s", curMethod->clazz->descriptor, ????????????????curMethod->name, curMethod->shorty); ????????????DUMP_REGS(curMethod, fp, true);?????????//?show?input?args ????????????FINISH(0);??????????????????????????????//?jump to method start ????????} |
數字殼解釋器分析
數字殼解釋執行前的準備工作
進入解釋器的流程為onCreate->sub_D930->sub_3FE5C->sub_3FF5C。sub_3FF5C是真正的解釋器入口,sub_D930和sub_3FE5C負責執行前的準備工作。這部分準備工作和dalvik解釋器的準備工作類似。
sub_D930
sub_D930分為兩部分,調用sub_66BD4之前為第一部分,之后為第二部分。這兩部分主要做的事情如下:
- 第一部分
- jni的一些初始化工作,FindClass,GetMethodID之類的工作
- 利用java.lang.Thread.getStackTrace獲取到調用當前方法的類的類名以及函數名
- 第二部分
- 調用sub_66BD4獲取一些全局信息,以及待解釋函數的信息
- 構建解釋器的虛擬寄存器棧
- 解析待解釋函數的簡單聲明,將函數參數放入虛擬寄存器
主要分析第二部分,首先引入一些數據結構。這些數據結構是動態分析出來的,有些字段的含義還不清楚,標記為unknown(代碼中寫作unknow/unkonw)。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | struct GlobalInfo { ????DexInfo?*dex_info; ????vector<MethodInfo*> method_list; }; ? struct DexInfo { ????void?*dexBase;?//指向內存中dex的基址 ????int?unknow[12]; ????void?*dexHeader;?//指向dex header,用來解析dex的。 ????..... ????optable[]; }; ? struct MethodInfo { ????int?dex_type_id;?????//該方法在dex文件中DexTypeId列表的索引 ????int?dex_class_def_id;??//?該方法所在類在dex文件中DexClassDef列表索引 ????int?unknow; ????int?codeoff;??//該方法的DexCode結構距離dex頭的偏移。 }; ? struct StackInfo { ????JNIEnv?*?jni_env; ????int?registerSize;?//函數所需寄存器數量 ????DexCode?*dexCode;????//指向DexCode結構 ????int?unkonw;?????? ????void?*registerSpace;???//?放入原始參數, malloc(4?*?registerSize) ????void?*registerSpace2;??//?新建一個object引用上面的參數, malloc(4?*?registerSize) ????int?unkonw2;??????? ????char key;?????????????//解密指令的key? ????char origin_key;??????//計算解密指令key所需的一個數據 }; |
sub_66BD4返回的是指向GlobalInfo結構的指針。這個全局信息里面包含了dex有關的信息和待解釋函數的信息。有了這個信息就可以構建解釋器所需的虛擬寄存器棧,完成準備工作。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | int?__fastcall sub_D930(unsigned?int?a1, JNIEnv?*a2, _DWORD?*a3) { ??//節選 ??//獲取全局信息 ??global_info?=?sub_66BD4(v109, &v114); ??if?( v114 &?1?) ????j_j_j__ZdlPv(*(v47?+?5)); ??if?( global_info && (v52?=?*(global_info?+?4), (*(global_info?+?8)?-?v52) >>?2?> v4) ) ??{ ????v53?=?v52?+?4?*?v4; ????a2c?=?v3; ????method_info?=?*v53; ????dexInfo?=?*global_info; ????DexCode?=?(**global_info?+?*(*v53?+?12));???//?DexCode ????((*v3)->PushLocalFrame)(); ????//創建并初始化StackInfo結構 ????stackinfo?=?malloc_1(0x20u);????????????????//?創建StackInfo結構 ????v56?=?*DexCode; ????*stackinfo?=?a2c;???????????????????????????//?stackinfo->jni_env ????*(stackinfo?+?4)?=?v56;?????????????????????//?stackinfo->registerSize ????*(stackinfo?+?8)?=?DexCode;?????????????????//?stackinfo->dexCode ????*(stackinfo?+?12)?=?0; ????v57?=?4?*?v56; ????v58?=?malloc_0(4?*?v56);????????????????????//?創建虛擬寄存器棧 ????*(stackinfo?+?16)?=?v58; ????memset(v58, v57,?0); ????v59?=?malloc_0(v57);????????????????????????//?創建虛擬寄存器棧 ????*(stackinfo?+?20)?=?v59; ????memset(v59, v57,?0); ????*(stackinfo?+?29)?=?0; ????*(stackinfo?+?28)?=?0; ????v60?=?sub_3E268(); ????v61?=?DexCode; ????*(stackinfo?+?28)?=?*DexCode ^?*(v60?+?24) ^?*(method_info?+?4) ^?*method_info;??//?stackinfo->key ????*(stackinfo?+?29)?=?*(v60?+?24); } |
通過創建并初始化StackInfo結構,就完成了虛擬寄存器棧的創建,可以看到這里分配了兩個虛擬寄存器棧。后面調試發現主要使用的是第二個虛擬寄存器棧。猜測設置這兩個虛擬寄存器棧的原因和dalvik擁有兩個虛擬寄存器棧的原因一樣:一個用來執行native方法,一個用來執行java方法。
?
創建完虛擬寄存器棧的下一步工作就是放入函數參數。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 | int?__fastcall sub_D930(unsigned?int?a1, JNIEnv?*a2, _DWORD?*a3) { ????//節選 ????v107?=?stackinfo; ????proto?=?(*dexInfo ???????????+?*(*dexInfo ?????????????+?*(dexInfo[4]?+?60) ?????????????+?4?*?*(*dexInfo?+?*(dexInfo[4]?+?76)?+?12?*?*(*dexInfo?+?*(dexInfo[4]?+?92)?+?8?*?*(method_info?+?4)?+?2)))); ????do ??????v63?=?proto++;????????????????????????????//?方法的簡單聲明: VL ????while?(?*v63 <?0?); ????v64?=?j_j_strlen(proto); ????v99?=?v64; ????v65?=?*v61?-?v61[1];????????????????????????//?函數內部使用寄存器個數:DexCode.registerSize?-?DexCode.insSize ????v105?=?v65?-?1; ????v66?=?v95?+?1; ????v67?=?v107; ????if?( !*(method_info?+?8) ) ????{ ??????v105?=?v65; ??????v68?=?4?*?v65; ??????v69?=?*(*(v107?+?20)?+?v68);??????????????//?函數的第一個參數 thisReg ??????v96?=?*v95; ??????//節選 ??????v111?=?v68; ??????v73?=?(*(v67?+?20)?+?v68); ??????v74?=?*v73; ??????if?(?*v73 && !*(v74?+?4) ) ??????{ ????????*(v74?+?4)?=?1; ????????v78?=?v111; ????????v77?=?v96; ????????**(*(v67?+?20)?+?v111)?=?v96; ??????} ??????else ??????{ ????????v75?=?v64; ????????v76?=?malloc_1(8u);?????????????????????//?重新創建了一個MainActivity?object,并放入第二個register space位置。 ????????*v76?=?0; ????????*(v76?+?4)?=?0; ????????v77?=?v96; ????????*v76?=?v96; ????????*(v76?+?4)?=?1; ????????*v73?=?v76; ????????v67?=?v107; ????????v64?=?v75; ????????v78?=?v111; ??????} ??????*(*(v67?+?16)?+?v78)?=?v77;???????????????//?直接將Activity實例,放入第一個register space的相應位置、 ????} ????v108?=?v67; ????if?( v64 >=?2?)?????????????????????????????//?處理除了this之外的其他參數。 ????{ ??????v79?=?1; ??????v112?=?1; ??????do ??????{ ????????v80?=?v63[v79++?+?1]; ????????if?( v80 >?89?) ????????{ ??????????if?( v80?==?90?) 
??????????{ ????????????*(*(v67?+?16)?+?4?*?(v112++?+?v105))?=?*v66; ????????????++v66; ??????????} ????????} ????????else ????????{ ??????????v81?=?v80?-?66; ??????????if?( v81 <=?0x11?) ????????????JUMPOUT(__CS__,?*(&off_E308?+?v81)?+?58120);//?調用jni->newGlobalRef,構造將傳遞給onCreate的參數 ????????} ??????} ??????while?( v79 < v64 ); ????} ? } |
這里是從dex文件中提取出函數的簡要聲明,onCreate的簡單聲明為VL,然后根據聲明,放入參數。首先放入this參數,然后根據后續參數的類型,將參數放入相應位置。和dalvik虛擬機流程類似。
sub_3FE5C
sub_D930完成了虛擬寄存器棧的構建并放入參數后,調用了sub_3FE5C。sub_3FE5C主要負責初始化解釋器的一些狀態,主要是InterpState結構。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | struct InterpState { ????void?*pc; ????char key; ????DexInfo?*dex_info; }; ? LOAD:0003FF18?????????????????BL????????????? malloc_1????????????????? ; 創建InterpState結構 LOAD:0003FF1C?????????????????PUSH??????????? {R4} LOAD:0003FF1E?????????????????POP???????????? {R1} LOAD:0003FF20?????????????????PUSH??????????? {R0} LOAD:0003FF22?????????????????POP???????????? {R4} LOAD:0003FF24?????????????????STR?????????????R4, [SP,#0x74+var_24] LOAD:0003FF26?????????????????LDRB??????????? R0, [R1,#0x1C] LOAD:0003FF28?????????????????ADDS??????????? R6,?#0x10 LOAD:0003FF2A?????????????????STR?????????????R6, [SP,#0x74+var_2C] LOAD:0003FF2C?????????????????STR?????????????R6, [R4]????????????????? ; InterpState->pc LOAD:0003FF2E?????????????????STRB??????????? R0, [R4,#4]?????????????? ; InterpState->key LOAD:0003FF30?????????????????STR?????????????R5, [R4,#8]?????????????? ; InterpState->dex_info |
數字殼解釋器模型
sub_D930和sub_3FE5C完成了解釋器的準備工作。sub_3FF5C負責解釋執行。數字殼的解釋器模型就是上面提到的那種最直觀的模型,循環取指令,然后判斷指令類型,去相應分支執行,執行完成后,再返回到switch執行下條指令。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | //R4寄存器存放的是InterpState結構 LOAD:0003FF5C?????????????????LDRB??????????? R1, [R4,#4]?? ; 取出key,InterpState->key LOAD:0003FF5E?????????????????MOVS??????????? R6,?#0x31 ; '1' LOAD:0003FF60?????????????????PUSH??????????? {R1} LOAD:0003FF62?????????????????POP???????????? {R2} LOAD:0003FF64?????????????????ANDS??????????? R2, R6 LOAD:0003FF66?????????????????LDR???????????? R0, loc_402C8 LOAD:0003FF68?????????????????PUSH??????????? {R0} LOAD:0003FF6A?????????????????POP???????????? {R3} LOAD:0003FF6C?????????????????BICS??????????? R3, R1 LOAD:0003FF6E?????????????????ORRS??????????? R3, R2 LOAD:0003FF70?????????????????MOVS??????????? R2,?#0xEF00 LOAD:0003FF74?????????????????LSLS??????????? R1, R1,?#8 LOAD:0003FF76?????????????????ANDS??????????? R2, R1 LOAD:0003FF78?????????????????BICS??????????? R0, R1 LOAD:0003FF7A?????????????????ORRS??????????? R0, R2 LOAD:0003FF7C?????????????????EORS??????????? R0, R3 LOAD:0003FF7E?????????????????LDR???????????? R1, [R4]??? ; 取出pc, Interpstate->pc LOAD:0003FF80?????????????????LDRH??????????? R4, [R1]??? ; 取出指令 ? //執行完invoke-super后 LOAD:000463EC?loc_463EC????????????????????????????? LOAD:000463EC???????????????????????????????????? LOAD:000463EC?????????????????LDR???????????? R0, [R4] LOAD:000463EE?????????????????ADDS??????????? R0,?#6? LOAD:000463F0?????????????????STR?????????????R0, [R4]? ; InterpState->pc?=?InterpState->pc?+?6 LOAD:000463F2?????????????????BL????????????? loc_3FF5C ; 跳轉到解釋器開頭,執行下一條指令。 |
數字殼invoke-super指令分析
invoke-super指令包含了函數調用的過程,可以看到dalvik解釋器的虛擬寄存器棧是比較復雜的,涉及很多數據結構。目前為止,在數字殼的相關結構中并未發現類似的結構,所以想分析下數字殼的解釋器是如何處理函數調用的。調試分析后發現,數字殼的解釋器其實并未實現真正的函數調用,它是通過調用jni中的CallVoidMethod方法來實現函數調用。
?
處理invoke-super的handler為sub_4878C,流程如下:
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | int??sub_4878C(int?a1,?bool?methodCallRange, JNIEnv?*a3, DexInfo?*a4, StackInfo?*a5, InterpState?*a6,?int?opNumber, void?*a8) { ??//節選 ??a1a?=?v8; ??dex_base?=?*v8; ??dex_header?=?*(a1a?+?16); ??key?=?*(a6?+?4);??????????????????????????????//?取出key ??v11?=?(key <<?8) | key; ??DexMethodId?=?dex_base?+?*(dex_header?+?92)?+?8?*?(*(*a6?+?2) ^ v11); ??v13?=?dex_base?+?*(dex_header?+?60); ??methodName?=?(dex_base?+?*(v13?+?4?*?*(DexMethodId?+?4)));???//獲取函數名 ??do ????v15?=?*methodName++?<?0; ??while?( v15 ); ??DexMethodId2?=?(dex_base?+?*(dex_header?+?92)?+?8?*?(*(*a6?+?2) ^ ((key <<?8) | key))); ??proto?=?(dex_base ?????????+?*(v13 ???????????+?4?*?*(dex_base?+?*(dex_header?+?68)?+?4?*?*(dex_base?+?*(dex_header?+?76)?+?12?*?*(DexMethodId?+?2)?+?4)))); ??do ????v17?=?*proto++;????????????????????????//獲取函數簡單聲明 ??while?( v17 <?0?); ??v18?=?*(*a6?+?4); ??if?( v52?==?1?) ????thisReg?=?v18 ^ ((key <<?8) | key);?????//this參數 ??else ????thisReg?=?(v18 ^ key) &?0xF; ??v20?=?0; ??if?( v51 ) ??{ ????v21?=?*(*(a5?+?20)?+?4?*?thisReg); ????if?( !v21 || (v20?=?*v21)?==?0?) ????{ ??????v30?=?((*v53)->FindClass)(v53,?"java/lang/NullPointerException"); ??????..... ????} ??} ??arg0?=?v20;???????????????????????????????????//?this?object ??v62?=?v53; ??v63?=?0; ??if?( ((*v53)->ExceptionCheck)(v53) ) ????v63?=?0; ??if?( v51 !=?2?&& v51 !=?4?) ??{ ????sub_610BC(a1a, v53,?*DexMethodId2);?????????//?獲取到classid所指定的類 ????return?sub_48AE2(v32, v33, v34, v35, a5);???//?利用jni->CallVoidMethod調用父類方法 ??} ? ? //48EAA: 構建invoke-super除this外的參數 //49770:調用jni->CallVoidMethod方法 |
數字殼比較復雜,通過分析可以學到很多東西,比如各種反調試,linker,適配很多版本的動態加載,解釋器等,感謝數字公司提供的免費加固。筆記如果有錯誤,歡迎指正,也希望大佬們可以交流下其他vmp的實現思路。
https://bbs.pediy.com/thread-226214.htm
總結
以上是生活随笔為你收集整理的Dalvik解释器源码到VMP分析的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 【译】Consortium Chain
- 下一篇: [原创]基于frida的脱壳工具