jvm开发笔记
筆者最近對java虛擬機產生了濃厚的興趣, 想了解下最簡單的jvm是如何寫出來的,于是看起了《java虛擬機規范》,這個規范如同intel開發手冊一樣,是每個jvm開發人員必須掌握的。 要想翻譯執行java byte code, 首先得從java class文件中把Code屬性解析出來才行。 在筆者看來, java的class文件結構著實比elf文件結構復雜很多,不過再復雜的結構, 只要耐心對照著手冊中的結構一一解析即可, 經過幾天的努力, 用c實現了一個class文件解析器,目前它只能解析手冊中規定的jvm最基本的要解析出來的一些屬性:Code, StackMapTable, LineNumberTable。當然, 隨著開發的深入, 它會不斷的健壯起來。
下面說說我在解析java class文件格式中碰到的幾個問題, 幫助后面也要自己動手寫解析器的朋友少走一點彎路:
1、為了提高解析性能, 使用了mmap將class文件全部映射到內存中, 而不是每次解析都要使用read讀磁盤文件。
/*
 * mmap_class_file - map a whole class file read-only into memory.
 *
 * On success the file-scope variables class_fd, class_file_len and
 * class_start_mem describe the open descriptor and the mapping.
 *
 * Returns 0 on success, -1 on failure (the descriptor is closed again).
 */
int mmap_class_file(const char *class_file)
{
	struct stat f_stat;

	class_fd = open(class_file, O_RDONLY);
	if (class_fd == -1) {
		perror("open");
		return -1;
	}

	/* Query the descriptor we just opened, not the path a second time. */
	if (fstat(class_fd, &f_stat) == -1) {
		perror("stat");
		close(class_fd);
		return -1;
	}

	class_file_len = f_stat.st_size;
	printf("%s file len: %d\n", class_file, class_file_len);

	class_start_mem = mmap(NULL, class_file_len, PROT_READ,
			       MAP_PRIVATE, class_fd, 0);
	/* mmap() reports failure with MAP_FAILED, never with NULL. */
	if (class_start_mem == MAP_FAILED) {
		perror("mmap");
		close(class_fd);
		return -1;
	}
	printf("mmap %s at %p\n", class_file, class_start_mem);

	return 0;
}
2、java class使用的是big-endian字節序,x86使用的是little-endian字節序, 所以要轉換一下,就是移位操作而已。
?
/*
 * Byte-swap helpers: `p` is a raw big-endian value as loaded on a
 * little-endian host, `s` receives the value in host order.
 * Both arguments are parenthesised so that expressions can be passed.
 * The ';' after while (0) is kept on purpose: the call sites shown in
 * this article invoke the macros without a terminating semicolon.
 */
#define CLASS_READ_U4(s, p) \
	do { \
		(s) = ((((p) >> 24) & 0x000000ff) | \
		       (((p) >> 8)  & 0x0000ff00) | \
		       (((p) << 24) & 0xff000000) | \
		       (((p) << 8)  & 0x00ff0000)); \
	} while (0);

#define CLASS_READ_U2(s, p) \
	do { \
		(s) = ((((p) >> 8) & 0x00ff) | \
		       (((p) << 8) & 0xff00)); \
	} while (0);

#define CLASS_READ_U1(s, p) \
	do { \
		(s) = (p); \
	} while (0);
?
例如讀一個4字節內容:
?
u4 class_magic;/* read class magic number. */CLASS_READ_U4(class_magic, (*(u4 *)p_mem))p_mem = 4;printf("magic: 0x%x\n", class_magic);
下面是全部的源碼:
?
jvm.h#ifndef JVM_H #define JVM_H#define JVM_CLASS_MAGIC 0xcafebabe#define CLASS_READ_U4(s, p) \do { \s = (((p >> 24) & 0x000000ff) | \((p >> 8) & 0x0000ff00) | \((p << 24) & 0xff000000) | \((p << 8) & 0x00ff0000)); \} while (0);#define CLASS_READ_U2(s, p) \do { \s = (((p >> 8) & 0x00ff) | \((p << 8) & 0xff00)); \} while (0);#define CLASS_READ_U1(s, p) \do { \s = p; \} while (0);#define CLASS_READ_STRING(s, p, len) \do { \memcpy(s, p, len); \} while (0);typedef unsigned int u4; typedef unsigned short u2; typedef unsigned char u1;#define CONSTANT_Class 7 #define CONSTANT_Fieldref 9 #define CONSTANT_Methodref 10 #define CONSTANT_InterfaceMethodref 11 #define CONSTANT_String 8 #define CONSTANT_Integer 3 #define CONSTANT_Float 4 #define CONSTANT_Long 5 #define CONSTANT_Double 6 #define CONSTANT_NameAndType 12 #define CONSTANT_Utf8 1 #define CONSTANT_MethodHandle 15 #define CONSTANT_MethodType 16 #define CONSTANT_InvokeDynamic 18#define ACC_PUBLIC 0x0001 #define ACC_FINAL 0x0010 #define ACC_SUPER 0x0020 #define ACC_INTERFACE 0x0200 #define ACC_ABSTRACT 0X0400 #define ACC_SYNTHETIC 0x1000 #define ACC_ANNOTATION 0x2000 #define ACC_ENUM 0x4000#define METHOD_ACC_PUBLIC 0x0001 #define METHOD_ACC_PRIVATE 0x0002 #define METHOD_ACC_PROTECTED 0x0004 #define METHOD_ACC_STATIC 0x0008 #define METHOD_ACC_FINAL 0x0010 #define METHOD_ACC_SYNCHRONIED 0x0020 #define METHOD_ACC_BRIDGE 0x0040 #define METHOD_ACC_VARARGS 0x0080 #define METHOD_ACC_NATIVE 0x0100 #define METHOD_ACC_ABSTRACT 0x0400 #define METHOD_ACC_STRICT 0x0800 #define METHOD_ACC_SYNTHETIC 0x1000#define ITEM_Top 0 #define ITEM_Integer 1 #define ITEM_Float 2 #define ITEM_Double 3 #define ITEM_Long 4 #define ITEM_Null 5 #define ITEM_UninitializedThis 6 #define ITEM_Object 7 #define ITEM_Uninitialized 8struct constant_info_st {u2 index;u1 *base; }__attribute__ ((packed));struct cp_info {u1 tag;u1 info[]; }__attribute__ ((packed));struct CONSTANT_Class_info {//u1 tag;u2 name_index; }__attribute__ ((packed));struct 
CONSTANT_Fieldref_info {//u1 tag;u2 class_index;u2 name_and_type_index; }__attribute__ ((packed));struct CONSTANT_Methodref_info {//u1 tag;u2 class_index;u2 name_and_type_index; }__attribute__ ((packed));struct CONSTANT_InterfaceMethodref_info {//u1 tag;u2 class_index;u2 name_and_type_inex; }__attribute__ ((packed));struct CONSTANT_String_info {//u1 tag;u2 string_index; }__attribute__ ((packed));struct CONSTANT_Integer_info {//u1 tag;u4 bytes; }__attribute__ ((packed));struct CONSTANT_Float_info {//u1 tag;u4 bytes; }__attribute__ ((packed));struct CONSTANT_Long_info {//u1 tag;u4 high_bytes;u4 low_bytes; }__attribute__ ((packed));struct CONSTANT_Double_info {//u1 tag;u4 high_bytes;u4 low_bytes; }__attribute__ ((packed));struct CONSTANT_NameAndType_info {//u1 tag;u2 name_index;u2 descriptor_index; }__attribute__ ((packed));struct CONSTANT_Utf8_info {//u1 tag;u2 length;u1 bytes[]; }__attribute__ ((packed));struct CONSTANT_MethodHandle_info {//u1 tag;u1 reference_kind;u2 reference_index; }__attribute__ ((packed));struct CONSTANT_MethodType_info {//u1 tag;u2 descriptor_index; }__attribute__ ((packed));struct CONSTANT_InvokeDynamic_info {//u1 tag;u2 bootstrap_method_attr_index;u2 name_and_type_index; }__attribute__ ((packed));#endifclassreader.c:/** classreader.c - jvm class file parser.** (c) wzt 2012 http://www.cloud-sec.org**/#include #include #include #include #include #include #include #include #include #include "jvm.h"static int class_fd; static int class_file_len; static void *class_start_mem; static char *p_mem; static struct constant_info_st *constant_info;int mmap_class_file(const char *class_file) {struct stat f_stat;class_fd = open(class_file, O_RDONLY);if (class_fd == -1) {perror("open");return -1;}if (stat(class_file, &f_stat) == -1) {perror("stat");close(class_fd);return -1;}class_file_len = f_stat.st_size;printf("%s file len: %d\n", class_file, class_file_len);class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE, class_fd, 0);if 
(!class_start_mem) {perror("mmap");close(class_fd);return -1;}printf("mmap %s at %p\n", class_file, class_start_mem);return 0; }int mmap_exit(void) {if (munmap(class_start_mem, class_file_len) == -1) {perror("munmap");return -1;}close(class_fd);return 0; }int parse_class_magic(void) {u4 class_magic;/* read class magic number. */CLASS_READ_U4(class_magic, (*(u4 *)p_mem))p_mem = 4;printf("magic: 0x%x\n", class_magic);if (class_magic != JVM_CLASS_MAGIC) {printf("jvm class magic not match.\n");return -1;}printf("jvm class magic match: 0x%x\n", class_magic);return 0; }int parse_class_version(void) {u2 minor_version, major_version;u2 constant_pool_count;/* read class minor_version. */CLASS_READ_U2(minor_version, (*(u2 *)p_mem))p_mem = 2;printf("jvm class minor_version: %d\n", minor_version);/* read class major_version. */CLASS_READ_U2(major_version, (*(u2 *)p_mem))p_mem = 2;printf("jvm class major_version: %d\n", major_version);return 0; }int parse_class_constant(void) {u2 constant_pool_count;u1 constant_tag;u2 idx;printf("\n-----------parse contant pool count----------------------:\n\n");/* read constant_pool_count */CLASS_READ_U2(constant_pool_count, (*(u2 *)p_mem))p_mem = 2;printf("jvm constant_pool_count: %d\n", constant_pool_count);constant_info = (struct constant_info_st *)malloc(sizeof(struct constant_info_st) *constant_pool_count);if (!constant_info) {printf("Malloc failed.\n");return -1;}for (idx = 1; idx <= constant_pool_count - 1; idx ) {CLASS_READ_U1(constant_tag, (*(u1 *)p_mem))p_mem = 1;printf("- idx: - constant tag: %d\t", idx, (int)constant_tag);switch (constant_tag) {case CONSTANT_Fieldref:case CONSTANT_Methodref:case CONSTANT_InterfaceMethodref:{struct CONSTANT_Methodref_info methodref_info;CLASS_READ_U2(methodref_info.class_index, (*(u2 *)p_mem));p_mem = 2;assert(methodref_info.class_index > 0 &&methodref_info.class_index < constant_pool_count);CLASS_READ_U2(methodref_info.name_and_type_index, (*(u2 *)p_mem));p_mem = 2;assert(methodref_info.class_index 
> 0 &&methodref_info.class_index < constant_pool_count);printf("class_index: %d, name_and_type_index: %d\n",methodref_info.class_index,methodref_info.name_and_type_index);break;}case CONSTANT_Class:{struct CONSTANT_Class_info class_info;CLASS_READ_U2(class_info.name_index, (*(u2 *)p_mem));p_mem = 2;assert(class_info.name_index > 0 &&class_info.name_index < constant_pool_count);printf("name_index: %d\n", class_info.name_index);break;}case CONSTANT_String:{struct CONSTANT_String_info string_info;CLASS_READ_U2(string_info.string_index, (*(u2 *)p_mem));p_mem = 2;assert(string_info.string_index > 0 &&string_info.string_index < constant_pool_count);printf("string index: %d\n", string_info.string_index);break;}case CONSTANT_Long:{struct CONSTANT_Long_info long_info;CLASS_READ_U2(long_info.high_bytes, (*(u2 *)p_mem));p_mem = 2;CLASS_READ_U2(long_info.low_bytes, (*(u2 *)p_mem));p_mem = 2;printf("high bytes: %d, low bytes: %d\n",long_info.high_bytes, long_info.low_bytes);break;}case CONSTANT_Integer:{struct CONSTANT_Integer_info integer_info;CLASS_READ_U4(integer_info.bytes, (*(u4 *)p_mem));p_mem = 4;printf("bytes: %d\n", integer_info.bytes);break;}case CONSTANT_Float:{struct CONSTANT_Float_info float_info;CLASS_READ_U4(float_info.bytes, (*(u4 *)p_mem));p_mem = 4;printf("bytes: %d\n", float_info.bytes);break;}case CONSTANT_Double:{struct CONSTANT_Double_info double_info;CLASS_READ_U4(double_info.high_bytes, (*(u4 *)p_mem));p_mem = 4;CLASS_READ_U4(double_info.low_bytes, (*(u4 *)p_mem));p_mem = 4;printf("high_bytes: %d, low_bytes: %d\n",double_info.high_bytes, double_info.low_bytes);break;}case CONSTANT_NameAndType:{struct CONSTANT_NameAndType_info name_type_info;CLASS_READ_U2(name_type_info.name_index, (*(u2 *)p_mem));p_mem = 2;CLASS_READ_U2(name_type_info.descriptor_index, (*(u2 *)p_mem));p_mem = 2;printf("name_index: %d, descriptor_index: %d\n",name_type_info.name_index, name_type_info.descriptor_index);break;}case CONSTANT_MethodHandle:{struct CONSTANT_MethodHandle_info 
method_handle_info;CLASS_READ_U1(method_handle_info.reference_kind, (*(u1 *)p_mem));p_mem = 1;CLASS_READ_U2(method_handle_info.reference_index, (*(u2 *)p_mem));p_mem = 2;printf("reference_kind: %d, reference_index: %d\n",method_handle_info.reference_kind,method_handle_info.reference_index);break;}case CONSTANT_MethodType:{struct CONSTANT_MethodType_info method_type_info;CLASS_READ_U2(method_type_info.descriptor_index, (*(u2 *)p_mem));p_mem = 2;printf("descriptor_index %d\n", method_type_info.descriptor_index);break;}case CONSTANT_InvokeDynamic:{struct CONSTANT_InvokeDynamic_info invoke_dyc_info;CLASS_READ_U2(invoke_dyc_info.bootstrap_method_attr_index, (*(u2 *)p_mem));p_mem = 2;CLASS_READ_U2(invoke_dyc_info.name_and_type_index, (*(u2 *)p_mem));p_mem = 2;printf("bootstrap_method_attr_index: %d, name_and_type_index: %d\n",invoke_dyc_info.bootstrap_method_attr_index,invoke_dyc_info.name_and_type_index);break;}case CONSTANT_Utf8:{u2 len;char *buf;CLASS_READ_U2(len, (*(u2 *)p_mem));p_mem = 2;buf = malloc(len 1);buf[len] = '\0';assert(buf != NULL);memcpy(buf, p_mem, len);printf("len: %d\t%s\n", len, buf);p_mem = len;constant_info[idx].index = idx;constant_info[idx].base = buf;break;}default:;}}printf("\n"); /*for (idx = 1; idx <= constant_pool_count - 1; idx )printf("%d: %s\n", constant_info[idx].index, constant_info[idx].base); */return 0;out:mmap_exit();return -1; }int parse_class_access_flag(void) {u2 access_flag;/* read class access flag. 
*/CLASS_READ_U2(access_flag, (*(u2 *)p_mem))p_mem = 2;printf("access_flag: 0x%x\n", access_flag);return 0; } int parse_class_this_super(void) {u2 this_class;u2 super_class;CLASS_READ_U2(this_class, (*(u2 *)p_mem))p_mem = 2;CLASS_READ_U2(super_class, (*(u2 *)p_mem))p_mem = 2;printf("this_class: %d\tsuper_class: %d\n\n", this_class, super_class);return 0; }int parse_class_interface(void) {u2 interfaces_count;u2 idx, index;CLASS_READ_U2(interfaces_count, (*(u2 *)p_mem))p_mem = 2;printf("interfaces_count: %d\n", interfaces_count);for (idx = 0; idx < interfaces_count; idx ) {CLASS_READ_U2(index, (*(u2 *)p_mem));p_mem = 2;printf("index: %d\n", index);}return 0; }int parse_class_filed(void) {u2 fileds_count;u2 idx;CLASS_READ_U2(fileds_count, (*(u2 *)p_mem))p_mem = 2;printf("filed_count: %d\n", fileds_count);return 0; } int __parse_exception_table(int len) {u2 start_pc, end_pc;u2 handler_pc, catch_type;u2 idx;for (idx = 0; idx < len; idx ) {CLASS_READ_U2(start_pc, (*(u2 *)p_mem))p_mem = 2;printf("start_pc: %d\n", start_pc);CLASS_READ_U2(end_pc, (*(u2 *)p_mem))p_mem = 2;printf("end_pc: %d\n", end_pc);CLASS_READ_U2(handler_pc, (*(u2 *)p_mem))p_mem = 2;printf("handler_pc: %d\n", handler_pc);CLASS_READ_U2(catch_type, (*(u2 *)p_mem))p_mem = 2;printf("catch_type: %d\n", catch_type);}return 0; }int __parse_line_number_table(void) {u4 attribute_length;u2 line_number_table_length;u2 start_pc, line_number;u2 idx;CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))p_mem = 4;printf("\t\tattribute_length: %d\n", attribute_length);CLASS_READ_U2(line_number_table_length, (*(u2 *)p_mem))p_mem = 2;printf("\t\tline_number_table_length: %d\n", line_number_table_length);for (idx = 0; idx < line_number_table_length; idx ) {CLASS_READ_U2(start_pc, (*(u2 *)p_mem))p_mem = 2;printf("\t\tstart_pc: %d\n", start_pc);CLASS_READ_U2(line_number, (*(u2 *)p_mem))p_mem = 2;printf("\t\tline_number: %d\n", line_number);}return 0; }int __parse_verification_type_info(u1 number) {u1 idx, tag;for (idx = 0; idx < 
number; idx ) {CLASS_READ_U1(tag, (*(u1 *)p_mem))p_mem = 1;printf("\t\ttag: %d\n", tag);switch (tag) {case ITEM_Top:printf("\t\tITEM_Top.\n");break;case ITEM_Integer:printf("\t\tITEM_Integer.\n");break;case ITEM_Float:printf("\t\tITEM_float.\n");break;case ITEM_Double:printf("\t\tITEM_Double.\n");break;case ITEM_Long:printf("\t\tITEM_Long.\n");break;case ITEM_Null:printf("\t\tITEM_Long.\n");break;case ITEM_UninitializedThis:printf("\t\tITEM_UninitializedThis.\n");break;case ITEM_Object:{u2 cpool_index;printf("\t\tITEM_Object.\n");CLASS_READ_U2(cpool_index, (*(u2 *)p_mem))p_mem = 2;printf("\t\tcpool_index: %d\n", cpool_index);break;}case ITEM_Uninitialized:{u2 offset;printf("\t\tITEM_Uninitialized.\n");CLASS_READ_U2(offset, (*(u2 *)p_mem))p_mem = 2;printf("\t\toffset: %d\n", offset);break;}default:return -1;}}return 0; }int __parse_stack_map_frame(u2 number) {u1 frame_type;u1 offset_delta;u2 idx;u1 stack_num;u1 locals_num;u1 local_idx;for (idx = 0; idx < number; idx ) {CLASS_READ_U1(frame_type, (*(u1 *)p_mem))p_mem = 1;printf("\t\tframe_type: %d\n", frame_type);if (frame_type >= 0 && frame_type <= 63) {offset_delta = frame_type;printf("\t\tsame_frame\toffset_delta: %d\n", offset_delta);}if (frame_type >= 64 && frame_type <= 127) {offset_delta = frame_type - 64;stack_num = 1;printf("\t\tsame_locals_l_stack_item_frame\toffset_delta: %d\n",offset_delta);__parse_verification_type_info(stack_num);}if (frame_type == 247) {stack_num = 1;CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))p_mem = 2;printf("\t\tsame_locals_l_stack_item_frame_extendedn\toffset_delta: %d\n",offset_delta);__parse_verification_type_info(stack_num);}if (frame_type >= 248 && frame_type <= 250) {CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))p_mem = 2;printf("\t\tsame_locals_l_stack_item_frame_extended\toffset_delta: %d\n",offset_delta);}if (frame_type == 251) {CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))p_mem = 2;printf("\t\tsame_frame_extended\toffset_delta: %d\n", offset_delta);}if (frame_type >= 252 && 
frame_type <= 254) {CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))p_mem = 2;printf("\t\tappend_frame\toffset_delta: %d\n", offset_delta);locals_num = frame_type - 251;printf("\t\tlocals_num: %d\n", locals_num);__parse_verification_type_info(locals_num);}} } int __parse_stack_map_table(void) {u4 attribute_length;u2 number_of_entries;u2 idx;CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))p_mem = 4;printf("\t\tattribute_length: %d\n", attribute_length);CLASS_READ_U2(number_of_entries, (*(u2 *)p_mem))p_mem = 2;printf("\t\tnumber_of_entries: %d\n", number_of_entries);__parse_stack_map_frame(number_of_entries);return 0; } /* attribute_name_index has been parsed before. */ int parse_code_attribute(void) {u2 attribute_name_index;u4 attribute_length;u2 max_stack;u2 max_locals;u4 code_length;u1 *code;u2 exception_table_length;u2 attributes_count;u2 idx;CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))p_mem = 4;printf("\tattribute_length: %d\n", attribute_length);CLASS_READ_U2(max_stack, (*(u2 *)p_mem))p_mem = 2;printf("\tmax_stack: %d\n", max_stack);CLASS_READ_U2(max_locals, (*(u2 *)p_mem))p_mem = 2;printf("\tmax_locals: %d\n", max_locals);CLASS_READ_U4(code_length, (*(u4 *)p_mem))p_mem = 4;printf("\tcode_length: %d\n", code_length);code = (u1 *)malloc(code_length 1);if (!code) {printf("Malloc failed.\n");return -1;}memcpy(code, p_mem, code_length);code[code_length] = '\0';p_mem = code_length;CLASS_READ_U2(exception_table_length, (*(u2 *)p_mem))p_mem = 2;printf("\texception_table_length: %d\n", exception_table_length);__parse_exception_table(exception_table_length);CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))p_mem = 2;printf("\tattributes_count: %d\n", attributes_count);/* parse attributes */for (idx = 0; idx < attributes_count; idx ) {CLASS_READ_U2(attribute_name_index, (*(u2 *)p_mem))p_mem = 2;printf("\tidx: %d attribute_name_index: %d", idx 1, attribute_name_index);if (!strcmp(constant_info[attribute_name_index].base, "LineNumberTable")) {printf("\n\tparse 
LineNumberTable:\n");__parse_line_number_table();}if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {printf("\n\tparse StackMapTable:\n");__parse_stack_map_table();}if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTable")) {;}if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTypeTable")) {;}if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {;}}return 0; }int parse_class_method(void) {u2 method_count;u2 access_flags, name_index;u2 descriptor_index, attributes_count;u2 idx;printf("\n---------------parse class method-------------------------:\n\n");CLASS_READ_U2(method_count, (*(u2 *)p_mem))p_mem = 2;printf("method_count: %d\n", method_count);for (idx = 0; idx < method_count; idx ) {CLASS_READ_U2(access_flags, (*(u2 *)p_mem))p_mem = 2;printf("access_flags: 0x%x\n", access_flags);CLASS_READ_U2(name_index, (*(u2 *)p_mem))p_mem = 2;printf("name_index: %d\n", name_index);CLASS_READ_U2(descriptor_index, (*(u2 *)p_mem))p_mem = 2;printf("descriptor_index: %d\n", descriptor_index);CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))p_mem = 2;printf("attributes_count: %d\n\n", attributes_count);/* parse attributes */CLASS_READ_U2(name_index, (*(u2 *)p_mem))p_mem = 2;printf("attritbutes name_index: %d\n", name_index);if (!strcmp(constant_info[name_index].base, "Code")) {printf("parse code attribute:\n");parse_code_attribute();}if (!strcmp(constant_info[name_index].base, "Exceptions")) {;}if (!strcmp(constant_info[name_index].base, "Signature")) {;}}return 0; }int jvm_parse_class_file(const char *class_file) {assert(class_file != NULL);if (mmap_class_file(class_file) == -1)return -1;p_mem = class_start_mem;if (parse_class_magic() == -1)goto out;if (parse_class_version() == -1)goto out;if (parse_class_constant() == -1)goto out;if (parse_class_access_flag() == -1)goto out;if (parse_class_this_super() == -1)goto out;if (parse_class_interface() == -1)goto out;if (parse_class_filed() == -1)goto out;if 
(parse_class_method() == -1)goto out;mmap_exit();return 0; out:mmap_exit();return -1; }void jvm_usage(const char *proc) {fprintf(stdout, "usage: %s \n", proc); }int main(int argc, char **argv) {if (argc == 1) {jvm_usage(argv[0]);return 0;}jvm_parse_class_file(argv[1]);return 0; }.h>.h>.h>.h>.h>.h>?
?
root@localhost.localdomain # gcc -o classreader classreader.c -w root@localhost.localdomain # ./classreader test.class test.class file len: 462 mmap test.class at 0x2b0b78fa5000 magic: 0xcafebabe jvm class magic match: 0xcafebabe jvm class minor_version: 0 jvm class major_version: 50-----------parse contant pool count----------------------:jvm constant_pool_count: 30 - idx: 1 constant tag: 10 class_index: 6, name_and_type_index: 16 - idx: 2 constant tag: 9 class_index: 17, name_and_type_index: 18 - idx: 3 constant tag: 8 string index: 19 - idx: 4 constant tag: 10 class_index: 20, name_and_type_index: 21 - idx: 5 constant tag: 7 name_index: 22 - idx: 6 constant tag: 7 name_index: 23 - idx: 7 constant tag: 1 len: 6 - idx: 8 constant tag: 1 len: 3 ()V - idx: 9 constant tag: 1 len: 4 Code - idx: 10 constant tag: 1 len: 15 LineNumberTable - idx: 11 constant tag: 1 len: 4 main - idx: 12 constant tag: 1 len: 22 ([Ljava/lang/String;)V - idx: 13 constant tag: 1 len: 13 StackMapTable - idx: 14 constant tag: 1 len: 10 SourceFile - idx: 15 constant tag: 1 len: 9 test.java - idx: 16 constant tag: 12 name_index: 7, descriptor_index: 8 - idx: 17 constant tag: 7 name_index: 24 - idx: 18 constant tag: 12 name_index: 25, descriptor_index: 26 - idx: 19 constant tag: 1 len: 4 hehe - idx: 20 constant tag: 7 name_index: 27 - idx: 21 constant tag: 12 name_index: 28, descriptor_index: 29 - idx: 22 constant tag: 1 len: 4 test - idx: 23 constant tag: 1 len: 16 java/lang/Object - idx: 24 constant tag: 1 len: 16 java/lang/System - idx: 25 constant tag: 1 len: 3 out - idx: 26 constant tag: 1 len: 21 Ljava/io/PrintStream; - idx: 27 constant tag: 1 len: 19 java/io/PrintStream - idx: 28 constant tag: 1 len: 7 println - idx: 29 constant tag: 1 len: 21 (Ljava/lang/String;)Vaccess_flag: 0x21 this_class: 5 super_class: 6interfaces_count: 0 filed_count: 0---------------parse class method-------------------------:method_count: 2 access_flags: 0x1 name_index: 7 descriptor_index: 8 attributes_count: 
1attritbutes name_index: 9 parse code attribute:attribute_length: 29max_stack: 1max_locals: 1code_length: 5exception_table_length: 0attributes_count: 1idx: 1 attribute_name_index: 10parse LineNumberTable:attribute_length: 6line_number_table_length: 1start_pc: 0line_number: 5 access_flags: 0x9 name_index: 11 descriptor_index: 12 attributes_count: 1attritbutes name_index: 9 parse code attribute:attribute_length: 77max_stack: 2max_locals: 2code_length: 24exception_table_length: 0attributes_count: 2idx: 1 attribute_name_index: 10parse LineNumberTable:attribute_length: 22line_number_table_length: 5start_pc: 0line_number: 7start_pc: 2line_number: 9start_pc: 9line_number: 10start_pc: 17line_number: 9start_pc: 23line_number: 11idx: 2 attribute_name_index: 13parse StackMapTable:attribute_length: 7number_of_entries: 2frame_type: 252append_frame offset_delta: 4locals_num: 1tag: 1ITEM_Integer.frame_type: 18same_frame offset_delta: 18 root@localhost.localdomain #
這兩天在class文件解析器的基礎上, 加上了java反匯編的功能, 反匯編器是指令解釋器的基礎,通過編寫反匯編器可以熟悉jvm的指令系統, 不過jvm的指令一共有201個,反匯編過程基本就是個體力活。在《java虛擬機規范》中對每一條指令都有了詳細的描述,下面說說我是如何解析bytecode的:
一個java文件經過javac編譯后會生成class格式文件, 在class格式中method字段里會有Code屬性,Code屬性包含了java的指令碼和長度。 首先用class解析器將指令碼提取出來, 舉個例子:
test.java
// Helper class whose only member is an int field initialised to 6.
class aa {
    int a = 6;
};

public class test {
    // Prints "hehe" five times.
    public static void main(String args[]) {
        int i = 0;
        for (i = 0; i < 5; i++)
            System.out.println("hehe");
    }
}
我們用class文件解析器把test對應的bytecode打印出來:
len: 5
0x2a0xb70x00x10xb1
這一串bytecode為:0x2a 0xb7 0x0 0x1 0xb1, 長度是5個字節。
對照《java虛擬機規范》我們來一步步手工解析:
0x2a代表aload_0指令, 它將本地局部變量中的第一個變量壓入到堆棧里。這個指令本身長度就是一個字節,沒有參數, 因此0x2a的解析就非常簡單, 直接在屏幕打印出aload_0即可:
printf(“%s\n”, symbol);
0xb7代表invokespecial, 它用來調用超類構造方法,實例初始化方法, 私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2,indexbyte1和indexbyte2各占一個字節,用(indexbyte1 << 8) | indexbyte2來構建一個常量池中的索引。每個jvm指令本身都占用一個字節,加上它的兩個參數, invokespecial語句它將占用3個字節空間。 所以它的解析算法如下:
注意0xb7解析完后,我們要跳過3個字節的地址,那么就是0xb1了, 它是return指令,沒有參數,因此它的解析方法跟aload_0一樣:
printf(“%s\n”, symbol);
以上是我們手工解析的過程, 但是jvm有201條指令, 我們需要建立一個合適的數據結構:
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base);typedef struct bytecode_st {u2 opcode; // jvm的指令碼u2 opcode_len; // 指令總的長(zhǎng)度,包括參數(shù)char symbol[OPCODE_SYMBOL_LEN]; // 指令對(duì)應(yīng)的助記符interp_func func; // 解析指令的回調(diào)函數(shù) }BYTECODE;我們可以直接建立一個(gè)大的BYTECODE數(shù)組:
BYTECODE jvm_byte_code[OPCODE_LEN] = {{0x00, 1, "nop", jvm_interp_nop},{0x01, 1, "aconst_null", jvm_interp_aconst_null},{0x02, 1, "iconst_m1", jvm_interp_iconst_m1},{0x03, 1, "iconst_0", jvm_interp_iconst_0},{0x04, 1, "iconst_1", jvm_interp_iconst_1},{0x05, 1, "iconst_2", jvm_interp_iconst_2},{0x06, 1, "iconst_3", jvm_interp_iconst_3},{0x07, 1, "iconst_4", jvm_interp_iconst_4},{0x08, 1, "iconst_5", jvm_interp_iconst_5},{0x09, 1, "lconst_0", jvm_interp_lconst_0},{0x0a, 1, "lconst_1", jvm_interp_lconst_1},{0x0b, 1, "fconst_0", jvm_interp_fconst_0},{0x0c, 1, "fconst_1", jvm_interp_fconst_1},{0x0d, 1, "fconst_2", jvm_interp_fconst_2},{0x0e, 1, "dconst_0", jvm_interp_dconst_0},{0x0f, 1, "dconst_1", jvm_interp_dconst_1},{0x10, 1, "bipush", jvm_interp_bipush},{0x11, 1, "sipush", jvm_interp_sipush},{0x12, 2, "ldc", jvm_interp_ldc},{0x13, 1, "ldc_w", jvm_interp_ldc_w},{0x14, 1, "ldc2_w", jvm_interp_ldc2_w},{0x15, 1, "iload", jvm_interp_iload},{0x16, 1, "lload", jvm_interp_lload},{0x17, 1, "fload", jvm_interp_fload},{0x18, 1, "dload", jvm_interp_dload},{0x19, 1, "aload", jvm_interp_aload},{0x1a, 1, "iload_0", jvm_interp_iload_0},{0x1b, 1, "iload_1", jvm_interp_iload_1},{0x1c, 1, "iload_2", jvm_interp_iload_2},{0x1d, 1, "iload_3", jvm_interp_iload_3},{0x1e, 1, "lload_0", jvm_interp_lload_0},{0x1f, 1, "lload_1", jvm_interp_lload_1},{0x20, 1, "lload_2", jvm_interp_lload_2},{0x21, 1, "lload_3", jvm_interp_lload_3},{0x22, 1, "fload_0", jvm_interp_fload_0},{0x23, 1, "fload_1", jvm_interp_fload_1},{0x24, 1, "fload_2", jvm_interp_fload_2},{0x25, 1, "fload_3", jvm_interp_fload_3},{0x26, 1, "dload_0", jvm_interp_dload_0},{0x27, 1, "dload_1", jvm_interp_dload_1},{0x28, 1, "dload_2", jvm_interp_dload_2},{0x29, 1, "dload_3", jvm_interp_dload_3},{0x2a, 1, "aload_0", jvm_interp_aload_0},{0x2b, 1, "aload_1", jvm_interp_aload_1},{0x2c, 1, "aload_2", jvm_interp_aload_2},{0x2d, 1, "aload_3", jvm_interp_aload_3},{0x2e, 1, "iaload", jvm_interp_iaload},{0x2f, 1, "laload", 
jvm_interp_laload},{0x30, 1, "faload", jvm_interp_faload},{0x31, 1, "daload", jvm_interp_daload},{0x32, 1, "aaload", jvm_interp_aaload},{0x33, 1, "baload", jvm_interp_baload},{0x34, 1, "caload", jvm_interp_caload},{0x35, 1, "saload", jvm_interp_saload},{0x36, 1, "istore", jvm_interp_istore},{0x37, 1, "lstore", jvm_interp_lstore},{0x38, 1, "fstore", jvm_interp_fstore},{0x39, 1, "dstore", jvm_interp_dstore},{0x3a, 1, "astore", jvm_interp_astore},{0x3b, 1, "istore_0", jvm_interp_istore_0},{0x3c, 1, "istore_1", jvm_interp_istore_1},{0x3d, 1, "istore_2", jvm_interp_istore_2},{0x3e, 1, "istore_3", jvm_interp_istore_3},{0x3f, 1, "lstore_0", jvm_interp_lstore_0},{0x40, 1, "lstore_1", jvm_interp_lstore_1},{0x41, 1, "lstore_2", jvm_interp_lstore_2},{0x42, 1, "lstore_3", jvm_interp_lstore_3},{0x43, 1, "fstore_0", jvm_interp_fstore_0},{0x44, 1, "fstore_1", jvm_interp_fstore_1},{0x45, 1, "fstore_2", jvm_interp_fstore_2},{0x46, 1, "fstore_3", jvm_interp_fstore_3},{0x47, 1, "dstore_0", jvm_interp_dstore_0},{0x48, 1, "dstore_1", jvm_interp_dstore_1},{0x49, 1, "dstore_2", jvm_interp_dstore_2},{0x4a, 1, "dstore_3", jvm_interp_dstore_3},{0x4b, 1, "astore_0", jvm_interp_astore_0},{0x4c, 1, "astore_1", jvm_interp_astore_1},{0x4d, 1, "astore_2", jvm_interp_astore_2},{0x4e, 1, "astore_3", jvm_interp_astore_3},{0x4f, 1, "iastore", jvm_interp_iastore},{0x50, 1, "lastore", jvm_interp_lastore},{0x51, 1, "fastore", jvm_interp_fastore},{0x52, 1, "dastore", jvm_interp_dastore},{0x53, 1, "aastore", jvm_interp_aastore},{0x54, 1, "bastore", jvm_interp_bastore},{0x55, 1, "castore", jvm_interp_castore},{0x56, 1, "sastore", jvm_interp_sastore},{0x57, 1, "pop", jvm_interp_pop},{0x58, 1, "pop2", jvm_interp_pop2},{0x59, 1, "dup", jvm_interp_dup},{0x5a, 1, "dup_x1", jvm_interp_dup_x1},{0x5b, 1, "dup_x2", jvm_interp_dup_x2},{0x5c, 1, "dup2", jvm_interp_dup2},{0x5d, 1, "dup2_x1", jvm_interp_dup2_x1},{0x5e, 1, "dup2_x2", jvm_interp_dup2_x2},{0x5f, 1, "swap", jvm_interp_swap},{0x60, 1, "iadd", 
jvm_interp_iadd},{0x61, 1, "ladd", jvm_interp_ladd},{0x62, 1, "fadd", jvm_interp_fadd},{0x63, 1, "dadd", jvm_interp_dadd},{0x64, 1, "isub", jvm_interp_isub},{0x65, 1, "lsub", jvm_interp_lsub},{0x66, 1, "fsub", jvm_interp_fsub},{0x67, 1, "dsub", jvm_interp_dsub},{0x68, 1, "imul", jvm_interp_imul},{0x69, 1, "lmul", jvm_interp_lmul},{0x6a, 1, "fmul", jvm_interp_fmul},{0x6b, 1, "dmul", jvm_interp_dmul},{0x6c, 1, "idiv", jvm_interp_idiv},{0x6d, 1, "ldiv", jvm_interp_ldiv},{0x6e, 1, "fdiv", jvm_interp_fdiv},{0x6f, 1, "ddiv", jvm_interp_ddiv},{0x70, 1, "irem", jvm_interp_irem},{0x71, 1, "lrem", jvm_interp_lrem},{0x72, 1, "frem", jvm_interp_frem},{0x73, 1, "drem", jvm_interp_drem},{0x74, 1, "ineg", jvm_interp_ineg},{0x75, 1, "lneg", jvm_interp_lneg},{0x76, 1, "fneg", jvm_interp_fneg},{0x77, 1, "dneg", jvm_interp_dneg},{0x78, 1, "ishl", jvm_interp_ishl},{0x79, 1, "lshl", jvm_interp_lshl},{0x7a, 1, "ishr", jvm_interp_ishr},{0x7b, 1, "lshr", jvm_interp_lshr},{0x7c, 1, "iushr", jvm_interp_iushr},{0x7d, 1, "lushr", jvm_interp_lushr},{0x7e, 1, "iand", jvm_interp_iand},{0x7f, 1, "land", jvm_interp_land},{0x80, 1, "ior", jvm_interp_ior},{0x81, 1, "lor", jvm_interp_lor},{0x82, 1, "ixor", jvm_interp_ixor},{0x83, 1, "lxor", jvm_interp_lxor},{0x84, 3, "iinc", jvm_interp_iinc},{0x85, 1, "i2l", jvm_interp_i2l},{0x86, 1, "i2f", jvm_interp_i2f},{0x87, 1, "i2d", jvm_interp_i2d},{0x88, 1, "l2i", jvm_interp_l2i},{0x89, 1, "l2f", jvm_interp_l2f},{0x8a, 1, "l2d", jvm_interp_l2d},{0x8b, 1, "f2i", jvm_interp_f2i},{0x8c, 1, "f2l", jvm_interp_f2l},{0x8d, 1, "f2d", jvm_interp_f2d},{0x8e, 1, "d2i", jvm_interp_d2i},{0x8f, 1, "d2l", jvm_interp_d2l},{0x90, 1, "d2f", jvm_interp_d2f},{0x91, 1, "i2b", jvm_interp_i2b},{0x92, 1, "i2c", jvm_interp_i2c},{0x93, 1, "i2s", jvm_interp_i2s},{0x94, 1, "lcmp", jvm_interp_lcmp},{0x95, 1, "fcmpl", jvm_interp_fcmpl},{0x96, 1, "fcmpg", jvm_interp_fcmpg},{0x97, 1, "dcmpl", jvm_interp_dcmpl},{0x98, 1, "dcmpg", jvm_interp_dcmpg},{0x99, 1, "ifeq", jvm_interp_ifeq},{0x9a, 1, 
"ifne", jvm_interp_ifne},{0x9b, 1, "iflt", jvm_interp_iflt},{0x9c, 1, "ifge", jvm_interp_ifge},{0x9d, 1, "ifgt", jvm_interp_ifgt},{0x9e, 1, "ifle", jvm_interp_ifle},{0x9f, 1, "if_icmpeq", jvm_interp_if_icmpeq},{0xa0, 1, "if_icmpne", jvm_interp_if_icmpne},{0xa1, 1, "if_icmplt", jvm_interp_if_icmplt},{0xa2, 3, "if_icmpge", jvm_interp_if_icmpge},{0xa3, 1, "if_icmpgt", jvm_interp_if_icmpgt},{0xa4, 1, "if_icmple", jvm_interp_if_icmple},{0xa5, 1, "if_acmpeq", jvm_interp_if_acmpeq},{0xa6, 1, "if_acmpne", jvm_interp_if_acmpne},{0xa7, 3, "goto", jvm_interp_goto},{0xa8, 1, "jsr", jvm_interp_jsr},{0xa9, 1, "ret", jvm_interp_ret},{0xaa, 1, "tableswitch", jvm_interp_tableswitch},{0xab, 1, "lookupswitch", jvm_interp_lookupswitch},{0xac, 1, "ireturn", jvm_interp_ireturn},{0xad, 1, "lreturn", jvm_interp_lreturn},{0xae, 1, "freturn", jvm_interp_freturn},{0xaf, 1, "dreturn", jvm_interp_dreturn},{0xb0, 1, "areturn", jvm_interp_areturn},{0xb1, 1, "return", jvm_interp_return},{0xb2, 3, "getstatic", jvm_interp_getstatic},{0xb3, 1, "putstatic", jvm_interp_putstatic},{0xb4, 1, "getfield", jvm_interp_getfield},{0xb5, 1, "putfield", jvm_interp_putfield},{0xb6, 3, "invokevirtual", jvm_interp_invokevirtual},{0xb7, 3, "invokespecial", jvm_interp_invokespecial},{0xb8, 1, "invokestatic", jvm_interp_invokestatic},{0xb9, 1, "invokeinterface", jvm_interp_invokeinterface},{0xba, 1, "invokedynamic", jvm_interp_invokedynamic},{0xbb, 1, "new", jvm_interp_new},{0xbc, 1, "newarray", jvm_interp_newarray},{0xbd, 1, "anewarray", jvm_interp_anewarray},{0xbe, 1, "arraylength", jvm_interp_arraylength},{0xbf, 1, "athrow", jvm_interp_athrow},{0xc0, 1, "checkcast", jvm_interp_checkcast},{0xc1, 1, "instanceof", jvm_interp_instanceof},{0xc2, 1, "monitorenter", jvm_interp_monitorenter},{0xc3, 1, "monitorexit", jvm_interp_monitorexit},{0xc4, 1, "wide", jvm_interp_wide},{0xc5, 1, "multianewarray", jvm_interp_multianewarray},{0xc6, 1, "ifnull", jvm_interp_ifnull},{0xc7, 1, "ifnonnull", jvm_interp_ifnonnull},{0xc8, 1, 
"goto_w", jvm_interp_goto_w},{0xc9, 1, "jsr_w", jvm_interp_jsr_w},};每個(gè)jvm指令的指令碼就是數(shù)組的索引, 這樣就能找到指令對(duì)應(yīng)的BYTECODE結(jié)構(gòu),通過(guò)調(diào)用其回調(diào)函數(shù), 就可以進(jìn)入具體的解析過(guò)程了。 這樣做的好處就是不用switch case一大堆分支了。
?
?
int jvm_interp_invokespecial(u2 len, char *symbol, void *base) {u2 index;index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2));printf("%s #%x\n", symbol, index); }int jvm_interp_aload_0(u2 len, char *symbol, void *base) {printf("%s\n", symbol); }int jvm_interp_return(u2 len, char *symbol, void *base) {printf("%s\n", symbol); }int __disass_bytecode(u1 *base, u2 len) {u1 idx = 0;u1 index;while (idx < len) {index = *(u1 *)(base + idx);//printf("!0x%x\n", index);jvm_byte_code[index].func(jvm_byte_code[index].opcode_len,jvm_byte_code[index].symbol, base + idx);idx += (u1)jvm_byte_code[index].opcode_len;} }目前這個(gè)反匯編器只能解析一小部分指令, 隨著開(kāi)發(fā)的深入, 會(huì)慢慢補(bǔ)全的, 下面是反匯編test.class的結(jié)果:
diassember bytecode:aload_0 invokespecial #1 return----------------------------- iconst_0 istore_1 iconst_0 istore_1 iload_1 iconst_5 if_icmpge 17 getstatic #2 ldc #3 invokevirtual #4 iinc 1 1 goto 0xfff0 returnjava工具集中提供了javap, 可以反匯編java指令,本來(lái)是想山寨一個(gè)javap的, 但是現(xiàn)在對(duì)jvm整體結(jié)構(gòu)還是不清晰,數(shù)據(jù)結(jié)構(gòu)還不能很好的設(shè)計(jì)出來(lái), 但是隨著對(duì)jvm的了解深入, 反匯編器會(huì)越來(lái)越成熟。
一、背景
筆者希望通過自己動手編寫一個簡單的jvm來了解java虛擬機內部的工作細節,畢竟hotspot以及android的dalvik都有幾十萬行的c代碼級別。 在前面的2篇開發筆記中已經實現了一個class文件解析器和一個java反匯編器,在這基礎上,java虛擬機的雛形也已經寫好。還沒有內存管理功能,沒有線程支持。它能解釋執行的指令取決于我的java語法范圍,在這之前我對java一無所知,通過寫這個jvm順便也把java學會了。
它現(xiàn)在的功能如下
1、java反匯編器 山寨了javap的部分功能。 2、能解釋執(zhí)行如下jvm指令
iload_n, istore_n, aload_n, astore_n, iadd, isub, bipush, invokespecial, invokestatic, invokevirtual, goto, return, ireturn, if_icmpge, putfield, new, dup
?
源碼地址: http://www.cloud-sec.org/jvm.tgz 舉2個測試例子:
test.java =========
class aa {int a = 6;int debug(int a, int b){int sum;sum = a + b;return sum;} }public class test {public static void main(String args[]) {int a;aa bb = new aa();a = bb.debug(1, 2);} }test7.java
==========
二、JVM架構(gòu)
2個(gè)核心文件:
classloader.c – 從硬盤加載class文件并解析。
interp_engine.c – bytecode解釋器。
運(yùn)行時(shí)數(shù)據(jù)區(qū)
————————————————————–
| 方法區(qū)(method) | 堆棧(stack) | 程序計(jì)數(shù)器(pc) |
————————————————————–
注意這里缺少了heap、native stack,因為我們現在還不支持這些功能。
每個(gè)method都有自己對(duì)應(yīng)的棧幀stack frame 在class文件解析的時(shí)候就已經(jīng)創(chuàng)建好。
定義了一個(gè)叫curr_jvm_stack的全局變量 它用來(lái)保存當(dāng)前解釋器使用的棧幀結(jié)構(gòu) 在jvm初始化的時(shí)候進(jìn)行設(shè)置
int jvm_stack_init(void) {curr_jvm_stack = (JVM_STACK_FRAME *)malloc(sizeof(JVM_STACK_FRAME));if (!curr_jvm_stack) {__error("malloc failed.");return -1;}memset(curr_jvm_stack, '', sizeof(JVM_STACK_FRAME));jvm_stack_depth = 0;return 0; }三、實(shí)現(xiàn)細(xì)節(jié)
1、 虛擬機(jī)執(zhí)行過(guò)程
初始化jvm_init()
從磁盤(pán)加載class文件并解析在內(nèi)存建立方法區(qū)數(shù)據(jù)結(jié)構(gòu) 初始化內(nèi)存堆棧 初始化jvm運(yùn)行環(huán)境。
解釋器運(yùn)行 jvm_run()
初始化程序計(jì)數(shù)器pc, 從方法區(qū)中查找main函數(shù)開(kāi)始解釋執(zhí)行。
退出 jvm_exit()
釋放所有數(shù)據(jù)結(jié)構(gòu)
2、class文件加載與解析
對(duì)于每一個(gè)class文件使用CLASS數(shù)據(jù)結(jié)構(gòu)表示
typedef struct jvm_class {u4 class_magic; u2 access_flag; u2 this_class;u2 super_class;u2 minor_version;u2 major_version;u2 constant_pool_count;u2 interfaces_count;u2 fileds_count;u2 method_count;char class_file[1024];struct constant_info_st *constant_info;struct list_head interface_list_head;struct list_head filed_list_head;struct list_head method_list_head;struct list_head list; }CLASS;CLASS結(jié)構(gòu)的前部分是按java虛擬機(jī)規(guī)范中對(duì)class文件結(jié)構(gòu)的描述設(shè)置的。 class_file保存的是這個(gè)CLASS結(jié)構(gòu)對(duì)應(yīng)的磁盤(pán)class文件名。constant_info保存的是class文件常量池的字符串。utf8interface_list_headfiled_list_headmethod_list_head分別是接口字段 方法的鏈表頭。
在解析class文件的時(shí)候 只解析了java虛擬機(jī)規(guī)范中規(guī)定的一個(gè)jvm最起碼能解析的屬性。 這個(gè)部分沒(méi)什么好說(shuō)的大家直接看源碼 在對(duì)照java虛擬機(jī)規(guī)范就能看懂了。
3、解釋器設(shè)計(jì)
java虛擬機(jī)規(guī)范中一共涉及了201條指令。沒(méi)有使用switch case這種常用的算法。而是為每個(gè)jvm指令設(shè)計(jì)了一個(gè)數(shù)據(jù)結(jié)構(gòu)
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base);typedef struct bytecode_st {u2 opcode;u2 opcode_len;char symbol[OPCODE_SYMBOL_LEN];interp_func func; }BYTECODE;opcode是jvm指令的機(jī)器碼 opcode_len是這條jvm指令的長(zhǎng)度symbol指令的助記符func是具體的這條指令解釋函數(shù)。事先建立了一個(gè)BYTECODE數(shù)組
BYTECODE jvm_byte_code[OPCODE_LEN] = {{0x00, 1, "nop", jvm_interp_nop},{0x01, 1, "aconst_null", jvm_interp_aconst_null},{0x02, 1, "iconst_m1", jvm_interp_iconst_m1},{0x03, 1, "iconst_0", jvm_interp_iconst_0},{0x04, 1, "iconst_1", jvm_interp_iconst_1},{0x05, 1, "iconst_2", jvm_interp_iconst_2},{0x06, 1, "iconst_3", jvm_interp_iconst_3},{0x07, 1, "iconst_4", jvm_interp_iconst_4},{0x08, 1, "iconst_5", jvm_interp_iconst_5},{0x09, 1, "lconst_0", jvm_interp_lconst_0},{0x0a, 1, "lconst_1", jvm_interp_lconst_1},{0x0b, 1, "fconst_0", jvm_interp_fconst_0},...{0xc5, 1, "multianewarray", jvm_interp_multianewarray},{0xc6, 1, "ifnull", jvm_interp_ifnull},{0xc7, 1, "ifnonnull", jvm_interp_ifnonnull},{0xc8, 1, "goto_w", jvm_interp_goto_w},{0xc9, 1, "jsr_w", jvm_interp_jsr_w},};int jvm_interp_invokespecial(u2 len, char *symbol, void *base) {u2 index;index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2));printf("%s #%xn", symbol, index); }int jvm_interp_aload_0(u2 len, char *symbol, void *base) {printf("%sn", symbol); }int jvm_interp_return(u2 len, char *symbol, void *base) {printf("%sn", symbol); }對(duì)于一段bytecode0x2a0xb70x00x10xb1 手工解析如下
0x2a代表aload_0指令 它將本地局部變量中的第一個(gè)變量壓入到堆棧里。這個(gè)指令本身長(zhǎng)度就是一個(gè)字節(jié)沒(méi)有參數(shù) 因此0x2a的解析就非常簡(jiǎn)單 直接在屏幕打印出aload_0即可
printf("%s\n", symbol);
0xb7代表invokespecial 它用來(lái)調(diào)用超類構(gòu)造方法實(shí)例初始化方法 私有方法。它的用法如下
invokespecial indexbyte1 indexbyte2indexbyte1和indexbyte2各占一個(gè)字節(jié)用(indexbyte1 << 8) | indexbyte2來(lái)構(gòu)建一個(gè)常量池中的索引。每個(gè)jvm指令本身都占用一個(gè)字節(jié)加上它的兩個(gè)參數(shù) invokespecial語(yǔ)句它將占用3個(gè)字節(jié)空間。 所以它的解析算法如下
注意0xb7解析完后我們要跳過(guò)3個(gè)字節(jié)的地址那么就是0xb1了 它是return指令沒(méi)有參數(shù)因此它的解析方法跟aload_0一樣
printf("%s\n", symbol);
用程序代碼實(shí)現(xiàn)是
int interp_bytecode(CLASS_METHOD *method) {jvm_stack_depth++; // 函數(shù)掉用計(jì)數(shù)加1curr_jvm_stack = &method->code_attr->stack_frame; // 設(shè)置當(dāng)前棧幀指針curr_jvm_interp_env->constant_info = method->class->constant_info; // 設(shè)置當(dāng)前運(yùn)行環(huán)境curr_jvm_interp_env->prev_env = NULL;for (;;) {if (jvm_stack_depth == 0) { // 為0代表所有函數(shù)執(zhí)行完畢printf("interpret bytecode done.n");break;}index = *(u1 *)jvm_pc.pc; // 設(shè)置程序計(jì)數(shù)器jvm_byte_code[index].func(jvm_byte_code[index].opcode_len, // 解釋具體指令jvm_byte_code[index].symbol, jvm_pc.pc);sleep(1);} }舉個(gè)例子
int jvm_interp_iadd(u2 len, char *symbol, void *base) {u4 tmp1, tmp2;printf("%sn", symbol);pop_operand_stack(int, tmp1)pop_operand_stack(int, tmp2)push_operand_stack(int, (tmp1 + tmp2))jvm_pc.pc += len; }jvm_interp_iadd用于解釋執(zhí)行iadd指令 首先從操作數(shù)棧中彈出2個(gè)int型變量tmp1, tmp2。
把tmp1 + tmp2相加后在壓入到操作數(shù)棧里。
下面是test7.java的執(zhí)行演示
public class test7 {static int sub(int value){int a = 1;return value - 1;}static int add(int a, int b){int sum = 0;int c;sum = a + b;c = sub(sum);return c;}public static void main(String args[]) {int a = 1, b = 2;int ret;ret = add(a, b);return ;} }?
ajvm是一個(gè)筆者正在開(kāi)發(fā)中的java虛擬機(jī), 用c和少量匯編語(yǔ)言編寫(xiě), 目的在于探究一個(gè)可運(yùn)行的java虛擬機(jī)是如何實(shí)現(xiàn)的, 目前整個(gè)jvm的source code代碼量在5000行左右, 預(yù)計(jì)控制在1w行以內(nèi),只要能運(yùn)行簡(jiǎn)單的java代碼即可。筆者希望ajvm能變成一個(gè)教學(xué)用的簡(jiǎn)單java虛擬機(jī)實(shí)現(xiàn), 幫助java程序員在陷入龐大的hotspot vm源碼之前, 能對(duì)jvm的結(jié)構(gòu)有個(gè)清晰的認(rèn)識(shí)。 ajvm是筆者利用業(yè)余時(shí)間編寫(xiě)的, 每次完成一個(gè)重要功能都會(huì)以筆記的形式發(fā)布到ata, 和大家共同學(xué)習(xí)和探討。
?
git repo: https://github.com/cloudsec/ajvm git clone git@github.com:cloudsec/ajvm.git?
最近筆者給ajvm增加了stack calltrace的功能, 用于幫助和調(diào)試jvm crash后的信息。 大家知道oracle的hotspot jvm在crash后會(huì)給出大量的crash信息, 這些信息能幫助jvm開(kāi)發(fā)人員快速定位問(wèn)題。同樣, ajvm也增加了類似的功能:
?
1、calltrace(),? 打印函數(shù)調(diào)用棧。
2、截獲SIGSEGV信號, jvm segfault后, 打印離堆棧指針rsp最近的16字節信息;打印cpu寄存器信息;打印函數調用棧。
?
首先看如何打印函數(shù)調(diào)用棧:
筆者在《理解堆棧及其利用方法 》:?http://blog.aliyun.com/964?spm=0.0.0.0.BykR2E
這篇paper中詳細(xì)講述了intel x86和x86_64下進(jìn)程堆棧的結(jié)構(gòu), 關(guān)于堆棧的基礎(chǔ)知識(shí)請(qǐng)大家參考此paper。
下面舉一個(gè)簡(jiǎn)單的例子:
?
#include #include "trace.h" #include "log.h"void test2() {calltrace();*(int *)0 = 0; }void test1() {test2(); }void test() {test1(); }int main(void) {log_init();GET_BP(top_rbp);calltrace_init();test();return 0; }?
在test2函數(shù)中調(diào)用了calltrace()函數(shù), 用來(lái)打印它的函數(shù)調(diào)用棧, 我們知道它的函數(shù)調(diào)用棧是這樣的: main->test->test1->test2->calltrace。我們想讓calltrace的輸出信息類似如下:
test2 test1 test main?
要完成此功能, 我們要利用gcc編譯器的一個(gè)特點(diǎn), 注意在-O2或-fomit-frame-pointer參數(shù)下, 這個(gè)方法就無(wú)效了。 反匯編這個(gè)程序后, 會(huì)發(fā)現(xiàn)每個(gè)函數(shù)調(diào)用的開(kāi)頭總會(huì)有這么幾句匯編指令:
?
0000000000401138 :401138: 55 push %rbp401139: 48 89 e5 mov %rsp,%rbp000000000040114e :40114e: 55 push %rbp40114f: 48 89 e5 mov %rsp,%rbp000000000040115e :40115e: 55 push %rbp40115f: 48 89 e5 mov %rsp,%rbp000000000040116e :40116e: 55 push %rbp40116f: 48 89 e5 mov %rsp,%rbp?
大家想起來(lái)了吧, rbp在intel處理器中代表的是一個(gè)堆棧中棧幀開(kāi)始的地址, rsp代表當(dāng)前堆棧棧頂?shù)牡刂贰T赾語(yǔ)言中一個(gè)函數(shù)的調(diào)用過(guò)程是這樣的:
?
test() {test1(); }?
在test函數(shù)中調(diào)用test1()的時(shí)候,? cpu會(huì)先自動(dòng)把test1函數(shù)后面的指令地址壓入test1函數(shù)的棧幀里, 然后在執(zhí)行push rbp; mov rsp, rbp指令。 我們畫(huà)一下,從main函數(shù)到calltrace函數(shù)的整個(gè)堆棧棧幀結(jié)構(gòu):
?
|...||rbp|<--| push rbp; mov rsp, rbp ctrace->|rip| | call calltrace + 1|...| ||rbp|<--| push rbp; mov rsp, rbp test2-> |rip| | call test2 + 1|...| ||rbp|<--| push rbp; mov rsp, rbp test1-> |rip| | call test1 + 1|...| ||rbp|<--| push rbp; mov rsp, rbp test-> |rip| | call test + 1|...| ||rbp|<--| push rbp; mov rsp, rbp main-> |rip| | call main + 1|...| | glibc |...|<--| rbp->unkonwn所以在正常情況下堆棧的棧幀中每個(gè)rbp后面,保存的都是上一個(gè)函數(shù)的返回地址, calltrace的實(shí)現(xiàn)其實(shí)就很簡(jiǎn)單了, 首先得到rbp的地址,然后rbp后面的地址就是ret rip的地址, 通過(guò)這個(gè)地址,我們可以解析出棧幀對(duì)應(yīng)的符號(hào)信息, 因?yàn)閍jvm通過(guò)自己解析elf文件, 來(lái)獲得符號(hào)表信息。 calltrace的大致實(shí)現(xiàn)如下:
/*
 * Print the chain of callers by following saved frame pointers.
 *
 * Starting from the current rbp, each frame is read as
 *   rbp[0] = caller's saved rbp, rbp[1] = return address,
 * until the frame pointer equals top_rbp. This only works when the program
 * is built with frame pointers (no -fomit-frame-pointer / -O2).
 *
 * Exits the process with status -1 if a symbol lookup fails.
 */
void calltrace(void)
{
    CALL_TRACE trace, prev_trace;
    uint64_t *rbp, rip, real_rip;
    int flag = 0;

    printf("Call trace:\n\n");
    GET_BP(rbp)

    /* Stop on a NULL link as well, so a broken frame chain ends the walk
     * instead of faulting inside the tracer. */
    while (rbp != NULL && rbp != top_rbp) {
        rip = *(uint64_t *)(rbp + 1);   /* return address stored above saved rbp */
        rbp = (uint64_t *)*rbp;         /* step to the caller's frame */
        /* NOTE(review): compute_real_func_addr presumably resolves the
         * target of the call that precedes `rip` - confirm. */
        real_rip = compute_real_func_addr(rip);

        if (flag == 1) {
            if (search_symbol_by_addr(real_rip, &prev_trace) == -1) {
                __error("calltrace: search symbol failed.");
                exit(-1);
            }
            /* rip - 5: presumably steps back over a 5-byte call
             * instruction to the call site - confirm. */
            prev_trace.rip = rip - 5;
            /* Offset of the previously recorded call site within this symbol. */
            prev_trace.offset = trace.rip - prev_trace.symbol_addr;
            show_calltrace(&prev_trace);
            trace = prev_trace;
        } else {
            /* First frame: only record it; it is used to compute the
             * offset printed for the next frame. */
            if (search_symbol_by_addr(real_rip, &trace) == -1) {
                __error("calltrace: search symbol failed.");
                exit(-1);
            }
            trace.rip = rip - 5;
            flag = 1;
        }
    }
    printf("\n");
}
我們剛才講ajvm還截獲了進(jìn)程的SIGSEGV信號(hào)處理流程, 在jvm初始化的時(shí)候,通過(guò)signal_init()來(lái)實(shí)現(xiàn):
?
int signal_init(void) {struct sigaction sa;sa.sa_flags = SA_SIGINFO;sa.sa_sigaction = signal_handler;sigemptyset(&sa.sa_mask);if (sigaction(SIGSEGV, &sa, NULL) == -1) {perror("sigaction");return -1;}return 0; }?
當(dāng)jvm crash后, signal_handler()函數(shù)接管了信號(hào)的處理流程, 注意此時(shí)整個(gè)jvm進(jìn)程的堆棧結(jié)構(gòu)跟calltrace結(jié)構(gòu)有一點(diǎn)不一樣:
?
|...||rbp|<--| push rbp; mov rsp, rbp do_sig->|eip| | unkown|...|<----- segfault|...||rbp|<--| push rbp; mov rsp, rbp test2-> |rip| | call test2 + 1|...| ||rbp|<--| push rbp; mov rsp, rbp test1-> |rip| | call test1 + 1|...| ||rbp|<--| push rbp; mov rsp, rbp test-> |rip| | call test + 1|...| ||rbp|<--| push rbp; mov rsp, rbp main-> |rip| | call main + 1|...| | glibc |...|<--| rbp->unkonwntest2并沒(méi)有調(diào)用do_sig函數(shù), 這是因?yàn)閠est2函數(shù)里有一個(gè)空指針引用的操作, 操作系統(tǒng)內(nèi)核在處理這個(gè)缺頁(yè)異常中斷的時(shí)候, 向進(jìn)程發(fā)送了SIGSEGV信號(hào), 通常情況下, 會(huì)直接殺死進(jìn)程, 但是這個(gè)信號(hào)被do_sig函數(shù)接管了, 我們要在這個(gè)函數(shù)里打印充足的調(diào)試信息后, 在退出進(jìn)程。
?
/*
 * SIGSEGV handler: print the faulting address, a stack dump, the register
 * dump and a call trace, then terminate the process.
 *
 * sig_num:  signal number (unused; sig_info->si_signo is printed instead)
 * sig_info: signal details; must not be NULL
 * ptr:      third SA_SIGINFO argument (unused)
 *
 * Does not return: calls exit(0) after printing, or exit(-1) if a symbol
 * lookup fails during the trace.
 */
void signal_handler(int sig_num, siginfo_t *sig_info, void *ptr)
{
    CALL_TRACE trace, prev_trace;
    uint64_t *rbp, rip, real_rip;
    int flag = 0, first_bp = 0;

    (void)sig_num;
    (void)ptr;

    assert(sig_info != NULL);
    /* si_addr is a pointer: print all 64 bits rather than passing it to %x. */
    printf("\nPid: %d segfault at addr: 0x%016llx\tsi_signo: %d\tsi_errno: %d\n\n",
           (int)getpid(), (unsigned long long)(uintptr_t)sig_info->si_addr,
           sig_info->si_signo, sig_info->si_errno);

    show_stack();
    show_registers();

    printf("Call trace:\n\n");
    GET_BP(rbp)

    /* Stop on a NULL link too: a fault inside the crash handler would hide
     * the report. */
    while (rbp != NULL && rbp != top_rbp) {
        rip = *(uint64_t *)(rbp + 1);   /* return address stored above saved rbp */
        rbp = (uint64_t *)*rbp;         /* step to the caller's frame */
        real_rip = compute_real_func_addr(rip);

        if (flag == 1) {
            if (search_symbol_by_addr(real_rip, &prev_trace) == -1) {
                __error("calltrace: search symbol failed.");
                exit(-1);
            }
            prev_trace.rip = rip - 5;
            if (first_bp == 0) {
                /* First printed frame: the preceding frame was not entered
                 * by a call, so no offset can be computed for it. */
                first_bp = 1;
                prev_trace.offset = 0;
            } else {
                prev_trace.offset = trace.rip - prev_trace.symbol_addr;
            }
            show_calltrace(&prev_trace);
            trace = prev_trace;
        } else {
            /* We are inside a signal handler: the last call frame is
             * unknown and its rip cannot be located, so the lookup result
             * is deliberately not checked. */
            search_symbol_by_addr(real_rip, &trace);
            trace.rip = rip - 5;
            flag = 1;
        }
    }
    printf("\n");

    /* NOTE(review): the process exits with status 0 after a crash -
     * confirm that callers do not need a failure status. */
    exit(0);
}
至于show_stack()和show_registers()函數就很簡單了:
#define GET_BP(x) asm("movq %%rbp, %0":"=r"(x)); #define GET_SP(x) asm("movq %%rsp, %0":"=r"(x)); #define GET_AX(x) asm("movq %%rax, %0":"=r"(x)); #define GET_BX(x) asm("movq %%rbx, %0":"=r"(x)); #define GET_CX(x) asm("movq %%rcx, %0":"=r"(x)); #define GET_DX(x) asm("movq %%rdx, %0":"=r"(x)); #define GET_SI(x) asm("movq %%rsi, %0":"=r"(x)); #define GET_DI(x) asm("movq %%rdi, %0":"=r"(x)); #define GET_R8(x) asm("movq %%r8, %0":"=r"(x)); #define GET_R9(x) asm("movq %%r9, %0":"=r"(x)); #define GET_R10(x) asm("movq %%r10, %0":"=r"(x)); #define GET_R11(x) asm("movq %%r11, %0":"=r"(x)); #define GET_R12(x) asm("movq %%r12, %0":"=r"(x)); #define GET_R13(x) asm("movq %%r13, %0":"=r"(x)); #define GET_R14(x) asm("movq %%r14, %0":"=r"(x)); #define GET_R15(x) asm("movq %%r15, %0":"=r"(x));void show_stack(void) {int i;uint64_t *rsp, *rbp;GET_SP(rsp);GET_BP(rbp);printf("Stack:\t\t\nrsp: 0x%016x\t\trbp: 0x%016x\n", rsp, rbp);for (i = 0; i < 16; i++) {printf("0x%02x ", *((unsigned char *)rsp + i));}printf("\n\n"); }void show_registers(void) {uint64_t rax, rbx, rcx, rdx, rsi, rdi;uint64_t r9, r10, r11, r12, r13, r14, r15;GET_AX(rax)GET_BX(rbx)GET_CX(rcx)GET_DX(rdx)GET_SI(rsi)GET_DI(rdi)GET_R9(r9)GET_R10(r10)GET_R11(r11)GET_R12(r12)GET_R13(r13)GET_R14(r14)GET_R15(r15)printf("Registers:\n");printf("rax = 0x%016x, rbx = 0x%016x, rcx = 0x%016x, rdx = 0x%016x\n""rsi = 0x%016x, rdi = 0x%016x, r8 = 0x%016x, r9 = 0x%016x\n""r10 = 0x%016x, r11 = 0x%016x, r12 = 0x%016x, r13 = 0x%016x\n""r14 = 0x%016x, r15 = 0x%016x\n\n",rax, rbx, rcx, rdx, rsi, rdi,r9, r10, r11, r12, r13, r14, r15); }最后演示一下ajvm在crash后的出錯(cuò)信息:
?
Pid: 8739 segfault at addr: 0x0000000000000000 si_signo: 11 si_errno: 0Stack: rsp: 0x00000000caa88680 rbp: 0x00000000caa886a0 0x90 0x87 0xa8 0xca 0xff 0x7f 0x00 0x00 0x58 0xd3 0xe4 0x3d 0x0c 0x00 0x00 0x00Registers: rax = 0x000000003de6c144, rbx = 0x000000003e151780, rcx = 0x0000000000000001, rdx = 0x0000000000000001 rsi = 0x000000003de6317a, rdi = 0x0000000000000000, r8 = 0x00000000caa886a0, r9 = 0x0000000000000000 r10 = 0x000000000040accf, r11 = 0x00000000caa88790, r12 = 0x000000003de4d358, r13 = 0x00000000caa88680 r14 = 0x00000000caa886a0, r15 = 0x000000000000000bCall trace:[<0x401457>] jvm_pc_init + 0x0/0x42 [<0x4015dc>] jvm_run + 0x4b/0x7d?
利用這個crash信息, 可以幫助程序員快速定位ajvm的bug。
?
一、 前言
ajvm是筆者正在開(kāi)發(fā)中的一個(gè)java虛擬機(jī), 想通過(guò)編寫(xiě)這個(gè)jvm幫助程序員了解jvm的具體實(shí)現(xiàn)細(xì)節(jié), 它是國(guó)內(nèi)第一個(gè)開(kāi)源的java虛擬機(jī)項(xiàng)目:https://github.com/cloudsec/ajvm, 同時(shí)筆者把它的開(kāi)發(fā)筆記也分享到了ata上。 在前面4篇筆記中, 已經(jīng)實(shí)現(xiàn)了class文件加載器, 反匯編器,jvm的crash信息處理, 同時(shí)它已經(jīng)能運(yùn)行簡(jiǎn)單的java代碼了。 在今天的這篇筆記中, 將開(kāi)始分享ajvm的內(nèi)存管理模塊是如何編寫(xiě)的。
二、內(nèi)存分配
看下面一段java代碼:
public class test6 {public static void main(String args[]) {int[] data, data1;int i;int num = 0;data = new int[2];for (i = 0; i < 2; i++) {data[i] = i;}data1 = new int[3];} }首先用javac編譯下, 然后用ajvm的反匯編器查看bytecode:
$./wvm -d test/test6.class Diassember bytecode:<init> ()V stack: 1 local: 10: aload_01: invokespecial #14: returnmain ([Ljava/lang/String;)V stack: 3 local: 50: iconst_01: istore 43: iconst_24: newarray 106: astore_17: iconst_08: istore_39: iload_310: iconst_211: if_icmpge 1314: aload_115: iload_316: iload_317: iastore18: iinc 3 121: goto 0xfffffff424: iconst_325: newarray 1027: astore_228: return源碼中data = new int[2];對(duì)應(yīng)的匯編指令為:
4: newarray 10根據(jù)jvm虛擬機(jī)規(guī)范的描述, newarray指令的作用是, 從操作數(shù)堆棧用取出data數(shù)組的元素個(gè)數(shù),然后根據(jù)newarray后面的type進(jìn)行計(jì)算要申請(qǐng)的內(nèi)存大小, type的值在虛擬機(jī)規(guī)范中如下:
#define T_BOOLEAN 4 #define T_CHAR 5 #define T_FLOAT 6 #define T_DOUBLE 7 #define T_BYTE 8 #define T_SHORT 9 #define T_INT 10 #define T_LONG 11所以10代表這個(gè)int類型的數(shù)組, 接下來(lái)就要給data這個(gè)數(shù)組從heap中分配內(nèi)存了。
void *alloc_newarray_memroy(u1 atype, int count) {void *addr = NULL;switch (atype) {case T_BOOLEAN:case T_CHAR:case T_BYTE:addr = (void *)slab_alloc(jvm_thread_mem, count * sizeof(char));break;case T_SHORT:addr = (void *)slab_alloc(jvm_thread_mem, count * sizeof(short));break;case T_INT:case T_FLOAT:addr = (void *)slab_alloc(jvm_thread_mem, count * sizeof(int));break;case T_LONG:case T_DOUBLE:addr = (void *)slab_alloc(jvm_thread_mem, count * sizeof(long long));break;default:error("bad atype value.n");return NULL;}return addr; }ajvm的內(nèi)存堆用的是slab算法, slab的內(nèi)存結(jié)構(gòu)如下:
------- ------ ------ ------|cache|--> |slab| --> |slab| -->|slab|------- ------ ------ ------|cache|-----|cache| ...----- ------ ------ ------|cache|--> |slab| --> |slab| -->|slab|----- ------ ----- ------|cache| ...------- |cache|-------|cache|-->|slab|-->|slab| -->|slab|------- ------ ------ ------源碼中的slab.c是它完整的實(shí)現(xiàn), 不熟悉slab的同學(xué)請(qǐng)自行g(shù)oogle。
三、垃圾回收
gc是java程序員普遍關(guān)心的問(wèn)題, 當(dāng)內(nèi)存不夠時(shí), 將會(huì)觸發(fā)jvm的垃圾回收機(jī)制。
ajvm使用最原始的引用計(jì)數(shù)法, 需要建立一個(gè)新的數(shù)據(jù)結(jié)構(gòu):
當(dāng)數(shù)組申請(qǐng)完內(nèi)存后, 將會(huì)建立一個(gè)新的JVM_OBJECT與其對(duì)應(yīng), ref_count被初始化為0, addr指向數(shù)組的首地址, size表示數(shù)組的大小, JVM_OBJECT將會(huì)被加入到j(luò)vm_obj_list_head鏈表中, 在這將來(lái)的垃圾回收時(shí)將會(huì)用到。
int jvm_interp_newarray(u2 len, char *symbol, void *base) {...addr = (void *)alloc_newarray_memroy(atype, count);if (!addr) {error("slab alloc failed.n");return -1;}printf("addr: 0x%xn", addr);new_obj = create_new_obj(addr, count);if (!new_obj) {error("create new obj failed.n");return -1;}... }當(dāng)數(shù)組被引用時(shí), 我們跟數(shù)組的地址在JVM_OBJECT鏈表中找到它, 并且把ref_count加1, 表示這個(gè)數(shù)組在被引用。 比如上面的:
17: iastore這條指令就會(huì)對(duì)data數(shù)組進(jìn)行引用, 我們只要在iastore的解釋代碼里, 對(duì)data對(duì)應(yīng)的ref_count加1即可:
int jvm_interp_iastore(u2 len, char *symbol, void *base) {int *addr, index, value;if (jvm_arg->disass_class) {printf("%sn", symbol);return 0;}pop_operand_stack(int, value)pop_operand_stack(int, index)pop_operand_stack(int, addr)printf("addr: 0x%xtindex: %dt%dn", addr, index, value);*(int *)(addr + index) = value;if (inc_obj_ref(addr, (&jvm_obj_list_head)) == -1) {jvm_error(VM_ERROR_INTERP, "inc jvm obj ref failed.n");return -1;}jvm_pc.pc += len;return 0; }對(duì)于數(shù)組data1, 同樣進(jìn)行了內(nèi)存分配, 但是始終沒(méi)有被引用到, 所以data1將會(huì)是gc回收時(shí)要釋放的對(duì)象。
?
void start_gc(struct list_head *list_head) {JVM_OBJECT *s;struct list_head *p, *q;list_for_each_safe(p, q, list_head) {s = list_entry(p, JVM_OBJECT, list);if (s && s->ref_count == 0) {printf("free addr: 0x%xtsize: %dtref_count: %dn",s->addr, s->size, s->ref_count);list_del(p);free_jvm_obj(s);}} }這是ajvm最簡(jiǎn)單的gc算法了, 后續(xù)將會(huì)對(duì)其進(jìn)行優(yōu)化。
四、演示執(zhí)行
下面是ajvm對(duì)上述java代碼的解釋和執(zhí)行過(guò)程:
$./wvm -c test test6 jvm pc init at: 0x630510main ([Ljava/lang/String;)V stack: 3 local : 5 code: 0x3 0x36 0x4 0x5 0xbc 0xa 0x4c 0x3 0x3e 0x1d 0x5 0xa2 0x0 0xd 0x2b 0x1d 0x1d 0x4f 0x84 0x3 0x1 0xa7 0xff 0xf4 0x6 0xbc 0xa 0x4d 0xb1 #local at: 0x630540 #stack at: 0x630554[ 1] iconst_0 pc: 0x630510 -> 0x3 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 2] istore pc: 0x630511 -> 0x36 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 3] iconst_2 pc: 0x630513 -> 0x5 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x2 0x0 0x0 [ 4] newarray pc: 0x630514 -> 0xbc #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 5] astore_1 pc: 0x630516 -> 0x4c #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x0 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 6] iconst_0 pc: 0x630517 -> 0x3 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 7] istore_3 pc: 0x630518 -> 0x3e #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 8] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 9] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x2 0x0 [ 
10] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x2 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 11] aload_1 pc: 0x63051e -> 0x2b #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 12] iload_3 pc: 0x63051f -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 13] iload_3 pc: 0x630520 -> 0x1d #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 [ 14] iastore pc: 0x630521 -> 0x4f #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 [ 15] iinc pc: 0x630522 -> 0x84 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x0 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 16] goto pc: 0x630525 -> 0xa7 [ 17] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 [ 18] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x1 
0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x2 0x0 [ 19] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x2 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x1 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 20] aload_1 pc: 0x63051e -> 0x2b #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 [ 21] iload_3 pc: 0x63051f -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 [ 22] iload_3 pc: 0x630520 -> 0x1d #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x1 [ 23] iastore pc: 0x630521 -> 0x4f #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x1 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x1 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x627c20 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 [ 24] iinc pc: 0x630522 -> 0x84 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x1 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 [ 25] goto pc: 0x630525 -> 0xa7 [ 26] iload_3 pc: 0x630519 -> 0x1d #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 
#stack: 0x2 0x0 0x0 [ 27] iconst_2 pc: 0x63051a -> 0x5 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x2 0x0 [ 28] if_icmpge pc: 0x63051b -> 0xa2 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x2 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x2 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 [ 29] iconst_3 pc: 0x630528 -> 0x6 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x3 0x0 0x0 [ 30] newarray pc: 0x630529 -> 0xbc #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x3 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x627c80 0x0 0x0 [ 31] astore_2 pc: 0x63052b -> 0x4d #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x627c80 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x0 0x2 0x0 #stack: 0x0 0x0 0x0 #local: 0x0 0x627c20 0x627c80 0x2 0x0 #stack: 0x0 0x0 0x0 [ 32] return pc: 0x63052c -> 0xb1 #local: 0x0 0x627c20 0x627c80 0x2 0x0 #stack: 0x0 0x0 0x0 jvm stack depth is zero. interpret bytecode done.總結(jié)
- 上一篇: 汇编器源码剖析
- 下一篇: 自己归纳整理的ARM THUMB指令机器