jvm開発ノート1---classファイル解析器
37197 ワード
著者:王智通(アリクラウドセキュリティエンジニア)
筆者は最近Java仮想マシンに興味を持ち、最も簡単なJVMがどのように書かれているのかを知りたくて、「Java仮想マシン仕様」を読みました。この仕様は、Intelの開発マニュアルと同様に、JVM開発者なら誰もが把握しておくべきものです。Javaバイトコードを解釈して実行するには、まずJavaのclassファイルからCode属性を解析しなければなりません。筆者の見るところ、classファイルの構造はELFファイルの構造よりずっと複雑ですが、複雑な構造であっても、仕様書の構造定義と辛抱強く照らし合わせて一つずつ解析していけば大丈夫です。数日の努力の末、Cでclassファイル解析器を実装しました。現在解析できるのは、仕様で規定されているJVMの最も基本的な属性であるCode、StackMapTable、LineNumberTableだけです。もちろん、開発が進むにつれて機能は充実していきます。以下では、classファイルのフォーマットを解析する中で出会ったいくつかの問題を説明します。これから自分で解析器を書く方が回り道をせずに済む助けになれば幸いです。1、解析性能を高めるために、解析のたびにreadでディスク上のファイルを読むのではなく、mmapでclassファイル全体をメモリにマッピングします。
2、Javaのclassファイルはビッグエンディアン(big-endian)のバイト順、x86はリトルエンディアン(little-endian)のバイト順を使用しているため、読み込む際にシフト演算でバイト順を変換する必要があります。
たとえば、4バイトのコンテンツを読みます.
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////以下はすべてのソースコードです.
筆者は最近Java仮想マシンに興味を持ち、最も簡単なJVMがどのように書かれているのかを知りたくて、「Java仮想マシン仕様」を読みました。この仕様は、Intelの開発マニュアルと同様に、JVM開発者なら誰もが把握しておくべきものです。Javaバイトコードを解釈して実行するには、まずJavaのclassファイルからCode属性を解析しなければなりません。筆者の見るところ、classファイルの構造はELFファイルの構造よりずっと複雑ですが、複雑な構造であっても、仕様書の構造定義と辛抱強く照らし合わせて一つずつ解析していけば大丈夫です。数日の努力の末、Cでclassファイル解析器を実装しました。現在解析できるのは、仕様で規定されているJVMの最も基本的な属性であるCode、StackMapTable、LineNumberTableだけです。もちろん、開発が進むにつれて機能は充実していきます。以下では、classファイルのフォーマットを解析する中で出会ったいくつかの問題を説明します。これから自分で解析器を書く方が回り道をせずに済む助けになれば幸いです。1、解析性能を高めるために、解析のたびにreadでディスク上のファイルを読むのではなく、mmapでclassファイル全体をメモリにマッピングします。
/*
 * mmap_class_file - map a whole class file read-only into memory.
 * @class_file: path of the .class file to open.
 *
 * On success the variables class_fd, class_file_len and class_start_mem
 * hold the open descriptor, the file size and the start of the mapping.
 *
 * Returns 0 on success, -1 on failure (after reporting it with perror()).
 */
int mmap_class_file(const char *class_file)
{
	struct stat f_stat;

	class_fd = open(class_file, O_RDONLY);
	if (class_fd == -1) {
		perror("open");
		return -1;
	}

	/* Query the descriptor we just opened instead of the path again. */
	if (fstat(class_fd, &f_stat) == -1) {
		perror("fstat");
		close(class_fd);
		return -1;
	}

	class_file_len = f_stat.st_size;
	printf("%s file len: %d\n", class_file, class_file_len);

	class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE,
			       class_fd, 0);
	/* mmap() reports failure with MAP_FAILED, never with NULL. */
	if (class_start_mem == MAP_FAILED) {
		perror("mmap");
		close(class_fd);
		return -1;
	}
	printf("mmap %s at %p\n", class_file, class_start_mem);

	return 0;
}
2、Javaのclassファイルはビッグエンディアン(big-endian)のバイト順、x86はリトルエンディアン(little-endian)のバイト順を使用しているため、読み込む際にシフト演算でバイト順を変換する必要があります。
/*
 * Byte-order helpers for class-file fields.
 *
 * @p is a value that was loaded from the mapped file in host order; the
 * macros byte-swap it unconditionally and store the result in @s, so they
 * are only correct on a little-endian host.
 *
 * Both arguments are parenthesized and @p is evaluated exactly once, so an
 * expression such as `lo | hi` can be passed safely.
 *
 * The trailing ';' after `while (0)` is kept on purpose: existing call sites
 * invoke these macros without a terminating semicolon.
 */
#define CLASS_READ_U4(s, p) \
	do { \
		unsigned int class_read_v_ = (unsigned int)(p); \
		(s) = (((class_read_v_ >> 24) & 0x000000ff) | \
		       ((class_read_v_ >> 8) & 0x0000ff00) | \
		       ((class_read_v_ << 24) & 0xff000000) | \
		       ((class_read_v_ << 8) & 0x00ff0000)); \
	} while (0);
#define CLASS_READ_U2(s, p) \
	do { \
		unsigned int class_read_v_ = (unsigned int)(p); \
		(s) = (((class_read_v_ >> 8) & 0x00ff) | \
		       ((class_read_v_ << 8) & 0xff00)); \
	} while (0);
/* A single byte has no byte order; this is a plain assignment. */
#define CLASS_READ_U1(s, p) \
	do { \
		(s) = (p); \
	} while (0);
たとえば、4バイトのコンテンツを読みます.
u4 class_magic;
/* read class magic number. */
CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
p_mem = 4;
printf("magic: 0x%x
", class_magic);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////以下はすべてのソースコードです.
jvm.h
#ifndef JVM_H
#define JVM_H
/* Magic number expected in the first four bytes of a class file. */
#define JVM_CLASS_MAGIC 0xcafebabe

/*
 * Byte-order helpers for class-file fields.
 *
 * @p is a value that was loaded from the mapped file in host order; the
 * macros byte-swap it unconditionally and store the result in @s, so they
 * are only correct on a little-endian host.
 *
 * Both arguments are parenthesized and @p is evaluated exactly once, so an
 * expression such as `lo | hi` can be passed safely.
 *
 * The trailing ';' after `while (0)` is kept on purpose: existing call sites
 * invoke these macros without a terminating semicolon.
 */
#define CLASS_READ_U4(s, p) \
	do { \
		unsigned int class_read_v_ = (unsigned int)(p); \
		(s) = (((class_read_v_ >> 24) & 0x000000ff) | \
		       ((class_read_v_ >> 8) & 0x0000ff00) | \
		       ((class_read_v_ << 24) & 0xff000000) | \
		       ((class_read_v_ << 8) & 0x00ff0000)); \
	} while (0);
#define CLASS_READ_U2(s, p) \
	do { \
		unsigned int class_read_v_ = (unsigned int)(p); \
		(s) = (((class_read_v_ >> 8) & 0x00ff) | \
		       ((class_read_v_ << 8) & 0xff00)); \
	} while (0);
/* A single byte has no byte order; this is a plain assignment. */
#define CLASS_READ_U1(s, p) \
	do { \
		(s) = (p); \
	} while (0);
/* Copy @len raw bytes from @p to @s; no terminator is appended. */
#define CLASS_READ_STRING(s, p, len) \
	do { \
		memcpy((s), (p), (len)); \
	} while (0);
/*
 * Unsigned integer aliases named after the u4/u2/u1 field notation used for
 * class-file items. NOTE(review): this assumes unsigned int is 4 bytes and
 * unsigned short is 2 bytes on the target -- confirm for other platforms.
 */
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;
/* Constant pool entry tags (the one-byte tag that starts every entry). */
#define CONSTANT_Class 7
#define CONSTANT_Fieldref 9
#define CONSTANT_Methodref 10
#define CONSTANT_InterfaceMethodref 11
#define CONSTANT_String 8
#define CONSTANT_Integer 3
#define CONSTANT_Float 4
#define CONSTANT_Long 5
#define CONSTANT_Double 6
#define CONSTANT_NameAndType 12
#define CONSTANT_Utf8 1
#define CONSTANT_MethodHandle 15
#define CONSTANT_MethodType 16
#define CONSTANT_InvokeDynamic 18

/* Class-level access flag bits. */
#define ACC_PUBLIC 0x0001
#define ACC_FINAL 0x0010
#define ACC_SUPER 0x0020
#define ACC_INTERFACE 0x0200
#define ACC_ABSTRACT 0x0400
#define ACC_SYNTHETIC 0x1000
#define ACC_ANNOTATION 0x2000
#define ACC_ENUM 0x4000

/* Method-level access flag bits. */
#define METHOD_ACC_PUBLIC 0x0001
#define METHOD_ACC_PRIVATE 0x0002
#define METHOD_ACC_PROTECTED 0x0004
#define METHOD_ACC_STATIC 0x0008
#define METHOD_ACC_FINAL 0x0010
#define METHOD_ACC_SYNCHRONIZED 0x0020
/* Misspelled historical name, kept so existing users keep compiling. */
#define METHOD_ACC_SYNCHRONIED METHOD_ACC_SYNCHRONIZED
#define METHOD_ACC_BRIDGE 0x0040
#define METHOD_ACC_VARARGS 0x0080
#define METHOD_ACC_NATIVE 0x0100
#define METHOD_ACC_ABSTRACT 0x0400
#define METHOD_ACC_STRICT 0x0800
#define METHOD_ACC_SYNTHETIC 0x1000

/* Verification type tags found inside StackMapTable frames. */
#define ITEM_Top 0
#define ITEM_Integer 1
#define ITEM_Float 2
#define ITEM_Double 3
#define ITEM_Long 4
#define ITEM_Null 5
#define ITEM_UninitializedThis 6
#define ITEM_Object 7
#define ITEM_Uninitialized 8
/*
 * Layouts for constant pool entries.
 *
 * Every CONSTANT_*_info struct below describes only the bytes that FOLLOW the
 * one-byte tag: the tag member is left commented out because the parser reads
 * the tag on its own before it reads the remaining members.
 * All structs are declared packed.
 */

/* One slot of the parser's own constant table: slot number plus a pointer to
 * the data recorded for that slot (the parser fills it for Utf8 entries). */
struct constant_info_st {
u2 index;
u1 *base;
}__attribute__ ((packed));
/* Generic entry view: tag byte followed by a variable-length payload. */
struct cp_info {
u1 tag;
u1 info[];
}__attribute__ ((packed));
/* Class reference: constant pool index of the name. */
struct CONSTANT_Class_info {
// u1 tag; (read separately)
u2 name_index;
}__attribute__ ((packed));
/* Field reference: class index plus name-and-type index. */
struct CONSTANT_Fieldref_info {
// u1 tag; (read separately)
u2 class_index;
u2 name_and_type_index;
}__attribute__ ((packed));
/* Method reference: class index plus name-and-type index. */
struct CONSTANT_Methodref_info {
// u1 tag; (read separately)
u2 class_index;
u2 name_and_type_index;
}__attribute__ ((packed));
/* Interface method reference.
 * NOTE(review): member name "name_and_type_inex" looks like a typo for
 * "name_and_type_index"; renaming it would change the struct's interface. */
struct CONSTANT_InterfaceMethodref_info {
// u1 tag; (read separately)
u2 class_index;
u2 name_and_type_inex;
}__attribute__ ((packed));
/* String constant: constant pool index of the string data. */
struct CONSTANT_String_info {
// u1 tag; (read separately)
u2 string_index;
}__attribute__ ((packed));
/* 4-byte integer constant. */
struct CONSTANT_Integer_info {
// u1 tag; (read separately)
u4 bytes;
}__attribute__ ((packed));
/* 4-byte float constant, kept as raw bytes. */
struct CONSTANT_Float_info {
// u1 tag; (read separately)
u4 bytes;
}__attribute__ ((packed));
/* 8-byte long constant split into two 4-byte halves. */
struct CONSTANT_Long_info {
// u1 tag; (read separately)
u4 high_bytes;
u4 low_bytes;
}__attribute__ ((packed));
/* 8-byte double constant split into two 4-byte halves. */
struct CONSTANT_Double_info {
// u1 tag; (read separately)
u4 high_bytes;
u4 low_bytes;
}__attribute__ ((packed));
/* Name-and-type pair: two constant pool indexes. */
struct CONSTANT_NameAndType_info {
// u1 tag; (read separately)
u2 name_index;
u2 descriptor_index;
}__attribute__ ((packed));
/* Utf8 data: a 2-byte length followed by that many bytes. */
struct CONSTANT_Utf8_info {
// u1 tag; (read separately)
u2 length;
u1 bytes[];
}__attribute__ ((packed));
/* Method handle: a 1-byte kind followed by a 2-byte index. */
struct CONSTANT_MethodHandle_info {
// u1 tag; (read separately)
u1 reference_kind;
u2 reference_index;
}__attribute__ ((packed));
/* Method type: constant pool index of the descriptor. */
struct CONSTANT_MethodType_info {
// u1 tag; (read separately)
u2 descriptor_index;
}__attribute__ ((packed));
/* invokedynamic call site: bootstrap method index plus name-and-type index. */
struct CONSTANT_InvokeDynamic_info {
// u1 tag; (read separately)
u2 bootstrap_method_attr_index;
u2 name_and_type_index;
}__attribute__ ((packed));
#endif
////////////////////////////////////////////////////////////////////////
classreader.c:
/*
* classreader.c - jvm class file parser.
*
* (c) wzt 2012 http://www.cloud-sec.org
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "jvm.h"
/* Parser state shared by every function in this file. */
/* Descriptor returned by open() for the class file. */
static int class_fd;
/* Size of the class file in bytes, taken from st_size. */
static int class_file_len;
/* Start address of the read-only mapping of the class file. */
static void *class_start_mem;
/* Read cursor: address of the next unparsed byte inside the mapping. */
static char *p_mem;
/* Table allocated with one slot per constant pool index. */
static struct constant_info_st *constant_info;
/*
 * mmap_class_file - map a whole class file read-only into memory.
 * @class_file: path of the .class file to open.
 *
 * On success class_fd, class_file_len and class_start_mem hold the open
 * descriptor, the file size and the start of the mapping.
 *
 * Returns 0 on success, -1 on failure (after reporting it with perror()).
 */
int mmap_class_file(const char *class_file)
{
	struct stat f_stat;

	class_fd = open(class_file, O_RDONLY);
	if (class_fd == -1) {
		perror("open");
		return -1;
	}

	/* Query the descriptor we just opened instead of the path again. */
	if (fstat(class_fd, &f_stat) == -1) {
		perror("fstat");
		close(class_fd);
		return -1;
	}

	class_file_len = f_stat.st_size;
	printf("%s file len: %d\n", class_file, class_file_len);

	class_start_mem = mmap(NULL, class_file_len, PROT_READ, MAP_PRIVATE,
			       class_fd, 0);
	/* mmap() reports failure with MAP_FAILED, never with NULL. */
	if (class_start_mem == MAP_FAILED) {
		perror("mmap");
		close(class_fd);
		return -1;
	}
	printf("mmap %s at %p\n", class_file, class_start_mem);

	return 0;
}
/*
 * mmap_exit - release the class file mapping and its descriptor.
 *
 * Returns 0 on success, -1 if munmap() failed. The descriptor is closed in
 * both cases so that a failed unmap does not leak it.
 */
int mmap_exit(void)
{
	int ret = 0;

	if (munmap(class_start_mem, class_file_len) == -1) {
		perror("munmap");
		ret = -1;
	}
	close(class_fd);

	return ret;
}
int parse_class_magic(void)
{
u4 class_magic;
/* read class magic number. */
CLASS_READ_U4(class_magic, (*(u4 *)p_mem))
p_mem = 4;
printf("magic: 0x%x
", class_magic);
if (class_magic != JVM_CLASS_MAGIC) {
printf("jvm class magic not match.
");
return -1;
}
printf("jvm class magic match: 0x%x
", class_magic);
return 0;
}
int parse_class_version(void)
{
u2 minor_version, major_version;
u2 constant_pool_count;
/* read class minor_version. */
CLASS_READ_U2(minor_version, (*(u2 *)p_mem))
p_mem = 2;
printf("jvm class minor_version: %d
", minor_version);
/* read class major_version. */
CLASS_READ_U2(major_version, (*(u2 *)p_mem))
p_mem = 2;
printf("jvm class major_version: %d
", major_version);
return 0;
}
int parse_class_constant(void)
{
u2 constant_pool_count;
u1 constant_tag;
u2 idx;
printf("
-----------parse contant pool count----------------------:
");
/* read constant_pool_count */
CLASS_READ_U2(constant_pool_count, (*(u2 *)p_mem))
p_mem = 2;
printf("jvm constant_pool_count: %d
", constant_pool_count);
constant_info = (struct constant_info_st *)
malloc(sizeof(struct constant_info_st) *
constant_pool_count);
if (!constant_info) {
printf("Malloc failed.
");
return -1;
}
for (idx = 1; idx <= constant_pool_count - 1; idx ) {
CLASS_READ_U1(constant_tag, (*(u1 *)p_mem))
p_mem = 1;
printf("- idx: - constant tag: %d\t", idx, (int)constant_tag);
switch (constant_tag) {
case CONSTANT_Fieldref:
case CONSTANT_Methodref:
case CONSTANT_InterfaceMethodref:
{
struct CONSTANT_Methodref_info methodref_info;
CLASS_READ_U2(methodref_info.class_index, (*(u2 *)p_mem));
p_mem = 2;
assert(methodref_info.class_index > 0 &&
methodref_info.class_index < constant_pool_count);
CLASS_READ_U2(methodref_info.name_and_type_index, (*(u2 *)p_mem));
p_mem = 2;
assert(methodref_info.class_index > 0 &&
methodref_info.class_index < constant_pool_count);
printf("class_index: %d, name_and_type_index: %d
",
methodref_info.class_index,
methodref_info.name_and_type_index);
break;
}
case CONSTANT_Class:
{
struct CONSTANT_Class_info class_info;
CLASS_READ_U2(class_info.name_index, (*(u2 *)p_mem));
p_mem = 2;
assert(class_info.name_index > 0 &&
class_info.name_index < constant_pool_count);
printf("name_index: %d
", class_info.name_index);
break;
}
case CONSTANT_String:
{
struct CONSTANT_String_info string_info;
CLASS_READ_U2(string_info.string_index, (*(u2 *)p_mem));
p_mem = 2;
assert(string_info.string_index > 0 &&
string_info.string_index < constant_pool_count);
printf("string index: %d
", string_info.string_index);
break;
}
case CONSTANT_Long:
{
struct CONSTANT_Long_info long_info;
CLASS_READ_U2(long_info.high_bytes, (*(u2 *)p_mem));
p_mem = 2;
CLASS_READ_U2(long_info.low_bytes, (*(u2 *)p_mem));
p_mem = 2;
printf("high bytes: %d, low bytes: %d
",
long_info.high_bytes, long_info.low_bytes);
break;
}
case CONSTANT_Integer:
{
struct CONSTANT_Integer_info integer_info;
CLASS_READ_U4(integer_info.bytes, (*(u4 *)p_mem));
p_mem = 4;
printf("bytes: %d
", integer_info.bytes);
break;
}
case CONSTANT_Float:
{
struct CONSTANT_Float_info float_info;
CLASS_READ_U4(float_info.bytes, (*(u4 *)p_mem));
p_mem = 4;
printf("bytes: %d
", float_info.bytes);
break;
}
case CONSTANT_Double:
{
struct CONSTANT_Double_info double_info;
CLASS_READ_U4(double_info.high_bytes, (*(u4 *)p_mem));
p_mem = 4;
CLASS_READ_U4(double_info.low_bytes, (*(u4 *)p_mem));
p_mem = 4;
printf("high_bytes: %d, low_bytes: %d
",
double_info.high_bytes, double_info.low_bytes);
break;
}
case CONSTANT_NameAndType:
{
struct CONSTANT_NameAndType_info name_type_info;
CLASS_READ_U2(name_type_info.name_index, (*(u2 *)p_mem));
p_mem = 2;
CLASS_READ_U2(name_type_info.descriptor_index, (*(u2 *)p_mem));
p_mem = 2;
printf("name_index: %d, descriptor_index: %d
",
name_type_info.name_index, name_type_info.descriptor_index);
break;
}
case CONSTANT_MethodHandle:
{
struct CONSTANT_MethodHandle_info method_handle_info;
CLASS_READ_U1(method_handle_info.reference_kind, (*(u1 *)p_mem));
p_mem = 1;
CLASS_READ_U2(method_handle_info.reference_index, (*(u2 *)p_mem));
p_mem = 2;
printf("reference_kind: %d, reference_index: %d
",
method_handle_info.reference_kind,
method_handle_info.reference_index);
break;
}
case CONSTANT_MethodType:
{
struct CONSTANT_MethodType_info method_type_info;
CLASS_READ_U2(method_type_info.descriptor_index, (*(u2 *)p_mem));
p_mem = 2;
printf("descriptor_index %d
", method_type_info.descriptor_index);
break;
}
case CONSTANT_InvokeDynamic:
{
struct CONSTANT_InvokeDynamic_info invoke_dyc_info;
CLASS_READ_U2(invoke_dyc_info.bootstrap_method_attr_index, (*(u2 *)p_mem));
p_mem = 2;
CLASS_READ_U2(invoke_dyc_info.name_and_type_index, (*(u2 *)p_mem));
p_mem = 2;
printf("bootstrap_method_attr_index: %d, name_and_type_index: %d
",
invoke_dyc_info.bootstrap_method_attr_index,
invoke_dyc_info.name_and_type_index);
break;
}
case CONSTANT_Utf8:
{
u2 len;
char *buf;
CLASS_READ_U2(len, (*(u2 *)p_mem));
p_mem = 2;
buf = malloc(len 1);
buf[len] = '\0';
assert(buf != NULL);
memcpy(buf, p_mem, len);
printf("len: %d\t%s
", len, buf);
p_mem = len;
constant_info[idx].index = idx;
constant_info[idx].base = buf;
break;
}
default:
;
}
}
printf("
");
/*
for (idx = 1; idx <= constant_pool_count - 1; idx )
printf("%d: %s
", constant_info[idx].index, constant_info[idx].base);
*/
return 0;
out:
mmap_exit();
return -1;
}
int parse_class_access_flag(void)
{
u2 access_flag;
/* read class access flag. */
CLASS_READ_U2(access_flag, (*(u2 *)p_mem))
p_mem = 2;
printf("access_flag: 0x%x
", access_flag);
return 0;
}
int parse_class_this_super(void)
{
u2 this_class;
u2 super_class;
CLASS_READ_U2(this_class, (*(u2 *)p_mem))
p_mem = 2;
CLASS_READ_U2(super_class, (*(u2 *)p_mem))
p_mem = 2;
printf("this_class: %d\tsuper_class: %d
", this_class, super_class);
return 0;
}
int parse_class_interface(void)
{
u2 interfaces_count;
u2 idx, index;
CLASS_READ_U2(interfaces_count, (*(u2 *)p_mem))
p_mem = 2;
printf("interfaces_count: %d
", interfaces_count);
for (idx = 0; idx < interfaces_count; idx ) {
CLASS_READ_U2(index, (*(u2 *)p_mem));
p_mem = 2;
printf("index: %d
", index);
}
return 0;
}
int parse_class_filed(void)
{
u2 fileds_count;
u2 idx;
CLASS_READ_U2(fileds_count, (*(u2 *)p_mem))
p_mem = 2;
printf("filed_count: %d
", fileds_count);
return 0;
}
int __parse_exception_table(int len)
{
u2 start_pc, end_pc;
u2 handler_pc, catch_type;
u2 idx;
for (idx = 0; idx < len; idx ) {
CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("start_pc: %d
", start_pc);
CLASS_READ_U2(end_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("end_pc: %d
", end_pc);
CLASS_READ_U2(handler_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("handler_pc: %d
", handler_pc);
CLASS_READ_U2(catch_type, (*(u2 *)p_mem))
p_mem = 2;
printf("catch_type: %d
", catch_type);
}
return 0;
}
int __parse_line_number_table(void)
{
u4 attribute_length;
u2 line_number_table_length;
u2 start_pc, line_number;
u2 idx;
CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\t\tattribute_length: %d
", attribute_length);
CLASS_READ_U2(line_number_table_length, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tline_number_table_length: %d
", line_number_table_length);
for (idx = 0; idx < line_number_table_length; idx ) {
CLASS_READ_U2(start_pc, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tstart_pc: %d
", start_pc);
CLASS_READ_U2(line_number, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tline_number: %d
", line_number);
}
return 0;
}
/*
 * __parse_verification_type_info - read and print @number verification types.
 * @number: how many verification_type_info items to read.
 *
 * Every item starts with a one-byte tag. ITEM_Object and ITEM_Uninitialized
 * are followed by an extra u2 (constant pool index / offset).
 *
 * Returns 0 on success, -1 when an unknown tag is met.
 */
int __parse_verification_type_info(u1 number)
{
	u1 idx, tag;

	for (idx = 0; idx < number; idx++) {
		CLASS_READ_U1(tag, (*(u1 *)p_mem));
		p_mem += 1;
		printf("\t\ttag: %d\n", tag);

		switch (tag) {
		case ITEM_Top:
			printf("\t\tITEM_Top.\n");
			break;
		case ITEM_Integer:
			printf("\t\tITEM_Integer.\n");
			break;
		case ITEM_Float:
			printf("\t\tITEM_float.\n");
			break;
		case ITEM_Double:
			printf("\t\tITEM_Double.\n");
			break;
		case ITEM_Long:
			printf("\t\tITEM_Long.\n");
			break;
		case ITEM_Null:
			/* Used to print "ITEM_Long." by copy-paste mistake. */
			printf("\t\tITEM_Null.\n");
			break;
		case ITEM_UninitializedThis:
			printf("\t\tITEM_UninitializedThis.\n");
			break;
		case ITEM_Object:
		{
			u2 cpool_index;

			printf("\t\tITEM_Object.\n");
			CLASS_READ_U2(cpool_index, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\tcpool_index: %d\n", cpool_index);
			break;
		}
		case ITEM_Uninitialized:
		{
			u2 offset;

			printf("\t\tITEM_Uninitialized.\n");
			CLASS_READ_U2(offset, (*(u2 *)p_mem));
			p_mem += 2;
			printf("\t\toffset: %d\n", offset);
			break;
		}
		default:
			return -1;
		}
	}

	return 0;
}
int __parse_stack_map_frame(u2 number)
{
u1 frame_type;
u1 offset_delta;
u2 idx;
u1 stack_num;
u1 locals_num;
u1 local_idx;
for (idx = 0; idx < number; idx ) {
CLASS_READ_U1(frame_type, (*(u1 *)p_mem))
p_mem = 1;
printf("\t\tframe_type: %d
", frame_type);
if (frame_type >= 0 && frame_type <= 63) {
offset_delta = frame_type;
printf("\t\tsame_frame\toffset_delta: %d
", offset_delta);
}
if (frame_type >= 64 && frame_type <= 127) {
offset_delta = frame_type - 64;
stack_num = 1;
printf("\t\tsame_locals_l_stack_item_frame\toffset_delta: %d
",
offset_delta);
__parse_verification_type_info(stack_num);
}
if (frame_type == 247) {
stack_num = 1;
CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tsame_locals_l_stack_item_frame_extendedn\toffset_delta: %d
",
offset_delta);
__parse_verification_type_info(stack_num);
}
if (frame_type >= 248 && frame_type <= 250) {
CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tsame_locals_l_stack_item_frame_extended\toffset_delta: %d
",
offset_delta);
}
if (frame_type == 251) {
CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tsame_frame_extended\toffset_delta: %d
", offset_delta);
}
if (frame_type >= 252 && frame_type <= 254) {
CLASS_READ_U2(offset_delta, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tappend_frame\toffset_delta: %d
", offset_delta);
locals_num = frame_type - 251;
printf("\t\tlocals_num: %d
", locals_num);
__parse_verification_type_info(locals_num);
}
}
}
int __parse_stack_map_table(void)
{
u4 attribute_length;
u2 number_of_entries;
u2 idx;
CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\t\tattribute_length: %d
", attribute_length);
CLASS_READ_U2(number_of_entries, (*(u2 *)p_mem))
p_mem = 2;
printf("\t\tnumber_of_entries: %d
", number_of_entries);
__parse_stack_map_frame(number_of_entries);
return 0;
}
/* attribute_name_index has been parsed before. */
int parse_code_attribute(void)
{
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 *code;
u2 exception_table_length;
u2 attributes_count;
u2 idx;
CLASS_READ_U4(attribute_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\tattribute_length: %d
", attribute_length);
CLASS_READ_U2(max_stack, (*(u2 *)p_mem))
p_mem = 2;
printf("\tmax_stack: %d
", max_stack);
CLASS_READ_U2(max_locals, (*(u2 *)p_mem))
p_mem = 2;
printf("\tmax_locals: %d
", max_locals);
CLASS_READ_U4(code_length, (*(u4 *)p_mem))
p_mem = 4;
printf("\tcode_length: %d
", code_length);
code = (u1 *)malloc(code_length 1);
if (!code) {
printf("Malloc failed.
");
return -1;
}
memcpy(code, p_mem, code_length);
code[code_length] = '\0';
p_mem = code_length;
CLASS_READ_U2(exception_table_length, (*(u2 *)p_mem))
p_mem = 2;
printf("\texception_table_length: %d
", exception_table_length);
__parse_exception_table(exception_table_length);
CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
p_mem = 2;
printf("\tattributes_count: %d
", attributes_count);
/* parse attributes */
for (idx = 0; idx < attributes_count; idx ) {
CLASS_READ_U2(attribute_name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("\tidx: %d attribute_name_index: %d", idx 1, attribute_name_index);
if (!strcmp(constant_info[attribute_name_index].base, "LineNumberTable")) {
printf("
\tparse LineNumberTable:
");
__parse_line_number_table();
}
if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
printf("
\tparse StackMapTable:
");
__parse_stack_map_table();
}
if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTable")) {
;
}
if (!strcmp(constant_info[attribute_name_index].base, "LocalVariableTypeTable")) {
;
}
if (!strcmp(constant_info[attribute_name_index].base, "StackMapTable")) {
;
}
}
return 0;
}
int parse_class_method(void)
{
u2 method_count;
u2 access_flags, name_index;
u2 descriptor_index, attributes_count;
u2 idx;
printf("
---------------parse class method-------------------------:
");
CLASS_READ_U2(method_count, (*(u2 *)p_mem))
p_mem = 2;
printf("method_count: %d
", method_count);
for (idx = 0; idx < method_count; idx ) {
CLASS_READ_U2(access_flags, (*(u2 *)p_mem))
p_mem = 2;
printf("access_flags: 0x%x
", access_flags);
CLASS_READ_U2(name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("name_index: %d
", name_index);
CLASS_READ_U2(descriptor_index, (*(u2 *)p_mem))
p_mem = 2;
printf("descriptor_index: %d
", descriptor_index);
CLASS_READ_U2(attributes_count, (*(u2 *)p_mem))
p_mem = 2;
printf("attributes_count: %d
", attributes_count);
/* parse attributes */
CLASS_READ_U2(name_index, (*(u2 *)p_mem))
p_mem = 2;
printf("attritbutes name_index: %d
", name_index);
if (!strcmp(constant_info[name_index].base, "Code")) {
printf("parse code attribute:
");
parse_code_attribute();
}
if (!strcmp(constant_info[name_index].base, "Exceptions")) {
;
}
if (!strcmp(constant_info[name_index].base, "Signature")) {
;
}
}
return 0;
}
/*
 * jvm_parse_class_file - map a class file and run every parsing stage on it.
 * @class_file: path of the class file; must not be NULL.
 *
 * The stages run in file order and stop at the first one that returns -1.
 * The mapping is released before returning, whatever the outcome.
 *
 * Returns 0 when every stage succeeded, -1 otherwise.
 */
int jvm_parse_class_file(const char *class_file)
{
	int result = -1;

	assert(class_file != NULL);

	if (mmap_class_file(class_file) == -1)
		return -1;

	/* Start reading at the first byte of the mapping. */
	p_mem = class_start_mem;

	/* && short-circuits, so a failing stage stops the chain. */
	if (parse_class_magic() != -1 &&
	    parse_class_version() != -1 &&
	    parse_class_constant() != -1 &&
	    parse_class_access_flag() != -1 &&
	    parse_class_this_super() != -1 &&
	    parse_class_interface() != -1 &&
	    parse_class_filed() != -1 &&
	    parse_class_method() != -1)
		result = 0;

	mmap_exit();
	return result;
}
/*
 * jvm_usage - print a one-line usage message to stdout.
 * @proc: program name to show, normally argv[0].
 */
void jvm_usage(const char *proc)
{
	fprintf(stdout, "usage: %s <class_file>\n", proc);
}
/*
 * main - parse the class file named by the first command-line argument.
 *
 * Prints the usage line and returns 0 when no file is given. Otherwise
 * returns 0 when parsing succeeded and 1 when it failed, so that callers
 * can detect a bad or unreadable class file.
 */
int main(int argc, char **argv)
{
	if (argc < 2) {
		jvm_usage(argc > 0 ? argv[0] : "classreader");
		return 0;
	}

	if (jvm_parse_class_file(argv[1]) == -1)
		return 1;

	return 0;
}
////////////////////////////////////////////////////////////////////////////
[email protected] # gcc -o classreader classreader.c -w
[email protected] # ./classreader test.class
test.class file len: 462
mmap test.class at 0x2b0b78fa5000
magic: 0xcafebabe
jvm class magic match: 0xcafebabe
jvm class minor_version: 0
jvm class major_version: 50
-----------parse contant pool count----------------------:
jvm constant_pool_count: 30
- idx: 1 constant tag: 10 class_index: 6, name_and_type_index: 16
- idx: 2 constant tag: 9 class_index: 17, name_and_type_index: 18
- idx: 3 constant tag: 8 string index: 19
- idx: 4 constant tag: 10 class_index: 20, name_and_type_index: 21
- idx: 5 constant tag: 7 name_index: 22
- idx: 6 constant tag: 7 name_index: 23
- idx: 7 constant tag: 1 len: 6
- idx: 8 constant tag: 1 len: 3 ()V
- idx: 9 constant tag: 1 len: 4 Code
- idx: 10 constant tag: 1 len: 15 LineNumberTable
- idx: 11 constant tag: 1 len: 4 main
- idx: 12 constant tag: 1 len: 22 ([Ljava/lang/String;)V
- idx: 13 constant tag: 1 len: 13 StackMapTable
- idx: 14 constant tag: 1 len: 10 SourceFile
- idx: 15 constant tag: 1 len: 9 test.java
- idx: 16 constant tag: 12 name_index: 7, descriptor_index: 8
- idx: 17 constant tag: 7 name_index: 24
- idx: 18 constant tag: 12 name_index: 25, descriptor_index: 26
- idx: 19 constant tag: 1 len: 4 hehe
- idx: 20 constant tag: 7 name_index: 27
- idx: 21 constant tag: 12 name_index: 28, descriptor_index: 29
- idx: 22 constant tag: 1 len: 4 test
- idx: 23 constant tag: 1 len: 16 java/lang/Object
- idx: 24 constant tag: 1 len: 16 java/lang/System
- idx: 25 constant tag: 1 len: 3 out
- idx: 26 constant tag: 1 len: 21 Ljava/io/PrintStream;
- idx: 27 constant tag: 1 len: 19 java/io/PrintStream
- idx: 28 constant tag: 1 len: 7 println
- idx: 29 constant tag: 1 len: 21 (Ljava/lang/String;)V
access_flag: 0x21
this_class: 5 super_class: 6
interfaces_count: 0
filed_count: 0
---------------parse class method-------------------------:
method_count: 2
access_flags: 0x1
name_index: 7
descriptor_index: 8
attributes_count: 1
attritbutes name_index: 9
parse code attribute:
attribute_length: 29
max_stack: 1
max_locals: 1
code_length: 5
exception_table_length: 0
attributes_count: 1
idx: 1 attribute_name_index: 10
parse LineNumberTable:
attribute_length: 6
line_number_table_length: 1
start_pc: 0
line_number: 5
access_flags: 0x9
name_index: 11
descriptor_index: 12
attributes_count: 1
attritbutes name_index: 9
parse code attribute:
attribute_length: 77
max_stack: 2
max_locals: 2
code_length: 24
exception_table_length: 0
attributes_count: 2
idx: 1 attribute_name_index: 10
parse LineNumberTable:
attribute_length: 22
line_number_table_length: 5
start_pc: 0
line_number: 7
start_pc: 2
line_number: 9
start_pc: 9
line_number: 10
start_pc: 17
line_number: 9
start_pc: 23
line_number: 11
idx: 2 attribute_name_index: 13
parse StackMapTable:
attribute_length: 7
number_of_entries: 2
frame_type: 252
append_frame offset_delta: 4
locals_num: 1
tag: 1
ITEM_Integer.
frame_type: 18
same_frame offset_delta: 18
[email protected] #