ELF文件结构之依赖共享库的查找

由于特殊的需求,必须要用C来实现一个轮子,通过解析文件结构来获得一个ELF文件的依赖库的名字,IDA可以做到,那么我们也一定可以做到。造了一个简单的轮子,感谢小花椒和母鸡的科普。

ELF文件由多个节(section)组成,平时一般关注text、data、rodata、bss等,在Linux下使用readelf -S命令可以把它们打印出来。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
➜  parser readelf -S /tmp/liblead.so
There are 25 section headers, starting at offset 0x31b4:

Section Headers:
[N] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .note.gnu.build-i NOTE 00000134 000134 000024 00 A 0 0 4
[ 2] .dynsym DYNSYM 00000158 000158 0003f0 10 A 3 1 4
[ 3] .dynstr STRTAB 00000548 000548 00051e 00 A 0 0 1
[ 4] .hash HASH 00000a68 000a68 000198 04 A 2 0 4
[ 5] .gnu.version VERSYM 00000c00 000c00 00007e 02 A 2 0 2
[ 6] .gnu.version_d VERDEF 00000c80 000c80 00001c 00 A 3 1 4
[ 7] .gnu.version_r VERNEED 00000c9c 000c9c 000020 00 A 3 1 4
[ 8] .rel.dyn REL 00000cbc 000cbc 000048 08 A 2 0 4
[ 9] .rel.plt REL 00000d04 000d04 000110 08 AI 2 10 4
[10] .plt PROGBITS 00000e14 000e14 0001ac 00 AX 0 0 4
[11] .text PROGBITS 00000fc0 000fc0 00125c 00 AX 0 0 4
[12] .ARM.extab PROGBITS 0000221c 00221c 000084 00 A 0 0 4
[13] .ARM.exidx ARM_EXIDX 000022a0 0022a0 000128 08 AL 11 0 4
[14] .rodata PROGBITS 000023c8 0023c8 0001e3 01 AMS 0 0 1
[15] .fini_array FINI_ARRAY 00003e18 002e18 000008 00 WA 0 0 4
[16] .init_array INIT_ARRAY 00003e20 002e20 000004 00 WA 0 0 1
[17] .dynamic DYNAMIC 00003e24 002e24 000128 08 WA 3 0 4
[18] .got PROGBITS 00003f4c 002f4c 0000b4 00 WA 0 0 4
[19] .data PROGBITS 00004000 003000 000004 00 WA 0 0 4
[20] .bss NOBITS 00004004 003004 000000 00 WA 0 0 1
[21] .comment PROGBITS 00000000 003004 000065 01 MS 0 0 1
[22] .note.gnu.gold-ve NOTE 00000000 00306c 00001c 00 0 0 4
[23] .ARM.attributes ARM_ATTRIBUTES 00000000 003088 000034 00 0 0 1
[24] .shstrtab STRTAB 00000000 0030bc 0000f6 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings)
I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
O (extra OS processing required) o (OS specific), p (processor specific)

这里主要关注两个节,dynstr和dynamic,它们与依赖库相关,配合010editor我们可以看一下。

dynstr存放的是常量字符串,没啥好说的,用0来隔开的一堆字符串。

后者存放的是一堆叫Elf32_Dyn的结构体,在elf.h里有定义。

1
2
3
4
5
6
7
8
9
10
11
/* Dynamic section entry.  The .dynamic section is an array of these.
   d_tag says what the entry describes; for a DT_NEEDED entry, d_un.d_val is
   the offset of the library name inside the .dynstr string table.  */

typedef struct
{
  Elf32_Sword d_tag;      /* Dynamic entry type */
  union
    {
      Elf32_Word d_val;   /* Integer value */
      Elf32_Addr d_ptr;   /* Address value */
    } d_un;
} Elf32_Dyn;

这些d_tag中有一种叫DT_NEEDED,表示依赖库的名字;此时d_un.d_val的值是一个offset,含义是该名字在dynstr中的偏移。

那么现在我们的目标已经比较明确,找到这两个section,挨个遍历,打印出字符串即可。

但是我们并不知道dynstr和dynamic的位置,需要借助ELF文件头里的信息来判断,是Elf32_Ehdr,在elf.h里有定义。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* The ELF file header, located at offset 0 of the file.  e_shoff, e_shnum
   and e_shstrndx are the fields needed to reach the section header table
   and the section name string table (.shstrtab).  */

typedef struct
{
  unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
  Elf32_Half    e_type;             /* Object file type */
  Elf32_Half    e_machine;          /* Architecture */
  Elf32_Word    e_version;          /* Object file version */
  Elf32_Addr    e_entry;            /* Entry point virtual address */
  Elf32_Off     e_phoff;            /* Program header table file offset */
  Elf32_Off     e_shoff;            /* Section header table file offset */
  Elf32_Word    e_flags;            /* Processor-specific flags */
  Elf32_Half    e_ehsize;           /* ELF header size in bytes */
  Elf32_Half    e_phentsize;        /* Program header table entry size */
  Elf32_Half    e_phnum;            /* Program header table entry count */
  Elf32_Half    e_shentsize;        /* Section header table entry size */
  Elf32_Half    e_shnum;            /* Section header table entry count */
  Elf32_Half    e_shstrndx;         /* Section header string table index */
} Elf32_Ehdr;

关注其中的e_shoff,代表section headers存放于文件的offset处;e_shnum代表section headers的个数;e_shstrndx,代表shstrtab位于section header list的第几项。 所以需要访问e_shoff位置的内容,这个地方存放的是Elf32_Shdr[e_shnum],在elf.h里有定义。

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Section header.  The section header table at file offset e_shoff is an
   array of e_shnum of these.  sh_name is an offset into .shstrtab and
   sh_offset/sh_size locate the section contents inside the file.  */

typedef struct
{
  Elf32_Word sh_name;      /* Section name (string tbl index) */
  Elf32_Word sh_type;      /* Section type */
  Elf32_Word sh_flags;     /* Section flags */
  Elf32_Addr sh_addr;      /* Section virtual addr at execution */
  Elf32_Off  sh_offset;    /* Section file offset */
  Elf32_Word sh_size;      /* Section size in bytes */
  Elf32_Word sh_link;      /* Link to another section */
  Elf32_Word sh_info;      /* Additional section information */
  Elf32_Word sh_addralign; /* Section alignment */
  Elf32_Word sh_entsize;   /* Entry size if section holds table */
} Elf32_Shdr;

需要关注sh_name,表示该section的name位于shstrtab的偏移;sh_offset,表示section内容存放于文件的offset处。而shstrtab的位置我们是知道的,所以可以根据section headers的描述来找到dynstr和dynamic的偏移和大小。 到现在基本已经打通逻辑了,贴下代码(随手写的,很丑,见谅)。

  1. 解析文件头,拿到section headers的位置、个数和shstrtab的下标
  2. 遍历section headers,找到名字是dynstr和dynamic的section,记录其offset
  3. 遍历dynamic的每一项,若是DT_NEEDED,就打印dynstr对应位置的字符串
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <elf.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <zconf.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

/* Report a fatal error on stderr, tagged with the source line of the call
 * site, then terminate the process with a failure status.
 *
 * The body is wrapped in do { } while (0) and carries no trailing semicolon,
 * so `DIE("...");` expands to exactly one statement and is safe inside an
 * unbraced if/else. */
#define DIE(msg)                                               \
    do {                                                       \
        std::fprintf(stderr, "%s %d\n", (msg), __LINE__);      \
        std::exit(EXIT_FAILURE);                               \
    } while (0)

void parse(const char *buffer);


/**
 * Entry point: map an ELF file read-only and print its library dependencies.
 *
 * @param argc Number of command-line arguments.
 * @param argv argv[1], when present, is the path of the ELF file to inspect;
 *             otherwise /tmp/liblead.so is used.
 * @return 0 on success, 1 when the file cannot be opened, inspected or mapped,
 *         or is too small to hold an ELF header.
 */
int main(int argc, char **argv) {
    const char *path = (argc > 1) ? argv[1] : "/tmp/liblead.so";

    std::cout << "Hello, World!" << '\n';

    const int fd = open(path, O_RDONLY);
    if (fd == -1) {
        std::perror("open");
        return 1;
    }

    struct stat file_state;
    if (fstat(fd, &file_state) == -1) {
        std::perror("fstat");
        close(fd);
        return 1;
    }

    // parse() dereferences an Elf32_Ehdr at the start of the image, so refuse
    // anything shorter than that (this also rejects empty files, for which
    // mmap would fail anyway).
    if (file_state.st_size < static_cast<off_t>(sizeof(Elf32_Ehdr))) {
        std::fprintf(stderr, "%s: too small to be an ELF file\n", path);
        close(fd);
        return 1;
    }

    const size_t length = static_cast<size_t>(file_state.st_size);
    void *mapping = mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, 0);
    // mmap reports failure with MAP_FAILED, not a null pointer.
    if (mapping == MAP_FAILED) {
        std::perror("mmap");
        close(fd);
        return 1;
    }

    parse(static_cast<const char *>(mapping));
    std::cout << "Bye, World!" << '\n';

    munmap(mapping, length);
    close(fd);

    return 0;
}

/**
 * Print the shared-library dependencies (DT_NEEDED entries) of an ELF32 image.
 *
 * Steps:
 *   1. Read the ELF header to get the position and count of the section
 *      headers and the index of .shstrtab.
 *   2. Walk the section headers looking for the sections named ".dynamic"
 *      and ".dynstr"; every other section is listed as "[index] = name".
 *   3. Walk the entries of .dynamic and, for each DT_NEEDED, print the string
 *      found in .dynstr at offset d_un.d_val as "need <name>".
 *
 * Progress and results are written to stdout; problems are reported on stderr
 * and end the parse early.
 *
 * @param buffer Start of the complete in-memory file image (for example an
 *               mmap of the file). The image length is not passed in, so the
 *               offsets stored in the file cannot be checked against the end
 *               of the image; the fields are read in host byte order.
 */
void parse(const char *buffer) {
    if (buffer == nullptr) {
        std::fprintf(stderr, "parse: null buffer\n");
        return;
    }

    const Elf32_Ehdr *header = reinterpret_cast<const Elf32_Ehdr *>(buffer);

    // Everything below interprets the image through the Elf32_* layouts, so
    // reject non-ELF input and 64-bit objects up front.
    if (std::memcmp(header->e_ident, ELFMAG, SELFMAG) != 0) {
        std::fprintf(stderr, "parse: not an ELF file\n");
        return;
    }
    if (header->e_ident[EI_CLASS] != ELFCLASS32) {
        std::fprintf(stderr, "parse: not a 32-bit ELF file\n");
        return;
    }

    const Elf32_Off section_header_offset = header->e_shoff;
    const Elf32_Half section_header_num = header->e_shnum;
    const Elf32_Half shstrtab_idx = header->e_shstrndx;

    std::printf("header->e_shoff = %u\n", static_cast<unsigned>(section_header_offset));
    std::printf("header->e_shnum = %u\n", static_cast<unsigned>(section_header_num));
    std::printf("header->e_shstrndx = %u\n", static_cast<unsigned>(shstrtab_idx));

    // Without a section header table, or with a name-table index that does
    // not point into it, section names cannot be resolved.
    if (section_header_offset == 0 || shstrtab_idx >= section_header_num) {
        std::fprintf(stderr, "parse: no usable section header table\n");
        return;
    }

    const Elf32_Shdr *section_headers =
            reinterpret_cast<const Elf32_Shdr *>(buffer + section_header_offset);
    const Elf32_Shdr &section_shstrtab = section_headers[shstrtab_idx];

    const char *shstrtab_data = buffer + section_shstrtab.sh_offset;
    std::printf("shstrtab offset = %u\n", static_cast<unsigned>(section_shstrtab.sh_offset));

    const Elf32_Shdr *section_dynamic = nullptr;
    const Elf32_Shdr *section_dynstr = nullptr;
    unsigned dynamic_idx = 0;
    unsigned dynstr_idx = 0;

    for (unsigned i = 0; i < section_header_num; ++i) {
        const Elf32_Shdr &current = section_headers[i];
        const char *current_name = shstrtab_data + current.sh_name;

        // The first match of each name wins; matched sections are reported
        // after the loop instead of in the listing.
        if (section_dynamic == nullptr && std::strcmp(current_name, ".dynamic") == 0) {
            section_dynamic = &current;
            dynamic_idx = i;
            continue;
        }
        if (section_dynstr == nullptr && std::strcmp(current_name, ".dynstr") == 0) {
            section_dynstr = &current;
            dynstr_idx = i;
            continue;
        }
        std::printf("[%u] = %s\n", i, current_name);
    }

    // Both sections are required: .dynamic holds the entries and .dynstr the
    // names they refer to.
    if (section_dynamic == nullptr || section_dynstr == nullptr) {
        std::fprintf(stderr, "parse: missing .dynamic or .dynstr section\n");
        return;
    }

    const char *dynstr_data = buffer + section_dynstr->sh_offset;
    const Elf32_Dyn *dynamic_array =
            reinterpret_cast<const Elf32_Dyn *>(buffer + section_dynamic->sh_offset);

    std::printf("[%u] is dynamic, offset %u\n", dynamic_idx,
                static_cast<unsigned>(section_dynamic->sh_offset));
    std::printf("[%u] is dynstr, offset %u\n", dynstr_idx,
                static_cast<unsigned>(section_dynstr->sh_offset));

    const unsigned dynamic_count =
            static_cast<unsigned>(section_dynamic->sh_size / sizeof(Elf32_Dyn));
    std::printf("dynamic_count is %u\n", dynamic_count);

    for (unsigned j = 0; j < dynamic_count; ++j) {
        const Elf32_Dyn &entry = dynamic_array[j];

        // DT_NULL terminates the dynamic array; the slots after it are padding.
        if (entry.d_tag == DT_NULL)
            break;
        if (entry.d_tag != DT_NEEDED)
            continue;

        // Never follow a name offset that points outside .dynstr.
        if (entry.d_un.d_val >= section_dynstr->sh_size) {
            std::fprintf(stderr, "parse: DT_NEEDED offset %u is outside .dynstr\n",
                         static_cast<unsigned>(entry.d_un.d_val));
            continue;
        }
        std::printf("need %s\n", dynstr_data + entry.d_un.d_val);
    }
}

运行结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
Hello, World!
header->e_shoff = 12724
header->e_shnum = 25
header->e_shstrndx = 24
shstrtab offset = 12476
[0] =
[1] = .note.gnu.build-id
[2] = .dynsym
[4] = .hash
[5] = .gnu.version
[6] = .gnu.version_d
[7] = .gnu.version_r
[8] = .rel.dyn
[9] = .rel.plt
[10] = .plt
[11] = .text
[12] = .ARM.extab
[13] = .ARM.exidx
[14] = .rodata
[15] = .fini_array
[16] = .init_array
[18] = .got
[19] = .data
[20] = .bss
[21] = .comment
[22] = .note.gnu.gold-version
[23] = .ARM.attributes
[24] = .shstrtab
[17] is dynamic, offset 11812
[3] is dynstr, offset 1352
dynamic_count is 37
need libandroid.so
need liblog.so
need libm.so
need libstdc++.so
need libdl.so
need libc.so
Bye, World!

完结,撒花~~~