如何使用 Capstone 5.0 來反組譯執行檔

六 09 九月 2023 by ols3

Capstone 是一種輕量的、跨平台、支援多種 CPU 架構的反組譯器框架。

參考 Capstone 的概略說明

以下是基本用法，順便和 IDA Pro 反組譯的結果相互比較：

#include <fcntl.h>
#include <unistd.h>

#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>

#include <capstone/capstone.h>
#include <gelf.h>
#include <libelf.h>

int main(int argc, char *argv[]) {
    if (argc != 2) {
        std::cerr << "使用法: " << argv[0] << " <x86_64二進位執行檔>" << std::endl;
        return 1;
    }
    const char *filename = argv[1];

    int fd;
    Elf *elf;
    Elf_Scn *scn = NULL;
    GElf_Shdr shdr;
    size_t strndx;

    // 初始化 ELF library
    if (elf_version(EV_CURRENT) == EV_NONE) {
        std::cerr << "ELF 函式庫初始化失敗." << std::endl;
        return 1;
    }

    fd = open(filename, O_RDONLY);
    if (fd < 0) {
        std::cerr << "無法開啟要反組譯的執行檔." << std::endl;
        return 1;
    }

    elf = elf_begin(fd, ELF_C_READ, NULL);
    if (!elf) {
        std::cerr << "無法讀取 ELF." << std::endl;
        return 1;
    }

    if (elf_getshdrstrndx(elf, &strndx) != 0) {
        std::cerr << "無法取得 section header 的字串索引." << std::endl;
        return 1;
    }

    while ((scn = elf_nextscn(elf, scn)) != NULL) {
        if (gelf_getshdr(scn, &shdr) == NULL) {
            std::cerr << "無法取得 section header." << std::endl;
            return 1;
        }

        char *name = elf_strptr(elf, strndx, shdr.sh_name);
        if (name && strcmp(name, ".text") == 0) {
            Elf_Data *data = elf_getdata(scn, NULL);
            if (!data) {
                std::cerr << "無法取得 .text 區的資料" << std::endl;
                return 1;
            }

            uint8_t *text_section = static_cast<uint8_t *>(data->d_buf);
            size_t text_size = data->d_size;

            if (text_size == 0 || !text_section) {
                std::cerr << ".text 區為空白." << std::endl;
                return 1;
            }

            // 初始化 Capstone
            csh handle;
            cs_insn *insn;
            size_t count;

            if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK) {
                std::cerr << "無法啟用 Capstone" << std::endl;
                return -1;
            }

            count = cs_disasm(handle, text_section, text_size, 0x1000, 0, &insn);
            if (count > 0) {
                size_t j;
                for (j = 0; j < count; j++) {
                    std::cout << "0x" << std::hex << insn[j].address << ": ";
                    std::cout << insn[j].mnemonic << " " << insn[j].op_str << std::endl;
                }
                cs_free(insn, count);
            } else {
                std::cerr << "無法反組譯!" << std::endl;
            }

            cs_close(&handle);
            break;
        }
    }

    elf_end(elf);
    close(fd);

    return 0;
}

以下是反組譯的結果（位址從 0x1000 起算，是因為程式呼叫 cs_disasm 時把起始位址指定為 0x1000，並非 .text 區在 ELF 檔中的實際虛擬位址）：

ols3@b2d2022:~/coding/Capstone$ ./re4_distext hello

0x1000: xor ebp, ebp
0x1002: mov r9, rdx
0x1005: pop rsi
0x1006: mov rdx, rsp
0x1009: and rsp, 0xfffffffffffffff0
0x100d: push rax
0x100e: push rsp
0x100f: lea r8, [rip + 0x1ca]
0x1016: lea rcx, [rip + 0x163]
0x101d: lea rdi, [rip + 0xc1]
0x1024: call qword ptr [rip + 0x2f36]
0x102a: hlt 
0x102b: nop dword ptr [rax + rax]
0x1030: lea rdi, [rip + 0x2f91]
0x1037: lea rax, [rip + 0x2f8a]
0x103e: cmp rax, rdi
0x1041: je 0x1058
0x1043: mov rax, qword ptr [rip + 0x2f0e]
0x104a: test rax, rax
0x104d: je 0x1058
0x104f: jmp rax
0x1051: nop dword ptr [rax]
0x1058: ret 
0x1059: nop dword ptr [rax]
0x1060: lea rdi, [rip + 0x2f61]
0x1067: lea rsi, [rip + 0x2f5a]
0x106e: sub rsi, rdi
0x1071: mov rax, rsi
0x1074: shr rsi, 0x3f
0x1078: sar rax, 3
0x107c: add rsi, rax
0x107f: sar rsi, 1
0x1082: je 0x1098
0x1084: mov rax, qword ptr [rip + 0x2ee5]
0x108b: test rax, rax
0x108e: je 0x1098
0x1090: jmp rax
0x1092: nop word ptr [rax + rax]
0x1098: ret 
0x1099: nop dword ptr [rax]
0x10a0: cmp byte ptr [rip + 0x3069], 0
0x10a7: jne 0x10d8
0x10a9: push rbp
0x10aa: cmp qword ptr [rip + 0x2e96], 0
0x10b2: mov rbp, rsp
0x10b5: je 0x10c3
0x10b7: mov rdi, qword ptr [rip + 0x2f02]
0x10be: call 0xff0
0x10c3: call 0x1030
0x10c8: mov byte ptr [rip + 0x3041], 1
0x10cf: pop rbp
0x10d0: ret 
0x10d1: nop dword ptr [rax]
0x10d8: ret 
0x10d9: nop dword ptr [rax]
0x10e0: jmp 0x1060
0x10e5: push rbp
0x10e6: mov rbp, rsp
0x10e9: lea rsi, [rip + 0xe95]
0x10f0: lea rdi, [rip + 0x2f09]
0x10f7: call 0xfc0
0x10fc: mov rdx, rax
0x10ff: mov rax, qword ptr [rip + 0x2e4a]
0x1106: mov rsi, rax
0x1109: mov rdi, rdx
0x110c: call 0xfd0
0x1111: mov eax, 0
0x1116: pop rbp
0x1117: ret 
0x1118: push rbp
0x1119: mov rbp, rsp
0x111c: sub rsp, 0x10
0x1120: mov dword ptr [rbp - 4], edi
0x1123: mov dword ptr [rbp - 8], esi
0x1126: cmp dword ptr [rbp - 4], 1
0x112a: jne 0x115e
0x112c: cmp dword ptr [rbp - 8], 0xffff
0x1133: jne 0x115e
0x1135: lea rdi, [rip + 0x2fd5]
0x113c: call 0xfe0
0x1141: lea rdx, [rip + 0x2e78]
0x1148: lea rsi, [rip + 0x2fc2]
0x114f: mov rax, qword ptr [rip + 0x2e22]
0x1156: mov rdi, rax
0x1159: call 0xfb0
0x115e: nop 
0x115f: leave 
0x1160: ret 
0x1161: push rbp
0x1162: mov rbp, rsp
0x1165: mov esi, 0xffff
0x116a: mov edi, 1
0x116f: call 0x1118
0x1174: pop rbp
0x1175: ret 
0x1176: nop word ptr cs:[rax + rax]
0x1180: push r15
0x1182: lea r15, [rip + 0x2bb7]
0x1189: push r14
0x118b: mov r14, rdx
0x118e: push r13
0x1190: mov r13, rsi
0x1193: push r12
0x1195: mov r12d, edi
0x1198: push rbp
0x1199: lea rbp, [rip + 0x2bb0]
0x11a0: push rbx
0x11a1: sub rbp, r15
0x11a4: sub rsp, 8
0x11a8: call 0xf80
0x11ad: sar rbp, 3
0x11b1: je 0x11ce
0x11b3: xor ebx, ebx
0x11b5: nop dword ptr [rax]
0x11b8: mov rdx, r14
0x11bb: mov rsi, r13
0x11be: mov edi, r12d
0x11c1: call qword ptr [r15 + rbx*8]
0x11c5: add rbx, 1
0x11c9: cmp rbp, rbx
0x11cc: jne 0x11b8
0x11ce: add rsp, 8
0x11d2: pop rbx
0x11d3: pop rbp
0x11d4: pop r12
0x11d6: pop r13
0x11d8: pop r14
0x11da: pop r15
0x11dc: ret 
0x11dd: nop dword ptr [rax]
0x11e0: ret 

以下和 IDA Pro 反組譯的結果比較:

Capstone 5 和 IDA Pro 反組譯結果比較