diff --git a/src/bloaty.cc b/src/bloaty.cc index f489c60..fff2d58 100644 --- a/src/bloaty.cc +++ b/src/bloaty.cc @@ -85,6 +85,7 @@ struct DataSourceDefinition { constexpr DataSourceDefinition data_sources[] = { {DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"}, + {DataSource::kArchs, "archs", "architecture slices in universal binaries"}, {DataSource::kCompileUnits, "compileunits", "source file for the .o file (translation unit). requires debug info."}, {DataSource::kInputFiles, "inputfiles", @@ -2157,7 +2158,7 @@ void Bloaty::DisassembleFunction(string_view function, const Options& options, for (const auto& file_info : input_files_) { auto file = GetObjectFile(file_info.filename_); if (file->GetDisassemblyInfo(function, EffectiveSymbolSource(options), - &info)) { + options, &info)) { output->SetDisassembly(::bloaty::DisassembleFunction(info)); return; } diff --git a/src/bloaty.h b/src/bloaty.h index 85515fc..65bcab2 100644 --- a/src/bloaty.h +++ b/src/bloaty.h @@ -58,6 +58,7 @@ enum class DataSource { kRawRanges, kSections, kSegments, + kArchs, // We always set this to one of the concrete symbol types below before // setting it on a sink. @@ -261,20 +262,29 @@ class NameMunger { std::vector, std::string>> regexes_; }; +struct SymbolInfo { + uint64_t address; + uint64_t size; + uint16_t n_desc; + + SymbolInfo(uint64_t addr, uint64_t sz, uint16_t desc = 0) + : address(addr), size(sz), n_desc(desc) {} +}; + /// SymbolTable holds the symbol table for an object file. -/// It maps symbol names to their address and size. +/// It maps symbol names to their address, size, and other metadata. /// -/// This structure is used in src/elf.cc to accumulate symbols from the ELF -/// symbol table, which are then used to look up symbols during disassembly. +/// This structure is used in src/elf.cc and src/macho.cc to accumulate symbols +/// from symbol tables, which are then used to look up symbols during +/// disassembly. class SymbolTable { public: /// Inserts a symbol into the table. /// The name must be guaranteed to outlive the SymbolTable (e.g. it points /// into the memory-mapped file). /// @return The string_view of the inserted name. - std::string_view insert(std::string_view name, - std::pair val) { - table.insert(std::make_pair(name, val)); + std::string_view insert(std::string_view name, const SymbolInfo& info) { + table.insert(std::make_pair(name, info)); return name; } @@ -282,17 +292,16 @@ class SymbolTable { /// The name is moved into the SymbolTable and owned by it. /// @return The string_view of the inserted name (which points to the owned /// string). - std::string_view insert(std::string&& name, - std::pair val) { + std::string_view insert(std::string&& name, const SymbolInfo& info) { owned_strings.push_back(std::move(name)); std::string_view sv = owned_strings.back(); - table.insert(std::make_pair(sv, val)); + table.insert(std::make_pair(sv, info)); return sv; } /// Inserts a symbol into the table. /// The name must be guaranteed to outlive the SymbolTable. - void insert(std::pair> val) { + void insert(std::pair val) { table.insert(val); } @@ -304,8 +313,8 @@ class SymbolTable { auto end() { return table.end(); } private: - /// Map of symbol name to (address, size) pair. - std::map> table; + /// Map of symbol name to SymbolInfo. + std::map table; /// Strings that are owned by this SymbolTable. /// This is used for synthetic symbols that don't exist in the file (e.g. @@ -330,6 +339,7 @@ class ObjectFile { virtual bool GetDisassemblyInfo(std::string_view symbol, DataSource symbol_source, + const Options& options, DisassemblyInfo* info) const = 0; const InputFile& file_data() const { return *file_data_; } diff --git a/src/elf.cc b/src/elf.cc index ac0f610..703be1f 100644 --- a/src/elf.cc +++ b/src/elf.cc @@ -936,10 +936,10 @@ static void ReadELFSymbols(const InputFile& file, RangeSink* sink, } if (table) { if (name_storage.empty()) { - table->insert(name, std::make_pair(full_addr, sym.st_size)); + table->insert(name, SymbolInfo(full_addr, sym.st_size)); } else { name = table->insert(std::move(name_storage), - std::make_pair(full_addr, sym.st_size)); + SymbolInfo(full_addr, sym.st_size)); } } @@ -1401,6 +1401,8 @@ class ElfObjectFile : public ObjectFile { DoReadELFSections(sink, kReportByEscapedSectionName); break; } + case DataSource::kArchs: + THROW("ELF files do not support 'archs' data source"); default: THROW("unknown data source"); } @@ -1421,14 +1423,15 @@ class ElfObjectFile : public ObjectFile { } bool GetDisassemblyInfo(const std::string_view symbol, - DataSource symbol_source, + DataSource symbol_source, const Options& options, DisassemblyInfo* info) const override { - return DoGetDisassemblyInfo(&symbol, symbol_source, info); + return DoGetDisassemblyInfo(&symbol, symbol_source, options, info); } bool DoGetDisassemblyInfo(const std::string_view* symbol, - DataSource symbol_source, + DataSource symbol_source, const Options& options, DisassemblyInfo* info) const { + (void)options; // Find the corresponding file range. This also could be optimized not to // build the entire map. DualMap base_map; @@ -1455,8 +1458,8 @@ class ElfObjectFile : public ObjectFile { return false; } } - uint64_t vmaddr = entry->second.first; - uint64_t size = entry->second.second; + uint64_t vmaddr = entry->second.address; + uint64_t size = entry->second.size; // TODO(haberman); Add PLT entries to symbol map, so call gets // symbolized. diff --git a/src/macho.cc b/src/macho.cc index cd826fe..938ce4c 100644 --- a/src/macho.cc +++ b/src/macho.cc @@ -21,6 +21,7 @@ #include #include "absl/strings/str_join.h" +#include "absl/strings/str_format.h" #include "absl/strings/substitute.h" #include "third_party/darwin_xnu_macho/mach-o/loader.h" #include "third_party/darwin_xnu_macho/mach-o/fat.h" @@ -69,6 +70,57 @@ void MaybeAddOverhead(RangeSink* sink, const char* label, string_view data) { } } +// ARM64E capability field constants +static constexpr uint32_t ARM64E_SUBTYPE_MASK = 0x00FFFFFF; // Low 24 bits: subtype proper + +static bool IsArm64eSubtype(uint32_t cpusubtype) { + uint32_t subtype_proper = cpusubtype & ARM64E_SUBTYPE_MASK; + return subtype_proper == CPU_SUBTYPE_ARM64E; +} + +std::string CpuTypeToString(uint32_t cputype, uint32_t cpusubtype) { + switch (cputype) { + case CPU_TYPE_X86_64: + switch (cpusubtype) { + case CPU_SUBTYPE_X86_64_H: + return "x86_64h"; + default: + return "x86_64"; + } + case CPU_TYPE_ARM64: + if (IsArm64eSubtype(cpusubtype)) { + return "arm64e"; + } + switch (cpusubtype) { + case CPU_SUBTYPE_ARM64_V8: + return "arm64v8"; + default: + return "arm64"; + } + case CPU_TYPE_X86: + return "i386"; + case CPU_TYPE_ARM: + switch (cpusubtype) { + case CPU_SUBTYPE_ARM_V6: + return "armv6"; + case CPU_SUBTYPE_ARM_V7: + return "armv7"; + case CPU_SUBTYPE_ARM_V7F: + return "armv7f"; + case CPU_SUBTYPE_ARM_V7S: + return "armv7s"; + case CPU_SUBTYPE_ARM_V7K: + return "armv7k"; + case CPU_SUBTYPE_ARM_V8: + return "armv8"; + default: + return "arm"; + } + default: + return absl::StrFormat("cpu_%d", cputype); + } +} + struct LoadCommand { bool is64bit; uint32_t cmd; @@ -448,8 +500,8 @@ void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, SymbolTable* table, } if (table) { - table->insert(std::make_pair( - name, std::make_pair(sym->n_value, RangeSink::kUnknownSize))); + table->insert( + name, SymbolInfo(sym->n_value, RangeSink::kUnknownSize, sym->n_desc)); } // Capture the trailing NULL. @@ -560,6 +612,116 @@ static void ReadDebugSectionsFromMachO(const InputFile &file, }); } +struct TextSegmentInfo { + uint64_t vmaddr = 0; + uint64_t vmsize = 0; + uint64_t fileoff = 0; + uint64_t filesize = 0; + bool found = false; +}; + +struct TextSectionInfo { + uint64_t vmaddr = 0; + uint64_t vmsize = 0; + bool found = false; +}; + +template +static void ExtractTextSegmentInfo(const LoadCommand& cmd, + TextSegmentInfo& text_info) { + auto segment = GetStructPointer(cmd.command_data); + string_view segname = ArrayToStr(segment->segname, 16); + if (segname == "__TEXT") { + text_info.vmaddr = segment->vmaddr; + text_info.vmsize = segment->vmsize; + text_info.fileoff = segment->fileoff; + text_info.filesize = segment->filesize; + text_info.found = true; + } +} + +static TextSegmentInfo GetTextSegmentInfo(string_view data) { + TextSegmentInfo text_info; + ForEachLoadCommand(data, nullptr, [&text_info](const LoadCommand& cmd) { + if (text_info.found) return; // Already found, skip others + + if (cmd.cmd == LC_SEGMENT_64) { + ExtractTextSegmentInfo(cmd, text_info); + } else if (cmd.cmd == LC_SEGMENT) { + ExtractTextSegmentInfo(cmd, text_info); + } + }); + return text_info; +} + +template +static void ExtractTextSectionInfo(const LoadCommand& cmd, + TextSectionInfo& text_section_info) { + auto segment = GetStructPointer(cmd.command_data); + string_view segname = ArrayToStr(segment->segname, 16); + if (segname == "__TEXT") { + string_view command_data = cmd.command_data; + GetStructPointerAndAdvance(&command_data); + + uint32_t nsects = segment->nsects; + for (uint32_t j = 0; j < nsects; j++) { + auto section = GetStructPointerAndAdvance(&command_data); + string_view sectname = ArrayToStr(section->sectname, 16); + if (sectname == "__text") { + text_section_info.vmaddr = section->addr; + text_section_info.vmsize = section->size; + text_section_info.found = true; + return; + } + } + } +} + +static TextSectionInfo GetTextSectionInfo(string_view data) { + TextSectionInfo text_section_info; + ForEachLoadCommand(data, nullptr, + [&text_section_info](const LoadCommand& cmd) { + if (text_section_info.found) + return; // Already found, skip others + + if (cmd.cmd == LC_SEGMENT_64) { + ExtractTextSectionInfo( + cmd, text_section_info); + } else if (cmd.cmd == LC_SEGMENT) { + ExtractTextSectionInfo( + cmd, text_section_info); + } + }); + return text_section_info; +} + +struct CapstoneArchMode { + cs_arch arch; + cs_mode mode; +}; + +// Map Mach-O CPU types to Capstone architecture and mode constants +static CapstoneArchMode MachOToCapstone(uint32_t cputype, uint32_t cpusubtype, + bool is_thumb) { + (void)cpusubtype; + + switch (cputype) { + case CPU_TYPE_X86: + return {CS_ARCH_X86, CS_MODE_32}; + case CPU_TYPE_X86_64: + return {CS_ARCH_X86, CS_MODE_64}; + case CPU_TYPE_ARM: + if (is_thumb) { + return {CS_ARCH_ARM, static_cast(CS_MODE_THUMB | CS_MODE_ARM)}; + } + return {CS_ARCH_ARM, CS_MODE_ARM}; + case CPU_TYPE_ARM64: + return {CS_ARCH_ARM64, CS_MODE_ARM}; + default: + THROWF("Unknown Mach-O CPU type: $0", cputype); + } +} + class MachOObjectFile : public ObjectFile { public: MachOObjectFile(std::unique_ptr file_data) @@ -619,6 +781,10 @@ class MachOObjectFile : public ObjectFile { ReadDWARFInlines(dwarf, sink, true); break; } + case DataSource::kArchs: { + ProcessArchitectures(sink); + break; + } case DataSource::kArchiveMembers: default: THROW("Mach-O doesn't support this data source"); @@ -627,11 +793,215 @@ class MachOObjectFile : public ObjectFile { } } - bool GetDisassemblyInfo(std::string_view /*symbol*/, - DataSource /*symbol_source*/, - DisassemblyInfo* /*info*/) const override { - WARN("Mach-O files do not support disassembly yet"); - return false; + void ProcessArchitectures(RangeSink* sink) const { + uint32_t magic = ReadMagic(file_data().data()); + + if (magic == FAT_CIGAM) { + string_view header_data = file_data().data(); + auto header = GetStructPointerAndAdvance(&header_data); + uint32_t nfat_arch = ByteSwap(header->nfat_arch); + + for (uint32_t i = 0; i < nfat_arch; i++) { + auto arch = GetStructPointerAndAdvance(&header_data); + uint32_t cputype = ByteSwap(arch->cputype); + uint32_t cpusubtype = ByteSwap(arch->cpusubtype); + uint32_t offset = ByteSwap(arch->offset); + uint32_t size = ByteSwap(arch->size); + + std::string arch_name = CpuTypeToString(cputype, cpusubtype); + string_view slice_data = StrictSubstr(file_data().data(), offset, size); + + sink->AddFileRange("archs", arch_name, slice_data); + } + } else { + auto header = GetStructPointer(file_data().data()); + std::string arch_name = CpuTypeToString(header->cputype, header->cpusubtype); + + sink->AddFileRange("archs", arch_name, file_data().data()); + } + } + + bool GetDisassemblyInfo(std::string_view symbol, DataSource symbol_source, + const Options& options, + DisassemblyInfo* info) const override { + string_view macho_data; + uint32_t cputype = 0; + uint32_t cpusubtype = 0; + uint64_t slice_offset = 0; + uint32_t magic = ReadMagic(file_data().data()); + + if (magic == FAT_CIGAM) { + if (!options.has_source_filter()) { + THROW( + "Disassembling universal binaries requires --source-filter to " + "select architecture"); + } + + std::unique_ptr filter_regex = + absl::make_unique(options.source_filter()); + + string_view header_data = file_data().data(); + auto header = GetStructPointerAndAdvance(&header_data); + uint32_t nfat_arch = ByteSwap(header->nfat_arch); + + bool found = false; + for (uint32_t i = 0; i < nfat_arch; i++) { + auto arch = GetStructPointerAndAdvance(&header_data); + cputype = ByteSwap(arch->cputype); + cpusubtype = ByteSwap(arch->cpusubtype); + std::string arch_name = CpuTypeToString(cputype, cpusubtype); + + if (ReImpl::PartialMatch(arch_name, *filter_regex)) { + uint32_t offset = ByteSwap(arch->offset); + uint32_t size = ByteSwap(arch->size); + slice_offset = offset; + macho_data = StrictSubstr(file_data().data(), offset, size); + found = true; + break; + } + } + + if (!found) { + THROWF("No architecture matching filter '$0' found in universal binary", + options.source_filter()); + } + } else { + macho_data = file_data().data(); + auto header = GetStructPointer(macho_data); + cputype = header->cputype; + cpusubtype = header->cpusubtype; + } + + DualMap base_map; + NameMunger empty_munger; + RangeSink base_sink(&file_data(), options, DataSource::kSegments, nullptr, + nullptr); + base_sink.AddOutput(&base_map, &empty_munger); + + ForEachLoadCommand(macho_data, nullptr, + [&base_sink](const LoadCommand& cmd) { + ParseLoadCommand(cmd, &base_sink); + }); + + SymbolTable symtab; + RangeSink symbol_sink(&file_data(), options, symbol_source, &base_map, + nullptr); + symbol_sink.AddOutput(&base_map, &empty_munger); + ParseSymbols(macho_data, &symtab, &symbol_sink); + + auto it = symtab.find(symbol); + if (it == symtab.end()) { + return false; + } + + uint64_t vmaddr = it->second.address; + uint64_t size = it->second.size; + uint16_t n_desc = it->second.n_desc; + + // Handle kUnknownSize - find next symbol and clamp to section/segment + // bounds + if (size == RangeSink::kUnknownSize) { + TextSectionInfo text_section_info = GetTextSectionInfo(macho_data); + uint64_t text_section_end = UINT64_MAX; + if (text_section_info.found) { + text_section_end = text_section_info.vmaddr + text_section_info.vmsize; + } + + // Find the next symbol after this one by address, but don't go beyond + // __text section + uint64_t next_addr = text_section_end; + for (const auto& sym_entry : symtab) { + uint64_t sym_addr = sym_entry.second.address; + if (sym_addr > vmaddr && sym_addr < next_addr) { + next_addr = sym_addr; + } + } + + if (next_addr != UINT64_MAX) { + size = next_addr - vmaddr; + } else { + // Default to a reasonable size if we can't determine it. + size = 256; + } + + if (text_section_info.found && vmaddr >= text_section_info.vmaddr && + vmaddr < text_section_end) { + uint64_t max_size_in_section = text_section_end - vmaddr; + if (size > max_size_in_section) { + size = max_size_in_section; + if (verbose_level > 1) { + printf("Symbol %.*s size clamped to %" PRIu64 + " to stay within __text section\n", + static_cast(symbol.size()), symbol.data(), size); + } + } + } + } + + uint64_t fileoff; + if (!base_map.vm_map.Translate(vmaddr, &fileoff)) { + THROWF("Could not translate VM address $0 to file offset", vmaddr); + } + + TextSegmentInfo text_info = GetTextSegmentInfo(macho_data); + if (text_info.found) { + // Adjust file offsets by slice_offset for universal binaries. + // GetTextSegmentInfo returns slice relative offsets, but base_map.vm_map + // uses file relative offsets + text_info.fileoff += slice_offset; + + uint64_t text_end_vm = text_info.vmaddr + text_info.vmsize; + uint64_t symbol_end_vm = vmaddr + size; + + if (symbol_end_vm > text_end_vm) { + if (vmaddr >= text_end_vm) { + THROWF("Function $0 is outside __TEXT segment", symbol); + } + size = text_end_vm - vmaddr; + if (verbose_level > 1) { + printf("Warning: Function %.*s size limited to %" PRIu64 + " to stay within __TEXT segment\n", + static_cast(symbol.size()), symbol.data(), size); + } + } + + uint64_t text_end_file = text_info.fileoff + text_info.filesize; + uint64_t symbol_end_file = fileoff + size; + + if (symbol_end_file > text_end_file) { + if (fileoff >= text_end_file) { + THROWF("Function $0 file offset is outside __TEXT segment", symbol); + } + uint64_t file_clamped_size = text_end_file - fileoff; + if (file_clamped_size < size) { + size = file_clamped_size; + if (verbose_level > 1) { + printf("Warning: Function %.*s size limited to %" PRIu64 + " due to file bounds\n", + static_cast(symbol.size()), symbol.data(), size); + } + } + } + } else { + if (verbose_level > 1) { + printf("Warning: Could not find __TEXT segment for bounds checking\n"); + } + } + + if (fileoff + size > file_data().data().size()) { + size = file_data().data().size() - fileoff; + } + + info->text = StrictSubstr(file_data().data(), fileoff, size); + info->start_address = vmaddr; + + bool is_thumb = (cputype == CPU_TYPE_ARM) && (n_desc & N_ARM_THUMB_DEF); + + CapstoneArchMode capstone = MachOToCapstone(cputype, cpusubtype, is_thumb); + info->arch = capstone.arch; + info->mode = capstone.mode; + + return true; } }; diff --git a/src/pe.cc b/src/pe.cc index c5e9f04..43f2125 100644 --- a/src/pe.cc +++ b/src/pe.cc @@ -251,6 +251,7 @@ class PEObjectFile : public ObjectFile { } bool GetDisassemblyInfo(string_view /*symbol*/, DataSource /*symbol_source*/, + const Options& /*options*/, DisassemblyInfo* /*info*/) const override { WARN("PE files do not support disassembly yet"); return false; diff --git a/src/source_map.h b/src/source_map.h index d6ee75f..6babfd6 100644 --- a/src/source_map.h +++ b/src/source_map.h @@ -43,6 +43,7 @@ class SourceMapObjectFile : public ObjectFile { bool GetDisassemblyInfo(std::string_view /*symbol*/, DataSource /*symbol_source*/, + const Options& /*options*/, DisassemblyInfo* /*info*/) const override { WARN("Disassembly not supported for source map files"); return false; diff --git a/src/webassembly.cc b/src/webassembly.cc index e6f0a9d..5aa13d9 100644 --- a/src/webassembly.cc +++ b/src/webassembly.cc @@ -484,6 +484,7 @@ class WebAssemblyObjectFile : public ObjectFile { bool GetDisassemblyInfo(std::string_view /*symbol*/, DataSource /*symbol_source*/, + const Options& /*options*/, DisassemblyInfo* /*info*/) const override { WARN("WebAssembly files do not support disassembly yet"); return false; diff --git a/tests/macho/archs.test b/tests/macho/archs.test new file mode 100644 index 0000000..a8374b4 --- /dev/null +++ b/tests/macho/archs.test @@ -0,0 +1,263 @@ +# Test -d archs data source for mach-o universal binaries +# +# Tests that the 'archs' data source correctly reports architecture slices +# in universal binaries and single-architecture binaries. + +## Test 1: Universal binary with two architectures (x86_64 and arm64) +# RUN: %yaml2obj --docnum=1 %s -o %t.universal +# RUN: %bloaty %t.universal -d archs --domain=file | %FileCheck --check-prefix=UNIVERSAL %s + +# UNIVERSAL: FILE SIZE +# UNIVERSAL-DAG: x86_64 +# UNIVERSAL-DAG: arm64 +# UNIVERSAL-DAG: [Unmapped] + +## Test 2: Filter to x86_64 architecture only +# RUN: %bloaty %t.universal -d archs,segments --source-filter=x86_64 --domain=file | %FileCheck --check-prefix=FILTER-X86 %s + +# FILTER-X86: FILE SIZE +# FILTER-X86: x86_64 +# FILTER-X86: __TEXT +# FILTER-X86: __LINKEDIT +# FILTER-X86-NOT: arm64 + +## Test 3: Filter to arm64 architecture only +# RUN: %bloaty %t.universal -d archs,segments --source-filter=arm64 --domain=file | %FileCheck --check-prefix=FILTER-ARM %s + +# FILTER-ARM: FILE SIZE +# FILTER-ARM: arm64 +# FILTER-ARM: __TEXT +# FILTER-ARM: __LINKEDIT +# FILTER-ARM-NOT: x86_64 + +## Test 4: Single architecture binary +# RUN: %yaml2obj --docnum=2 %s -o %t.single +# RUN: %bloaty %t.single -d archs --domain=file | %FileCheck --check-prefix=SINGLE %s + +# SINGLE: FILE SIZE +# SINGLE: x86_64 +# SINGLE-NOT: arm64 + +## Universal binary with x86_64 and arm64 slices +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x1000007 + cpusubtype: 0x3 + offset: 0x1000 + size: 4176 + align: 12 + - cputype: 0x100000C + cpusubtype: 0x0 + offset: 0x2050 + size: 8280 + align: 12 +Slices: + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - _main + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100001F80 + size: 8 + offset: 0x1F80 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 00008052C0035FD6 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 88 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294975360 + StringTable: + - ' ' + - _main + +## Single x86_64 Mach-O executable +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - _main diff --git a/tests/macho/disassembly.test b/tests/macho/disassembly.test new file mode 100644 index 0000000..99989b5 --- /dev/null +++ b/tests/macho/disassembly.test @@ -0,0 +1,378 @@ +# Test --disassemble for Mach-O binaries +# +# Tests that disassembly works for single-arch and universal binaries, +# and that --source-filter is required for universal binaries. + +## Test 1: Disassemble function in single-architecture x86_64 binary +# RUN: %yaml2obj --docnum=1 %s -o %t.single-x86 +# RUN: %bloaty %t.single-x86 --disassemble=_main | %FileCheck --check-prefix=SINGLE-X86 %s + +# SINGLE-X86: push +# SINGLE-X86-NEXT: mov +# SINGLE-X86-NEXT: xor +# SINGLE-X86-NEXT: pop +# SINGLE-X86-NEXT: ret +# SINGLE-X86-EMPTY: + +## Test 2: Disassemble function in single-architecture arm64 binary +# RUN: %yaml2obj --docnum=2 %s -o %t.single-arm +# RUN: %bloaty %t.single-arm --disassemble=_main | %FileCheck --check-prefix=SINGLE-ARM %s + +# SINGLE-ARM: mov +# SINGLE-ARM-NEXT: ret +# SINGLE-ARM-EMPTY: + +## Test 3: Universal binary requires --source-filter +# RUN: %yaml2obj --docnum=3 %s -o %t.universal +# RUN: not %bloaty %t.universal --disassemble=_main 2>&1 | %FileCheck --check-prefix=NO-FILTER %s + +# NO-FILTER: Disassembling universal binaries requires --source-filter to select architecture + +## Test 4: Disassemble x86_64 slice from universal binary +# RUN: %bloaty %t.universal --disassemble=_main --source-filter=x86_64 | %FileCheck --check-prefix=UNIVERSAL-X86 %s + +# UNIVERSAL-X86: push +# UNIVERSAL-X86-NEXT: mov +# UNIVERSAL-X86-NEXT: xor +# UNIVERSAL-X86-NEXT: pop +# UNIVERSAL-X86-NEXT: ret +# UNIVERSAL-X86-EMPTY: + +## Test 5: Disassemble arm64 slice from universal binary +# RUN: %bloaty %t.universal --disassemble=_main --source-filter=arm64 | %FileCheck --check-prefix=UNIVERSAL-ARM %s + +# UNIVERSAL-ARM: mov +# UNIVERSAL-ARM-NEXT: ret +# UNIVERSAL-ARM-EMPTY: + +## Test 6: Error on non-existent function +# RUN: not %bloaty %t.single-x86 --disassemble=_nonexistent 2>&1 | %FileCheck --check-prefix=NOT-FOUND %s + +# NOT-FOUND: Couldn't find function _nonexistent to disassemble + +## Test 7: Error on wrong architecture filter +# RUN: not %bloaty %t.universal --disassemble=_main --source-filter=i386 2>&1 | %FileCheck --check-prefix=BAD-ARCH %s + +# BAD-ARCH: No architecture matching filter 'i386' found in universal binary + +## Single x86_64 Mach-O executable with _main function +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 4 + sizeofcmds: 352 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 4096 + nsyms: 1 + stroff: 4112 + strsize: 9 +LinkEditData: + NameList: + - n_strx: 3 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - '' + - _main + +## Single arm64 Mach-O executable with _main function +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 4 + sizeofcmds: 352 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100001F80 + size: 8 + offset: 0x1F80 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 00008052C0035FD6 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 88 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 8192 + nsyms: 1 + stroff: 8208 + strsize: 9 +LinkEditData: + NameList: + - n_strx: 3 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294975360 + StringTable: + - ' ' + - '' + - _main + +## Universal binary with x86_64 and arm64 slices, each with _main function +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x1000007 + cpusubtype: 0x3 + offset: 0x1000 + size: 4176 + align: 12 + - cputype: 0x100000C + cpusubtype: 0x0 + offset: 0x3000 + size: 8280 + align: 12 +Slices: + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 4 + sizeofcmds: 352 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 4096 + nsyms: 1 + stroff: 4112 + strsize: 9 + LinkEditData: + NameList: + - n_strx: 3 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - '' + - _main + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 4 + sizeofcmds: 352 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100001F80 + size: 8 + offset: 0x1F80 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 00008052C0035FD6 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 88 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 8192 + nsyms: 1 + stroff: 8208 + strsize: 9 + LinkEditData: + NameList: + - n_strx: 3 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294975360 + StringTable: + - ' ' + - '' + - _main