From cfd57ff281a0ea01c7014e3133270349e1252b16 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 26 Jan 2024 17:22:31 +0100 Subject: [PATCH 01/22] zld: hook tracy allocator for memory profiling --- src/main.zig | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 161de5a7..1cd11049 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,12 +2,18 @@ const std = @import("std"); const builtin = @import("builtin"); const build_options = @import("build_options"); const mem = std.mem; +const tracy = @import("tracy.zig"); const Allocator = mem.Allocator; const ThreadPool = std.Thread.Pool; const Zld = @import("Zld.zig"); -const gpa = std.heap.c_allocator; +var tracy_alloc = tracy.tracyAllocator(std.heap.c_allocator); + +const gpa = if (tracy.enable_allocation) + tracy_alloc.allocator() +else + std.heap.c_allocator; const usage = \\zld is a generic linker driver. From 0e315953240e70d74e7b0489165c2a195db865f6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 26 Jan 2024 21:51:25 +0100 Subject: [PATCH 02/22] zld: override callstack depth --- build.zig | 2 ++ src/tracy.zig | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build.zig b/build.zig index 48dc67f0..42e78c62 100644 --- a/build.zig +++ b/build.zig @@ -12,6 +12,7 @@ pub fn build(b: *std.Build) void { const strip = b.option(bool, "strip", "Omit debug information"); const enable_logging = b.option(bool, "log", "Whether to enable logging") orelse (mode == .Debug); const enable_tracy = b.option([]const u8, "tracy", "Enable Tracy integration. Supply path to Tracy source"); + const tracy_callstack_depth = b.option(usize, "tracy-callstack-depth", "Set Tracy callstack depth") orelse 10; const yaml = b.dependency("zig-yaml", .{ .target = target, @@ -37,6 +38,7 @@ pub fn build(b: *std.Build) void { exe.root_module.addOptions("build_options", exe_opts); exe_opts.addOption(bool, "enable_logging", enable_logging); exe_opts.addOption(bool, "enable_tracy", enable_tracy != null); + exe_opts.addOption(usize, "tracy_callstack_depth", tracy_callstack_depth); if (enable_tracy) |tracy_path| { const client_cpp = fs.path.join( diff --git a/src/tracy.zig b/src/tracy.zig index 6c9945e9..3e223a88 100644 --- a/src/tracy.zig +++ b/src/tracy.zig @@ -7,7 +7,7 @@ pub const enable_allocation = enable; pub const enable_callstack = enable; // TODO: make this configurable -const callstack_depth = 10; +const callstack_depth = build_options.tracy_callstack_depth; const ___tracy_c_zone_context = extern struct { id: u32, @@ -60,7 +60,7 @@ pub const Ctx = if (enable) ___tracy_c_zone_context else struct { } }; -pub inline fn trace(comptime src: std.builtin.SourceLocation) Ctx { +pub fn trace(comptime src: std.builtin.SourceLocation) Ctx { if (!enable) return .{}; const global = struct { From 4fc379fe1e167ff142de2d212f34b854d1629748 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 00:34:11 +0100 Subject: [PATCH 03/22] macho: move to pread+alloc for objects --- src/MachO.zig | 38 +++---- src/MachO/Archive.zig | 43 +++++--- src/MachO/Atom.zig | 10 +- src/MachO/DwarfInfo.zig | 4 + src/MachO/Object.zig | 223 ++++++++++++++++++++++---------------- src/MachO/relocatable.zig | 4 +- 6 files changed, 187 insertions(+), 135 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index 61e6d6c7..f17f30ff 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -664,21 +664,20 @@ fn addUndefinedGlobals(self: *MachO) !void { fn parsePositional(self: *MachO, arena: Allocator, obj: LinkObject) !void { log.debug("parsing positional {}", .{obj}); - if (try self.parseObject(arena, obj)) return; - if (try self.parseArchive(arena, obj)) return; + if (try self.parseObject(obj)) return; + if (try self.parseArchive(obj)) return; if (try self.parseDylib(arena, obj, true)) |_| return; if (try self.parseTbd(obj, true)) |_| return; self.base.fatal("unknown filetype for positional argument: '{s}'", .{obj.path}); } -fn parseObject(self: *MachO, arena: Allocator, obj: LinkObject) !bool { +fn parseObject(self: *MachO, obj: LinkObject) !bool { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; const file = try std.fs.cwd().openFile(obj.path, .{}); - defer file.close(); const header = file.reader().readStruct(macho.mach_header_64) catch return false; try file.seekTo(0); @@ -689,12 +688,11 @@ fn parseObject(self: *MachO, arena: Allocator, obj: LinkObject) !bool { const stat = file.stat() catch break :mtime 0; break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000))); }; - const data = try file.readToEndAlloc(arena, std.math.maxInt(u32)); const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = .{ .path = obj.path, - .data = data, + .file = file, .index = index, .mtime = mtime, } }); @@ -707,37 +705,29 @@ fn parseObject(self: *MachO, arena: Allocator, obj: LinkObject) !bool { return true; } -fn parseArchive(self: *MachO, arena: Allocator, obj: LinkObject) !bool { +fn parseArchive(self: *MachO, obj: LinkObject) !bool { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.allocator; - const file = try std.fs.cwd().openFile(obj.path, .{}); - defer file.close(); - var offset: u64 = 0; - var size: u64 = (try file.stat()).size; - if (fat.isFatLibrary(file)) { - const fat_arch = self.parseFatLibrary(obj.path, file) catch |err| switch (err) { + const fat_arch: ?fat.Arch = if (fat.isFatLibrary(file)) blk: { + break :blk self.parseFatLibrary(obj.path, file) catch |err| switch (err) { error.NoArchSpecified, error.MissingArch => return false, else => |e| return e, }; - offset = fat_arch.offset; - size = fat_arch.size; - try file.seekTo(offset); - } + } else null; + const offset = if (fat_arch) |ar| ar.offset else 0; + try file.seekTo(offset); const magic = file.reader().readBytesNoEof(Archive.SARMAG) catch return false; if (!mem.eql(u8, &magic, Archive.ARMAG)) return false; + try file.seekTo(0); - const data = try arena.alloc(u8, size - Archive.SARMAG); - const nread = try file.readAll(data); - if (nread != size - Archive.SARMAG) return error.InputOutput; - - var archive = Archive{ .path = obj.path, .data = data }; + var archive = Archive{ .path = obj.path, .file = file, .fat_arch = fat_arch }; defer archive.deinit(gpa); - try archive.parse(arena, self); + try archive.parse(self); var has_parse_error = false; for (archive.objects.items) |extracted| { @@ -1057,6 +1047,8 @@ pub fn resolveSymbols(self: *MachO) !void { const index = self.objects.items[i]; if (!self.getFile(index).?.object.alive) { _ = self.objects.orderedRemove(i); + self.files.items(.data)[index].object.deinit(self.base.allocator); + self.files.set(index, .null); } else i += 1; } diff --git a/src/MachO/Archive.zig b/src/MachO/Archive.zig index c31278ce..cc5e9835 100644 --- a/src/MachO/Archive.zig +++ b/src/MachO/Archive.zig @@ -1,5 +1,6 @@ +file: std.fs.File, +fat_arch: ?fat.Arch, path: []const u8, -data: []const u8, objects: std.ArrayListUnmanaged(Object) = .{}, @@ -62,20 +63,30 @@ const ar_hdr = extern struct { }; pub fn deinit(self: *Archive, allocator: Allocator) void { + self.file.close(); self.objects.deinit(allocator); } -pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { +pub fn parse(self: *Archive, macho_file: *MachO) !void { const gpa = macho_file.base.allocator; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + const offset = if (self.fat_arch) |ar| ar.offset else 0; + const size = if (self.fat_arch) |ar| ar.size else (try self.file.stat()).size; + try self.file.seekTo(offset); + const reader = self.file.reader(); + _ = try reader.readBytesNoEof(Archive.SARMAG); + + var pos: usize = Archive.SARMAG; while (true) { - if (stream.pos >= self.data.len) break; - if (!mem.isAligned(stream.pos, 2)) stream.pos += 1; + if (pos >= size) break; + if (!mem.isAligned(pos, 2)) { + try self.file.seekBy(1); + pos += 1; + } const hdr = try reader.readStruct(ar_hdr); + pos += @sizeOf(ar_hdr); if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{ @@ -84,28 +95,33 @@ pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { return error.ParseFailed; } - var size = try hdr.size(); + var hdr_size = try hdr.size(); const name = name: { - if (hdr.name()) |n| break :name try arena.dupe(u8, n); + if (hdr.name()) |n| break :name try gpa.dupe(u8, n); if (try hdr.nameLength()) |len| { - size -= len; - const buf = try arena.alloc(u8, len); + hdr_size -= len; + const buf = try gpa.alloc(u8, len); try reader.readNoEof(buf); + pos += len; const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len; break :name buf[0..actual_len]; } unreachable; }; defer { - _ = stream.seekBy(size) catch {}; + _ = self.file.seekBy(hdr_size) catch {}; + pos += hdr_size; } if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue; const object = Object{ - .archive = self.path, + .archive = .{ + .path = try gpa.dupe(u8, self.path), + .offset = offset + pos, + }, .path = name, - .data = self.data[stream.pos..][0..size], + .file = try std.fs.cwd().openFile(self.path, .{}), .index = undefined, .alive = false, .mtime = hdr.date() catch 0, @@ -117,6 +133,7 @@ pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void { } } +const fat = @import("fat.zig"); const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index c2dc5aaa..9fc502cd 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -61,10 +61,12 @@ pub fn getPriority(self: Atom, macho_file: *MachO) u64 { return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect)); } -pub fn getCode(self: Atom, macho_file: *MachO) []const u8 { +pub fn getCode(self: Atom, macho_file: *MachO) ![]const u8 { + const gpa = macho_file.base.allocator; const code = switch (self.getFile(macho_file)) { .dylib => unreachable, - inline else => |x| x.getSectionData(self.n_sect), + .object => |x| try x.getSectionData(gpa, self.n_sect), + .internal => |x| x.getSectionData(self.n_sect), }; return code[self.off..][0..self.size]; } @@ -290,7 +292,9 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { const relocs = self.getRelocs(macho_file); const file = self.getFile(macho_file); const name = self.getName(macho_file); - @memcpy(buffer, self.getCode(macho_file)); + const code = try self.getCode(macho_file); + defer macho_file.base.allocator.free(code); + @memcpy(buffer, code); relocs_log.debug("{x}: {s}", .{ self.value, name }); diff --git a/src/MachO/DwarfInfo.zig b/src/MachO/DwarfInfo.zig index c3f8d235..db9d52dd 100644 --- a/src/MachO/DwarfInfo.zig +++ b/src/MachO/DwarfInfo.zig @@ -13,6 +13,10 @@ pub fn init(dw: *DwarfInfo, allocator: Allocator) !void { } pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { + allocator.free(dw.debug_info); + allocator.free(dw.debug_abbrev); + allocator.free(dw.debug_str); + dw.abbrev_tables.deinit(allocator); for (dw.compile_units.items) |*cu| { cu.deinit(allocator); diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index 7e0ba9ad..f1be9b74 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -1,13 +1,13 @@ -archive: ?[]const u8 = null, +archive: ?Archive = null, path: []const u8, +file: std.fs.File, mtime: u64, -data: []const u8, index: File.Index, header: ?macho.mach_header_64 = null, sections: std.MultiArrayList(Section) = .{}, symtab: std.MultiArrayList(Nlist) = .{}, -strtab: []const u8 = &[0]u8{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, @@ -22,6 +22,7 @@ cies: std.ArrayListUnmanaged(Cie) = .{}, fdes: std.ArrayListUnmanaged(Fde) = .{}, eh_frame_data: std.ArrayListUnmanaged(u8) = .{}, unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{}, +data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, alive: bool = true, hidden: bool = false, @@ -31,13 +32,22 @@ num_weak_bind_relocs: u32 = 0, output_symtab_ctx: MachO.SymtabCtx = .{}, +const Archive = struct { + path: []const u8, + offset: u64, +}; + pub fn deinit(self: *Object, allocator: Allocator) void { + self.file.close(); + allocator.free(self.path); + if (self.archive) |*ar| allocator.free(ar.path); for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| { relocs.deinit(allocator); sub.deinit(allocator); } self.sections.deinit(allocator); self.symtab.deinit(allocator); + self.strtab.deinit(allocator); self.symbols.deinit(allocator); self.atoms.deinit(allocator); self.cies.deinit(allocator); @@ -49,6 +59,7 @@ pub fn deinit(self: *Object, allocator: Allocator) void { sf.stabs.deinit(allocator); } self.stab_files.deinit(allocator); + self.data_in_code.deinit(allocator); } pub fn parse(self: *Object, macho_file: *MachO) !void { @@ -58,39 +69,76 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { log.debug("parsing input object file {}", .{self.fmtPath()}); const gpa = macho_file.base.allocator; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + const reader = self.file.reader(); + const offset = if (self.archive) |ar| ar.offset else 0; + try self.file.seekTo(offset); self.header = try reader.readStruct(macho.mach_header_64); - if (self.getLoadCommand(.SEGMENT_64)) |lc| { - const sections = lc.getSections(); - try self.sections.ensureUnusedCapacity(gpa, sections.len); - for (sections) |sect| { - const index = try self.sections.addOne(gpa); - self.sections.set(index, .{ .header = sect }); - - if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - self.eh_frame_sect_index = @intCast(index); - } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { - self.compact_unwind_sect_index = @intCast(index); + const lc_data = try gpa.alloc(u8, self.header.?.sizeofcmds); + defer gpa.free(lc_data); + var amt = try self.file.preadAll(lc_data, @sizeOf(macho.mach_header_64) + offset); + if (amt != lc_data.len) return error.InputOutput; + + var it = LoadCommandIterator{ + .ncmds = self.header.?.ncmds, + .buffer = lc_data, + }; + while (it.next()) |lc| switch (lc.cmd()) { + .SEGMENT_64 => { + const sections = lc.getSections(); + try self.sections.ensureUnusedCapacity(gpa, sections.len); + for (sections) |sect| { + const index = try self.sections.addOne(gpa); + self.sections.set(index, .{ .header = sect }); + + if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + self.eh_frame_sect_index = @intCast(index); + } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) { + self.compact_unwind_sect_index = @intCast(index); + } } - } - } - if (self.getLoadCommand(.SYMTAB)) |lc| { - const cmd = lc.cast(macho.symtab_command).?; - self.strtab = self.data[cmd.stroff..][0..cmd.strsize]; - - const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms]; - try self.symtab.ensureUnusedCapacity(gpa, symtab.len); - for (symtab) |nlist| { - self.symtab.appendAssumeCapacity(.{ - .nlist = nlist, - .atom = 0, - .size = 0, - }); - } - } + }, + .SYMTAB => { + const cmd = lc.cast(macho.symtab_command).?; + try self.strtab.resize(gpa, cmd.strsize); + amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset); + if (amt != cmd.strsize) return error.InputOutput; + + const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64)); + defer gpa.free(symtab_buffer); + amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset); + if (amt != symtab_buffer.len) return error.InputOutput; + const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms]; + try self.symtab.ensureUnusedCapacity(gpa, symtab.len); + for (symtab) |nlist| { + self.symtab.appendAssumeCapacity(.{ + .nlist = nlist, + .atom = 0, + .size = 0, + }); + } + }, + .DATA_IN_CODE => { + const cmd = lc.cast(macho.linkedit_data_command).?; + const buffer = try gpa.alloc(u8, cmd.datasize); + defer gpa.free(buffer); + amt = try self.file.preadAll(buffer, offset + cmd.dataoff); + if (amt != cmd.datasize) return error.InputOutput; + const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); + const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(buffer.ptr))[0..ndice]; + try self.data_in_code.appendUnalignedSlice(gpa, dice); + }, + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => if (self.platform == null) { + self.platform = MachO.Options.Platform.fromLoadCommand(lc); + }, + else => {}, + }; const NlistIdx = struct { nlist: macho.nlist_64, @@ -151,7 +199,6 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { try self.parseUnwindRecords(macho_file); } - self.initPlatform(); try self.initDwarfInfo(macho_file); for (self.atoms.items) |atom_index| { @@ -601,7 +648,10 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { const sect = slice.items(.header)[sect_id]; const relocs = slice.items(.relocs)[sect_id]; - const data = self.getSectionData(sect_id); + // TODO: read into buffer directly + const data = try self.getSectionData(gpa, sect_id); + defer gpa.free(data); + try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len); self.eh_frame_data.appendSliceAssumeCapacity(data); @@ -702,7 +752,8 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void { }; const gpa = macho_file.base.allocator; - const data = self.getSectionData(sect_id); + const data = try self.getSectionData(gpa, sect_id); + defer gpa.free(data); const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; const sym_lookup = SymbolLookup{ .ctx = self }; @@ -897,24 +948,6 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void { } } -fn initPlatform(self: *Object) void { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - self.platform = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => break MachO.Options.Platform.fromLoadCommand(cmd), - else => {}, - } - } else null; -} - /// Currently, we only check if a compile unit for this input object file exists /// and record that so that we can emit symbol stabs. /// TODO in the future, we want parse debug info and debug line sections so that @@ -938,11 +971,13 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { if (debug_info_index == null or debug_abbrev_index == null) return; + // TODO: do not extend DWARF sections live beyond this function var dwarf_info = DwarfInfo{ - .debug_info = self.getSectionData(@intCast(debug_info_index.?)), - .debug_abbrev = self.getSectionData(@intCast(debug_abbrev_index.?)), - .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "", + .debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?)), + .debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?)), + .debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else "", }; + errdefer dwarf_info.deinit(gpa); dwarf_info.init(gpa) catch { macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()}); return error.ParseFailed; @@ -1162,8 +1197,8 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void { self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name - if (self.archive) |path| { - self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1)); + if (self.archive) |ar| { + self.output_symtab_ctx.strsize += @as(u32, @intCast(ar.path.len + 1 + self.path.len + 1 + 1)); } else { self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1)); } @@ -1307,8 +1342,8 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { index += 1; // N_OSO path n_strx = @as(u32, @intCast(macho_file.strtab.items.len)); - if (self.archive) |path| { - macho_file.strtab.appendSliceAssumeCapacity(path); + if (self.archive) |ar| { + macho_file.strtab.appendSliceAssumeCapacity(ar.path); macho_file.strtab.appendAssumeCapacity('('); macho_file.strtab.appendSliceAssumeCapacity(self.path); macho_file.strtab.appendAssumeCapacity(')'); @@ -1474,26 +1509,21 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { } } -fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - if (cmd.cmd() == lc) return cmd; - } else return null; -} - -pub fn getSectionData(self: *const Object, index: u32) []const u8 { +pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 { const slice = self.sections.slice(); + const offset = if (self.archive) |ar| ar.offset else 0; assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; - return self.data[sect.offset..][0..sect.size]; + const buffer = try allocator.alloc(u8, sect.size); + errdefer allocator.free(buffer); + const amt = try self.file.preadAll(buffer, sect.offset + offset); + if (amt != sect.size) return error.InputOutput; + return buffer; } fn getString(self: Object, off: u32) [:0]const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); } /// TODO handle multiple CUs @@ -1520,15 +1550,8 @@ pub fn hasObjc(self: Object) bool { return false; } -pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry { - const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{}; - const cmd = lc.cast(macho.linkedit_data_command).?; - const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); - const dice = @as( - [*]align(1) const macho.data_in_code_entry, - @ptrCast(self.data.ptr + cmd.dataoff), - )[0..ndice]; - return dice; +pub fn getDataInCode(self: Object) []const macho.data_in_code_entry { + return self.data_in_code.items; } pub inline fn hasSubsections(self: Object) bool { @@ -1690,8 +1713,8 @@ fn formatPath( ) !void { _ = unused_fmt_string; _ = options; - if (object.archive) |path| { - try writer.writeAll(path); + if (object.archive) |ar| { + try writer.writeAll(ar.path); try writer.writeByte('('); try writer.writeAll(object.path); try writer.writeByte(')'); @@ -1759,11 +1782,16 @@ const x86_64 = struct { ) !void { const gpa = macho_file.base.allocator; - const relocs = @as( - [*]align(1) const macho.relocation_info, - @ptrCast(self.data.ptr + sect.reloff), - )[0..sect.nreloc]; - const code = self.getSectionData(@intCast(n_sect)); + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); + const offset = if (self.archive) |ar| ar.offset else 0; + const amt = try self.file.preadAll(relocs_buffer, offset + sect.reloff); + if (amt != relocs_buffer.len) return error.InputOutput; + + const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; + + const code = try self.getSectionData(gpa, @intCast(n_sect)); + defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -1911,11 +1939,16 @@ const aarch64 = struct { ) !void { const gpa = macho_file.base.allocator; - const relocs = @as( - [*]align(1) const macho.relocation_info, - @ptrCast(self.data.ptr + sect.reloff), - )[0..sect.nreloc]; - const code = self.getSectionData(@intCast(n_sect)); + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); + const offset = if (self.archive) |ar| ar.offset else 0; + const amt = try self.file.preadAll(relocs_buffer, offset + sect.reloff); + if (amt != relocs_buffer.len) return error.InputOutput; + + const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; + + const code = try self.getSectionData(gpa, @intCast(n_sect)); + defer gpa.free(code); try out.ensureTotalCapacityPrecise(gpa, relocs.len); diff --git a/src/MachO/relocatable.zig b/src/MachO/relocatable.zig index 0a5256d6..3f57ad24 100644 --- a/src/MachO/relocatable.zig +++ b/src/MachO/relocatable.zig @@ -238,7 +238,9 @@ fn writeAtoms(macho_file: *MachO) !void { const atom = macho_file.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; - @memcpy(code[off..][0..atom.size], atom.getCode(macho_file)); + const in_code = try atom.getCode(macho_file); + defer gpa.free(in_code); + @memcpy(code[off..][0..atom.size], in_code); try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); } From a9416a196f58e31f8bb90aaa59f257a206b6d89e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 00:49:18 +0100 Subject: [PATCH 04/22] macho: debug invalid free --- src/MachO.zig | 2 +- src/MachO/Atom.zig | 2 +- src/MachO/Object.zig | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index f17f30ff..b7be210c 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -691,7 +691,7 @@ fn parseObject(self: *MachO, obj: LinkObject) !bool { const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .object = .{ - .path = obj.path, + .path = try gpa.dupe(u8, obj.path), .file = file, .index = index, .mtime = mtime, diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index 9fc502cd..62e10250 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -66,7 +66,7 @@ pub fn getCode(self: Atom, macho_file: *MachO) ![]const u8 { const code = switch (self.getFile(macho_file)) { .dylib => unreachable, .object => |x| try x.getSectionData(gpa, self.n_sect), - .internal => |x| x.getSectionData(self.n_sect), + .internal => |x| try gpa.dupe(u8, x.getSectionData(self.n_sect)), }; return code[self.off..][0..self.size]; } diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index f1be9b74..ad89cd8f 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -975,7 +975,7 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { var dwarf_info = DwarfInfo{ .debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?)), .debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?)), - .debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else "", + .debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{}, }; errdefer dwarf_info.deinit(gpa); dwarf_info.init(gpa) catch { From 3ea8d5bf1c71611e8f491cd8fb7008bf93e3fa0d Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 00:53:57 +0100 Subject: [PATCH 05/22] macho: fix all invalid frees --- src/MachO/Atom.zig | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index 62e10250..63827a6c 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -63,12 +63,18 @@ pub fn getPriority(self: Atom, macho_file: *MachO) u64 { pub fn getCode(self: Atom, macho_file: *MachO) ![]const u8 { const gpa = macho_file.base.allocator; - const code = switch (self.getFile(macho_file)) { + switch (self.getFile(macho_file)) { .dylib => unreachable, - .object => |x| try x.getSectionData(gpa, self.n_sect), - .internal => |x| try gpa.dupe(u8, x.getSectionData(self.n_sect)), - }; - return code[self.off..][0..self.size]; + .object => |x| { + const code = try x.getSectionData(gpa, self.n_sect); + defer gpa.free(code); + return gpa.dupe(u8, code[self.off..][0..self.size]); + }, + .internal => |x| { + const code = x.getSectionData(self.n_sect); + return gpa.dupe(u8, code[self.off..][0..self.size]); + }, + } } pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { From fbd8b0ad3256d17c7913287191658c836c45acd1 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 11:14:05 +0100 Subject: [PATCH 06/22] macho: read atom code directly into a buffer --- src/MachO.zig | 1 + src/MachO/Atom.zig | 29 ++++++++++++++++++++++------- src/MachO/Object.zig | 20 ++++++++++++++------ src/MachO/relocatable.zig | 4 +--- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index b7be210c..b152de61 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -2137,6 +2137,7 @@ fn writeAtoms(self: *MachO) !void { const atom = self.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; + try atom.getCode(self, buffer[off..][0..atom.size]); atom.resolveRelocs(self, buffer[off..][0..atom.size]) catch |err| switch (err) { error.ResolveFailed => has_resolve_error = true, else => |e| return e, diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index 63827a6c..e70fea2e 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -61,14 +61,32 @@ pub fn getPriority(self: Atom, macho_file: *MachO) u64 { return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect)); } -pub fn getCode(self: Atom, macho_file: *MachO) ![]const u8 { +pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { + assert(buffer.len == self.size); + switch (self.getFile(macho_file)) { + .dylib => unreachable, + .object => |x| { + const slice = x.sections.slice(); + const offset = if (x.archive) |ar| ar.offset else 0; + const sect = slice.items(.header)[self.n_sect]; + try x.getData(sect.offset + offset + self.off, buffer); + }, + .internal => |x| { + const code = x.getSectionData(self.n_sect); + @memcpy(buffer, code); + }, + } +} + +pub fn getCodeAlloc(self: Atom, macho_file: *MachO) ![]const u8 { const gpa = macho_file.base.allocator; switch (self.getFile(macho_file)) { .dylib => unreachable, .object => |x| { - const code = try x.getSectionData(gpa, self.n_sect); - defer gpa.free(code); - return gpa.dupe(u8, code[self.off..][0..self.size]); + const slice = x.sections.slice(); + const offset = if (x.archive) |ar| ar.offset else 0; + const sect = slice.items(.header)[self.n_sect]; + return x.getDataAlloc(gpa, sect.offset + offset + self.off, self.size); }, .internal => |x| { const code = x.getSectionData(self.n_sect); @@ -298,9 +316,6 @@ pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void { const relocs = self.getRelocs(macho_file); const file = self.getFile(macho_file); const name = self.getName(macho_file); - const code = try self.getCode(macho_file); - defer macho_file.base.allocator.free(code); - @memcpy(buffer, code); relocs_log.debug("{x}: {s}", .{ self.value, name }); diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index ad89cd8f..4f8d9a81 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -1509,16 +1509,24 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { } } +pub fn getData(self: *const Object, off: usize, buf: []u8) !void { + const amt = try self.file.preadAll(buf, off); + if (amt != buf.len) return error.InputOutput; +} + +pub fn getDataAlloc(self: *const Object, allocator: Allocator, off: usize, size: usize) ![]u8 { + const buffer = try allocator.alloc(u8, size); + errdefer allocator.free(buffer); + try self.getData(off, buffer); + return buffer; +} + pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 { const slice = self.sections.slice(); - const offset = if (self.archive) |ar| ar.offset else 0; assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; - const buffer = try allocator.alloc(u8, sect.size); - errdefer allocator.free(buffer); - const amt = try self.file.preadAll(buffer, sect.offset + offset); - if (amt != sect.size) return error.InputOutput; - return buffer; + const offset = if (self.archive) |ar| ar.offset else 0; + return self.getDataAlloc(allocator, sect.offset + offset, sect.size); } fn getString(self: Object, off: u32) [:0]const u8 { diff --git a/src/MachO/relocatable.zig b/src/MachO/relocatable.zig index 3f57ad24..561dea98 100644 --- a/src/MachO/relocatable.zig +++ b/src/MachO/relocatable.zig @@ -238,9 +238,7 @@ fn writeAtoms(macho_file: *MachO) !void { const atom = macho_file.getAtom(atom_index).?; assert(atom.flags.alive); const off = atom.value - header.addr; - const in_code = try atom.getCode(macho_file); - defer gpa.free(in_code); - @memcpy(code[off..][0..atom.size], in_code); + try atom.getCode(macho_file, code[off..][0..atom.size]); try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs); } From 8d0d3f4b65a370697c9d0b153869ce8930f85674 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 14:47:19 +0100 Subject: [PATCH 07/22] macho: rename getData/getDataAlloc to preadAll/preadAllAlloc --- src/MachO/Atom.zig | 4 ++-- src/MachO/Object.zig | 36 ++++++++++++------------------------ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index e70fea2e..6e4db068 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -69,7 +69,7 @@ pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { const slice = x.sections.slice(); const offset = if (x.archive) |ar| ar.offset else 0; const sect = slice.items(.header)[self.n_sect]; - try x.getData(sect.offset + offset + self.off, buffer); + try x.preadAll(buffer, sect.offset + offset + self.off); }, .internal => |x| { const code = x.getSectionData(self.n_sect); @@ -86,7 +86,7 @@ pub fn getCodeAlloc(self: Atom, macho_file: *MachO) ![]const u8 { const slice = x.sections.slice(); const offset = if (x.archive) |ar| ar.offset else 0; const sect = slice.items(.header)[self.n_sect]; - return x.getDataAlloc(gpa, sect.offset + offset + self.off, self.size); + return x.preadAllAlloc(gpa, sect.offset + offset + self.off, self.size); }, .internal => |x| { const code = x.getSectionData(self.n_sect); diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index 4f8d9a81..7fa12666 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -75,10 +75,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { self.header = try reader.readStruct(macho.mach_header_64); - const lc_data = try gpa.alloc(u8, self.header.?.sizeofcmds); + const lc_data = try self.preadAllAlloc(gpa, offset + @sizeOf(macho.mach_header_64), self.header.?.sizeofcmds); defer gpa.free(lc_data); - var amt = try self.file.preadAll(lc_data, @sizeOf(macho.mach_header_64) + offset); - if (amt != lc_data.len) return error.InputOutput; var it = LoadCommandIterator{ .ncmds = self.header.?.ncmds, @@ -102,13 +100,9 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { .SYMTAB => { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize); - amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset); - if (amt != cmd.strsize) return error.InputOutput; - - const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64)); + try self.preadAll(self.strtab.items, cmd.stroff + offset); + const symtab_buffer = try self.preadAllAlloc(gpa, cmd.symoff + offset, cmd.nsyms * @sizeOf(macho.nlist_64)); defer gpa.free(symtab_buffer); - amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset); - if (amt != symtab_buffer.len) return error.InputOutput; const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms]; try self.symtab.ensureUnusedCapacity(gpa, symtab.len); for (symtab) |nlist| { @@ -121,10 +115,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { }, .DATA_IN_CODE => { const cmd = lc.cast(macho.linkedit_data_command).?; - const buffer = try gpa.alloc(u8, cmd.datasize); + const buffer = try self.preadAllAlloc(gpa, offset + cmd.dataoff, cmd.datasize); defer gpa.free(buffer); - amt = try self.file.preadAll(buffer, offset + cmd.dataoff); - if (amt != cmd.datasize) return error.InputOutput; const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(buffer.ptr))[0..ndice]; try self.data_in_code.appendUnalignedSlice(gpa, dice); @@ -1509,15 +1501,15 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { } } -pub fn getData(self: *const Object, off: usize, buf: []u8) !void { +pub fn preadAll(self: *const Object, buf: []u8, off: usize) !void { const amt = try self.file.preadAll(buf, off); if (amt != buf.len) return error.InputOutput; } -pub fn getDataAlloc(self: *const Object, allocator: Allocator, off: usize, size: usize) ![]u8 { +pub fn preadAllAlloc(self: *const Object, allocator: Allocator, off: usize, size: usize) ![]u8 { const buffer = try allocator.alloc(u8, size); errdefer allocator.free(buffer); - try self.getData(off, buffer); + try self.preadAll(buffer, off); return buffer; } @@ -1526,7 +1518,7 @@ pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![] assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; const offset = if (self.archive) |ar| ar.offset else 0; - return self.getDataAlloc(allocator, sect.offset + offset, sect.size); + return self.preadAllAlloc(allocator, sect.offset + offset, sect.size); } fn getString(self: Object, off: u32) [:0]const u8 { @@ -1790,11 +1782,9 @@ const x86_64 = struct { ) !void { const gpa = macho_file.base.allocator; - const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); - defer gpa.free(relocs_buffer); const offset = if (self.archive) |ar| ar.offset else 0; - const amt = try self.file.preadAll(relocs_buffer, offset + sect.reloff); - if (amt != relocs_buffer.len) return error.InputOutput; + const relocs_buffer = try self.preadAllAlloc(gpa, sect.reloff + offset, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; @@ -1947,11 +1937,9 @@ const aarch64 = struct { ) !void { const gpa = macho_file.base.allocator; - const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); - defer gpa.free(relocs_buffer); const offset = if (self.archive) |ar| ar.offset else 0; - const amt = try self.file.preadAll(relocs_buffer, offset + sect.reloff); - if (amt != relocs_buffer.len) return error.InputOutput; + const relocs_buffer = try self.preadAllAlloc(gpa, sect.reloff + offset, sect.nreloc * @sizeOf(macho.relocation_info)); + defer gpa.free(relocs_buffer); const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; From f0c8e241799cddf47e9d6998b6366717d86a180f Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 15:27:06 +0100 Subject: [PATCH 08/22] macho: clean up allocations in DwarfInfo --- src/MachO/DwarfInfo.zig | 62 +++++++++++++++++++++++++---------------- src/MachO/Object.zig | 20 ++++++++----- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/MachO/DwarfInfo.zig b/src/MachO/DwarfInfo.zig index db9d52dd..f9a9e458 100644 --- a/src/MachO/DwarfInfo.zig +++ b/src/MachO/DwarfInfo.zig @@ -1,39 +1,45 @@ -debug_info: []const u8, -debug_abbrev: []const u8, -debug_str: []const u8, - /// Abbreviation table indexed by offset in the .debug_abbrev bytestream abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{}, /// List of compile units as they appear in the .debug_info bytestream compile_units: std.ArrayListUnmanaged(CompileUnit) = .{}, - -pub fn init(dw: *DwarfInfo, allocator: Allocator) !void { - try dw.parseAbbrevTables(allocator); - try dw.parseCompileUnits(allocator); +/// Debug info string table +strtab: std.ArrayListUnmanaged(u8) = .{}, +/// Debug info data +di_data: std.ArrayListUnmanaged(u8) = .{}, + +pub fn init(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { + try dw.strtab.ensureTotalCapacityPrecise(allocator, di.debug_str.len); + dw.strtab.appendSliceAssumeCapacity(di.debug_str); + try dw.parseAbbrevTables(allocator, di); + try dw.parseCompileUnits(allocator, di); } pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { - allocator.free(dw.debug_info); - allocator.free(dw.debug_abbrev); - allocator.free(dw.debug_str); - dw.abbrev_tables.deinit(allocator); for (dw.compile_units.items) |*cu| { cu.deinit(allocator); } dw.compile_units.deinit(allocator); + dw.strtab.deinit(allocator); +} + +fn appendDiData(dw: *DwarfInfo, allocator: Allocator, values: []const u8) error{OutOfMemory}!u32 { + const index: u32 = @intCast(dw.di_data.items.len); + try dw.di_data.ensureUnusedCapacity(allocator, values.len); + dw.di_data.appendSliceAssumeCapacity(values); + return index; } fn getString(dw: DwarfInfo, off: u64) [:0]const u8 { - assert(off < dw.debug_str.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0); + assert(off < dw.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.strtab.items.ptr + off)), 0); } -fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { +fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { const tracy = trace(@src()); defer tracy.end(); - const debug_abbrev = dw.debug_abbrev; + const debug_abbrev = di.debug_abbrev; var stream = std.io.fixedBufferStream(debug_abbrev); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); @@ -81,11 +87,11 @@ fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void { } } -fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { +fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void { const tracy = trace(@src()); defer tracy.end(); - const debug_info = dw.debug_info; + const debug_info = di.debug_info; var stream = std.io.fixedBufferStream(debug_info); var creader = std.io.countingReader(stream.reader()); const reader = creader.reader(); @@ -111,7 +117,7 @@ fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void { cu.header.address_size = try reader.readInt(u8, .little); const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?; - try dw.parseDie(allocator, cu, table, null, &creader); + try dw.parseDie(allocator, cu, table, di, null, &creader); } } @@ -120,6 +126,7 @@ fn parseDie( allocator: Allocator, cu: *CompileUnit, table: AbbrevTable, + di: DebugInfo, parent: ?u32, creader: anytype, ) anyerror!void { @@ -144,19 +151,20 @@ fn parseDie( } const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors - const data = dw.debug_info; + const data = di.debug_info; try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len); for (decl.attrs.values()) |attr| { const start = creader.bytes_read; try advanceByFormSize(cu, attr.form, creader); const end = creader.bytes_read; - cu.diePtr(die).values.appendAssumeCapacity(data[start..end]); + const index = try dw.appendDiData(allocator, data[start..end]); + cu.diePtr(die).values.appendAssumeCapacity(.{ .index = index, .len = @intCast(end - start) }); } if (decl.children) { // Open scope - try dw.parseDie(allocator, cu, table, die, creader); + try dw.parseDie(allocator, cu, table, di, die, creader); } } } @@ -342,7 +350,7 @@ pub const CompileUnit = struct { pub const Die = struct { code: Code, - values: std.ArrayListUnmanaged([]const u8) = .{}, + values: std.ArrayListUnmanaged(struct { index: u32, len: u32 }) = .{}, children: std.ArrayListUnmanaged(Die.Index) = .{}, pub fn deinit(die: *Die, gpa: Allocator) void { @@ -356,7 +364,7 @@ pub const Die = struct { const index = decl.attrs.getIndex(at) orelse return null; const attr = decl.attrs.values()[index]; const value = die.values.items[index]; - return .{ .attr = attr, .bytes = value }; + return .{ .attr = attr, .bytes = ctx.di_data.items[value.index..][0..value.len] }; } pub const Index = u32; @@ -459,6 +467,12 @@ pub const Format = enum { dwarf64, }; +const DebugInfo = struct { + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, +}; + const assert = std.debug.assert; const dwarf = std.dwarf; const leb = std.leb; diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index 7fa12666..ca09dfe2 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -963,14 +963,20 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void { if (debug_info_index == null or debug_abbrev_index == null) return; - // TODO: do not extend DWARF sections live beyond this function - var dwarf_info = DwarfInfo{ - .debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?)), - .debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?)), - .debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{}, - }; + const debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?)); + defer gpa.free(debug_info); + const debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?)); + defer gpa.free(debug_abbrev); + const debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{}; + defer gpa.free(debug_str); + + var dwarf_info = DwarfInfo{}; errdefer dwarf_info.deinit(gpa); - dwarf_info.init(gpa) catch { + dwarf_info.init(gpa, .{ + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + }) catch { macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()}); return error.ParseFailed; }; From 3a166d7e90a7aacd5a75e289f57bf3de3fe182ef Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 16:19:09 +0100 Subject: [PATCH 09/22] macho: store object strings in object's strtab --- src/MachO.zig | 1 + src/MachO/Atom.zig | 2 +- src/MachO/Dylib.zig | 2 ++ src/MachO/InternalObject.zig | 1 + src/MachO/Object.zig | 30 ++++++++++++++++++++---------- src/MachO/Symbol.zig | 11 ++++++++++- 6 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index b152de61..9300093c 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -2885,6 +2885,7 @@ pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult { if (!gop.found_existing) { const index = try self.addSymbol(); const global = self.getSymbol(index); + global.flags.global = true; global.name = off; gop.value_ptr.* = index; } diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index 6e4db068..fb96f85a 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -38,7 +38,7 @@ unwind_records: Loc = .{}, flags: Flags = .{}, pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { - return macho_file.string_intern.getAssumeExists(self.name); + return self.getFile(macho_file).object.getString(self.name); } pub fn getFile(self: Atom, macho_file: *MachO) File { diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index 4944c4d5..822d9057 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -484,8 +484,10 @@ pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void { for (self.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); const name = sym.name; + const global = sym.flags.global; sym.* = .{}; sym.name = name; + sym.flags.global = global; } } diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig index e139e4ef..a66ef3fa 100644 --- a/src/MachO/InternalObject.zig +++ b/src/MachO/InternalObject.zig @@ -27,6 +27,7 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) self.symbols.addOneAssumeCapacity().* = gop.index; const sym = macho_file.getSymbol(gop.index); sym.* = .{ .name = off, .file = self.index }; + sym.flags.global = true; return gop.index; } diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index ca09dfe2..e1bb565a 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -237,7 +237,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { defer gpa.free(name); const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr; const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = size, @@ -267,7 +267,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { else sect.@"align"; const atom_index = try self.addAtom(.{ - .name = self.getString(nlist.nlist.n_strx), + .name = nlist.nlist.n_strx, .n_sect = @intCast(n_sect), .off = nlist.nlist.n_value - sect.addr, .size = size, @@ -300,7 +300,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { defer gpa.free(name); const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = sect.size, @@ -336,7 +336,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } const AddAtomArgs = struct { - name: [:0]const u8, + name: u32, n_sect: u8, off: u64, size: u64, @@ -349,7 +349,7 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { const atom = macho_file.getAtom(atom_index).?; atom.file = self.index; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, args.name); + atom.name = args.name; atom.n_sect = args.n_sect; atom.size = args.size; atom.alignment = args.alignment; @@ -376,7 +376,7 @@ fn initLiteralSections(self: *Object, macho_file: *MachO) !void { defer gpa.free(name); const atom_index = try self.addAtom(.{ - .name = name, + .name = try self.addString(gpa, name), .n_sect = @intCast(n_sect), .off = 0, .size = sect.size, @@ -475,10 +475,9 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void { const index = try macho_file.addSymbol(); self.symbols.appendAssumeCapacity(index); const symbol = macho_file.getSymbol(index); - const name = self.getString(nlist.n_strx); symbol.* = .{ .value = nlist.n_value, - .name = try macho_file.string_intern.insert(gpa, name), + .name = nlist.n_strx, .nlist_idx = @intCast(i), .atom = 0, .file = self.index, @@ -1044,8 +1043,10 @@ pub fn resetGlobals(self: *Object, macho_file: *MachO) void { if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue; const sym = macho_file.getSymbol(sym_index); const name = sym.name; + const global = sym.flags.global; sym.* = .{}; sym.name = name; + sym.flags.global = global; } } @@ -1114,7 +1115,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = nlist.n_value; atom.alignment = (nlist.n_desc >> 8) & 0x0f; @@ -1128,6 +1129,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void { sym.value = 0; sym.atom = atom_index; + sym.flags.global = true; sym.flags.weak = false; sym.flags.weak_ref = false; sym.flags.tentative = false; @@ -1527,7 +1529,15 @@ pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![] return self.preadAllAlloc(allocator, sect.offset + offset, sect.size); } -fn getString(self: Object, off: u32) [:0]const u8 { +fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { + const off: u32 = @intCast(self.strtab.items.len); + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return off; +} + +pub fn getString(self: Object, off: u32) [:0]const u8 { assert(off < self.strtab.items.len); return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); } diff --git a/src/MachO/Symbol.zig b/src/MachO/Symbol.zig index 7f9c500d..3c6d0b3c 100644 --- a/src/MachO/Symbol.zig +++ b/src/MachO/Symbol.zig @@ -55,7 +55,11 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { } pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { - return macho_file.string_intern.getAssumeExists(symbol.name); + if (symbol.flags.global) return macho_file.string_intern.getAssumeExists(symbol.name); + return switch (symbol.getFile(macho_file).?) { + .object => |x| x.getString(symbol.name), + else => macho_file.string_intern.getAssumeExists(symbol.name), + }; } pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom { @@ -315,6 +319,11 @@ pub const Flags = packed struct { /// Whether the symbol is exported at runtime. @"export": bool = false, + /// Whether the symbol is effectively an extern and takes part in global + /// symbol resolution. Then, its name will be saved in global string interning + /// table. + global: bool = false, + /// Whether this symbol is weak. weak: bool = false, From 7b33275ed15a2fd1ef026ce573eceb9b0be82e70 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 16:44:00 +0100 Subject: [PATCH 10/22] macho: implement local strtab for internal object --- src/MachO/Atom.zig | 5 ++++- src/MachO/InternalObject.zig | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index fb96f85a..6a9ca808 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -38,7 +38,10 @@ unwind_records: Loc = .{}, flags: Flags = .{}, pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 { - return self.getFile(macho_file).object.getString(self.name); + return switch (self.getFile(macho_file)) { + .dylib => unreachable, + inline else => |x| x.getString(self.name), + }; } pub fn getFile(self: Atom, macho_file: *MachO) File { diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig index a66ef3fa..faa64285 100644 --- a/src/MachO/InternalObject.zig +++ b/src/MachO/InternalObject.zig @@ -3,6 +3,7 @@ index: File.Index, sections: std.MultiArrayList(Section) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +strtab: std.ArrayListUnmanaged(u8) = .{}, objc_methnames: std.ArrayListUnmanaged(u8) = .{}, objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), @@ -16,6 +17,7 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void { self.sections.deinit(allocator); self.atoms.deinit(allocator); self.symbols.deinit(allocator); + self.strtab.deinit(allocator); self.objc_methnames.deinit(allocator); } @@ -46,7 +48,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = methname.len + 1; atom.alignment = 0; @@ -80,7 +82,7 @@ fn addObjcSelrefsSection( defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try macho_file.string_intern.insert(gpa, name); + atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = @sizeOf(u64); atom.alignment = 3; @@ -171,6 +173,19 @@ pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 { } else @panic("ref to non-existent section"); } +fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { + const off: u32 = @intCast(self.strtab.items.len); + try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); + self.strtab.appendSliceAssumeCapacity(name); + self.strtab.appendAssumeCapacity(0); + return off; +} + +pub fn getString(self: InternalObject, off: u32) [:0]const u8 { + assert(off < self.strtab.items.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); +} + pub fn asFile(self: *InternalObject) File { return .{ .internal = self }; } From ff2c70d30cdf5c896f4dc801d9a3ac9053ca29fd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 19:53:16 +0100 Subject: [PATCH 11/22] macho: clean up managing objc synthetic globals in internal object --- src/MachO.zig | 9 ++++----- src/MachO/InternalObject.zig | 3 +-- src/MachO/Symbol.zig | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index 9300093c..f8c6f1e1 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -1240,18 +1240,17 @@ fn createObjcSections(self: *MachO) !void { } for (objc_msgsend_syms.keys()) |sym_index| { + const internal = self.getInternalObject().?; const sym = self.getSymbol(sym_index); + _ = try internal.addSymbol(sym.getName(self), self); sym.value = 0; sym.atom = 0; sym.nlist_idx = 0; - sym.file = self.internal_object_index.?; - sym.flags = .{}; + sym.flags = .{ .global = true }; sym.visibility = .hidden; - const object = self.getInternalObject().?; const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; - const selrefs_index = try object.addObjcMsgsendSections(name, self); + const selrefs_index = try internal.addObjcMsgsendSections(name, self); try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self); - try object.symbols.append(gpa, sym_index); } } diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig index faa64285..c7e799ce 100644 --- a/src/MachO/InternalObject.zig +++ b/src/MachO/InternalObject.zig @@ -28,8 +28,7 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) const gop = try macho_file.getOrCreateGlobal(off); self.symbols.addOneAssumeCapacity().* = gop.index; const sym = macho_file.getSymbol(gop.index); - sym.* = .{ .name = off, .file = self.index }; - sym.flags.global = true; + sym.file = self.index; return gop.index; } diff --git a/src/MachO/Symbol.zig b/src/MachO/Symbol.zig index 3c6d0b3c..50657ed8 100644 --- a/src/MachO/Symbol.zig +++ b/src/MachO/Symbol.zig @@ -57,8 +57,8 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool { pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 { if (symbol.flags.global) return macho_file.string_intern.getAssumeExists(symbol.name); return switch (symbol.getFile(macho_file).?) { - .object => |x| x.getString(symbol.name), - else => macho_file.string_intern.getAssumeExists(symbol.name), + .dylib => unreachable, // There are no local symbols for dylibs + inline else => |x| x.getString(symbol.name), }; } From 6dbe6424d33b4da755e9e4fe39f498016d7f4655 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 20:31:23 +0100 Subject: [PATCH 12/22] macho: do not allocate full dylibs in memory --- src/MachO.zig | 31 +++++++------------ src/MachO/Dylib.zig | 72 ++++++++++++++++++++++++++------------------- 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index f8c6f1e1..6394ec32 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -313,7 +313,7 @@ pub fn flush(self: *MachO) !void { var has_parse_error = false; for (resolved_objects.items) |obj| { - self.parsePositional(arena, obj) catch |err| { + self.parsePositional(obj) catch |err| { has_parse_error = true; switch (err) { error.ParseFailed => {}, // already reported @@ -661,12 +661,12 @@ fn addUndefinedGlobals(self: *MachO) !void { } } -fn parsePositional(self: *MachO, arena: Allocator, obj: LinkObject) !void { +fn parsePositional(self: *MachO, obj: LinkObject) !void { log.debug("parsing positional {}", .{obj}); if (try self.parseObject(obj)) return; if (try self.parseArchive(obj)) return; - if (try self.parseDylib(arena, obj, true)) |_| return; + if (try self.parseDylib(obj, true)) |_| return; if (try self.parseTbd(obj, true)) |_| return; self.base.fatal("unknown filetype for positional argument: '{s}'", .{obj.path}); @@ -785,7 +785,7 @@ const DylibOpts = struct { reexport: bool = false, }; -fn parseDylib(self: *MachO, arena: Allocator, obj: LinkObject, explicit: bool) anyerror!?File.Index { +fn parseDylib(self: *MachO, obj: LinkObject, explicit: bool) anyerror!?File.Index { const tracy = trace(@src()); defer tracy.end(); @@ -799,31 +799,23 @@ fn parseDylib(self: *MachO, arena: Allocator, obj: LinkObject, explicit: bool) a const file = try std.fs.cwd().openFile(obj.path, .{}); defer file.close(); - var offset: u64 = 0; - var size: u64 = (try file.stat()).size; - if (fat.isFatLibrary(file)) { - const fat_arch = self.parseFatLibrary(obj.path, file) catch |err| switch (err) { + const fat_arch = if (fat.isFatLibrary(file)) blk: { + break :blk self.parseFatLibrary(obj.path, file) catch |err| switch (err) { error.NoArchSpecified, error.MissingArch => return null, else => |e| return e, }; - offset = fat_arch.offset; - size = fat_arch.size; - try file.seekTo(offset); - } + } else null; + const offset = if (fat_arch) |ar| ar.offset else 0; + try file.seekTo(offset); const header = file.reader().readStruct(macho.mach_header_64) catch return null; try file.seekTo(offset); if (header.filetype != macho.MH_DYLIB) return null; - const data = try arena.alloc(u8, size); - const nread = try file.readAll(data); - if (nread != size) return error.InputOutput; - const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ .path = obj.path, - .data = data, .index = index, .needed = obj.needed, .weak = obj.weak, @@ -831,7 +823,7 @@ fn parseDylib(self: *MachO, arena: Allocator, obj: LinkObject, explicit: bool) a .explicit = explicit, } }); const dylib = &self.files.items(.data)[index].dylib; - try dylib.parse(self); + try dylib.parse(self, file, fat_arch); try self.dylibs.append(gpa, index); self.validateCpuArch(index); @@ -861,7 +853,6 @@ fn parseTbd(self: *MachO, obj: LinkObject, explicit: bool) anyerror!?File.Index const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); self.files.set(index, .{ .dylib = .{ .path = obj.path, - .data = &[0]u8{}, .index = index, .needed = obj.needed, .weak = obj.weak, @@ -989,7 +980,7 @@ fn parseDependentDylibs( .weak = is_weak, }; const file_index = file_index: { - if (try self.parseDylib(arena, link_obj, false)) |file| break :file_index file; + if (try self.parseDylib(link_obj, false)) |file| break :file_index file; if (try self.parseTbd(link_obj, false)) |file| break :file_index file; break :file_index @as(File.Index, 0); }; diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index 822d9057..8b50ae75 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -1,5 +1,4 @@ path: []const u8, -data: []const u8, index: File.Index, header: ?macho.mach_header_64 = null, @@ -32,54 +31,77 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void { id.deinit(allocator); } self.dependents.deinit(allocator); + for (self.rpaths.keys()) |rpath| { + allocator.free(rpath); + } self.rpaths.deinit(allocator); } -pub fn parse(self: *Dylib, macho_file: *MachO) !void { +pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat.Arch) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; - var stream = std.io.fixedBufferStream(self.data); - const reader = stream.reader(); + const offset = if (fat_arch) |ar| ar.offset else 0; log.debug("parsing dylib from binary", .{}); - self.header = try reader.readStruct(macho.mach_header_64); + self.header = try file.reader().readStruct(macho.mach_header_64); - const lc_id = self.getLoadCommand(.ID_DYLIB) orelse { - macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); - return error.ParseFailed; - }; - self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName()); + const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); + defer gpa.free(lc_buffer); + { + const amt = try file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); + if (amt != lc_buffer.len) return error.InputOutput; + } var it = LoadCommandIterator{ .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], + .buffer = lc_buffer, }; - while (it.next()) |cmd| switch (cmd.cmd()) { + while (it.next()) |lc| switch (lc.cmd()) { + .ID_DYLIB => { + self.id = try Id.fromLoadCommand(gpa, lc.cast(macho.dylib_command).?, lc.getDylibPathName()); + }, .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { - const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); + const id = try Id.fromLoadCommand(gpa, lc.cast(macho.dylib_command).?, lc.getDylibPathName()); try self.dependents.append(gpa, id); }, .DYLD_INFO_ONLY => { - const dyld_cmd = cmd.cast(macho.dyld_info_command).?; - const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size]; + const dyld_cmd = lc.cast(macho.dyld_info_command).?; + const data = try gpa.alloc(u8, dyld_cmd.export_size); + defer gpa.free(data); + const amt = try file.preadAll(data, dyld_cmd.export_off + offset); + if (amt != data.len) return error.InputOutput; try self.parseTrie(data, macho_file); }, .DYLD_EXPORTS_TRIE => { - const ld_cmd = cmd.cast(macho.linkedit_data_command).?; - const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize]; + const ld_cmd = lc.cast(macho.linkedit_data_command).?; + const data = try gpa.alloc(u8, ld_cmd.datasize); + defer gpa.free(data); + const amt = try file.preadAll(data, ld_cmd.dataoff + offset); + if (amt != data.len) return error.InputOutput; try self.parseTrie(data, macho_file); }, .RPATH => { - const path = cmd.getRpathPathName(); - try self.rpaths.put(gpa, path, {}); + const path = lc.getRpathPathName(); + try self.rpaths.put(gpa, try gpa.dupe(u8, path), {}); + }, + .BUILD_VERSION, + .VERSION_MIN_MACOSX, + .VERSION_MIN_IPHONEOS, + .VERSION_MIN_TVOS, + .VERSION_MIN_WATCHOS, + => { + self.platform = MachO.Options.Platform.fromLoadCommand(lc); }, else => {}, }; - self.initPlatform(); + if (self.id == null) { + macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path}); + return error.ParseFailed; + } } const TrieIterator = struct { @@ -549,16 +571,6 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { return macho_file.getFile(self.umbrella).?.dylib; } -fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - while (it.next()) |cmd| { - if (cmd.cmd() == lc) return cmd; - } else return null; -} - fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { const off = @as(u32, @intCast(self.strtab.items.len)); try self.strtab.writer(allocator).print("{s}\x00", .{name}); From 3dbc49bfd9d1f04a3eec48c3ace0806a169000b9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 20:37:54 +0100 Subject: [PATCH 13/22] macho: do not store macho header in Dylib --- src/MachO.zig | 16 +++++----------- src/MachO/Dylib.zig | 27 ++++----------------------- 2 files changed, 9 insertions(+), 34 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index 6394ec32..29c4bdf2 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -582,13 +582,7 @@ fn inferCpuArchAndPlatform(self: *MachO, obj: LinkObject, platforms: anytype) !v } else null; } -fn validateCpuArch(self: *MachO, index: File.Index) void { - const file = self.getFile(index).?; - const cputype = switch (file) { - .object => |x| x.header.?.cputype, - .dylib => |x| x.header.?.cputype, - else => unreachable, - }; +fn validateCpuArch(self: *MachO, index: File.Index, cputype: macho.cpu_type_t) void { const cpu_arch: std.Target.Cpu.Arch = switch (cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, @@ -596,7 +590,7 @@ fn validateCpuArch(self: *MachO, index: File.Index) void { }; if (self.options.cpu_arch.? != cpu_arch) { return self.base.fatal("{}: invalid architecture '{s}', expected '{s}'", .{ - file.fmtPath(), + self.getFile(index).?.fmtPath(), @tagName(cpu_arch), @tagName(self.options.cpu_arch.?), }); @@ -699,7 +693,7 @@ fn parseObject(self: *MachO, obj: LinkObject) !bool { const object = &self.files.items(.data)[index].object; try object.parse(self); try self.objects.append(gpa, index); - self.validateCpuArch(index); + self.validateCpuArch(index, header.cputype); self.validatePlatform(index); return true; @@ -747,7 +741,7 @@ fn parseArchive(self: *MachO, obj: LinkObject) !bool { }; try self.objects.append(gpa, index); // TODO this should come before reporting any parse errors - self.validateCpuArch(index); + self.validateCpuArch(index, object.header.?.cputype); self.validatePlatform(index); // Finally, we do a post-parse check for -ObjC to see if we need to force load this member @@ -826,7 +820,7 @@ fn parseDylib(self: *MachO, obj: LinkObject, explicit: bool) anyerror!?File.Inde try dylib.parse(self, file, fat_arch); try self.dylibs.append(gpa, index); - self.validateCpuArch(index); + self.validateCpuArch(index, header.cputype); self.validatePlatform(index); return index; diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index 8b50ae75..e3f17a59 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -1,7 +1,6 @@ path: []const u8, index: File.Index, -header: ?macho.mach_header_64 = null, exports: std.MultiArrayList(Export) = .{}, strtab: std.ArrayListUnmanaged(u8) = .{}, id: ?Id = null, @@ -46,9 +45,9 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat log.debug("parsing dylib from binary", .{}); - self.header = try file.reader().readStruct(macho.mach_header_64); + const header = try file.reader().readStruct(macho.mach_header_64); - const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); + const lc_buffer = try gpa.alloc(u8, header.sizeofcmds); defer gpa.free(lc_buffer); { const amt = try file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); @@ -56,14 +55,14 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat } var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, + .ncmds = header.ncmds, .buffer = lc_buffer, }; while (it.next()) |lc| switch (lc.cmd()) { .ID_DYLIB => { self.id = try Id.fromLoadCommand(gpa, lc.cast(macho.dylib_command).?, lc.getDylibPathName()); }, - .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { + .REEXPORT_DYLIB => if (header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) { const id = try Id.fromLoadCommand(gpa, lc.cast(macho.dylib_command).?, lc.getDylibPathName()); try self.dependents.append(gpa, id); }, @@ -459,24 +458,6 @@ pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void { } } -fn initPlatform(self: *Dylib) void { - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds], - }; - self.platform = while (it.next()) |cmd| { - switch (cmd.cmd()) { - .BUILD_VERSION, - .VERSION_MIN_MACOSX, - .VERSION_MIN_IPHONEOS, - .VERSION_MIN_TVOS, - .VERSION_MIN_WATCHOS, - => break MachO.Options.Platform.fromLoadCommand(cmd), - else => {}, - } - } else null; -} - pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void { const tracy = trace(@src()); defer tracy.end(); From c3131463b6843ebff7ed129a346cc5616129ddd9 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 20:41:35 +0100 Subject: [PATCH 14/22] macho: do not use file.reader() in Dylib --- src/MachO/Dylib.zig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index e3f17a59..16b4d3d8 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -45,7 +45,12 @@ pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat log.debug("parsing dylib from binary", .{}); - const header = try file.reader().readStruct(macho.mach_header_64); + var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; + { + const amt = try file.preadAll(&header_buffer, offset); + if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput; + } + const header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; const lc_buffer = try gpa.alloc(u8, header.sizeofcmds); defer gpa.free(lc_buffer); From 9f4e19af50f0fb97c1bf867e6f3be60237237d77 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 20:43:02 +0100 Subject: [PATCH 15/22] macho: rename Dylib.insertString to Dylib.addString --- src/MachO/Dylib.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MachO/Dylib.zig b/src/MachO/Dylib.zig index 16b4d3d8..b55bd651 100644 --- a/src/MachO/Dylib.zig +++ b/src/MachO/Dylib.zig @@ -150,7 +150,7 @@ const TrieIterator = struct { pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void { try self.exports.append(allocator, .{ - .name = try self.insertString(allocator, name), + .name = try self.addString(allocator, name), .flags = flags, }); } @@ -557,7 +557,7 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib { return macho_file.getFile(self.umbrella).?.dylib; } -fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { +fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 { const off = @as(u32, @intCast(self.strtab.items.len)); try self.strtab.writer(allocator).print("{s}\x00", .{name}); return off; From 1c95f5fd6bdbe327c756b0f2fb3dbdead9b8c4a8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 21:28:51 +0100 Subject: [PATCH 16/22] macho: remove obsolete helpers from Object --- src/MachO/Atom.zig | 20 ++------------- src/MachO/Object.zig | 60 +++++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/MachO/Atom.zig b/src/MachO/Atom.zig index 6a9ca808..865167f9 100644 --- a/src/MachO/Atom.zig +++ b/src/MachO/Atom.zig @@ -72,7 +72,8 @@ pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { const slice = x.sections.slice(); const offset = if (x.archive) |ar| ar.offset else 0; const sect = slice.items(.header)[self.n_sect]; - try x.preadAll(buffer, sect.offset + offset + self.off); + const amt = try x.file.preadAll(buffer, sect.offset + offset + self.off); + if (amt != buffer.len) return error.InputOutput; }, .internal => |x| { const code = x.getSectionData(self.n_sect); @@ -81,23 +82,6 @@ pub fn getCode(self: Atom, macho_file: *MachO, buffer: []u8) !void { } } -pub fn getCodeAlloc(self: Atom, macho_file: *MachO) ![]const u8 { - const gpa = macho_file.base.allocator; - switch (self.getFile(macho_file)) { - .dylib => unreachable, - .object => |x| { - const slice = x.sections.slice(); - const offset = if (x.archive) |ar| ar.offset else 0; - const sect = slice.items(.header)[self.n_sect]; - return x.preadAllAlloc(gpa, sect.offset + offset + self.off, self.size); - }, - .internal => |x| { - const code = x.getSectionData(self.n_sect); - return gpa.dupe(u8, code[self.off..][0..self.size]); - }, - } -} - pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation { const relocs = switch (self.getFile(macho_file)) { .dylib => unreachable, diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index e1bb565a..cb008666 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -75,12 +75,16 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { self.header = try reader.readStruct(macho.mach_header_64); - const lc_data = try self.preadAllAlloc(gpa, offset + @sizeOf(macho.mach_header_64), self.header.?.sizeofcmds); - defer gpa.free(lc_data); + const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); + defer gpa.free(lc_buffer); + { + const amt = try self.file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64)); + if (amt != self.header.?.sizeofcmds) return error.InputOutput; + } var it = LoadCommandIterator{ .ncmds = self.header.?.ncmds, - .buffer = lc_data, + .buffer = lc_buffer, }; while (it.next()) |lc| switch (lc.cmd()) { .SEGMENT_64 => { @@ -100,9 +104,17 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { .SYMTAB => { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize); - try self.preadAll(self.strtab.items, cmd.stroff + offset); - const symtab_buffer = try self.preadAllAlloc(gpa, cmd.symoff + offset, cmd.nsyms * @sizeOf(macho.nlist_64)); + { + const amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset); + if (amt != self.strtab.items.len) return error.InputOutput; + } + + const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64)); defer gpa.free(symtab_buffer); + { + const amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset); + if (amt != symtab_buffer.len) return error.InputOutput; + } const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms]; try self.symtab.ensureUnusedCapacity(gpa, symtab.len); for (symtab) |nlist| { @@ -115,8 +127,12 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { }, .DATA_IN_CODE => { const cmd = lc.cast(macho.linkedit_data_command).?; - const buffer = try self.preadAllAlloc(gpa, offset + cmd.dataoff, cmd.datasize); + const buffer = try gpa.alloc(u8, cmd.datasize); defer gpa.free(buffer); + { + const amt = try self.file.preadAll(buffer, offset + cmd.dataoff); + if (amt != buffer.len) return error.InputOutput; + } const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry)); const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(buffer.ptr))[0..ndice]; try self.data_in_code.appendUnalignedSlice(gpa, dice); @@ -1509,24 +1525,16 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void { } } -pub fn preadAll(self: *const Object, buf: []u8, off: usize) !void { - const amt = try self.file.preadAll(buf, off); - if (amt != buf.len) return error.InputOutput; -} - -pub fn preadAllAlloc(self: *const Object, allocator: Allocator, off: usize, size: usize) ![]u8 { - const buffer = try allocator.alloc(u8, size); - errdefer allocator.free(buffer); - try self.preadAll(buffer, off); - return buffer; -} - pub fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 { const slice = self.sections.slice(); assert(index < slice.items(.header).len); const sect = slice.items(.header)[index]; const offset = if (self.archive) |ar| ar.offset else 0; - return self.preadAllAlloc(allocator, sect.offset + offset, sect.size); + const buffer = try allocator.alloc(u8, sect.size); + errdefer allocator.free(buffer); + const amt = try self.file.preadAll(buffer, sect.offset + offset); + if (amt != buffer.len) return error.InputOutput; + return buffer; } fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { @@ -1799,9 +1807,12 @@ const x86_64 = struct { const gpa = macho_file.base.allocator; const offset = if (self.archive) |ar| ar.offset else 0; - const relocs_buffer = try self.preadAllAlloc(gpa, sect.reloff + offset, sect.nreloc * @sizeOf(macho.relocation_info)); + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - + { + const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset); + if (amt != relocs_buffer.len) return error.InputOutput; + } const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; const code = try self.getSectionData(gpa, @intCast(n_sect)); @@ -1954,9 +1965,12 @@ const aarch64 = struct { const gpa = macho_file.base.allocator; const offset = if (self.archive) |ar| ar.offset else 0; - const relocs_buffer = try self.preadAllAlloc(gpa, sect.reloff + offset, sect.nreloc * @sizeOf(macho.relocation_info)); + const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - + { + const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset); + if (amt != relocs_buffer.len) return error.InputOutput; + } const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; const code = try self.getSectionData(gpa, @intCast(n_sect)); From 6235ff0f108dfba6c58c136cf8642ff83ffd5941 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 21:30:51 +0100 Subject: [PATCH 17/22] macho: remove file.reader() from Object --- src/MachO/Object.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/MachO/Object.zig b/src/MachO/Object.zig index cb008666..21e159bd 100644 --- a/src/MachO/Object.zig +++ b/src/MachO/Object.zig @@ -69,11 +69,14 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { log.debug("parsing input object file {}", .{self.fmtPath()}); const gpa = macho_file.base.allocator; - const reader = self.file.reader(); const offset = if (self.archive) |ar| ar.offset else 0; - try self.file.seekTo(offset); - self.header = try reader.readStruct(macho.mach_header_64); + var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined; + { + const amt = try self.file.preadAll(&header_buffer, offset); + if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput; + } + self.header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*; const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds); defer gpa.free(lc_buffer); From a786dc8d7f275628cc6948913194bde0f9f6f05e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 21:34:16 +0100 Subject: [PATCH 18/22] macho: deinit pruned dylibs --- src/MachO.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/MachO.zig b/src/MachO.zig index 29c4bdf2..69940dd9 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -1086,6 +1086,8 @@ fn deadStripDylibs(self: *MachO) void { const index = self.dylibs.items[i]; if (!self.getFile(index).?.dylib.isAlive(self)) { _ = self.dylibs.orderedRemove(i); + self.files.items(.data)[index].dylib.deinit(self.base.allocator); + self.files.set(index, .null); } else i += 1; } } From a9ced4eae5ceed8fcff8ad6cca6273512c461f54 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 21:36:01 +0100 Subject: [PATCH 19/22] macho: do not store arena allocator in global scope --- src/MachO.zig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index 69940dd9..cdadb81f 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -1,5 +1,4 @@ base: Zld, -arena: std.heap.ArenaAllocator.State, options: Options, dyld_info_cmd: macho.dyld_info_command = .{}, @@ -103,7 +102,6 @@ fn createEmpty(gpa: Allocator, options: Options, thread_pool: *ThreadPool) !*Mac .file = undefined, .thread_pool = thread_pool, }, - .arena = std.heap.ArenaAllocator.init(gpa).state, .options = options, }; return self; @@ -148,8 +146,6 @@ pub fn deinit(self: *MachO) void { self.export_trie.deinit(gpa); self.unwind_info.deinit(gpa); self.unwind_records.deinit(gpa); - - self.arena.promote(gpa).deinit(); } pub fn flush(self: *MachO) !void { @@ -169,8 +165,8 @@ pub fn flush(self: *MachO) !void { try self.symbols.append(gpa, .{}); try self.symbols_extra.append(gpa, 0); - var arena_allocator = self.arena.promote(gpa); - defer self.arena = arena_allocator.state; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); const syslibroot = self.options.syslibroot; From 2ee5877b0e6317132d4f919338dd2997de878fb6 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 21:56:52 +0100 Subject: [PATCH 20/22] macho: reset value and atom of symbols in InternalObject file --- src/MachO/InternalObject.zig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/MachO/InternalObject.zig b/src/MachO/InternalObject.zig index c7e799ce..19db7fd3 100644 --- a/src/MachO/InternalObject.zig +++ b/src/MachO/InternalObject.zig @@ -29,6 +29,10 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) self.symbols.addOneAssumeCapacity().* = gop.index; const sym = macho_file.getSymbol(gop.index); sym.file = self.index; + sym.value = 0; + sym.atom = 0; + sym.nlist_idx = 0; + sym.flags = .{ .global = true }; return gop.index; } From 45adee2a897a2574905d84013ba88ef52f211c97 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 22:41:23 +0100 Subject: [PATCH 21/22] macho: fixes --- src/MachO.zig | 4 ---- src/MachO/DwarfInfo.zig | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index cdadb81f..bb72331e 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -1226,10 +1226,6 @@ fn createObjcSections(self: *MachO) !void { const internal = self.getInternalObject().?; const sym = self.getSymbol(sym_index); _ = try internal.addSymbol(sym.getName(self), self); - sym.value = 0; - sym.atom = 0; - sym.nlist_idx = 0; - sym.flags = .{ .global = true }; sym.visibility = .hidden; const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; const selrefs_index = try internal.addObjcMsgsendSections(name, self); diff --git a/src/MachO/DwarfInfo.zig b/src/MachO/DwarfInfo.zig index f9a9e458..951d7a20 100644 --- a/src/MachO/DwarfInfo.zig +++ b/src/MachO/DwarfInfo.zig @@ -21,6 +21,7 @@ pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void { } dw.compile_units.deinit(allocator); dw.strtab.deinit(allocator); + dw.di_data.deinit(allocator); } fn appendDiData(dw: *DwarfInfo, allocator: Allocator, values: []const u8) error{OutOfMemory}!u32 { From 79640c92fabdf9d910247da256f91ac76fb6d81e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 27 Jan 2024 22:50:16 +0100 Subject: [PATCH 22/22] macho: do not store file and path in Archive --- src/MachO.zig | 5 +++-- src/MachO/Archive.zig | 27 +++++++++++---------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/MachO.zig b/src/MachO.zig index bb72331e..5dd0e645 100644 --- a/src/MachO.zig +++ b/src/MachO.zig @@ -701,6 +701,7 @@ fn parseArchive(self: *MachO, obj: LinkObject) !bool { const gpa = self.base.allocator; const file = try std.fs.cwd().openFile(obj.path, .{}); + defer file.close(); const fat_arch: ?fat.Arch = if (fat.isFatLibrary(file)) blk: { break :blk self.parseFatLibrary(obj.path, file) catch |err| switch (err) { @@ -715,9 +716,9 @@ fn parseArchive(self: *MachO, obj: LinkObject) !bool { if (!mem.eql(u8, &magic, Archive.ARMAG)) return false; try file.seekTo(0); - var archive = Archive{ .path = obj.path, .file = file, .fat_arch = fat_arch }; + var archive = Archive{}; defer archive.deinit(gpa); - try archive.parse(self); + try archive.parse(self, obj.path, file, fat_arch); var has_parse_error = false; for (archive.objects.items) |extracted| { diff --git a/src/MachO/Archive.zig b/src/MachO/Archive.zig index cc5e9835..0f94e683 100644 --- a/src/MachO/Archive.zig +++ b/src/MachO/Archive.zig @@ -1,7 +1,3 @@ -file: std.fs.File, -fat_arch: ?fat.Arch, -path: []const u8, - objects: std.ArrayListUnmanaged(Object) = .{}, // Archive files start with the ARMAG identifying string. Then follows a @@ -63,25 +59,24 @@ const ar_hdr = extern struct { }; pub fn deinit(self: *Archive, allocator: Allocator) void { - self.file.close(); self.objects.deinit(allocator); } -pub fn parse(self: *Archive, macho_file: *MachO) !void { +pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.File, fat_arch: ?fat.Arch) !void { const gpa = macho_file.base.allocator; - const offset = if (self.fat_arch) |ar| ar.offset else 0; - const size = if (self.fat_arch) |ar| ar.size else (try self.file.stat()).size; - try self.file.seekTo(offset); + const offset = if (fat_arch) |ar| ar.offset else 0; + const size = if (fat_arch) |ar| ar.size else (try file.stat()).size; + try file.seekTo(offset); - const reader = self.file.reader(); + const reader = file.reader(); _ = try reader.readBytesNoEof(Archive.SARMAG); var pos: usize = Archive.SARMAG; while (true) { if (pos >= size) break; if (!mem.isAligned(pos, 2)) { - try self.file.seekBy(1); + try file.seekBy(1); pos += 1; } @@ -90,7 +85,7 @@ pub fn parse(self: *Archive, macho_file: *MachO) !void { if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) { macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{ - self.path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), + path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag), }); return error.ParseFailed; } @@ -109,7 +104,7 @@ pub fn parse(self: *Archive, macho_file: *MachO) !void { unreachable; }; defer { - _ = self.file.seekBy(hdr_size) catch {}; + _ = file.seekBy(hdr_size) catch {}; pos += hdr_size; } @@ -117,17 +112,17 @@ pub fn parse(self: *Archive, macho_file: *MachO) !void { const object = Object{ .archive = .{ - .path = try gpa.dupe(u8, self.path), + .path = try gpa.dupe(u8, path), .offset = offset + pos, }, .path = name, - .file = try std.fs.cwd().openFile(self.path, .{}), + .file = try std.fs.cwd().openFile(path, .{}), .index = undefined, .alive = false, .mtime = hdr.date() catch 0, }; - log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path }); + log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, path }); try self.objects.append(gpa, object); }