From 10e1fe282aba33e376bf904740d0a6d1e8d0cabc Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 2 Mar 2026 19:46:22 -0800 Subject: [PATCH] Configuration: serialize remaining Module information also handle properly Module circular references and introduce a general deduplication mechanism. --- lib/compiler/Maker/ScannedConfig.zig | 2 + lib/compiler/configurer.zig | 103 ++++++++------- lib/std/Build/Step/Compile.zig | 1 + lib/std/zig/Configuration.zig | 179 +++++++++++++++++---------- 4 files changed, 174 insertions(+), 111 deletions(-) diff --git a/lib/compiler/Maker/ScannedConfig.zig b/lib/compiler/Maker/ScannedConfig.zig index 130381e3c2..eca62cfe8b 100644 --- a/lib/compiler/Maker/ScannedConfig.zig +++ b/lib/compiler/Maker/ScannedConfig.zig @@ -87,6 +87,7 @@ fn printValue(sc: *const ScannedConfig, s: *Serializer, comptime Field: type, fi .union_list => comptime unreachable, .length_prefixed_list => comptime unreachable, .flag_union => comptime unreachable, + .multi_list => comptime unreachable, } else if (std.enums.tagName(Field, field_value)) |name| { try s.ident(name); } else { @@ -111,6 +112,7 @@ fn printValue(sc: *const ScannedConfig, s: *Serializer, comptime Field: type, fi .extended => @compileError("TODO"), .union_list => @compileError("TODO"), .flag_union => try printValue(sc, s, Field.Union, field_value.u), + .multi_list => @compileError("TODO"), }, else => @compileError("not implemented: " ++ @typeName(Field)), }, diff --git a/lib/compiler/configurer.zig b/lib/compiler/configurer.zig index 63ac016973..e8548d5aa9 100644 --- a/lib/compiler/configurer.zig +++ b/lib/compiler/configurer.zig @@ -294,9 +294,8 @@ const Serialize = struct { } fn addSystemLib(s: *Serialize, sl: *const std.Build.Module.SystemLib) !Configuration.SystemLib.Index { - log.err("TODO deduplicate addSystemLib", .{}); const wc = s.wc; - return @enumFromInt(try wc.addExtra(@as(Configuration.SystemLib, .{ + return @enumFromInt(try wc.addDeduped(@as(Configuration.SystemLib, .{ .flags = .{ .needed = sl.needed, .weak = sl.weak, @@ -362,7 +361,6 @@ const Serialize = struct { const wc = s.wc; const arena = s.arena; - const gpa = wc.gpa; const include_dirs = try arena.alloc(Configuration.Module.IncludeDir, m.include_dirs.items.len); for (include_dirs, m.include_dirs.items) |*dest, src| dest.* = switch (src) { @@ -393,66 +391,55 @@ const Serialize = struct { .win32_resource_file => |wrf| .{ .win32_resource_file = try addRcSourceFile(s, wrf) }, }; + const frameworks = try arena.alloc(Configuration.Module.Framework, m.frameworks.entries.len); + for (frameworks, m.frameworks.keys(), m.frameworks.values()) |*dest, name, options| dest.* = .{ + .flags = .{ + .needed = options.needed, + .weak = options.weak, + }, + .name = try wc.addString(name), + }; + const lib_paths = try arena.alloc(Configuration.LazyPath, m.lib_paths.items.len); for (lib_paths, m.lib_paths.items) |*dest, src| dest.* = try addLazyPath(s, src); const c_macros = try initStringList(s, m.c_macros.items); const export_symbol_names = try initStringList(s, m.export_symbol_names); - const import_table: Configuration.ImportTable = @enumFromInt(wc.extra.items.len); - const import_table_extra_len = 1 + 2 * m.import_table.entries.len; - try wc.extra.ensureUnusedCapacity(gpa, import_table_extra_len); - wc.extra.items.len += import_table_extra_len; - wc.extra.appendAssumeCapacity(@intCast(m.import_table.entries.len)); - wc.extra.items[@intFromEnum(import_table)] = @intCast(m.import_table.entries.len); - for ( - m.import_table.keys(), - @intFromEnum(import_table) + 1.., - ) |mod_name, extra_index| { - wc.extra.items[extra_index] = @intFromEnum(try wc.addString(mod_name)); - } - for ( - m.import_table.values(), - @intFromEnum(import_table) + 1 + m.import_table.entries.len.., - ) |dep, extra_index| { - log.err("TODO module dependencies can be cyclic", .{}); - wc.extra.items[extra_index] = @intFromEnum(try addModule(s, dep)); - } - const module_index: Configuration.Module.Index = @enumFromInt(try wc.addExtra(@as(Configuration.Module, .{ .flags = .{ .optimize = .init(m.optimize), .strip = .init(m.strip), .unwind_tables = .init(m.unwind_tables), .dwarf_format = .init(m.dwarf_format), - .single_threaded = .init(m.strip), - .stack_protector = .init(m.strip), - .stack_check = .init(m.strip), + .single_threaded = .init(m.single_threaded), + .stack_protector = .init(m.stack_protector), + .stack_check = .init(m.stack_check), .sanitize_c = .init(m.sanitize_c), - .sanitize_thread = .init(m.strip), - .fuzz = .init(m.strip), + .sanitize_thread = .init(m.sanitize_thread), + .fuzz = .init(m.fuzz), .code_model = m.code_model, .c_macros = c_macros.len != 0, .include_dirs = include_dirs.len != 0, .lib_paths = lib_paths.len != 0, .rpaths = rpaths.len != 0, - .frameworks = m.frameworks.entries.len != 0, + .frameworks = frameworks.len != 0, .link_objects = link_objects.len != 0, .export_symbol_names = export_symbol_names.len != 0, }, .flags2 = .{ - .valgrind = .init(m.strip), - .pic = .init(m.strip), - .red_zone = .init(m.strip), - .omit_frame_pointer = .init(m.strip), - .error_tracing = .init(m.strip), - .link_libc = .init(m.strip), - .link_libcpp = .init(m.strip), - .no_builtin = .init(m.strip), + .valgrind = .init(m.valgrind), + .pic = .init(m.pic), + .red_zone = .init(m.red_zone), + .omit_frame_pointer = .init(m.omit_frame_pointer), + .error_tracing = .init(m.error_tracing), + .link_libc = .init(m.link_libc), + .link_libcpp = .init(m.link_libcpp), + .no_builtin = .init(m.no_builtin), }, .owner = try s.builderToPackage(m.owner), .root_source_file = try s.addOptionalLazyPathEnum(m.root_source_file), - .import_table = import_table, + .import_table = .invalid, .resolved_target = try addOptionalResolvedTarget(wc, m.resolved_target), .c_macros = .{ .slice = c_macros }, .lib_paths = .{ .slice = lib_paths }, @@ -460,12 +447,33 @@ const Serialize = struct { .include_dirs = .init(include_dirs), .rpaths = .init(rpaths), .link_objects = .init(link_objects), + .frameworks = .{ .slice = frameworks }, }))); - log.err("TODO serialize the trailing Module data", .{}); - + // The import table is the only place that modules can form dependency + // loops. Therefore, we populate the module indexes only after adding + // the module to module_map. try s.module_map.putNoClobber(arena, m, module_index); + var imports = try std.MultiArrayList(Configuration.ImportTable.Import).initCapacity(arena, m.import_table.entries.len); + imports.len = m.import_table.entries.len; + for ( + imports.items(.name), + imports.items(.module), + m.import_table.keys(), + m.import_table.values(), + ) |*dest_name, *dest_module, src_name, src_module| { + dest_name.* = try wc.addString(src_name); + dest_module.* = try addModule(s, src_module); + } + + comptime assert(std.mem.eql(u8, @typeInfo(Configuration.Module).@"struct".fields[2].name, "import_table")); + comptime assert(@typeInfo(Configuration.Module).@"struct".fields[2].type == Configuration.ImportTable.Index); + assert(wc.extra.items[@intFromEnum(module_index) + 2] == @intFromEnum(Configuration.ImportTable.Index.invalid)); + wc.extra.items[@intFromEnum(module_index) + 2] = try wc.addDeduped(@as(Configuration.ImportTable, .{ + .imports = .{ .mal = imports }, + })); + return module_index; } @@ -502,12 +510,12 @@ fn serialize(b: *std.Build, wc: *Configuration.Wip, writer: *Io.Writer) !void { } // Add and then de-duplicate dependencies. - const deps = d: { - const deps: Configuration.Deps = @enumFromInt(wc.extra.items.len); - for (try wc.reserveLengthPrefixed(step.dependencies.items.len), step.dependencies.items) |*dep, dep_step| - dep.* = @intCast(s.step_map.getIndex(dep_step).?); - break :d try wc.dedupeDeps(deps); - }; + const dep_steps = try arena.alloc(Configuration.Step.Index, step.dependencies.items.len); + for (dep_steps, step.dependencies.items) |*dest, src| + dest.* = @enumFromInt(s.step_map.getIndex(src).?); + const deps: Configuration.Deps.Index = @enumFromInt(try wc.addDeduped(@as(Configuration.Deps, .{ + .steps = .{ .slice = dep_steps }, + }))); try wc.steps.ensureTotalCapacity(gpa, s.step_map.entries.capacity); wc.steps.appendAssumeCapacity(.{ @@ -791,8 +799,7 @@ fn addOptionalResolvedTarget( optional_resolved_target: ?std.Build.ResolvedTarget, ) !Configuration.ResolvedTarget.OptionalIndex { const resolved_target = optional_resolved_target orelse return .none; - log.debug("TODO deduplicate resolved targets", .{}); - return @enumFromInt(try wc.addExtra(@as(Configuration.ResolvedTarget, .{ + return @enumFromInt(try wc.addDeduped(@as(Configuration.ResolvedTarget, .{ .query = try wc.addTargetQuery(resolved_target.query), .result = try wc.addTarget(resolved_target.result), }))); diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 38c0b3d6f6..76d45a54a1 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -43,6 +43,7 @@ export_memory: bool = false, /// For WebAssembly targets, this will allow for undefined symbols to /// be imported from the host environment. import_symbols: bool = false, +/// (WebAssembly) import function table from the host environment import_table: bool = false, export_table: bool = false, initial_memory: ?u64 = null, diff --git a/lib/std/zig/Configuration.zig b/lib/std/zig/Configuration.zig index b768d72265..c1af30eb32 100644 --- a/lib/std/zig/Configuration.zig +++ b/lib/std/zig/Configuration.zig @@ -33,9 +33,8 @@ pub const Header = extern struct { pub const Wip = struct { gpa: Allocator, string_table: StringTable = .empty, - /// De-duplicates an array inside `extra` that has first element length - /// followed by length elements. - length_prefixed_table: LengthPrefixedTable = .empty, + /// De-duplicates an array inside `extra`. + dedupe_table: DedupeTable = .empty, targets_table: TargetsTable = .empty, string_bytes: std.ArrayList(u8) = .empty, @@ -46,25 +45,27 @@ pub const Wip = struct { path_deps: std.MultiArrayList(Path) = .empty, extra: std.ArrayList(u32) = .empty, - const LengthPrefixedTable = std.HashMapUnmanaged(u32, void, LengthPrefixedContext, std.hash_map.default_max_load_percentage); + const DedupeTable = std.HashMapUnmanaged(ExtraSlice, void, ExtraSlice.Context, std.hash_map.default_max_load_percentage); const TargetsTable = std.HashMapUnmanaged(TargetQuery.Index, void, TargetsTableContext, std.hash_map.default_max_load_percentage); - const LengthPrefixedContext = struct { - extra: []const u32, + const ExtraSlice = struct { + index: u32, + len: u32, - pub fn eql(ctx: @This(), a: u32, b: u32) bool { - const len_a = ctx.extra[a]; - const len_b = ctx.extra[b]; - const slice_a = ctx.extra[a + 1 ..][0..len_a]; - const slice_b = ctx.extra[b + 1 ..][0..len_b]; - return std.mem.eql(u32, slice_a, slice_b); - } + const Context = struct { + extra: []const u32, - pub fn hash(ctx: @This(), key: u32) u64 { - const len = ctx.extra[key]; - const slice = ctx.extra[key + 1 ..][0..len]; - return std.hash_map.hashString(@ptrCast(slice)); - } + pub fn eql(ctx: @This(), a: ExtraSlice, b: ExtraSlice) bool { + const slice_a = ctx.extra[a.index..][0..a.len]; + const slice_b = ctx.extra[b.index..][0..b.len]; + return std.mem.eql(u32, slice_a, slice_b); + } + + pub fn hash(ctx: @This(), key: ExtraSlice) u64 { + const slice = ctx.extra[key.index..][0..key.len]; + return std.hash_map.hashString(@ptrCast(slice)); + } + }; }; const TargetsTableContext = struct { @@ -323,36 +324,35 @@ pub const Wip = struct { } } - pub fn reserveLengthPrefixed(wip: *Wip, n: usize) Allocator.Error![]u32 { - const slice = try wip.extra.addManyAsSlice(wip.gpa, n + 1); - slice[0] = @intCast(n); - return slice[1..]; - } - - pub fn dedupeLengthPrefixed(wip: *Wip, index: u32) Allocator.Error!u32 { - assert(wip.extra.items.len == index + wip.extra.items[index] + 1); - const gpa = wip.gpa; - const gop = try wip.length_prefixed_table.getOrPutContext(gpa, index, @as(LengthPrefixedContext, .{ - .extra = wip.extra.items, - })); - if (gop.found_existing) { - wip.extra.items.len = index; - return gop.key_ptr.*; - } else { - return index; - } - } - - pub fn dedupeDeps(wip: *Wip, deps: Deps) Allocator.Error!Deps { - return @enumFromInt(try dedupeLengthPrefixed(wip, @intFromEnum(deps))); - } - pub fn addExtra(wip: *Wip, extra: anytype) Allocator.Error!u32 { const extra_len = Storage.extraLen(extra); try wip.extra.ensureUnusedCapacity(wip.gpa, extra_len); return addExtraAssumeCapacity(wip, extra); } + /// Same as `addExtra` but uses a hash map to possibly return an already + /// existing index instead of appending to `extra`. + pub fn addDeduped(wip: *Wip, extra: anytype) Allocator.Error!u32 { + const gpa = wip.gpa; + const revert_index = wip.extra.items.len; + const extra_len = Storage.extraLen(extra); + try wip.extra.ensureUnusedCapacity(gpa, extra_len); + const new_index = addExtraAssumeCapacity(wip, extra); + const len: u32 = @intCast(wip.extra.items.len - new_index); + + const gop = try wip.dedupe_table.getOrPutContext(gpa, .{ + .index = new_index, + .len = len, + }, @as(ExtraSlice.Context, .{ .extra = wip.extra.items })); + + if (gop.found_existing) { + wip.extra.items.len = revert_index; + return gop.key_ptr.index; + } + + return new_index; + } + pub fn addExtraAssumeCapacity(wip: *Wip, extra: anytype) u32 { const result: u32 = @intCast(wip.extra.items.len); wip.extra.items.len = Storage.setExtra(wip.extra.allocatedSlice(), result, extra); @@ -399,7 +399,7 @@ pub const AvailableOption = extern struct { pub const Step = extern struct { name: String, owner: Package.Index, - deps: Deps, + deps: Deps.Index, max_rss: MaxRss, extended: Storage.Extended(Flags, union(Tag) { check_file: CheckFile, @@ -1074,14 +1074,12 @@ pub const Package = struct { }; }; -/// Trailing: -/// * frameworks: FlagsPrefixedList(FrameworkFlags), // if flag is set pub const Module = struct { flags: Flags, flags2: Flags2, + import_table: ImportTable.Index, owner: Package.Index, root_source_file: OptionalLazyPath, - import_table: ImportTable, resolved_target: ResolvedTarget.OptionalIndex, c_macros: Storage.FlagLengthPrefixedList(.flags, .c_macros, String), lib_paths: Storage.FlagLengthPrefixedList(.flags, .lib_paths, LazyPath), @@ -1089,6 +1087,7 @@ pub const Module = struct { include_dirs: Storage.UnionList(.flags, .include_dirs, IncludeDir), rpaths: Storage.UnionList(.flags, .rpaths, RPath), link_objects: Storage.UnionList(.flags, .link_objects, LinkObject), + frameworks: Storage.FlagLengthPrefixedList(.flags, .frameworks, Framework), pub const Optimize = enum(u3) { debug, @@ -1220,28 +1219,47 @@ pub const Module = struct { win32_resource_file: RcSourceFile.Index, }; - pub const FrameworkFlags = packed struct(u2) { - needed: bool, - weak: bool, + pub const Framework = struct { + flags: @This().Flags, + name: String, + + pub const Flags = packed struct(u32) { + needed: bool, + weak: bool, + _: u30 = 0, + }; }; }; -/// Points into `extra`, first element is len, then: -/// * import_name: String, // for each len -/// * Module.Index, // for each len -pub const ImportTable = enum(u32) { - _, +pub const ImportTable = struct { + imports: Storage.MultiList(Import), + + pub const Import = struct { + name: String, + module: Module.Index, + }; + + /// Points into `extra`. + pub const Index = enum(u32) { + invalid = maxInt(u32), + _, + }; }; -/// Points into `extra`, where the first element is count of deps, following -/// elements is `Step.Index` per count. -pub const Deps = enum(u32) { - _, +pub const Deps = struct { + steps: Storage.LengthPrefixedList(Step.Index), - pub fn slice(deps: Deps, c: *const Configuration) []Step.Index { - const len = c.extra[@intFromEnum(deps)]; - return @ptrCast(c.extra[@intFromEnum(deps) + 1 ..][0..len]); - } + pub const Index = enum(u32) { + _, + + pub fn get(this: @This(), c: *const Configuration) Deps { + return extraData(c, Deps, @intFromEnum(this)); + } + + pub fn slice(this: @This(), c: *const Configuration) []const Step.Index { + return get(this, c).steps.slice; + } + }; }; /// Points into `extra`, where the first element is count of strings, following @@ -1760,6 +1778,7 @@ pub const Storage = enum { flag_length_prefixed_list, union_list, flag_union, + multi_list, /// The presence of the field is determined by a boolean within a packed /// struct. @@ -1853,7 +1872,8 @@ pub const Storage = enum { }; } - /// The field contains a u32 length followed by that many items. + /// The field contains a u32 length followed by that many items, each + /// element bitcastable to u32. pub fn LengthPrefixedList(comptime ElemArg: type) type { return struct { slice: []const Elem, @@ -1867,6 +1887,17 @@ pub const Storage = enum { }; } + /// The field contains a u32 length followed by that many items for the + /// first field, that many items for the second field, etc. + pub fn MultiList(comptime ElemArg: type) type { + return struct { + mal: std.MultiArrayList(Elem), + + pub const storage: Storage = .multi_list; + pub const Elem = ElemArg; + }; + } + /// `UnionArg` is a tagged union with a small integer for the enum tag. /// /// A field in flags determines whether the metadata is present. @@ -2028,6 +2059,16 @@ pub const Storage = enum { defer i.* = data_start + len; return .{ .slice = @ptrCast(buffer[data_start..][0..len]) }; }, + .multi_list => { + const data_start = i.* + 1; + const len = buffer[data_start - 1]; + defer i.* = data_start + len * @typeInfo(Field.Elem).@"struct".fields.len; + return .{ .mal = .{ + .bytes = @ptrCast(buffer[data_start..][0..len]), + .len = len, + .capacity = len, + } }; + }, .union_list => { const flags = @field(container, @tagName(Field.flags)); const flag = @field(flags, @tagName(Field.flag)); @@ -2082,6 +2123,7 @@ pub const Storage = enum { .auto => switch (Field.storage) { .flag_optional, .enum_optional, .extended => 1, .length_prefixed_list, .flag_length_prefixed_list => field.slice.len + 1, + .multi_list => 1 + field.mal.len * @typeInfo(Field.Elem).@"struct".fields.len, .union_list => Field.extraLen(field.len), .flag_union => switch (field.u) { inline else => |v| extraFieldLen(v), @@ -2153,6 +2195,17 @@ pub const Storage = enum { @memcpy(buffer[i + 1 ..][0..len], @as([]const u32, @ptrCast(value.slice))); return len + 1; }, + .multi_list => { + const len: u32 = @intCast(value.mal.len); + if (len == 0) return 0; + buffer[i] = len; + const fields = @typeInfo(Field.Elem).@"struct".fields; + inline for (0..fields.len) |field_i| @memcpy( + buffer[i + 1 + field_i * len ..][0..len], + @as([]const u32, @ptrCast(value.mal.items(@enumFromInt(field_i)))), + ); + return 1 + fields.len * len; + }, .union_list => { if (value.len == 0) return 0; const Tag = @typeInfo(Field.Union).@"union".tag_type.?;