Files
zig/tools/process_headers.zig
T
Alex Rønne Petersen 9f2f6aaef5 drop support for powerpc64-linux-gnu
glibc has never officially supported ELFv2 on big-endian PowerPC, and we do not
(and likely never will) support linking ELFv1. So just drop this target instead
of pretending we actually have anything resembling usable support for it. This
is a dying target anyway; IBM have been pushing people to powerpc64le for years
now, and most distros have dropped big endian.

glibc headers and abilists are not updated as part of this; I'll just let that
happen automatically on the next glibc update. Size savings are expected to be
very minimal anyway since there's large overlap between powerpc64 and
powerpc64le.

This commit also fixes a couple of bad assumptions in std.Target:

* The dynamic linker path should be /lib64/ld64.so.1. We should get this right
  even if the Zig compiler doesn't support the target.
* cCallingConvention() was picking powerpc64_elf_v2 only for musl targets. In
  reality, for the targets we support in std.Target, it should pick v2 for all
  except powerpc64-linux-gnu.

Finally, this switches LLVM codegen to use ELFv2 data layout for all targets
except ps3.
2026-04-26 19:24:46 +02:00

408 lines
18 KiB
Zig

//! To get started, run this tool with no args and read the help message.
//!
//! The build systems of glibc, musl, FreeBSD, NetBSD, and OpenBSD require specifying a single
//! target architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
//! every target. So the process to create libc headers that Zig ships is to use
//! this tool.
//!
//! First, use the glibc, musl, FreeBSD, NetBSD, and OpenBSD build systems to create installations
//! of all the targets in the `glibc_targets`, `musl_targets`, `freebsd_targets`, `netbsd_targets`,
//! and `openbsd_targets` variables. Next, run this tool to create a new directory which puts .h
//! files into per-target subdirectories, with `generic-<abi>` holding the version of each file
//! that is shared by the most targets.
//! You'll then have to manually update the Zig source repo with these new files.
const std = @import("std");
const Io = std.Io;
const Dir = std.Io.Dir;
const Arch = std.Target.Cpu.Arch;
const Abi = std.Target.Abi;
const OsTag = std.Target.Os.Tag;
const assert = std.debug.assert;
const Blake3 = std.crypto.hash.Blake3;
/// One libc target whose installed headers are processed by this tool.
const LibCTarget = struct {
/// CPU architecture of the target.
arch: Arch,
/// ABI of the target.
abi: Abi,
/// Output directory name to use instead of the default `<arch>-<os>-<abi>`.
/// Targets that share a `dest` have their headers written into the same directory.
dest: ?[]const u8 = null,
};
/// Targets processed for `--abi glibc`.
///
/// Entries that share a `dest` are merged into one output directory. The order of entries
/// matters: `main` walks this list front to back, and when two entries with the same `dest`
/// provide different versions of a header, the version from the earlier entry is kept.
const glibc_targets = [_]LibCTarget{
.{ .arch = .arc, .abi = .gnu },
.{ .arch = .arm, .abi = .gnueabi, .dest = "arm-linux-gnu" },
.{ .arch = .arm, .abi = .gnueabihf, .dest = "arm-linux-gnu" },
.{ .arch = .armeb, .abi = .gnueabi, .dest = "arm-linux-gnu" },
.{ .arch = .armeb, .abi = .gnueabihf, .dest = "arm-linux-gnu" },
.{ .arch = .aarch64, .abi = .gnu, .dest = "aarch64-linux-gnu" },
.{ .arch = .aarch64_be, .abi = .gnu, .dest = "aarch64-linux-gnu" },
.{ .arch = .csky, .abi = .gnueabi, .dest = "csky-linux-gnu" },
.{ .arch = .csky, .abi = .gnueabihf, .dest = "csky-linux-gnu" },
.{ .arch = .loongarch64, .abi = .gnu, .dest = "loongarch-linux-gnu" },
.{ .arch = .loongarch64, .abi = .gnusf, .dest = "loongarch-linux-gnu" },
.{ .arch = .m68k, .abi = .gnu },
.{ .arch = .mips, .abi = .gnueabi, .dest = "mips-linux-gnu" },
.{ .arch = .mips, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
.{ .arch = .mipsel, .abi = .gnueabi, .dest = "mips-linux-gnu" },
.{ .arch = .mipsel, .abi = .gnueabihf, .dest = "mips-linux-gnu" },
.{ .arch = .mips64, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
.{ .arch = .mips64, .abi = .gnuabin32, .dest = "mips-linux-gnu" },
.{ .arch = .mips64el, .abi = .gnuabi64, .dest = "mips-linux-gnu" },
.{ .arch = .mips64el, .abi = .gnuabin32, .dest = "mips-linux-gnu" },
.{ .arch = .powerpc, .abi = .gnueabi, .dest = "powerpc-linux-gnu" },
.{ .arch = .powerpc, .abi = .gnueabihf, .dest = "powerpc-linux-gnu" },
.{ .arch = .powerpc64le, .abi = .gnu, .dest = "powerpc-linux-gnu" },
.{ .arch = .riscv32, .abi = .gnu, .dest = "riscv-linux-gnu" },
.{ .arch = .riscv64, .abi = .gnu, .dest = "riscv-linux-gnu" },
.{ .arch = .s390x, .abi = .gnu },
.{ .arch = .sparc, .abi = .gnu, .dest = "sparc-linux-gnu" },
.{ .arch = .sparc64, .abi = .gnu, .dest = "sparc-linux-gnu" },
.{ .arch = .x86, .abi = .gnu, .dest = "x86-linux-gnu" },
.{ .arch = .x86_64, .abi = .gnu, .dest = "x86-linux-gnu" },
.{ .arch = .x86_64, .abi = .gnux32, .dest = "x86-linux-gnu" },
};
/// Targets processed for `--abi musl`.
///
/// No entry sets `dest`, so each target is written to its own `<arch>-linux-<abi>` directory.
/// `main` locates each installation under the name returned by `std.zig.target.muslArchName`.
const musl_targets = [_]LibCTarget{
.{ .arch = .arm, .abi = .musl },
.{ .arch = .aarch64, .abi = .musl },
.{ .arch = .hexagon, .abi = .musl },
.{ .arch = .loongarch64, .abi = .musl },
.{ .arch = .m68k, .abi = .musl },
.{ .arch = .mips, .abi = .musl },
.{ .arch = .mips64, .abi = .musl },
.{ .arch = .mips64, .abi = .muslabin32 },
.{ .arch = .powerpc, .abi = .musl },
.{ .arch = .powerpc64, .abi = .musl },
.{ .arch = .riscv32, .abi = .musl },
.{ .arch = .riscv64, .abi = .musl },
.{ .arch = .s390x, .abi = .musl },
.{ .arch = .x86, .abi = .musl },
.{ .arch = .x86_64, .abi = .musl },
.{ .arch = .x86_64, .abi = .muslx32 },
};
/// Targets processed for `--abi freebsd`.
///
/// Every architecture listed here must have a matching case in the `.freebsd` switch in `main`,
/// which maps it to the installation directory name; unlisted architectures hit `unreachable`.
const freebsd_targets = [_]LibCTarget{
.{ .arch = .arm, .abi = .eabihf },
.{ .arch = .aarch64, .abi = .none },
.{ .arch = .powerpc64, .abi = .none },
.{ .arch = .riscv64, .abi = .none },
.{ .arch = .x86, .abi = .none },
.{ .arch = .x86_64, .abi = .none },
};
/// Targets processed for `--abi netbsd`.
///
/// The soft-float and hard-float variants of arm, mips, and powerpc share a `dest`, so each pair
/// is merged into a single output directory. Every architecture listed here must have a matching
/// case in the `.netbsd` switch in `main`, which maps it to the installation directory name.
const netbsd_targets = [_]LibCTarget{
.{ .arch = .arm, .abi = .eabi, .dest = "arm-netbsd-eabi" },
.{ .arch = .arm, .abi = .eabihf, .dest = "arm-netbsd-eabi" },
.{ .arch = .aarch64, .abi = .none },
.{ .arch = .m68k, .abi = .none },
.{ .arch = .mips, .abi = .eabi, .dest = "mips-netbsd-eabi" },
.{ .arch = .mips, .abi = .eabihf, .dest = "mips-netbsd-eabi" },
.{ .arch = .powerpc, .abi = .eabi, .dest = "powerpc-netbsd-eabi" },
.{ .arch = .powerpc, .abi = .eabihf, .dest = "powerpc-netbsd-eabi" },
.{ .arch = .sparc, .abi = .none },
.{ .arch = .sparc64, .abi = .none },
.{ .arch = .x86, .abi = .none },
.{ .arch = .x86_64, .abi = .none },
};
/// Targets processed for `--abi openbsd`.
///
/// Every architecture listed here must have a matching case in the `.openbsd` switch in `main`,
/// which maps it to the installation directory name; unlisted architectures hit `unreachable`.
const openbsd_targets = [_]LibCTarget{
.{ .arch = .arm, .abi = .eabi },
.{ .arch = .aarch64, .abi = .none },
.{ .arch = .mips64, .abi = .none },
.{ .arch = .mips64el, .abi = .none },
.{ .arch = .powerpc, .abi = .eabihf },
.{ .arch = .powerpc64, .abi = .none },
.{ .arch = .riscv64, .abi = .none },
.{ .arch = .sparc64, .abi = .none },
.{ .arch = .x86, .abi = .none },
.{ .arch = .x86_64, .abi = .none },
};
/// One distinct (relative path, header text) combination seen while scanning installations.
const Contents = struct {
    /// Header text with leading and trailing whitespace trimmed.
    bytes: []const u8,
    /// Number of scanned files that produced this exact hash; starts at 1.
    hit_count: usize,
    /// Blake3 digest over the relative path followed by `bytes`.
    hash: []const u8,
    /// Set once this version has been written to the generic output directory.
    is_generic: bool,

    /// Ordering predicate for `std.mem.sort`: true when `lhs` was seen fewer times than `rhs`,
    /// so an ascending sort leaves the most frequently seen contents at the end.
    fn hitCountLessThan(_: void, lhs: *const Contents, rhs: *const Contents) bool {
        return rhs.hit_count > lhs.hit_count;
    }
};
/// Maps a content hash to the `Contents` record describing that header version.
const HashToContents = std.StringHashMap(Contents);
/// Maps an output target directory name to the hash of the header version it should receive.
/// Iteration order is relied upon by `main` when collecting candidates for one path.
const TargetToHash = std.array_hash_map.String([]const u8);
/// Maps a header path (relative to the include directory) to its per-target hash table.
const PathTable = std.StringHashMap(*TargetToHash);
/// The libc flavors this tool can process. The tag names are exactly the strings accepted by
/// `--abi`, since `main` parses that argument with `std.meta.stringToEnum`.
const LibCVendor = enum {
musl,
glibc,
freebsd,
netbsd,
openbsd,
};
/// Entry point: merges per-target libc header installations into one deduplicated output tree.
///
/// Command line: any number of `--search-path <dir>`, plus the required `--out <dir>` and
/// `--abi <name>`, where `<name>` is a `LibCVendor` tag name. Any usage error (unknown flag,
/// missing value, repeated `--out`/`--abi`, unknown ABI) prints a message and exits via
/// `usageAndExit`.
///
/// Phase 1 (scan): for every target of the selected vendor, search paths are tried in order until
/// one contains the target's include directory, which is then walked recursively. Each file is
/// hashed (relative path + whitespace-trimmed contents) and recorded both by hash and by
/// (relative path, destination target).
///
/// Phase 2 (emit): for every relative path, the version seen most often is written to
/// `<out>/generic-<abi>/` if it was seen more than once; every other version is written to
/// `<out>/<destination target>/`.
///
/// All allocations come from the process arena and are never freed individually.
pub fn main(init: std.process.Init) !void {
    const arena = init.arena.allocator();
    const io = init.io;
    const args = try init.minimal.args.toSlice(arena);
    const cwd_path = try std.process.currentPathAlloc(io, arena);
    const environ_map = init.environ_map;

    var search_paths = std.array_list.Managed([]const u8).init(arena);
    var opt_out_dir: ?[]const u8 = null;
    var opt_abi: ?[]const u8 = null;
    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        if (std.mem.eql(u8, args[arg_i], "--help"))
            usageAndExit(args[0]);
        // Every remaining flag takes exactly one value.
        if (arg_i + 1 >= args.len) {
            std.debug.print("expected argument after '{s}'\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }
        if (std.mem.eql(u8, args[arg_i], "--search-path")) {
            try search_paths.append(args[arg_i + 1]);
        } else if (std.mem.eql(u8, args[arg_i], "--out")) {
            // User input must be validated explicitly; an assert would be undefined behavior
            // in release builds.
            if (opt_out_dir != null) {
                std.debug.print("'{s}' may only be specified once\n", .{args[arg_i]});
                usageAndExit(args[0]);
            }
            opt_out_dir = args[arg_i + 1];
        } else if (std.mem.eql(u8, args[arg_i], "--abi")) {
            if (opt_abi != null) {
                std.debug.print("'{s}' may only be specified once\n", .{args[arg_i]});
                usageAndExit(args[0]);
            }
            opt_abi = args[arg_i + 1];
        } else {
            std.debug.print("unrecognized argument: {s}\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }
        // Skip the value that was just consumed; the loop's own increment skips the flag.
        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const abi_name = opt_abi orelse usageAndExit(args[0]);
    const vendor = std.meta.stringToEnum(LibCVendor, abi_name) orelse {
        std.debug.print("unrecognized C ABI: {s}\n", .{abi_name});
        usageAndExit(args[0]);
    };
    const generic_name = try std.fmt.allocPrint(arena, "generic-{s}", .{abi_name});
    const libc_targets = switch (vendor) {
        .glibc => &glibc_targets,
        .musl => &musl_targets,
        .freebsd => &freebsd_targets,
        .netbsd => &netbsd_targets,
        .openbsd => &openbsd_targets,
    };

    var path_table = PathTable.init(arena);
    var hash_to_contents = HashToContents.init(arena);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    for (libc_targets) |libc_target| {
        // Name of this target's installation directory inside a search path.
        const libc_dir = switch (vendor) {
            .glibc => try std.zig.target.glibcRuntimeTriple(arena, libc_target.arch, .linux, libc_target.abi),
            .musl => std.zig.target.muslArchName(libc_target.arch, libc_target.abi),
            .freebsd => switch (libc_target.arch) {
                .arm => "armv7",
                .x86 => "i386",
                .x86_64 => "amd64",
                .aarch64,
                .powerpc,
                .powerpc64,
                .riscv64,
                => |a| @tagName(a),
                else => unreachable,
            },
            .netbsd => switch (libc_target.arch) {
                .arm => if (libc_target.abi == .eabihf) "evbarmv7hf" else "evbarmv7",
                .aarch64 => "evbarm64",
                .m68k => "mac68k",
                .mips => if (libc_target.abi == .eabihf) "evbmips" else "evbmipssf",
                .powerpc => if (libc_target.abi == .eabihf) "evbppc" else "evbppcsf",
                .x86 => "i386",
                .x86_64 => "amd64",
                .sparc,
                .sparc64,
                => |a| @tagName(a),
                else => unreachable,
            },
            .openbsd => switch (libc_target.arch) {
                .arm => "armv7",
                .aarch64 => "arm64",
                .mips64 => "octeon",
                .mips64el => "loongson",
                .powerpc => "macppc",
                .x86 => "i386",
                .x86_64 => "amd64",
                .powerpc64,
                .riscv64,
                .sparc64,
                => |a| @tagName(a),
                else => unreachable,
            },
        };

        // Name of the output directory that receives this target's non-generic headers.
        const dest_target = if (libc_target.dest) |dest| dest else try std.fmt.allocPrint(arena, "{s}-{s}-{s}", .{
            @tagName(libc_target.arch),
            switch (vendor) {
                .musl, .glibc => "linux",
                .freebsd => "freebsd",
                .netbsd => "netbsd",
                .openbsd => "openbsd",
            },
            @tagName(libc_target.abi),
        });

        search: for (search_paths.items) |search_path| {
            const sub_path = switch (vendor) {
                .glibc,
                .freebsd,
                .netbsd,
                .openbsd,
                => &[_][]const u8{ search_path, libc_dir, "usr", "include" },
                .musl => &[_][]const u8{ search_path, libc_dir, "usr", "local", "musl", "include" },
            };
            const target_include_dir = try Dir.path.join(arena, sub_path);

            // Depth-first walk using an explicit stack of directories still to visit.
            var dir_stack = std.array_list.Managed([]const u8).init(arena);
            try dir_stack.append(target_include_dir);
            while (dir_stack.pop()) |full_dir_name| {
                var dir = Dir.cwd().openDir(io, full_dir_name, .{ .iterate = true }) catch |err| switch (err) {
                    // A directory that cannot be opened sends us on to the next search path.
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close(io);

                var dir_it = dir.iterate();
                while (try dir_it.next(io)) |entry| {
                    const full_path = try Dir.path.join(arena, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file, .sym_link => {
                            const rel_path = try Dir.path.relative(arena, cwd_path, environ_map, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try Dir.cwd().readFileAlloc(io, full_path, arena, .limited(max_size));
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The relative path is part of the hash, so identical text under two
                            // different paths is never treated as the same header.
                            const hash = try arena.alloc(u8, 32);
                            var hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({B})\n", .{
                                    libc_dir, rel_path, raw_bytes.len,
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try arena.create(TargetToHash);
                                ptr.* = .empty;
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };

                            // When `dest` is set, there are a few rare cases where we expect to overwrite a header. For
                            // example, `bits/long-double.h` differs very slightly between `powerpc64le-linux-gnu` and
                            // other `powerpc*-linux-gnu` targets, and we unify those targets as `powerpc-linux-gnu`. In
                            // such cases, we manually patch the affected header after processing, so it's fine that
                            // only one header wins here.
                            //
                            // Note that despite the "overwrote" message, the entry recorded first is the one that
                            // is kept; later targets sharing the same `dest` do not replace it.
                            if (libc_target.dest != null) {
                                const hash_gop = try target_to_hash.getOrPut(arena, dest_target);
                                if (hash_gop.found_existing) std.debug.print("overwrote: {s} {s} {s}\n", .{
                                    libc_dir,
                                    rel_path,
                                    dest_target,
                                }) else hash_gop.value_ptr.* = hash;
                            } else {
                                try target_to_hash.putNoClobber(arena, dest_target, hash);
                            }
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            // The walk finished without being redirected, so this target is done.
            break;
        } else {
            std.debug.print("warning: libc target not found: {s}\n", .{libc_dir});
        }
    }

    std.debug.print("summary: {B} could be reduced to {B}\n", .{
        total_bytes,
        total_bytes - max_bytes_saved,
    });
    try Dir.cwd().createDirPath(io, out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        var contents_list = std.array_list.Managed(*Contents).init(arena);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        // Ascending sort, so the most frequently seen version ends up last. Every path in the
        // table has at least one target entry, so the list is never empty here.
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        const best_contents = contents_list.pop().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try Dir.path.join(arena, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try Dir.cwd().createDirPath(io, Dir.path.dirname(full_path).?);
            try Dir.cwd().writeFile(io, .{ .sub_path = full_path, .data = best_contents.bytes });
            best_contents.is_generic = true;
            // Report runner-up versions that were also shared but could not become generic.
            while (contents_list.pop()) |contender| {
                if (contender.hit_count > 1) {
                    const this_missed_bytes = contender.hit_count * contender.bytes.len;
                    missed_opportunity_bytes += this_missed_bytes;
                    std.debug.print("Missed opportunity ({B}): {s}\n", .{
                        this_missed_bytes,
                        path_kv.key_ptr.*,
                    });
                } else break;
            }
        }
        // Write every version that did not become generic into its target's own directory.
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            if (contents.is_generic) continue;
            const dest_target = hash_kv.key_ptr.*;
            const full_path = try Dir.path.join(arena, &[_][]const u8{ out_dir, dest_target, path_kv.key_ptr.* });
            try Dir.cwd().createDirPath(io, Dir.path.dirname(full_path).?);
            try Dir.cwd().writeFile(io, .{ .sub_path = full_path, .data = contents.bytes });
        }
    }
}
/// Prints the command-line help to stderr and terminates the process with exit status 1.
/// `arg0` is the program name shown in the usage line.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.print("Usage: {s} [--search-path <dir>] --out <dir> --abi <name>\n", .{arg0});

    // Per-option notes, emitted verbatim after the usage line.
    const option_notes = [_][]const u8{
        "--search-path can be used any number of times.\n",
        " subdirectories of search paths look like, e.g. x86_64-linux-gnu\n",
        "--out is a dir that will be created, and populated with the results\n",
        "--abi is either glibc, musl, freebsd, netbsd, or openbsd\n",
    };
    for (option_notes) |note| {
        std.debug.print("{s}", .{note});
    }

    std.process.exit(1);
}