Merge branch 'ziglang:master' into some-documentation-updates-0

This commit is contained in:
Rue
2025-07-28 14:54:52 +02:00
committed by GitHub
358 changed files with 35994 additions and 11556 deletions
+8 -9
View File
@@ -390,15 +390,6 @@ set(ZIG_STAGE2_SOURCES
lib/std/Io.zig
lib/std/Io/Reader.zig
lib/std/Io/Writer.zig
lib/std/Io/buffered_atomic_file.zig
lib/std/Io/buffered_writer.zig
lib/std/Io/change_detection_stream.zig
lib/std/Io/counting_reader.zig
lib/std/Io/counting_writer.zig
lib/std/Io/find_byte_writer.zig
lib/std/Io/fixed_buffer_stream.zig
lib/std/Io/limited_reader.zig
lib/std/Io/seekable_stream.zig
lib/std/Progress.zig
lib/std/Random.zig
lib/std/Target.zig
@@ -550,6 +541,14 @@ set(ZIG_STAGE2_SOURCES
src/clang_options.zig
src/clang_options_data.zig
src/codegen.zig
src/codegen/aarch64.zig
src/codegen/aarch64/abi.zig
src/codegen/aarch64/Assemble.zig
src/codegen/aarch64/Disassemble.zig
src/codegen/aarch64/encoding.zig
src/codegen/aarch64/instructions.zon
src/codegen/aarch64/Mir.zig
src/codegen/aarch64/Select.zig
src/codegen/c.zig
src/codegen/c/Type.zig
src/codegen/llvm.zig
+4 -2
View File
@@ -4,8 +4,10 @@ pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "example",
.root_source_file = b.path("example.zig"),
.optimize = optimize,
.root_module = b.createModule(.{
.root_source_file = b.path("example.zig"),
.optimize = optimize,
}),
});
b.default_step.dependOn(&exe.step);
}
+8 -4
View File
@@ -4,15 +4,19 @@ pub fn build(b: *std.Build) void {
const lib = b.addLibrary(.{
.linkage = .dynamic,
.name = "mathtest",
.root_source_file = b.path("mathtest.zig"),
.root_module = b.createModule(.{
.root_source_file = b.path("mathtest.zig"),
}),
.version = .{ .major = 1, .minor = 0, .patch = 0 },
});
const exe = b.addExecutable(.{
.name = "test",
.root_module = b.createModule(.{
.link_libc = true,
}),
});
exe.addCSourceFile(.{ .file = b.path("test.c"), .flags = &.{"-std=c99"} });
exe.linkLibrary(lib);
exe.linkSystemLibrary("c");
exe.root_module.addCSourceFile(.{ .file = b.path("test.c"), .flags = &.{"-std=c99"} });
exe.root_module.linkLibrary(lib);
b.default_step.dependOn(&exe.step);
+8 -4
View File
@@ -3,15 +3,19 @@ const std = @import("std");
pub fn build(b: *std.Build) void {
const obj = b.addObject(.{
.name = "base64",
.root_source_file = b.path("base64.zig"),
.root_module = b.createModule(.{
.root_source_file = b.path("base64.zig"),
}),
});
const exe = b.addExecutable(.{
.name = "test",
.root_module = b.createModule(.{
.link_libc = true,
}),
});
exe.addCSourceFile(.{ .file = b.path("test.c"), .flags = &.{"-std=c99"} });
exe.addObject(obj);
exe.linkSystemLibrary("c");
exe.root_module.addCSourceFile(.{ .file = b.path("test.c"), .flags = &.{"-std=c99"} });
exe.root_module.addObject(obj);
b.installArtifact(exe);
}
+7 -3
View File
@@ -696,8 +696,11 @@ fn runStepNames(
.failures, .none => true,
else => false,
};
if (failure_count == 0 and failures_only) {
return run.cleanExit();
if (failure_count == 0) {
std.Progress.setStatus(.success);
if (failures_only) return run.cleanExit();
} else {
std.Progress.setStatus(.failure);
}
const ttyconf = run.ttyconf;
@@ -708,7 +711,7 @@ fn runStepNames(
const total_count = success_count + failure_count + pending_count + skipped_count;
ttyconf.setColor(w, .cyan) catch {};
w.writeAll("Build Summary:") catch {};
w.writeAll("\nBuild Summary:") catch {};
ttyconf.setColor(w, .reset) catch {};
w.print(" {d}/{d} steps succeeded", .{ success_count, total_count }) catch {};
if (skipped_count > 0) w.print("; {d} skipped", .{skipped_count}) catch {};
@@ -1149,6 +1152,7 @@ fn workerMakeOneStep(
} else |err| switch (err) {
error.MakeFailed => {
@atomicStore(Step.State, &s.state, .failure, .seq_cst);
std.Progress.setStatus(.failure_working);
break :handle_result;
},
error.MakeSkipped => @atomicStore(Step.State, &s.state, .skipped, .seq_cst),
+62 -907
View File
@@ -13,6 +13,9 @@ const Server = std.zig.Server;
var stdin_buffer: [1024]u8 = undefined;
var stdout_buffer: [1024]u8 = undefined;
var input_buffer: [1024]u8 = undefined;
var output_buffer: [1024]u8 = undefined;
pub fn main() !void {
var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena_instance.deinit();
@@ -145,13 +148,16 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void
const input = opt_input orelse fatal("expected input parameter", .{});
const output = opt_output orelse fatal("expected output parameter", .{});
var in_file = fs.cwd().openFile(input, .{}) catch |err|
fatal("unable to open '{s}': {s}", .{ input, @errorName(err) });
defer in_file.close();
const input_file = fs.cwd().openFile(input, .{}) catch |err| fatal("failed to open {s}: {t}", .{ input, err });
defer input_file.close();
const elf_hdr = std.elf.Header.read(in_file) catch |err| switch (err) {
error.InvalidElfMagic => fatal("not an ELF file: '{s}'", .{input}),
else => fatal("unable to read '{s}': {s}", .{ input, @errorName(err) }),
const stat = input_file.stat() catch |err| fatal("failed to stat {s}: {t}", .{ input, err });
var in: File.Reader = .initSize(input_file, &input_buffer, stat.size);
const elf_hdr = std.elf.Header.read(&in.interface) catch |err| switch (err) {
error.ReadFailed => fatal("unable to read {s}: {t}", .{ input, in.err.? }),
else => |e| fatal("invalid elf file: {t}", .{e}),
};
const in_ofmt = .elf;
@@ -168,16 +174,12 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void
}
};
const mode = mode: {
if (out_fmt != .elf or only_keep_debug)
break :mode fs.File.default_mode;
if (in_file.stat()) |stat|
break :mode stat.mode
else |_|
break :mode fs.File.default_mode;
};
var out_file = try fs.cwd().createFile(output, .{ .mode = mode });
defer out_file.close();
const mode = if (out_fmt != .elf or only_keep_debug) fs.File.default_mode else stat.mode;
var output_file = try fs.cwd().createFile(output, .{ .mode = mode });
defer output_file.close();
var out = output_file.writer(&output_buffer);
switch (out_fmt) {
.hex, .raw => {
@@ -192,7 +194,7 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void
if (set_section_flags != null)
fatal("zig objcopy: ELF to RAW or HEX copying does not support --set_section_flags", .{});
try emitElf(arena, in_file, out_file, elf_hdr, .{
try emitElf(arena, &in, &out, elf_hdr, .{
.ofmt = out_fmt,
.only_section = only_section,
.pad_to = pad_to,
@@ -208,22 +210,13 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void
if (pad_to) |_|
fatal("zig objcopy: ELF to ELF copying does not support --pad-to", .{});
try stripElf(arena, in_file, out_file, elf_hdr, .{
.strip_debug = strip_debug,
.strip_all = strip_all,
.only_keep_debug = only_keep_debug,
.add_debuglink = opt_add_debuglink,
.extract_to = opt_extract,
.compress_debug = compress_debug_sections,
.add_section = add_section,
.set_section_alignment = set_section_alignment,
.set_section_flags = set_section_flags,
});
return std.process.cleanExit();
fatal("unimplemented", .{});
},
else => fatal("unsupported output object format: {s}", .{@tagName(out_fmt)}),
}
try out.end();
if (listen) {
var stdin_reader = fs.File.stdin().reader(&stdin_buffer);
var stdout_writer = fs.File.stdout().writer(&stdout_buffer);
@@ -304,12 +297,12 @@ const SetSectionFlags = struct {
fn emitElf(
arena: Allocator,
in_file: File,
out_file: File,
in: *File.Reader,
out: *File.Writer,
elf_hdr: elf.Header,
options: EmitRawElfOptions,
) !void {
var binary_elf_output = try BinaryElfOutput.parse(arena, in_file, elf_hdr);
var binary_elf_output = try BinaryElfOutput.parse(arena, in, elf_hdr);
defer binary_elf_output.deinit();
if (options.ofmt == .elf) {
@@ -328,8 +321,8 @@ fn emitElf(
continue;
}
try writeBinaryElfSection(in_file, out_file, section);
try padFile(out_file, options.pad_to);
try writeBinaryElfSection(in, out, section);
try padFile(out, options.pad_to);
return;
}
},
@@ -342,10 +335,10 @@ fn emitElf(
switch (options.ofmt) {
.raw => {
for (binary_elf_output.sections.items) |section| {
try out_file.seekTo(section.binaryOffset);
try writeBinaryElfSection(in_file, out_file, section);
try out.seekTo(section.binaryOffset);
try writeBinaryElfSection(in, out, section);
}
try padFile(out_file, options.pad_to);
try padFile(out, options.pad_to);
},
.hex => {
if (binary_elf_output.segments.items.len == 0) return;
@@ -353,15 +346,15 @@ fn emitElf(
return error.InvalidHexfileAddressRange;
}
var hex_writer = HexWriter{ .out_file = out_file };
var hex_writer = HexWriter{ .out = out };
for (binary_elf_output.segments.items) |segment| {
try hex_writer.writeSegment(segment, in_file);
try hex_writer.writeSegment(segment, in);
}
if (options.pad_to) |_| {
// Padding to a size in hex files isn't applicable
return error.InvalidArgument;
}
try hex_writer.writeEOF();
try hex_writer.writeEof();
},
else => unreachable,
}
@@ -399,7 +392,7 @@ const BinaryElfOutput = struct {
self.segments.deinit(self.allocator);
}
pub fn parse(allocator: Allocator, elf_file: File, elf_hdr: elf.Header) !Self {
pub fn parse(allocator: Allocator, in: *File.Reader, elf_hdr: elf.Header) !Self {
var self: Self = .{
.segments = .{},
.sections = .{},
@@ -412,7 +405,7 @@ const BinaryElfOutput = struct {
self.shstrtab = blk: {
if (elf_hdr.shstrndx >= elf_hdr.shnum) break :blk null;
var section_headers = elf_hdr.section_header_iterator(&elf_file);
var section_headers = elf_hdr.iterateSectionHeaders(in);
var section_counter: usize = 0;
while (section_counter < elf_hdr.shstrndx) : (section_counter += 1) {
@@ -421,18 +414,13 @@ const BinaryElfOutput = struct {
const shstrtab_shdr = (try section_headers.next()).?;
const buffer = try allocator.alloc(u8, @intCast(shstrtab_shdr.sh_size));
errdefer allocator.free(buffer);
const num_read = try elf_file.preadAll(buffer, shstrtab_shdr.sh_offset);
if (num_read != buffer.len) return error.EndOfStream;
break :blk buffer;
try in.seekTo(shstrtab_shdr.sh_offset);
break :blk try in.interface.readAlloc(allocator, shstrtab_shdr.sh_size);
};
errdefer if (self.shstrtab) |shstrtab| allocator.free(shstrtab);
var section_headers = elf_hdr.section_header_iterator(&elf_file);
var section_headers = elf_hdr.iterateSectionHeaders(in);
while (try section_headers.next()) |section| {
if (sectionValidForOutput(section)) {
const newSection = try allocator.create(BinaryElfSection);
@@ -451,7 +439,7 @@ const BinaryElfOutput = struct {
}
}
var program_headers = elf_hdr.program_header_iterator(&elf_file);
var program_headers = elf_hdr.iterateProgramHeaders(in);
while (try program_headers.next()) |phdr| {
if (phdr.p_type == elf.PT_LOAD) {
const newSegment = try allocator.create(BinaryElfSegment);
@@ -539,19 +527,17 @@ const BinaryElfOutput = struct {
}
};
fn writeBinaryElfSection(elf_file: File, out_file: File, section: *BinaryElfSection) !void {
try out_file.writeFileAll(elf_file, .{
.in_offset = section.elfOffset,
.in_len = section.fileSize,
});
fn writeBinaryElfSection(in: *File.Reader, out: *File.Writer, section: *BinaryElfSection) !void {
try in.seekTo(section.elfOffset);
_ = try out.interface.sendFileAll(in, .limited(section.fileSize));
}
const HexWriter = struct {
prev_addr: ?u32 = null,
out_file: File,
out: *File.Writer,
/// Max data bytes per line of output
const MAX_PAYLOAD_LEN: u8 = 16;
const max_payload_len: u8 = 16;
fn addressParts(address: u16) [2]u8 {
const msb: u8 = @truncate(address >> 8);
@@ -627,13 +613,13 @@ const HexWriter = struct {
return (sum ^ 0xFF) +% 1;
}
fn write(self: Record, file: File) File.WriteError!void {
fn write(self: Record, out: *File.Writer) !void {
const linesep = "\r\n";
// colon, (length, address, type, payload, checksum) as hex, CRLF
const BUFSIZE = 1 + (1 + 2 + 1 + MAX_PAYLOAD_LEN + 1) * 2 + linesep.len;
const BUFSIZE = 1 + (1 + 2 + 1 + max_payload_len + 1) * 2 + linesep.len;
var outbuf: [BUFSIZE]u8 = undefined;
const payload_bytes = self.getPayloadBytes();
assert(payload_bytes.len <= MAX_PAYLOAD_LEN);
assert(payload_bytes.len <= max_payload_len);
const line = try std.fmt.bufPrint(&outbuf, ":{0X:0>2}{1X:0>4}{2X:0>2}{3X}{4X:0>2}" ++ linesep, .{
@as(u8, @intCast(payload_bytes.len)),
@@ -642,38 +628,37 @@ const HexWriter = struct {
payload_bytes,
self.checksum(),
});
try file.writeAll(line);
try out.interface.writeAll(line);
}
};
pub fn writeSegment(self: *HexWriter, segment: *const BinaryElfSegment, elf_file: File) !void {
var buf: [MAX_PAYLOAD_LEN]u8 = undefined;
pub fn writeSegment(self: *HexWriter, segment: *const BinaryElfSegment, in: *File.Reader) !void {
var buf: [max_payload_len]u8 = undefined;
var bytes_read: usize = 0;
while (bytes_read < segment.fileSize) {
const row_address: u32 = @intCast(segment.physicalAddress + bytes_read);
const remaining = segment.fileSize - bytes_read;
const to_read: usize = @intCast(@min(remaining, MAX_PAYLOAD_LEN));
const did_read = try elf_file.preadAll(buf[0..to_read], segment.elfOffset + bytes_read);
if (did_read < to_read) return error.UnexpectedEOF;
const dest = buf[0..@min(remaining, max_payload_len)];
try in.seekTo(segment.elfOffset + bytes_read);
try in.interface.readSliceAll(dest);
try self.writeDataRow(row_address, dest);
try self.writeDataRow(row_address, buf[0..did_read]);
bytes_read += did_read;
bytes_read += dest.len;
}
}
fn writeDataRow(self: *HexWriter, address: u32, data: []const u8) File.WriteError!void {
fn writeDataRow(self: *HexWriter, address: u32, data: []const u8) !void {
const record = Record.Data(address, data);
if (address > 0xFFFF and (self.prev_addr == null or record.address != self.prev_addr.?)) {
try Record.Address(address).write(self.out_file);
try Record.Address(address).write(self.out);
}
try record.write(self.out_file);
try record.write(self.out);
self.prev_addr = @intCast(record.address + data.len);
}
fn writeEOF(self: HexWriter) File.WriteError!void {
try Record.EOF().write(self.out_file);
fn writeEof(self: HexWriter) !void {
try Record.EOF().write(self.out);
}
};
@@ -686,9 +671,9 @@ fn containsValidAddressRange(segments: []*BinaryElfSegment) bool {
return true;
}
fn padFile(f: File, opt_size: ?u64) !void {
fn padFile(out: *File.Writer, opt_size: ?u64) !void {
const size = opt_size orelse return;
try f.setEndPos(size);
try out.file.setEndPos(size);
}
test "HexWriter.Record.Address has correct payload and checksum" {
@@ -732,836 +717,6 @@ test "containsValidAddressRange" {
try std.testing.expect(containsValidAddressRange(&buf));
}
// -------------
// ELF to ELF stripping
/// Options controlling ELF-to-ELF stripping, consumed by `stripElf`.
const StripElfOptions = struct {
/// Path of a second output file that receives the complement of the selected section filter.
/// Also used as the debuglink path when `add_debuglink` is null.
extract_to: ?[]const u8 = null,
/// Path handed to `ElfFileHelper.createDebugLink`; takes precedence over `extract_to`.
add_debuglink: ?[]const u8 = null,
/// Selects the `.program` section filter (checked after `only_keep_debug`).
strip_all: bool = false,
/// Selects the `.program_and_symbols` section filter (checked after `strip_all`).
strip_debug: bool = false,
/// Selects the `.debug` section filter; takes precedence over both strip flags.
only_keep_debug: bool = false,
/// Forwarded unchanged as `compress_debug` to every `emit` call made by `stripElf`.
compress_debug: bool = false,
/// Forwarded to the main `emit` call; `stripElf` aborts if the input already has a section with this name.
add_section: ?AddSection,
/// Forwarded to the main `emit` call.
set_section_alignment: ?SetSectionAlignment,
/// Forwarded to the main `emit` call.
set_section_flags: ?SetSectionFlags,
};
/// Copies the ELF file `in_file` to `out_file`, keeping only the sections selected by `options`.
///
/// The section filter is chosen from the flags in priority order:
/// `only_keep_debug`, then `strip_all`, then `strip_debug`, otherwise everything is kept.
/// When `options.extract_to` is set, the complementary set of sections is first written
/// to that path. When a debuglink path is available (`add_debuglink`, falling back to
/// `extract_to`), a debuglink built from it is passed to the main `emit` call.
///
/// Aborts through `fatal` when there is nothing to extract, when the section requested by
/// `add_section` already exists in the input, or when the extraction file cannot be created.
/// Errors from parsing and emitting are propagated to the caller.
fn stripElf(
    allocator: Allocator,
    in_file: File,
    out_file: File,
    elf_hdr: elf.Header,
    options: StripElfOptions,
) !void {
    const Filter = ElfFileHelper.Filter;
    const DebugLink = ElfFileHelper.DebugLink;

    // Which sections end up in the main output file.
    const filter: Filter = if (options.only_keep_debug)
        .debug
    else if (options.strip_all)
        .program
    else if (options.strip_debug)
        .program_and_symbols
    else
        .all;

    // Which sections end up in the extracted file; only meaningful with `extract_to`.
    const filter_complement: ?Filter = if (options.extract_to == null) null else switch (filter) {
        .program => .debug_and_symbols,
        .debug => .program_and_symbols,
        .program_and_symbols => .debug,
        .debug_and_symbols => .program,
        .all => fatal("zig objcopy: nothing to extract", .{}),
    };

    // An explicit debuglink wins over the extraction path.
    const debuglink_path: ?[]const u8 = options.add_debuglink orelse options.extract_to;

    switch (elf_hdr.is_64) {
        inline else => |is_64| {
            var parsed = try ElfFile(is_64).parse(allocator, in_file, elf_hdr);
            defer parsed.deinit();

            // Refuse to add a section whose name is already taken in the input.
            if (options.add_section) |requested| {
                for (parsed.sections) |existing| {
                    if (!std.mem.eql(u8, existing.name, requested.section_name)) continue;
                    fatal("zig objcopy: unable to add section '{s}'. Section already exists in input", .{requested.section_name});
                }
            }

            if (filter_complement) |complement| {
                // Write the extracted file and close it (at the end of this block) before the
                // debuglink is created, so it can be read back to compute the checksum.
                const extract_path = options.extract_to.?;
                const extract_file = std.fs.cwd().createFile(extract_path, .{}) catch |err| {
                    fatal("zig objcopy: unable to create '{s}': {s}", .{ extract_path, @errorName(err) });
                };
                defer extract_file.close();

                try parsed.emit(allocator, extract_file, in_file, .{
                    .section_filter = complement,
                    .compress_debug = options.compress_debug,
                });
            }

            const debuglink: ?DebugLink = if (debuglink_path) |link_path|
                ElfFileHelper.createDebugLink(link_path)
            else
                null;

            try parsed.emit(allocator, out_file, in_file, .{
                .section_filter = filter,
                .debuglink = debuglink,
                .compress_debug = options.compress_debug,
                .add_section = options.add_section,
                .set_section_alignment = options.set_section_alignment,
                .set_section_flags = options.set_section_flags,
            });
        },
    }
}
// note: this is "a minimal effort implementation"
// It doesn't support all possible ELF files: some section types may need fixups, the program header may need fixups, ...
// It was written for a specific use case (stripping debug info to a separate file, for Linux 64-bit executables built with `zig` or `zig c++`)
// It moves and reorders the sections as little as possible to avoid having to do fixups.
// TODO: support non-native endianness
fn ElfFile(comptime is_64: bool) type {
const Elf_Ehdr = if (is_64) elf.Elf64_Ehdr else elf.Elf32_Ehdr;
const Elf_Phdr = if (is_64) elf.Elf64_Phdr else elf.Elf32_Phdr;
const Elf_Shdr = if (is_64) elf.Elf64_Shdr else elf.Elf32_Shdr;
const Elf_Chdr = if (is_64) elf.Elf64_Chdr else elf.Elf32_Chdr;
const Elf_Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym;
const Elf_OffSize = if (is_64) elf.Elf64_Off else elf.Elf32_Off;
return struct {
raw_elf_header: Elf_Ehdr,
program_segments: []const Elf_Phdr,
sections: []const Section,
arena: std.heap.ArenaAllocator,
const SectionCategory = ElfFileHelper.SectionCategory;
const section_memory_align: std.mem.Alignment = .of(Elf_Sym); // most restrictive of what we may load in memory
/// One section of the input file: its raw header plus the bookkeeping filled in by `parse`.
const Section = struct {
/// The section header as read from the input file.
section: Elf_Shdr,
/// Section name; stays "" when `sh_name` is 0 or the file has no section-name string table.
name: []const u8 = "",
segment: ?*const Elf_Phdr = null, // if the section is used by a program segment (there can be more than one)
payload: ?[]align(section_memory_align.toByteUnits()) const u8 = null, // if we need the data in memory
category: SectionCategory = .none, // should the section be kept in the exe or stripped to the debug database, or both.
};
const Self = @This();
/// Reads the ELF header, the program headers and the section headers of `in_file`,
/// loads the payload of the sections that must be inspected or rewritten later, and
/// assigns every section a name, an owning program segment (if any) and a category.
///
/// All memory is owned by an arena created from `gpa` and stored in the returned value;
/// release it with `deinit`. On error the arena is freed before returning.
///
/// Returns `error.TRUNCATED_ELF` when any read comes back short. Aborts through `fatal`
/// when the header entry sizes do not match the in-memory header types.
pub fn parse(gpa: Allocator, in_file: File, header: elf.Header) !Self {
    var arena = std.heap.ArenaAllocator.init(gpa);
    errdefer arena.deinit();
    const allocator = arena.allocator();

    var raw_header: Elf_Ehdr = undefined;
    {
        const bytes_read = try in_file.preadAll(std.mem.asBytes(&raw_header), 0);
        if (bytes_read < @sizeOf(Elf_Ehdr))
            return error.TRUNCATED_ELF;
    }

    // program header: list of segments
    const program_segments = blk: {
        if (@sizeOf(Elf_Phdr) != header.phentsize)
            fatal("zig objcopy: unsupported ELF file, unexpected phentsize ({d})", .{header.phentsize});

        const program_header = try allocator.alloc(Elf_Phdr, header.phnum);
        const program_header_bytes = std.mem.sliceAsBytes(program_header);
        const bytes_read = try in_file.preadAll(program_header_bytes, header.phoff);
        // Compare against the buffer length rather than recomputing size * count, so the
        // check cannot overflow the integer type of the header count.
        if (bytes_read < program_header_bytes.len)
            return error.TRUNCATED_ELF;
        break :blk program_header;
    };

    // section header
    const sections = blk: {
        if (@sizeOf(Elf_Shdr) != header.shentsize)
            fatal("zig objcopy: unsupported ELF file, unexpected shentsize ({d})", .{header.shentsize});

        const section_header = try allocator.alloc(Section, header.shnum);

        const raw_section_header = try allocator.alloc(Elf_Shdr, header.shnum);
        defer allocator.free(raw_section_header);

        const raw_section_bytes = std.mem.sliceAsBytes(raw_section_header);
        const bytes_read = try in_file.preadAll(raw_section_bytes, header.shoff);
        // The table holds `Elf_Shdr` entries: a short read must be measured against the
        // section header buffer, not against the program header entry size.
        if (bytes_read < raw_section_bytes.len)
            return error.TRUNCATED_ELF;

        for (section_header, raw_section_header) |*section, hdr| {
            section.* = .{ .section = hdr };
        }
        break :blk section_header;
    };

    // load data to memory for some sections:
    //   string tables for access
    //   sections that need modifications when other sections move.
    for (sections, 0..) |*section, idx| {
        const need_data = switch (section.section.sh_type) {
            // NOTE(review): `DT_VERSYM` is a dynamic-tag constant by name, while `sh_type`
            // holds SHT_* values - confirm this is the intended constant.
            elf.DT_VERSYM => true,
            elf.SHT_SYMTAB, elf.SHT_DYNSYM => true,
            else => false,
        };
        const need_strings = (idx == header.shstrndx);

        if (need_data or need_strings) {
            const buffer = try allocator.alignedAlloc(u8, section_memory_align, @intCast(section.section.sh_size));
            const bytes_read = try in_file.preadAll(buffer, section.section.sh_offset);
            if (bytes_read != section.section.sh_size) return error.TRUNCATED_ELF;
            section.payload = buffer;
        }
    }

    // fill-in sections info:
    //    resolve the name
    //    find if a program segment uses the section
    //    categorize sections usage (used by program segments, debug database, common metadata, symbol table)
    for (sections) |*section| {
        section.segment = for (program_segments) |*seg| {
            if (sectionWithinSegment(section.section, seg.*)) break seg;
        } else null;

        if (section.section.sh_name != 0 and header.shstrndx != elf.SHN_UNDEF)
            section.name = std.mem.span(@as([*:0]const u8, @ptrCast(&sections[header.shstrndx].payload.?[section.section.sh_name])));

        const category_from_program: SectionCategory = if (section.segment != null) .exe else .debug;
        section.category = switch (section.section.sh_type) {
            elf.SHT_NOTE => .common,
            elf.SHT_SYMTAB => .symbols, // "strip all" vs "strip only debug"
            elf.SHT_DYNSYM => .exe,
            elf.SHT_PROGBITS => cat: {
                if (std.mem.eql(u8, section.name, ".comment")) break :cat .exe;
                if (std.mem.eql(u8, section.name, ".gnu_debuglink")) break :cat .none;
                break :cat category_from_program;
            },
            elf.SHT_LOPROC...elf.SHT_HIPROC => .common, // don't strip unknown sections
            elf.SHT_LOUSER...elf.SHT_HIUSER => .common, // don't strip unknown sections
            else => category_from_program,
        };
    }

    sections[0].category = .common; // mandatory null section
    if (header.shstrndx != elf.SHN_UNDEF)
        sections[header.shstrndx].category = .common; // string table for the headers

    // recursively propagate section categories to their linked sections, so that they are kept together
    var dirty: u1 = 1;
    while (dirty != 0) {
        dirty = 0;

        for (sections) |*section| {
            if (section.section.sh_link != elf.SHN_UNDEF)
                dirty |= ElfFileHelper.propagateCategory(&sections[section.section.sh_link].category, section.category);
            if ((section.section.sh_flags & elf.SHF_INFO_LINK) != 0 and section.section.sh_info != elf.SHN_UNDEF)
                dirty |= ElfFileHelper.propagateCategory(&sections[section.section.sh_info].category, section.category);
        }
    }

    return Self{
        .arena = arena,
        .raw_elf_header = raw_header,
        .program_segments = program_segments,
        .sections = sections,
    };
}
/// Releases the arena stored in this value by calling its `deinit`.
pub fn deinit(self: *Self) void {
self.arena.deinit();
}
const Filter = ElfFileHelper.Filter;
const DebugLink = ElfFileHelper.DebugLink;
/// Options for a single `emit` call.
const EmitElfOptions = struct {
/// Passed to `ElfFileHelper.selectAction` together with each section's category to decide its action.
section_filter: Filter = .all,
/// When set, a ".gnu_debuglink" section holding the link name and CRC32 is appended.
debuglink: ?DebugLink = null,
/// When true, kept sections whose name starts with ".debug_" and that are not already
/// flagged SHF_COMPRESSED are offered to `ElfFileHelper.tryCompressSection`.
compress_debug: bool = false,
/// When set, a SHT_PROGBITS section with this name is appended, filled from the file at `file_path`.
add_section: ?AddSection = null,
/// When set, overrides `sh_addralign` of the first output section with a matching name.
set_section_alignment: ?SetSectionAlignment = null,
/// NOTE(review): handling of this option is outside the visible part of `emit` - presumably
/// overrides the flags of the named section; confirm against the full function.
set_section_flags: ?SetSectionFlags = null,
};
fn emit(self: *const Self, gpa: Allocator, out_file: File, in_file: File, options: EmitElfOptions) !void {
var arena = std.heap.ArenaAllocator.init(gpa);
defer arena.deinit();
const allocator = arena.allocator();
// when emitting the stripped exe:
// - unused sections are removed
// when emitting the debug file:
// - all sections are kept, but some are emptied and their type is changed to SHT_NOBITS
// the program header is kept unchanged. (`strip` does update it, but `eu-strip` does not, and it still works)
const Update = struct {
action: ElfFileHelper.Action,
// remap the indexes after omitting the filtered sections
remap_idx: u16,
// optionally overrides the payload from the source file
payload: ?[]align(section_memory_align.toByteUnits()) const u8 = null,
section: ?Elf_Shdr = null,
};
const sections_update = try allocator.alloc(Update, self.sections.len);
const new_shnum = blk: {
var next_idx: u16 = 0;
for (self.sections, sections_update) |section, *update| {
const action = ElfFileHelper.selectAction(section.category, options.section_filter);
const remap_idx = idx: {
if (action == .strip) break :idx elf.SHN_UNDEF;
next_idx += 1;
break :idx next_idx - 1;
};
update.* = Update{ .action = action, .remap_idx = remap_idx };
}
if (options.debuglink != null)
next_idx += 1;
if (options.add_section != null) {
next_idx += 1;
}
break :blk next_idx;
};
// add a ".gnu_debuglink" to the string table if needed
const debuglink_name: u32 = blk: {
if (options.debuglink == null) break :blk elf.SHN_UNDEF;
if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF)
fatal("zig objcopy: no strtab, cannot add the debuglink section", .{}); // TODO add the section if needed?
const strtab = &self.sections[self.raw_elf_header.e_shstrndx];
const update = &sections_update[self.raw_elf_header.e_shstrndx];
const name: []const u8 = ".gnu_debuglink";
const new_offset: u32 = @intCast(strtab.payload.?.len);
const buf = try allocator.alignedAlloc(u8, section_memory_align, new_offset + name.len + 1);
@memcpy(buf[0..new_offset], strtab.payload.?);
@memcpy(buf[new_offset..][0..name.len], name);
buf[new_offset + name.len] = 0;
assert(update.action == .keep);
update.payload = buf;
break :blk new_offset;
};
// add user section to the string table if needed
const user_section_name: u32 = blk: {
if (options.add_section == null) break :blk elf.SHN_UNDEF;
if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF)
fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed?
const strtab = &self.sections[self.raw_elf_header.e_shstrndx];
const update = &sections_update[self.raw_elf_header.e_shstrndx];
const name = options.add_section.?.section_name;
const new_offset: u32 = @intCast(strtab.payload.?.len);
const buf = try allocator.alignedAlloc(u8, section_memory_align, new_offset + name.len + 1);
@memcpy(buf[0..new_offset], strtab.payload.?);
@memcpy(buf[new_offset..][0..name.len], name);
buf[new_offset + name.len] = 0;
assert(update.action == .keep);
update.payload = buf;
break :blk new_offset;
};
// maybe compress .debug sections
if (options.compress_debug) {
for (self.sections[1..], sections_update[1..]) |section, *update| {
if (update.action != .keep) continue;
if (!std.mem.startsWith(u8, section.name, ".debug_")) continue;
if ((section.section.sh_flags & elf.SHF_COMPRESSED) != 0) continue; // already compressed
const chdr = Elf_Chdr{
.ch_type = elf.COMPRESS.ZLIB,
.ch_size = section.section.sh_size,
.ch_addralign = section.section.sh_addralign,
};
const compressed_payload = try ElfFileHelper.tryCompressSection(allocator, in_file, section.section.sh_offset, section.section.sh_size, std.mem.asBytes(&chdr));
if (compressed_payload) |payload| {
update.payload = payload;
update.section = section.section;
update.section.?.sh_addralign = @alignOf(Elf_Chdr);
update.section.?.sh_size = @intCast(payload.len);
update.section.?.sh_flags |= elf.SHF_COMPRESSED;
}
}
}
var cmdbuf = std.ArrayList(ElfFileHelper.WriteCmd).init(allocator);
defer cmdbuf.deinit();
try cmdbuf.ensureUnusedCapacity(3 + new_shnum);
var eof_offset: Elf_OffSize = 0; // track the end of the data written so far.
// build the updated headers
// nb: updated_elf_header will be updated before the actual write
var updated_elf_header = self.raw_elf_header;
if (updated_elf_header.e_shstrndx != elf.SHN_UNDEF)
updated_elf_header.e_shstrndx = sections_update[updated_elf_header.e_shstrndx].remap_idx;
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = std.mem.asBytes(&updated_elf_header), .out_offset = 0 } });
eof_offset = @sizeOf(Elf_Ehdr);
// program header as-is.
// nb: for only-debug files, removing it appears to work, but is invalid by ELF specification.
{
assert(updated_elf_header.e_phoff == @sizeOf(Elf_Ehdr));
const data = std.mem.sliceAsBytes(self.program_segments);
assert(data.len == @as(usize, updated_elf_header.e_phentsize) * updated_elf_header.e_phnum);
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_phoff } });
eof_offset = updated_elf_header.e_phoff + @as(Elf_OffSize, @intCast(data.len));
}
// update sections and queue payload writes
const updated_section_header = blk: {
const dest_sections = try allocator.alloc(Elf_Shdr, new_shnum);
{
// the ELF format doesn't specify the order for all sections.
// this code only supports when they are in increasing file order.
var offset: u64 = eof_offset;
for (self.sections[1..]) |section| {
if (section.section.sh_type == elf.SHT_NOBITS)
continue;
if (section.section.sh_offset < offset) {
fatal("zig objcopy: unsupported ELF file", .{});
}
offset = section.section.sh_offset;
}
}
dest_sections[0] = self.sections[0].section;
var dest_section_idx: u32 = 1;
for (self.sections[1..], sections_update[1..]) |section, update| {
if (update.action == .strip) continue;
assert(update.remap_idx == dest_section_idx);
const src = if (update.section) |*s| s else &section.section;
const dest = &dest_sections[dest_section_idx];
const payload = if (update.payload) |data| data else section.payload;
dest_section_idx += 1;
dest.* = src.*;
if (src.sh_link != elf.SHN_UNDEF)
dest.sh_link = sections_update[src.sh_link].remap_idx;
if ((src.sh_flags & elf.SHF_INFO_LINK) != 0 and src.sh_info != elf.SHN_UNDEF)
dest.sh_info = sections_update[src.sh_info].remap_idx;
if (payload) |data|
dest.sh_size = @intCast(data.len);
const addralign = if (src.sh_addralign == 0 or dest.sh_type == elf.SHT_NOBITS) 1 else src.sh_addralign;
dest.sh_offset = std.mem.alignForward(Elf_OffSize, eof_offset, addralign);
if (src.sh_offset != dest.sh_offset and section.segment != null and update.action != .empty and dest.sh_type != elf.SHT_NOTE and dest.sh_type != elf.SHT_NOBITS) {
if (src.sh_offset > dest.sh_offset) {
dest.sh_offset = src.sh_offset; // add padding to avoid modifing the program segments
} else {
fatal("zig objcopy: cannot adjust program segments", .{});
}
}
assert(dest.sh_addr % addralign == dest.sh_offset % addralign);
if (update.action == .empty)
dest.sh_type = elf.SHT_NOBITS;
if (dest.sh_type != elf.SHT_NOBITS) {
if (payload) |src_data| {
// update sections payload and write
const dest_data = switch (src.sh_type) {
elf.DT_VERSYM => dst_data: {
const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len);
@memcpy(data, src_data);
const defs = @as([*]elf.Verdef, @ptrCast(data))[0 .. @as(usize, @intCast(src.sh_size)) / @sizeOf(elf.Verdef)];
for (defs) |*def| switch (def.ndx) {
.LOCAL, .GLOBAL => {},
else => def.ndx = @enumFromInt(sections_update[src.sh_info].remap_idx),
};
break :dst_data data;
},
elf.SHT_SYMTAB, elf.SHT_DYNSYM => dst_data: {
const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len);
@memcpy(data, src_data);
const syms = @as([*]Elf_Sym, @ptrCast(data))[0 .. @as(usize, @intCast(src.sh_size)) / @sizeOf(Elf_Sym)];
for (syms) |*sym| {
if (sym.st_shndx != elf.SHN_UNDEF and sym.st_shndx < elf.SHN_LORESERVE)
sym.st_shndx = sections_update[sym.st_shndx].remap_idx;
}
break :dst_data data;
},
else => src_data,
};
assert(dest_data.len == dest.sh_size);
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = dest_data, .out_offset = dest.sh_offset } });
eof_offset = dest.sh_offset + dest.sh_size;
} else {
// direct contents copy
cmdbuf.appendAssumeCapacity(.{ .copy_range = .{ .in_offset = src.sh_offset, .len = dest.sh_size, .out_offset = dest.sh_offset } });
eof_offset = dest.sh_offset + dest.sh_size;
}
} else {
// account for alignment padding even in empty sections to keep logical section order
eof_offset = dest.sh_offset;
}
}
// add a ".gnu_debuglink" section
if (options.debuglink) |link| {
const payload = payload: {
const crc_offset = std.mem.alignForward(usize, link.name.len + 1, 4);
const buf = try allocator.alignedAlloc(u8, .@"4", crc_offset + 4);
@memcpy(buf[0..link.name.len], link.name);
@memset(buf[link.name.len..crc_offset], 0);
@memcpy(buf[crc_offset..], std.mem.asBytes(&link.crc32));
break :payload buf;
};
dest_sections[dest_section_idx] = Elf_Shdr{
.sh_name = debuglink_name,
.sh_type = elf.SHT_PROGBITS,
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = eof_offset,
.sh_size = @intCast(payload.len),
.sh_link = elf.SHN_UNDEF,
.sh_info = elf.SHN_UNDEF,
.sh_addralign = 4,
.sh_entsize = 0,
};
dest_section_idx += 1;
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = payload, .out_offset = eof_offset } });
eof_offset += @as(Elf_OffSize, @intCast(payload.len));
}
// --add-section
if (options.add_section) |add_section| {
var section_file = fs.cwd().openFile(add_section.file_path, .{}) catch |err|
fatal("unable to open '{s}': {s}", .{ add_section.file_path, @errorName(err) });
defer section_file.close();
const payload = try section_file.readToEndAlloc(arena.allocator(), std.math.maxInt(usize));
dest_sections[dest_section_idx] = Elf_Shdr{
.sh_name = user_section_name,
.sh_type = elf.SHT_PROGBITS,
.sh_flags = 0,
.sh_addr = 0,
.sh_offset = eof_offset,
.sh_size = @intCast(payload.len),
.sh_link = elf.SHN_UNDEF,
.sh_info = elf.SHN_UNDEF,
.sh_addralign = 4,
.sh_entsize = 0,
};
dest_section_idx += 1;
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = payload, .out_offset = eof_offset } });
eof_offset += @as(Elf_OffSize, @intCast(payload.len));
}
assert(dest_section_idx == new_shnum);
break :blk dest_sections;
};
// --set-section-alignment: overwrite alignment
if (options.set_section_alignment) |set_align| {
if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF)
fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed?
const strtab = &sections_update[self.raw_elf_header.e_shstrndx];
for (updated_section_header) |*section| {
const section_name = std.mem.span(@as([*:0]const u8, @ptrCast(&strtab.payload.?[section.sh_name])));
if (std.mem.eql(u8, section_name, set_align.section_name)) {
section.sh_addralign = set_align.alignment;
break;
}
} else std.log.warn("Skipping --set-section-alignment. Section '{s}' not found", .{set_align.section_name});
}
// --set-section-flags: overwrite flags
if (options.set_section_flags) |set_flags| {
if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF)
fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed?
const strtab = &sections_update[self.raw_elf_header.e_shstrndx];
for (updated_section_header) |*section| {
const section_name = std.mem.span(@as([*:0]const u8, @ptrCast(&strtab.payload.?[section.sh_name])));
if (std.mem.eql(u8, section_name, set_flags.section_name)) {
section.sh_flags = std.elf.SHF_WRITE; // default is writable cleared by "readonly"
const f = set_flags.flags;
// Supporting a subset of GNU and LLVM objcopy for ELF only
// GNU:
// alloc: add SHF_ALLOC
// contents: if section is SHT_NOBITS, set SHT_PROGBITS, otherwise do nothing
// load: if section is SHT_NOBITS, set SHT_PROGBITS, otherwise do nothing (same as contents)
// noload: not ELF relevant
// readonly: clear default SHF_WRITE flag
// code: add SHF_EXECINSTR
// data: not ELF relevant
// rom: ignored
// exclude: add SHF_EXCLUDE
// share: not ELF relevant
// debug: not ELF relevant
// large: add SHF_X86_64_LARGE. Fatal error if target is not x86_64
if (f.alloc) section.sh_flags |= std.elf.SHF_ALLOC;
if (f.contents or f.load) {
if (section.sh_type == std.elf.SHT_NOBITS) section.sh_type = std.elf.SHT_PROGBITS;
}
if (f.readonly) section.sh_flags &= ~@as(@TypeOf(section.sh_type), std.elf.SHF_WRITE);
if (f.code) section.sh_flags |= std.elf.SHF_EXECINSTR;
if (f.exclude) section.sh_flags |= std.elf.SHF_EXCLUDE;
if (f.large) {
if (updated_elf_header.e_machine != std.elf.EM.X86_64)
fatal("zig objcopy: 'large' section flag is only supported on x86_64 targets", .{});
section.sh_flags |= std.elf.SHF_X86_64_LARGE;
}
// LLVM:
// merge: add SHF_MERGE
// strings: add SHF_STRINGS
if (f.merge) section.sh_flags |= std.elf.SHF_MERGE;
if (f.strings) section.sh_flags |= std.elf.SHF_STRINGS;
break;
}
} else std.log.warn("Skipping --set-section-flags. Section '{s}' not found", .{set_flags.section_name});
}
// write the section header at the tail
{
const offset = std.mem.alignForward(Elf_OffSize, eof_offset, @alignOf(Elf_Shdr));
const data = std.mem.sliceAsBytes(updated_section_header);
assert(data.len == @as(usize, updated_elf_header.e_shentsize) * new_shnum);
updated_elf_header.e_shoff = offset;
updated_elf_header.e_shnum = new_shnum;
cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_shoff } });
}
try ElfFileHelper.write(allocator, out_file, in_file, cmdbuf.items);
}
/// Reports whether the file range of `section` lies entirely inside the file
/// range `[p_offset, p_offset + p_filesz]` of `segment`.
/// `SHT_NOBITS` sections are treated as occupying zero bytes in the file.
fn sectionWithinSegment(section: Elf_Shdr, segment: Elf_Phdr) bool {
    // A section that starts before the segment can never be contained in it.
    if (section.sh_offset < segment.p_offset) return false;

    const bytes_in_file = if (section.sh_type == elf.SHT_NOBITS) 0 else section.sh_size;
    const segment_end = segment.p_offset + segment.p_filesz;
    const section_end = section.sh_offset + bytes_in_file;
    return segment_end >= section_end;
}
};
}
/// Helpers shared by the ELF objcopy code: section classification, the
/// output write plan, optional section compression and debug-link creation.
const ElfFileHelper = struct {
    /// Contents of a ".gnu_debuglink" section: a file name and the CRC-32 of that file.
    const DebugLink = struct { name: []const u8, crc32: u32 };

    /// Which kinds of sections the output file should retain (see `selectAction`).
    const Filter = enum { all, program, debug, program_and_symbols, debug_and_symbols };

    /// Classification of a section, used together with `Filter` to pick an `Action`.
    const SectionCategory = enum { common, exe, debug, symbols, none };

    /// Merges the category `new` into `cur.*`.
    ///
    /// Returns 1 when `cur.*` was modified and 0 when it already held the
    /// merged value, so callers can sum the results to detect a fixed point.
    fn propagateCategory(cur: *SectionCategory, new: SectionCategory) u1 {
        const cat: SectionCategory = switch (cur.*) {
            // an unclassified section adopts whatever is propagated to it
            .none => new,
            // `.common` is sticky: nothing overrides it
            .common => .common,
            .debug => switch (new) {
                .none, .debug => .debug,
                else => new,
            },
            .exe => switch (new) {
                .common => .common,
                .none, .debug, .exe, .symbols => .exe,
            },
            // a `.symbols` section is only ever expected to receive `.symbols`
            .symbols => switch (new) {
                .none, .common, .debug, .exe => unreachable,
                .symbols => .symbols,
            },
        };

        if (cur.* != cat) {
            cur.* = cat;
            return 1;
        } else {
            return 0;
        }
    }

    /// What to do with a section in the output file.
    const Action = enum { keep, strip, empty };

    /// Chooses the action for a section of the given `category` under `filter`.
    ///
    /// Unclassified (`.none`) sections are always stripped and `.common`
    /// sections are always kept; the remaining categories depend on the filter.
    fn selectAction(category: SectionCategory, filter: Filter) Action {
        return switch (category) {
            .none => .strip,
            .common => .keep,
            .exe => switch (filter) {
                .all, .program, .program_and_symbols => .keep,
                .debug, .debug_and_symbols => .empty,
            },
            .debug => switch (filter) {
                .all, .debug, .debug_and_symbols => .keep,
                .program, .program_and_symbols => .strip,
            },
            .symbols => switch (filter) {
                .all, .program_and_symbols, .debug_and_symbols => .keep,
                .program => .strip,
                .debug => .empty,
            },
        };
    }

    /// One step of the output write plan consumed by `write`.
    const WriteCmd = union(enum) {
        /// Copy `len` bytes from the input file at `in_offset` to the output file at `out_offset`.
        copy_range: struct { in_offset: u64, len: u64, out_offset: u64 },
        /// Write `data` to the output file at `out_offset`.
        write_data: struct { data: []const u8, out_offset: u64 },
    };

    /// Executes `cmds` against `out_file`, reading copied ranges from `in_file`.
    ///
    /// `cmds` must be sorted by increasing, non-overlapping output offsets
    /// (asserted). Holes between consecutive commands are filled:
    ///   - by copying the original padding from `in_file` when two copy ranges
    ///     can be fused into a single contiguous copy,
    ///   - by writing zeroes otherwise.
    ///
    /// Returns `error.TRUNCATED_ELF` when `in_file` yields fewer bytes than a
    /// copy range asks for; allocation and I/O errors are propagated.
    fn write(allocator: Allocator, out_file: File, in_file: File, cmds: []const WriteCmd) !void {
        const zeroes = [1]u8{0} ** 4096;

        var consolidated = std.ArrayList(WriteCmd).init(allocator);
        defer consolidated.deinit();
        // Enough for the common case; zero padding larger than `zeroes` needs
        // extra entries, which is why the appends below are fallible.
        try consolidated.ensureUnusedCapacity(cmds.len * 2);

        var offset: u64 = 0;
        var fused_cmd: ?WriteCmd = null;
        for (cmds) |cmd| {
            switch (cmd) {
                .write_data => |data| {
                    assert(data.out_offset >= offset);
                    if (fused_cmd) |prev| {
                        try consolidated.append(prev);
                        fused_cmd = null;
                    }
                    // Zero-fill the hole in chunks: it may be larger than the
                    // zero buffer (e.g. with a big section alignment).
                    var pad_offset = offset;
                    while (pad_offset < data.out_offset) {
                        const chunk_len: usize = @intCast(@min(data.out_offset - pad_offset, zeroes.len));
                        try consolidated.append(.{ .write_data = .{ .data = zeroes[0..chunk_len], .out_offset = pad_offset } });
                        pad_offset += chunk_len;
                    }
                    try consolidated.append(cmd);
                    offset = data.out_offset + data.data.len;
                },
                .copy_range => |range| {
                    assert(range.out_offset >= offset);
                    if (fused_cmd) |prev| {
                        // Fusable when the new range follows the previous one in the input
                        // and both are shifted by the same amount in the output.
                        if (range.in_offset >= prev.copy_range.in_offset + prev.copy_range.len and (range.out_offset - prev.copy_range.out_offset == range.in_offset - prev.copy_range.in_offset)) {
                            fused_cmd = .{ .copy_range = .{
                                .in_offset = prev.copy_range.in_offset,
                                .out_offset = prev.copy_range.out_offset,
                                .len = (range.out_offset + range.len) - prev.copy_range.out_offset,
                            } };
                        } else {
                            try consolidated.append(prev);
                            // same chunked zero-fill as for `.write_data`
                            var pad_offset = offset;
                            while (pad_offset < range.out_offset) {
                                const chunk_len: usize = @intCast(@min(range.out_offset - pad_offset, zeroes.len));
                                try consolidated.append(.{ .write_data = .{ .data = zeroes[0..chunk_len], .out_offset = pad_offset } });
                                pad_offset += chunk_len;
                            }
                            fused_cmd = cmd;
                        }
                    } else {
                        fused_cmd = cmd;
                    }
                    offset = range.out_offset + range.len;
                },
            }
        }
        if (fused_cmd) |cmd| {
            try consolidated.append(cmd);
        }

        // write the output file
        for (consolidated.items) |cmd| {
            switch (cmd) {
                .write_data => |data| {
                    var iovec = [_]std.posix.iovec_const{.{ .base = data.data.ptr, .len = data.data.len }};
                    try out_file.pwritevAll(&iovec, data.out_offset);
                },
                .copy_range => |range| {
                    const copied_bytes = try in_file.copyRangeAll(range.in_offset, out_file, range.out_offset, range.len);
                    if (copied_bytes < range.len) return error.TRUNCATED_ELF;
                },
            }
        }
    }

    /// Reads `size` bytes of `in_file` starting at `offset` and zlib-compresses
    /// them into a newly allocated buffer that begins with `prefix`.
    ///
    /// Returns null when `size` is smaller than `prefix` or when the prefixed,
    /// compressed form does not fit in `size` bytes, i.e. when compressing
    /// would not make the section smaller. On success the caller owns the
    /// returned slice. The buffer is released on every error path.
    fn tryCompressSection(allocator: Allocator, in_file: File, offset: u64, size: u64, prefix: []const u8) !?[]align(8) const u8 {
        if (size < prefix.len) return null;

        try in_file.seekTo(offset);
        var section_reader = std.io.limitedReader(in_file.deprecatedReader(), size);

        // allocate as large as decompressed data. if the compression doesn't fit, keep the data uncompressed.
        const compressed_data = try allocator.alignedAlloc(u8, .@"8", @intCast(size));
        // The `NoSpaceLeft` paths below free explicitly and return null (not an
        // error), so this only fires for genuine failures such as read errors.
        errdefer allocator.free(compressed_data);
        var compressed_stream = std.io.fixedBufferStream(compressed_data);

        try compressed_stream.writer().writeAll(prefix);

        {
            var compressor = try std.compress.zlib.compressor(compressed_stream.writer(), .{});

            var buf: [8000]u8 = undefined;
            while (true) {
                const bytes_read = try section_reader.read(&buf);
                if (bytes_read == 0) break;
                const bytes_written = compressor.write(buf[0..bytes_read]) catch |err| switch (err) {
                    error.NoSpaceLeft => {
                        allocator.free(compressed_data);
                        return null;
                    },
                    else => return err,
                };
                std.debug.assert(bytes_written == bytes_read);
            }
            compressor.finish() catch |err| switch (err) {
                error.NoSpaceLeft => {
                    allocator.free(compressed_data);
                    return null;
                },
                else => return err,
            };
        }

        const compressed_len: usize = @intCast(compressed_stream.getPos() catch unreachable);
        // Shrink to the used size; if that fails keep the original, larger buffer.
        const data = allocator.realloc(compressed_data, compressed_len) catch compressed_data;
        return data[0..compressed_len];
    }

    /// Builds the debug-link record for the file at `path`: its base name plus
    /// the CRC-32 of its contents. Terminates the process via `fatal` when the
    /// file cannot be opened or read. The returned name is a slice of `path`.
    fn createDebugLink(path: []const u8) DebugLink {
        const file = std.fs.cwd().openFile(path, .{}) catch |err| {
            fatal("zig objcopy: could not open `{s}`: {s}\n", .{ path, @errorName(err) });
        };
        defer file.close();

        const crc = ElfFileHelper.computeFileCrc(file) catch |err| {
            fatal("zig objcopy: could not read `{s}`: {s}\n", .{ path, @errorName(err) });
        };
        return .{
            .name = std.fs.path.basename(path),
            .crc32 = crc,
        };
    }

    /// Computes the CRC-32 of the whole of `file`, rewinding it to offset 0 first.
    fn computeFileCrc(file: File) !u32 {
        var buf: [8000]u8 = undefined;

        try file.seekTo(0);
        var hasher = std.hash.Crc32.init();
        while (true) {
            const bytes_read = try file.read(&buf);
            if (bytes_read == 0) break;
            hasher.update(buf[0..bytes_read]);
        }
        return hasher.final();
    }
};
const SectionFlags = packed struct {
alloc: bool = false,
contents: bool = false,
+25 -20
View File
@@ -60,7 +60,9 @@ pub fn main() !void {
const should_open_browser = force_open_browser orelse (listen_port == 0);
const address = std.net.Address.parseIp("127.0.0.1", listen_port) catch unreachable;
var http_server = try address.listen(.{});
var http_server = try address.listen(.{
.reuse_address = true,
});
const port = http_server.listen_address.in.getPort();
const url_with_newline = try std.fmt.allocPrint(arena, "http://127.0.0.1:{d}/\n", .{port});
std.fs.File.stdout().writeAll(url_with_newline) catch {};
@@ -189,7 +191,11 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void {
var walker = try std_dir.walk(gpa);
defer walker.deinit();
var archiver = std.tar.writer(response.writer());
var adapter_buffer: [500]u8 = undefined;
var response_writer = response.writer().adaptToNewApi();
response_writer.new_interface.buffer = &adapter_buffer;
var archiver: std.tar.Writer = .{ .underlying_writer = &response_writer.new_interface };
archiver.prefix = "std";
while (try walker.next()) |entry| {
@@ -204,7 +210,13 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void {
}
var file = try entry.dir.openFile(entry.basename, .{});
defer file.close();
try archiver.writeFile(entry.path, file);
const stat = try file.stat();
var file_reader: std.fs.File.Reader = .{
.file = file,
.interface = std.fs.File.Reader.initInterface(&.{}),
.size = stat.size,
};
try archiver.writeFile(entry.path, &file_reader, stat.mtime);
}
{
@@ -217,6 +229,7 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void {
// intentionally omitting the pointless trailer
//try archiver.finish();
try response_writer.new_interface.flush();
try response.end();
}
@@ -307,21 +320,17 @@ fn buildWasmBinary(
try sendMessage(child.stdin.?, .update);
try sendMessage(child.stdin.?, .exit);
const Header = std.zig.Server.Message.Header;
var result: ?Cache.Path = null;
var result_error_bundle = std.zig.ErrorBundle.empty;
const stdout = poller.fifo(.stdout);
const stdout = poller.reader(.stdout);
poll: while (true) {
while (stdout.readableLength() < @sizeOf(Header)) {
if (!(try poller.poll())) break :poll;
}
const header = stdout.reader().readStruct(Header) catch unreachable;
while (stdout.readableLength() < header.bytes_len) {
if (!(try poller.poll())) break :poll;
}
const body = stdout.readableSliceOfLen(header.bytes_len);
const Header = std.zig.Server.Message.Header;
while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll;
const header = stdout.takeStruct(Header, .little) catch unreachable;
while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll;
const body = stdout.take(header.bytes_len) catch unreachable;
switch (header.tag) {
.zig_version => {
@@ -361,15 +370,11 @@ fn buildWasmBinary(
},
else => {}, // ignore other messages
}
stdout.discard(body.len);
}
const stderr = poller.fifo(.stderr);
if (stderr.readableLength() > 0) {
const owned_stderr = try stderr.toOwnedSlice();
defer gpa.free(owned_stderr);
std.debug.print("{s}", .{owned_stderr});
const stderr = poller.reader(.stderr);
if (stderr.bufferedLen() > 0) {
std.debug.print("{s}", .{stderr.buffered()});
}
// Send EOF to stdin.
+19 -20
View File
@@ -16,6 +16,7 @@ var stdin_buffer: [4096]u8 = undefined;
var stdout_buffer: [4096]u8 = undefined;
const crippled = switch (builtin.zig_backend) {
.stage2_aarch64,
.stage2_powerpc,
.stage2_riscv64,
=> true,
@@ -287,13 +288,14 @@ pub fn log(
/// work-in-progress backends can handle it.
pub fn mainSimple() anyerror!void {
@disableInstrumentation();
// is the backend capable of printing to stderr?
const enable_print = switch (builtin.zig_backend) {
// is the backend capable of calling `std.fs.File.writeAll`?
const enable_write = switch (builtin.zig_backend) {
.stage2_aarch64, .stage2_riscv64 => true,
else => false,
};
// is the backend capable of using std.fmt.format to print a summary at the end?
const print_summary = switch (builtin.zig_backend) {
.stage2_riscv64 => true,
// is the backend capable of calling `std.Io.Writer.print`?
const enable_print = switch (builtin.zig_backend) {
.stage2_aarch64, .stage2_riscv64 => true,
else => false,
};
@@ -302,34 +304,31 @@ pub fn mainSimple() anyerror!void {
var failed: u64 = 0;
// we don't want to bring in File and Writer if the backend doesn't support it
const stderr = if (comptime enable_print) std.fs.File.stderr() else {};
const stdout = if (enable_write) std.fs.File.stdout() else {};
for (builtin.test_functions) |test_fn| {
if (enable_write) {
stdout.writeAll(test_fn.name) catch {};
stdout.writeAll("... ") catch {};
}
if (test_fn.func()) |_| {
if (enable_print) {
stderr.writeAll(test_fn.name) catch {};
stderr.writeAll("... ") catch {};
stderr.writeAll("PASS\n") catch {};
}
if (enable_write) stdout.writeAll("PASS\n") catch {};
} else |err| {
if (enable_print) {
stderr.writeAll(test_fn.name) catch {};
stderr.writeAll("... ") catch {};
}
if (err != error.SkipZigTest) {
if (enable_print) stderr.writeAll("FAIL\n") catch {};
if (enable_write) stdout.writeAll("FAIL\n") catch {};
failed += 1;
if (!enable_print) return err;
if (!enable_write) return err;
continue;
}
if (enable_print) stderr.writeAll("SKIP\n") catch {};
if (enable_write) stdout.writeAll("SKIP\n") catch {};
skipped += 1;
continue;
}
passed += 1;
}
if (enable_print and print_summary) {
stderr.deprecatedWriter().print("{} passed, {} skipped, {} failed\n", .{ passed, skipped, failed }) catch {};
if (enable_print) {
var stdout_writer = stdout.writer(&.{});
stdout_writer.interface.print("{} passed, {} skipped, {} failed\n", .{ passed, skipped, failed }) catch {};
}
if (failed != 0) std.process.exit(1);
}
+2 -2
View File
@@ -249,12 +249,12 @@ comptime {
_ = @import("compiler_rt/hexagon.zig");
if (@import("builtin").object_format != .c) {
_ = @import("compiler_rt/atomics.zig");
if (builtin.zig_backend != .stage2_aarch64) _ = @import("compiler_rt/atomics.zig");
_ = @import("compiler_rt/stack_probe.zig");
// macOS has these functions inside libSystem.
if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) {
_ = @import("compiler_rt/aarch64_outline_atomics.zig");
if (builtin.zig_backend != .stage2_aarch64) _ = @import("compiler_rt/aarch64_outline_atomics.zig");
}
_ = @import("compiler_rt/memcpy.zig");
+1 -3
View File
@@ -1,6 +1,4 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const common = @import("./common.zig");
pub const panic = @import("common.zig").panic;
@@ -16,7 +14,7 @@ comptime {
// - addoXi4_generic as default
inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
overflow.* = 0;
const sum: ST = a +% b;
// Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract
+3
View File
@@ -1,4 +1,5 @@
const addv = @import("addo.zig");
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const math = std.math;
@@ -23,6 +24,8 @@ fn simple_addoti4(a: i128, b: i128, overflow: *c_int) i128 {
}
test "addoti4" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
const min: i128 = math.minInt(i128);
const max: i128 = math.maxInt(i128);
var i: i128 = 1;
+4 -10
View File
@@ -97,8 +97,7 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void {
.nbytes = end - start,
.whichcache = 3, // ICACHE | DCACHE
};
asm volatile (
\\ syscall
asm volatile ("syscall"
:
: [_] "{$2}" (165), // nr = SYS_sysarch
[_] "{$4}" (0), // op = MIPS_CACHEFLUSH
@@ -116,11 +115,8 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void {
} else if (arm64 and !apple) {
// Get Cache Type Info.
// TODO memoize this?
var ctr_el0: u64 = 0;
asm volatile (
\\mrs %[x], ctr_el0
\\
: [x] "=r" (ctr_el0),
const ctr_el0 = asm volatile ("mrs %[ctr_el0], ctr_el0"
: [ctr_el0] "=r" (-> u64),
);
// The DC and IC instructions must use 64-bit registers so we don't use
// uintptr_t in case this runs in an IPL32 environment.
@@ -187,9 +183,7 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void {
exportIt();
} else if (os == .linux and loongarch) {
// See: https://github.com/llvm/llvm-project/blob/cf54cae26b65fc3201eff7200ffb9b0c9e8f9a13/compiler-rt/lib/builtins/clear_cache.c#L94-L95
asm volatile (
\\ ibar 0
);
asm volatile ("ibar 0");
exportIt();
}
-1
View File
@@ -1,6 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const common = @import("common.zig");
pub const panic = common.panic;
+6 -1
View File
@@ -102,9 +102,14 @@ pub const gnu_f16_abi = switch (builtin.cpu.arch) {
pub const want_sparc_abi = builtin.cpu.arch.isSPARC();
pub const test_safety = switch (builtin.zig_backend) {
.stage2_aarch64 => false,
else => builtin.is_test,
};
// Avoid dragging in the runtime safety mechanisms into this .o file, unless
// we're trying to test compiler-rt.
pub const panic = if (builtin.is_test) std.debug.FullPanic(std.debug.defaultPanic) else std.debug.no_panic;
pub const panic = if (test_safety) std.debug.FullPanic(std.debug.defaultPanic) else std.debug.no_panic;
/// This seems to mostly correspond to `clang::TargetInfo::HasFloat16`.
pub fn F16T(comptime OtherType: type) type {
-1
View File
@@ -4,7 +4,6 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const __eqdf2 = @import("./cmpdf2.zig").__eqdf2;
const __ledf2 = @import("./cmpdf2.zig").__ledf2;
-1
View File
@@ -4,7 +4,6 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const __eqsf2 = @import("./cmpsf2.zig").__eqsf2;
const __lesf2 = @import("./cmpsf2.zig").__lesf2;
-1
View File
@@ -1,6 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const common = @import("common.zig");
pub const panic = common.panic;
-1
View File
@@ -5,7 +5,6 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const is_test = builtin.is_test;
const common = @import("common.zig");
const normalize = common.normalize;
+2 -2
View File
@@ -34,7 +34,7 @@ fn divmod(q: ?[]u32, r: ?[]u32, u: []u32, v: []u32) !void {
}
pub fn __divei4(q_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size]));
const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
@@ -43,7 +43,7 @@ pub fn __divei4(q_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) vo
}
pub fn __modei4(r_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size]));
const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
-1
View File
@@ -1,4 +1,3 @@
const is_test = @import("builtin").is_test;
const std = @import("std");
const math = std.math;
const testing = std.testing;
-1
View File
@@ -6,7 +6,6 @@ const testing = std.testing;
const maxInt = std.math.maxInt;
const minInt = std.math.minInt;
const arch = builtin.cpu.arch;
const is_test = builtin.is_test;
const common = @import("common.zig");
const udivmod = @import("udivmod.zig").udivmod;
const __divti3 = @import("divti3.zig").__divti3;
+3 -1
View File
@@ -11,7 +11,7 @@ comptime {
.visibility = common.visibility,
};
if (builtin.mode == .ReleaseSmall)
if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
@export(&memcpySmall, export_options)
else
@export(&memcpyFast, export_options);
@@ -195,6 +195,8 @@ inline fn copyRange4(
}
test "memcpy" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
const S = struct {
fn testFunc(comptime copy_func: anytype) !void {
const max_len = 1024;
+9 -7
View File
@@ -14,7 +14,7 @@ comptime {
.visibility = common.visibility,
};
if (builtin.mode == .ReleaseSmall)
if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
@export(&memmoveSmall, export_options)
else
@export(&memmoveFast, export_options);
@@ -39,7 +39,7 @@ fn memmoveSmall(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c
}
fn memmoveFast(dest: ?[*]u8, src: ?[*]u8, len: usize) callconv(.c) ?[*]u8 {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const small_limit = @max(2 * @sizeOf(Element), @sizeOf(Element));
if (copySmallLength(small_limit, dest.?, src.?, len)) return dest;
@@ -79,7 +79,7 @@ inline fn copyLessThan16(
src: [*]const u8,
len: usize,
) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
if (len < 4) {
if (len == 0) return;
const b = len / 2;
@@ -100,7 +100,7 @@ inline fn copy16ToSmallLimit(
src: [*]const u8,
len: usize,
) bool {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| {
const limit = 1 << (2 * p);
if (len < limit) {
@@ -119,7 +119,7 @@ inline fn copyRange4(
src: [*]const u8,
len: usize,
) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
comptime assert(std.math.isPowerOfTwo(copy_len));
assert(len >= copy_len);
assert(len < 4 * copy_len);
@@ -147,7 +147,7 @@ inline fn copyForwards(
src: [*]const u8,
len: usize,
) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
assert(len >= 2 * @sizeOf(Element));
const head = src[0..@sizeOf(Element)].*;
@@ -181,7 +181,7 @@ inline fn copyBlocks(
src: anytype,
max_bytes: usize,
) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const T = @typeInfo(@TypeOf(dest)).pointer.child;
comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);
@@ -217,6 +217,8 @@ inline fn copyBackwards(
}
test memmoveFast {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
const max_len = 1024;
var buffer: [max_len + @alignOf(Element) - 1]u8 = undefined;
for (&buffer, 0..) |*b, i| {
+2 -2
View File
@@ -6,7 +6,7 @@ const common = @import("./common.zig");
/// Ported from:
/// https://github.com/llvm/llvm-project/blob/2ffb1b0413efa9a24eb3c49e710e36f92e2cb50b/compiler-rt/lib/builtins/fp_mul_impl.inc
pub inline fn mulf3(comptime T: type, a: T, b: T) T {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const typeWidth = @typeInfo(T).float.bits;
const significandBits = math.floatMantissaBits(T);
const fractionalBits = math.floatFractionalBits(T);
@@ -163,7 +163,7 @@ pub inline fn mulf3(comptime T: type, a: T, b: T) T {
///
/// This is analogous to an shr version of `@shlWithOverflow`
fn wideShrWithTruncation(comptime Z: type, hi: *Z, lo: *Z, count: u32) bool {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const typeWidth = @typeInfo(Z).int.bits;
var inexact = false;
if (count < typeWidth) {
+1 -1
View File
@@ -251,7 +251,7 @@ const PIo2 = [_]f64{
/// compiler will convert from decimal to binary accurately enough
/// to produce the hexadecimal values shown.
///
pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
pub fn rem_pio2_large(x: []const f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 {
var jz: i32 = undefined;
var jx: i32 = undefined;
var jv: i32 = undefined;
-1
View File
@@ -4,7 +4,6 @@ const common = @import("common.zig");
const os_tag = builtin.os.tag;
const arch = builtin.cpu.arch;
const abi = builtin.abi;
const is_test = builtin.is_test;
pub const panic = common.panic;
+3
View File
@@ -1,4 +1,5 @@
const subo = @import("subo.zig");
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const math = std.math;
@@ -27,6 +28,8 @@ pub fn simple_suboti4(a: i128, b: i128, overflow: *c_int) i128 {
}
test "suboti3" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
const min: i128 = math.minInt(i128);
const max: i128 = math.maxInt(i128);
var i: i128 = 1;
+5 -5
View File
@@ -1,8 +1,8 @@
const std = @import("std");
const builtin = @import("builtin");
const is_test = builtin.is_test;
const Log2Int = std.math.Log2Int;
const HalveInt = @import("common.zig").HalveInt;
const common = @import("common.zig");
const HalveInt = common.HalveInt;
const lo = switch (builtin.cpu.arch.endian()) {
.big => 1,
@@ -14,7 +14,7 @@ const hi = 1 - lo;
// Returns U / v_ and sets r = U % v_.
fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T {
const HalfT = HalveInt(T, false).HalfT;
@setRuntimeSafety(is_test);
@setRuntimeSafety(common.test_safety);
var v = v_;
const b = @as(T, 1) << (@bitSizeOf(T) / 2);
@@ -70,7 +70,7 @@ fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T {
}
fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T {
@setRuntimeSafety(is_test);
@setRuntimeSafety(common.test_safety);
if (T == u64 and builtin.target.cpu.arch == .x86_64 and builtin.target.os.tag != .windows) {
var rem: T = undefined;
const quo = asm (
@@ -90,7 +90,7 @@ fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T {
// Returns a_ / b_ and sets maybe_rem = a_ % b.
pub fn udivmod(comptime T: type, a_: T, b_: T, maybe_rem: ?*T) T {
@setRuntimeSafety(is_test);
@setRuntimeSafety(common.test_safety);
const HalfT = HalveInt(T, false).HalfT;
const SignedT = std.meta.Int(.signed, @bitSizeOf(T));
+3 -2
View File
@@ -113,7 +113,7 @@ pub fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void {
}
pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size]));
const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
@@ -122,7 +122,7 @@ pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) ca
}
pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void {
@setRuntimeSafety(builtin.is_test);
@setRuntimeSafety(common.test_safety);
const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits));
const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size]));
const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size]));
@@ -131,6 +131,7 @@ pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) ca
}
test "__udivei4/__umodei4" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+3 -3
View File
@@ -772,10 +772,10 @@ export fn decl_type_html(decl_index: Decl.Index) String {
const Oom = error{OutOfMemory};
fn unpackInner(tar_bytes: []u8) !void {
var fbs = std.io.fixedBufferStream(tar_bytes);
var reader: std.Io.Reader = .fixed(tar_bytes);
var file_name_buffer: [1024]u8 = undefined;
var link_name_buffer: [1024]u8 = undefined;
var it = std.tar.iterator(fbs.reader(), .{
var it: std.tar.Iterator = .init(&reader, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
@@ -796,7 +796,7 @@ fn unpackInner(tar_bytes: []u8) !void {
{
gop.value_ptr.* = file;
}
const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)];
const file_bytes = tar_bytes[reader.seek..][0..@intCast(tar_file.size)];
assert(file == try Walk.add_file(file_name, file_bytes));
}
} else {
-1
View File
@@ -1,4 +1,3 @@
//! Use `zig init --strip` next time to generate a project without comments.
const std = @import("std");
// Although this function looks imperative, it does not perform the build
+170 -95
View File
@@ -408,104 +408,179 @@ fn createChildOnly(
return child;
}
fn userInputOptionsFromArgs(allocator: Allocator, args: anytype) UserInputOptionsMap {
var user_input_options = UserInputOptionsMap.init(allocator);
fn userInputOptionsFromArgs(arena: Allocator, args: anytype) UserInputOptionsMap {
var map = UserInputOptionsMap.init(arena);
inline for (@typeInfo(@TypeOf(args)).@"struct".fields) |field| {
const v = @field(args, field.name);
const T = @TypeOf(v);
switch (T) {
Target.Query => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = v.zigTriple(allocator) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
user_input_options.put("cpu", .{
.name = "cpu",
.value = .{ .scalar = v.serializeCpuAlloc(allocator) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
},
ResolvedTarget => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = v.query.zigTriple(allocator) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
user_input_options.put("cpu", .{
.name = "cpu",
.value = .{ .scalar = v.query.serializeCpuAlloc(allocator) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
},
LazyPath => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .lazy_path = v.dupeInner(allocator) },
.used = false,
}) catch @panic("OOM");
},
[]const LazyPath => {
var list = ArrayList(LazyPath).initCapacity(allocator, v.len) catch @panic("OOM");
for (v) |lp| list.appendAssumeCapacity(lp.dupeInner(allocator));
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .lazy_path_list = list },
.used = false,
}) catch @panic("OOM");
},
[]const u8 => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = v },
.used = false,
}) catch @panic("OOM");
},
[]const []const u8 => {
var list = ArrayList([]const u8).initCapacity(allocator, v.len) catch @panic("OOM");
list.appendSliceAssumeCapacity(v);
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .list = list },
.used = false,
}) catch @panic("OOM");
},
else => switch (@typeInfo(T)) {
.bool => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = if (v) "true" else "false" },
.used = false,
}) catch @panic("OOM");
},
.@"enum", .enum_literal => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = @tagName(v) },
.used = false,
}) catch @panic("OOM");
},
.comptime_int, .int => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = std.fmt.allocPrint(allocator, "{d}", .{v}) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
},
.comptime_float, .float => {
user_input_options.put(field.name, .{
.name = field.name,
.value = .{ .scalar = std.fmt.allocPrint(allocator, "{e}", .{v}) catch @panic("OOM") },
.used = false,
}) catch @panic("OOM");
},
else => @compileError("option '" ++ field.name ++ "' has unsupported type: " ++ @typeName(T)),
},
}
if (field.type == @Type(.null)) continue;
addUserInputOptionFromArg(arena, &map, field, field.type, @field(args, field.name));
}
return map;
}
return user_input_options;
/// Serializes a single struct field into `map` as a user input option.
///
/// The encoding is selected at compile time from `T`:
/// * `Target.Query` / `ResolvedTarget`: two entries are written, the target triple under
///   `field.name` and the serialized CPU under the fixed key "cpu".
/// * `std.zig.BuildId`: a scalar produced with the `{f}` format specifier.
/// * `LazyPath` / `[]const LazyPath`: duplicated into `arena` via `dupeInner`.
/// * `[]const u8` / `[]const []const u8`: every string is duplicated into `arena`.
/// * `bool`, enums, enum literals, integers and floats: stored as scalar strings
///   ("true"/"false", the tag name, `{d}` and `{x}` respectively).
/// * Slices of enums: stored as a list of tag names.
/// * Pointers to arrays and any other slice: normalized to a const, sentinel-free slice
///   and dispatched again.
/// * Optionals: dispatched again with the child type; an optional of an optional is rejected.
///
/// Any other type is a compile error. Allocation failure panics with "OOM".
fn addUserInputOptionFromArg(
    arena: Allocator,
    map: *UserInputOptionsMap,
    field: std.builtin.Type.StructField,
    comptime T: type,
    /// If null, the value won't be added, but `T` will still be type-checked.
    maybe_value: ?T,
) void {
    switch (T) {
        Target.Query => return if (maybe_value) |v| {
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .scalar = v.zigTriple(arena) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
            map.put("cpu", .{
                .name = "cpu",
                .value = .{ .scalar = v.serializeCpuAlloc(arena) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
        },
        ResolvedTarget => return if (maybe_value) |v| {
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .scalar = v.query.zigTriple(arena) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
            map.put("cpu", .{
                .name = "cpu",
                .value = .{ .scalar = v.query.serializeCpuAlloc(arena) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
        },
        std.zig.BuildId => return if (maybe_value) |v| {
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .scalar = std.fmt.allocPrint(arena, "{f}", .{v}) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
        },
        LazyPath => return if (maybe_value) |v| {
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .lazy_path = v.dupeInner(arena) },
                .used = false,
            }) catch @panic("OOM");
        },
        []const LazyPath => return if (maybe_value) |v| {
            var list = ArrayList(LazyPath).initCapacity(arena, v.len) catch @panic("OOM");
            for (v) |lp| list.appendAssumeCapacity(lp.dupeInner(arena));
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .lazy_path_list = list },
                .used = false,
            }) catch @panic("OOM");
        },
        []const u8 => return if (maybe_value) |v| {
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .scalar = arena.dupe(u8, v) catch @panic("OOM") },
                .used = false,
            }) catch @panic("OOM");
        },
        []const []const u8 => return if (maybe_value) |v| {
            var list = ArrayList([]const u8).initCapacity(arena, v.len) catch @panic("OOM");
            for (v) |s| list.appendAssumeCapacity(arena.dupe(u8, s) catch @panic("OOM"));
            map.put(field.name, .{
                .name = field.name,
                .value = .{ .list = list },
                .used = false,
            }) catch @panic("OOM");
        },
        else => switch (@typeInfo(T)) {
            .bool => return if (maybe_value) |v| {
                map.put(field.name, .{
                    .name = field.name,
                    .value = .{ .scalar = if (v) "true" else "false" },
                    .used = false,
                }) catch @panic("OOM");
            },
            .@"enum", .enum_literal => return if (maybe_value) |v| {
                map.put(field.name, .{
                    .name = field.name,
                    .value = .{ .scalar = @tagName(v) },
                    .used = false,
                }) catch @panic("OOM");
            },
            .comptime_int, .int => return if (maybe_value) |v| {
                map.put(field.name, .{
                    .name = field.name,
                    .value = .{ .scalar = std.fmt.allocPrint(arena, "{d}", .{v}) catch @panic("OOM") },
                    .used = false,
                }) catch @panic("OOM");
            },
            .comptime_float, .float => return if (maybe_value) |v| {
                map.put(field.name, .{
                    .name = field.name,
                    .value = .{ .scalar = std.fmt.allocPrint(arena, "{x}", .{v}) catch @panic("OOM") },
                    .used = false,
                }) catch @panic("OOM");
            },
            .pointer => |ptr_info| switch (ptr_info.size) {
                .one => switch (@typeInfo(ptr_info.child)) {
                    .array => |array_info| {
                        // Treat `*[N]E` (e.g. a string literal) as `[]const E` and dispatch again.
                        comptime var slice_info = ptr_info;
                        slice_info.size = .slice;
                        slice_info.is_const = true;
                        slice_info.child = array_info.child;
                        slice_info.sentinel_ptr = null;
                        addUserInputOptionFromArg(
                            arena,
                            map,
                            field,
                            @Type(.{ .pointer = slice_info }),
                            maybe_value orelse null,
                        );
                        return;
                    },
                    else => {},
                },
                .slice => switch (@typeInfo(ptr_info.child)) {
                    .@"enum" => return if (maybe_value) |v| {
                        var list = ArrayList([]const u8).initCapacity(arena, v.len) catch @panic("OOM");
                        for (v) |tag| list.appendAssumeCapacity(@tagName(tag));
                        map.put(field.name, .{
                            .name = field.name,
                            .value = .{ .list = list },
                            .used = false,
                        }) catch @panic("OOM");
                    },
                    else => {
                        // Strip mutability and any sentinel so that e.g. `[]u8` or `[:0]const u8`
                        // reach the `[]const u8` prong above.
                        comptime var slice_info = ptr_info;
                        slice_info.is_const = true;
                        slice_info.sentinel_ptr = null;
                        const Normalized = @Type(.{ .pointer = slice_info });
                        // Only dispatch again when normalization produced a different type.
                        // If `T` is already in normal form and still unhandled, recursing would
                        // call this very instantiation forever instead of reporting the
                        // unsupported type, so fall through to the compile error below.
                        if (Normalized != T) {
                            addUserInputOptionFromArg(
                                arena,
                                map,
                                field,
                                Normalized,
                                maybe_value orelse null,
                            );
                            return;
                        }
                    },
                },
                else => {},
            },
            // The visible caller, `userInputOptionsFromArgs`, skips null-typed fields.
            .null => unreachable,
            .optional => |info| switch (@typeInfo(info.child)) {
                // `??T` is not supported; falls through to the compile error.
                .optional => {},
                else => {
                    addUserInputOptionFromArg(
                        arena,
                        map,
                        field,
                        info.child,
                        maybe_value orelse null,
                    );
                    return;
                },
            },
            else => {},
        },
    }
    @compileError("option '" ++ field.name ++ "' has unsupported type: " ++ @typeName(field.type));
}
const OrderedUserValue = union(enum) {
+17 -22
View File
@@ -273,21 +273,17 @@ fn buildWasmBinary(
try sendMessage(child.stdin.?, .update);
try sendMessage(child.stdin.?, .exit);
const Header = std.zig.Server.Message.Header;
var result: ?Path = null;
var result_error_bundle = std.zig.ErrorBundle.empty;
const stdout = poller.fifo(.stdout);
const stdout = poller.reader(.stdout);
poll: while (true) {
while (stdout.readableLength() < @sizeOf(Header)) {
if (!(try poller.poll())) break :poll;
}
const header = stdout.reader().readStruct(Header) catch unreachable;
while (stdout.readableLength() < header.bytes_len) {
if (!(try poller.poll())) break :poll;
}
const body = stdout.readableSliceOfLen(header.bytes_len);
const Header = std.zig.Server.Message.Header;
while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll;
const header = stdout.takeStruct(Header, .little) catch unreachable;
while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll;
const body = stdout.take(header.bytes_len) catch unreachable;
switch (header.tag) {
.zig_version => {
@@ -325,15 +321,11 @@ fn buildWasmBinary(
},
else => {}, // ignore other messages
}
stdout.discard(body.len);
}
const stderr = poller.fifo(.stderr);
if (stderr.readableLength() > 0) {
const owned_stderr = try stderr.toOwnedSlice();
defer gpa.free(owned_stderr);
std.debug.print("{s}", .{owned_stderr});
const stderr_contents = try poller.toOwnedSlice(.stderr);
if (stderr_contents.len > 0) {
std.debug.print("{s}", .{stderr_contents});
}
// Send EOF to stdin.
@@ -522,7 +514,9 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void {
var cwd_cache: ?[]const u8 = null;
var archiver = std.tar.writer(response.writer());
var adapter = response.writer().adaptToNewApi();
var archiver: std.tar.Writer = .{ .underlying_writer = &adapter.new_interface };
var read_buffer: [1024]u8 = undefined;
for (deduped_paths) |joined_path| {
var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| {
@@ -530,13 +524,14 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void {
continue;
};
defer file.close();
const stat = try file.stat();
var file_reader: std.fs.File.Reader = .initSize(file, &read_buffer, stat.size);
archiver.prefix = joined_path.root_dir.path orelse try memoizedCwd(arena, &cwd_cache);
try archiver.writeFile(joined_path.sub_path, file);
try archiver.writeFile(joined_path.sub_path, &file_reader, stat.mtime);
}
// intentionally omitting the pointless trailer
//try archiver.finish();
// intentionally not calling `archiver.finishPedantically`
try adapter.new_interface.flush();
try response.end();
}
+25 -34
View File
@@ -286,7 +286,7 @@ pub fn cast(step: *Step, comptime T: type) ?*T {
}
/// For debugging purposes, prints identifying information about this Step.
pub fn dump(step: *Step, w: *std.io.Writer, tty_config: std.io.tty.Config) void {
pub fn dump(step: *Step, w: *std.Io.Writer, tty_config: std.Io.tty.Config) void {
const debug_info = std.debug.getSelfDebugInfo() catch |err| {
w.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{
@errorName(err),
@@ -359,7 +359,7 @@ pub fn addError(step: *Step, comptime fmt: []const u8, args: anytype) error{OutO
pub const ZigProcess = struct {
child: std.process.Child,
poller: std.io.Poller(StreamEnum),
poller: std.Io.Poller(StreamEnum),
progress_ipc_fd: if (std.Progress.have_ipc) ?std.posix.fd_t else void,
pub const StreamEnum = enum { stdout, stderr };
@@ -428,7 +428,7 @@ pub fn evalZigProcess(
const zp = try gpa.create(ZigProcess);
zp.* = .{
.child = child,
.poller = std.io.poll(gpa, ZigProcess.StreamEnum, .{
.poller = std.Io.poll(gpa, ZigProcess.StreamEnum, .{
.stdout = child.stdout.?,
.stderr = child.stderr.?,
}),
@@ -508,20 +508,16 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path {
try sendMessage(zp.child.stdin.?, .update);
if (!watch) try sendMessage(zp.child.stdin.?, .exit);
const Header = std.zig.Server.Message.Header;
var result: ?Path = null;
const stdout = zp.poller.fifo(.stdout);
const stdout = zp.poller.reader(.stdout);
poll: while (true) {
while (stdout.readableLength() < @sizeOf(Header)) {
if (!(try zp.poller.poll())) break :poll;
}
const header = stdout.reader().readStruct(Header) catch unreachable;
while (stdout.readableLength() < header.bytes_len) {
if (!(try zp.poller.poll())) break :poll;
}
const body = stdout.readableSliceOfLen(header.bytes_len);
const Header = std.zig.Server.Message.Header;
while (stdout.buffered().len < @sizeOf(Header)) if (!try zp.poller.poll()) break :poll;
const header = stdout.takeStruct(Header, .little) catch unreachable;
while (stdout.buffered().len < header.bytes_len) if (!try zp.poller.poll()) break :poll;
const body = stdout.take(header.bytes_len) catch unreachable;
switch (header.tag) {
.zig_version => {
@@ -547,11 +543,8 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path {
.string_bytes = try arena.dupe(u8, string_bytes),
.extra = extra_array,
};
if (watch) {
// This message indicates the end of the update.
stdout.discard(body.len);
break;
}
// This message indicates the end of the update.
if (watch) break :poll;
},
.emit_digest => {
const EmitDigest = std.zig.Server.Message.EmitDigest;
@@ -611,15 +604,13 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path {
},
else => {}, // ignore other messages
}
stdout.discard(body.len);
}
s.result_duration_ns = timer.read();
const stderr = zp.poller.fifo(.stderr);
if (stderr.readableLength() > 0) {
try s.result_error_msgs.append(arena, try stderr.toOwnedSlice());
const stderr_contents = try zp.poller.toOwnedSlice(.stderr);
if (stderr_contents.len > 0) {
try s.result_error_msgs.append(arena, try arena.dupe(u8, stderr_contents));
}
return result;
@@ -736,7 +727,7 @@ pub fn allocPrintCmd2(
argv: []const []const u8,
) Allocator.Error![]u8 {
const shell = struct {
fn escape(writer: anytype, string: []const u8, is_argv0: bool) !void {
fn escape(writer: *std.Io.Writer, string: []const u8, is_argv0: bool) !void {
for (string) |c| {
if (switch (c) {
else => true,
@@ -770,9 +761,9 @@ pub fn allocPrintCmd2(
}
};
var buf: std.ArrayListUnmanaged(u8) = .empty;
const writer = buf.writer(arena);
if (opt_cwd) |cwd| try writer.print("cd {s} && ", .{cwd});
var aw: std.Io.Writer.Allocating = .init(arena);
const writer = &aw.writer;
if (opt_cwd) |cwd| writer.print("cd {s} && ", .{cwd}) catch return error.OutOfMemory;
if (opt_env) |env| {
const process_env_map = std.process.getEnvMap(arena) catch std.process.EnvMap.init(arena);
var it = env.iterator();
@@ -782,17 +773,17 @@ pub fn allocPrintCmd2(
if (process_env_map.get(key)) |process_value| {
if (std.mem.eql(u8, value, process_value)) continue;
}
try writer.print("{s}=", .{key});
try shell.escape(writer, value, false);
try writer.writeByte(' ');
writer.print("{s}=", .{key}) catch return error.OutOfMemory;
shell.escape(writer, value, false) catch return error.OutOfMemory;
writer.writeByte(' ') catch return error.OutOfMemory;
}
}
try shell.escape(writer, argv[0], true);
shell.escape(writer, argv[0], true) catch return error.OutOfMemory;
for (argv[1..]) |arg| {
try writer.writeByte(' ');
try shell.escape(writer, arg, false);
writer.writeByte(' ') catch return error.OutOfMemory;
shell.escape(writer, arg, false) catch return error.OutOfMemory;
}
return buf.toOwnedSlice(arena);
return aw.toOwnedSlice();
}
/// Prefer `cacheHitAndWatch` unless you already added watch inputs
+42 -4
View File
@@ -681,10 +681,14 @@ pub fn producesImplib(compile: *Compile) bool {
return compile.isDll();
}
/// Deprecated; use `compile.root_module.link_libc = true` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Sets the `link_libc` field of `compile.root_module` to `true`; nothing else is touched.
pub fn linkLibC(compile: *Compile) void {
compile.root_module.link_libc = true;
}
/// Deprecated; use `compile.root_module.link_libcpp = true` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Sets the `link_libcpp` field of `compile.root_module` to `true`; nothing else is touched.
pub fn linkLibCpp(compile: *Compile) void {
compile.root_module.link_libcpp = true;
}
@@ -802,10 +806,14 @@ fn runPkgConfig(compile: *Compile, lib_name: []const u8) !PkgConfigResult {
};
}
/// Deprecated; use `compile.root_module.linkSystemLibrary(name, .{})` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `name` to `compile.root_module.linkSystemLibrary` together with an empty
/// options literal (`.{}`).
pub fn linkSystemLibrary(compile: *Compile, name: []const u8) void {
return compile.root_module.linkSystemLibrary(name, .{});
}
/// Deprecated; use `compile.root_module.linkSystemLibrary(name, options)` instead.
/// To be removed after 0.15.0 is tagged.
pub fn linkSystemLibrary2(
compile: *Compile,
name: []const u8,
@@ -814,22 +822,26 @@ pub fn linkSystemLibrary2(
return compile.root_module.linkSystemLibrary(name, options);
}
/// Deprecated; use `c.root_module.linkFramework(name, .{})` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `name` to `c.root_module.linkFramework` together with an empty options
/// literal (`.{}`).
pub fn linkFramework(c: *Compile, name: []const u8) void {
c.root_module.linkFramework(name, .{});
}
/// Handy when you have many C/C++ source files and want them all to have the same flags.
/// Deprecated; use `compile.root_module.addCSourceFiles(options)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `options` unchanged to `compile.root_module.addCSourceFiles`.
pub fn addCSourceFiles(compile: *Compile, options: Module.AddCSourceFilesOptions) void {
compile.root_module.addCSourceFiles(options);
}
/// Deprecated; use `compile.root_module.addCSourceFile(source)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `source` unchanged to `compile.root_module.addCSourceFile`.
pub fn addCSourceFile(compile: *Compile, source: Module.CSourceFile) void {
compile.root_module.addCSourceFile(source);
}
/// Resource files must have the extension `.rc`.
/// Can be called regardless of target. The .rc file will be ignored
/// if the target object format does not support embedded resources.
/// Deprecated; use `compile.root_module.addWin32ResourceFile(source)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `source` unchanged to `compile.root_module.addWin32ResourceFile`.
pub fn addWin32ResourceFile(compile: *Compile, source: Module.RcSourceFile) void {
compile.root_module.addWin32ResourceFile(source);
}
@@ -915,54 +927,80 @@ pub fn getEmittedLlvmBc(compile: *Compile) LazyPath {
return compile.getEmittedFileGeneric(&compile.generated_llvm_bc);
}
/// Deprecated; use `compile.root_module.addAssemblyFile(source)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `source` unchanged to `compile.root_module.addAssemblyFile`.
pub fn addAssemblyFile(compile: *Compile, source: LazyPath) void {
compile.root_module.addAssemblyFile(source);
}
/// Deprecated; use `compile.root_module.addObjectFile(source)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `source` unchanged to `compile.root_module.addObjectFile`.
pub fn addObjectFile(compile: *Compile, source: LazyPath) void {
compile.root_module.addObjectFile(source);
}
/// Deprecated; use `compile.root_module.addObject(object)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards the `object` compile step unchanged to `compile.root_module.addObject`.
pub fn addObject(compile: *Compile, object: *Compile) void {
compile.root_module.addObject(object);
}
/// Deprecated; use `compile.root_module.linkLibrary(library)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards the `library` compile step unchanged to `compile.root_module.linkLibrary`.
pub fn linkLibrary(compile: *Compile, library: *Compile) void {
compile.root_module.linkLibrary(library);
}
/// Deprecated; use `compile.root_module.addAfterIncludePath(lazy_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `lazy_path` unchanged to `compile.root_module.addAfterIncludePath`.
pub fn addAfterIncludePath(compile: *Compile, lazy_path: LazyPath) void {
compile.root_module.addAfterIncludePath(lazy_path);
}
/// Deprecated; use `compile.root_module.addSystemIncludePath(lazy_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `lazy_path` unchanged to `compile.root_module.addSystemIncludePath`.
pub fn addSystemIncludePath(compile: *Compile, lazy_path: LazyPath) void {
compile.root_module.addSystemIncludePath(lazy_path);
}
/// Deprecated; use `compile.root_module.addIncludePath(lazy_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `lazy_path` unchanged to `compile.root_module.addIncludePath`.
pub fn addIncludePath(compile: *Compile, lazy_path: LazyPath) void {
compile.root_module.addIncludePath(lazy_path);
}
/// Deprecated; use `compile.root_module.addConfigHeader(config_header)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards the `config_header` step pointer unchanged to
/// `compile.root_module.addConfigHeader`.
pub fn addConfigHeader(compile: *Compile, config_header: *Step.ConfigHeader) void {
compile.root_module.addConfigHeader(config_header);
}
/// Deprecated; use `compile.root_module.addEmbedPath(lazy_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `lazy_path` unchanged to `compile.root_module.addEmbedPath`.
pub fn addEmbedPath(compile: *Compile, lazy_path: LazyPath) void {
compile.root_module.addEmbedPath(lazy_path);
}
/// Deprecated; use `compile.root_module.addLibraryPath(directory_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `directory_path` unchanged to `compile.root_module.addLibraryPath`.
pub fn addLibraryPath(compile: *Compile, directory_path: LazyPath) void {
compile.root_module.addLibraryPath(directory_path);
}
/// Deprecated; use `compile.root_module.addRPath(directory_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `directory_path` unchanged to `compile.root_module.addRPath`.
pub fn addRPath(compile: *Compile, directory_path: LazyPath) void {
compile.root_module.addRPath(directory_path);
}
/// Deprecated; use `compile.root_module.addSystemFrameworkPath(directory_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `directory_path` unchanged to `compile.root_module.addSystemFrameworkPath`.
pub fn addSystemFrameworkPath(compile: *Compile, directory_path: LazyPath) void {
compile.root_module.addSystemFrameworkPath(directory_path);
}
/// Deprecated; use `compile.root_module.addFrameworkPath(directory_path)` instead.
/// To be removed after 0.15.0 is tagged.
///
/// Forwards `directory_path` unchanged to `compile.root_module.addFrameworkPath`.
pub fn addFrameworkPath(compile: *Compile, directory_path: LazyPath) void {
compile.root_module.addFrameworkPath(directory_path);
}
+58 -39
View File
@@ -73,9 +73,12 @@ skip_foreign_checks: bool,
/// external executor (such as qemu) but not fail if the executor is unavailable.
failing_to_execute_foreign_is_an_error: bool,
/// Deprecated in favor of `stdio_limit`.
max_stdio_size: usize,
/// If stderr or stdout exceeds this amount, the child process is killed and
/// the step fails.
max_stdio_size: usize,
stdio_limit: std.Io.Limit,
captured_stdout: ?*Output,
captured_stderr: ?*Output,
@@ -169,7 +172,7 @@ pub const Output = struct {
pub fn create(owner: *std.Build, name: []const u8) *Run {
const run = owner.allocator.create(Run) catch @panic("OOM");
run.* = .{
.step = Step.init(.{
.step = .init(.{
.id = base_id,
.name = name,
.owner = owner,
@@ -186,6 +189,7 @@ pub fn create(owner: *std.Build, name: []const u8) *Run {
.skip_foreign_checks = false,
.failing_to_execute_foreign_is_an_error = true,
.max_stdio_size = 10 * 1024 * 1024,
.stdio_limit = .unlimited,
.captured_stdout = null,
.captured_stderr = null,
.dep_output_file = null,
@@ -1011,7 +1015,7 @@ fn populateGeneratedPaths(
}
}
fn formatTerm(term: ?std.process.Child.Term, w: *std.io.Writer) std.io.Writer.Error!void {
fn formatTerm(term: ?std.process.Child.Term, w: *std.Io.Writer) std.Io.Writer.Error!void {
if (term) |t| switch (t) {
.Exited => |code| try w.print("exited with code {d}", .{code}),
.Signal => |sig| try w.print("terminated with signal {d}", .{sig}),
@@ -1500,7 +1504,7 @@ fn evalZigTest(
const gpa = run.step.owner.allocator;
const arena = run.step.owner.allocator;
var poller = std.io.poll(gpa, enum { stdout, stderr }, .{
var poller = std.Io.poll(gpa, enum { stdout, stderr }, .{
.stdout = child.stdout.?,
.stderr = child.stderr.?,
});
@@ -1524,11 +1528,6 @@ fn evalZigTest(
break :failed false;
};
const Header = std.zig.Server.Message.Header;
const stdout = poller.fifo(.stdout);
const stderr = poller.fifo(.stderr);
var fail_count: u32 = 0;
var skip_count: u32 = 0;
var leak_count: u32 = 0;
@@ -1541,16 +1540,14 @@ fn evalZigTest(
var sub_prog_node: ?std.Progress.Node = null;
defer if (sub_prog_node) |n| n.end();
const stdout = poller.reader(.stdout);
const stderr = poller.reader(.stderr);
const any_write_failed = first_write_failed or poll: while (true) {
while (stdout.readableLength() < @sizeOf(Header)) {
if (!(try poller.poll())) break :poll false;
}
const header = stdout.reader().readStruct(Header) catch unreachable;
while (stdout.readableLength() < header.bytes_len) {
if (!(try poller.poll())) break :poll false;
}
const body = stdout.readableSliceOfLen(header.bytes_len);
const Header = std.zig.Server.Message.Header;
while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll false;
const header = stdout.takeStruct(Header, .little) catch unreachable;
while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll false;
const body = stdout.take(header.bytes_len) catch unreachable;
switch (header.tag) {
.zig_version => {
if (!std.mem.eql(u8, builtin.zig_version_string, body)) {
@@ -1607,9 +1604,9 @@ fn evalZigTest(
if (tr_hdr.flags.fail or tr_hdr.flags.leak or tr_hdr.flags.log_err_count > 0) {
const name = std.mem.sliceTo(md.string_bytes[md.names[tr_hdr.index]..], 0);
const orig_msg = stderr.readableSlice(0);
defer stderr.discard(orig_msg.len);
const msg = std.mem.trim(u8, orig_msg, "\n");
const stderr_contents = stderr.buffered();
stderr.toss(stderr_contents.len);
const msg = std.mem.trim(u8, stderr_contents, "\n");
const label = if (tr_hdr.flags.fail)
"failed"
else if (tr_hdr.flags.leak)
@@ -1660,8 +1657,6 @@ fn evalZigTest(
},
else => {}, // ignore other messages
}
stdout.discard(body.len);
};
if (any_write_failed) {
@@ -1670,9 +1665,9 @@ fn evalZigTest(
while (try poller.poll()) {}
}
if (stderr.readableLength() > 0) {
const msg = std.mem.trim(u8, try stderr.toOwnedSlice(), "\n");
if (msg.len > 0) run.step.result_stderr = msg;
const stderr_contents = std.mem.trim(u8, stderr.buffered(), "\n");
if (stderr_contents.len > 0) {
run.step.result_stderr = try arena.dupe(u8, stderr_contents);
}
// Send EOF to stdin.
@@ -1769,13 +1764,22 @@ fn evalGeneric(run: *Run, child: *std.process.Child) !StdIoResult {
child.stdin = null;
},
.lazy_path => |lazy_path| {
const path = lazy_path.getPath2(b, &run.step);
const file = b.build_root.handle.openFile(path, .{}) catch |err| {
const path = lazy_path.getPath3(b, &run.step);
const file = path.root_dir.handle.openFile(path.subPathOrDot(), .{}) catch |err| {
return run.step.fail("unable to open stdin file: {s}", .{@errorName(err)});
};
defer file.close();
child.stdin.?.writeFileAll(file, .{}) catch |err| {
return run.step.fail("unable to write file to stdin: {s}", .{@errorName(err)});
// TODO https://github.com/ziglang/zig/issues/23955
var buffer: [1024]u8 = undefined;
var file_reader = file.reader(&buffer);
var stdin_writer = child.stdin.?.writer(&.{});
_ = stdin_writer.interface.sendFileAll(&file_reader, .unlimited) catch |err| switch (err) {
error.ReadFailed => return run.step.fail("failed to read from {f}: {t}", .{
path, file_reader.err.?,
}),
error.WriteFailed => return run.step.fail("failed to write to stdin: {t}", .{
stdin_writer.err.?,
}),
};
child.stdin.?.close();
child.stdin = null;
@@ -1786,28 +1790,43 @@ fn evalGeneric(run: *Run, child: *std.process.Child) !StdIoResult {
var stdout_bytes: ?[]const u8 = null;
var stderr_bytes: ?[]const u8 = null;
run.stdio_limit = run.stdio_limit.min(.limited(run.max_stdio_size));
if (child.stdout) |stdout| {
if (child.stderr) |stderr| {
var poller = std.io.poll(arena, enum { stdout, stderr }, .{
var poller = std.Io.poll(arena, enum { stdout, stderr }, .{
.stdout = stdout,
.stderr = stderr,
});
defer poller.deinit();
while (try poller.poll()) {
if (poller.fifo(.stdout).count > run.max_stdio_size)
return error.StdoutStreamTooLong;
if (poller.fifo(.stderr).count > run.max_stdio_size)
return error.StderrStreamTooLong;
// Enforce the configured limit on each stream independently. The stdout check must
// inspect the stdout reader; previously both checks measured stderr, so stdout was
// never limited and an oversized stderr was reported as `StdoutStreamTooLong`.
if (run.stdio_limit.toInt()) |limit| {
    if (poller.reader(.stdout).buffered().len > limit)
        return error.StdoutStreamTooLong;
    if (poller.reader(.stderr).buffered().len > limit)
        return error.StderrStreamTooLong;
}
}
stdout_bytes = try poller.fifo(.stdout).toOwnedSlice();
stderr_bytes = try poller.fifo(.stderr).toOwnedSlice();
stdout_bytes = try poller.toOwnedSlice(.stdout);
stderr_bytes = try poller.toOwnedSlice(.stderr);
} else {
stdout_bytes = try stdout.deprecatedReader().readAllAlloc(arena, run.max_stdio_size);
var small_buffer: [1]u8 = undefined;
var stdout_reader = stdout.readerStreaming(&small_buffer);
stdout_bytes = stdout_reader.interface.allocRemaining(arena, run.stdio_limit) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.ReadFailed => return stdout_reader.err.?,
error.StreamTooLong => return error.StdoutStreamTooLong,
};
}
} else if (child.stderr) |stderr| {
stderr_bytes = try stderr.deprecatedReader().readAllAlloc(arena, run.max_stdio_size);
var small_buffer: [1]u8 = undefined;
var stderr_reader = stderr.readerStreaming(&small_buffer);
stderr_bytes = stderr_reader.interface.allocRemaining(arena, run.stdio_limit) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
error.ReadFailed => return stderr_reader.err.?,
error.StreamTooLong => return error.StderrStreamTooLong,
};
}
if (stderr_bytes) |bytes| if (bytes.len > 0) {
+266 -217
View File
@@ -1,16 +1,11 @@
const std = @import("std.zig");
const builtin = @import("builtin");
const root = @import("root");
const c = std.c;
const is_windows = builtin.os.tag == .windows;
const std = @import("std.zig");
const windows = std.os.windows;
const posix = std.posix;
const math = std.math;
const assert = std.debug.assert;
const fs = std.fs;
const mem = std.mem;
const meta = std.meta;
const File = std.fs.File;
const Allocator = std.mem.Allocator;
const Alignment = std.mem.Alignment;
@@ -314,11 +309,11 @@ pub fn GenericReader(
}
/// Helper for bridging to the new `Reader` API while upgrading.
pub fn adaptToNewApi(self: *const Self) Adapter {
pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter {
return .{
.derp_reader = self.*,
.new_interface = .{
.buffer = &.{},
.buffer = buffer,
.vtable = &.{ .stream = Adapter.stream },
.seek = 0,
.end = 0,
@@ -334,10 +329,12 @@ pub fn GenericReader(
fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize {
const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r));
const buf = limit.slice(try w.writableSliceGreedy(1));
return a.derp_reader.read(buf) catch |err| {
const n = a.derp_reader.read(buf) catch |err| {
a.err = err;
return error.ReadFailed;
};
w.advance(n);
return n;
}
};
};
@@ -419,9 +416,14 @@ pub fn GenericWriter(
new_interface: Writer,
err: ?Error = null,
fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize {
fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize {
_ = splat;
const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w));
const buffered = w.buffered();
if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| {
a.err = err;
return error.WriteFailed;
});
return a.derp_writer.write(data[0]) catch |err| {
a.err = err;
return error.WriteFailed;
@@ -435,54 +437,46 @@ pub fn GenericWriter(
pub const AnyReader = @import("Io/DeprecatedReader.zig");
/// Deprecated in favor of `Writer`.
pub const AnyWriter = @import("Io/DeprecatedWriter.zig");
/// Deprecated in favor of `File.Reader` and `File.Writer`.
pub const SeekableStream = @import("Io/seekable_stream.zig").SeekableStream;
/// Deprecated in favor of `Writer`.
pub const BufferedWriter = @import("Io/buffered_writer.zig").BufferedWriter;
/// Deprecated in favor of `Writer`.
pub const bufferedWriter = @import("Io/buffered_writer.zig").bufferedWriter;
/// Deprecated in favor of `Reader`.
pub const BufferedReader = @import("Io/buffered_reader.zig").BufferedReader;
/// Deprecated in favor of `Reader`.
pub const bufferedReader = @import("Io/buffered_reader.zig").bufferedReader;
/// Deprecated in favor of `Reader`.
pub const bufferedReaderSize = @import("Io/buffered_reader.zig").bufferedReaderSize;
/// Deprecated in favor of `Reader`.
pub const FixedBufferStream = @import("Io/fixed_buffer_stream.zig").FixedBufferStream;
/// Deprecated in favor of `Reader`.
pub const fixedBufferStream = @import("Io/fixed_buffer_stream.zig").fixedBufferStream;
pub const CWriter = @import("Io/c_writer.zig").CWriter;
pub const cWriter = @import("Io/c_writer.zig").cWriter;
/// Deprecated in favor of `Reader.Limited`.
pub const LimitedReader = @import("Io/limited_reader.zig").LimitedReader;
/// Deprecated in favor of `Reader.Limited`.
pub const limitedReader = @import("Io/limited_reader.zig").limitedReader;
/// Deprecated with no replacement; inefficient pattern
pub const CountingWriter = @import("Io/counting_writer.zig").CountingWriter;
/// Deprecated with no replacement; inefficient pattern
pub const countingWriter = @import("Io/counting_writer.zig").countingWriter;
/// Deprecated with no replacement; inefficient pattern
pub const CountingReader = @import("Io/counting_reader.zig").CountingReader;
/// Deprecated with no replacement; inefficient pattern
pub const countingReader = @import("Io/counting_reader.zig").countingReader;
pub const MultiWriter = @import("Io/multi_writer.zig").MultiWriter;
pub const multiWriter = @import("Io/multi_writer.zig").multiWriter;
pub const BitReader = @import("Io/bit_reader.zig").BitReader;
pub const bitReader = @import("Io/bit_reader.zig").bitReader;
pub const BitWriter = @import("Io/bit_writer.zig").BitWriter;
pub const bitWriter = @import("Io/bit_writer.zig").bitWriter;
pub const ChangeDetectionStream = @import("Io/change_detection_stream.zig").ChangeDetectionStream;
pub const changeDetectionStream = @import("Io/change_detection_stream.zig").changeDetectionStream;
pub const FindByteWriter = @import("Io/find_byte_writer.zig").FindByteWriter;
pub const findByteWriter = @import("Io/find_byte_writer.zig").findByteWriter;
pub const BufferedAtomicFile = @import("Io/buffered_atomic_file.zig").BufferedAtomicFile;
pub const StreamSource = @import("Io/stream_source.zig").StreamSource;
pub const tty = @import("Io/tty.zig");
/// A Writer that doesn't write to anything.
/// Deprecated in favor of `Writer.Discarding`.
pub const null_writer: NullWriter = .{ .context = {} };
/// Deprecated in favor of `Writer.Discarding`.
pub const NullWriter = GenericWriter(void, error{}, dummyWrite);
fn dummyWrite(context: void, data: []const u8) error{}!usize {
_ = context;
@@ -494,54 +488,51 @@ test null_writer {
}
pub fn poll(
allocator: Allocator,
gpa: Allocator,
comptime StreamEnum: type,
files: PollFiles(StreamEnum),
) Poller(StreamEnum) {
const enum_fields = @typeInfo(StreamEnum).@"enum".fields;
var result: Poller(StreamEnum) = undefined;
if (is_windows) result.windows = .{
.first_read_done = false,
.overlapped = [1]windows.OVERLAPPED{
mem.zeroes(windows.OVERLAPPED),
} ** enum_fields.len,
.small_bufs = undefined,
.active = .{
.count = 0,
.handles_buf = undefined,
.stream_map = undefined,
},
var result: Poller(StreamEnum) = .{
.gpa = gpa,
.readers = @splat(.failing),
.poll_fds = undefined,
.windows = if (is_windows) .{
.first_read_done = false,
.overlapped = [1]windows.OVERLAPPED{
std.mem.zeroes(windows.OVERLAPPED),
} ** enum_fields.len,
.small_bufs = undefined,
.active = .{
.count = 0,
.handles_buf = undefined,
.stream_map = undefined,
},
} else {},
};
inline for (0..enum_fields.len) |i| {
result.fifos[i] = .{
.allocator = allocator,
.buf = &.{},
.head = 0,
.count = 0,
};
inline for (enum_fields, 0..) |field, i| {
if (is_windows) {
result.windows.active.handles_buf[i] = @field(files, enum_fields[i].name).handle;
result.windows.active.handles_buf[i] = @field(files, field.name).handle;
} else {
result.poll_fds[i] = .{
.fd = @field(files, enum_fields[i].name).handle,
.fd = @field(files, field.name).handle,
.events = posix.POLL.IN,
.revents = undefined,
};
}
}
return result;
}
pub const PollFifo = std.fifo.LinearFifo(u8, .Dynamic);
pub fn Poller(comptime StreamEnum: type) type {
return struct {
const enum_fields = @typeInfo(StreamEnum).@"enum".fields;
const PollFd = if (is_windows) void else posix.pollfd;
fifos: [enum_fields.len]PollFifo,
gpa: Allocator,
readers: [enum_fields.len]Reader,
poll_fds: [enum_fields.len]PollFd,
windows: if (is_windows) struct {
first_read_done: bool,
@@ -553,7 +544,7 @@ pub fn Poller(comptime StreamEnum: type) type {
stream_map: [enum_fields.len]StreamEnum,
pub fn removeAt(self: *@This(), index: u32) void {
std.debug.assert(index < self.count);
assert(index < self.count);
for (index + 1..self.count) |i| {
self.handles_buf[i - 1] = self.handles_buf[i];
self.stream_map[i - 1] = self.stream_map[i];
@@ -566,13 +557,14 @@ pub fn Poller(comptime StreamEnum: type) type {
const Self = @This();
pub fn deinit(self: *Self) void {
const gpa = self.gpa;
if (is_windows) {
// cancel any pending IO to prevent clobbering OVERLAPPED value
for (self.windows.active.handles_buf[0..self.windows.active.count]) |h| {
_ = windows.kernel32.CancelIo(h);
}
}
inline for (&self.fifos) |*q| q.deinit();
inline for (&self.readers) |*r| gpa.free(r.buffer);
self.* = undefined;
}
@@ -592,21 +584,40 @@ pub fn Poller(comptime StreamEnum: type) type {
}
}
pub inline fn fifo(self: *Self, comptime which: StreamEnum) *PollFifo {
return &self.fifos[@intFromEnum(which)];
pub fn reader(self: *Self, which: StreamEnum) *Reader {
return &self.readers[@intFromEnum(which)];
}
/// Hands the bytes currently buffered for stream `which` over to the caller
/// and leaves that stream's reader with an empty buffer.
///
/// The returned slice is allocated with the poller's `gpa`. On
/// `error.OutOfMemory` the reader is left unmodified.
pub fn toOwnedSlice(self: *Self, which: StreamEnum) error{OutOfMemory}![]u8 {
    const gpa = self.gpa;
    const r = reader(self, which);
    // When nothing has been consumed yet the existing allocation can simply be
    // resized to the buffered length; otherwise the unread bytes are copied
    // out and the old allocation is released.
    const owned = if (r.seek == 0) try gpa.realloc(r.buffer, r.end) else copy: {
        const duplicate = try gpa.dupe(u8, r.buffered());
        gpa.free(r.buffer);
        break :copy duplicate;
    };
    r.buffer = &.{};
    r.seek = 0;
    r.end = 0;
    return owned;
}
fn pollWindows(self: *Self, nanoseconds: ?u64) !bool {
const bump_amt = 512;
const gpa = self.gpa;
if (!self.windows.first_read_done) {
var already_read_data = false;
for (0..enum_fields.len) |i| {
const handle = self.windows.active.handles_buf[i];
switch (try windowsAsyncReadToFifoAndQueueSmallRead(
gpa,
handle,
&self.windows.overlapped[i],
&self.fifos[i],
&self.readers[i],
&self.windows.small_bufs[i],
bump_amt,
)) {
@@ -653,7 +664,7 @@ pub fn Poller(comptime StreamEnum: type) type {
const handle = self.windows.active.handles_buf[active_idx];
const overlapped = &self.windows.overlapped[stream_idx];
const stream_fifo = &self.fifos[stream_idx];
const stream_reader = &self.readers[stream_idx];
const small_buf = &self.windows.small_bufs[stream_idx];
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
@@ -664,12 +675,16 @@ pub fn Poller(comptime StreamEnum: type) type {
},
.aborted => unreachable,
};
try stream_fifo.write(small_buf[0..num_bytes_read]);
const buf = small_buf[0..num_bytes_read];
const dest = try writableSliceGreedyAlloc(stream_reader, gpa, buf.len);
@memcpy(dest[0..buf.len], buf);
advanceBufferEnd(stream_reader, buf.len);
switch (try windowsAsyncReadToFifoAndQueueSmallRead(
gpa,
handle,
overlapped,
stream_fifo,
stream_reader,
small_buf,
bump_amt,
)) {
@@ -684,6 +699,7 @@ pub fn Poller(comptime StreamEnum: type) type {
}
fn pollPosix(self: *Self, nanoseconds: ?u64) !bool {
const gpa = self.gpa;
// We ask for ensureUnusedCapacity with this much extra space. This
// has more of an effect on small reads because once the reads
// start to get larger the amount of space an ArrayList will
@@ -703,18 +719,18 @@ pub fn Poller(comptime StreamEnum: type) type {
}
var keep_polling = false;
inline for (&self.poll_fds, &self.fifos) |*poll_fd, *q| {
for (&self.poll_fds, &self.readers) |*poll_fd, *r| {
// Try reading whatever is available before checking the error
// conditions.
// It's still possible to read after a POLL.HUP is received,
// always check if there's some data waiting to be read first.
if (poll_fd.revents & posix.POLL.IN != 0) {
const buf = try q.writableWithSize(bump_amt);
const buf = try writableSliceGreedyAlloc(r, gpa, bump_amt);
const amt = posix.read(poll_fd.fd, buf) catch |err| switch (err) {
error.BrokenPipe => 0, // Handle the same as EOF.
else => |e| return e,
};
q.update(amt);
advanceBufferEnd(r, amt);
if (amt == 0) {
// Remove the fd when the EOF condition is met.
poll_fd.fd = -1;
@@ -730,146 +746,181 @@ pub fn Poller(comptime StreamEnum: type) type {
}
return keep_polling;
}
};
}
/// The `ReadFile` documentation states that `lpNumberOfBytesRead` does not have a meaningful
/// result when using overlapped I/O, but also that it cannot be `null` on Windows 7. For
/// compatibility, we point it to this dummy variable, which we never otherwise access.
/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
var win_dummy_bytes_read: u32 = undefined;
/// Read as much data as possible from `handle` with `overlapped`, and write it to the FIFO. Before
/// returning, queue a read into `small_buf` so that `WaitForMultipleObjects` returns when more data
/// is available. `handle` must have no pending asynchronous operation.
fn windowsAsyncReadToFifoAndQueueSmallRead(
handle: windows.HANDLE,
overlapped: *windows.OVERLAPPED,
fifo: *PollFifo,
small_buf: *[128]u8,
bump_amt: usize,
) !enum { empty, populated, closed_populated, closed } {
var read_any_data = false;
while (true) {
const fifo_read_pending = while (true) {
const buf = try fifo.writableWithSize(bump_amt);
const buf_len = math.cast(u32, buf.len) orelse math.maxInt(u32);
if (0 == windows.kernel32.ReadFile(
handle,
buf.ptr,
buf_len,
&win_dummy_bytes_read,
overlapped,
)) switch (windows.GetLastError()) {
.IO_PENDING => break true,
.BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
else => |err| return windows.unexpectedError(err),
};
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => unreachable,
};
read_any_data = true;
fifo.update(num_bytes_read);
if (num_bytes_read == buf_len) {
// We filled the buffer, so there's probably more data available.
continue;
} else {
// We didn't fill the buffer, so assume we're out of data.
// There is no pending read.
break false;
/// Returns a slice into the unused capacity of `buffer` with at least
/// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary.
///
/// After calling this function, typically the caller will follow up with a
/// call to `advanceBufferEnd` to report the actual number of bytes buffered.
fn writableSliceGreedyAlloc(r: *Reader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 {
{
const unused = r.buffer[r.end..];
if (unused.len >= min_len) return unused;
}
};
if (fifo_read_pending) cancel_read: {
// Cancel the pending read into the FIFO.
_ = windows.kernel32.CancelIo(handle);
// We have to wait for the handle to be signalled, i.e. for the cancellation to complete.
switch (windows.kernel32.WaitForSingleObject(handle, windows.INFINITE)) {
windows.WAIT_OBJECT_0 => {},
windows.WAIT_FAILED => return windows.unexpectedError(windows.GetLastError()),
else => unreachable,
if (r.seek > 0) r.rebase(r.buffer.len) catch unreachable;
{
var list: std.ArrayListUnmanaged(u8) = .{
.items = r.buffer[0..r.end],
.capacity = r.buffer.len,
};
defer r.buffer = list.allocatedSlice();
try list.ensureUnusedCapacity(allocator, min_len);
}
// If it completed before we canceled, make sure to tell the FIFO!
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, true)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => break :cancel_read,
};
read_any_data = true;
fifo.update(num_bytes_read);
const unused = r.buffer[r.end..];
assert(unused.len >= min_len);
return unused;
}
// Try to queue the 1-byte read.
if (0 == windows.kernel32.ReadFile(
handle,
small_buf,
small_buf.len,
&win_dummy_bytes_read,
overlapped,
)) switch (windows.GetLastError()) {
.IO_PENDING => {
// 1-byte read pending as intended
return if (read_any_data) .populated else .empty;
},
.BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
else => |err| return windows.unexpectedError(err),
};
/// Publishes `n` bytes that were written directly into the unused tail of
/// `r.buffer` by moving `r.end` forward, making them visible to users of the
/// `Reader`.
///
/// Asserts that `n` does not exceed the unused capacity of `r.buffer`.
fn advanceBufferEnd(r: *Reader, n: usize) void {
    const unused_len = r.buffer.len - r.end;
    assert(n <= unused_len);
    r.end += n;
}
// We got data back this time. Write it to the FIFO and run the main loop again.
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => unreachable,
};
try fifo.write(small_buf[0..num_bytes_read]);
read_any_data = true;
}
}
/// The `ReadFile` documentation states that `lpNumberOfBytesRead` does not have a meaningful
/// result when using overlapped I/O, but also that it cannot be `null` on Windows 7. For
/// compatibility, we point it to this dummy variable, which we never otherwise access.
/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
var win_dummy_bytes_read: u32 = undefined;
/// Simple wrapper around `GetOverlappedResult` to determine the result of a `ReadFile` operation.
/// If `!allow_aborted`, then `aborted` is never returned (`OPERATION_ABORTED` is considered unexpected).
///
/// The `ReadFile` documentation states that the number of bytes read by an overlapped `ReadFile` must be determined using `GetOverlappedResult`, even if the
/// operation immediately returns data:
/// "Use NULL for [lpNumberOfBytesRead] if this is an asynchronous operation to avoid potentially
/// erroneous results."
/// "If `hFile` was opened with `FILE_FLAG_OVERLAPPED`, the following conditions are in effect: [...]
/// The lpNumberOfBytesRead parameter should be set to NULL. Use the GetOverlappedResult function to
/// get the actual number of bytes read."
/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
fn windowsGetReadResult(
handle: windows.HANDLE,
overlapped: *windows.OVERLAPPED,
allow_aborted: bool,
) !union(enum) {
success: u32,
closed,
aborted,
} {
var num_bytes_read: u32 = undefined;
if (0 == windows.kernel32.GetOverlappedResult(
handle,
overlapped,
&num_bytes_read,
0,
)) switch (windows.GetLastError()) {
.BROKEN_PIPE => return .closed,
.OPERATION_ABORTED => |err| if (allow_aborted) {
return .aborted;
} else {
return windows.unexpectedError(err);
},
else => |err| return windows.unexpectedError(err),
/// Read as much data as possible from `handle` with `overlapped`, appending it to the buffer of
/// `r` (grown with `gpa` in steps of at least `bump_amt` bytes). Before returning, queue a read
/// into `small_buf` so that `WaitForMultipleObjects` returns when more data is available.
/// `handle` must have no pending asynchronous operation.
///
/// Returns:
/// * `.empty`: no data was read and the small read is pending.
/// * `.populated`: data was read and the small read is pending.
/// * `.closed_populated`: the pipe was reported closed after some data was read.
/// * `.closed`: the pipe was reported closed before any data was read.
fn windowsAsyncReadToFifoAndQueueSmallRead(
gpa: Allocator,
handle: windows.HANDLE,
overlapped: *windows.OVERLAPPED,
r: *Reader,
small_buf: *[128]u8,
bump_amt: usize,
) !enum { empty, populated, closed_populated, closed } {
var read_any_data = false;
while (true) {
// Inner loop: read directly into the reader's unused capacity until a read
// either goes pending (`true`) or comes back short (`false`).
const fifo_read_pending = while (true) {
const buf = try writableSliceGreedyAlloc(r, gpa, bump_amt);
// `ReadFile` takes a 32-bit length, so clamp oversized buffers.
const buf_len = math.cast(u32, buf.len) orelse math.maxInt(u32);
if (0 == windows.kernel32.ReadFile(
handle,
buf.ptr,
buf_len,
&win_dummy_bytes_read,
overlapped,
)) switch (windows.GetLastError()) {
.IO_PENDING => break true,
.BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
else => |err| return windows.unexpectedError(err),
};
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => unreachable,
};
read_any_data = true;
advanceBufferEnd(r, num_bytes_read);
if (num_bytes_read == buf_len) {
// We filled the buffer, so there's probably more data available.
continue;
} else {
// We didn't fill the buffer, so assume we're out of data.
// There is no pending read.
break false;
}
};
if (fifo_read_pending) cancel_read: {
// Cancel the pending read into the reader's buffer.
_ = windows.kernel32.CancelIo(handle);
// We have to wait for the handle to be signalled, i.e. for the cancellation to complete.
switch (windows.kernel32.WaitForSingleObject(handle, windows.INFINITE)) {
windows.WAIT_OBJECT_0 => {},
windows.WAIT_FAILED => return windows.unexpectedError(windows.GetLastError()),
else => unreachable,
}
// If it completed before we canceled, make sure the bytes are accounted for in the reader!
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, true)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => break :cancel_read,
};
read_any_data = true;
advanceBufferEnd(r, num_bytes_read);
}
// Try to queue the small read (up to `small_buf.len` bytes).
if (0 == windows.kernel32.ReadFile(
handle,
small_buf,
small_buf.len,
&win_dummy_bytes_read,
overlapped,
)) switch (windows.GetLastError()) {
.IO_PENDING => {
// Small read pending as intended.
return if (read_any_data) .populated else .empty;
},
.BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
else => |err| return windows.unexpectedError(err),
};
// We got data back this time. Copy it into the reader's buffer and run the main loop again.
const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
.success => |n| n,
.closed => return if (read_any_data) .closed_populated else .closed,
.aborted => unreachable,
};
const buf = small_buf[0..num_bytes_read];
const dest = try writableSliceGreedyAlloc(r, gpa, buf.len);
@memcpy(dest[0..buf.len], buf);
advanceBufferEnd(r, buf.len);
read_any_data = true;
}
}
/// Thin wrapper over `GetOverlappedResult` (called without waiting) that
/// classifies the outcome of a `ReadFile` operation.
///
/// * `.success` carries the number of bytes read.
/// * `.closed` is returned when the last error is `BROKEN_PIPE`.
/// * `.aborted` is returned for `OPERATION_ABORTED`, but only when
///   `allow_aborted` is set; otherwise that error is reported as unexpected.
///
/// The `ReadFile` documentation states that the number of bytes read by an
/// overlapped `ReadFile` must be determined using `GetOverlappedResult`, even
/// if the operation immediately returns data:
/// "Use NULL for [lpNumberOfBytesRead] if this is an asynchronous operation to avoid potentially
/// erroneous results."
/// "If `hFile` was opened with `FILE_FLAG_OVERLAPPED`, the following conditions are in effect: [...]
/// The lpNumberOfBytesRead parameter should be set to NULL. Use the GetOverlappedResult function to
/// get the actual number of bytes read."
/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
fn windowsGetReadResult(
    handle: windows.HANDLE,
    overlapped: *windows.OVERLAPPED,
    allow_aborted: bool,
) !union(enum) {
    success: u32,
    closed,
    aborted,
} {
    var num_bytes_read: u32 = undefined;
    const succeeded = 0 != windows.kernel32.GetOverlappedResult(
        handle,
        overlapped,
        &num_bytes_read,
        0,
    );
    if (succeeded) return .{ .success = num_bytes_read };

    switch (windows.GetLastError()) {
        .BROKEN_PIPE => return .closed,
        .OPERATION_ABORTED => |err| {
            if (allow_aborted) return .aborted;
            return windows.unexpectedError(err);
        },
        else => |err| return windows.unexpectedError(err),
    }
}
};
return .{ .success = num_bytes_read };
}
/// Given an enum, returns a struct with fields of that enum, each field
@@ -880,10 +931,10 @@ pub fn PollFiles(comptime StreamEnum: type) type {
for (&struct_fields, enum_fields) |*struct_field, enum_field| {
struct_field.* = .{
.name = enum_field.name,
.type = fs.File,
.type = std.fs.File,
.default_value_ptr = null,
.is_comptime = false,
.alignment = @alignOf(fs.File),
.alignment = @alignOf(std.fs.File),
};
}
return @Type(.{ .@"struct" = .{
@@ -898,16 +949,14 @@ test {
_ = Reader;
_ = Reader.Limited;
_ = Writer;
_ = @import("Io/bit_reader.zig");
_ = @import("Io/bit_writer.zig");
_ = @import("Io/buffered_atomic_file.zig");
_ = @import("Io/buffered_reader.zig");
_ = @import("Io/buffered_writer.zig");
_ = @import("Io/c_writer.zig");
_ = @import("Io/counting_writer.zig");
_ = @import("Io/counting_reader.zig");
_ = @import("Io/fixed_buffer_stream.zig");
_ = @import("Io/seekable_stream.zig");
_ = @import("Io/stream_source.zig");
_ = BitReader;
_ = BitWriter;
_ = BufferedReader;
_ = BufferedWriter;
_ = CountingWriter;
_ = CountingReader;
_ = FixedBufferStream;
_ = SeekableStream;
_ = tty;
_ = @import("Io/test.zig");
}
+5 -3
View File
@@ -373,11 +373,11 @@ pub fn discard(self: Self) anyerror!u64 {
}
/// Helper for bridging to the new `Reader` API while upgrading.
pub fn adaptToNewApi(self: *const Self) Adapter {
pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter {
return .{
.derp_reader = self.*,
.new_interface = .{
.buffer = &.{},
.buffer = buffer,
.vtable = &.{ .stream = Adapter.stream },
.seek = 0,
.end = 0,
@@ -393,10 +393,12 @@ pub const Adapter = struct {
fn stream(r: *std.io.Reader, w: *std.io.Writer, limit: std.io.Limit) std.io.Reader.StreamError!usize {
const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r));
const buf = limit.slice(try w.writableSliceGreedy(1));
return a.derp_reader.read(buf) catch |err| {
const n = a.derp_reader.read(buf) catch |err| {
a.err = err;
return error.ReadFailed;
};
w.advance(n);
return n;
}
};
+6 -1
View File
@@ -100,7 +100,12 @@ pub const Adapter = struct {
fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize {
_ = splat;
const a: *@This() = @fieldParentPtr("new_interface", w);
const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w));
const buffered = w.buffered();
if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| {
a.err = err;
return error.WriteFailed;
});
return a.derp_writer.write(data[0]) catch |err| {
a.err = err;
return error.WriteFailed;
+70 -70
View File
@@ -67,6 +67,18 @@ pub const VTable = struct {
///
/// This function is only called when `buffer` is empty.
discard: *const fn (r: *Reader, limit: Limit) Error!usize = defaultDiscard,
/// Ensures `capacity` more data can be buffered without rebasing.
///
/// Asserts `capacity` is within buffer capacity, or that the stream ends
/// within `capacity` bytes.
///
/// Only called when `capacity` cannot fit into the unused capacity of
/// `buffer`.
///
/// The default implementation moves buffered data to the start of
/// `buffer`, setting `seek` to zero, and cannot fail.
rebase: *const fn (r: *Reader, capacity: usize) RebaseError!void = defaultRebase,
};
pub const StreamError = error{
@@ -97,6 +109,10 @@ pub const ShortError = error{
ReadFailed,
};
/// Error set returned by `rebase` and by `VTable.rebase` implementations.
pub const RebaseError = error{
EndOfStream,
};
pub const failing: Reader = .{
.vtable = &.{
.stream = failingStream,
@@ -122,6 +138,7 @@ pub fn fixed(buffer: []const u8) Reader {
.vtable = &.{
.stream = endingStream,
.discard = endingDiscard,
.rebase = endingRebase,
},
// This cast is safe because all potential writes to it will instead
// return `error.EndOfStream`.
@@ -179,6 +196,38 @@ pub fn streamExact(r: *Reader, w: *Writer, n: usize) StreamError!void {
while (remaining != 0) remaining -= try r.stream(w, .limited(remaining));
}
/// "Pump" exactly `n` bytes from the reader to the writer, where `n` is a
/// 64-bit count.
///
/// Calls `stream` repeatedly, each time limited to the bytes still
/// outstanding, until the full amount has been transferred.
pub fn streamExact64(r: *Reader, w: *Writer, n: u64) StreamError!void {
    var bytes_left: u64 = n;
    while (bytes_left > 0) {
        const transferred = try r.stream(w, .limited64(bytes_left));
        bytes_left -= transferred;
    }
}
/// "Pump" exactly `n` bytes from the reader to the writer.
///
/// When draining `w`, ensures that at least `preserve_len` bytes remain
/// buffered.
///
/// Asserts `Writer.buffer` capacity exceeds `preserve_len`.
pub fn streamExactPreserve(r: *Reader, w: *Writer, preserve_len: usize, n: usize) StreamError!void {
// Fast path: all `n` bytes fit in the writer's unused capacity.
if (w.end + n <= w.buffer.len) {
@branchHint(.likely);
return streamExact(r, w, n);
}
// If `n` is large, we can ignore `preserve_len` up to a point.
var remaining = n;
while (remaining > preserve_len) {
assert(remaining != 0);
// Stream everything except the final `preserve_len` bytes with no
// preservation constraint.
remaining -= try r.stream(w, .limited(remaining - preserve_len));
// As soon as the rest fits in the unused capacity, finish directly.
if (w.end + remaining <= w.buffer.len) return streamExact(r, w, remaining);
}
// All the next bytes received must be preserved.
// NOTE(review): the shift below keeps only the last `preserve_len` buffered
// bytes and overwrites whatever preceded them; no drain is visible here, so
// confirm that dropping those earlier bytes is intended.
if (preserve_len < w.end) {
@memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]);
w.end = preserve_len;
}
return streamExact(r, w, remaining);
}
/// "Pump" data from the reader to the writer, handling `error.EndOfStream` as
/// a success case.
///
@@ -234,7 +283,7 @@ pub fn allocRemaining(r: *Reader, gpa: Allocator, limit: Limit) LimitedAllocErro
/// such case, the next byte that would be read will be the first one to exceed
/// `limit`, and all preceding bytes have been appended to `list`.
///
/// Asserts `buffer` has nonzero capacity.
/// If `limit` is not `Limit.unlimited`, asserts `buffer` has nonzero capacity.
///
/// See also:
/// * `allocRemaining`
@@ -245,7 +294,7 @@ pub fn appendRemaining(
list: *std.ArrayListAlignedUnmanaged(u8, alignment),
limit: Limit,
) LimitedAllocError!void {
assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
if (limit != .unlimited) assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
const buffer_contents = r.buffer[r.seek..r.end];
const copy_len = limit.minInt(buffer_contents.len);
try list.appendSlice(gpa, r.buffer[0..copy_len]);
@@ -748,11 +797,8 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
@branchHint(.likely);
return buffer[seek .. end + 1];
}
if (r.vtable.stream == &endingStream) {
// Protect the `@constCast` of `fixed`.
return error.EndOfStream;
}
r.rebase();
// TODO take a parameter for max search length rather than relying on buffer capacity
try rebase(r, r.buffer.len);
while (r.buffer.len - r.end != 0) {
const end_cap = r.buffer[r.end..];
var writer: Writer = .fixed(end_cap);
@@ -1018,11 +1064,7 @@ fn fillUnbuffered(r: *Reader, n: usize) Error!void {
};
if (r.seek + n <= r.end) return;
};
if (r.vtable.stream == &endingStream) {
// Protect the `@constCast` of `fixed`.
return error.EndOfStream;
}
rebaseCapacity(r, n);
try rebase(r, n);
var writer: Writer = .{
.buffer = r.buffer,
.vtable = &.{ .drain = Writer.fixedDrain },
@@ -1042,7 +1084,7 @@ fn fillUnbuffered(r: *Reader, n: usize) Error!void {
///
/// Asserts buffer capacity is at least 1.
pub fn fillMore(r: *Reader) Error!void {
rebaseCapacity(r, 1);
try rebase(r, 1);
var writer: Writer = .{
.buffer = r.buffer,
.end = r.end,
@@ -1219,7 +1261,7 @@ pub fn takeLeb128(r: *Reader, comptime Result: type) TakeLeb128Error!Result {
pub fn expandTotalCapacity(r: *Reader, allocator: Allocator, n: usize) Allocator.Error!void {
if (n <= r.buffer.len) return;
if (r.seek > 0) rebase(r);
if (r.seek > 0) rebase(r, r.buffer.len);
var list: ArrayList(u8) = .{
.items = r.buffer[0..r.end],
.capacity = r.buffer.len,
@@ -1235,37 +1277,6 @@ pub fn fillAlloc(r: *Reader, allocator: Allocator, n: usize) FillAllocError!void
return fill(r, n);
}
/// Returns a slice into the unused capacity of `buffer` with at least
/// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary.
///
/// After calling this function, typically the caller will follow up with a
/// call to `advanceBufferEnd` to report the actual number of bytes buffered.
pub fn writableSliceGreedyAlloc(r: *Reader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 {
{
const unused = r.buffer[r.end..];
if (unused.len >= min_len) return unused;
}
if (r.seek > 0) rebase(r);
{
var list: ArrayList(u8) = .{
.items = r.buffer[0..r.end],
.capacity = r.buffer.len,
};
defer r.buffer = list.allocatedSlice();
try list.ensureUnusedCapacity(allocator, min_len);
}
const unused = r.buffer[r.end..];
assert(unused.len >= min_len);
return unused;
}
/// After writing directly into the unused capacity of `buffer`, this function
/// updates `end` so that users of `Reader` can receive the data.
pub fn advanceBufferEnd(r: *Reader, n: usize) void {
assert(n <= r.buffer.len - r.end);
r.end += n;
}
fn takeMultipleOf7Leb128(r: *Reader, comptime Result: type) TakeLeb128Error!Result {
const result_info = @typeInfo(Result).int;
comptime assert(result_info.bits % 7 == 0);
@@ -1296,37 +1307,20 @@ fn takeMultipleOf7Leb128(r: *Reader, comptime Result: type) TakeLeb128Error!Resu
}
}
/// Left-aligns data such that `r.seek` becomes zero.
///
/// If `r.seek` is not already zero then `buffer` is mutated, making it illegal
/// to call this function with a const-casted `buffer`, such as in the case of
/// `fixed`. This issue can be avoided:
/// * in implementations, by attempting a read before a rebase, in which
/// case the read will return `error.EndOfStream`, preventing the rebase.
/// * in usage, by copying into a mutable buffer before initializing `fixed`.
pub fn rebase(r: *Reader) void {
if (r.seek == 0) return;
/// Ensures `capacity` more data can be buffered without rebasing.
///
/// Returns immediately when the unused tail of `buffer` already has room for
/// `capacity` bytes; otherwise defers to the `VTable.rebase` implementation.
pub fn rebase(r: *Reader, capacity: usize) RebaseError!void {
    const required_end = r.end + capacity;
    if (required_end > r.buffer.len) return r.vtable.rebase(r, capacity);
}
/// Default `VTable.rebase` implementation.
///
/// When `capacity` more bytes do not fit after `end`, slides the unread bytes
/// (`buffer[seek..end]`) to the start of `buffer` so that `seek` becomes zero.
/// Never returns an error.
pub fn defaultRebase(r: *Reader, capacity: usize) RebaseError!void {
    const max_end = r.buffer.len - capacity;
    if (r.end > max_end) {
        const pending = r.buffer[r.seek..r.end];
        @memmove(r.buffer[0..pending.len], pending);
        r.seek = 0;
        r.end = pending.len;
    }
}
/// Ensures `capacity` more data can be buffered without rebasing, by rebasing
/// if necessary.
///
/// Asserts `capacity` is within the buffer capacity.
///
/// If the rebase occurs then `buffer` is mutated, making it illegal to call
/// this function with a const-casted `buffer`, such as in the case of `fixed`.
/// This issue can be avoided:
/// * in implementations, by attempting a read before a rebase, in which
/// case the read will return `error.EndOfStream`, preventing the rebase.
/// * in usage, by copying into a mutable buffer before initializing `fixed`.
pub fn rebaseCapacity(r: *Reader, capacity: usize) void {
if (r.end > r.buffer.len - capacity) rebase(r);
}
/// Advances the stream and decreases the size of the storage buffer by `n`,
/// returning the range of bytes no longer accessible by `r`.
///
@@ -1682,6 +1676,12 @@ fn endingDiscard(r: *Reader, limit: Limit) Error!usize {
return error.EndOfStream;
}
/// `VTable.rebase` implementation installed by `fixed`: never touches the
/// buffer and always reports `error.EndOfStream`.
fn endingRebase(r: *Reader, capacity: usize) RebaseError!void {
    _ = capacity;
    _ = r;
    return error.EndOfStream;
}
fn failingStream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize {
_ = r;
_ = w;
+85 -28
View File
@@ -256,10 +256,10 @@ test "fixed buffer flush" {
try testing.expectEqual(10, buffer[0]);
}
/// Calls `VTable.drain` but hides the last `preserve_length` bytes from the
/// Calls `VTable.drain` but hides the last `preserve_len` bytes from the
/// implementation, keeping them buffered.
pub fn drainPreserve(w: *Writer, preserve_length: usize) Error!void {
const temp_end = w.end -| preserve_length;
pub fn drainPreserve(w: *Writer, preserve_len: usize) Error!void {
const temp_end = w.end -| preserve_len;
const preserved = w.buffer[temp_end..w.end];
w.end = temp_end;
defer w.end += preserved.len;
@@ -310,24 +310,38 @@ pub fn writableSliceGreedy(w: *Writer, minimum_length: usize) Error![]u8 {
}
/// Asserts the provided buffer has total capacity enough for `minimum_length`
/// and `preserve_length` combined.
/// and `preserve_len` combined.
///
/// Does not `advance` the buffer end position.
///
/// When draining the buffer, ensures that at least `preserve_length` bytes
/// When draining the buffer, ensures that at least `preserve_len` bytes
/// remain buffered.
///
/// If `preserve_length` is zero, this is equivalent to `writableSliceGreedy`.
pub fn writableSliceGreedyPreserve(w: *Writer, preserve_length: usize, minimum_length: usize) Error![]u8 {
assert(w.buffer.len >= preserve_length + minimum_length);
/// If `preserve_len` is zero, this is equivalent to `writableSliceGreedy`.
pub fn writableSliceGreedyPreserve(w: *Writer, preserve_len: usize, minimum_length: usize) Error![]u8 {
assert(w.buffer.len >= preserve_len + minimum_length);
while (w.buffer.len - w.end < minimum_length) {
try drainPreserve(w, preserve_length);
try drainPreserve(w, preserve_len);
} else {
@branchHint(.likely);
return w.buffer[w.end..];
}
}
/// Returns exactly `len` writable bytes from the buffer and advances the
/// buffer end position past them.
///
/// Asserts the provided buffer has total capacity enough for `len`.
///
/// When draining the buffer, ensures that at least `preserve_len` bytes
/// remain buffered.
///
/// If `preserve_len` is zero, this is equivalent to `writableSlice`.
pub fn writableSlicePreserve(w: *Writer, preserve_len: usize, len: usize) Error![]u8 {
    const available = try writableSliceGreedyPreserve(w, preserve_len, len);
    const result = available[0..len];
    w.advance(len);
    return result;
}
pub const WritableVectorIterator = struct {
first: []u8,
middle: []const []u8 = &.{},
@@ -523,16 +537,16 @@ pub fn write(w: *Writer, bytes: []const u8) Error!usize {
return w.vtable.drain(w, &.{bytes}, 1);
}
/// Asserts `buffer` capacity exceeds `preserve_length`.
pub fn writePreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!usize {
assert(preserve_length <= w.buffer.len);
/// Asserts `buffer` capacity exceeds `preserve_len`.
pub fn writePreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!usize {
assert(preserve_len <= w.buffer.len);
if (w.end + bytes.len <= w.buffer.len) {
@branchHint(.likely);
@memcpy(w.buffer[w.end..][0..bytes.len], bytes);
w.end += bytes.len;
return bytes.len;
}
const temp_end = w.end -| preserve_length;
const temp_end = w.end -| preserve_len;
const preserved = w.buffer[temp_end..w.end];
w.end = temp_end;
defer w.end += preserved.len;
@@ -552,13 +566,13 @@ pub fn writeAll(w: *Writer, bytes: []const u8) Error!void {
/// Calls `drain` as many times as necessary such that all of `bytes` are
/// transferred.
///
/// When draining the buffer, ensures that at least `preserve_length` bytes
/// When draining the buffer, ensures that at least `preserve_len` bytes
/// remain buffered.
///
/// Asserts `buffer` capacity exceeds `preserve_length`.
pub fn writeAllPreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!void {
/// Asserts `buffer` capacity exceeds `preserve_len`.
pub fn writeAllPreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!void {
var index: usize = 0;
while (index < bytes.len) index += try w.writePreserve(preserve_length, bytes[index..]);
while (index < bytes.len) index += try w.writePreserve(preserve_len, bytes[index..]);
}
/// Renders fmt string with args, calling `writer` with slices of bytes.
@@ -761,11 +775,11 @@ pub fn writeByte(w: *Writer, byte: u8) Error!void {
}
}
/// When draining the buffer, ensures that at least `preserve_length` bytes
/// When draining the buffer, ensures that at least `preserve_len` bytes
/// remain buffered.
pub fn writeBytePreserve(w: *Writer, preserve_length: usize, byte: u8) Error!void {
pub fn writeBytePreserve(w: *Writer, preserve_len: usize, byte: u8) Error!void {
while (w.buffer.len - w.end == 0) {
try drainPreserve(w, preserve_length);
try drainPreserve(w, preserve_len);
} else {
@branchHint(.likely);
w.buffer[w.end] = byte;
@@ -788,10 +802,42 @@ test splatByteAll {
try testing.expectEqualStrings("7" ** 45, aw.writer.buffered());
}
/// Writes `byte` exactly `n` times, repeating short `splatByte` calls until
/// all `n` have been accepted.
///
/// When everything fits in the unused part of `buffer`, the bytes are placed
/// there directly and no other work is done. Otherwise the work is split in
/// two phases around `preserve_len`; see the comments in the body.
pub fn splatBytePreserve(w: *Writer, preserve_len: usize, byte: u8, n: usize) Error!void {
const new_end = w.end + n;
// Fast path: the whole run fits in the remaining buffer capacity.
if (new_end <= w.buffer.len) {
@memset(w.buffer[w.end..][0..n], byte);
w.end = new_end;
return;
}
// If `n` is large, we can ignore `preserve_len` up to a point.
var remaining = n;
while (remaining > preserve_len) {
// Already implied by the loop condition, since `preserve_len` is unsigned.
assert(remaining != 0);
// Request everything except the final `preserve_len` bytes; only the
// amount `splatByte` reports as accepted is subtracted.
remaining -= try splatByte(w, byte, remaining - preserve_len);
// Once the rest fits in the buffer, finish without another `splatByte`.
if (w.end + remaining <= w.buffer.len) {
@memset(w.buffer[w.end..][0..remaining], byte);
w.end += remaining;
return;
}
}
// All the next bytes received must be preserved.
if (preserve_len < w.end) {
// Slide the last `preserve_len` buffered bytes to the front of the buffer
// and shrink `end` to match.
// NOTE(review): nothing in this function drains `buffer[0 .. w.end - preserve_len]`
// before it is overwritten here — confirm that dropping it is intended.
@memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]);
w.end = preserve_len;
}
while (remaining > 0) remaining -= try w.splatByte(byte, remaining);
}
/// Emits `byte` up to `n` times and returns how many copies were accepted;
/// the result may be less than `n`.
///
/// At most one underlying `VTable.drain` is performed.
pub fn splatByte(w: *Writer, byte: u8, n: usize) Error!usize {
    const fits_in_buffer = w.end + n <= w.buffer.len;
    if (!fits_in_buffer) {
        @branchHint(.unlikely);
        // Not enough room: hand a one-byte pattern to the generic splat path.
        return writeSplat(w, &.{&.{byte}}, n);
    }
    const dest = w.buffer[w.end..][0..n];
    @memset(dest, byte);
    w.end += n;
    return n;
}
@@ -801,9 +847,10 @@ pub fn splatBytesAll(w: *Writer, bytes: []const u8, splat: usize) Error!void {
var remaining_bytes: usize = bytes.len * splat;
remaining_bytes -= try w.splatBytes(bytes, splat);
while (remaining_bytes > 0) {
const leftover = remaining_bytes % bytes.len;
const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover ..], bytes };
remaining_bytes -= try w.writeSplat(&buffers, splat);
const leftover_splat = remaining_bytes / bytes.len;
const leftover_bytes = remaining_bytes % bytes.len;
const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover_bytes ..], bytes };
remaining_bytes -= try w.writeSplat(&buffers, leftover_splat);
}
}
@@ -1564,17 +1611,23 @@ pub fn printFloatHexOptions(w: *Writer, value: anytype, options: std.fmt.Number)
}
pub fn printFloatHex(w: *Writer, value: anytype, case: std.fmt.Case, opt_precision: ?usize) Error!void {
if (std.math.signbit(value)) try w.writeByte('-');
if (std.math.isNan(value)) return w.writeAll(switch (case) {
const v = switch (@TypeOf(value)) {
// comptime_float internally is a f128; this preserves precision.
comptime_float => @as(f128, value),
else => value,
};
if (std.math.signbit(v)) try w.writeByte('-');
if (std.math.isNan(v)) return w.writeAll(switch (case) {
.lower => "nan",
.upper => "NAN",
});
if (std.math.isInf(value)) return w.writeAll(switch (case) {
if (std.math.isInf(v)) return w.writeAll(switch (case) {
.lower => "inf",
.upper => "INF",
});
const T = @TypeOf(value);
const T = @TypeOf(v);
const TU = std.meta.Int(.unsigned, @bitSizeOf(T));
const mantissa_bits = std.math.floatMantissaBits(T);
@@ -1584,7 +1637,7 @@ pub fn printFloatHex(w: *Writer, value: anytype, case: std.fmt.Case, opt_precisi
const exponent_mask = (1 << exponent_bits) - 1;
const exponent_bias = (1 << (exponent_bits - 1)) - 1;
const as_bits: TU = @bitCast(value);
const as_bits: TU = @bitCast(v);
var mantissa = as_bits & mantissa_mask;
var exponent: i32 = @as(u16, @truncate((as_bits >> mantissa_bits) & exponent_mask));
@@ -2239,6 +2292,10 @@ pub const Discarding = struct {
pub fn sendFile(w: *Writer, file_reader: *File.Reader, limit: Limit) FileError!usize {
if (File.Handle == void) return error.Unimplemented;
switch (builtin.zig_backend) {
else => {},
.stage2_aarch64 => return error.Unimplemented,
}
const d: *Discarding = @alignCast(@fieldParentPtr("writer", w));
d.count += w.end;
w.end = 0;
-55
View File
@@ -1,55 +0,0 @@
const std = @import("../std.zig");
const mem = std.mem;
const fs = std.fs;
const File = std.fs.File;
/// Heap-allocated bundle of an atomic file and a buffered writer on top of it.
pub const BufferedAtomicFile = struct {
    atomic_file: fs.AtomicFile,
    file_writer: File.Writer,
    buffered_writer: BufferedWriter,
    allocator: mem.Allocator,

    pub const buffer_size = 4096;
    pub const BufferedWriter = std.io.BufferedWriter(buffer_size, File.Writer);
    pub const Writer = std.io.GenericWriter(*BufferedWriter, BufferedWriter.Error, BufferedWriter.write);

    /// Allocates a `BufferedAtomicFile` and opens the atomic file at
    /// `dest_path` inside `dir`. Release it with `destroy`.
    ///
    /// TODO when https://github.com/ziglang/zig/issues/2761 is solved
    /// this API will not need an allocator
    pub fn create(
        allocator: mem.Allocator,
        dir: fs.Dir,
        dest_path: []const u8,
        atomic_file_options: fs.Dir.AtomicFileOptions,
    ) !*BufferedAtomicFile {
        const baf = try allocator.create(BufferedAtomicFile);
        errdefer allocator.destroy(baf);

        // Only `allocator` is known up front; the rest is filled in below.
        baf.* = .{
            .atomic_file = undefined,
            .file_writer = undefined,
            .buffered_writer = undefined,
            .allocator = allocator,
        };

        baf.atomic_file = try dir.atomicFile(dest_path, atomic_file_options);
        errdefer baf.atomic_file.deinit();

        baf.file_writer = baf.atomic_file.file.deprecatedWriter();
        baf.buffered_writer = .{ .unbuffered_writer = baf.file_writer };
        return baf;
    }

    /// Must always be called, even after a successful `finish()`.
    pub fn destroy(baf: *BufferedAtomicFile) void {
        baf.atomic_file.deinit();
        baf.allocator.destroy(baf);
    }

    /// Flushes the buffered writer, then finishes the atomic file.
    pub fn finish(baf: *BufferedAtomicFile) !void {
        try baf.buffered_writer.flush();
        try baf.atomic_file.finish();
    }

    /// Returns a writer whose output goes through `buffered_writer`.
    pub fn writer(baf: *BufferedAtomicFile) Writer {
        return .{ .context = &baf.buffered_writer };
    }
};
-44
View File
@@ -1,44 +0,0 @@
const std = @import("../std.zig");
const builtin = @import("builtin");
const io = std.io;
const testing = std.testing;
/// Writer type whose context is a C `FILE` stream and whose write function is
/// `cWriterWrite`.
pub const CWriter = io.GenericWriter(*std.c.FILE, std.fs.File.WriteError, cWriterWrite);
/// Wraps `c_file` in a `CWriter`. Only the pointer is stored in the result.
pub fn cWriter(c_file: *std.c.FILE) CWriter {
return .{ .context = c_file };
}
/// Forwards `bytes` to `fwrite` on `c_file`.
///
/// Returns the number of bytes `fwrite` reports as written, which may be
/// fewer than `bytes.len`. When a non-empty request makes no progress at all,
/// `errno` is translated into a `std.fs.File.WriteError`.
fn cWriterWrite(c_file: *std.c.FILE, bytes: []const u8) std.fs.File.WriteError!usize {
    const amt_written = std.c.fwrite(bytes.ptr, 1, bytes.len, c_file);
    // The count is unsigned, so a `>= 0` test can never detect failure. A
    // failure shows up as zero progress on a non-empty request; reporting
    // that as success would make callers that loop until done spin forever.
    if (amt_written != 0 or bytes.len == 0) return amt_written;
    switch (@as(std.c.E, @enumFromInt(std.c._errno().*))) {
        // No progress but no errno either: report it instead of invoking
        // undefined behavior through `unreachable`.
        .SUCCESS => return error.Unexpected,
        .INVAL => unreachable,
        .FAULT => unreachable,
        .AGAIN => unreachable, // this is a blocking API
        .BADF => unreachable, // always a race condition
        .DESTADDRREQ => unreachable, // connect was never called
        .DQUOT => return error.DiskQuota,
        .FBIG => return error.FileTooBig,
        .IO => return error.InputOutput,
        .NOSPC => return error.NoSpaceLeft,
        .PERM => return error.PermissionDenied,
        .PIPE => return error.BrokenPipe,
        else => |err| return std.posix.unexpectedErrno(err),
    }
}
test cWriter {
// Skipped when libc is not linked or the target OS is WASI.
if (!builtin.link_libc or builtin.os.tag == .wasi) return error.SkipZigTest;
const filename = "tmp_io_test_file.txt";
const out_file = std.c.fopen(filename, "w") orelse return error.UnableToOpenTestFile;
// Close the stream and best-effort delete the scratch file on every exit path.
defer {
_ = std.c.fclose(out_file);
std.fs.cwd().deleteFileZ(filename) catch {};
}
const writer = cWriter(out_file);
try writer.print("hi: {}\n", .{@as(i32, 123)});
}
-55
View File
@@ -1,55 +0,0 @@
const std = @import("../std.zig");
const io = std.io;
const mem = std.mem;
const assert = std.debug.assert;
/// Used to detect if the data written to a stream differs from a source buffer.
///
/// Every write is forwarded to `underlying_writer`. Only the bytes it reports
/// as accepted are compared against `source`, so a caller that retries the
/// remainder of a short write does not produce a false positive.
pub fn ChangeDetectionStream(comptime WriterType: type) type {
    return struct {
        const Self = @This();
        pub const Error = WriterType.Error;
        pub const Writer = io.GenericWriter(*Self, Error, write);

        /// Latched once a written byte differs from `source`, or once more
        /// bytes have been written than `source` contains.
        anything_changed: bool,
        underlying_writer: WriterType,
        /// Number of leading bytes of `source` matched so far.
        source_index: usize,
        source: []const u8,

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        fn write(self: *Self, bytes: []const u8) Error!usize {
            // Forward first: the comparison must cover exactly what was
            // accepted, not what was offered.
            const written = try self.underlying_writer.write(bytes);
            if (!self.anything_changed) {
                const accepted = bytes[0..written];
                const end = self.source_index + accepted.len;
                if (end > self.source.len) {
                    self.anything_changed = true;
                } else {
                    const expected = self.source[self.source_index..end];
                    self.source_index = end;
                    if (!mem.eql(u8, accepted, expected)) {
                        self.anything_changed = true;
                    }
                }
            }
            return written;
        }

        /// Returns true when the written data differs from `source`,
        /// including when fewer bytes were written than `source` holds.
        pub fn changeDetected(self: *Self) bool {
            return self.anything_changed or (self.source_index != self.source.len);
        }
    };
}
/// Creates a `ChangeDetectionStream` that forwards to `underlying_writer` and
/// compares everything written against `source`, starting at its first byte.
pub fn changeDetectionStream(
    source: []const u8,
    underlying_writer: anytype,
) ChangeDetectionStream(@TypeOf(underlying_writer)) {
    return .{
        .source = source,
        .source_index = 0,
        .underlying_writer = underlying_writer,
        .anything_changed = false,
    };
}
-40
View File
@@ -1,40 +0,0 @@
const std = @import("../std.zig");
const io = std.io;
const assert = std.debug.assert;
/// A writer wrapper that records whether a given byte has ever been passed to
/// it. All bytes are forwarded unchanged to `underlying_writer`.
pub fn FindByteWriter(comptime UnderlyingWriter: type) type {
    return struct {
        const Self = @This();
        pub const Error = UnderlyingWriter.Error;
        pub const Writer = io.GenericWriter(*Self, Error, write);

        underlying_writer: UnderlyingWriter,
        /// Latched to true the first time `byte` is seen; `write` never
        /// clears it.
        byte_found: bool,
        /// The byte value being searched for.
        byte: u8,

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        fn write(self: *Self, bytes: []const u8) Error!usize {
            // Once the byte has been found there is nothing left to scan for.
            if (!self.byte_found) {
                self.byte_found = std.mem.indexOfScalar(u8, bytes, self.byte) != null;
            }
            return self.underlying_writer.write(bytes);
        }
    };
}
/// Creates a `FindByteWriter` that looks for `byte` and forwards to
/// `underlying_writer`; the found flag starts out cleared.
pub fn findByteWriter(byte: u8, underlying_writer: anytype) FindByteWriter(@TypeOf(underlying_writer)) {
    return .{
        .byte = byte,
        .byte_found = false,
        .underlying_writer = underlying_writer,
    };
}
-53
View File
@@ -1,53 +0,0 @@
const std = @import("../std.zig");
const io = std.io;
/// Builds a writer type that fans every write out to each stream in the
/// `Writers` tuple. Its error set is the union of the streams' `Error` sets.
pub fn MultiWriter(comptime Writers: type) type {
    const merged_errors = comptime blk: {
        var set = error{};
        for (@typeInfo(Writers).@"struct".fields) |field| {
            set = set || field.type.Error;
        }
        break :blk set;
    };

    return struct {
        const Self = @This();

        streams: Writers,

        pub const Error = merged_errors;
        pub const Writer = io.GenericWriter(*Self, Error, write);

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        /// Writes all of `bytes` to each stream in tuple order and reports
        /// the full length as written.
        pub fn write(self: *Self, bytes: []const u8) Error!usize {
            inline for (self.streams) |sink| {
                try sink.writeAll(bytes);
            }
            return bytes.len;
        }
    };
}
/// Wraps the tuple `streams` in a `MultiWriter`; the tuple is stored by value
/// in the result's `streams` field.
pub fn multiWriter(streams: anytype) MultiWriter(@TypeOf(streams)) {
return .{ .streams = streams };
}
const testing = std.testing;
test "MultiWriter" {
    var tmp = testing.tmpDir(.{});
    defer tmp.cleanup();

    var buf1: [255]u8 = undefined;
    var fbs1 = io.fixedBufferStream(&buf1);
    var buf2: [255]u8 = undefined;

    {
        // Scope the file so it is closed on every path, including a failed
        // `print`, and before it is read back below.
        var f = try tmp.dir.createFile("t.txt", .{});
        defer f.close();

        var stream = multiWriter(.{ fbs1.writer(), f.writer() });
        try stream.writer().print("HI", .{});
    }

    try testing.expectEqualSlices(u8, "HI", fbs1.getWritten());
    try testing.expectEqualSlices(u8, "HI", try tmp.dir.readFile("t.txt", &buf2));
}
-127
View File
@@ -1,127 +0,0 @@
const std = @import("../std.zig");
const builtin = @import("builtin");
const io = std.io;
/// Provides `io.GenericReader`, `io.GenericWriter`, and `io.SeekableStream` over either an
/// in-memory buffer or a file.
/// Writing to a `const_buffer` source always fails with `error.AccessDenied`.
/// The error set of the stream functions is the error set of the corresponding file functions.
pub const StreamSource = union(enum) {
    // TODO: expose UEFI files to std.os in a way that allows this to be true
    const has_file = (builtin.os.tag != .freestanding and builtin.os.tag != .uefi);

    /// The stream access is redirected to this buffer.
    buffer: io.FixedBufferStream([]u8),

    /// The stream access is redirected to this buffer.
    /// Writing to the source will always yield `error.AccessDenied`.
    const_buffer: io.FixedBufferStream([]const u8),

    /// The stream access is redirected to this file.
    /// On freestanding, this must never be initialized!
    file: if (has_file) std.fs.File else void,

    pub const ReadError = io.FixedBufferStream([]u8).ReadError || (if (has_file) std.fs.File.ReadError else error{});
    pub const WriteError = error{AccessDenied} || io.FixedBufferStream([]u8).WriteError || (if (has_file) std.fs.File.WriteError else error{});
    pub const SeekError = io.FixedBufferStream([]u8).SeekError || (if (has_file) std.fs.File.SeekError else error{});
    pub const GetSeekPosError = io.FixedBufferStream([]u8).GetSeekPosError || (if (has_file) std.fs.File.GetSeekPosError else error{});

    pub const Reader = io.GenericReader(*StreamSource, ReadError, read);
    pub const Writer = io.GenericWriter(*StreamSource, WriteError, write);
    pub const SeekableStream = io.SeekableStream(
        *StreamSource,
        SeekError,
        GetSeekPosError,
        seekTo,
        seekBy,
        getPos,
        getEndPos,
    );

    /// Reads into `dest` from whichever backing store is active.
    pub fn read(self: *StreamSource, dest: []u8) ReadError!usize {
        return switch (self.*) {
            .buffer => |*fbs| fbs.read(dest),
            .const_buffer => |*fbs| fbs.read(dest),
            .file => |f| if (has_file) f.read(dest) else unreachable,
        };
    }

    /// Writes `bytes` to the active backing store; const buffers are rejected.
    pub fn write(self: *StreamSource, bytes: []const u8) WriteError!usize {
        return switch (self.*) {
            .buffer => |*fbs| fbs.write(bytes),
            .const_buffer => error.AccessDenied,
            .file => |f| if (has_file) f.write(bytes) else unreachable,
        };
    }

    pub fn seekTo(self: *StreamSource, pos: u64) SeekError!void {
        return switch (self.*) {
            .buffer => |*fbs| fbs.seekTo(pos),
            .const_buffer => |*fbs| fbs.seekTo(pos),
            .file => |f| if (has_file) f.seekTo(pos) else unreachable,
        };
    }

    pub fn seekBy(self: *StreamSource, amt: i64) SeekError!void {
        return switch (self.*) {
            .buffer => |*fbs| fbs.seekBy(amt),
            .const_buffer => |*fbs| fbs.seekBy(amt),
            .file => |f| if (has_file) f.seekBy(amt) else unreachable,
        };
    }

    pub fn getEndPos(self: *StreamSource) GetSeekPosError!u64 {
        return switch (self.*) {
            .buffer => |*fbs| fbs.getEndPos(),
            .const_buffer => |*fbs| fbs.getEndPos(),
            .file => |f| if (has_file) f.getEndPos() else unreachable,
        };
    }

    pub fn getPos(self: *StreamSource) GetSeekPosError!u64 {
        return switch (self.*) {
            .buffer => |*fbs| fbs.getPos(),
            .const_buffer => |*fbs| fbs.getPos(),
            .file => |f| if (has_file) f.getPos() else unreachable,
        };
    }

    pub fn reader(self: *StreamSource) Reader {
        return .{ .context = self };
    }

    pub fn writer(self: *StreamSource) Writer {
        return .{ .context = self };
    }

    pub fn seekableStream(self: *StreamSource) SeekableStream {
        return .{ .context = self };
    }
};
test "refs" {
// Reference every declaration of `StreamSource` so each one gets analyzed.
std.testing.refAllDecls(StreamSource);
}
test "mutable buffer" {
var buffer: [64]u8 = undefined;
var source = StreamSource{ .buffer = std.io.fixedBufferStream(&buffer) };
// Write through the generic writer, then check what landed in the buffer.
var writer = source.writer();
try writer.writeAll("Hello, World!");
try std.testing.expectEqualStrings("Hello, World!", source.buffer.getWritten());
}
test "const buffer" {
// 13 bytes of text followed by 51 filler bytes, 64 in total.
const buffer: [64]u8 = "Hello, World!".* ++ ([1]u8{0xAA} ** 51);
var source = StreamSource{ .const_buffer = std.io.fixedBufferStream(&buffer) };
var reader = source.reader();
// Read back only the text prefix.
var dst_buffer: [13]u8 = undefined;
try reader.readNoEof(&dst_buffer);
try std.testing.expectEqualStrings("Hello, World!", &dst_buffer);
}
+73 -2
View File
@@ -25,6 +25,7 @@ redraw_event: std.Thread.ResetEvent,
/// Accessed atomically.
done: bool,
need_clear: bool,
status: Status,
refresh_rate_ns: u64,
initial_delay_ns: u64,
@@ -47,6 +48,22 @@ node_freelist: Freelist,
/// value may at times temporarily exceed the node count.
node_end_index: u32,
/// Application-level status kept in the `status` field of `Progress` and
/// updated through `setStatus`.
pub const Status = enum {
/// Indicates the application is progressing towards completion of a task.
/// Unless the application is interactive, this is the only status the
/// program will ever have!
working,
/// The application has completed an operation, and is now waiting for user
/// input rather than calling exit(0).
success,
/// The application encountered an error, and is now waiting for user input
/// rather than calling exit(1).
failure,
/// The application encountered at least one error, but is still working on
/// more tasks.
failure_working,
};
const Freelist = packed struct(u32) {
head: Node.OptionalIndex,
/// Whenever `node_freelist` is added to, this generation is incremented
@@ -383,6 +400,7 @@ var global_progress: Progress = .{
.draw_buffer = undefined,
.done = false,
.need_clear = false,
.status = .working,
.node_parents = &node_parents_buffer,
.node_storage = &node_storage_buffer,
@@ -408,6 +426,9 @@ pub const have_ipc = switch (builtin.os.tag) {
const noop_impl = builtin.single_threaded or switch (builtin.os.tag) {
.wasi, .freestanding => true,
else => false,
} or switch (builtin.zig_backend) {
.stage2_aarch64 => true,
else => false,
};
/// Initializes a global Progress instance.
@@ -495,6 +516,11 @@ pub fn start(options: Options) Node {
return root_node;
}
/// Records `new_status` as the status of the global `Progress` instance.
///
/// Does nothing when `noop_impl` is true. The value is stored atomically
/// with `.monotonic` ordering.
pub fn setStatus(new_status: Status) void {
if (noop_impl) return;
@atomicStore(Status, &global_progress.status, new_status, .monotonic);
}
/// Returns whether a resize is needed to learn the terminal size.
fn wait(timeout_ns: u64) bool {
const resize_flag = if (global_progress.redraw_event.timedWait(timeout_ns)) |_|
@@ -675,6 +701,14 @@ const save = "\x1b7";
const restore = "\x1b8";
const finish_sync = "\x1b[?2026l";
// Terminal progress-state sequences of the form `ESC ] 9 ; 4 ; state [; value] BEL`.
// Names ending in ` {d}` are format strings that take the value as `{d}`.
const progress_remove = "\x1b]9;4;0\x07";
const @"progress_normal {d}" = "\x1b]9;4;1;{d}\x07";
const @"progress_error {d}" = "\x1b]9;4;2;{d}\x07";
const progress_pulsing = "\x1b]9;4;3\x07";
// State 2 sent without a value.
const progress_pulsing_error = "\x1b]9;4;2\x07";
const progress_normal_100 = "\x1b]9;4;1;100\x07";
const progress_error_100 = "\x1b]9;4;2;100\x07";
const TreeSymbol = enum {
/// ├─
tee,
@@ -754,10 +788,10 @@ fn appendTreeSymbol(symbol: TreeSymbol, buf: []u8, start_i: usize) usize {
}
fn clearWrittenWithEscapeCodes() anyerror!void {
if (!global_progress.need_clear) return;
if (noop_impl or !global_progress.need_clear) return;
global_progress.need_clear = false;
try write(clear);
try write(clear ++ progress_remove);
}
/// U+25BA or ►
@@ -1200,6 +1234,43 @@ fn computeRedraw(serialized_buffer: *Serialized.Buffer) struct { []u8, usize } {
i, const nl_n = computeNode(buf, i, 0, serialized, children, root_node_index);
if (global_progress.terminal_mode == .ansi_escape_codes) {
{
// Set progress state https://conemu.github.io/en/AnsiEscapeCodes.html#ConEmu_specific_OSC
const root_storage = &serialized.storage[0];
const storage = if (root_storage.name[0] != 0 or children[0].child == .none) root_storage else &serialized.storage[@intFromEnum(children[0].child)];
const estimated_total = storage.estimated_total_count;
const completed_items = storage.completed_count;
const status = @atomicLoad(Status, &global_progress.status, .monotonic);
switch (status) {
.working => {
if (estimated_total == 0) {
buf[i..][0..progress_pulsing.len].* = progress_pulsing.*;
i += progress_pulsing.len;
} else {
const percent = completed_items * 100 / estimated_total;
i += (std.fmt.bufPrint(buf[i..], @"progress_normal {d}", .{percent}) catch &.{}).len;
}
},
.success => {
buf[i..][0..progress_remove.len].* = progress_remove.*;
i += progress_remove.len;
},
.failure => {
buf[i..][0..progress_error_100.len].* = progress_error_100.*;
i += progress_error_100.len;
},
.failure_working => {
if (estimated_total == 0) {
buf[i..][0..progress_pulsing_error.len].* = progress_pulsing_error.*;
i += progress_pulsing_error.len;
} else {
const percent = completed_items * 100 / estimated_total;
i += (std.fmt.bufPrint(buf[i..], @"progress_error {d}", .{percent}) catch &.{}).len;
}
},
}
}
if (nl_n > 0) {
buf[i] = '\r';
i += 1;
+5 -2
View File
@@ -772,7 +772,7 @@ pub const Endian = enum {
/// This data structure is used by the Zig language code generation and
/// therefore must be kept in sync with the compiler implementation.
pub const Signedness = enum {
pub const Signedness = enum(u1) {
signed,
unsigned,
};
@@ -894,7 +894,10 @@ pub const VaList = switch (builtin.cpu.arch) {
.aarch64, .aarch64_be => switch (builtin.os.tag) {
.windows => *u8,
.ios, .macos, .tvos, .watchos, .visionos => *u8,
else => @compileError("disabled due to miscompilations"), // VaListAarch64,
else => switch (builtin.zig_backend) {
.stage2_aarch64 => VaListAarch64,
else => @compileError("disabled due to miscompilations"),
},
},
.arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) {
.ios, .macos, .tvos, .watchos, .visionos => *u8,
+3 -3
View File
@@ -7147,7 +7147,7 @@ pub const dirent = switch (native_os) {
off: off_t,
reclen: c_ushort,
type: u8,
name: [256:0]u8,
name: [255:0]u8,
},
else => void,
};
@@ -10497,9 +10497,9 @@ pub const sysconf = switch (native_os) {
pub const sf_hdtr = switch (native_os) {
.freebsd, .macos, .ios, .tvos, .watchos, .visionos => extern struct {
headers: [*]const iovec_const,
headers: ?[*]const iovec_const,
hdr_cnt: c_int,
trailers: [*]const iovec_const,
trailers: ?[*]const iovec_const,
trl_cnt: c_int,
},
else => void,
+2 -58
View File
@@ -1,75 +1,19 @@
//! Compression algorithms.
const std = @import("std.zig");
pub const flate = @import("compress/flate.zig");
pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig");
pub const lzma = @import("compress/lzma.zig");
pub const lzma2 = @import("compress/lzma2.zig");
pub const xz = @import("compress/xz.zig");
pub const zstd = @import("compress/zstandard.zig");
/// Reader adapter that feeds every byte it reads from `child_reader` into
/// `hasher` before returning it to the caller.
pub fn HashedReader(ReaderType: type, HasherType: type) type {
    return struct {
        const Self = @This();

        child_reader: ReaderType,
        hasher: HasherType,

        pub const Error = ReaderType.Error;
        pub const Reader = std.io.GenericReader(*Self, Error, read);

        /// Reads into `buf` and hashes exactly the bytes that were read.
        pub fn read(self: *Self, buf: []u8) Error!usize {
            const n = try self.child_reader.read(buf);
            const filled = buf[0..n];
            self.hasher.update(filled);
            return n;
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
/// Pairs `reader` with `hasher` in a `HashedReader`; both are stored by value.
pub fn hashedReader(
    reader: anytype,
    hasher: anytype,
) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
    const Wrapped = HashedReader(@TypeOf(reader), @TypeOf(hasher));
    return Wrapped{ .hasher = hasher, .child_reader = reader };
}
/// Writer adapter that forwards to `child_writer` and feeds the bytes it
/// accepted into `hasher`.
pub fn HashedWriter(WriterType: type, HasherType: type) type {
    return struct {
        const Self = @This();

        child_writer: WriterType,
        hasher: HasherType,

        pub const Error = WriterType.Error;
        pub const Writer = std.io.GenericWriter(*Self, Error, write);

        /// Writes `buf` and hashes only the prefix the child reported as written.
        pub fn write(self: *Self, buf: []const u8) Error!usize {
            const n = try self.child_writer.write(buf);
            const accepted = buf[0..n];
            self.hasher.update(accepted);
            return n;
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}
/// Pairs `writer` with `hasher` in a `HashedWriter`; both are stored by value.
pub fn hashedWriter(
    writer: anytype,
    hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
    const Wrapped = HashedWriter(@TypeOf(writer), @TypeOf(hasher));
    return Wrapped{ .hasher = hasher, .child_writer = writer };
}
pub const zstd = @import("compress/zstd.zig");
test {
_ = flate;
_ = lzma;
_ = lzma2;
_ = xz;
_ = zstd;
_ = flate;
_ = gzip;
_ = zlib;
}
+35 -14
View File
@@ -12,17 +12,11 @@ pub const Check = enum(u4) {
};
fn readStreamFlags(reader: anytype, check: *Check) !void {
var bit_reader = std.io.bitReader(.little, reader);
const reserved1 = try bit_reader.readBitsNoEof(u8, 8);
if (reserved1 != 0)
return error.CorruptInput;
check.* = @as(Check, @enumFromInt(try bit_reader.readBitsNoEof(u4, 4)));
const reserved2 = try bit_reader.readBitsNoEof(u4, 4);
if (reserved2 != 0)
return error.CorruptInput;
const reserved1 = try reader.readByte();
if (reserved1 != 0) return error.CorruptInput;
const byte = try reader.readByte();
if ((byte >> 4) != 0) return error.CorruptInput;
check.* = @enumFromInt(@as(u4, @truncate(byte)));
}
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
@@ -47,7 +41,7 @@ pub fn Decompress(comptime ReaderType: type) type {
var check: Check = undefined;
const hash_a = blk: {
var hasher = std.compress.hashedReader(source, Crc32.init());
var hasher = hashedReader(source, Crc32.init());
try readStreamFlags(hasher.reader(), &check);
break :blk hasher.hasher.final();
};
@@ -80,7 +74,7 @@ pub fn Decompress(comptime ReaderType: type) type {
return r;
const index_size = blk: {
var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
var hasher = hashedReader(self.in_reader, Crc32.init());
hasher.hasher.update(&[1]u8{0x00});
var counter = std.io.countingReader(hasher.reader());
@@ -115,7 +109,7 @@ pub fn Decompress(comptime ReaderType: type) type {
const hash_a = try self.in_reader.readInt(u32, .little);
const hash_b = blk: {
var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
var hasher = hashedReader(self.in_reader, Crc32.init());
const hashed_reader = hasher.reader();
const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
@@ -140,6 +134,33 @@ pub fn Decompress(comptime ReaderType: type) type {
};
}
/// Reader adapter that feeds every byte it reads from `child_reader` into
/// `hasher` before returning it to the caller.
pub fn HashedReader(ReaderType: type, HasherType: type) type {
    return struct {
        const Self = @This();

        child_reader: ReaderType,
        hasher: HasherType,

        pub const Error = ReaderType.Error;
        pub const Reader = std.io.GenericReader(*Self, Error, read);

        /// Reads into `buf` and hashes exactly the bytes that were read.
        pub fn read(self: *Self, buf: []u8) Error!usize {
            const n = try self.child_reader.read(buf);
            const filled = buf[0..n];
            self.hasher.update(filled);
            return n;
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
/// Pairs `reader` with `hasher` in a `HashedReader`; both are stored by value.
pub fn hashedReader(
    reader: anytype,
    hasher: anytype,
) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
    const Wrapped = HashedReader(@TypeOf(reader), @TypeOf(hasher));
    return Wrapped{ .hasher = hasher, .child_reader = reader };
}
test {
_ = @import("xz/test.zig");
}
+1 -1
View File
@@ -91,7 +91,7 @@ pub fn Decoder(comptime ReaderType: type) type {
// Block Header
{
var header_hasher = std.compress.hashedReader(block_reader, Crc32.init());
var header_hasher = xz.hashedReader(block_reader, Crc32.init());
const header_reader = header_hasher.reader();
const header_size = @as(u64, try header_reader.readByte()) * 4;
-310
View File
@@ -1,310 +0,0 @@
const std = @import("std");
const RingBuffer = std.RingBuffer;
const types = @import("zstandard/types.zig");
pub const frame = types.frame;
pub const compressed_block = types.compressed_block;
pub const decompress = @import("zstandard/decompress.zig");
/// Options accepted by `Decompressor.init` / `decompressor`.
pub const DecompressorOptions = struct {
/// Copied into the decompressor's `verify_checksum` field.
verify_checksum: bool = true,
/// Caller-provided memory that becomes the decompressor's window buffer.
window_buffer: []u8,
/// Recommended amount by the standard. Lower than this may result
/// in inability to decompress common streams.
pub const default_window_buffer_len = 8 * 1024 * 1024;
};
/// Returns a streaming zstandard decompressor type that pulls compressed data
/// from a `ReaderType` and hands out decompressed bytes through `read`.
/// Decoded data is staged in the window buffer supplied via
/// `DecompressorOptions`.
pub fn Decompressor(comptime ReaderType: type) type {
return struct {
const Self = @This();
const table_size_max = types.compressed_block.table_size_max;
/// Counts consumed input bytes; `read` uses the count to tell a clean end
/// of input from a truncated frame header.
source: std.io.CountingReader(ReaderType),
/// `NewFrame`: a frame header must be decoded next. `InFrame`: blocks are
/// being decoded. `LastBlock`: the final block was decoded and buffered
/// output may still be pending.
state: enum { NewFrame, InFrame, LastBlock },
decode_state: decompress.block.DecodeState,
frame_context: decompress.FrameContext,
buffer: WindowBuffer,
literal_fse_buffer: [table_size_max.literal]types.compressed_block.Table.Fse,
match_fse_buffer: [table_size_max.match]types.compressed_block.Table.Fse,
offset_fse_buffer: [table_size_max.offset]types.compressed_block.Table.Fse,
literals_buffer: [types.block_size_max]u8,
sequence_buffer: [types.block_size_max]u8,
verify_checksum: bool,
checksum: ?u32,
/// Running total of bytes decoded for the current frame.
current_frame_decompressed_size: usize,
/// Persistent form of the ring buffer state; `readInner` rebuilds a
/// `RingBuffer` from these fields on every call and writes the indices back.
const WindowBuffer = struct {
data: []u8 = undefined,
read_index: usize = 0,
write_index: usize = 0,
};
pub const Error = ReaderType.Error || error{
ChecksumFailure,
DictionaryIdFlagUnsupported,
MalformedBlock,
MalformedFrame,
OutOfMemory,
};
pub const Reader = std.io.GenericReader(*Self, Error, read);
/// Creates a decompressor in the `NewFrame` state. Per-frame fields are
/// left undefined until `frameInit` fills them in.
pub fn init(source: ReaderType, options: DecompressorOptions) Self {
return .{
.source = std.io.countingReader(source),
.state = .NewFrame,
.decode_state = undefined,
.frame_context = undefined,
.buffer = .{ .data = options.window_buffer },
.literal_fse_buffer = undefined,
.match_fse_buffer = undefined,
.offset_fse_buffer = undefined,
.literals_buffer = undefined,
.sequence_buffer = undefined,
.verify_checksum = options.verify_checksum,
.checksum = undefined,
.current_frame_decompressed_size = undefined,
};
}
/// Decodes the next frame header. Skippable frames are skipped entirely
/// and leave the state at `NewFrame`; zstandard frames reset the per-frame
/// fields and move to `InFrame`.
fn frameInit(self: *Self) !void {
const source_reader = self.source.reader();
switch (try decompress.decodeFrameHeader(source_reader)) {
.skippable => |header| {
try source_reader.skipBytes(header.frame_size, .{});
self.state = .NewFrame;
},
.zstandard => |header| {
const frame_context = try decompress.FrameContext.init(
header,
self.buffer.data.len,
self.verify_checksum,
);
const decode_state = decompress.block.DecodeState.init(
&self.literal_fse_buffer,
&self.match_fse_buffer,
&self.offset_fse_buffer,
);
self.decode_state = decode_state;
self.frame_context = frame_context;
self.checksum = null;
self.current_frame_decompressed_size = 0;
self.state = .InFrame;
},
}
}
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
/// Fills `buffer` with decompressed bytes and returns how many were
/// produced. Returns 0 for an empty `buffer`, or when the input ends
/// exactly at a frame boundary.
pub fn read(self: *Self, buffer: []u8) Error!usize {
if (buffer.len == 0) return 0;
var size: usize = 0;
// Keep going across frames that yield no output.
while (size == 0) {
while (self.state == .NewFrame) {
const initial_count = self.source.bytes_read;
self.frameInit() catch |err| switch (err) {
error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
// End of input before any header byte was consumed is a clean
// end of stream; anything else is a truncated frame.
error.EndOfStream => return if (self.source.bytes_read == initial_count)
0
else
error.MalformedFrame,
else => return error.MalformedFrame,
};
}
size = try self.readInner(buffer);
}
return size;
}
/// Decodes blocks until the ring buffer holds data or the last block has
/// been seen, then copies up to `buffer.len` bytes out of it.
fn readInner(self: *Self, buffer: []u8) Error!usize {
std.debug.assert(self.state != .NewFrame);
var ring_buffer = RingBuffer{
.data = self.buffer.data,
.read_index = self.buffer.read_index,
.write_index = self.buffer.write_index,
};
// Persist the ring buffer indices on every exit path, including errors.
defer {
self.buffer.read_index = ring_buffer.read_index;
self.buffer.write_index = ring_buffer.write_index;
}
const source_reader = self.source.reader();
while (ring_buffer.isEmpty() and self.state != .LastBlock) {
const header_bytes = source_reader.readBytesNoEof(3) catch
return error.MalformedFrame;
const block_header = decompress.block.decodeBlockHeader(&header_bytes);
decompress.block.decodeBlockReader(
&ring_buffer,
source_reader,
block_header,
&self.decode_state,
self.frame_context.block_size_max,
&self.literals_buffer,
&self.sequence_buffer,
) catch
return error.MalformedBlock;
// NOTE(review): this comparison runs before the current block's bytes
// are added below, so an overrun caused by this block is only caught
// by a later check.
if (self.frame_context.content_size) |size| {
if (self.current_frame_decompressed_size > size) return error.MalformedFrame;
}
const size = ring_buffer.len();
self.current_frame_decompressed_size += size;
if (self.frame_context.hasher_opt) |*hasher| {
if (size > 0) {
// The newest `size` bytes are hashed as two slices.
const written_slice = ring_buffer.sliceLast(size);
hasher.update(written_slice.first);
hasher.update(written_slice.second);
}
}
if (block_header.last_block) {
self.state = .LastBlock;
if (self.frame_context.has_checksum) {
// The checksum is always consumed from the input, but only
// compared when verification is enabled and a hasher exists.
const checksum = source_reader.readInt(u32, .little) catch
return error.MalformedFrame;
if (self.verify_checksum) {
if (self.frame_context.hasher_opt) |*hasher| {
if (checksum != decompress.computeChecksum(hasher))
return error.ChecksumFailure;
}
}
}
// A declared content size must match the decoded total exactly.
if (self.frame_context.content_size) |content_size| {
if (content_size != self.current_frame_decompressed_size) {
return error.MalformedFrame;
}
}
}
}
const size = @min(ring_buffer.len(), buffer.len);
if (size > 0) {
ring_buffer.readFirstAssumeLength(buffer, size);
}
// Frame fully drained: the next call starts by decoding a new frame header.
if (self.state == .LastBlock and ring_buffer.len() == 0) {
self.state = .NewFrame;
}
return size;
}
};
}
/// Creates a `Decompressor` for the type of `reader` by forwarding `reader`
/// and `options` to its `init`.
pub fn decompressor(reader: anytype, options: DecompressorOptions) Decompressor(@TypeOf(reader)) {
return Decompressor(@TypeOf(reader)).init(reader, options);
}
/// Decompresses `data` through the streaming decompressor using a 1 << 23 byte
/// window. The result is allocated with `std.testing.allocator`; the caller
/// frees it.
fn testDecompress(data: []const u8) ![]u8 {
    const gpa = std.testing.allocator;

    const window = try gpa.alloc(u8, 1 << 23);
    defer gpa.free(window);

    var input = std.io.fixedBufferStream(data);
    var zstd_stream = decompressor(input.reader(), .{ .window_buffer = window });
    return zstd_stream.reader().readAllAlloc(gpa, std.math.maxInt(usize));
}
/// Checks that streaming decompression of `data` yields exactly `expected`.
fn testReader(data: []const u8, comptime expected: []const u8) !void {
    const decoded = try testDecompress(data);
    defer std.testing.allocator.free(decoded);

    try std.testing.expectEqualSlices(u8, expected, decoded);
}
test "decompression" {
const uncompressed = @embedFile("testdata/rfc8478.txt");
const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
// One output buffer, sized to the uncompressed data, is reused for both inputs.
const buffer = try std.testing.allocator.alloc(u8, uncompressed.len);
defer std.testing.allocator.free(buffer);
const res3 = try decompress.decode(buffer, compressed3, true);
try std.testing.expectEqual(uncompressed.len, res3);
try std.testing.expectEqualSlices(u8, uncompressed, buffer);
// Wipe the previous result so the second check cannot pass on stale data.
@memset(buffer, undefined);
const res19 = try decompress.decode(buffer, compressed19, true);
try std.testing.expectEqual(uncompressed.len, res19);
try std.testing.expectEqualSlices(u8, uncompressed, buffer);
// Run the same inputs through the streaming reader path.
try testReader(compressed3, uncompressed);
try testReader(compressed19, uncompressed);
}
/// Decodes `input` with both `decompress.decodeAlloc` and `decompress.decode`
/// and expects each result to equal `expected`.
fn expectEqualDecoded(expected: []const u8, input: []const u8) !void {
    const gpa = std.testing.allocator;

    // Allocating decoder.
    {
        const decoded = try decompress.decodeAlloc(gpa, input, false, 1 << 23);
        defer gpa.free(decoded);
        try std.testing.expectEqualStrings(expected, decoded);
    }

    // Decoder writing into a caller buffer twice the expected size.
    {
        const scratch = try gpa.alloc(u8, 2 * expected.len);
        defer gpa.free(scratch);
        const decoded_len = try decompress.decode(scratch, input, false);
        try std.testing.expectEqualStrings(expected, scratch[0..decoded_len]);
    }
}
/// Test helper: assert that `input` decodes to `expected` when read through
/// the streaming decompressor.
fn expectEqualDecodedStreaming(expected: []const u8, input: []const u8) !void {
    const gpa = std.testing.allocator;

    const window = try gpa.alloc(u8, 1 << 23);
    defer gpa.free(window);

    var fbs = std.io.fixedBufferStream(input);
    var zstd_stream = decompressor(fbs.reader(), .{ .window_buffer = window });

    const decoded = try zstd_stream.reader().readAllAlloc(gpa, std.math.maxInt(usize));
    defer gpa.free(decoded);
    return std.testing.expectEqualStrings(expected, decoded);
}
test "zero sized block" {
    const input_raw =
        "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
        "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
        "\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero

    const input_rle =
        "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
        "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
        "\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero
        "\xaa"; // block_content

    // Both frames must decode to empty output: first through the one-shot
    // decoders, then through the streaming decompressor.
    const inputs = [_][]const u8{ input_raw, input_rle };
    for (inputs) |input| try expectEqualDecoded("", input);
    for (inputs) |input| try expectEqualDecodedStreaming("", input);
}
test "declared raw literals size too large" {
    // The regenerated_size declared in the literals section header is larger
    // than the block maximum size, so this raw literals block cannot be valid.
    const input_raw =
        "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
        "\x00\x00" ++ // frame header: everything unset, window descriptor zero
        "\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18
        "\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603
        "\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size

    var window: [1024]u8 = undefined;
    var buf: [1024]u8 = undefined;

    var in_stream = std.io.fixedBufferStream(input_raw);
    var zstd_stream = decompressor(in_stream.reader(), .{ .window_buffer = &window });

    try std.testing.expectError(error.MalformedBlock, zstd_stream.read(&buf));
}
-1149
View File
@@ -1,1149 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const RingBuffer = std.RingBuffer;
const types = @import("../types.zig");
const frame = types.frame;
const Table = types.compressed_block.Table;
const LiteralsSection = types.compressed_block.LiteralsSection;
const SequencesSection = types.compressed_block.SequencesSection;
const huffman = @import("huffman.zig");
const readers = @import("../readers.zig");
const decodeFseTable = @import("fse.zig").decodeFseTable;
/// Errors reported by the block decoding functions in this file.
pub const Error = error{
/// The block size in the header is larger than the allowed maximum; the ring
/// buffer and reader decoders also report this when the decompressed size of
/// a compressed block exceeds the maximum.
BlockSizeOverMaximum,
/// The source holds fewer bytes than the block size of a raw or compressed block.
MalformedBlockSize,
/// The block header carries the reserved block type.
ReservedBlock,
/// The source does not contain the byte that an RLE block repeats.
MalformedRleBlock,
/// The contents of a compressed block could not be decoded.
MalformedCompressedBlock,
};
/// Decoder state shared by the block decoding functions: repeat offsets, the
/// three sequence FSE decoders and the literals decoder. The Huffman tree and
/// FSE tables are kept between calls to `prepare()` so that later blocks can
/// reuse them.
pub const DecodeState = struct {
/// The three most recently used offsets, most recent first.
repeat_offsets: [3]u32,
/// FSE decoder for offset codes.
offset: StateData(8),
/// FSE decoder for match length codes.
match: StateData(9),
/// FSE decoder for literal length codes.
literal: StateData(9),
/// Backing storage for an offset FSE table decoded from a block.
offset_fse_buffer: []Table.Fse,
/// Backing storage for a match length FSE table decoded from a block.
match_fse_buffer: []Table.Fse,
/// Backing storage for a literal length FSE table decoded from a block.
literal_fse_buffer: []Table.Fse,
/// `true` until `prepare()` has loaded FSE tables once; repeat mode is
/// rejected while this is set.
fse_tables_undefined: bool,
/// Bit reader over the literal stream currently being decoded.
literal_stream_reader: readers.ReverseBitReader,
/// Index of the current stream when the literals use four streams.
literal_stream_index: usize,
/// Literal stream(s) of the current block, copied from the literals section.
literal_streams: LiteralsSection.Streams,
/// Literals section header of the current block.
literal_header: LiteralsSection.Header,
/// Huffman tree for compressed literals; kept for later treeless blocks.
huffman_tree: ?LiteralsSection.HuffmanTree,
/// Number of literal bytes decoded so far for the current block.
literal_written_count: usize,
/// Running count of bytes written by the decode functions using this state.
written_count: usize = 0,
/// Per-table FSE decoder data. `State` is an unsigned integer exactly
/// `max_accuracy_log` bits wide.
fn StateData(comptime max_accuracy_log: comptime_int) type {
    return struct {
        const State = std.meta.Int(.unsigned, max_accuracy_log);

        state: State,
        table: Table,
        accuracy_log: u8,
    };
}
/// Create a decode state that uses the three given buffers as storage for
/// the literal length, match length and offset FSE tables. Repeat offsets
/// start at their predefined values; everything that `prepare()` fills in is
/// left undefined.
pub fn init(
    literal_fse_buffer: []Table.Fse,
    match_fse_buffer: []Table.Fse,
    offset_fse_buffer: []Table.Fse,
) DecodeState {
    const block_consts = types.compressed_block;
    return .{
        .repeat_offsets = .{
            block_consts.start_repeated_offset_1,
            block_consts.start_repeated_offset_2,
            block_consts.start_repeated_offset_3,
        },

        // Sequence FSE decoders: no tables loaded yet.
        .literal = undefined,
        .match = undefined,
        .offset = undefined,
        .fse_tables_undefined = true,

        .literal_fse_buffer = literal_fse_buffer,
        .match_fse_buffer = match_fse_buffer,
        .offset_fse_buffer = offset_fse_buffer,

        // Literals decoder: populated by `prepare()`.
        .literal_header = undefined,
        .literal_streams = undefined,
        .literal_stream_reader = undefined,
        .literal_stream_index = undefined,
        .huffman_tree = null,

        .literal_written_count = 0,
        .written_count = 0,
    };
}
/// Set the decoder up for a compressed block: adopt the header, streams and
/// (when present) Huffman tree from `literals`, start the first literal
/// stream, and load the sequence FSE tables from `source` as directed by
/// `sequences_header`.
///
/// Errors returned:
///   - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
///     first byte does not have any bits set
///   - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
///     section and the decode state does not have a Huffman tree from a
///     previous block
///   - `error.RepeatModeFirst` on the first call if one of the sequence FSE
///     tables is set to repeat mode
///   - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
///   - `error.MalformedFseTable` if there are errors decoding an FSE table
///   - `error.EndOfStream` if `source` ends before all FSE tables are read
pub fn prepare(
    self: *DecodeState,
    source: anytype,
    literals: LiteralsSection,
    sequences_header: SequencesSection.Header,
) !void {
    self.literal_written_count = 0;
    self.literal_header = literals.header;
    self.literal_streams = literals.streams;

    const literals_type = literals.header.block_type;

    if (literals.huffman_tree) |tree| {
        self.huffman_tree = tree;
    } else {
        // A treeless section decodes with the tree kept from an earlier
        // block, so one has to exist already.
        if (literals_type == .treeless and self.huffman_tree == null)
            return error.TreelessLiteralsFirst;
    }

    switch (literals_type) {
        .compressed, .treeless => {
            self.literal_stream_index = 0;
            const first_stream = switch (literals.streams) {
                .one => |slice| slice,
                .four => |streams| streams[0],
            };
            try self.initLiteralStream(first_stream);
        },
        .raw, .rle => {},
    }

    // With no sequences there are no FSE tables to read from `source`.
    if (sequences_header.sequence_count == 0) return;

    try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
    try self.updateFseTable(source, .offset, sequences_header.offsets);
    try self.updateFseTable(source, .match, sequences_header.match_lengths);
    self.fse_tables_undefined = false;
}
/// Load the starting state of each sequence FSE decoder from `bit_reader`,
/// in the order literal, offset, match. Each state is `accuracy_log` bits
/// wide.
///
/// Errors returned:
///   - `error.EndOfStream` if `bit_reader` does not contain enough bits.
pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
    const LiteralState = @TypeOf(self.literal.state);
    const OffsetState = @TypeOf(self.offset.state);
    const MatchState = @TypeOf(self.match.state);

    self.literal.state = try bit_reader.readBitsNoEof(LiteralState, self.literal.accuracy_log);
    self.offset.state = try bit_reader.readBitsNoEof(OffsetState, self.offset.accuracy_log);
    self.match.state = try bit_reader.readBitsNoEof(MatchState, self.match.accuracy_log);
}
/// Push `offset` to the front of the repeat offset history, dropping the
/// oldest entry.
fn updateRepeatOffset(self: *DecodeState, offset: u32) void {
    const newest = self.repeat_offsets[0];
    const middle = self.repeat_offsets[1];
    self.repeat_offsets[0] = offset;
    self.repeat_offsets[1] = newest;
    self.repeat_offsets[2] = middle;
}
/// Move the repeat offset at `index` to the front of the history (keeping
/// the relative order of the others) and return it. Any `index` other than
/// 1 or 2 leaves the history unchanged.
fn useRepeatOffset(self: *DecodeState, index: usize) u32 {
    switch (index) {
        1 => std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]),
        2 => {
            // Rotate right by one: [a, b, c] -> [c, a, b].
            std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]);
            std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]);
        },
        else => {},
    }
    return self.repeat_offsets[0];
}
/// Selects one of the three sequence FSE decoders. Each tag name equals the
/// name of the `DecodeState` field it refers to, which the methods below rely
/// on through `@field(self, @tagName(choice))`.
const DataType = enum { offset, match, literal };
/// Advance the FSE state selected by `choice`: read the number of bits given
/// by the current table entry and add them to that entry's baseline. RLE
/// tables have no state to advance.
///
/// Returns `error.MalformedFseBits` if the new state does not fit the state
/// integer type, and `error.EndOfStream` if `bit_reader` runs out of bits.
fn updateState(
    self: *DecodeState,
    comptime choice: DataType,
    bit_reader: *readers.ReverseBitReader,
) error{ MalformedFseBits, EndOfStream }!void {
    const entry = &@field(self, @tagName(choice));
    const StateInt = @TypeOf(entry.state);

    switch (entry.table) {
        .fse => |table| {
            const row = table[entry.state];
            const extra = try bit_reader.readBitsNoEof(StateInt, row.bits);
            entry.state = std.math.cast(StateInt, row.baseline + extra) orelse
                return error.MalformedFseBits;
        },
        .rle => {},
    }
}
/// Errors related to loading a sequence FSE table.
/// NOTE(review): not referenced by the code visible here (`updateFseTable`
/// uses an inferred error set); confirm whether it is still needed.
const FseTableError = error{
/// An FSE table description could not be decoded.
MalformedFseTable,
/// An FSE table declares an invalid accuracy log.
MalformedAccuracyLog,
/// Repeat mode was requested before any FSE table had been loaded.
RepeatModeFirst,
/// The source ended before the table was fully read.
EndOfStream,
};
/// Install the FSE table for the decoder selected by `choice` according to
/// `mode`:
///   - `.predefined`: use the predefined table and default accuracy log
///   - `.rle`: read one byte from `source` and use it as the only symbol
///   - `.fse`: decode a table from `source` into the matching FSE buffer
///   - `.repeat`: keep the current table; `error.RepeatModeFirst` if no table
///     has been loaded yet
fn updateFseTable(
    self: *DecodeState,
    source: anytype,
    comptime choice: DataType,
    mode: SequencesSection.Header.Mode,
) !void {
    const field_name = @tagName(choice);
    const entry = &@field(self, field_name);

    switch (mode) {
        .predefined => {
            entry.accuracy_log = @field(types.compressed_block.default_accuracy_log, field_name);
            entry.table = @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table");
        },
        .rle => {
            entry.accuracy_log = 0;
            entry.table = .{ .rle = try source.readByte() };
        },
        .fse => {
            const buffer = @field(self, field_name ++ "_fse_buffer");
            var bit_reader = readers.bitReader(source);

            const table_size = try decodeFseTable(
                &bit_reader,
                @field(types.compressed_block.table_symbol_count_max, field_name),
                @field(types.compressed_block.table_accuracy_log_max, field_name),
                buffer,
            );
            entry.table = .{ .fse = buffer[0..table_size] };
            entry.accuracy_log = std.math.log2_int_ceil(usize, table_size);
        },
        .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
    }
}
/// One decoded sequence: a run of literals followed by a match copied from
/// earlier output.
const Sequence = struct {
/// Number of literal bytes to emit before the match.
literal_length: u32,
/// Number of bytes to copy from earlier output.
match_length: u32,
/// Distance back from the end of the literals at which the match copy starts.
offset: u32,
};
/// Decode the next sequence from the current FSE states, reading the extra
/// bits of each value from `bit_reader` and updating the repeat offsets.
///
/// Returns `error.InvalidBitStream` if a decoded code is out of range or the
/// resulting offset is zero, and `error.EndOfStream` if `bit_reader` runs out
/// of bits.
fn nextSequence(
self: *DecodeState,
bit_reader: *readers.ReverseBitReader,
) error{ InvalidBitStream, EndOfStream }!Sequence {
// Extra bits are read in the order: offset, match length, literal length.
const raw_code = self.getCode(.offset);
// The code is used as a shift amount on a u32, so it must fit in a u5.
const offset_code = std.math.cast(u5, raw_code) orelse {
return error.InvalidBitStream;
};
const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);
const match_code = self.getCode(.match);
if (match_code >= types.compressed_block.match_length_code_table.len)
return error.InvalidBitStream;
// Table entry: [0] is the base value, [1] the number of extra bits to read.
const match = types.compressed_block.match_length_code_table[match_code];
const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);
const literal_code = self.getCode(.literal);
if (literal_code >= types.compressed_block.literals_length_code_table.len)
return error.InvalidBitStream;
const literal = types.compressed_block.literals_length_code_table[literal_code];
const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);
// Values above 3 encode a new offset (value - 3), which becomes the most
// recent repeat offset. Values 1..3 select from the repeat offset history.
const offset = if (offset_value > 3) offset: {
const offset = offset_value - 3;
self.updateRepeatOffset(offset);
break :offset offset;
} else offset: {
// With no literals the selection is shifted by one, and value 3 means
// "most recent offset minus one", pushed as a new offset.
if (literal_length == 0) {
if (offset_value == 3) {
const offset = self.repeat_offsets[0] - 1;
self.updateRepeatOffset(offset);
break :offset offset;
}
break :offset self.useRepeatOffset(offset_value);
}
break :offset self.useRepeatOffset(offset_value - 1);
};
if (offset == 0) return error.InvalidBitStream;
return .{
.literal_length = literal_length,
.match_length = match_length,
.offset = offset,
};
}
/// Apply `sequence` to `dest` at `write_pos`: decode its literals, then copy
/// its match from `offset` bytes before the end of those literals.
///
/// Returns `error.MalformedSequence` if the offset reaches before the start
/// of `dest`.
fn executeSequenceSlice(
    self: *DecodeState,
    dest: []u8,
    write_pos: usize,
    sequence: Sequence,
) (error{MalformedSequence} || DecodeLiteralsError)!void {
    const match_start = write_pos + sequence.literal_length;
    if (sequence.offset > match_start) return error.MalformedSequence;

    try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length);

    const copy_start = match_start - sequence.offset;
    const match_dest = dest[match_start..][0..sequence.match_length];
    const match_src = dest[copy_start..][0..sequence.match_length];
    // Element-wise in ascending order: source and destination may overlap
    // when the offset is smaller than the match length.
    for (match_dest, match_src) |*out, byte| out.* = byte;

    self.written_count += sequence.match_length;
}
/// Apply `sequence` to the ring buffer `dest`: decode its literals, then
/// append its match, read from `offset` bytes behind the write position.
///
/// Returns `error.MalformedSequence` if the offset is larger than the ring
/// buffer or than the number of bytes written so far plus the literals.
fn executeSequenceRingBuffer(
    self: *DecodeState,
    dest: *RingBuffer,
    sequence: Sequence,
) (error{MalformedSequence} || DecodeLiteralsError)!void {
    const max_offset = @min(dest.data.len, self.written_count + sequence.literal_length);
    if (sequence.offset > max_offset) return error.MalformedSequence;

    try self.decodeLiteralsRingBuffer(dest, sequence.literal_length);

    // Adding `data.len` keeps the start index from going below zero.
    const match_src = dest.sliceAt(
        dest.write_index + dest.data.len - sequence.offset,
        sequence.match_length,
    );
    dest.writeSliceForwardsAssumeCapacity(match_src.first);
    dest.writeSliceForwardsAssumeCapacity(match_src.second);

    self.written_count += sequence.match_length;
}
/// Errors returned when decoding and executing a single sequence; includes
/// everything literal decoding can return.
const DecodeSequenceError = error{
/// A decoded code is out of range or the decoded offset is zero.
InvalidBitStream,
/// The sequence bit stream does not contain enough bits.
EndOfStream,
/// The sequence is longer than allowed or its offset reaches too far back.
MalformedSequence,
/// An updated FSE state does not fit its state integer type.
MalformedFseBits,
} || DecodeLiteralsError;
/// Decode a single sequence from `bit_reader` and write its output into
/// `dest` beginning at `write_pos`. Unless `last_sequence` is set, the FSE
/// states are advanced afterwards. `prepare()` must have been called for the
/// block first. Returns the number of bytes the sequence produced.
///
/// Errors returned:
///   - `error.MalformedSequence` if the decompressed sequence would be
///     longer than `sequence_size_limit` or the sequence's offset is too
///     large
///   - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
///     streams do not contain enough literals for the sequence (this may
///     mean the literal stream or the sequence is malformed).
///   - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
///   - `error.DestTooSmall` if `dest` is not large enough to hold the
///     decompressed sequence
pub fn decodeSequenceSlice(
    self: *DecodeState,
    dest: []u8,
    write_pos: usize,
    bit_reader: *readers.ReverseBitReader,
    sequence_size_limit: usize,
    last_sequence: bool,
) (error{DestTooSmall} || DecodeSequenceError)!usize {
    const sequence = try self.nextSequence(bit_reader);

    const produced = @as(usize, sequence.literal_length) + sequence.match_length;
    if (produced > sequence_size_limit) return error.MalformedSequence;
    if (produced > dest[write_pos..].len) return error.DestTooSmall;

    try self.executeSequenceSlice(dest, write_pos, sequence);
    if (last_sequence) return produced;

    // State updates are read in the order literal, match, offset.
    inline for ([_]DataType{ .literal, .match, .offset }) |choice| {
        try self.updateState(choice, bit_reader);
    }
    return produced;
}
/// Decode a single sequence from `bit_reader` and append its output to the
/// ring buffer `dest`; see `decodeSequenceSlice`. Returns the number of bytes
/// the sequence produced.
pub fn decodeSequenceRingBuffer(
    self: *DecodeState,
    dest: *RingBuffer,
    bit_reader: anytype,
    sequence_size_limit: usize,
    last_sequence: bool,
) DecodeSequenceError!usize {
    const sequence = try self.nextSequence(bit_reader);

    const produced = @as(usize, sequence.literal_length) + sequence.match_length;
    if (produced > sequence_size_limit) return error.MalformedSequence;

    try self.executeSequenceRingBuffer(dest, sequence);
    if (last_sequence) return produced;

    // State updates are read in the order literal, match, offset.
    inline for ([_]DataType{ .literal, .match, .offset }) |choice| {
        try self.updateState(choice, bit_reader);
    }
    return produced;
}
/// Advance to the next of the four literal streams and start reading it.
fn nextLiteralMultiStream(
    self: *DecodeState,
) error{BitStreamHasNoStartBit}!void {
    const next_index = self.literal_stream_index + 1;
    self.literal_stream_index = next_index;
    try self.initLiteralStream(self.literal_streams.four[next_index]);
}
/// Point the literal stream reader at `bytes`.
fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
    return self.literal_stream_reader.init(bytes);
}
/// Whether all literal bits have been consumed: the reader is empty and, with
/// four streams, the last stream (index 3) is the current one.
fn isLiteralStreamEmpty(self: *DecodeState) bool {
    return switch (self.literal_streams) {
        .one => self.literal_stream_reader.isEmpty(),
        .four => self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
    };
}
/// Errors returned when reading bits from the literal stream(s).
const LiteralBitsError = error{
/// A literal stream could not be initialised because its bit stream has no
/// start bit.
BitStreamHasNoStartBit,
/// The literal stream(s) ran out of bits before the request was satisfied.
UnexpectedEndOfLiteralStream,
};
/// Read `bit_count_to_read` bits from the current literal stream. If that
/// stream cannot supply them and more of the four streams remain, switch to
/// the next stream and retry once there.
fn readLiteralsBits(
    self: *DecodeState,
    bit_count_to_read: u16,
) LiteralBitsError!u16 {
    if (self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read)) |bits| {
        return bits;
    } else |_| {
        const has_next_stream = self.literal_streams == .four and self.literal_stream_index < 3;
        if (!has_next_stream) return error.UnexpectedEndOfLiteralStream;

        try self.nextLiteralMultiStream();
        return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
            return error.UnexpectedEndOfLiteralStream;
    }
}
/// Errors returned when decoding literals; includes the literal bit reading
/// errors.
const DecodeLiteralsError = error{
/// More literals were requested than the literals section's regenerated size.
MalformedLiteralsLength,
/// Propagated from the Huffman tree query while decoding compressed literals.
NotFound,
} || LiteralBitsError;
/// Decode `len` bytes of literals into the start of `dest`, continuing from
/// where the previous call for this block stopped. `literal_written_count`
/// and `written_count` are both advanced by `len` on success.
///
/// Errors returned:
///   - `error.MalformedLiteralsLength` if the number of literal bytes
///     decoded by `self` plus `len` is greater than the regenerated size of
///     `literals`
///   - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there
///     are problems decoding Huffman compressed literals
pub fn decodeLiteralsSlice(
    self: *DecodeState,
    dest: []u8,
    len: usize,
) DecodeLiteralsError!void {
    if (self.literal_written_count + len > self.literal_header.regenerated_size)
        return error.MalformedLiteralsLength;

    switch (self.literal_header.block_type) {
        .raw => {
            const literal_data = self.literal_streams.one[self.literal_written_count..][0..len];
            @memcpy(dest[0..len], literal_data);
            self.literal_written_count += len;
            self.written_count += len;
        },
        .rle => {
            // The single stream byte is the value repeated for every literal.
            for (0..len) |i| {
                dest[i] = self.literal_streams.one[0];
            }
            self.literal_written_count += len;
            self.written_count += len;
        },
        .compressed, .treeless => {
            // `prepare()` guarantees a tree exists for these block types.
            const huffman_tree = self.huffman_tree orelse unreachable;
            const max_bit_count = huffman_tree.max_bit_count;
            const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
                max_bit_count,
            );
            var bits_read: u4 = 0;
            var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
            var bit_count_to_read: u4 = starting_bit_count;
            for (0..len) |i| {
                var prefix: u16 = 0;
                // Extend the prefix until the tree query resolves to a symbol.
                while (true) {
                    const new_bits = try self.readLiteralsBits(bit_count_to_read);
                    prefix <<= bit_count_to_read;
                    prefix |= new_bits;
                    bits_read += bit_count_to_read;
                    const result = try huffman_tree.query(huffman_tree_index, prefix);

                    switch (result) {
                        .symbol => |sym| {
                            dest[i] = sym;
                            // Reset the search state for the next literal.
                            bit_count_to_read = starting_bit_count;
                            bits_read = 0;
                            huffman_tree_index = huffman_tree.symbol_count_minus_one;
                            break;
                        },
                        .index => |index| {
                            huffman_tree_index = index;
                            const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                                huffman_tree.nodes[index].weight,
                                max_bit_count,
                            );
                            // Only the bits not yet read for this code length are still needed.
                            bit_count_to_read = bit_count - bits_read;
                        },
                    }
                }
            }
            self.literal_written_count += len;
            self.written_count += len;
        },
    }
}
/// Decode `len` bytes of literals and append them to the ring buffer `dest`;
/// see `decodeLiteralsSlice()` for the errors returned. `literal_written_count`
/// and `written_count` are both advanced by `len` on success.
pub fn decodeLiteralsRingBuffer(
self: *DecodeState,
dest: *RingBuffer,
len: usize,
) DecodeLiteralsError!void {
if (self.literal_written_count + len > self.literal_header.regenerated_size)
return error.MalformedLiteralsLength;
switch (self.literal_header.block_type) {
.raw => {
// Raw literals are copied straight out of the single stream.
const literals_end = self.literal_written_count + len;
const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
dest.writeSliceAssumeCapacity(literal_data);
self.literal_written_count += len;
self.written_count += len;
},
.rle => {
// The single stream byte is the value repeated for every literal.
for (0..len) |_| {
dest.writeAssumeCapacity(self.literal_streams.one[0]);
}
self.literal_written_count += len;
self.written_count += len;
},
.compressed, .treeless => {
// `prepare()` rejects treeless literals when no tree is available.
const huffman_tree = self.huffman_tree orelse unreachable;
const max_bit_count = huffman_tree.max_bit_count;
const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
max_bit_count,
);
var bits_read: u4 = 0;
var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
var bit_count_to_read: u4 = starting_bit_count;
for (0..len) |_| {
var prefix: u16 = 0;
// Extend the prefix until the tree query resolves to a symbol.
while (true) {
const new_bits = try self.readLiteralsBits(bit_count_to_read);
prefix <<= bit_count_to_read;
prefix |= new_bits;
bits_read += bit_count_to_read;
const result = try huffman_tree.query(huffman_tree_index, prefix);
switch (result) {
.symbol => |sym| {
dest.writeAssumeCapacity(sym);
// Reset the search state for the next literal.
bit_count_to_read = starting_bit_count;
bits_read = 0;
huffman_tree_index = huffman_tree.symbol_count_minus_one;
break;
},
.index => |index| {
huffman_tree_index = index;
const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[index].weight,
max_bit_count,
);
// Only the bits not yet read for this code length are still needed.
bit_count_to_read = bit_count - bits_read;
},
}
}
}
self.literal_written_count += len;
self.written_count += len;
},
}
}
/// Return the symbol currently selected by the decoder `choice`: the fixed
/// byte of an RLE table, or the symbol of the FSE table entry at the current
/// state.
fn getCode(self: *DecodeState, comptime choice: DataType) u32 {
    const entry = &@field(self, @tagName(choice));
    return switch (entry.table) {
        .rle => |value| value,
        .fse => |table| table[entry.state].symbol,
    };
}
};
/// Decode a single block from `src` into `dest`, writing at
/// `dest[written_count..]`. The beginning of `src` must be the start of the
/// block content (i.e. directly after the block header). Increments
/// `consumed_count` by the number of bytes read from `src` to decode the block
/// and returns the decompressed size of the block.
///
/// Errors returned:
///
///   - `error.BlockSizeOverMaximum` if the block's size is larger than
///     `block_size_max`
///   - `error.MalformedBlockSize` if `src.len` is smaller than the block size
///     and the block is a raw or compressed block
///   - `error.ReservedBlock` if the block is a reserved block
///   - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
///   - `error.MalformedCompressedBlock` if there are errors decoding a
///     compressed block
///   - `error.DestTooSmall` if `dest` is not large enough to hold the
///     decompressed block
pub fn decodeBlock(
dest: []u8,
src: []const u8,
block_header: frame.Zstandard.Block.Header,
decode_state: *DecodeState,
consumed_count: *usize,
block_size_max: usize,
written_count: usize,
) (error{DestTooSmall} || Error)!usize {
const block_size = block_header.block_size;
if (block_size_max < block_size) return error.BlockSizeOverMaximum;
switch (block_header.block_type) {
.raw => {
// Raw block: the content is copied verbatim.
if (src.len < block_size) return error.MalformedBlockSize;
if (dest[written_count..].len < block_size) return error.DestTooSmall;
@memcpy(dest[written_count..][0..block_size], src[0..block_size]);
consumed_count.* += block_size;
decode_state.written_count += block_size;
return block_size;
},
.rle => {
// RLE block: one source byte repeated `block_size` times.
if (src.len < 1) return error.MalformedRleBlock;
if (dest[written_count..].len < block_size) return error.DestTooSmall;
for (written_count..block_size + written_count) |write_pos| {
dest[write_pos] = src[0];
}
consumed_count.* += 1;
decode_state.written_count += block_size;
return block_size;
},
.compressed => {
if (src.len < block_size) return error.MalformedBlockSize;
// Layout: literals section, sequences header, FSE tables, sequence bit stream.
var bytes_read: usize = 0;
const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
return error.MalformedCompressedBlock;
var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
const fbs_reader = fbs.reader();
const sequences_header = decodeSequencesHeader(fbs_reader) catch
return error.MalformedCompressedBlock;
decode_state.prepare(fbs_reader, literals, sequences_header) catch
return error.MalformedCompressedBlock;
// Account for the sequences header and any FSE tables read through `fbs`.
bytes_read += fbs.pos;
var bytes_written: usize = 0;
{
// Everything left in the block is the sequence bit stream.
const bit_stream_bytes = src[bytes_read..block_size];
var bit_stream: readers.ReverseBitReader = undefined;
bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
if (sequences_header.sequence_count > 0) {
decode_state.readInitialFseState(&bit_stream) catch
return error.MalformedCompressedBlock;
// Remaining output budget for the sequences of this block.
var sequence_size_limit = block_size_max;
for (0..sequences_header.sequence_count) |i| {
const write_pos = written_count + bytes_written;
const decompressed_size = decode_state.decodeSequenceSlice(
dest,
write_pos,
&bit_stream,
sequence_size_limit,
i == sequences_header.sequence_count - 1,
) catch |err| switch (err) {
error.DestTooSmall => return error.DestTooSmall,
else => return error.MalformedCompressedBlock,
};
bytes_written += decompressed_size;
sequence_size_limit -= decompressed_size;
}
}
// Leftover sequence bits mean the block is malformed.
if (!bit_stream.isEmpty()) {
return error.MalformedCompressedBlock;
}
}
// Literals not consumed by any sequence are appended after the last one.
if (decode_state.literal_written_count < literals.header.regenerated_size) {
const len = literals.header.regenerated_size - decode_state.literal_written_count;
if (len > dest[written_count + bytes_written ..].len) return error.DestTooSmall;
decode_state.decodeLiteralsSlice(dest[written_count + bytes_written ..], len) catch
return error.MalformedCompressedBlock;
bytes_written += len;
}
// Huffman-coded literal streams must be fully consumed.
switch (decode_state.literal_header.block_type) {
.treeless, .compressed => {
if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
},
.raw, .rle => {},
}
consumed_count.* += block_size;
return bytes_written;
},
.reserved => return error.ReservedBlock,
}
}
/// Decode a single block from `src` into the ring buffer `dest`; see
/// `decodeBlock()`. Returns the size of the decompressed block, which can be
/// used with `dest.sliceLast()` to get the decompressed bytes.
/// `error.BlockSizeOverMaximum` is returned if the block's compressed or
/// decompressed size is larger than `block_size_max`.
pub fn decodeBlockRingBuffer(
dest: *RingBuffer,
src: []const u8,
block_header: frame.Zstandard.Block.Header,
decode_state: *DecodeState,
consumed_count: *usize,
block_size_max: usize,
) Error!usize {
const block_size = block_header.block_size;
if (block_size_max < block_size) return error.BlockSizeOverMaximum;
switch (block_header.block_type) {
.raw => {
if (src.len < block_size) return error.MalformedBlockSize;
// dest may have length zero if block_size == 0, causing division by zero in
// writeSliceAssumeCapacity()
if (block_size > 0) {
const data = src[0..block_size];
dest.writeSliceAssumeCapacity(data);
consumed_count.* += block_size;
decode_state.written_count += block_size;
}
return block_size;
},
.rle => {
// RLE block: one source byte repeated `block_size` times.
if (src.len < 1) return error.MalformedRleBlock;
for (0..block_size) |_| {
dest.writeAssumeCapacity(src[0]);
}
consumed_count.* += 1;
decode_state.written_count += block_size;
return block_size;
},
.compressed => {
if (src.len < block_size) return error.MalformedBlockSize;
// Layout: literals section, sequences header, FSE tables, sequence bit stream.
var bytes_read: usize = 0;
const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
return error.MalformedCompressedBlock;
var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
const fbs_reader = fbs.reader();
const sequences_header = decodeSequencesHeader(fbs_reader) catch
return error.MalformedCompressedBlock;
decode_state.prepare(fbs_reader, literals, sequences_header) catch
return error.MalformedCompressedBlock;
// Account for the sequences header and any FSE tables read through `fbs`.
bytes_read += fbs.pos;
var bytes_written: usize = 0;
{
// Everything left in the block is the sequence bit stream.
const bit_stream_bytes = src[bytes_read..block_size];
var bit_stream: readers.ReverseBitReader = undefined;
bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
if (sequences_header.sequence_count > 0) {
decode_state.readInitialFseState(&bit_stream) catch
return error.MalformedCompressedBlock;
// Remaining output budget for the sequences of this block.
var sequence_size_limit = block_size_max;
for (0..sequences_header.sequence_count) |i| {
const decompressed_size = decode_state.decodeSequenceRingBuffer(
dest,
&bit_stream,
sequence_size_limit,
i == sequences_header.sequence_count - 1,
) catch return error.MalformedCompressedBlock;
bytes_written += decompressed_size;
sequence_size_limit -= decompressed_size;
}
}
// Leftover sequence bits mean the block is malformed.
if (!bit_stream.isEmpty()) {
return error.MalformedCompressedBlock;
}
}
// Literals not consumed by any sequence are appended after the last one.
if (decode_state.literal_written_count < literals.header.regenerated_size) {
const len = literals.header.regenerated_size - decode_state.literal_written_count;
decode_state.decodeLiteralsRingBuffer(dest, len) catch
return error.MalformedCompressedBlock;
bytes_written += len;
}
// Huffman-coded literal streams must be fully consumed.
switch (decode_state.literal_header.block_type) {
.treeless, .compressed => {
if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
},
.raw, .rle => {},
}
consumed_count.* += block_size;
// The trailing literals are not covered by `sequence_size_limit`, so the
// total is checked here.
if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
return bytes_written;
},
.reserved => return error.ReservedBlock,
}
}
/// Decode a single block from `source` into `dest`. Literal and sequence data
/// from the block is copied into `literals_buffer` and `sequence_buffer`, which
/// must be large enough or `error.LiteralsBufferTooSmall` and
/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
/// upper bound for the size of both buffers). See `decodeBlock`
/// and `decodeBlockRingBuffer` for functions that can decode a block without
/// these extra copies. `error.EndOfStream` is returned if `source` does not
/// contain enough bytes.
pub fn decodeBlockReader(
dest: *RingBuffer,
source: anytype,
block_header: frame.Zstandard.Block.Header,
decode_state: *DecodeState,
block_size_max: usize,
literals_buffer: []u8,
sequence_buffer: []u8,
) !void {
const block_size = block_header.block_size;
// Reads for a compressed block go through a reader capped at `block_size` bytes.
var block_reader_limited = std.io.limitedReader(source, block_size);
const block_reader = block_reader_limited.reader();
if (block_size_max < block_size) return error.BlockSizeOverMaximum;
switch (block_header.block_type) {
.raw => {
if (block_size == 0) return;
// Read directly into the ring buffer's storage (possibly in two
// parts), then advance the write index by hand.
const slice = dest.sliceAt(dest.write_index, block_size);
try source.readNoEof(slice.first);
try source.readNoEof(slice.second);
dest.write_index = dest.mask2(dest.write_index + block_size);
decode_state.written_count += block_size;
},
.rle => {
// RLE block: one source byte repeated `block_size` times.
const byte = try source.readByte();
for (0..block_size) |_| {
dest.writeAssumeCapacity(byte);
}
decode_state.written_count += block_size;
},
.compressed => {
const literals = try decodeLiteralsSection(block_reader, literals_buffer);
const sequences_header = try decodeSequencesHeader(block_reader);
try decode_state.prepare(block_reader, literals, sequences_header);
var bytes_written: usize = 0;
{
// Whatever remains of the block is the sequence bit stream.
const size = try block_reader.readAll(sequence_buffer);
var bit_stream: readers.ReverseBitReader = undefined;
try bit_stream.init(sequence_buffer[0..size]);
if (sequences_header.sequence_count > 0) {
// Bytes still left in the block after filling `sequence_buffer`
// mean the buffer could not hold the whole bit stream.
if (sequence_buffer.len < block_reader_limited.bytes_left)
return error.SequenceBufferTooSmall;
decode_state.readInitialFseState(&bit_stream) catch
return error.MalformedCompressedBlock;
// Remaining output budget for the sequences of this block.
var sequence_size_limit = block_size_max;
for (0..sequences_header.sequence_count) |i| {
const decompressed_size = decode_state.decodeSequenceRingBuffer(
dest,
&bit_stream,
sequence_size_limit,
i == sequences_header.sequence_count - 1,
) catch return error.MalformedCompressedBlock;
sequence_size_limit -= decompressed_size;
bytes_written += decompressed_size;
}
}
// Leftover sequence bits mean the block is malformed.
if (!bit_stream.isEmpty()) {
return error.MalformedCompressedBlock;
}
}
// Literals not consumed by any sequence are appended after the last one.
if (decode_state.literal_written_count < literals.header.regenerated_size) {
const len = literals.header.regenerated_size - decode_state.literal_written_count;
decode_state.decodeLiteralsRingBuffer(dest, len) catch
return error.MalformedCompressedBlock;
bytes_written += len;
}
// Huffman-coded literal streams must be fully consumed.
switch (decode_state.literal_header.block_type) {
.treeless, .compressed => {
if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
},
.raw, .rle => {},
}
if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
// The whole block must have been consumed.
if (block_reader_limited.bytes_left != 0) return error.MalformedCompressedBlock;
decode_state.literal_written_count = 0;
},
.reserved => return error.ReservedBlock,
}
}
/// Decode the three-byte header of a block. Bit 0 of the first byte is the
/// last-block flag, bits 1-2 the block type, and the remaining 21 bits
/// (little-endian across the three bytes) the block size.
pub fn decodeBlockHeader(src: *const [3]u8) frame.Zstandard.Block.Header {
    const first = src[0];
    const type_bits = (first & 0b110) >> 1;

    const size_low = (first & 0b11111000) >> 3;
    const size_mid = @as(u21, src[1]) << 5;
    const size_high = @as(u21, src[2]) << 13;

    return .{
        .last_block = first & 1 == 1,
        .block_type = @as(frame.Zstandard.Block.Type, @enumFromInt(type_bits)),
        .block_size = size_low + size_mid + size_high,
    };
}
/// Decode the header of a block from the first three bytes of `src`.
///
/// Errors returned:
///   - `error.EndOfStream` if `src.len < 3`
pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.Zstandard.Block.Header {
    return if (src.len >= 3)
        decodeBlockHeader(src[0..3])
    else
        error.EndOfStream;
}
/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
/// number of bytes the section uses. The returned streams are slices of `src`.
///
/// Errors returned:
///   - `error.MalformedLiteralsHeader` if the header is invalid
///   - `error.MalformedLiteralsSection` if there are decoding errors
///   - `error.MalformedAccuracyLog` if compressed literals have invalid
///     accuracy
///   - `error.MalformedFseTable` if compressed literals have invalid FSE table
///   - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
///   - `error.EndOfStream` if there are not enough bytes in `src`
pub fn decodeLiteralsSectionSlice(
src: []const u8,
consumed_count: *usize,
) (error{ MalformedLiteralsHeader, MalformedLiteralsSection, EndOfStream } || huffman.Error)!LiteralsSection {
var bytes_read: usize = 0;
const header = header: {
var fbs = std.io.fixedBufferStream(src);
// Record how many bytes the header took once the block is left.
defer bytes_read = fbs.pos;
break :header decodeLiteralsHeader(fbs.reader()) catch return error.MalformedLiteralsHeader;
};
switch (header.block_type) {
.raw => {
// Raw literals: `regenerated_size` bytes follow the header.
if (src.len < bytes_read + header.regenerated_size) return error.MalformedLiteralsSection;
const stream = src[bytes_read..][0..header.regenerated_size];
consumed_count.* += header.regenerated_size + bytes_read;
return LiteralsSection{
.header = header,
.huffman_tree = null,
.streams = .{ .one = stream },
};
},
.rle => {
// RLE literals: a single byte follows the header.
if (src.len < bytes_read + 1) return error.MalformedLiteralsSection;
const stream = src[bytes_read..][0..1];
consumed_count.* += 1 + bytes_read;
return LiteralsSection{
.header = header,
.huffman_tree = null,
.streams = .{ .one = stream },
};
},
.compressed, .treeless => {
const huffman_tree_start = bytes_read;
// Only `.compressed` sections carry a Huffman tree description.
const huffman_tree = if (header.block_type == .compressed)
try huffman.decodeHuffmanTreeSlice(src[bytes_read..], &bytes_read)
else
null;
const huffman_tree_size = bytes_read - huffman_tree_start;
// `compressed_size` covers the tree description and the streams.
const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
return error.MalformedLiteralsSection;
if (src.len < bytes_read + total_streams_size) return error.MalformedLiteralsSection;
const stream_data = src[bytes_read .. bytes_read + total_streams_size];
const streams = try decodeStreams(header.size_format, stream_data);
consumed_count.* += bytes_read + total_streams_size;
return LiteralsSection{
.header = header,
.huffman_tree = huffman_tree,
.streams = streams,
};
},
}
}
/// Decode a `LiteralsSection` from `source`, using `buffer` as backing storage
/// for the literal stream data; the returned streams are views into `buffer`.
/// See `decodeLiteralsSectionSlice()` for the slice-based variant.
///
/// Errors returned:
///   - `error.LiteralsBufferTooSmall` if `buffer` cannot hold the section data
///   - `error.MalformedLiteralsSection` if the Huffman tree is larger than the
///     declared compressed size or the stream jump table is invalid
///   - any error returned by `decodeLiteralsHeader()`,
///     `huffman.decodeHuffmanTree()`, or by reading from `source`
pub fn decodeLiteralsSection(
    source: anytype,
    buffer: []u8,
) !LiteralsSection {
    const header = try decodeLiteralsHeader(source);
    switch (header.block_type) {
        .raw => {
            if (buffer.len < header.regenerated_size) return error.LiteralsBufferTooSmall;
            // Expose only the bytes that were actually read, matching the
            // slice-based decoder, rather than the whole scratch buffer.
            const literals = buffer[0..header.regenerated_size];
            try source.readNoEof(literals);
            return LiteralsSection{
                .header = header,
                .huffman_tree = null,
                .streams = .{ .one = literals },
            };
        },
        .rle => {
            // An empty buffer would otherwise be indexed out of bounds.
            if (buffer.len < 1) return error.LiteralsBufferTooSmall;
            buffer[0] = try source.readByte();
            return LiteralsSection{
                .header = header,
                .huffman_tree = null,
                .streams = .{ .one = buffer[0..1] },
            };
        },
        .compressed, .treeless => {
            // Count the bytes used by the tree so they can be subtracted from
            // the declared compressed size (which covers tree + streams).
            var counting_reader = std.io.countingReader(source);
            const huffman_tree = if (header.block_type == .compressed)
                try huffman.decodeHuffmanTree(counting_reader.reader(), buffer)
            else
                null;
            const huffman_tree_size = @as(usize, @intCast(counting_reader.bytes_read));
            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
                return error.MalformedLiteralsSection;

            if (total_streams_size > buffer.len) return error.LiteralsBufferTooSmall;
            const stream_data = buffer[0..total_streams_size];
            try source.readNoEof(stream_data);
            const streams = try decodeStreams(header.size_format, stream_data);
            return LiteralsSection{
                .header = header,
                .huffman_tree = huffman_tree,
                .streams = streams,
            };
        },
    }
}
/// Split `stream_data` into literal streams. A `size_format` of 0 means a
/// single stream; otherwise the data starts with a 6-byte jump table of three
/// little-endian `u16` lengths, and the fourth stream takes whatever remains.
///
/// Returns `error.MalformedLiteralsSection` if the jump table is missing or
/// describes more data than is available.
fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Streams {
    if (size_format == 0) return .{ .one = stream_data };

    const jump_table_len = 6;
    if (stream_data.len < jump_table_len) return error.MalformedLiteralsSection;

    var lengths: [3]usize = undefined;
    for (&lengths, 0..) |*length, index| {
        length.* = std.mem.readInt(u16, stream_data[2 * index ..][0..2], .little);
    }

    const first_start: usize = jump_table_len;
    const second_start = first_start + lengths[0];
    const third_start = second_start + lengths[1];
    const fourth_start = third_start + lengths[2];
    if (stream_data.len < fourth_start) return error.MalformedLiteralsSection;

    return .{ .four = .{
        stream_data[first_start..second_start],
        stream_data[second_start..third_start],
        stream_data[third_start..fourth_start],
        stream_data[fourth_start..],
    } };
}
/// Read a literals section header from `source`.
///
/// The low two bits of the first byte give the block type and the next two
/// bits the size format; together they determine how many further bytes are
/// read and how the size fields are packed.
///
/// Errors returned:
///   - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
    const byte0 = try source.readByte();
    const block_type: LiteralsSection.BlockType = @enumFromInt(byte0 & 0b11);
    const size_format: u2 = @intCast((byte0 >> 2) & 0b11);
    // Every layout except the one-byte raw/RLE form starts its size at bit 4.
    const low_bits: u20 = byte0 >> 4;

    switch (block_type) {
        .raw, .rle => {
            const regenerated_size: u20 = switch (size_format) {
                0, 2 => byte0 >> 3,
                1 => blk: {
                    const byte1: u20 = try source.readByte();
                    break :blk low_bits + (byte1 << 4);
                },
                3 => blk: {
                    const byte1: u20 = try source.readByte();
                    const byte2: u20 = try source.readByte();
                    break :blk low_bits + (byte1 << 4) + (byte2 << 12);
                },
            };
            return LiteralsSection.Header{
                .block_type = block_type,
                .size_format = size_format,
                .regenerated_size = regenerated_size,
                .compressed_size = null,
            };
        },
        .compressed, .treeless => {
            const Sizes = struct { regenerated: u20, compressed: u18 };
            const byte1 = try source.readByte();
            const byte2 = try source.readByte();
            const sizes: Sizes = switch (size_format) {
                // Two 10-bit fields.
                0, 1 => Sizes{
                    .regenerated = low_bits + (@as(u20, byte1 & 0x3F) << 4),
                    .compressed = (byte1 >> 6) + (@as(u18, byte2) << 2),
                },
                // Two 14-bit fields.
                2 => blk: {
                    const byte3 = try source.readByte();
                    break :blk Sizes{
                        .regenerated = low_bits + (@as(u20, byte1) << 4) + (@as(u20, byte2 & 0x03) << 12),
                        .compressed = (byte2 >> 2) + (@as(u18, byte3) << 6),
                    };
                },
                // Two 18-bit fields.
                3 => blk: {
                    const byte3 = try source.readByte();
                    const byte4 = try source.readByte();
                    break :blk Sizes{
                        .regenerated = low_bits + (@as(u20, byte1) << 4) + (@as(u20, byte2 & 0x3F) << 12),
                        .compressed = (byte2 >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10),
                    };
                },
            };
            return LiteralsSection.Header{
                .block_type = block_type,
                .size_format = size_format,
                .regenerated_size = sizes.regenerated,
                .compressed_size = sizes.compressed,
            };
        },
    }
}
/// Read a sequences section header from `source`.
///
/// A first byte of zero means there are no sequences; in that case no further
/// bytes are read and the three mode fields of the result are left undefined.
///
/// Errors returned:
///   - `error.ReservedBitSet` if the reserved bit is set
///   - `error.EndOfStream` if there are not enough bytes in `source`
pub fn decodeSequencesHeader(
    source: anytype,
) !SequencesSection.Header {
    const Mode = SequencesSection.Header.Mode;

    const byte0 = try source.readByte();
    if (byte0 == 0) {
        return SequencesSection.Header{
            .sequence_count = 0,
            .offsets = undefined,
            .match_lengths = undefined,
            .literal_lengths = undefined,
        };
    }

    // The count is encoded in one, two or three bytes depending on byte0.
    const sequence_count: u24 = if (byte0 < 128)
        byte0
    else if (byte0 < 255)
        (@as(u24, byte0 - 128) << 8) + try source.readByte()
    else count: {
        const low: u24 = try source.readByte();
        const high: u24 = try source.readByte();
        break :count low + (high << 8) + 0x7F00;
    };

    const compression_modes = try source.readByte();
    const matches_mode: Mode = @enumFromInt((compression_modes >> 2) & 0b11);
    const offsets_mode: Mode = @enumFromInt((compression_modes >> 4) & 0b11);
    const literal_mode: Mode = @enumFromInt((compression_modes >> 6) & 0b11);
    // The two lowest bits of the modes byte are reserved.
    if (compression_modes & 0b11 != 0) return error.ReservedBitSet;

    return SequencesSection.Header{
        .sequence_count = sequence_count,
        .offsets = offsets_mode,
        .match_lengths = matches_mode,
        .literal_lengths = literal_mode,
    };
}
-153
View File
@@ -1,153 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const types = @import("../types.zig");
const Table = types.compressed_block.Table;
/// Decode an FSE table description from `bit_reader` and build the resulting
/// decoding table into the front of `entries`.
///
/// The description starts with a 4-bit accuracy log (stored minus 5), followed
/// by one variable-width value per symbol. Values are read until their
/// accumulated probability reaches `1 << accuracy_log` or 256 values have been
/// read. Afterwards `bit_reader` is aligned to the next byte.
///
/// Returns the table size (`1 << accuracy_log`), i.e. the number of entries
/// written to `entries`.
///
/// Errors returned:
/// - `error.MalformedAccuracyLog` if the accuracy log exceeds
/// `max_accuracy_log`
/// - `error.MalformedFseTable` if fewer than two values were read, the
/// probabilities do not add up to the table size, more than
/// `expected_symbol_count` values were read, a repeat run would exceed 256
/// values, or `buildFseTable` rejects the distribution
/// - any error returned by `bit_reader.readBitsNoEof`
pub fn decodeFseTable(
bit_reader: anytype,
expected_symbol_count: usize,
max_accuracy_log: u4,
entries: []Table.Fse,
) !usize {
const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4);
// Saturating subtraction keeps the bound at 0 when `max_accuracy_log < 5`.
if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog;
const accuracy_log = accuracy_log_biased + 5;
var values: [256]u16 = undefined;
var value_count: usize = 0;
const total_probability = @as(u16, 1) << accuracy_log;
var accumulated_probability: u16 = 0;
while (accumulated_probability < total_probability) {
// WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here,
// but power of two (remaining probabilities + 1) need max bits set to 1 more.
const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1;
// Read the short form first; values below `cutoff` need no extra bit.
const small = try bit_reader.readBitsNoEof(u16, max_bits - 1);
const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1);
const value = if (small < cutoff)
small
else value: {
const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1));
break :value if (value_read < @as(u16, 1) << (max_bits - 1))
value_read
else
value_read - cutoff;
};
// A value of 0 still takes up one unit of probability; any other value
// contributes `value - 1`.
accumulated_probability += if (value != 0) value - 1 else 1;
values[value_count] = value;
value_count += 1;
if (value == 1) {
// A value of 1 is followed by 2-bit repeat flags, each adding that many
// further values of 1; a flag of 3 means another flag follows.
while (true) {
const repeat_flag = try bit_reader.readBitsNoEof(u2, 2);
if (repeat_flag + value_count > 256) return error.MalformedFseTable;
for (0..repeat_flag) |_| {
values[value_count] = 1;
value_count += 1;
}
if (repeat_flag < 3) break;
}
}
// `values` is full; stop before the next write would go out of bounds.
if (value_count == 256) break;
}
bit_reader.alignToByte();
if (value_count < 2) return error.MalformedFseTable;
if (accumulated_probability != total_probability) return error.MalformedFseTable;
if (value_count > expected_symbol_count) return error.MalformedFseTable;
const table_size = total_probability;
// NOTE(review): assumes `entries.len >= table_size` - confirm callers size
// `entries` for `1 << max_accuracy_log`.
try buildFseTable(values[0..value_count], entries[0..table_size]);
return table_size;
}
/// Fill `entries` with the FSE decoding table for the distribution `values`.
///
/// `values[i]` describes symbol `i`: 0 gives the symbol a single cell at the
/// end of the table, 1 gives it no cells, and any other value gives it
/// `value - 1` cells. `entries.len` is the table size; it is asserted to be at
/// most 512 and is used as a bit mask, so it is presumably always a power of
/// two (TODO confirm against callers).
///
/// Returns `error.MalformedFseTable` if `std.math.ceilPowerOfTwo` fails for a
/// symbol's probability.
fn buildFseTable(values: []const u16, entries: []Table.Fse) !void {
const total_probability = @as(u16, @intCast(entries.len));
const accuracy_log = std.math.log2_int(u16, total_probability);
assert(total_probability <= 1 << 9);
// First pass: symbols with value 0 take cells from the end of the table,
// each reading `accuracy_log` bits with a baseline of 0.
var less_than_one_count: usize = 0;
for (values, 0..) |value, i| {
if (value == 0) {
entries[entries.len - 1 - less_than_one_count] = Table.Fse{
.symbol = @as(u8, @intCast(i)),
.baseline = 0,
.bits = accuracy_log,
};
less_than_one_count += 1;
}
}
// Second pass: spread the remaining symbols over the rest of the table.
// `position` carries over from one symbol to the next.
var position: usize = 0;
var temp_states: [1 << 9]u16 = undefined;
for (values, 0..) |value, symbol| {
if (value == 0 or value == 1) continue;
const probability = value - 1;
const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch
return error.MalformedFseTable;
const share_size = @divExact(total_probability, state_share_dividend);
const double_state_count = state_share_dividend - probability;
const single_state_count = probability - double_state_count;
const share_size_log = std.math.log2_int(u16, share_size);
// Collect the cells for this symbol, stepping through the table with a
// fixed stride and skipping the tail reserved by the first pass.
for (0..probability) |i| {
temp_states[i] = @as(u16, @intCast(position));
position += (entries.len >> 1) + (entries.len >> 3) + 3;
position &= entries.len - 1;
while (position >= entries.len - less_than_one_count) {
position += (entries.len >> 1) + (entries.len >> 3) + 3;
position &= entries.len - 1;
}
}
// Baselines are handed out in ascending cell order.
std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16));
for (0..probability) |i| {
// The first `double_state_count` cells read one extra bit and cover a
// double-width range placed after the single-width ranges.
entries[temp_states[i]] = if (i < double_state_count) Table.Fse{
.symbol = @as(u8, @intCast(symbol)),
.bits = share_size_log + 1,
.baseline = single_state_count * share_size + @as(u16, @intCast(i)) * 2 * share_size,
} else Table.Fse{
.symbol = @as(u8, @intCast(symbol)),
.bits = share_size_log,
.baseline = (@as(u16, @intCast(i)) - double_state_count) * share_size,
};
}
}
}
// Rebuilds the three predefined FSE tables from their default distributions
// and compares the result against the tables stored in `types`.
test buildFseTable {
    const Fse = Table.Fse;
    const predefined = types.compressed_block;

    // Distributions use the encoding `buildFseTable` expects: 0 marks a
    // single-cell symbol, any other value is the cell count plus one.
    const literal_length_distribution = [36]u16{
        5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2,
        0, 0, 0, 0,
    };
    const match_length_distribution = [53]u16{
        2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
        0, 0, 0, 0, 0,
    };
    const offset_code_distribution = [29]u16{
        2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
    };

    var table: [64]Fse = undefined;

    try buildFseTable(&literal_length_distribution, &table);
    try std.testing.expectEqualSlices(Fse, predefined.predefined_literal_fse_table.fse, &table);

    try buildFseTable(&match_length_distribution, &table);
    try std.testing.expectEqualSlices(Fse, predefined.predefined_match_fse_table.fse, &table);

    // The offset table only has 32 entries.
    const offset_table = table[0..32];
    try buildFseTable(&offset_code_distribution, offset_table);
    try std.testing.expectEqualSlices(Fse, predefined.predefined_offset_fse_table.fse, offset_table);
}
@@ -1,234 +0,0 @@
const std = @import("std");
const types = @import("../types.zig");
const LiteralsSection = types.compressed_block.LiteralsSection;
const Table = types.compressed_block.Table;
const readers = @import("../readers.zig");
const decodeFseTable = @import("fse.zig").decodeFseTable;
/// Errors that Huffman tree decoding in this file can report; this is the
/// error set of `decodeHuffmanTreeSlice` and part of that of
/// `decodeHuffmanTree`.
pub const Error = error{
/// The weight data is inconsistent or does not describe a usable tree.
MalformedHuffmanTree,
/// The FSE table describing compressed weights could not be decoded.
MalformedFseTable,
/// The FSE table's accuracy log exceeds the allowed maximum.
MalformedAccuracyLog,
/// The input ended before the tree description was complete.
EndOfStream,
};
/// Decode FSE-compressed Huffman weights from the next `compressed_size`
/// bytes of `source` into `weights`, using `buffer` as scratch space for the
/// weight bitstream. Returns the value produced by `assignWeights`.
///
/// Running out of input while reading the FSE table is reported as
/// `error.MalformedFseTable`.
fn decodeFseHuffmanTree(
    source: anytype,
    compressed_size: usize,
    buffer: []u8,
    weights: *[256]u4,
) !usize {
    // Never read past the bytes that belong to the tree description.
    var limited = std.io.limitedReader(source, compressed_size);
    var fse_bits = readers.bitReader(limited.reader());

    var fse_table: [1 << 6]Table.Fse = undefined;
    const table_size = decodeFseTable(&fse_bits, 256, 6, &fse_table) catch |err| switch (err) {
        error.EndOfStream => return error.MalformedFseTable,
        else => |e| return e,
    };
    const accuracy_log = std.math.log2_int_ceil(usize, table_size);

    // Whatever is left of the limited stream is the weight bitstream.
    const weight_bytes_len = try limited.reader().readAll(buffer);
    var weight_bits: readers.ReverseBitReader = undefined;
    weight_bits.init(buffer[0..weight_bytes_len]) catch return error.MalformedHuffmanTree;

    return assignWeights(&weight_bits, accuracy_log, &fse_table, weights);
}
/// Decode FSE-compressed Huffman weights from the first `compressed_size`
/// bytes of `src` into `weights`. Returns the value produced by
/// `assignWeights`.
///
/// Returns `error.MalformedHuffmanTree` if `src` is shorter than
/// `compressed_size`; running out of input while reading the FSE table is
/// reported as `error.MalformedFseTable`.
fn decodeFseHuffmanTreeSlice(src: []const u8, compressed_size: usize, weights: *[256]u4) !usize {
    if (compressed_size > src.len) return error.MalformedHuffmanTree;
    const tree_bytes = src[0..compressed_size];

    var stream = std.io.fixedBufferStream(tree_bytes);
    var counting = std.io.countingReader(stream.reader());
    var fse_bits = readers.bitReader(counting.reader());

    var fse_table: [1 << 6]Table.Fse = undefined;
    const table_size = decodeFseTable(&fse_bits, 256, 6, &fse_table) catch |err| switch (err) {
        error.EndOfStream => return error.MalformedFseTable,
        error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e,
    };
    const accuracy_log = std.math.log2_int_ceil(usize, table_size);

    // The weight bitstream starts right after the bytes used by the table.
    const weights_start = std.math.cast(usize, counting.bytes_read) orelse
        return error.MalformedHuffmanTree;
    var weight_bits: readers.ReverseBitReader = undefined;
    weight_bits.init(tree_bytes[weights_start..]) catch return error.MalformedHuffmanTree;

    return assignWeights(&weight_bits, accuracy_log, &fse_table, weights);
}
/// Decode Huffman weights from `huff_bits` into `weights` using the FSE table
/// `entries`, alternating between two FSE states (an "even" and an "odd" one).
///
/// Each state is initialised with `accuracy_log` bits. Decoding stops when a
/// state update cannot read all the bits it needs; the other state's symbol is
/// then emitted as the final decoded weight.
///
/// Returns the number of decoded weights plus one.
///
/// Returns `error.MalformedHuffmanTree` if the initial states cannot be read, a
/// decoded symbol does not fit in a `u4`, too many weights are decoded, or
/// `huff_bits` is not empty once decoding stops.
fn assignWeights(
huff_bits: *readers.ReverseBitReader,
accuracy_log: u16,
entries: *[1 << 6]Table.Fse,
weights: *[256]u4,
) !usize {
var i: usize = 0;
var even_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
var odd_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
while (i < 254) {
const even_data = entries[even_state];
var read_bits: u16 = 0;
// NOTE(review): `readBits` is treated as infallible here; a short read is
// detected through `read_bits` instead - confirm against `ReverseBitReader`.
const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable;
weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree;
i += 1;
// Input exhausted while updating the even state: emit the pending odd
// symbol and stop.
if (read_bits < even_data.bits) {
weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree;
i += 1;
break;
}
even_state = even_data.baseline + even_bits;
read_bits = 0;
const odd_data = entries[odd_state];
const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable;
weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree;
i += 1;
// Input exhausted while updating the odd state: emit the pending even
// symbol and stop.
if (read_bits < odd_data.bits) {
// One more weight would leave no slot for the implied last symbol.
if (i == 255) return error.MalformedHuffmanTree;
weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree;
i += 1;
break;
}
odd_state = odd_data.baseline + odd_bits;
// The loop ran to its limit without the input running out.
} else return error.MalformedHuffmanTree;
if (!huff_bits.isEmpty()) {
return error.MalformedHuffmanTree;
}
return i + 1; // stream contains all but the last symbol
}
/// Read directly-encoded Huffman weights from `source` into `weights`. Each
/// input byte holds two 4-bit weights, high nibble first, so
/// `(encoded_symbol_count + 1) / 2` bytes are consumed.
///
/// Returns `encoded_symbol_count + 1`; propagates any error from
/// `source.readByte()`.
fn decodeDirectHuffmanTree(source: anytype, encoded_symbol_count: usize, weights: *[256]u4) !usize {
    const pair_count = (encoded_symbol_count + 1) / 2;
    var index: usize = 0;
    while (index < pair_count) : (index += 1) {
        const pair = try source.readByte();
        const high: u4 = @truncate(pair >> 4);
        const low: u4 = @truncate(pair);
        weights[2 * index] = high;
        weights[2 * index + 1] = low;
    }
    return encoded_symbol_count + 1;
}
/// Fill `weight_sorted_prefixed_symbols` with the symbols that have a nonzero
/// weight, ordered by ascending weight, and assign each one a prefix.
///
/// The slice is first initialised with symbols `0..len` and sorted by weight.
/// Symbols with weight 0 are then dropped and the remaining ones are compacted
/// to the front of the slice. Returns how many entries at the front are valid.
fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.PrefixedSymbol, weights: [256]u4) usize {
for (0..weight_sorted_prefixed_symbols.len) |i| {
weight_sorted_prefixed_symbols[i] = .{
.symbol = @as(u8, @intCast(i)),
.weight = undefined,
.prefix = undefined,
};
}
std.mem.sort(
LiteralsSection.HuffmanTree.PrefixedSymbol,
weight_sorted_prefixed_symbols,
weights,
lessThanByWeight,
);
var prefix: u16 = 0;
var prefixed_symbol_count: usize = 0;
var sorted_index: usize = 0;
const symbol_count = weight_sorted_prefixed_symbols.len;
while (sorted_index < symbol_count) {
var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
const weight = weights[symbol];
// Weight 0 means the symbol gets no entry.
if (weight == 0) {
sorted_index += 1;
continue;
}
// Consume the run of symbols sharing `weight`, giving them consecutive
// prefixes. The write index never overtakes the read index, so compacting
// in place is safe.
while (sorted_index < symbol_count) : ({
sorted_index += 1;
prefixed_symbol_count += 1;
prefix += 1;
}) {
symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
if (weights[symbol] != weight) {
// Moving to a larger weight: rescale the running prefix by the weight
// difference before starting the next run.
prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1;
break;
}
weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol;
weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix;
weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight;
}
}
return prefixed_symbol_count;
}
/// Build a `HuffmanTree` from the first `symbol_count - 1` entries of
/// `weights`. The weight of the last symbol is not stored in the input; it is
/// computed here and written to `weights[symbol_count - 1]`.
///
/// Returns `error.MalformedHuffmanTree` if the sum of `2^(weight - 1)` over
/// the given weights is `1 << 11` or more.
fn buildHuffmanTree(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!LiteralsSection.HuffmanTree {
    const last_index = symbol_count - 1;

    var power_sum_wide: u32 = 0;
    for (weights[0..last_index]) |weight| {
        // A weight of 0 contributes nothing: (1 << 0) >> 1 == 0.
        power_sum_wide += (@as(u16, 1) << weight) >> 1;
    }
    if (power_sum_wide > (1 << 11) - 1) return error.MalformedHuffmanTree;
    const power_sum: u16 = @intCast(power_sum_wide);

    // advance to next power of two (even if power_sum is a power of 2)
    // TODO: is it valid to have power_sum == 0?
    const max_bit_count = if (power_sum == 0) 1 else std.math.log2_int(u16, power_sum) + 1;
    const ceiling = @as(u16, 1) << max_bit_count;
    // The last symbol fills the gap up to that power of two.
    weights[last_index] = std.math.log2_int(u16, ceiling - power_sum) + 1;

    var nodes: [256]LiteralsSection.HuffmanTree.PrefixedSymbol = undefined;
    const prefixed_count = assignSymbols(nodes[0..symbol_count], weights.*);

    return LiteralsSection.HuffmanTree{
        .max_bit_count = max_bit_count,
        .symbol_count_minus_one = @as(u8, @intCast(prefixed_count - 1)),
        .nodes = nodes,
    };
}
/// Decode a Huffman tree description from `source`.
///
/// The first byte selects the encoding: values below 128 give the byte length
/// of FSE-compressed weights (decoded using `buffer` as scratch space), larger
/// values mean `header - 127` weights are stored directly as 4-bit values.
pub fn decodeHuffmanTree(
    source: anytype,
    buffer: []u8,
) (@TypeOf(source).Error || Error)!LiteralsSection.HuffmanTree {
    const header = try source.readByte();
    var weights: [256]u4 = undefined;
    const symbol_count = switch (header) {
        0...127 => try decodeFseHuffmanTree(source, header, buffer, &weights),
        128...255 => try decodeDirectHuffmanTree(source, header - 127, &weights),
    };
    return buildHuffmanTree(&weights, symbol_count);
}
/// Decode a Huffman tree description from the start of `src`. Once the
/// weights have been read, `consumed_count` is advanced by the number of
/// bytes the description occupies (header byte included).
///
/// The first byte selects the encoding: values below 128 give the byte length
/// of FSE-compressed weights, larger values mean `header - 127` weights are
/// stored directly as 4-bit values.
pub fn decodeHuffmanTreeSlice(
    src: []const u8,
    consumed_count: *usize,
) Error!LiteralsSection.HuffmanTree {
    if (src.len == 0) return error.MalformedHuffmanTree;
    const header = src[0];
    const body = src[1..];
    var weights: [256]u4 = undefined;

    if (header < 128) {
        // FSE compressed weights: the header is the compressed byte count.
        const symbol_count = try decodeFseHuffmanTreeSlice(body, header, &weights);
        consumed_count.* += 1 + @as(usize, header);
        return buildHuffmanTree(&weights, symbol_count);
    }

    var stream = std.io.fixedBufferStream(body);
    const symbol_count = try decodeDirectHuffmanTree(stream.reader(), header - 127, &weights);
    consumed_count.* += 1 + stream.pos;
    return buildHuffmanTree(&weights, symbol_count);
}
/// Sort predicate ordering symbols by ascending weight, with `weights` as the
/// sort context.
fn lessThanByWeight(
    weights: [256]u4,
    lhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
    rhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
) bool {
    // NOTE: equal weights are deliberately not ordered by symbol here, so the
    // caller must use a stable sort; otherwise a tie-break on the symbol value
    // would have to be added.
    return weights[rhs.symbol] > weights[lhs.symbol];
}
-633
View File
@@ -1,633 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const RingBuffer = std.RingBuffer;
const types = @import("types.zig");
const frame = types.frame;
const LiteralsSection = types.compressed_block.LiteralsSection;
const SequencesSection = types.compressed_block.SequencesSection;
const SkippableHeader = types.frame.Skippable.Header;
const ZstandardHeader = types.frame.Zstandard.Header;
const Table = types.compressed_block.Table;
pub const block = @import("decode/block.zig");
const readers = @import("readers.zig");
/// Returns `true` if `magic` is a valid magic number for a skippable frame,
/// i.e. lies within the inclusive range reserved for skippable frames.
pub fn isSkippableMagic(magic: u32) bool {
    if (magic < frame.Skippable.magic_number_min) return false;
    return magic <= frame.Skippable.magic_number_max;
}
/// Reads a little-endian 32-bit magic number from `source` and returns the
/// kind of frame it introduces.
///
/// Errors returned:
///   - `error.BadMagic` if the magic number is neither the Zstandard frame
///     magic number nor within the range used by skippable frames
///   - `error.EndOfStream` if `source` contains fewer than 4 bytes
pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
    return frameType(try source.readInt(u32, .little));
}
/// Maps the magic number `magic` to the kind of frame it introduces.
///
/// Errors returned:
///   - `error.BadMagic` if `magic` is not a valid magic number.
pub fn frameType(magic: u32) error{BadMagic}!frame.Kind {
    if (magic == frame.Zstandard.magic_number) return .zstandard;
    if (isSkippableMagic(magic)) return .skippable;
    return error.BadMagic;
}
/// Header of a frame of either kind, as returned by `decodeFrameHeader`.
pub const FrameHeader = union(enum) {
/// Header of a Zstandard frame.
zstandard: ZstandardHeader,
/// Header of a skippable frame.
skippable: SkippableHeader,
};
/// Errors `decodeFrameHeader` can return in addition to those of its reader.
pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet };
/// Reads the magic number and the header that follows it from `source`.
///
/// For a skippable frame the header consists of the magic number and a
/// little-endian 32-bit frame size.
///
/// Errors returned:
///   - `error.BadMagic` if `source` begins with bytes not equal to the
///     Zstandard frame magic number, or outside the range of magic numbers for
///     skippable frames.
///   - `error.EndOfStream` if `source` ends before the magic number or the
///     header has been read completely
///   - `error.ReservedBitSet` if the frame is a Zstandard frame and any of the
///     reserved bits are set
pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader {
    const magic = try source.readInt(u32, .little);
    return switch (try frameType(magic)) {
        .zstandard => FrameHeader{ .zstandard = try decodeZstandardHeader(source) },
        .skippable => FrameHeader{ .skippable = .{
            .magic_number = magic,
            .frame_size = try source.readInt(u32, .little),
        } },
    };
}
/// Byte counts reported by the frame decoding functions that work on slices.
pub const ReadWriteCount = struct {
/// Number of bytes consumed from the source slice.
read_count: usize,
/// Number of bytes written to the destination slice.
write_count: usize,
};
/// Decodes frames from `src` into `dest`; returns the length of the result.
/// The output of each frame is written directly after the output of the
/// frames before it, so `dest[0..result]` holds the concatenated content of
/// all frames. The stream should not have extra trailing bytes - either all
/// bytes in `src` will be decoded, or an error will be returned. An error will
/// be returned if a Zstandard frame in `src` does not declare its content
/// size.
///
/// Errors returned:
///   - `error.DictionaryIdFlagUnsupported` if a `src` contains a frame that
///     uses a dictionary
///   - `error.MalformedFrame` if a frame in `src` is invalid, or if its
///     declared content does not fit in the remaining space of `dest`
///   - `error.UnknownContentSizeUnsupported` if a frame in `src` does not
///     declare its content size
pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{
    MalformedFrame,
    UnknownContentSizeUnsupported,
    DictionaryIdFlagUnsupported,
}!usize {
    var write_count: usize = 0;
    var read_count: usize = 0;
    while (read_count < src.len) {
        // Hand each frame only the unused tail of `dest`; decoding every
        // frame to the start of `dest` would overwrite earlier frames' output.
        // `write_count` never exceeds `dest.len` because a frame cannot write
        // more than the slice it was given.
        const counts = decodeFrame(
            dest[write_count..],
            src[read_count..],
            verify_checksum,
        ) catch |err| switch (err) {
            error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported,
            error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
            else => return error.MalformedFrame,
        };
        read_count += counts.read_count;
        write_count += counts.write_count;
    }
    return write_count;
}
/// Decodes every frame in `src` and returns the concatenated output in a
/// slice allocated with `allocator`; the caller owns the returned memory.
/// The stream should not have extra trailing bytes - either all bytes in
/// `src` will be decoded, or an error will be returned.
///
/// Errors returned:
///   - `error.DictionaryIdFlagUnsupported` if a `src` contains a frame that
///     uses a dictionary
///   - `error.MalformedFrame` if a frame in `src` is invalid
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
pub fn decodeAlloc(
    allocator: Allocator,
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 {
    var output = std.ArrayList(u8).init(allocator);
    errdefer output.deinit();

    var offset: usize = 0;
    while (offset < src.len) {
        const frame_len = decodeFrameArrayList(
            allocator,
            &output,
            src[offset..],
            verify_checksum,
            window_size_max,
        ) catch |err| switch (err) {
            error.OutOfMemory, error.DictionaryIdFlagUnsupported => |e| return e,
            // Every other failure is reported as a malformed frame.
            else => return error.MalformedFrame,
        };
        offset += frame_len;
    }
    return output.toOwnedSlice();
}
/// Decodes the single frame at the start of `src` into `dest` and reports how
/// many bytes were read from `src` and written to `dest`. Skippable frames
/// are stepped over without writing anything. Only frames that declare the
/// decompressed content size can be decoded.
///
/// Errors returned:
///   - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
///     number for a Zstandard or skippable frame
///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
///     uncompressed content size
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
///     size declared by the frame header
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if any of the reserved bits of the frame header
///     are set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
///     size greater than `src.len`
pub fn decodeFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    BadMagic,
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
    SkippableSizeTooLarge,
} || FrameError)!ReadWriteCount {
    var stream = std.io.fixedBufferStream(src);
    const reader = stream.reader();

    switch (try decodeFrameType(reader)) {
        .zstandard => return decodeZstandardFrame(dest, src, verify_checksum),
        .skippable => {},
    }

    // A skippable frame is the 4-byte magic number, a 4-byte size, and then
    // that many bytes of content.
    const content_size = try reader.readInt(u32, .little);
    const read_count = std.math.add(usize, content_size, 8) catch
        return error.SkippableSizeTooLarge;
    if (read_count > src.len) return error.SkippableSizeTooLarge;
    return ReadWriteCount{
        .read_count = read_count,
        .write_count = 0,
    };
}
/// Decodes the single frame at the start of `src`, passing `dest` on to
/// `decodeZstandardFrameArrayList` for Zstandard frames, and returns the
/// number of bytes read from `src`. Skippable frames are stepped over without
/// touching `dest`.
///
/// Errors returned:
///   - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
///     number for a Zstandard or skippable frame
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.WindowTooLarge` if the window size is larger than
///     `window_size_max`
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if any of the reserved bits of the frame header
///     are set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
///     size greater than `src.len`
pub fn decodeFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize {
    var stream = std.io.fixedBufferStream(src);
    const reader = stream.reader();

    const magic = try reader.readInt(u32, .little);
    switch (try frameType(magic)) {
        .zstandard => return decodeZstandardFrameArrayList(
            allocator,
            dest,
            src,
            verify_checksum,
            window_size_max,
        ),
        .skippable => {},
    }

    // A skippable frame is the 4-byte magic number, a 4-byte size, and then
    // that many bytes of content.
    const content_size = try reader.readInt(u32, .little);
    const read_count = std.math.add(usize, content_size, 8) catch
        return error.SkippableSizeTooLarge;
    if (read_count > src.len) return error.SkippableSizeTooLarge;
    return read_count;
}
/// Returns the frame checksum for the data fed into `hasher`: the low 32 bits
/// of the 64-bit hash.
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
    return @truncate(hasher.final());
}
/// Errors shared by the frame decoding functions in this file: the
/// frame-level failures listed here combined with every error in
/// `block.Error`.
const FrameError = error{
ChecksumFailure,
BadContentSize,
EndOfStream,
ReservedBitSet,
} || block.Error;
/// Decodes the Zstandard frame at the start of `src` into `dest` and reports
/// how many bytes were read from `src` and written to `dest`. The first four
/// bytes of `src` must be the magic number for a Zstandard frame (asserted).
///
/// Error returned:
///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
///     uncompressed content size
///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
///     size declared by the frame header
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
pub fn decodeZstandardFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
} || FrameError)!ReadWriteCount {
    assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
    const magic_len = 4;

    var header_stream = std.io.fixedBufferStream(src[magic_len..]);
    const frame_header = try decodeZstandardHeader(header_stream.reader());
    const header_end = magic_len + header_stream.pos;

    var frame_context = FrameContext.init(
        frame_header,
        // No window size limit is imposed, so `WindowTooLarge` cannot occur.
        std.math.maxInt(usize),
        verify_checksum,
    ) catch |err| switch (err) {
        error.WindowTooLarge => unreachable,
        inline else => |e| return e,
    };

    const counts = try decodeZStandardFrameBlocks(dest, src[header_end..], &frame_context);
    return ReadWriteCount{
        .read_count = counts.read_count + header_end,
        .write_count = counts.write_count,
    };
}
/// Decode the blocks of a Zstandard frame, plus the trailing checksum when the
/// frame has one, from `src` into `dest`. Returns the number of bytes read
/// from `src` and written to `dest`.
///
/// Errors returned:
///   - `error.UnknownContentSizeUnsupported` if `frame_context.content_size`
///     is `null`
///   - `error.ContentTooLarge` if `dest` is smaller than the declared content
///     size
///   - `error.BadContentSize` if the decoded size differs from the declared
///     content size
///   - `error.EndOfStream` if `src` ends before the blocks or the checksum do
///   - `error.ChecksumFailure` if `frame_context` carries a hasher and the
///     stored checksum does not match
///   - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrameBlocks(
    dest: []u8,
    src: []const u8,
    frame_context: *FrameContext,
) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount {
    const content_size = frame_context.content_size orelse
        return error.UnknownContentSizeUnsupported;
    if (content_size > dest.len) return error.ContentTooLarge;

    const hasher_ptr: ?*std.hash.XxHash64 =
        if (frame_context.hasher_opt) |*hasher| hasher else null;

    var consumed_count: usize = 0;
    const written_count = decodeFrameBlocksInner(
        dest[0..content_size],
        src,
        &consumed_count,
        hasher_ptr,
        frame_context.block_size_max,
    ) catch |err| switch (err) {
        // The destination was sized from the declared content size, so
        // running out of room means the declaration was wrong.
        error.DestTooSmall => return error.BadContentSize,
        inline else => |e| return e,
    };
    if (written_count != content_size) return error.BadContentSize;

    if (frame_context.has_checksum) {
        const after_checksum = consumed_count + 4;
        if (after_checksum > src.len) return error.EndOfStream;
        const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little);
        consumed_count = after_checksum;
        // The checksum bytes are always consumed; they are only compared when
        // a hasher was set up.
        if (frame_context.hasher_opt) |*hasher| {
            if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
        }
    }

    return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count };
}
/// Per-frame decoding parameters derived from a validated frame header.
pub const FrameContext = struct {
    /// Running hash of the decompressed data; `null` unless the frame has a
    /// checksum and verification was requested.
    hasher_opt: ?std.hash.XxHash64,
    /// Window size of the frame, as computed by `frameWindowSize()`.
    window_size: usize,
    /// Whether the frame ends with a content checksum.
    has_checksum: bool,
    /// The smaller of `types.block_size_max` and `window_size`.
    block_size_max: usize,
    /// Content size declared by the frame header, if any.
    content_size: ?usize,

    const Error = error{
        DictionaryIdFlagUnsupported,
        WindowSizeUnknown,
        WindowTooLarge,
        ContentSizeTooLarge,
    };

    /// Validates `frame_header` and returns the associated `FrameContext`.
    ///
    /// Errors returned:
    ///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
    ///   - `error.WindowSizeUnknown` if the frame does not have a valid window
    ///     size
    ///   - `error.WindowTooLarge` if the window size is larger than
    ///     `window_size_max` or `std.math.maxInt(usize)`
    ///   - `error.ContentSizeTooLarge` if the frame header indicates a content
    ///     size larger than `std.math.maxInt(usize)`
    pub fn init(
        frame_header: ZstandardHeader,
        window_size_max: usize,
        verify_checksum: bool,
    ) Error!FrameContext {
        const descriptor = frame_header.descriptor;
        if (descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;

        const raw_window = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
        if (raw_window > window_size_max) return error.WindowTooLarge;
        const window_size = std.math.cast(usize, raw_window) orelse return error.WindowTooLarge;

        const has_checksum = descriptor.content_checksum_flag;

        const content_size: ?usize = if (frame_header.content_size) |declared|
            std.math.cast(usize, declared) orelse return error.ContentSizeTooLarge
        else
            null;

        return .{
            .hasher_opt = if (has_checksum and verify_checksum) std.hash.XxHash64.init(0) else null,
            .window_size = window_size,
            .has_checksum = has_checksum,
            .block_size_max = @min(types.block_size_max, window_size),
            .content_size = content_size,
        };
    }
};
/// Decode a Zstandard frame from `src`, appending the decompressed data to
/// `dest`, and return the number of bytes read from `src`; see
/// `decodeZstandardFrame()`. The first four bytes of `src` must be the magic
/// number for a Zstandard frame.
///
/// Errors returned:
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.WindowTooLarge` if the window size is larger than
///     `window_size_max`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the size of decompressed data
pub fn decodeZstandardFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize {
    assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
    const magic_len: usize = 4;

    // Parse the frame header that follows the magic number.
    var header_stream = std.io.fixedBufferStream(src[magic_len..]);
    const frame_header = try decodeZstandardHeader(header_stream.reader());
    const blocks_start = magic_len + header_stream.pos;

    var frame_context = try FrameContext.init(frame_header, window_size_max, verify_checksum);

    const blocks_len = try decodeZstandardFrameBlocksArrayList(
        allocator,
        dest,
        src[blocks_start..],
        &frame_context,
    );
    return blocks_start + blocks_len;
}
/// Decode the blocks of a Zstandard frame, plus the trailing checksum when the
/// frame has one, from `src`, appending the decompressed data to `dest`.
/// Returns the number of bytes read from `src`.
///
/// A ring buffer of `frame_context.window_size` bytes is allocated with
/// `allocator` for the duration of the call.
///
/// Errors returned:
///   - `error.OutOfMemory` if the ring buffer or `dest` cannot grow
///   - `error.BadContentSize` if the frame declares a content size and the
///     decompressed data is larger or smaller than it; an overrun is reported
///     before the offending block is appended to `dest`
///   - `error.EndOfStream` if `src` ends before the blocks or the checksum do
///   - `error.ChecksumFailure` if `frame_context` carries a hasher and the
///     stored checksum does not match
///   - an error in `block.Error` if there are errors decoding a block
pub fn decodeZstandardFrameBlocksArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    frame_context: *FrameContext,
) (error{OutOfMemory} || FrameError)!usize {
    const initial_len = dest.items.len;

    var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size);
    defer ring_buffer.deinit(allocator);

    // Scratch FSE tables for the three sequence code types, kept on the stack.
    var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
    var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
    var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;

    // Each block header occupies three bytes of `src`.
    var block_header = try block.decodeBlockHeaderSlice(src);
    var consumed_count: usize = 3;
    var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
    while (true) : ({
        block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]);
        consumed_count += 3;
    }) {
        const written_size = try block.decodeBlockRingBuffer(
            &ring_buffer,
            src[consumed_count..],
            block_header,
            &decode_state,
            &consumed_count,
            frame_context.block_size_max,
        );

        if (frame_context.content_size) |size| {
            // Include the block that was just decoded, so that an overrun is
            // rejected before its bytes are appended to `dest`. Written in
            // subtraction form so the comparison cannot overflow.
            const produced = dest.items.len - initial_len;
            if (written_size > size or produced > size - written_size) {
                return error.BadContentSize;
            }
        }

        if (written_size > 0) {
            // The most recent output may wrap around the end of the ring
            // buffer, so it comes back as two slices.
            const written_slice = ring_buffer.sliceLast(written_size);
            try dest.appendSlice(written_slice.first);
            try dest.appendSlice(written_slice.second);
            if (frame_context.hasher_opt) |*hasher| {
                hasher.update(written_slice.first);
                hasher.update(written_slice.second);
            }
        }

        if (block_header.last_block) break;
    }

    if (frame_context.content_size) |size| {
        if (dest.items.len - initial_len != size) {
            return error.BadContentSize;
        }
    }

    if (frame_context.has_checksum) {
        if (src.len < consumed_count + 4) return error.EndOfStream;
        const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little);
        consumed_count += 4;
        // The checksum bytes are always consumed; they are only compared when
        // a hasher was set up.
        if (frame_context.hasher_opt) |*hasher| {
            if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
        }
    }

    return consumed_count;
}
/// Decodes blocks from `src` into `dest` until a block marked as the last
/// block has been decoded, and returns the total number of bytes written to
/// `dest`.
///
/// `consumed_count.*` is increased by the number of bytes read from `src`;
/// because this is done in a `defer`, it also happens when an error is
/// returned. When `hash` is non-null it is updated with every block's output.
fn decodeFrameBlocksInner(
dest: []u8,
src: []const u8,
consumed_count: *usize,
hash: ?*std.hash.XxHash64,
block_size_max: usize,
) (error{ EndOfStream, DestTooSmall } || block.Error)!usize {
// Scratch FSE tables for the three sequence code types, kept on the stack.
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;
// Each block header accounts for three bytes of `src`.
var block_header = try block.decodeBlockHeaderSlice(src);
var bytes_read: usize = 3;
// Evaluated at scope exit, so it reports the final value of `bytes_read`.
defer consumed_count.* += bytes_read;
var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
// Number of bytes written to `dest` so far.
var count: usize = 0;
// The continue expression reads the next block header; it is skipped when
// the loop breaks after the last block.
while (true) : ({
block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]);
bytes_read += 3;
}) {
const written_size = try block.decodeBlock(
dest,
src[bytes_read..],
block_header,
&decode_state,
&bytes_read,
block_size_max,
count,
);
// Hash exactly the bytes this block produced.
if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]);
count += written_size;
if (block_header.last_block) break;
}
return count;
}
/// Decode the eight-byte header of a skippable frame: a little-endian magic
/// number followed by a little-endian frame size. The first four bytes of
/// `src` must be a valid magic number for a skippable frame.
pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader {
    const magic_number = std.mem.readInt(u32, src[0..4], .little);
    assert(isSkippableMagic(magic_number));
    return .{
        .magic_number = magic_number,
        .frame_size = std.mem.readInt(u32, src[4..8], .little),
    };
}
/// Returns the window size required to decompress a frame, or `null` if it
/// cannot be determined (which indicates a malformed frame header).
///
/// When the header has a window descriptor, its upper five bits are an
/// exponent and its lower three bits a mantissa:
/// `base = 1 << (10 + exponent)` and the result is
/// `base + (base / 8) * mantissa`. Without a descriptor, the declared content
/// size is returned instead.
pub fn frameWindowSize(header: ZstandardHeader) ?u64 {
    const descriptor = header.window_descriptor orelse return header.content_size;

    // Five bits, so at most 31; `10 + exponent` stays below 64.
    const exponent: u6 = @intCast(descriptor >> 3);
    const mantissa: u64 = descriptor & 0b111;

    const window_base = @as(u64, 1) << (10 + exponent);
    return window_base + (window_base / 8) * mantissa;
}
/// Decode the header of a Zstandard frame from `source`, which must be
/// positioned just after the magic number.
///
/// Fields are read in this order: the descriptor byte, the window descriptor
/// (absent when the single-segment flag is set), the dictionary ID (when its
/// flag is non-zero), and the content size (when the single-segment flag is
/// set or the content-size flag is non-zero).
///
/// Errors returned:
///   - `error.ReservedBitSet` if any of the reserved bits of the header are set
///   - `error.EndOfStream` if `source` does not contain a complete header
pub fn decodeZstandardHeader(
    source: anytype,
) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader {
    const descriptor: ZstandardHeader.Descriptor = @bitCast(try source.readByte());
    if (descriptor.reserved) return error.ReservedBitSet;

    const window_descriptor: ?u8 = if (descriptor.single_segment_flag)
        null
    else
        try source.readByte();

    const dictionary_id: ?u32 = if (descriptor.dictionary_id_flag > 0) id: {
        // if flag is 3 then field_size = 4, else field_size = flag
        const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1;
        break :id try source.readVarInt(u32, .little, field_size);
    } else null;

    const has_content_size = descriptor.single_segment_flag or descriptor.content_size_flag > 0;
    const content_size: ?u64 = if (has_content_size) size: {
        const field_size = @as(u4, 1) << descriptor.content_size_flag;
        const stored = try source.readVarInt(u64, .little, field_size);
        // The two-byte encoding stores the size minus 256.
        break :size if (field_size == 2) stored + 256 else stored;
    } else null;

    return ZstandardHeader{
        .descriptor = descriptor,
        .window_descriptor = window_descriptor,
        .dictionary_id = dictionary_id,
        .content_size = content_size,
    };
}
// Reference every declaration of this file so that tests nested in them are
// picked up by the test runner.
test {
std.testing.refAllDecls(@This());
}
-82
View File
@@ -1,82 +0,0 @@
const std = @import("std");
/// Byte source that yields the bytes of a slice in reverse order (last byte
/// first), at most one byte per read call.
pub const ReversedByteReader = struct {
    /// Number of bytes not yet yielded; the next byte produced is
    /// `bytes[remaining_bytes - 1]`.
    remaining_bytes: usize,
    /// The slice being read backwards. It is stored, not copied.
    bytes: []const u8,

    const Reader = std.io.GenericReader(*ReversedByteReader, error{}, readFn);

    /// Creates a reader positioned at the last byte of `bytes`.
    pub fn init(bytes: []const u8) ReversedByteReader {
        return .{
            .bytes = bytes,
            .remaining_bytes = bytes.len,
        };
    }

    /// Returns a generic reader over `self`; `self` must outlive it.
    pub fn reader(self: *ReversedByteReader) Reader {
        return .{ .context = self };
    }

    /// Stores the next byte (moving towards the start of `bytes`) in
    /// `buffer[0]` and returns 1. Returns 0 once every byte has been yielded,
    /// or when `buffer` has no room, in which case nothing is consumed.
    fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize {
        // A zero-length destination must not be indexed.
        if (buffer.len == 0 or ctx.remaining_bytes == 0) return 0;
        const byte_index = ctx.remaining_bytes - 1;
        buffer[0] = ctx.bytes[byte_index];
        ctx.remaining_bytes = byte_index;
        return 1;
    }
};
/// A bit reader for reading the reversed bit streams used to encode
/// FSE compressed data.
///
/// `bit_reader` reads through a pointer to `byte_reader`, which is why `init`
/// fills in an existing instance in place instead of returning a new one.
pub const ReverseBitReader = struct {
    byte_reader: ReversedByteReader,
    bit_reader: std.io.BitReader(.big, ReversedByteReader.Reader),

    /// Points `self` at `bytes` and skips up to and including the first set
    /// bit of the last byte. An empty `bytes` is accepted and nothing is
    /// skipped. Fails with `error.BitStreamHasNoStartBit` when the first
    /// eight bits read are all zero.
    pub fn init(self: *ReverseBitReader, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
        self.byte_reader = ReversedByteReader.init(bytes);
        self.bit_reader = std.io.bitReader(.big, self.byte_reader.reader());
        if (bytes.len == 0) return;

        var skipped: usize = 0;
        while (skipped < 8) : (skipped += 1) {
            // `bytes` is non-empty, so eight bits are always available.
            const bit = self.readBitsNoEof(u1, 1) catch unreachable;
            if (bit != 0) return;
        }
        return error.BitStreamHasNoStartBit;
    }

    /// Reads exactly `num_bits` bits, failing with `error.EndOfStream` if the
    /// stream runs out.
    pub fn readBitsNoEof(self: *ReverseBitReader, comptime U: type, num_bits: u16) error{EndOfStream}!U {
        return self.bit_reader.readBitsNoEof(U, num_bits);
    }

    /// Reads up to `num_bits` bits and stores the number actually read in
    /// `out_bits`.
    pub fn readBits(self: *ReverseBitReader, comptime U: type, num_bits: u16, out_bits: *u16) error{}!U {
        return try self.bit_reader.readBits(U, num_bits, out_bits);
    }

    /// Forwards to `alignToByte` of the underlying bit reader.
    pub fn alignToByte(self: *ReverseBitReader) void {
        self.bit_reader.alignToByte();
    }

    /// True when no bytes remain and the bit reader's `count` is zero.
    pub fn isEmpty(self: ReverseBitReader) bool {
        if (self.byte_reader.remaining_bytes != 0) return false;
        return self.bit_reader.count == 0;
    }
};
/// Returns a thin wrapper type around `std.io.BitReader(.little, Reader)`
/// that exposes only the operations used by the decoder.
pub fn BitReader(comptime Reader: type) type {
    return struct {
        underlying: std.io.BitReader(.little, Reader),

        const Self = @This();

        /// Reads exactly `num_bits` bits or fails.
        pub fn readBitsNoEof(self: *Self, comptime U: type, num_bits: u16) !U {
            return self.underlying.readBitsNoEof(U, num_bits);
        }

        /// Reads up to `num_bits` bits and stores the number actually read in
        /// `out_bits`.
        pub fn readBits(self: *Self, comptime U: type, num_bits: u16, out_bits: *u16) !U {
            return self.underlying.readBits(U, num_bits, out_bits);
        }

        /// Forwards to `alignToByte` of the underlying bit reader.
        pub fn alignToByte(self: *Self) void {
            self.underlying.alignToByte();
        }
    };
}
/// Wraps `reader` in a `BitReader` that reads bits in little-endian order.
pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) {
return .{ .underlying = std.io.bitReader(.little, reader) };
}
-403
View File
@@ -1,403 +0,0 @@
pub const block_size_max = 1 << 17;
/// Types describing the two kinds of frame: Zstandard frames and skippable
/// frames.
pub const frame = struct {
pub const Kind = enum { zstandard, skippable };
/// A Zstandard frame: a header, a sequence of data blocks and an optional
/// checksum.
pub const Zstandard = struct {
/// Magic number identifying a Zstandard frame.
pub const magic_number = 0xFD2FB528;
header: Header,
data_blocks: []Block,
checksum: ?u32,
/// Decoded frame header. Optional fields are `null` when they are not
/// present in the header.
pub const Header = struct {
descriptor: Descriptor,
window_descriptor: ?u8,
dictionary_id: ?u32,
content_size: ?u64,
/// The frame header descriptor byte. As a packed struct, the fields are
/// laid out starting from the least significant bit.
pub const Descriptor = packed struct {
dictionary_id_flag: u2,
content_checksum_flag: bool,
reserved: bool,
unused: bool,
single_segment_flag: bool,
content_size_flag: u2,
};
};
pub const Block = struct {
/// Decoded block header.
pub const Header = struct {
last_block: bool,
block_type: Block.Type,
block_size: u21,
};
/// Two-bit block type field of a block header.
pub const Type = enum(u2) {
raw,
rle,
compressed,
reserved,
};
};
};
pub const Skippable = struct {
// Inclusive range of magic numbers that identify a skippable frame.
pub const magic_number_min = 0x184D2A50;
pub const magic_number_max = 0x184D2A5F;
/// Header of a skippable frame: the magic number and a 32-bit frame size.
pub const Header = struct {
magic_number: u32,
frame_size: u32,
};
};
};
pub const compressed_block = struct {
/// The literals section of a compressed block: its header, an optional
/// Huffman tree and the literal stream(s).
pub const LiteralsSection = struct {
    header: Header,
    huffman_tree: ?HuffmanTree,
    streams: Streams,

    /// The literal data, held as either one stream or four.
    pub const Streams = union(enum) {
        one: []const u8,
        four: [4][]const u8,
    };

    /// Decoded literals section header.
    pub const Header = struct {
        block_type: BlockType,
        size_format: u2,
        regenerated_size: u20,
        compressed_size: ?u18,
    };

    /// Two-bit literals block type.
    pub const BlockType = enum(u2) {
        raw,
        rle,
        compressed,
        treeless,
    };

    pub const HuffmanTree = struct {
        max_bit_count: u4,
        symbol_count_minus_one: u8,
        nodes: [256]PrefixedSymbol,

        pub const PrefixedSymbol = struct {
            symbol: u8,
            prefix: u16,
            weight: u4,
        };

        /// Outcome of `query`: either the matching symbol, or the index of
        /// the first node found with a different weight.
        pub const Result = union(enum) {
            symbol: u8,
            index: usize,
        };

        /// Walks `nodes` downwards from `index` across the run of nodes that
        /// share the weight of `nodes[index]`.
        ///
        /// Returns `.symbol` for the first node in that run whose prefix
        /// equals `prefix`, `.index` with the position of the first node that
        /// has a different weight, or `error.NotFound` when index 0 is
        /// reached without a match.
        pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{NotFound}!Result {
            const run_weight = self.nodes[index].weight;
            var cursor: usize = index;
            while (true) {
                const candidate = self.nodes[cursor];
                if (candidate.weight != run_weight) return Result{ .index = cursor };
                if (candidate.prefix == prefix) return Result{ .symbol = candidate.symbol };
                if (cursor == 0) return error.NotFound;
                cursor -= 1;
            }
        }

        /// Converts a weight to a bit count: zero stays zero, any other
        /// weight maps to `max_bit_count + 1 - weight`.
        pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
            if (weight == 0) return 0;
            return (max_bit_count + 1) - weight;
        }
    };

    pub const StreamCount = enum { one, four };

    /// Raw and RLE literals always use one stream. Compressed and treeless
    /// literals use one stream when `size_format` is 0 and four otherwise.
    pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount {
        switch (block_type) {
            .raw, .rle => return .one,
            .compressed, .treeless => {},
        }
        return if (size_format == 0) .one else .four;
    }
};
/// The sequences section of a compressed block: its header and one decoding
/// table each for literals lengths, offsets and match lengths.
pub const SequencesSection = struct {
header: SequencesSection.Header,
literals_length_table: Table,
offset_table: Table,
match_length_table: Table,
/// Decoded sequences section header: the sequence count and one `Mode` per
/// table.
pub const Header = struct {
sequence_count: u24,
match_lengths: Mode,
offsets: Mode,
literal_lengths: Mode,
/// Two-bit mode selecting how a table is obtained.
pub const Mode = enum(u2) {
predefined,
rle,
fse,
repeat,
};
};
};
/// A sequence decoding table: either a slice of FSE entries or a single byte
/// for the RLE case.
pub const Table = union(enum) {
fse: []const Fse,
rle: u8,
/// One FSE table entry.
pub const Fse = struct {
symbol: u8,
baseline: u16,
bits: u8,
};
};
pub const literals_length_code_table = [36]struct { u32, u5 }{
.{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 },
.{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 },
.{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 },
.{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 },
.{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 },
.{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 },
.{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 },
.{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 },
.{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 },
};
pub const match_length_code_table = [53]struct { u32, u5 }{
.{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 },
.{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 },
.{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 },
.{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 },
.{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 },
.{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 },
.{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 },
.{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
.{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
};
pub const literals_length_default_distribution = [36]i16{
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
-1, -1, -1, -1,
};
pub const match_lengths_default_distribution = [53]i16{
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
-1, -1, -1, -1, -1,
};
pub const offset_codes_default_distribution = [29]i16{
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
};
pub const predefined_literal_fse_table = Table{
.fse = &[64]Table.Fse{
.{ .symbol = 0, .bits = 4, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 16 },
.{ .symbol = 1, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 5, .baseline = 0 },
.{ .symbol = 9, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 5, .baseline = 0 },
.{ .symbol = 12, .bits = 5, .baseline = 0 },
.{ .symbol = 14, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 0 },
.{ .symbol = 18, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 0 },
.{ .symbol = 21, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 0 },
.{ .symbol = 24, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 5, .baseline = 32 },
.{ .symbol = 26, .bits = 5, .baseline = 0 },
.{ .symbol = 27, .bits = 6, .baseline = 0 },
.{ .symbol = 29, .bits = 6, .baseline = 0 },
.{ .symbol = 31, .bits = 6, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 32 },
.{ .symbol = 1, .bits = 4, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 5, .baseline = 0 },
.{ .symbol = 13, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 32 },
.{ .symbol = 17, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 32 },
.{ .symbol = 20, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 32 },
.{ .symbol = 23, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 4, .baseline = 0 },
.{ .symbol = 25, .bits = 4, .baseline = 16 },
.{ .symbol = 26, .bits = 5, .baseline = 32 },
.{ .symbol = 28, .bits = 6, .baseline = 0 },
.{ .symbol = 30, .bits = 6, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 48 },
.{ .symbol = 1, .bits = 4, .baseline = 16 },
.{ .symbol = 2, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 32 },
.{ .symbol = 6, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 32 },
.{ .symbol = 9, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 5, .baseline = 32 },
.{ .symbol = 12, .bits = 5, .baseline = 32 },
.{ .symbol = 15, .bits = 6, .baseline = 0 },
.{ .symbol = 17, .bits = 5, .baseline = 32 },
.{ .symbol = 18, .bits = 5, .baseline = 32 },
.{ .symbol = 20, .bits = 5, .baseline = 32 },
.{ .symbol = 21, .bits = 5, .baseline = 32 },
.{ .symbol = 23, .bits = 5, .baseline = 32 },
.{ .symbol = 24, .bits = 5, .baseline = 32 },
.{ .symbol = 35, .bits = 6, .baseline = 0 },
.{ .symbol = 34, .bits = 6, .baseline = 0 },
.{ .symbol = 33, .bits = 6, .baseline = 0 },
.{ .symbol = 32, .bits = 6, .baseline = 0 },
},
};
pub const predefined_match_fse_table = Table{
.fse = &[64]Table.Fse{
.{ .symbol = 0, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 6, .baseline = 0 },
.{ .symbol = 13, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 6, .baseline = 0 },
.{ .symbol = 19, .bits = 6, .baseline = 0 },
.{ .symbol = 22, .bits = 6, .baseline = 0 },
.{ .symbol = 25, .bits = 6, .baseline = 0 },
.{ .symbol = 28, .bits = 6, .baseline = 0 },
.{ .symbol = 31, .bits = 6, .baseline = 0 },
.{ .symbol = 33, .bits = 6, .baseline = 0 },
.{ .symbol = 35, .bits = 6, .baseline = 0 },
.{ .symbol = 37, .bits = 6, .baseline = 0 },
.{ .symbol = 39, .bits = 6, .baseline = 0 },
.{ .symbol = 41, .bits = 6, .baseline = 0 },
.{ .symbol = 43, .bits = 6, .baseline = 0 },
.{ .symbol = 45, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 16 },
.{ .symbol = 2, .bits = 4, .baseline = 0 },
.{ .symbol = 3, .bits = 5, .baseline = 32 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 32 },
.{ .symbol = 7, .bits = 5, .baseline = 0 },
.{ .symbol = 9, .bits = 6, .baseline = 0 },
.{ .symbol = 12, .bits = 6, .baseline = 0 },
.{ .symbol = 15, .bits = 6, .baseline = 0 },
.{ .symbol = 18, .bits = 6, .baseline = 0 },
.{ .symbol = 21, .bits = 6, .baseline = 0 },
.{ .symbol = 24, .bits = 6, .baseline = 0 },
.{ .symbol = 27, .bits = 6, .baseline = 0 },
.{ .symbol = 30, .bits = 6, .baseline = 0 },
.{ .symbol = 32, .bits = 6, .baseline = 0 },
.{ .symbol = 34, .bits = 6, .baseline = 0 },
.{ .symbol = 36, .bits = 6, .baseline = 0 },
.{ .symbol = 38, .bits = 6, .baseline = 0 },
.{ .symbol = 40, .bits = 6, .baseline = 0 },
.{ .symbol = 42, .bits = 6, .baseline = 0 },
.{ .symbol = 44, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 32 },
.{ .symbol = 1, .bits = 4, .baseline = 48 },
.{ .symbol = 2, .bits = 4, .baseline = 16 },
.{ .symbol = 4, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 32 },
.{ .symbol = 7, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 6, .baseline = 0 },
.{ .symbol = 14, .bits = 6, .baseline = 0 },
.{ .symbol = 17, .bits = 6, .baseline = 0 },
.{ .symbol = 20, .bits = 6, .baseline = 0 },
.{ .symbol = 23, .bits = 6, .baseline = 0 },
.{ .symbol = 26, .bits = 6, .baseline = 0 },
.{ .symbol = 29, .bits = 6, .baseline = 0 },
.{ .symbol = 52, .bits = 6, .baseline = 0 },
.{ .symbol = 51, .bits = 6, .baseline = 0 },
.{ .symbol = 50, .bits = 6, .baseline = 0 },
.{ .symbol = 49, .bits = 6, .baseline = 0 },
.{ .symbol = 48, .bits = 6, .baseline = 0 },
.{ .symbol = 47, .bits = 6, .baseline = 0 },
.{ .symbol = 46, .bits = 6, .baseline = 0 },
},
};
pub const predefined_offset_fse_table = Table{
.fse = &[32]Table.Fse{
.{ .symbol = 0, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 4, .baseline = 0 },
.{ .symbol = 9, .bits = 5, .baseline = 0 },
.{ .symbol = 15, .bits = 5, .baseline = 0 },
.{ .symbol = 21, .bits = 5, .baseline = 0 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 4, .baseline = 0 },
.{ .symbol = 12, .bits = 5, .baseline = 0 },
.{ .symbol = 18, .bits = 5, .baseline = 0 },
.{ .symbol = 23, .bits = 5, .baseline = 0 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 4, .baseline = 0 },
.{ .symbol = 14, .bits = 5, .baseline = 0 },
.{ .symbol = 20, .bits = 5, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 4, .baseline = 16 },
.{ .symbol = 11, .bits = 5, .baseline = 0 },
.{ .symbol = 17, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 4, .baseline = 16 },
.{ .symbol = 13, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 0 },
.{ .symbol = 1, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 4, .baseline = 16 },
.{ .symbol = 10, .bits = 5, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 0 },
.{ .symbol = 28, .bits = 5, .baseline = 0 },
.{ .symbol = 27, .bits = 5, .baseline = 0 },
.{ .symbol = 26, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 5, .baseline = 0 },
.{ .symbol = 24, .bits = 5, .baseline = 0 },
},
};
pub const start_repeated_offset_1 = 1;
pub const start_repeated_offset_2 = 4;
pub const start_repeated_offset_3 = 8;
pub const table_accuracy_log_max = struct {
pub const literal = 9;
pub const match = 9;
pub const offset = 8;
};
pub const table_symbol_count_max = struct {
pub const literal = 36;
pub const match = 53;
pub const offset = 32;
};
pub const default_accuracy_log = struct {
pub const literal = 6;
pub const match = 6;
pub const offset = 5;
};
pub const table_size_max = struct {
pub const literal = 1 << table_accuracy_log_max.literal;
pub const match = 1 << table_accuracy_log_max.match;
pub const offset = 1 << table_accuracy_log_max.offset;
};
};
// Reference every declaration of this file, recursing into nested containers.
test {
const testing = @import("std").testing;
testing.refAllDeclsRecursive(@This());
}
+152
View File
@@ -0,0 +1,152 @@
const std = @import("../std.zig");
const assert = std.debug.assert;
pub const Decompress = @import("zstd/Decompress.zig");
/// Recommended amount by the standard. Lower than this may result in inability
/// to decompress common streams.
pub const default_window_len = 8 * 1024 * 1024;
pub const block_size_max = 1 << 17;
pub const literals_length_default_distribution = [36]i16{
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
-1, -1, -1, -1,
};
pub const match_lengths_default_distribution = [53]i16{
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
-1, -1, -1, -1, -1,
};
pub const offset_codes_default_distribution = [29]i16{
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
};
pub const start_repeated_offset_1 = 1;
pub const start_repeated_offset_2 = 4;
pub const start_repeated_offset_3 = 8;
pub const literals_length_code_table = [36]struct { u32, u5 }{
.{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 },
.{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 },
.{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 },
.{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 },
.{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 },
.{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 },
.{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 },
.{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 },
.{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 },
};
pub const match_length_code_table = [53]struct { u32, u5 }{
.{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 },
.{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 },
.{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 },
.{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 },
.{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 },
.{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 },
.{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 },
.{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
.{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
};
pub const table_accuracy_log_max = struct {
pub const literal = 9;
pub const match = 9;
pub const offset = 8;
};
pub const table_symbol_count_max = struct {
pub const literal = 36;
pub const match = 53;
pub const offset = 32;
};
pub const default_accuracy_log = struct {
pub const literal = 6;
pub const match = 6;
pub const offset = 5;
};
pub const table_size_max = struct {
pub const literal = 1 << table_accuracy_log_max.literal;
pub const match = 1 << table_accuracy_log_max.match;
pub const offset = 1 << table_accuracy_log_max.offset;
};
/// Decompresses `compressed` in full and returns the result as a slice
/// allocated with `gpa`. The caller owns the returned slice.
fn testDecompress(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
    var decompressed: std.ArrayListUnmanaged(u8) = .empty;
    defer decompressed.deinit(gpa);
    try decompressed.ensureUnusedCapacity(gpa, default_window_len);

    var input: std.io.Reader = .fixed(compressed);
    var decompress: Decompress = .init(&input, &.{}, .{});
    try decompress.reader.appendRemaining(gpa, null, &decompressed, .unlimited);

    return decompressed.toOwnedSlice(gpa);
}
/// Asserts that decompressing `compressed` yields exactly `uncompressed`.
fn testExpectDecompress(uncompressed: []const u8, compressed: []const u8) !void {
    const allocator = std.testing.allocator;
    const actual = try testDecompress(allocator, compressed);
    defer allocator.free(actual);
    try std.testing.expectEqualSlices(u8, uncompressed, actual);
}
/// Asserts that decompressing `compressed` fails with `error.ReadFailed` and
/// that the decompressor recorded `err` as the cause in its `err` field.
fn testExpectDecompressError(err: anyerror, compressed: []const u8) !void {
const gpa = std.testing.allocator;
var out: std.ArrayListUnmanaged(u8) = .empty;
defer out.deinit(gpa);
try out.ensureUnusedCapacity(gpa, default_window_len);
var in: std.io.Reader = .fixed(compressed);
var zstd_stream: Decompress = .init(&in, &.{}, .{});
// The failure is expected to surface through the reader as `error.ReadFailed`.
try std.testing.expectError(
error.ReadFailed,
zstd_stream.reader.appendRemaining(gpa, null, &out, .unlimited),
);
// `orelse {}` turns the optional error into an error union: a recorded error
// is compared against `err`, and a null `err` becomes a void success value,
// which makes `expectError` fail.
try std.testing.expectError(err, zstd_stream.err orelse {});
}
// Decompresses two compressed copies of the same document and checks both
// against the uncompressed original.
test Decompress {
const uncompressed = @embedFile("testdata/rfc8478.txt");
// NOTE(review): the `.3` / `.19` suffixes presumably denote the compression
// level used to produce each fixture; confirm against how they were generated.
const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
try testExpectDecompress(uncompressed, compressed3);
try testExpectDecompress(uncompressed, compressed19);
}
// A frame whose only block is a raw block of size zero must decompress to
// empty output.
test "zero sized raw block" {
const input_raw =
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
"\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero
try testExpectDecompress("", input_raw);
}
// A frame whose only block is an RLE block of size zero must decompress to
// empty output. The input still includes one byte of block content.
test "zero sized rle block" {
const input_rle =
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
"\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero
"\xaa"; // block_content
try testExpectDecompress("", input_rle);
}
// A raw literals section whose declared size exceeds the data actually present
// must be rejected with `error.MalformedLiteralsSection`.
test "declared raw literals size too large" {
const input_raw =
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
"\x00\x00" ++ // frame header: everything unset, window descriptor zero
"\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18
"\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603
"\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size
// Note that the regenerated_size in the above input is larger than block maximum size, so the
// block can't be valid as it is a raw literals block.
try testExpectDecompressError(error.MalformedLiteralsSection, input_raw);
}
+1840
View File
@@ -0,0 +1,1840 @@
const Decompress = @This();
const std = @import("std");
const assert = std.debug.assert;
const Reader = std.io.Reader;
const Limit = std.io.Limit;
const zstd = @import("../zstd.zig");
const Writer = std.io.Writer;
/// Reader the compressed data is pulled from.
input: *Reader,
/// Reader interface through which callers obtain the decompressed data.
reader: Reader,
/// Current position in the frame-level state machine.
state: State,
/// Copied from `Options.verify_checksum`.
verify_checksum: bool,
/// Copied from `Options.window_len`.
window_len: u32,
/// Cause of a failure, when one has been recorded; `null` otherwise.
err: ?Error = null,
/// Frame-level decoding state. `init` starts in `.new_frame`.
// NOTE(review): the meaning of each variant below is inferred from its name
// and payload; confirm against the `stream` implementation.
const State = union(enum) {
// Presumably: the next input bytes are expected to start a frame.
new_frame,
// Presumably: a frame is currently being decoded.
in_frame: InFrame,
// Presumably: a skippable frame is being skipped, with the payload counting
// the bytes still to skip.
skipping_frame: usize,
// Presumably: no more output will be produced.
end,
/// Bookkeeping for the frame being decoded.
const InFrame = struct {
frame: Frame,
checksum: ?u32,
decompressed_size: usize,
decode: Frame.Zstandard.Decode,
};
};
/// Configuration accepted by `init`.
pub const Options = struct {
/// Verifying checksums is not implemented yet and will cause a panic if
/// you set this to true.
verify_checksum: bool = false,
/// The output buffer is asserted to have capacity for `window_len` plus
/// `zstd.block_size_max`.
///
/// If `window_len` is too small, then some streams will fail to decompress
/// with `error.OutputBufferUndersize`.
window_len: u32 = zstd.default_window_len,
};
/// Every error that can end up in the `err` field. `stream` itself only
/// reports `error.ReadFailed`, `error.WriteFailed` or `error.EndOfStream`;
/// the more specific cause is recorded in `err`.
pub const Error = error{
BadMagic,
BlockOversize,
ChecksumFailure,
ContentOversize,
DictionaryIdFlagUnsupported,
EndOfStream,
HuffmanTreeIncomplete,
InvalidBitStream,
MalformedAccuracyLog,
MalformedBlock,
MalformedCompressedBlock,
MalformedFrame,
MalformedFseBits,
MalformedFseTable,
MalformedHuffmanTree,
MalformedLiteralsHeader,
MalformedLiteralsLength,
MalformedLiteralsSection,
MalformedSequence,
MissingStartBit,
OutputBufferUndersize,
InputBufferUndersize,
ReadFailed,
RepeatModeFirst,
ReservedBitSet,
ReservedBlock,
SequenceBufferUndersize,
TreelessLiteralsFirst,
UnexpectedEndOfLiteralStream,
WindowOversize,
WindowSizeUnknown,
};
/// Creates a decompressor that pulls compressed bytes from `input`.
///
/// `buffer` becomes the backing storage of `reader`. When `reader` is going to
/// be connected to a `Writer`, pass an empty `buffer`; the capacity
/// requirements derived from `Options.window_len` then apply to
/// `Writer.buffer`. In every other case those requirements apply to `buffer`.
pub fn init(input: *Reader, buffer: []u8, options: Options) Decompress {
    const interface: Reader = .{
        .vtable = &.{
            .stream = stream,
            .rebase = rebase,
        },
        .buffer = buffer,
        .seek = 0,
        .end = 0,
    };
    return .{
        .input = input,
        .reader = interface,
        .state = .new_frame,
        .verify_checksum = options.verify_checksum,
        .window_len = options.window_len,
    };
}
/// `Reader` vtable hook. Frees room at the end of the buffer by moving the most
/// recent `window_len` buffered bytes to the front and dropping everything
/// older than that.
fn rebase(r: *Reader, capacity: usize) Reader.RebaseError!void {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    const window: usize = d.window_len;
    assert(capacity <= r.buffer.len - window);
    assert(r.end + capacity > r.buffer.len);
    // Number of leading bytes that fall out of the window.
    const dropped = r.end - window;
    @memmove(r.buffer[0..window], r.buffer[dropped..r.end]);
    r.end = window;
    r.seek -= dropped;
}
/// `Reader` vtable hook that advances the decompressor by one step and writes
/// any produced bytes to `w`.
///
/// Returns the number of decompressed bytes produced, which is 0 while a
/// skippable frame is being discarded. Decoding failures are recorded in
/// `err` and reported as `error.ReadFailed`; `error.ReadFailed` and
/// `error.WriteFailed` coming from `readInFrame` are passed through untouched.
/// `error.EndOfStream` is returned when `input` ends exactly at a frame
/// boundary or when the state is `.end`.
fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    const in = d.input;
    state: switch (d.state) {
        .new_frame => {
            // Allow error.EndOfStream only on the frame magic.
            const magic = try in.takeEnumNonexhaustive(Frame.Magic, .little);
            initFrame(d, w.buffer.len, magic) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            // `initFrame` selects `.in_frame` for a Zstandard frame and
            // `.skipping_frame` for a skippable one. Re-dispatch on whichever
            // state it chose instead of assuming `.in_frame`, which would
            // access an inactive union field for skippable frames.
            continue :state d.state;
        },
        .in_frame => |*in_frame| {
            return readInFrame(d, w, limit, in_frame) catch |err| switch (err) {
                error.ReadFailed => return error.ReadFailed,
                error.WriteFailed => return error.WriteFailed,
                else => |e| {
                    d.err = e;
                    return error.ReadFailed;
                },
            };
        },
        .skipping_frame => |*remaining| {
            // Skippable frames carry no decompressed data: drop as much of the
            // payload as is available and report zero bytes produced.
            const n = in.discard(.limited(remaining.*)) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            remaining.* -= n;
            if (remaining.* == 0) d.state = .new_frame;
            return 0;
        },
        .end => return error.EndOfStream,
    }
}
/// Starts a new frame identified by `magic`. A Zstandard frame has its header
/// parsed and validated and switches the state to `.in_frame`; a skippable
/// frame has its 32-bit size read and switches the state to `.skipping_frame`.
/// Any other magic number yields `error.BadMagic`.
fn initFrame(d: *Decompress, window_size_max: usize, magic: Frame.Magic) !void {
    const frame_kind = magic.kind() orelse return error.BadMagic;
    switch (frame_kind) {
        .skippable => {
            const payload_len = try d.input.takeInt(u32, .little);
            d.state = .{ .skipping_frame = payload_len };
        },
        .zstandard => {
            const header = try Frame.Zstandard.Header.decode(d.input);
            const frame = try Frame.init(header, window_size_max, d.verify_checksum);
            d.state = .{ .in_frame = .{
                .frame = frame,
                .checksum = null,
                .decompressed_size = 0,
                .decode = .init,
            } };
        },
    }
}
/// Decodes one block of the frame described by `state` from `d.input` into `w`
/// and returns the number of decompressed bytes that block produced.
///
/// After the block flagged `last`, the optional 32-bit checksum is consumed,
/// the content size declared by the frame header (if any) is compared with
/// the bytes produced, and `d.state` goes back to `.new_frame`.
fn readInFrame(d: *Decompress, w: *Writer, limit: Limit, state: *State.InFrame) !usize {
const in = d.input;
const window_len = d.window_len;
const block_header = try in.takeStruct(Frame.Zstandard.Block.Header, .little);
const block_size = block_header.size;
const frame_block_size_max = state.frame.block_size_max;
// Reject blocks larger than this frame allows, or larger than `limit`.
if (frame_block_size_max < block_size) return error.BlockOversize;
if (@intFromEnum(limit) < block_size) return error.OutputBufferUndersize;
var bytes_written: usize = 0;
switch (block_header.type) {
.raw => {
// Raw block: `block_size` bytes are passed from the input to `w`.
try in.streamExactPreserve(w, window_len, block_size);
bytes_written = block_size;
},
.rle => {
// RLE block: a single input byte is written `block_size` times.
const byte = try in.takeByte();
try w.splatBytePreserve(window_len, byte, block_size);
bytes_written = block_size;
},
.compressed => {
var literals_buffer: [zstd.block_size_max]u8 = undefined;
var sequence_buffer: [zstd.block_size_max]u8 = undefined;
// Tracks how many bytes of the block are still unread as each section
// is consumed.
var remaining: Limit = .limited(block_size);
const literals = try LiteralsSection.decode(in, &remaining, &literals_buffer);
const sequences_header = try SequencesSection.Header.decode(in, &remaining);
const decode = &state.decode;
try decode.prepare(in, &remaining, literals, sequences_header);
{
// Whatever is left of the block is the sequence bit stream; it is
// read in full and then consumed through a `ReverseBitReader`.
if (sequence_buffer.len < @intFromEnum(remaining))
return error.SequenceBufferUndersize;
const seq_slice = remaining.slice(&sequence_buffer);
try in.readSliceAll(seq_slice);
var bit_stream = try ReverseBitReader.init(seq_slice);
if (sequences_header.sequence_count > 0) {
try decode.readInitialFseState(&bit_stream);
// Ensures the following calls to `decodeSequence` will not flush.
if (window_len + frame_block_size_max > w.buffer.len) return error.OutputBufferUndersize;
const dest = (try w.writableSliceGreedyPreserve(window_len, frame_block_size_max))[0..frame_block_size_max];
const write_pos = dest.ptr - w.buffer.ptr;
// The FSE states are advanced after every sequence except the last.
for (0..sequences_header.sequence_count - 1) |_| {
bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream);
try decode.updateState(.literal, &bit_stream);
try decode.updateState(.match, &bit_stream);
try decode.updateState(.offset, &bit_stream);
}
bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream);
if (bytes_written > dest.len) return error.MalformedSequence;
w.advance(bytes_written);
}
// Every bit of the sequence stream must have been consumed.
if (!bit_stream.isEmpty()) {
return error.MalformedCompressedBlock;
}
}
// Literals not consumed by any sequence are written out after the
// last sequence.
if (decode.literal_written_count < literals.header.regenerated_size) {
const len = literals.header.regenerated_size - decode.literal_written_count;
try decode.decodeLiterals(w, len);
decode.literal_written_count += len;
bytes_written += len;
}
// Huffman-coded literal streams must be fully consumed as well.
switch (decode.literal_header.block_type) {
.treeless, .compressed => {
if (!decode.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
},
.raw, .rle => {},
}
if (bytes_written > frame_block_size_max) return error.BlockOversize;
},
.reserved => return error.ReservedBlock,
}
// Checksum verification is not implemented: a hasher only exists when
// verification was requested, and hitting it with data panics.
if (state.frame.hasher_opt) |*hasher| {
if (bytes_written > 0) {
_ = hasher;
@panic("TODO all those bytes written needed to go through the hasher too");
}
}
state.decompressed_size += bytes_written;
if (block_header.last) {
if (state.frame.has_checksum) {
// The checksum bytes are always consumed, even when not verified.
const expected_checksum = try in.takeInt(u32, .little);
if (state.frame.hasher_opt) |*hasher| {
const actual_checksum: u32 = @truncate(hasher.final());
if (expected_checksum != actual_checksum) return error.ChecksumFailure;
}
}
if (state.frame.content_size) |content_size| {
if (content_size != state.decompressed_size) {
return error.MalformedFrame;
}
}
d.state = .new_frame;
} else if (state.frame.content_size) |content_size| {
// Mid-frame: the output must not already exceed the declared size.
if (state.decompressed_size > content_size) return error.MalformedFrame;
}
return bytes_written;
}
pub const Frame = struct {
/// Non-null only when the frame carries a checksum and verification was
/// requested in `init`.
hasher_opt: ?std.hash.XxHash64,
/// Window size derived from the frame header.
window_size: usize,
/// Whether the frame header's `content_checksum_flag` is set.
has_checksum: bool,
/// Largest block accepted for this frame: `@min(zstd.block_size_max, window_size)`.
block_size_max: usize,
/// Decompressed size declared by the frame header, if any.
content_size: ?usize,
/// The 32-bit value that opens every frame.
pub const Magic = enum(u32) {
    zstandard = 0xFD2FB528,
    _,

    /// Classifies the magic number, or returns `null` when it matches neither
    /// the Zstandard magic nor the skippable range.
    pub fn kind(m: Magic) ?Kind {
        if (m == .zstandard) return .zstandard;
        if (m.isSkippable()) return .skippable;
        return null;
    }

    /// Reports whether the value lies in the inclusive range
    /// `Skippable.magic_min`...`Skippable.magic_max`.
    pub fn isSkippable(m: Magic) bool {
        const raw = @intFromEnum(m);
        const lowest = @intFromEnum(Skippable.magic_min);
        const highest = @intFromEnum(Skippable.magic_max);
        return lowest <= raw and raw <= highest;
    }
};
/// The frame categories distinguished by `Magic.kind`.
pub const Kind = enum { zstandard, skippable };
pub const Zstandard = struct {
/// Magic number that identifies a Zstandard frame.
pub const magic: Magic = .zstandard;
// NOTE(review): the fields below are not referenced by the streaming decoder
// in this part of the file; presumably they describe a fully parsed frame.
header: Header,
data_blocks: []Block,
checksum: ?u32,
/// The parsed header of a Zstandard frame.
pub const Header = struct {
    descriptor: Descriptor,
    window_descriptor: ?u8,
    dictionary_id: ?u32,
    content_size: ?u64,

    /// Bit layout of the first header byte, least significant bits first.
    pub const Descriptor = packed struct {
        dictionary_id_flag: u2,
        content_checksum_flag: bool,
        reserved: bool,
        unused: bool,
        single_segment_flag: bool,
        content_size_flag: u2,
    };

    pub const DecodeError = Reader.Error || error{ReservedBitSet};

    /// Reads a frame header from `in`. The optional fields are read in the
    /// order window descriptor, dictionary id, content size, each one only
    /// when the descriptor byte says it is present.
    pub fn decode(in: *Reader) DecodeError!Header {
        const descriptor: Descriptor = @bitCast(try in.takeByte());
        if (descriptor.reserved) return error.ReservedBitSet;

        var window_descriptor: ?u8 = null;
        if (!descriptor.single_segment_flag) window_descriptor = try in.takeByte();

        var dictionary_id: ?u32 = null;
        if (descriptor.dictionary_id_flag > 0) {
            // Flag values 1, 2, 3 select field widths of 1, 2, 4 bytes.
            const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1;
            dictionary_id = try in.takeVarInt(u32, .little, field_size);
        }

        var content_size: ?u64 = null;
        if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) {
            // Flag values 0, 1, 2, 3 select field widths of 1, 2, 4, 8 bytes.
            const field_size = @as(u4, 1) << descriptor.content_size_flag;
            const stored = try in.takeVarInt(u64, .little, field_size);
            // The two-byte encoding stores the size minus 256.
            content_size = if (field_size == 2) stored + 256 else stored;
        }

        return .{
            .descriptor = descriptor,
            .window_descriptor = window_descriptor,
            .dictionary_id = dictionary_id,
            .content_size = content_size,
        };
    }

    /// Returns the window size required to decompress a frame, or `null` if it
    /// cannot be determined (which indicates a malformed frame header).
    pub fn windowSize(header: Header) ?u64 {
        const descriptor = header.window_descriptor orelse return header.content_size;
        // Upper five bits: exponent. Lower three bits: mantissa.
        const exponent = descriptor >> 3;
        const mantissa = descriptor & 0b111;
        const window_log = 10 + exponent;
        const window_base = @as(u64, 1) << @as(u6, @intCast(window_log));
        return window_base + (window_base / 8) * mantissa;
    }
};
/// Types describing the blocks that make up a Zstandard frame.
pub const Block = struct {
/// The 24-bit block header; fields are listed starting at the least
/// significant bit.
pub const Header = packed struct(u24) {
/// Set on the final block of a frame.
last: bool,
type: Type,
/// For raw blocks the number of bytes to copy, for RLE blocks the repeat
/// count, and for compressed blocks the size of the compressed data.
size: u21,
};
/// How the block content is encoded.
pub const Type = enum(u2) {
raw,
rle,
compressed,
reserved,
};
};
pub const Decode = struct {
/// The three most recently used match offsets, most recent first.
repeat_offsets: [3]u32,
/// FSE decoding state for offset codes.
offset: StateData(8),
/// FSE decoding state for match length codes.
match: StateData(9),
/// FSE decoding state for literal length codes.
literal: StateData(9),
/// Backing storage for FSE tables decoded from the input by `updateFseTable`.
literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse,
match_fse_buffer: [zstd.table_size_max.match]Table.Fse,
offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse,
/// True until `prepare` has loaded the tables for a block with sequences.
fse_tables_undefined: bool,
/// Bit reader over the literal stream currently being decoded.
literal_stream_reader: ReverseBitReader,
/// Index of the current stream when `literal_streams` is `.four`.
literal_stream_index: usize,
literal_streams: LiteralsSection.Streams,
literal_header: LiteralsSection.Header,
/// Kept between blocks so that treeless literals can reuse the last tree.
huffman_tree: ?LiteralsSection.HuffmanTree,
/// Number of literal bytes already emitted for the current block.
literal_written_count: usize,
/// Returns the per-table FSE decoding state type. `State` is an unsigned
/// integer that is `max_accuracy_log` bits wide.
fn StateData(comptime max_accuracy_log: comptime_int) type {
return struct {
/// Current index into the FSE table.
state: @This().State,
table: Table,
accuracy_log: u8,
const State = std.meta.Int(.unsigned, max_accuracy_log);
};
}
/// Decoding state at the start of a frame: the repeat offsets hold their
/// start values, no Huffman tree is available and the FSE tables are marked
/// undefined. Everything set to `undefined` here is assigned by `prepare` (or
/// the helpers it calls) before it is read.
const init: Decode = .{
.repeat_offsets = .{
zstd.start_repeated_offset_1,
zstd.start_repeated_offset_2,
zstd.start_repeated_offset_3,
},
.offset = undefined,
.match = undefined,
.literal = undefined,
.literal_fse_buffer = undefined,
.match_fse_buffer = undefined,
.offset_fse_buffer = undefined,
.fse_tables_undefined = true,
.literal_written_count = 0,
.literal_header = undefined,
.literal_streams = undefined,
.literal_stream_reader = undefined,
.literal_stream_index = undefined,
.huffman_tree = null,
};
/// Errors returned by `prepare`.
pub const PrepareError = error{
/// The (reversed) literal bitstream's first byte does not have any bits set.
MissingStartBit,
/// `literals` is a treeless literals section and the decode state does not
/// have a Huffman tree from a previous block.
TreelessLiteralsFirst,
/// One of the sequence FSE tables is set to repeat mode on the first call.
RepeatModeFirst,
/// An FSE table has an invalid accuracy.
MalformedAccuracyLog,
/// Failed decoding an FSE table.
MalformedFseTable,
/// Input stream ends before all FSE tables are read.
EndOfStream,
ReadFailed,
/// The input reader's buffer is too small to peek a whole FSE table.
InputBufferUndersize,
};
/// Sets the decoder up for one compressed block.
///
/// Takes over the literal streams and (when present) the Huffman tree from
/// `literals`, positions the literal bit reader on the first stream for
/// Huffman-coded literals, and, if the block has sequences, loads the three
/// FSE tables from `in` while charging the bytes consumed to `remaining`.
pub fn prepare(
    self: *Decode,
    in: *Reader,
    remaining: *Limit,
    literals: LiteralsSection,
    sequences_header: SequencesSection.Header,
) PrepareError!void {
    self.literal_written_count = 0;
    self.literal_header = literals.header;
    self.literal_streams = literals.streams;

    const block_type = literals.header.block_type;
    if (literals.huffman_tree) |tree| {
        self.huffman_tree = tree;
    } else if (block_type == .treeless and self.huffman_tree == null) {
        return error.TreelessLiteralsFirst;
    }

    if (block_type == .compressed or block_type == .treeless) {
        self.literal_stream_index = 0;
        const first_stream = switch (literals.streams) {
            .one => |slice| slice,
            .four => |streams| streams[0],
        };
        try self.initLiteralStream(first_stream);
    }

    if (sequences_header.sequence_count == 0) return;

    // Table descriptions appear in the input in this order.
    try self.updateFseTable(in, remaining, .literal, sequences_header.literal_lengths);
    try self.updateFseTable(in, remaining, .offset, sequences_header.offsets);
    try self.updateFseTable(in, remaining, .match, sequences_header.match_lengths);
    self.fse_tables_undefined = false;
}
/// Read initial FSE states for sequence decoding.
///
/// Each state is `accuracy_log` bits wide for its table. The read order
/// (literal, offset, match) is significant because all three come from the
/// same bit stream.
pub fn readInitialFseState(self: *Decode, bit_reader: *ReverseBitReader) error{EndOfStream}!void {
self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log);
}
/// Pushes `offset` onto the front of the repeat-offset history, dropping the
/// oldest entry.
fn updateRepeatOffset(self: *Decode, offset: u32) void {
    const previous = self.repeat_offsets;
    self.repeat_offsets = .{ offset, previous[0], previous[1] };
}
/// Moves the history entry at `index` to the front, keeping the relative order
/// of the others, and returns it. Any `index` other than 1 or 2 leaves the
/// history untouched and returns the current front entry.
fn useRepeatOffset(self: *Decode, index: usize) u32 {
    const previous = self.repeat_offsets;
    switch (index) {
        1 => self.repeat_offsets = .{ previous[1], previous[0], previous[2] },
        2 => self.repeat_offsets = .{ previous[2], previous[0], previous[1] },
        else => {},
    }
    return self.repeat_offsets[0];
}
/// Selects one of the three FSE state fields of `Decode`; the tag names match
/// the field names so they can be used with `@field`.
const WhichFse = enum { offset, match, literal };
/// Advances the FSE state selected by `choice` using bits from `bit_reader`.
/// RLE tables have no state to advance, so nothing is read for them.
///
/// TODO: don't use `@field`
fn updateState(
    self: *Decode,
    comptime choice: WhichFse,
    bit_reader: *ReverseBitReader,
) error{ MalformedFseBits, EndOfStream }!void {
    const slot = &@field(self, @tagName(choice));
    const StateInt = @TypeOf(slot.*).State;
    const entries = switch (slot.table) {
        .rle => return,
        .fse => |table| table,
    };
    const entry = entries[slot.state];
    const extra = try bit_reader.readBitsNoEof(StateInt, entry.bits);
    // The next state is the entry's baseline plus the freshly read bits and
    // must still fit the state integer.
    slot.state = std.math.cast(StateInt, entry.baseline + extra) orelse
        return error.MalformedFseBits;
}
// NOTE(review): `updateFseTable` below uses an inferred error set, so this
// set is not referenced in this part of the file — confirm whether it is
// still needed.
const FseTableError = error{
MalformedFseTable,
MalformedAccuracyLog,
RepeatModeFirst,
EndOfStream,
};
/// Loads the FSE table selected by `choice` according to `mode`, charging any
/// bytes taken from `in` to `remaining`.
///
/// TODO: don't use `@field`
fn updateFseTable(
self: *Decode,
in: *Reader,
remaining: *Limit,
comptime choice: WhichFse,
mode: SequencesSection.Header.Mode,
) !void {
const field_name = @tagName(choice);
switch (mode) {
.predefined => {
// Use the built-in table and accuracy log for this kind of code.
@field(self, field_name).accuracy_log =
@field(zstd.default_accuracy_log, field_name);
@field(self, field_name).table =
@field(Table, "predefined_" ++ field_name);
},
.rle => {
// A single input byte is the symbol for every sequence.
@field(self, field_name).accuracy_log = 0;
remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
@field(self, field_name).table = .{ .rle = try in.takeByte() };
},
.fse => {
// The encoded table length is not known up front, so peek at most
// `max_table_size` bytes (bounded by what is left of the block),
// decode from that view, then consume only what the decoder used.
const max_table_size = 2048;
const peek_len: usize = remaining.minInt(max_table_size);
if (in.buffer.len < peek_len) return error.InputBufferUndersize;
const limited_buffer = try in.peek(peek_len);
var bit_reader: BitReader = .{ .bytes = limited_buffer };
const table_size = try Table.decode(
&bit_reader,
@field(zstd.table_symbol_count_max, field_name),
@field(zstd.table_accuracy_log_max, field_name),
&@field(self, field_name ++ "_fse_buffer"),
);
@field(self, field_name).table = .{
.fse = (&@field(self, field_name ++ "_fse_buffer"))[0..table_size],
};
@field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
in.toss(bit_reader.index);
remaining.* = remaining.subtract(bit_reader.index).?;
},
// Keep the table from the previous block; invalid if there is none yet.
.repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
}
}
/// One decoded sequence as returned by `nextSequence`: copy `literal_length`
/// literal bytes, then `match_length` bytes starting `offset` bytes back in
/// the output.
const Sequence = struct {
literal_length: u32,
match_length: u32,
offset: u32,
};
/// Decodes the next sequence from the current FSE states and `bit_reader`.
///
/// The extra bits are read in the order offset, match length, literal length.
/// Offset values 1 to 3 refer to the repeat-offset history, which is updated
/// as a side effect. A resulting offset of zero is rejected with
/// `error.InvalidBitStream`.
fn nextSequence(
self: *Decode,
bit_reader: *ReverseBitReader,
) error{ InvalidBitStream, EndOfStream }!Sequence {
const raw_code = self.getCode(.offset);
// The code doubles as a shift amount and as the number of extra bits, so
// it has to fit in a u5.
const offset_code = std.math.cast(u5, raw_code) orelse {
return error.InvalidBitStream;
};
const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);
const match_code = self.getCode(.match);
if (match_code >= zstd.match_length_code_table.len)
return error.InvalidBitStream;
// Table entries are indexed as [0] = base value, [1] = number of extra bits.
const match = zstd.match_length_code_table[match_code];
const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);
const literal_code = self.getCode(.literal);
if (literal_code >= zstd.literals_length_code_table.len)
return error.InvalidBitStream;
const literal = zstd.literals_length_code_table[literal_code];
const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);
const offset = if (offset_value > 3) offset: {
// A new explicit offset; it becomes the most recent history entry.
const offset = offset_value - 3;
self.updateRepeatOffset(offset);
break :offset offset;
} else offset: {
if (literal_length == 0) {
// With no literals the repeat codes are shifted by one, and the
// value 3 means "most recent offset minus one".
if (offset_value == 3) {
const offset = self.repeat_offsets[0] - 1;
self.updateRepeatOffset(offset);
break :offset offset;
}
break :offset self.useRepeatOffset(offset_value);
}
break :offset self.useRepeatOffset(offset_value - 1);
};
if (offset == 0) return error.InvalidBitStream;
return .{
.literal_length = literal_length,
.match_length = match_length,
.offset = offset,
};
}
/// Decode one sequence from `bit_reader` into `dest` starting at `write_pos`
/// and return the number of bytes it produced (literals plus match). The FSE
/// states are not advanced here; the caller does that between sequences with
/// `updateState`. Assumes `prepare` was called for the block before
/// attempting to decode sequences.
fn decodeSequence(
decode: *Decode,
dest: []u8,
write_pos: usize,
bit_reader: *ReverseBitReader,
) !usize {
const sequence = try decode.nextSequence(bit_reader);
const literal_length: usize = sequence.literal_length;
const match_length: usize = sequence.match_length;
const sequence_length = literal_length + match_length;
// The match source begins `offset` bytes before the end of the literals;
// an offset reaching before the start of `dest` is malformed.
const copy_start = std.math.sub(usize, write_pos + sequence.literal_length, sequence.offset) catch
return error.MalformedSequence;
if (decode.literal_written_count + literal_length > decode.literal_header.regenerated_size)
return error.MalformedLiteralsLength;
var sub_bw: Writer = .fixed(dest[write_pos..]);
try decodeLiterals(decode, &sub_bw, literal_length);
decode.literal_written_count += literal_length;
// This is not a @memmove; it intentionally repeats patterns
// caused by iterating one byte at a time.
for (
dest[write_pos + literal_length ..][0..match_length],
dest[copy_start..][0..match_length],
) |*d, s| d.* = s;
return sequence_length;
}
/// Moves on to the following stream of a four-stream literals section and
/// points the literal bit reader at it.
fn nextLiteralMultiStream(self: *Decode) error{MissingStartBit}!void {
    const next_index = self.literal_stream_index + 1;
    self.literal_stream_index = next_index;
    try self.initLiteralStream(self.literal_streams.four[next_index]);
}
/// Points the literal bit reader at `bytes`. Fails with
/// `error.MissingStartBit` when `ReverseBitReader.init` rejects the stream.
fn initLiteralStream(self: *Decode, bytes: []const u8) error{MissingStartBit}!void {
self.literal_stream_reader = try ReverseBitReader.init(bytes);
}
/// Reports whether all literal data has been consumed: the reader must be on
/// the final stream (always the case for a single stream, index 3 for four
/// streams) and that stream must have no bits left.
fn isLiteralStreamEmpty(self: *Decode) bool {
    const on_final_stream = switch (self.literal_streams) {
        .one => true,
        .four => self.literal_stream_index == 3,
    };
    return on_final_stream and self.literal_stream_reader.isEmpty();
}
/// Errors returned by `readLiteralsBits`.
const LiteralBitsError = error{
/// Switching to the next literal stream failed in `initLiteralStream`.
MissingStartBit,
/// The literal stream(s) ran out of bits before the request was satisfied.
UnexpectedEndOfLiteralStream,
};
/// Reads `bit_count_to_read` bits from the current literal stream. If that
/// stream cannot satisfy the request and further streams exist (four-stream
/// mode, not yet on the last one), advances to the next stream and retries
/// once there.
fn readLiteralsBits(
    self: *Decode,
    bit_count_to_read: u16,
) LiteralBitsError!u16 {
    return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch {
        const has_next_stream = self.literal_streams == .four and self.literal_stream_index < 3;
        if (!has_next_stream) return error.UnexpectedEndOfLiteralStream;
        try self.nextLiteralMultiStream();
        const bits = self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
            return error.UnexpectedEndOfLiteralStream;
        return bits;
    };
}
/// Decode `len` bytes of literals into `w`.
///
/// Raw literals are copied from the literal stream starting at
/// `literal_written_count`, RLE literals repeat the stream's first byte, and
/// Huffman-coded literals are decoded symbol by symbol from the literal bit
/// stream. The caller is responsible for advancing `literal_written_count`.
fn decodeLiterals(d: *Decode, w: *Writer, len: usize) !void {
switch (d.literal_header.block_type) {
.raw => {
try w.writeAll(d.literal_streams.one[d.literal_written_count..][0..len]);
},
.rle => {
try w.splatByteAll(d.literal_streams.one[0], len);
},
.compressed, .treeless => {
if (len > w.buffer.len) return error.OutputBufferUndersize;
const buf = try w.writableSlice(len);
const huffman_tree = d.huffman_tree.?;
const max_bit_count = huffman_tree.max_bit_count;
// Decoding starts at the last node of the tree, using that node's code
// length as the initial number of bits to read.
const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
max_bit_count,
);
var bits_read: u4 = 0;
var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
var bit_count_to_read: u4 = starting_bit_count;
for (buf) |*out| {
var prefix: u16 = 0;
while (true) {
const new_bits = try d.readLiteralsBits(bit_count_to_read);
prefix <<= bit_count_to_read;
prefix |= new_bits;
bits_read += bit_count_to_read;
const result = try huffman_tree.query(huffman_tree_index, prefix);
switch (result) {
.symbol => |sym| {
// Symbol found: emit it and reset the search for the next one.
out.* = sym;
bit_count_to_read = starting_bit_count;
bits_read = 0;
huffman_tree_index = huffman_tree.symbol_count_minus_one;
break;
},
.index => |index| {
// No match among codes of the current length: continue at the
// returned node and read only the additional bits its code
// length requires.
huffman_tree_index = index;
const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[index].weight,
max_bit_count,
);
bit_count_to_read = bit_count - bits_read;
},
}
}
}
},
}
}
/// Returns the code currently selected by the FSE state `choice`: the fixed
/// byte for an RLE table, otherwise the symbol of the table entry at the
/// current state.
///
/// TODO: don't use `@field`
fn getCode(self: *Decode, comptime choice: WhichFse) u32 {
    const slot = &@field(self, @tagName(choice));
    switch (slot.table) {
        .rle => |value| return value,
        .fse => |entries| return entries[slot.state].symbol,
    }
}
};
};
/// Constants and layout for skippable frames.
pub const Skippable = struct {
/// Lowest magic number that `Magic.kind` classifies as skippable.
pub const magic_min: Magic = @enumFromInt(0x184D2A50);
/// Highest magic number that `Magic.kind` classifies as skippable.
pub const magic_max: Magic = @enumFromInt(0x184D2A5F);
pub const Header = struct {
magic_number: u32,
// NOTE(review): presumably the number of payload bytes following the
// header, matching the u32 read in `initFrame` — confirm.
frame_size: u32,
};
};
/// Errors returned by `Frame.init`.
const InitError = error{
/// Frame uses a dictionary.
DictionaryIdFlagUnsupported,
/// Frame does not have a valid window size.
WindowSizeUnknown,
/// Window size exceeds `window_size_max` or max `usize` value.
WindowOversize,
/// Frame header indicates a content size exceeding max `usize` value.
ContentOversize,
};
/// Checks `frame_header` against the decoder's limits and builds the `Frame`
/// that describes it.
///
/// Frames that use a dictionary are rejected, as are frames whose window size
/// is unknown or larger than `window_size_max`. A hasher is only created when
/// the frame carries a checksum and `verify_checksum` is set.
pub fn init(
    frame_header: Frame.Zstandard.Header,
    window_size_max: usize,
    verify_checksum: bool,
) InitError!Frame {
    const descriptor = frame_header.descriptor;
    if (descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;

    const declared_window = frame_header.windowSize() orelse return error.WindowSizeUnknown;
    if (declared_window > window_size_max) return error.WindowOversize;
    const window_size = std.math.cast(usize, declared_window) orelse return error.WindowOversize;

    var content_size: ?usize = null;
    if (frame_header.content_size) |size| {
        content_size = std.math.cast(usize, size) orelse return error.ContentOversize;
    }

    const has_checksum = descriptor.content_checksum_flag;
    return .{
        .hasher_opt = if (has_checksum and verify_checksum) std.hash.XxHash64.init(0) else null,
        .window_size = window_size,
        .has_checksum = has_checksum,
        .block_size_max = @min(zstd.block_size_max, window_size),
        .content_size = content_size,
    };
}
};
pub const LiteralsSection = struct {
/// Parsed literals section header.
header: Header,
/// Huffman tree carried by this section, if any; `Decode.prepare` falls back
/// to the previous block's tree when this is null.
huffman_tree: ?HuffmanTree,
/// The literal data, either as one stream or split into four.
streams: Streams,
/// The literal data of a literals section, as a single stream or as four
/// consecutive streams.
pub const Streams = union(enum) {
    one: []const u8,
    four: [4][]const u8,

    /// Splits `stream_data` into streams. A `size_format` of 0 means a single
    /// stream covering all of `stream_data`. Otherwise the data starts with
    /// three little-endian u16 lengths for streams 1 to 3, and stream 4 takes
    /// whatever follows them.
    fn decode(size_format: u2, stream_data: []const u8) !Streams {
        if (size_format == 0) return .{ .one = stream_data };

        const jump_table_len = 6;
        if (stream_data.len < jump_table_len) return error.MalformedLiteralsSection;

        // End positions of streams 1 to 3, accumulated from their lengths.
        var ends: [3]usize = undefined;
        var cursor: usize = jump_table_len;
        for (&ends, 0..) |*end, i| {
            cursor += std.mem.readInt(u16, stream_data[2 * i ..][0..2], .little);
            end.* = cursor;
        }
        if (stream_data.len < ends[2]) return error.MalformedLiteralsSection;

        return .{ .four = .{
            stream_data[jump_table_len..ends[0]],
            stream_data[ends[0]..ends[1]],
            stream_data[ends[1]..ends[2]],
            stream_data[ends[2]..],
        } };
    }
};
/// Header of a literals section.
pub const Header = struct {
block_type: BlockType,
size_format: u2,
/// Number of literal bytes the section expands to.
regenerated_size: u20,
/// Size of the encoded literals; only set for `.compressed` and `.treeless`.
compressed_size: ?u18,
/// Decode a literals section header.
///
/// The header is 1 to 5 bytes long depending on `block_type` and
/// `size_format`; every byte consumed from `in` is subtracted from
/// `remaining`, and `error.EndOfStream` is returned when `remaining` cannot
/// cover it.
pub fn decode(in: *Reader, remaining: *Limit) !Header {
remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
const byte0 = try in.takeByte();
// Bits 0-1 hold the block type, bits 2-3 the size format.
const block_type: BlockType = @enumFromInt(byte0 & 0b11);
const size_format: u2 = @intCast((byte0 & 0b1100) >> 2);
var regenerated_size: u20 = undefined;
var compressed_size: ?u18 = null;
switch (block_type) {
.raw, .rle => {
switch (size_format) {
0, 2 => {
// One-byte header: 5-bit size.
regenerated_size = byte0 >> 3;
},
1 => {
// Two-byte header: 12-bit size.
remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
regenerated_size = (byte0 >> 4) + (@as(u20, try in.takeByte()) << 4);
},
3 => {
// Three-byte header: 20-bit size.
remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
regenerated_size = (byte0 >> 4) +
(@as(u20, try in.takeByte()) << 4) +
(@as(u20, try in.takeByte()) << 12);
},
}
},
.compressed, .treeless => {
// These headers are at least three bytes long.
remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
const byte1 = try in.takeByte();
const byte2 = try in.takeByte();
switch (size_format) {
0, 1 => {
// Three-byte header: two 10-bit sizes.
regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4);
compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2);
},
2 => {
// Four-byte header: two 14-bit sizes.
remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
const byte3 = try in.takeByte();
regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12);
compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6);
},
3 => {
// Five-byte header: two 18-bit sizes.
remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
const byte3 = try in.takeByte();
const byte4 = try in.takeByte();
regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12);
compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10);
},
}
},
}
return .{
.block_type = block_type,
.size_format = size_format,
.regenerated_size = regenerated_size,
.compressed_size = compressed_size,
};
}
};
/// How the literals of a block are encoded; the value comes from the two
/// lowest bits of the literals section header.
pub const BlockType = enum(u2) {
/// Literals are stored verbatim.
raw,
/// A single byte repeated `regenerated_size` times.
rle,
/// Huffman-coded.
compressed,
/// Huffman-coded, reusing the tree of a previous block.
treeless,
};
pub const HuffmanTree = struct {
/// Length in bits of the longest code in the tree.
max_bit_count: u4,
/// Index of the last populated entry in `nodes`.
symbol_count_minus_one: u8,
/// Symbols with non-zero weight in ascending weight order; only the first
/// `symbol_count_minus_one + 1` entries are populated.
nodes: [256]PrefixedSymbol,
/// A symbol together with its assigned code prefix and weight.
pub const PrefixedSymbol = struct {
symbol: u8,
prefix: u16,
weight: u4,
};
/// Outcome of `query`: either the decoded symbol, or the node index at which
/// to continue once more bits have been read.
pub const Result = union(enum) {
symbol: u8,
index: usize,
};
/// Searches the nodes sharing the weight of `nodes[index]`, walking towards
/// index 0, for one whose prefix equals `prefix`.
///
/// Returns the matching symbol, or the index of the first node with a
/// different weight so that the caller can continue there with more bits.
/// Fails with `error.HuffmanTreeIncomplete` when index 0 is passed without a
/// match.
pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{HuffmanTreeIncomplete}!Result {
    const group_weight = self.nodes[index].weight;
    var cursor: usize = index;
    while (true) {
        const candidate = self.nodes[cursor];
        if (candidate.weight != group_weight) return .{ .index = cursor };
        if (candidate.prefix == prefix) return .{ .symbol = candidate.symbol };
        if (cursor == 0) return error.HuffmanTreeIncomplete;
        cursor -= 1;
    }
}
/// Converts a Huffman weight into a code length in bits. Weight 0 marks an
/// unused symbol and maps to 0; any other weight maps to
/// `max_bit_count + 1 - weight`.
pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
    if (weight == 0) return 0;
    return (max_bit_count + 1) - weight;
}
/// Errors returned while reading a Huffman tree description.
pub const DecodeError = Reader.Error || error{
MalformedHuffmanTree,
MalformedFseTable,
MalformedAccuracyLog,
EndOfStream,
MissingStartBit,
};
/// Reads a Huffman tree description from `in`, charging the bytes consumed to
/// `remaining`.
///
/// A header byte below 128 is the byte length of FSE-compressed weights;
/// otherwise `header - 127` weights follow, stored directly as 4-bit values.
pub fn decode(in: *Reader, remaining: *Limit) HuffmanTree.DecodeError!HuffmanTree {
    remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
    const header = try in.takeByte();
    if (header >= 128) return decodeDirect(in, remaining, header - 127);
    return decodeFse(in, remaining, header);
}
/// Reads `encoded_symbol_count` weights stored two per byte (high nibble
/// first) and builds the tree. The weight of the one additional, final symbol
/// is derived in `build`.
fn decodeDirect(
    in: *Reader,
    remaining: *Limit,
    encoded_symbol_count: usize,
) HuffmanTree.DecodeError!HuffmanTree {
    var weights: [256]u4 = undefined;
    const packed_len = (encoded_symbol_count + 1) / 2;
    remaining.* = remaining.subtract(packed_len) orelse return error.EndOfStream;

    var slot: usize = 0;
    while (slot < 2 * packed_len) : (slot += 2) {
        const pair = try in.takeByte();
        weights[slot] = @intCast(pair >> 4);
        weights[slot + 1] = @intCast(pair & 0xF);
    }
    return build(&weights, encoded_symbol_count + 1);
}
/// Reads `compressed_size` bytes holding an FSE table description followed by
/// the FSE-coded weights, decodes the weights and builds the tree.
fn decodeFse(
    in: *Reader,
    remaining: *Limit,
    compressed_size: usize,
) HuffmanTree.DecodeError!HuffmanTree {
    var weights: [256]u4 = undefined;
    remaining.* = remaining.subtract(compressed_size) orelse return error.EndOfStream;

    var fse_bits: BitReader = .{ .bytes = try in.take(compressed_size) };
    var fse_entries: [1 << 6]Table.Fse = undefined;
    const table_size = try Table.decode(&fse_bits, 256, 6, &fse_entries);

    // Everything after the table description is the weight bit stream.
    const weight_stream = fse_bits.bytes[fse_bits.index..];
    const symbol_count = try assignWeights(
        weight_stream,
        std.math.log2_int_ceil(usize, table_size),
        &fse_entries,
        &weights,
    );
    return build(&weights, symbol_count);
}
/// Decodes FSE-coded Huffman weights from `huff_bits_buffer` into `weights`.
///
/// Two FSE states (even and odd) take turns producing weights until the bit
/// stream cannot supply the bits the current state asks for; the other
/// state's symbol is then emitted as the final stored weight. Returns the
/// symbol count, which is one more than the number of weights stored because
/// the last symbol's weight is not part of the stream.
fn assignWeights(
huff_bits_buffer: []const u8,
accuracy_log: u16,
entries: *[1 << 6]Table.Fse,
weights: *[256]u4,
) !usize {
var huff_bits = try ReverseBitReader.init(huff_bits_buffer);
var i: usize = 0;
var even_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log);
var odd_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log);
while (i < 254) {
const even_data = entries[even_state];
var read_bits: u16 = 0;
const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable;
weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree;
i += 1;
// Fewer bits than requested: the stream is exhausted, so flush the odd
// state's symbol and stop.
if (read_bits < even_data.bits) {
weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree;
i += 1;
break;
}
even_state = even_data.baseline + even_bits;
read_bits = 0;
const odd_data = entries[odd_state];
const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable;
weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree;
i += 1;
// Same as above with the roles swapped: flush the even state's symbol.
if (read_bits < odd_data.bits) {
if (i == 255) return error.MalformedHuffmanTree;
weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree;
i += 1;
break;
}
odd_state = odd_data.baseline + odd_bits;
// Leaving the loop without a `break` means too many weights were produced.
} else return error.MalformedHuffmanTree;
if (!huff_bits.isEmpty()) {
return error.MalformedHuffmanTree;
}
return i + 1; // stream contains all but the last symbol
}
/// Fills `weight_sorted_prefixed_symbols` with the symbols that have a
/// non-zero weight, ordered by ascending weight, and assigns each its prefix
/// code. Returns how many entries were populated; entries past that count are
/// left in an unspecified state.
fn assignSymbols(weight_sorted_prefixed_symbols: []PrefixedSymbol, weights: [256]u4) usize {
// Start with symbol i at position i; weight and prefix are filled in later.
for (0..weight_sorted_prefixed_symbols.len) |i| {
weight_sorted_prefixed_symbols[i] = .{
.symbol = @as(u8, @intCast(i)),
.weight = undefined,
.prefix = undefined,
};
}
std.mem.sort(
PrefixedSymbol,
weight_sorted_prefixed_symbols,
weights,
lessThanByWeight,
);
var prefix: u16 = 0;
var prefixed_symbol_count: usize = 0;
var sorted_index: usize = 0;
const symbol_count = weight_sorted_prefixed_symbols.len;
while (sorted_index < symbol_count) {
var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
const weight = weights[symbol];
// Zero-weight symbols sort first and receive no code.
if (weight == 0) {
sorted_index += 1;
continue;
}
// Consecutive prefixes go to every symbol sharing this weight; the
// populated entries are compacted towards the front of the slice.
while (sorted_index < symbol_count) : ({
sorted_index += 1;
prefixed_symbol_count += 1;
prefix += 1;
}) {
symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
if (weights[symbol] != weight) {
// Moving to a larger weight means shorter codes, so the running
// prefix is rescaled by the weight difference.
prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1;
break;
}
weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol;
weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix;
weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight;
}
}
return prefixed_symbol_count;
}
/// Builds a `HuffmanTree` from the first `symbol_count - 1` entries of
/// `weights`. The final symbol's weight is not an input: it is derived here
/// and written to `weights[symbol_count - 1]`.
fn build(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!HuffmanTree {
// Each non-zero weight w contributes 2^(w-1); a zero weight contributes 0.
var weight_power_sum_big: u32 = 0;
for (weights[0 .. symbol_count - 1]) |value| {
weight_power_sum_big += (@as(u16, 1) << value) >> 1;
}
if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree;
const weight_power_sum = @as(u16, @intCast(weight_power_sum_big));
// advance to next power of two (even if weight_power_sum is a power of 2)
// TODO: is it valid to have weight_power_sum == 0?
const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1;
const next_power_of_two = @as(u16, 1) << max_number_of_bits;
// The last weight is derived from the gap between the sum and that power
// of two.
weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1;
var weight_sorted_prefixed_symbols: [256]PrefixedSymbol = undefined;
const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*);
const tree: HuffmanTree = .{
.max_bit_count = max_number_of_bits,
.symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)),
.nodes = weight_sorted_prefixed_symbols,
};
return tree;
}
/// Sort predicate: orders two entries by the weight of their symbols, ascending.
/// `weights` is indexed by symbol value.
fn lessThanByWeight(
    weights: [256]u4,
    lhs: PrefixedSymbol,
    rhs: PrefixedSymbol,
) bool {
    // Equal weights are deliberately not tie-broken here. The caller must use a
    // stable sort so that symbols of equal weight keep their original order;
    // with an unstable sort, an explicit "equal weight => compare symbols"
    // case would have to be added.
    const lhs_weight = weights[lhs.symbol];
    const rhs_weight = weights[rhs.symbol];
    return lhs_weight < rhs_weight;
}
};
/// Number of streams the literals payload is split into.
pub const StreamCount = enum { one, four };

/// Returns how many streams a literals section uses for the given header fields.
/// Raw and RLE blocks always use one stream; Huffman-coded blocks use one stream
/// when `size_format` is 0 and four streams otherwise.
pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount {
    switch (block_type) {
        .raw, .rle => return .one,
        .compressed, .treeless => {},
    }
    if (size_format == 0) return .one;
    return .four;
}
/// Errors returned by `decode` for a literals section.
pub const DecodeError = error{
/// Invalid header.
MalformedLiteralsHeader,
/// Decoding errors.
MalformedLiteralsSection,
/// Compressed literals have invalid accuracy.
MalformedAccuracyLog,
/// Compressed literals have invalid FSE table.
MalformedFseTable,
/// Failed decoding a Huffman tree.
MalformedHuffmanTree,
/// Not enough bytes to complete the section.
EndOfStream,
/// NOTE(review): presumably propagated from the underlying `Reader` when a read fails — confirm.
ReadFailed,
/// A reversed bit stream had no set bit among the first eight bits read.
MissingStartBit,
};
/// Decodes one literals section from `in`, using `buffer` as backing storage.
///
/// `remaining` is the byte budget still available; every read is first subtracted
/// from it and `error.EndOfStream` is returned when the budget is insufficient.
///
/// Behavior per `header.block_type`:
/// - `.raw`: `header.regenerated_size` bytes are read into the front of `buffer`.
/// - `.rle`: a single byte is read into `buffer[0]`.
/// - `.compressed` / `.treeless`: for `.compressed` a Huffman tree is decoded first;
///   the rest of the compressed payload (`compressed_size` minus the bytes the
///   tree consumed) is read into `buffer` and split into streams.
///
/// The returned `streams` alias `buffer`, so `buffer` must outlive the result.
///
/// Returns `error.MalformedLiteralsSection` when `buffer` is too small for the
/// section or when the Huffman tree is larger than the declared compressed size.
pub fn decode(in: *Reader, remaining: *Limit, buffer: []u8) DecodeError!LiteralsSection {
    const header = try Header.decode(in, remaining);
    switch (header.block_type) {
        .raw => {
            if (buffer.len < header.regenerated_size) return error.MalformedLiteralsSection;
            remaining.* = remaining.subtract(header.regenerated_size) orelse return error.EndOfStream;
            try in.readSliceAll(buffer[0..header.regenerated_size]);
            return .{
                .header = header,
                .huffman_tree = null,
                .streams = .{ .one = buffer },
            };
        },
        .rle => {
            // The repeated byte is stored in `buffer[0]`; an empty buffer cannot
            // hold it, so reject instead of indexing out of bounds.
            if (buffer.len == 0) return error.MalformedLiteralsSection;
            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
            buffer[0] = try in.takeByte();
            return .{
                .header = header,
                .huffman_tree = null,
                .streams = .{ .one = buffer[0..1] },
            };
        },
        .compressed, .treeless => {
            // Measure how many bytes the tree description consumes by comparing
            // the budget before and after decoding it.
            const before_remaining = remaining.*;
            const huffman_tree = if (header.block_type == .compressed)
                try HuffmanTree.decode(in, remaining)
            else
                null;
            const huffman_tree_size = @intFromEnum(before_remaining) - @intFromEnum(remaining.*);
            // NOTE(review): assumes `Header.decode` always sets `compressed_size`
            // for these block types — confirm.
            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
                return error.MalformedLiteralsSection;
            if (total_streams_size > buffer.len) return error.MalformedLiteralsSection;
            remaining.* = remaining.subtract(total_streams_size) orelse return error.EndOfStream;
            try in.readSliceAll(buffer[0..total_streams_size]);
            const stream_data = buffer[0..total_streams_size];
            const streams = try Streams.decode(header.size_format, stream_data);
            return .{
                .header = header,
                .huffman_tree = huffman_tree,
                .streams = streams,
            };
        },
    }
}
};
/// A sequences section: its header plus one table for each of the three
/// code kinds (literal lengths, offsets, match lengths).
pub const SequencesSection = struct {
    header: Header,
    literals_length_table: Table,
    offset_table: Table,
    match_length_table: Table,

    /// Header of a sequences section: the sequence count followed by one
    /// compression mode per code kind.
    pub const Header = struct {
        sequence_count: u24,
        match_lengths: Mode,
        offsets: Mode,
        literal_lengths: Mode,

        /// Two-bit compression mode selector stored in the modes byte.
        pub const Mode = enum(u2) {
            predefined,
            rle,
            fse,
            repeat,
        };

        pub const DecodeError = error{
            ReservedBitSet,
            EndOfStream,
            ReadFailed,
        };

        /// Reads the header from `in`, charging every byte against `remaining`.
        ///
        /// The first byte selects the encoding of the count:
        /// - `0`: no sequences; the header ends here and the modes are left undefined.
        /// - `1...127`: the byte is the count.
        /// - `128...254`: `((byte0 - 128) << 8) + byte1`.
        /// - `255`: `byte1 + (byte2 << 8) + 0x7F00`.
        /// A modes byte follows; its two lowest bits are reserved and must be zero,
        /// otherwise `error.ReservedBitSet` is returned.
        pub fn decode(in: *Reader, remaining: *Limit) DecodeError!Header {
            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
            const first = try in.takeByte();

            // Each non-zero case charges its extra count bytes plus the modes byte up front.
            const sequence_count: u24 = switch (first) {
                0 => return .{
                    .sequence_count = 0,
                    .offsets = undefined,
                    .match_lengths = undefined,
                    .literal_lengths = undefined,
                },
                1...127 => count: {
                    remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
                    break :count first;
                },
                128...254 => count: {
                    remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
                    const low = try in.takeByte();
                    break :count (@as(u24, first - 128) << 8) + low;
                },
                255 => count: {
                    remaining.* = remaining.subtract(3) orelse return error.EndOfStream;
                    const low = try in.takeByte();
                    const high = try in.takeByte();
                    break :count @as(u24, low) + (@as(u24, high) << 8) + 0x7F00;
                },
            };

            const modes = try in.takeByte();
            const literal_mode: Mode = @enumFromInt(@as(u2, @truncate(modes >> 6)));
            const offsets_mode: Mode = @enumFromInt(@as(u2, @truncate(modes >> 4)));
            const matches_mode: Mode = @enumFromInt(@as(u2, @truncate(modes >> 2)));
            const reserved: u2 = @truncate(modes);
            if (reserved != 0) return error.ReservedBitSet;

            return .{
                .sequence_count = sequence_count,
                .match_lengths = matches_mode,
                .offsets = offsets_mode,
                .literal_lengths = literal_mode,
            };
        }
    };
};
pub const Table = union(enum) {
fse: []const Fse,
rle: u8,
/// One entry (state) of an FSE decoding table.
pub const Fse = struct {
/// Symbol associated with this state.
symbol: u8,
/// Base value the freshly read bits are added to when computing the next state.
baseline: u16,
/// Number of bits to read when leaving this state.
bits: u8,
};
/// Reads an FSE table description from `bit_reader` and builds the decoding
/// table into the front of `entries`.
///
/// The description starts with a 4-bit accuracy log stored with a bias of 5,
/// followed by one variable-width value per symbol. Values are read until they
/// account for `1 << accuracy_log`, or until 256 values have been read.
/// Afterwards the reader is aligned to a byte boundary.
///
/// Returns the table size, `1 << accuracy_log`; `entries[0..table_size]` is
/// filled by `build`, so `entries` must be at least that long.
///
/// Errors:
/// - `error.MalformedAccuracyLog` if the accuracy log exceeds `max_accuracy_log`.
/// - `error.MalformedFseTable` if fewer than two values were read, if more than
///   `expected_symbol_count` values were read, if a repeat run would exceed 256
///   values, or if the values do not account for exactly `1 << accuracy_log`.
/// - `error.EndOfStream` if the bit reader runs out of input.
pub fn decode(
bit_reader: *BitReader,
expected_symbol_count: usize,
max_accuracy_log: u4,
entries: []Table.Fse,
) !usize {
const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4);
// Saturating subtraction: a `max_accuracy_log` below 5 only admits a biased value of 0.
if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog;
const accuracy_log = accuracy_log_biased + 5;
var values: [256]u16 = undefined;
var value_count: usize = 0;
const total_probability = @as(u16, 1) << accuracy_log;
var accumulated_probability: u16 = 0;
while (accumulated_probability < total_probability) {
// WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here,
// but power of two (remaining probabilities + 1) need max bits set to 1 more.
const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1;
// Read `max_bits - 1` bits first; values below `cutoff` are complete at that
// width, the others need one additional high bit.
const small = try bit_reader.readBitsNoEof(u16, max_bits - 1);
const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1);
const value = if (small < cutoff)
small
else value: {
const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1));
break :value if (value_read < @as(u16, 1) << (max_bits - 1))
value_read
else
value_read - cutoff;
};
// A stored value `v >= 1` stands for probability `v - 1`; a stored 0 still
// takes one slot of the total.
accumulated_probability += if (value != 0) value - 1 else 1;
values[value_count] = value;
value_count += 1;
if (value == 1) {
// A stored 1 (probability zero) is followed by 2-bit repeat flags giving
// the number of additional stored-1 values; a flag of 3 means another
// flag follows.
while (true) {
const repeat_flag = try bit_reader.readBitsNoEof(u2, 2);
if (repeat_flag + value_count > 256) return error.MalformedFseTable;
for (0..repeat_flag) |_| {
values[value_count] = 1;
value_count += 1;
}
if (repeat_flag < 3) break;
}
}
// `values` holds at most 256 entries.
if (value_count == 256) break;
}
bit_reader.alignToByte();
if (value_count < 2) return error.MalformedFseTable;
if (accumulated_probability != total_probability) return error.MalformedFseTable;
if (value_count > expected_symbol_count) return error.MalformedFseTable;
const table_size = total_probability;
try build(values[0..value_count], entries[0..table_size]);
return table_size;
}
/// Fills `entries` with the FSE decoding table described by `values`.
///
/// `values[s]` is the stored value for symbol `s`: 0 marks a "less than one"
/// symbol, which gets a single state at the end of the table; 1 means the symbol
/// does not occur; any larger value `v` gives the symbol `v - 1` states.
/// `entries.len` is the table size and must not exceed `1 << 9`.
///
/// Returns `error.MalformedFseTable` if rounding a symbol's state count up to a
/// power of two overflows `u16`.
pub fn build(values: []const u16, entries: []Table.Fse) !void {
    const table_len = entries.len;
    const total_probability: u16 = @intCast(table_len);
    const accuracy_log = std.math.log2_int(u16, total_probability);
    assert(total_probability <= 1 << 9);

    // "Less than one" symbols take the last slots of the table, back to front.
    var less_than_one_count: usize = 0;
    for (values, 0..) |value, symbol| {
        if (value != 0) continue;
        entries[table_len - 1 - less_than_one_count] = Table.Fse{
            .symbol = @intCast(symbol),
            .baseline = 0,
            .bits = accuracy_log,
        };
        less_than_one_count += 1;
    }

    // Parameters of the walk that spreads states over the table.
    const step = (table_len >> 1) + (table_len >> 3) + 3;
    const mask = table_len - 1;
    const spread_limit = table_len - less_than_one_count;

    var position: usize = 0;
    var state_scratch: [1 << 9]u16 = undefined;
    for (values, 0..) |value, symbol| {
        if (value <= 1) continue;
        const probability = value - 1;
        const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch
            return error.MalformedFseTable;
        const share_size = @divExact(total_probability, state_share_dividend);
        const double_state_count = state_share_dividend - probability;
        const single_state_count = probability - double_state_count;
        const share_size_log = std.math.log2_int(u16, share_size);

        // Collect the table positions owned by this symbol.
        const states = state_scratch[0..probability];
        for (states) |*state| {
            state.* = @intCast(position);
            // Step at least once, then keep stepping past the slots reserved
            // for "less than one" symbols.
            while (true) {
                position = (position + step) & mask;
                if (position < spread_limit) break;
            }
        }
        std.mem.sort(u16, states, {}, std.sort.asc(u16));

        // In ascending position order, the first `double_state_count` states
        // read one bit more than the rest.
        for (states, 0..) |state, i| {
            const rank: u16 = @intCast(i);
            entries[state] = if (rank < double_state_count) Table.Fse{
                .symbol = @intCast(symbol),
                .bits = share_size_log + 1,
                .baseline = single_state_count * share_size + rank * 2 * share_size,
            } else Table.Fse{
                .symbol = @intCast(symbol),
                .bits = share_size_log,
                .baseline = (rank - double_state_count) * share_size,
            };
        }
    }
}
// Checks that `build`, fed the default value lists below, reproduces the
// hard-coded `predefined_literal`, `predefined_match` and `predefined_offset`
// tables.
test build {
const literals_length_default_values = [36]u16{
5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2,
0, 0, 0, 0,
};
const match_lengths_default_values = [53]u16{
2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
0, 0, 0, 0, 0,
};
const offset_codes_default_values = [29]u16{
2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
};
// One scratch buffer serves all three tables; the offset table uses only
// the first 32 entries.
var entries: [64]Table.Fse = undefined;
try build(&literals_length_default_values, &entries);
try std.testing.expectEqualSlices(Table.Fse, Table.predefined_literal.fse, &entries);
try build(&match_lengths_default_values, &entries);
try std.testing.expectEqualSlices(Table.Fse, Table.predefined_match.fse, &entries);
try build(&offset_codes_default_values, entries[0..32]);
try std.testing.expectEqualSlices(Table.Fse, Table.predefined_offset.fse, entries[0..32]);
}
/// Hard-coded 64-entry FSE decoding table for literal lengths.
/// `test build` checks that it equals the output of `build` for the default
/// literal length values.
pub const predefined_literal: Table = .{
.fse = &[64]Table.Fse{
.{ .symbol = 0, .bits = 4, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 16 },
.{ .symbol = 1, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 5, .baseline = 0 },
.{ .symbol = 9, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 5, .baseline = 0 },
.{ .symbol = 12, .bits = 5, .baseline = 0 },
.{ .symbol = 14, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 0 },
.{ .symbol = 18, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 0 },
.{ .symbol = 21, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 0 },
.{ .symbol = 24, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 5, .baseline = 32 },
.{ .symbol = 26, .bits = 5, .baseline = 0 },
.{ .symbol = 27, .bits = 6, .baseline = 0 },
.{ .symbol = 29, .bits = 6, .baseline = 0 },
.{ .symbol = 31, .bits = 6, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 32 },
.{ .symbol = 1, .bits = 4, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 5, .baseline = 0 },
.{ .symbol = 13, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 32 },
.{ .symbol = 17, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 32 },
.{ .symbol = 20, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 32 },
.{ .symbol = 23, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 4, .baseline = 0 },
.{ .symbol = 25, .bits = 4, .baseline = 16 },
.{ .symbol = 26, .bits = 5, .baseline = 32 },
.{ .symbol = 28, .bits = 6, .baseline = 0 },
.{ .symbol = 30, .bits = 6, .baseline = 0 },
.{ .symbol = 0, .bits = 4, .baseline = 48 },
.{ .symbol = 1, .bits = 4, .baseline = 16 },
.{ .symbol = 2, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 32 },
.{ .symbol = 6, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 32 },
.{ .symbol = 9, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 5, .baseline = 32 },
.{ .symbol = 12, .bits = 5, .baseline = 32 },
.{ .symbol = 15, .bits = 6, .baseline = 0 },
.{ .symbol = 17, .bits = 5, .baseline = 32 },
.{ .symbol = 18, .bits = 5, .baseline = 32 },
.{ .symbol = 20, .bits = 5, .baseline = 32 },
.{ .symbol = 21, .bits = 5, .baseline = 32 },
.{ .symbol = 23, .bits = 5, .baseline = 32 },
.{ .symbol = 24, .bits = 5, .baseline = 32 },
.{ .symbol = 35, .bits = 6, .baseline = 0 },
.{ .symbol = 34, .bits = 6, .baseline = 0 },
.{ .symbol = 33, .bits = 6, .baseline = 0 },
.{ .symbol = 32, .bits = 6, .baseline = 0 },
},
};
/// Hard-coded 64-entry FSE decoding table for match lengths.
/// `test build` checks that it equals the output of `build` for the default
/// match length values.
pub const predefined_match: Table = .{
.fse = &[64]Table.Fse{
.{ .symbol = 0, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 32 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 5, .baseline = 0 },
.{ .symbol = 10, .bits = 6, .baseline = 0 },
.{ .symbol = 13, .bits = 6, .baseline = 0 },
.{ .symbol = 16, .bits = 6, .baseline = 0 },
.{ .symbol = 19, .bits = 6, .baseline = 0 },
.{ .symbol = 22, .bits = 6, .baseline = 0 },
.{ .symbol = 25, .bits = 6, .baseline = 0 },
.{ .symbol = 28, .bits = 6, .baseline = 0 },
.{ .symbol = 31, .bits = 6, .baseline = 0 },
.{ .symbol = 33, .bits = 6, .baseline = 0 },
.{ .symbol = 35, .bits = 6, .baseline = 0 },
.{ .symbol = 37, .bits = 6, .baseline = 0 },
.{ .symbol = 39, .bits = 6, .baseline = 0 },
.{ .symbol = 41, .bits = 6, .baseline = 0 },
.{ .symbol = 43, .bits = 6, .baseline = 0 },
.{ .symbol = 45, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 16 },
.{ .symbol = 2, .bits = 4, .baseline = 0 },
.{ .symbol = 3, .bits = 5, .baseline = 32 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 5, .baseline = 32 },
.{ .symbol = 7, .bits = 5, .baseline = 0 },
.{ .symbol = 9, .bits = 6, .baseline = 0 },
.{ .symbol = 12, .bits = 6, .baseline = 0 },
.{ .symbol = 15, .bits = 6, .baseline = 0 },
.{ .symbol = 18, .bits = 6, .baseline = 0 },
.{ .symbol = 21, .bits = 6, .baseline = 0 },
.{ .symbol = 24, .bits = 6, .baseline = 0 },
.{ .symbol = 27, .bits = 6, .baseline = 0 },
.{ .symbol = 30, .bits = 6, .baseline = 0 },
.{ .symbol = 32, .bits = 6, .baseline = 0 },
.{ .symbol = 34, .bits = 6, .baseline = 0 },
.{ .symbol = 36, .bits = 6, .baseline = 0 },
.{ .symbol = 38, .bits = 6, .baseline = 0 },
.{ .symbol = 40, .bits = 6, .baseline = 0 },
.{ .symbol = 42, .bits = 6, .baseline = 0 },
.{ .symbol = 44, .bits = 6, .baseline = 0 },
.{ .symbol = 1, .bits = 4, .baseline = 32 },
.{ .symbol = 1, .bits = 4, .baseline = 48 },
.{ .symbol = 2, .bits = 4, .baseline = 16 },
.{ .symbol = 4, .bits = 5, .baseline = 32 },
.{ .symbol = 5, .bits = 5, .baseline = 32 },
.{ .symbol = 7, .bits = 5, .baseline = 32 },
.{ .symbol = 8, .bits = 5, .baseline = 32 },
.{ .symbol = 11, .bits = 6, .baseline = 0 },
.{ .symbol = 14, .bits = 6, .baseline = 0 },
.{ .symbol = 17, .bits = 6, .baseline = 0 },
.{ .symbol = 20, .bits = 6, .baseline = 0 },
.{ .symbol = 23, .bits = 6, .baseline = 0 },
.{ .symbol = 26, .bits = 6, .baseline = 0 },
.{ .symbol = 29, .bits = 6, .baseline = 0 },
.{ .symbol = 52, .bits = 6, .baseline = 0 },
.{ .symbol = 51, .bits = 6, .baseline = 0 },
.{ .symbol = 50, .bits = 6, .baseline = 0 },
.{ .symbol = 49, .bits = 6, .baseline = 0 },
.{ .symbol = 48, .bits = 6, .baseline = 0 },
.{ .symbol = 47, .bits = 6, .baseline = 0 },
.{ .symbol = 46, .bits = 6, .baseline = 0 },
},
};
/// Hard-coded 32-entry FSE decoding table for offset codes.
/// `test build` checks that it equals the output of `build` for the default
/// offset code values.
pub const predefined_offset: Table = .{
.fse = &[32]Table.Fse{
.{ .symbol = 0, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 4, .baseline = 0 },
.{ .symbol = 9, .bits = 5, .baseline = 0 },
.{ .symbol = 15, .bits = 5, .baseline = 0 },
.{ .symbol = 21, .bits = 5, .baseline = 0 },
.{ .symbol = 3, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 4, .baseline = 0 },
.{ .symbol = 12, .bits = 5, .baseline = 0 },
.{ .symbol = 18, .bits = 5, .baseline = 0 },
.{ .symbol = 23, .bits = 5, .baseline = 0 },
.{ .symbol = 5, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 4, .baseline = 0 },
.{ .symbol = 14, .bits = 5, .baseline = 0 },
.{ .symbol = 20, .bits = 5, .baseline = 0 },
.{ .symbol = 2, .bits = 5, .baseline = 0 },
.{ .symbol = 7, .bits = 4, .baseline = 16 },
.{ .symbol = 11, .bits = 5, .baseline = 0 },
.{ .symbol = 17, .bits = 5, .baseline = 0 },
.{ .symbol = 22, .bits = 5, .baseline = 0 },
.{ .symbol = 4, .bits = 5, .baseline = 0 },
.{ .symbol = 8, .bits = 4, .baseline = 16 },
.{ .symbol = 13, .bits = 5, .baseline = 0 },
.{ .symbol = 19, .bits = 5, .baseline = 0 },
.{ .symbol = 1, .bits = 5, .baseline = 0 },
.{ .symbol = 6, .bits = 4, .baseline = 16 },
.{ .symbol = 10, .bits = 5, .baseline = 0 },
.{ .symbol = 16, .bits = 5, .baseline = 0 },
.{ .symbol = 28, .bits = 5, .baseline = 0 },
.{ .symbol = 27, .bits = 5, .baseline = 0 },
.{ .symbol = 26, .bits = 5, .baseline = 0 },
.{ .symbol = 25, .bits = 5, .baseline = 0 },
.{ .symbol = 24, .bits = 5, .baseline = 0 },
},
};
};
/// `low_bit_mask[n]` has exactly the `n` least-significant bits set, for `n` in 0 through 8.
const low_bit_mask = [9]u8{ 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F, 0xFF };
/// Tuple type pairing a value of type `T` with a `u16` bit count.
/// The bit readers use it to return the bits read together with how many
/// bits were actually obtained.
fn Bits(comptime T: type) type {
    const ValueAndCount = struct { T, u16 };
    return ValueAndCount;
}
/// For reading the reversed bit streams used to encode FSE compressed data.
/// For reading the reversed bit streams used to encode FSE compressed data.
///
/// Bytes are consumed from the end of `bytes` towards its start, and within
/// each byte the most significant bits are handed out first.
const ReverseBitReader = struct {
/// The whole stream; only `bytes[0..remaining]` is still unread.
bytes: []const u8,
/// Number of bytes not yet consumed.
remaining: usize,
/// Bits left over from the most recently consumed byte, in its low `count` bits.
bits: u8,
/// Number of valid bits in `bits`.
count: u4,
/// Creates a reader over `bytes` and skips past the start marker: bits are
/// read one at a time until the first set bit, which is consumed.
/// Returns `error.MissingStartBit` if the first eight bits are all zero.
/// An empty `bytes` yields an empty reader without any check.
fn init(bytes: []const u8) error{MissingStartBit}!ReverseBitReader {
var result: ReverseBitReader = .{
.bytes = bytes,
.remaining = bytes.len,
.bits = 0,
.count = 0,
};
if (bytes.len == 0) return result;
// `bytes` is non-empty, so eight single-bit reads (one byte) cannot hit
// the end of the stream.
for (0..8) |_| if (0 != (result.readBitsNoEof(u1, 1) catch unreachable)) return result;
return error.MissingStartBit;
}
/// Packs `out` (cast to the unsigned integer of `T`'s bit size, then bit-cast
/// to `T`) together with the bit count `num`.
fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
return .{
@bitCast(@as(UT, @intCast(out))),
num,
};
}
/// Reads exactly `num` bits, or returns `error.EndOfStream` if fewer were available.
fn readBitsNoEof(self: *ReverseBitReader, comptime T: type, num: u16) error{EndOfStream}!T {
const b, const c = try self.readBitsTuple(T, num);
if (c < num) return error.EndOfStream;
return b;
}
/// Reads up to `num` bits and stores the number actually read in `out_bits`.
fn readBits(self: *ReverseBitReader, comptime T: type, num: u16, out_bits: *u16) !T {
const b, const c = try self.readBitsTuple(T, num);
out_bits.* = c;
return b;
}
/// Reads up to `num` bits and returns them with the number of bits actually
/// obtained. Running out of input is not an error here: the bits gathered so
/// far are returned with their count.
fn readBitsTuple(self: *ReverseBitReader, comptime T: type, num: u16) !Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
// Accumulate in at least a u8 so whole bytes fit even when `T` is narrower.
const U = if (@bitSizeOf(T) < 8) u8 else UT;
// Fast path: the buffered bits cover the request.
if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);
// Start from all buffered bits, then append whole bytes below them.
var out_count: u16 = self.count;
var out: U = self.removeBits(self.count);
const full_bytes_left = (num - out_count) / 8;
for (0..full_bytes_left) |_| {
const byte = takeByte(self) catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
};
// With an 8-bit accumulator a shift by 8 is not allowed; clear it instead.
if (U == u8) out = 0 else out <<= 8;
out |= byte;
out_count += 8;
}
const bits_left = num - out_count;
const keep = 8 - bits_left;
if (bits_left == 0) return initBits(T, out, out_count);
const final_byte = takeByte(self) catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
};
// Take the top `bits_left` bits of the last byte; buffer its low `keep` bits.
out <<= @intCast(bits_left);
out |= final_byte >> @intCast(keep);
self.bits = final_byte & low_bit_mask[keep];
self.count = @intCast(keep);
return initBits(T, out, num);
}
/// Returns the last unread byte and shrinks the unread region by one.
fn takeByte(rbr: *ReverseBitReader) error{EndOfStream}!u8 {
if (rbr.remaining == 0) return error.EndOfStream;
rbr.remaining -= 1;
return rbr.bytes[rbr.remaining];
}
/// True when neither unread bytes nor buffered bits remain.
fn isEmpty(self: *const ReverseBitReader) bool {
return self.remaining == 0 and self.count == 0;
}
/// Removes the `num` highest buffered bits and returns them; the remaining
/// low bits stay buffered. `num` must not exceed `count`.
fn removeBits(self: *ReverseBitReader, num: u4) u8 {
if (num == 8) {
self.count = 0;
return self.bits;
}
const keep = self.count - num;
const bits = self.bits >> @intCast(keep);
self.bits &= low_bit_mask[keep];
self.count = keep;
return bits;
}
};
/// Forward bit reader over a byte slice.
///
/// Bytes are consumed from the start of `bytes`, and within each byte the
/// least significant bits are handed out first; later bits land in higher
/// positions of the result.
const BitReader = struct {
/// The whole input.
bytes: []const u8,
/// Index of the next unread byte in `bytes`.
index: usize = 0,
/// Bits left over from the most recently consumed byte, in its low `count` bits.
bits: u8 = 0,
/// Number of valid bits in `bits`.
count: u4 = 0,
/// Packs `out` (cast to the unsigned integer of `T`'s bit size, then bit-cast
/// to `T`) together with the bit count `num`.
fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
return .{
@bitCast(@as(UT, @intCast(out))),
num,
};
}
/// Reads exactly `num` bits, or returns `error.EndOfStream` if fewer were available.
fn readBitsNoEof(self: *@This(), comptime T: type, num: u16) !T {
const b, const c = try self.readBitsTuple(T, num);
if (c < num) return error.EndOfStream;
return b;
}
/// Reads up to `num` bits and stores the number actually read in `out_bits`.
fn readBits(self: *@This(), comptime T: type, num: u16, out_bits: *u16) !T {
const b, const c = try self.readBitsTuple(T, num);
out_bits.* = c;
return b;
}
/// Reads up to `num` bits and returns them with the number of bits actually
/// obtained. Running out of input is not an error here: the bits gathered so
/// far are returned with their count.
fn readBitsTuple(self: *@This(), comptime T: type, num: u16) !Bits(T) {
const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
// Accumulate in at least a u8 so whole bytes fit even when `T` is narrower.
const U = if (@bitSizeOf(T) < 8) u8 else UT;
// Fast path: the buffered bits cover the request.
if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);
// Start from all buffered bits, then place whole bytes above them.
var out_count: u16 = self.count;
var out: U = self.removeBits(self.count);
const full_bytes_left = (num - out_count) / 8;
for (0..full_bytes_left) |_| {
const byte = takeByte(self) catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
};
const pos = @as(U, byte) << @intCast(out_count);
out |= pos;
out_count += 8;
}
const bits_left = num - out_count;
const keep = 8 - bits_left;
if (bits_left == 0) return initBits(T, out, out_count);
const final_byte = takeByte(self) catch |err| switch (err) {
error.EndOfStream => return initBits(T, out, out_count),
};
// Take the low `bits_left` bits of the last byte; buffer its high `keep` bits.
const pos = @as(U, final_byte & low_bit_mask[bits_left]) << @intCast(out_count);
out |= pos;
self.bits = final_byte >> @intCast(bits_left);
self.count = @intCast(keep);
return initBits(T, out, num);
}
/// Returns the next unread byte and advances `index`.
fn takeByte(br: *BitReader) error{EndOfStream}!u8 {
if (br.bytes.len - br.index == 0) return error.EndOfStream;
const result = br.bytes[br.index];
br.index += 1;
return result;
}
/// Removes the `num` lowest buffered bits and returns them; the remaining
/// bits are shifted down. `num` must not exceed `count`.
fn removeBits(self: *@This(), num: u4) u8 {
if (num == 8) {
self.count = 0;
return self.bits;
}
const keep = self.count - num;
const bits = self.bits & low_bit_mask[num];
self.bits >>= @intCast(num);
self.count = keep;
return bits;
}
/// Discards any buffered bits so the next read starts at the next unread byte.
fn alignToByte(self: *@This()) void {
self.bits = 0;
self.count = 0;
}
};
// Reference `Table` from a file-level test so that the tests declared inside
// it are analyzed and run.
test {
_ = Table;
}
+10 -2
View File
@@ -54,12 +54,20 @@ pub const Md5 = struct {
};
}
pub fn hash(b: []const u8, out: *[digest_length]u8, options: Options) void {
pub fn hash(data: []const u8, out: *[digest_length]u8, options: Options) void {
var d = Md5.init(options);
d.update(b);
d.update(data);
d.final(out);
}
pub fn hashResult(data: []const u8) [digest_length]u8 {
var out: [digest_length]u8 = undefined;
var d = Md5.init(.{});
d.update(data);
d.final(&out);
return out;
}
pub fn update(d: *Self, b: []const u8) void {
var off: usize = 0;
+104 -173
View File
@@ -482,6 +482,7 @@ pub const Header = struct {
is_64: bool,
endian: std.builtin.Endian,
os_abi: OSABI,
/// The meaning of this value depends on `os_abi`.
abi_version: u8,
type: ET,
machine: EM,
@@ -494,205 +495,135 @@ pub const Header = struct {
shnum: u16,
shstrndx: u16,
pub fn program_header_iterator(self: Header, parse_source: anytype) ProgramHeaderIterator(@TypeOf(parse_source)) {
return ProgramHeaderIterator(@TypeOf(parse_source)){
.elf_header = self,
.parse_source = parse_source,
pub fn iterateProgramHeaders(h: Header, file_reader: *std.fs.File.Reader) ProgramHeaderIterator {
return .{
.elf_header = h,
.file_reader = file_reader,
};
}
pub fn section_header_iterator(self: Header, parse_source: anytype) SectionHeaderIterator(@TypeOf(parse_source)) {
return SectionHeaderIterator(@TypeOf(parse_source)){
.elf_header = self,
.parse_source = parse_source,
pub fn iterateSectionHeaders(h: Header, file_reader: *std.fs.File.Reader) SectionHeaderIterator {
return .{
.elf_header = h,
.file_reader = file_reader,
};
}
pub fn read(parse_source: anytype) !Header {
var hdr_buf: [@sizeOf(Elf64_Ehdr)]u8 align(@alignOf(Elf64_Ehdr)) = undefined;
try parse_source.seekableStream().seekTo(0);
try parse_source.deprecatedReader().readNoEof(&hdr_buf);
return Header.parse(&hdr_buf);
}
pub const ReadError = std.Io.Reader.Error || error{
InvalidElfMagic,
InvalidElfVersion,
InvalidElfClass,
InvalidElfEndian,
};
pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) !Header {
const hdr32 = @as(*const Elf32_Ehdr, @ptrCast(hdr_buf));
const hdr64 = @as(*const Elf64_Ehdr, @ptrCast(hdr_buf));
if (!mem.eql(u8, hdr32.e_ident[0..4], MAGIC)) return error.InvalidElfMagic;
if (hdr32.e_ident[EI_VERSION] != 1) return error.InvalidElfVersion;
pub fn read(r: *std.Io.Reader) ReadError!Header {
const buf = try r.peek(@sizeOf(Elf64_Ehdr));
const is_64 = switch (hdr32.e_ident[EI_CLASS]) {
ELFCLASS32 => false,
ELFCLASS64 => true,
else => return error.InvalidElfClass,
};
if (!mem.eql(u8, buf[0..4], MAGIC)) return error.InvalidElfMagic;
if (buf[EI_VERSION] != 1) return error.InvalidElfVersion;
const endian: std.builtin.Endian = switch (hdr32.e_ident[EI_DATA]) {
const endian: std.builtin.Endian = switch (buf[EI_DATA]) {
ELFDATA2LSB => .little,
ELFDATA2MSB => .big,
else => return error.InvalidElfEndian,
};
const need_bswap = endian != native_endian;
return switch (buf[EI_CLASS]) {
ELFCLASS32 => .init(try r.takeStruct(Elf32_Ehdr, endian), endian),
ELFCLASS64 => .init(try r.takeStruct(Elf64_Ehdr, endian), endian),
else => return error.InvalidElfClass,
};
}
pub fn init(hdr: anytype, endian: std.builtin.Endian) Header {
// Converting integers to exhaustive enums using `@enumFromInt` could cause a panic.
comptime assert(!@typeInfo(OSABI).@"enum".is_exhaustive);
const os_abi: OSABI = @enumFromInt(hdr32.e_ident[EI_OSABI]);
// The meaning of this value depends on `os_abi` so just make it available as `u8`.
const abi_version = hdr32.e_ident[EI_ABIVERSION];
const @"type" = if (need_bswap) blk: {
comptime assert(!@typeInfo(ET).@"enum".is_exhaustive);
const value = @intFromEnum(hdr32.e_type);
break :blk @as(ET, @enumFromInt(@byteSwap(value)));
} else hdr32.e_type;
const machine = if (need_bswap) blk: {
comptime assert(!@typeInfo(EM).@"enum".is_exhaustive);
const value = @intFromEnum(hdr32.e_machine);
break :blk @as(EM, @enumFromInt(@byteSwap(value)));
} else hdr32.e_machine;
return @as(Header, .{
.is_64 = is_64,
return .{
.is_64 = switch (@TypeOf(hdr)) {
Elf32_Ehdr => false,
Elf64_Ehdr => true,
else => @compileError("bad type"),
},
.endian = endian,
.os_abi = os_abi,
.abi_version = abi_version,
.type = @"type",
.machine = machine,
.entry = int(is_64, need_bswap, hdr32.e_entry, hdr64.e_entry),
.phoff = int(is_64, need_bswap, hdr32.e_phoff, hdr64.e_phoff),
.shoff = int(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff),
.phentsize = int(is_64, need_bswap, hdr32.e_phentsize, hdr64.e_phentsize),
.phnum = int(is_64, need_bswap, hdr32.e_phnum, hdr64.e_phnum),
.shentsize = int(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize),
.shnum = int(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum),
.shstrndx = int(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx),
});
.os_abi = @enumFromInt(hdr.e_ident[EI_OSABI]),
.abi_version = hdr.e_ident[EI_ABIVERSION],
.type = hdr.e_type,
.machine = hdr.e_machine,
.entry = hdr.e_entry,
.phoff = hdr.e_phoff,
.shoff = hdr.e_shoff,
.phentsize = hdr.e_phentsize,
.phnum = hdr.e_phnum,
.shentsize = hdr.e_shentsize,
.shnum = hdr.e_shnum,
.shstrndx = hdr.e_shstrndx,
};
}
};
pub fn ProgramHeaderIterator(comptime ParseSource: anytype) type {
return struct {
elf_header: Header,
parse_source: ParseSource,
index: usize = 0,
pub const ProgramHeaderIterator = struct {
elf_header: Header,
file_reader: *std.fs.File.Reader,
index: usize = 0,
pub fn next(self: *@This()) !?Elf64_Phdr {
if (self.index >= self.elf_header.phnum) return null;
defer self.index += 1;
pub fn next(it: *ProgramHeaderIterator) !?Elf64_Phdr {
if (it.index >= it.elf_header.phnum) return null;
defer it.index += 1;
if (self.elf_header.is_64) {
var phdr: Elf64_Phdr = undefined;
const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&phdr));
// ELF endianness matches native endianness.
if (self.elf_header.endian == native_endian) return phdr;
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf64_Phdr, &phdr);
return phdr;
}
var phdr: Elf32_Phdr = undefined;
const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&phdr));
// ELF endianness does NOT match native endianness.
if (self.elf_header.endian != native_endian) {
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf32_Phdr, &phdr);
}
// Convert 32-bit header to 64-bit.
return Elf64_Phdr{
.p_type = phdr.p_type,
.p_offset = phdr.p_offset,
.p_vaddr = phdr.p_vaddr,
.p_paddr = phdr.p_paddr,
.p_filesz = phdr.p_filesz,
.p_memsz = phdr.p_memsz,
.p_flags = phdr.p_flags,
.p_align = phdr.p_align,
};
if (it.elf_header.is_64) {
const offset = it.elf_header.phoff + @sizeOf(Elf64_Phdr) * it.index;
try it.file_reader.seekTo(offset);
const phdr = try it.file_reader.interface.takeStruct(Elf64_Phdr, it.elf_header.endian);
return phdr;
}
};
}
pub fn SectionHeaderIterator(comptime ParseSource: anytype) type {
return struct {
elf_header: Header,
parse_source: ParseSource,
index: usize = 0,
pub fn next(self: *@This()) !?Elf64_Shdr {
if (self.index >= self.elf_header.shnum) return null;
defer self.index += 1;
if (self.elf_header.is_64) {
var shdr: Elf64_Shdr = undefined;
const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&shdr));
// ELF endianness matches native endianness.
if (self.elf_header.endian == native_endian) return shdr;
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf64_Shdr, &shdr);
return shdr;
}
var shdr: Elf32_Shdr = undefined;
const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index;
try self.parse_source.seekableStream().seekTo(offset);
try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&shdr));
// ELF endianness does NOT match native endianness.
if (self.elf_header.endian != native_endian) {
// Convert fields to native endianness.
mem.byteSwapAllFields(Elf32_Shdr, &shdr);
}
// Convert 32-bit header to 64-bit.
return Elf64_Shdr{
.sh_name = shdr.sh_name,
.sh_type = shdr.sh_type,
.sh_flags = shdr.sh_flags,
.sh_addr = shdr.sh_addr,
.sh_offset = shdr.sh_offset,
.sh_size = shdr.sh_size,
.sh_link = shdr.sh_link,
.sh_info = shdr.sh_info,
.sh_addralign = shdr.sh_addralign,
.sh_entsize = shdr.sh_entsize,
};
}
};
}
fn int(is_64: bool, need_bswap: bool, int_32: anytype, int_64: anytype) @TypeOf(int_64) {
if (is_64) {
if (need_bswap) {
return @byteSwap(int_64);
} else {
return int_64;
}
} else {
return int32(need_bswap, int_32, @TypeOf(int_64));
const offset = it.elf_header.phoff + @sizeOf(Elf32_Phdr) * it.index;
try it.file_reader.seekTo(offset);
const phdr = try it.file_reader.interface.takeStruct(Elf32_Phdr, it.elf_header.endian);
return .{
.p_type = phdr.p_type,
.p_offset = phdr.p_offset,
.p_vaddr = phdr.p_vaddr,
.p_paddr = phdr.p_paddr,
.p_filesz = phdr.p_filesz,
.p_memsz = phdr.p_memsz,
.p_flags = phdr.p_flags,
.p_align = phdr.p_align,
};
}
}
};
fn int32(need_bswap: bool, int_32: anytype, comptime Int64: anytype) Int64 {
if (need_bswap) {
return @byteSwap(int_32);
} else {
return int_32;
pub const SectionHeaderIterator = struct {
elf_header: Header,
file_reader: *std.fs.File.Reader,
index: usize = 0,
pub fn next(it: *SectionHeaderIterator) !?Elf64_Shdr {
if (it.index >= it.elf_header.shnum) return null;
defer it.index += 1;
if (it.elf_header.is_64) {
try it.file_reader.seekTo(it.elf_header.shoff + @sizeOf(Elf64_Shdr) * it.index);
const shdr = try it.file_reader.interface.takeStruct(Elf64_Shdr, it.elf_header.endian);
return shdr;
}
try it.file_reader.seekTo(it.elf_header.shoff + @sizeOf(Elf32_Shdr) * it.index);
const shdr = try it.file_reader.interface.takeStruct(Elf32_Shdr, it.elf_header.endian);
return .{
.sh_name = shdr.sh_name,
.sh_type = shdr.sh_type,
.sh_flags = shdr.sh_flags,
.sh_addr = shdr.sh_addr,
.sh_offset = shdr.sh_offset,
.sh_size = shdr.sh_size,
.sh_link = shdr.sh_link,
.sh_info = shdr.sh_info,
.sh_addralign = shdr.sh_addralign,
.sh_entsize = shdr.sh_entsize,
};
}
}
};
pub const ELFCLASSNONE = 0;
pub const ELFCLASS32 = 1;
@@ -2070,7 +2001,7 @@ pub const R_AARCH64 = enum(u32) {
TLSLE_LDST64_TPREL_LO12 = 558,
/// Likewise; no check.
TLSLE_LDST64_TPREL_LO12_NC = 559,
/// PC-rel. load immediate 20:2.
/// PC-rel. load immediate 20:2.
TLSDESC_LD_PREL19 = 560,
/// PC-rel. ADR immediate 20:0.
TLSDESC_ADR_PREL21 = 561,
+52 -46
View File
@@ -1,6 +1,13 @@
file: File,
// TODO either replace this with rand_buf or use []u16 on Windows
tmp_path_buf: [tmp_path_len:0]u8,
const AtomicFile = @This();
const std = @import("../std.zig");
const File = std.fs.File;
const Dir = std.fs.Dir;
const fs = std.fs;
const assert = std.debug.assert;
const posix = std.posix;
file_writer: File.Writer,
random_integer: u64,
dest_basename: []const u8,
file_open: bool,
file_exists: bool,
@@ -9,35 +16,24 @@ dir: Dir,
pub const InitError = File.OpenError;
pub const random_bytes_len = 12;
const tmp_path_len = fs.base64_encoder.calcSize(random_bytes_len);
/// Note that the `Dir.atomicFile` API may be more handy than this lower-level function.
pub fn init(
dest_basename: []const u8,
mode: File.Mode,
dir: Dir,
close_dir_on_deinit: bool,
write_buffer: []u8,
) InitError!AtomicFile {
var rand_buf: [random_bytes_len]u8 = undefined;
var tmp_path_buf: [tmp_path_len:0]u8 = undefined;
while (true) {
std.crypto.random.bytes(rand_buf[0..]);
const tmp_path = fs.base64_encoder.encode(&tmp_path_buf, &rand_buf);
tmp_path_buf[tmp_path.len] = 0;
const file = dir.createFile(
tmp_path,
.{ .mode = mode, .exclusive = true },
) catch |err| switch (err) {
const random_integer = std.crypto.random.int(u64);
const tmp_sub_path = std.fmt.hex(random_integer);
const file = dir.createFile(&tmp_sub_path, .{ .mode = mode, .exclusive = true }) catch |err| switch (err) {
error.PathAlreadyExists => continue,
else => |e| return e,
};
return AtomicFile{
.file = file,
.tmp_path_buf = tmp_path_buf,
return .{
.file_writer = file.writer(write_buffer),
.random_integer = random_integer,
.dest_basename = dest_basename,
.file_open = true,
.file_exists = true,
@@ -48,41 +44,51 @@ pub fn init(
}
/// Always call deinit, even after a successful finish().
pub fn deinit(self: *AtomicFile) void {
if (self.file_open) {
self.file.close();
self.file_open = false;
pub fn deinit(af: *AtomicFile) void {
if (af.file_open) {
af.file_writer.file.close();
af.file_open = false;
}
if (self.file_exists) {
self.dir.deleteFile(&self.tmp_path_buf) catch {};
self.file_exists = false;
if (af.file_exists) {
const tmp_sub_path = std.fmt.hex(af.random_integer);
af.dir.deleteFile(&tmp_sub_path) catch {};
af.file_exists = false;
}
if (self.close_dir_on_deinit) {
self.dir.close();
if (af.close_dir_on_deinit) {
af.dir.close();
}
self.* = undefined;
af.* = undefined;
}
pub const FinishError = posix.RenameError;
pub const FlushError = File.WriteError;
/// Flushes the bytes still held in `file_writer`'s buffer.
///
/// When the writer interface reports `error.WriteFailed`, the error recorded
/// in `file_writer.err` is returned in its place.
pub fn flush(af: *AtomicFile) FlushError!void {
    const writer = &af.file_writer;
    writer.interface.flush() catch |flush_err| switch (flush_err) {
        error.WriteFailed => return writer.err.?,
    };
}
pub const RenameIntoPlaceError = posix.RenameError;
/// On Windows, this function introduces a period of time where some file
/// system operations on the destination file will result in
/// `error.AccessDenied`, including rename operations (such as the one used in
/// this function).
pub fn finish(self: *AtomicFile) FinishError!void {
assert(self.file_exists);
if (self.file_open) {
self.file.close();
self.file_open = false;
pub fn renameIntoPlace(af: *AtomicFile) RenameIntoPlaceError!void {
assert(af.file_exists);
if (af.file_open) {
af.file_writer.file.close();
af.file_open = false;
}
try posix.renameat(self.dir.fd, self.tmp_path_buf[0..], self.dir.fd, self.dest_basename);
self.file_exists = false;
const tmp_sub_path = std.fmt.hex(af.random_integer);
try posix.renameat(af.dir.fd, &tmp_sub_path, af.dir.fd, af.dest_basename);
af.file_exists = false;
}
const AtomicFile = @This();
const std = @import("../std.zig");
const File = std.fs.File;
const Dir = std.fs.Dir;
const fs = std.fs;
const assert = std.debug.assert;
const posix = std.posix;
pub const FinishError = FlushError || RenameIntoPlaceError;
/// Completes the atomic write: `flush` first, then `renameIntoPlace`.
///
/// An error from either step is returned to the caller; `renameIntoPlace` is
/// not attempted when `flush` fails.
pub fn finish(af: *AtomicFile) FinishError!void {
    try flush(af);
    try renameIntoPlace(af);
}
+71 -109
View File
@@ -1,3 +1,20 @@
const Dir = @This();
const builtin = @import("builtin");
const std = @import("../std.zig");
const File = std.fs.File;
const AtomicFile = std.fs.AtomicFile;
const base64_encoder = fs.base64_encoder;
const posix = std.posix;
const mem = std.mem;
const path = fs.path;
const fs = std.fs;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const linux = std.os.linux;
const windows = std.os.windows;
const native_os = builtin.os.tag;
const have_flock = @TypeOf(posix.system.flock) != void;
fd: Handle,
pub const Handle = posix.fd_t;
@@ -1862,9 +1879,10 @@ pub fn symLinkW(
/// Same as `symLink`, except tries to create the symbolic link until it
/// succeeds or encounters an error other than `error.PathAlreadyExists`.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
///
/// * On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// * On WASI, both paths should be encoded as valid UTF-8.
/// * On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn atomicSymLink(
dir: Dir,
target_path: []const u8,
@@ -1880,9 +1898,8 @@ pub fn atomicSymLink(
const dirname = path.dirname(sym_link_path) orelse ".";
var rand_buf: [AtomicFile.random_bytes_len]u8 = undefined;
const temp_path_len = dirname.len + 1 + base64_encoder.calcSize(rand_buf.len);
const rand_len = @sizeOf(u64) * 2;
const temp_path_len = dirname.len + 1 + rand_len;
var temp_path_buf: [fs.max_path_bytes]u8 = undefined;
if (temp_path_len > temp_path_buf.len) return error.NameTooLong;
@@ -1892,8 +1909,8 @@ pub fn atomicSymLink(
const temp_path = temp_path_buf[0..temp_path_len];
while (true) {
crypto.random.bytes(rand_buf[0..]);
_ = base64_encoder.encode(temp_path[dirname.len + 1 ..], rand_buf[0..]);
const random_integer = std.crypto.random.int(u64);
temp_path[dirname.len + 1 ..][0..rand_len].* = std.fmt.hex(random_integer);
if (dir.symLink(target_path, temp_path, flags)) {
return dir.rename(temp_path, sym_link_path);
@@ -2552,25 +2569,42 @@ pub fn updateFile(
try dest_dir.makePath(dirname);
}
var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = actual_mode });
var buffer: [1000]u8 = undefined; // Used only when direct fd-to-fd is not available.
var atomic_file = try dest_dir.atomicFile(dest_path, .{
.mode = actual_mode,
.write_buffer = &buffer,
});
defer atomic_file.deinit();
try atomic_file.file.writeFileAll(src_file, .{ .in_len = src_stat.size });
try atomic_file.file.updateTimes(src_stat.atime, src_stat.mtime);
var src_reader: File.Reader = .initSize(src_file, &.{}, src_stat.size);
const dest_writer = &atomic_file.file_writer.interface;
_ = dest_writer.sendFileAll(&src_reader, .unlimited) catch |err| switch (err) {
error.ReadFailed => return src_reader.err.?,
error.WriteFailed => return atomic_file.file_writer.err.?,
};
try atomic_file.file_writer.file.updateTimes(src_stat.atime, src_stat.mtime);
try atomic_file.finish();
return PrevStatus.stale;
return .stale;
}
pub const CopyFileError = File.OpenError || File.StatError ||
AtomicFile.InitError || CopyFileRawError || AtomicFile.FinishError;
AtomicFile.InitError || AtomicFile.FinishError ||
File.ReadError || File.WriteError;
/// Guaranteed to be atomic.
/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available,
/// there is a possibility of power loss or application termination leaving temporary files present
/// in the same directory as dest_path.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
/// Atomically creates a new file at `dest_path` within `dest_dir` with the
/// same contents as `source_path` within `source_dir`, overwriting any already
/// existing file.
///
/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and
/// readily available, there is a possibility of power loss or application
/// termination leaving temporary files present in the same directory as
/// dest_path.
///
/// On Windows, both paths should be encoded as
/// [WTF-8](https://simonsapin.github.io/wtf-8/). On WASI, both paths should be
/// encoded as valid UTF-8. On other platforms, both paths are an opaque
/// sequence of bytes with no particular encoding.
pub fn copyFile(
source_dir: Dir,
source_path: []const u8,
@@ -2578,79 +2612,34 @@ pub fn copyFile(
dest_path: []const u8,
options: CopyFileOptions,
) CopyFileError!void {
var in_file = try source_dir.openFile(source_path, .{});
defer in_file.close();
var file_reader: File.Reader = .init(try source_dir.openFile(source_path, .{}), &.{});
defer file_reader.file.close();
var size: ?u64 = null;
const mode = options.override_mode orelse blk: {
const st = try in_file.stat();
size = st.size;
const st = try file_reader.file.stat();
file_reader.size = st.size;
break :blk st.mode;
};
var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode });
var buffer: [1024]u8 = undefined; // Used only when direct fd-to-fd is not available.
var atomic_file = try dest_dir.atomicFile(dest_path, .{
.mode = mode,
.write_buffer = &buffer,
});
defer atomic_file.deinit();
try copy_file(in_file.handle, atomic_file.file.handle, size);
_ = atomic_file.file_writer.interface.sendFileAll(&file_reader, .unlimited) catch |err| switch (err) {
error.ReadFailed => return file_reader.err.?,
error.WriteFailed => return atomic_file.file_writer.err.?,
};
try atomic_file.finish();
}
const CopyFileRawError = error{SystemResources} || posix.CopyFileRangeError || posix.SendFileError;
// Transfer all the data between two file descriptors in the most efficient way.
// The copy starts at offset 0, the initial offsets are preserved.
// No metadata is transferred over.
//
// `maybe_size`, when non-null, is compared against the byte count of each
// transfer call as an early-exit condition (see the notes in the loops).
// Strategy, in order: `fcopyfile` on Darwin targets, `copy_file_range` on
// Linux, `sendfile` everywhere else (and on Darwin after `OPNOTSUPP`).
fn copy_file(fd_in: posix.fd_t, fd_out: posix.fd_t, maybe_size: ?u64) CopyFileRawError!void {
if (builtin.target.os.tag.isDarwin()) {
// Only the data is requested from fcopyfile (`.DATA = true`).
const rc = posix.system.fcopyfile(fd_in, fd_out, null, .{ .DATA = true });
switch (posix.errno(rc)) {
.SUCCESS => return,
.INVAL => unreachable,
.NOMEM => return error.SystemResources,
// The source file is not a directory, symbolic link, or regular file.
// Try with the fallback path before giving up.
.OPNOTSUPP => {},
else => |err| return posix.unexpectedErrno(err),
}
}
if (native_os == .linux) {
// Try copy_file_range first as that works at the FS level and is the
// most efficient method (if available).
var offset: u64 = 0;
cfr_loop: while (true) {
// The kernel checks the u64 value `offset+count` for overflow, use
// a 32 bit value so that the syscall won't return EINVAL except for
// impossibly large files (> 2^64-1 - 2^32-1).
const amt = try posix.copy_file_range(fd_in, offset, fd_out, offset, std.math.maxInt(u32), 0);
// Terminate as soon as we have copied size bytes or no bytes
// NOTE(review): `s` is compared with the amount of this single call, not
// with the running total `offset + amt`, so this early exit only triggers
// when the whole file was copied by one call; otherwise the loop ends on
// the `amt == 0` check below.
if (maybe_size) |s| {
if (s == amt) break :cfr_loop;
}
if (amt == 0) break :cfr_loop;
offset += amt;
}
return;
}
// Sendfile is a zero-copy mechanism iff the OS supports it, otherwise the
// fallback code will copy the contents chunk by chunk.
const empty_iovec = [0]posix.iovec_const{};
var offset: u64 = 0;
sendfile_loop: while (true) {
// No headers or trailers are passed; the count argument is 0.
const amt = try posix.sendfile(fd_out, fd_in, offset, 0, &empty_iovec, &empty_iovec, 0);
// Terminate as soon as we have copied size bytes or no bytes
// NOTE(review): same single-call comparison as in the copy_file_range loop.
if (maybe_size) |s| {
if (s == amt) break :sendfile_loop;
}
if (amt == 0) break :sendfile_loop;
offset += amt;
}
}
/// Options accepted by `Dir.atomicFile`.
pub const AtomicFileOptions = struct {
/// Forwarded to `AtomicFile.init` as the file mode.
mode: File.Mode = File.default_mode,
// NOTE(review): presumably requests creation of missing parent directories
// of the destination path; the code that reads this flag is not visible
// here — confirm.
make_path: bool = false,
/// Forwarded to `AtomicFile.init` as its `write_buffer` argument.
write_buffer: []u8,
};
/// Directly access the `.file_writer` field, and then call `AtomicFile.finish` to
@@ -2668,9 +2657,9 @@ pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions)
else
try self.openDir(dirname, .{});
return AtomicFile.init(fs.path.basename(dest_path), options.mode, dir, true);
return .init(fs.path.basename(dest_path), options.mode, dir, true, options.write_buffer);
} else {
return AtomicFile.init(dest_path, options.mode, self, false);
return .init(dest_path, options.mode, self, false, options.write_buffer);
}
}
@@ -2768,30 +2757,3 @@ pub fn setPermissions(self: Dir, permissions: Permissions) SetPermissionsError!v
const file: File = .{ .handle = self.fd };
try file.setPermissions(permissions);
}
const Metadata = File.Metadata;
pub const MetadataError = File.MetadataError;
/// Returns a `Metadata` struct, representing the permissions on the directory.
///
/// The directory handle is wrapped in a `File` and the query is delegated to
/// `File.metadata`.
pub fn metadata(self: Dir) MetadataError!Metadata {
    const dir_as_file = File{ .handle = self.fd };
    return dir_as_file.metadata();
}
const Dir = @This();
const builtin = @import("builtin");
const std = @import("../std.zig");
const File = std.fs.File;
const AtomicFile = std.fs.AtomicFile;
const base64_encoder = fs.base64_encoder;
const crypto = std.crypto;
const posix = std.posix;
const mem = std.mem;
const path = fs.path;
const fs = std.fs;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const linux = std.os.linux;
const windows = std.os.windows;
const native_os = builtin.os.tag;
const have_flock = @TypeOf(posix.system.flock) != void;
+188 -129
View File
@@ -1089,113 +1089,6 @@ pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: u
return total_bytes_copied;
}
/// Deprecated in favor of `Writer`.
///
/// Options for `writeFileAll` and its helpers.
pub const WriteFileOptions = struct {
/// Byte offset in the input file at which copying starts.
in_offset: u64 = 0,
/// Number of input bytes to copy; `null` means no length limit is applied.
in_len: ?u64 = null,
/// Header vectors followed by trailer vectors. The first `header_count`
/// entries are written before the file data, the remaining ones after it.
headers_and_trailers: []posix.iovec_const = &[0]posix.iovec_const{},
/// Number of leading entries of `headers_and_trailers` that are headers.
header_count: usize = 0,
};
/// Deprecated in favor of `Writer`.
pub const WriteFileError = ReadError || error{EndOfStream} || WriteError;
/// Deprecated in favor of `Writer`.
///
/// Copies from `in_file` into `self` as described by `args`, trying
/// `writeFileAllSendfile` first. For the sendfile errors enumerated below the
/// copy is delegated to `writeFileAllUnseekable`; every other error is
/// returned unchanged.
pub fn writeFileAll(self: File, in_file: File, args: WriteFileOptions) WriteFileError!void {
    self.writeFileAllSendfile(in_file, args) catch |sendfile_err| {
        switch (sendfile_err) {
            // Errors for which the read/write fallback is attempted.
            error.Unseekable,
            error.FastOpenAlreadyInProgress,
            error.MessageTooBig,
            error.FileDescriptorNotASocket,
            error.NetworkUnreachable,
            error.NetworkSubsystemFailed,
            error.ConnectionRefused,
            => return self.writeFileAllUnseekable(in_file, args),
            else => |other| return other,
        }
    };
}
/// Deprecated in favor of `Writer`.
///
/// Copies from `in_file` into `self` without seeking: writes the header
/// vectors, skips `args.in_offset` bytes of input by reading them, pumps the
/// input (limited to `args.in_len` bytes when set) through a 4096-byte FIFO,
/// and finally writes the trailer vectors.
pub fn writeFileAllUnseekable(self: File, in_file: File, args: WriteFileOptions) WriteFileError!void {
    const split = args.header_count;
    const header_vecs = args.headers_and_trailers[0..split];
    const trailer_vecs = args.headers_and_trailers[split..];

    try self.writevAll(header_vecs);

    // Advance the input to the requested start offset by discarding bytes.
    try in_file.deprecatedReader().skipBytes(args.in_offset, .{ .buf_size = 4096 });

    const Fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 });
    var pump_fifo = Fifo.init();
    if (args.in_len) |max_len| {
        var limited = std.io.limitedReader(in_file.deprecatedReader(), max_len);
        try pump_fifo.pump(limited.reader(), self.deprecatedWriter());
    } else {
        try pump_fifo.pump(in_file.deprecatedReader(), self.deprecatedWriter());
    }

    try self.writevAll(trailer_vecs);
}
/// Deprecated in favor of `Writer`.
///
/// Sends the headers, the requested range of `in_file`, and the trailers to
/// `self` using `posix.sendfile`, looping until everything has been written.
/// Partially written header/trailer vectors are advanced in place, so the
/// entries of `args.headers_and_trailers` are modified by this function.
fn writeFileAllSendfile(self: File, in_file: File, args: WriteFileOptions) posix.SendFileError!void {
// `count` is the number of file bytes to send; 0 means "no explicit length".
const count = blk: {
if (args.in_len) |l| {
if (l == 0) {
// No file data requested: only the headers and trailers are written.
return self.writevAll(args.headers_and_trailers);
} else {
break :blk l;
}
} else {
break :blk 0;
}
};
const headers = args.headers_and_trailers[0..args.header_count];
const trailers = args.headers_and_trailers[args.header_count..];
const zero_iovec = &[0]posix.iovec_const{};
// When reading the whole file, we cannot put the trailers in the sendfile() syscall,
// because we have no way to determine whether a partial write is past the end of the file or not.
const trls = if (count == 0) zero_iovec else trailers;
const offset = args.in_offset;
const out_fd = self.handle;
const in_fd = in_file.handle;
const flags = 0;
var amt: usize = 0;
// Phase 1: repeat sendfile until every header vector has been fully written.
hdrs: {
var i: usize = 0;
while (i < headers.len) {
amt = try posix.sendfile(out_fd, in_fd, offset, count, headers[i..], trls, flags);
// Drop the headers that were written completely; on leaving the block,
// `amt` holds the bytes written beyond the headers.
while (amt >= headers[i].len) {
amt -= headers[i].len;
i += 1;
if (i >= headers.len) break :hdrs;
}
// Header `i` was written only partially: advance it past the written part.
headers[i].base += amt;
headers[i].len -= amt;
}
}
// Phase 2: send the file data, continuing after the `amt` bytes already sent.
if (count == 0) {
var off: u64 = amt;
// No explicit length: keep sending until a call reports 0 bytes.
while (true) {
amt = try posix.sendfile(out_fd, in_fd, offset + off, 0, zero_iovec, zero_iovec, flags);
if (amt == 0) break;
off += amt;
}
} else {
var off: u64 = amt;
while (off < count) {
amt = try posix.sendfile(out_fd, in_fd, offset + off, count - off, zero_iovec, trailers, flags);
off += amt;
}
// Bytes written beyond `count` belong to the trailers.
amt = @as(usize, @intCast(off - count));
}
// Phase 3: write whatever remains of the trailers, skipping the `amt` bytes
// of trailer data that have already been written.
var i: usize = 0;
while (i < trailers.len) {
while (amt >= trailers[i].len) {
amt -= trailers[i].len;
i += 1;
if (i >= trailers.len) return;
}
trailers[i].base += amt;
trailers[i].len -= amt;
amt = try posix.writev(self.handle, trailers[i..]);
}
}
/// Deprecated in favor of `Reader`.
pub const DeprecatedReader = io.GenericReader(File, ReadError, read);
@@ -1242,7 +1135,7 @@ pub const Reader = struct {
err: ?ReadError = null,
mode: Reader.Mode = .positional,
/// Tracks the true seek position in the file. To obtain the logical
/// position, subtract the buffer size from this value.
/// position, use `logicalPos`.
pos: u64 = 0,
size: ?u64 = null,
size_err: ?GetEndPosError = null,
@@ -1335,14 +1228,12 @@ pub const Reader = struct {
pub fn seekBy(r: *Reader, offset: i64) Reader.SeekError!void {
switch (r.mode) {
.positional, .positional_reading => {
// TODO: make += operator allow any integer types
r.pos = @intCast(@as(i64, @intCast(r.pos)) + offset);
setPosAdjustingBuffer(r, @intCast(@as(i64, @intCast(r.pos)) + offset));
},
.streaming, .streaming_reading => {
const seek_err = r.seek_err orelse e: {
if (posix.lseek_CUR(r.file.handle, offset)) |_| {
// TODO: make += operator allow any integer types
r.pos = @intCast(@as(i64, @intCast(r.pos)) + offset);
setPosAdjustingBuffer(r, @intCast(@as(i64, @intCast(r.pos)) + offset));
return;
} else |err| {
r.seek_err = err;
@@ -1358,6 +1249,8 @@ pub const Reader = struct {
r.pos += n;
remaining -= n;
}
r.interface.seek = 0;
r.interface.end = 0;
},
.failure => return r.seek_err.?,
}
@@ -1366,7 +1259,7 @@ pub const Reader = struct {
pub fn seekTo(r: *Reader, offset: u64) Reader.SeekError!void {
switch (r.mode) {
.positional, .positional_reading => {
r.pos = offset;
setPosAdjustingBuffer(r, offset);
},
.streaming, .streaming_reading => {
if (offset >= r.pos) return Reader.seekBy(r, @intCast(offset - r.pos));
@@ -1375,12 +1268,28 @@ pub const Reader = struct {
r.seek_err = err;
return err;
};
r.pos = offset;
setPosAdjustingBuffer(r, offset);
},
.failure => return r.seek_err.?,
}
}
/// Returns the logical read position: the true seek position `pos` minus the
/// number of bytes currently buffered in `interface`.
pub fn logicalPos(r: *const Reader) u64 {
    const buffered_len = r.interface.bufferedLen();
    return r.pos - buffered_len;
}
/// Moves the reader's logical position to `offset`.
///
/// When `offset` lies within the range covered by the buffered bytes
/// (`logicalPos() <= offset < pos`), only the buffer's `seek` index is
/// advanced and `pos` stays as it is. Otherwise the buffer is emptied and
/// `pos` is set to `offset`.
fn setPosAdjustingBuffer(r: *Reader, offset: u64) void {
    const buffer_start = logicalPos(r);
    const inside_buffer = offset >= buffer_start and offset < r.pos;
    if (!inside_buffer) {
        // Target is not covered by buffered data: discard the buffer.
        r.interface.seek = 0;
        r.interface.end = 0;
        r.pos = offset;
        return;
    }
    const advance: usize = @intCast(offset - buffer_start);
    r.interface.seek += advance;
}
/// Number of slices to store on the stack, when trying to send as many byte
/// vectors through the underlying read calls as possible.
const max_buffers_len = 16;
@@ -1526,7 +1435,7 @@ pub const Reader = struct {
}
return 0;
};
const n = @min(size - pos, std.math.maxInt(i64), @intFromEnum(limit));
const n = @min(size - pos, maxInt(i64), @intFromEnum(limit));
file.seekBy(n) catch |err| {
r.seek_err = err;
return 0;
@@ -1645,7 +1554,10 @@ pub const Writer = struct {
return .{
.vtable = &.{
.drain = drain,
.sendFile = sendFile,
.sendFile = switch (builtin.zig_backend) {
else => sendFile,
.stage2_aarch64 => std.io.Writer.unimplementedSendFile,
},
},
.buffer = buffer,
};
@@ -1715,7 +1627,6 @@ pub const Writer = struct {
const pattern = data[data.len - 1];
if (pattern.len == 0 or splat == 0) return 0;
const n = windows.WriteFile(handle, pattern, null) catch |err| {
std.debug.print("windows write file failed3: {t}\n", .{err});
w.err = err;
return error.WriteFailed;
};
@@ -1817,18 +1728,141 @@ pub const Writer = struct {
file_reader: *Reader,
limit: std.io.Limit,
) std.io.Writer.FileError!usize {
const reader_buffered = file_reader.interface.buffered();
if (reader_buffered.len >= @intFromEnum(limit))
return sendFileBuffered(io_w, file_reader, reader_buffered);
const writer_buffered = io_w.buffered();
const file_limit = @intFromEnum(limit) - reader_buffered.len;
const w: *Writer = @alignCast(@fieldParentPtr("interface", io_w));
const out_fd = w.file.handle;
const in_fd = file_reader.file.handle;
// TODO try using copy_file_range on FreeBSD
// TODO try using sendfile on macOS
// TODO try using sendfile on FreeBSD
if (file_reader.size) |size| {
if (size - file_reader.pos == 0) {
if (reader_buffered.len != 0) {
return sendFileBuffered(io_w, file_reader, reader_buffered);
} else {
return error.EndOfStream;
}
}
}
if (native_os == .freebsd and w.mode == .streaming) sf: {
// Try using sendfile on FreeBSD.
if (w.sendfile_err != null) break :sf;
const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf;
var hdtr_data: std.c.sf_hdtr = undefined;
var headers: [2]posix.iovec_const = undefined;
var headers_i: u8 = 0;
if (writer_buffered.len != 0) {
headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len };
headers_i += 1;
}
if (reader_buffered.len != 0) {
headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len };
headers_i += 1;
}
const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: {
hdtr_data = .{
.headers = &headers,
.hdr_cnt = headers_i,
.trailers = null,
.trl_cnt = 0,
};
break :b &hdtr_data;
};
var sbytes: std.c.off_t = undefined;
const nbytes: usize = @min(file_limit, maxInt(usize));
const flags = 0;
switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, nbytes, hdtr, &sbytes, flags))) {
.SUCCESS, .INTR => {},
.INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation,
.BADF => if (builtin.mode == .Debug) @panic("race condition") else {
w.sendfile_err = error.Unexpected;
},
.FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else {
w.sendfile_err = error.Unexpected;
},
.NOTCONN => w.sendfile_err = error.BrokenPipe,
.AGAIN, .BUSY => if (sbytes == 0) {
w.sendfile_err = error.WouldBlock;
},
.IO => w.sendfile_err = error.InputOutput,
.PIPE => w.sendfile_err = error.BrokenPipe,
.NOBUFS => w.sendfile_err = error.SystemResources,
else => |err| w.sendfile_err = posix.unexpectedErrno(err),
}
if (sbytes == 0) {
file_reader.size = file_reader.pos;
return error.EndOfStream;
}
const consumed = io_w.consume(@intCast(sbytes));
file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed;
return consumed;
}
if (native_os.isDarwin() and w.mode == .streaming) sf: {
// Try using sendfile on macOS.
if (w.sendfile_err != null) break :sf;
const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf;
var hdtr_data: std.c.sf_hdtr = undefined;
var headers: [2]posix.iovec_const = undefined;
var headers_i: u8 = 0;
if (writer_buffered.len != 0) {
headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len };
headers_i += 1;
}
if (reader_buffered.len != 0) {
headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len };
headers_i += 1;
}
const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: {
hdtr_data = .{
.headers = &headers,
.hdr_cnt = headers_i,
.trailers = null,
.trl_cnt = 0,
};
break :b &hdtr_data;
};
const max_count = maxInt(i32); // Avoid EINVAL.
var len: std.c.off_t = @min(file_limit, max_count);
const flags = 0;
switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, &len, hdtr, flags))) {
.SUCCESS, .INTR => {},
.OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation,
.BADF => if (builtin.mode == .Debug) @panic("race condition") else {
w.sendfile_err = error.Unexpected;
},
.FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else {
w.sendfile_err = error.Unexpected;
},
.INVAL => if (builtin.mode == .Debug) @panic("invalid API usage") else {
w.sendfile_err = error.Unexpected;
},
.NOTCONN => w.sendfile_err = error.BrokenPipe,
.AGAIN => if (len == 0) {
w.sendfile_err = error.WouldBlock;
},
.IO => w.sendfile_err = error.InputOutput,
.PIPE => w.sendfile_err = error.BrokenPipe,
else => |err| w.sendfile_err = posix.unexpectedErrno(err),
}
if (len == 0) {
file_reader.size = file_reader.pos;
return error.EndOfStream;
}
const consumed = io_w.consume(@bitCast(len));
file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed;
return consumed;
}
if (native_os == .linux and w.mode == .streaming) sf: {
// Try using sendfile on Linux.
if (w.sendfile_err != null) break :sf;
// Linux sendfile does not support headers.
const buffered = limit.slice(file_reader.interface.buffer);
if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1);
if (writer_buffered.len != 0 or reader_buffered.len != 0)
return sendFileBuffered(io_w, file_reader, reader_buffered);
const max_count = 0x7ffff000; // Avoid EINVAL.
var off: std.os.linux.off_t = undefined;
const off_ptr: ?*std.os.linux.off_t, const count: usize = switch (file_reader.mode) {
@@ -1875,6 +1909,7 @@ pub const Writer = struct {
w.pos += n;
return n;
}
const copy_file_range = switch (native_os) {
.freebsd => std.os.freebsd.copy_file_range,
.linux => if (std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 })) std.os.linux.wrapped.copy_file_range else {},
@@ -1882,8 +1917,8 @@ pub const Writer = struct {
};
if (@TypeOf(copy_file_range) != void) cfr: {
if (w.copy_file_range_err != null) break :cfr;
const buffered = limit.slice(file_reader.interface.buffer);
if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1);
if (writer_buffered.len != 0 or reader_buffered.len != 0)
return sendFileBuffered(io_w, file_reader, reader_buffered);
var off_in: i64 = undefined;
var off_out: i64 = undefined;
const off_in_ptr: ?*i64 = switch (file_reader.mode) {
@@ -1922,6 +1957,9 @@ pub const Writer = struct {
if (file_reader.pos != 0) break :fcf;
if (w.pos != 0) break :fcf;
if (limit != .unlimited) break :fcf;
const size = file_reader.getSize() catch break :fcf;
if (writer_buffered.len != 0 or reader_buffered.len != 0)
return sendFileBuffered(io_w, file_reader, reader_buffered);
const rc = std.c.fcopyfile(in_fd, out_fd, null, .{ .DATA = true });
switch (posix.errno(rc)) {
.SUCCESS => {},
@@ -1942,15 +1980,24 @@ pub const Writer = struct {
return 0;
},
}
const n = if (file_reader.size) |size| size else @panic("TODO figure out how much copied");
file_reader.pos = n;
w.pos = n;
return n;
file_reader.pos = size;
w.pos = size;
return size;
}
return error.Unimplemented;
}
/// Writes `reader_buffered` (bytes already sitting in `file_reader`'s buffer)
/// through `drain` and marks the accepted bytes as consumed from the reader.
///
/// Returns the number of bytes of `reader_buffered` that `drain` accepted.
///
/// `file_reader.pos` is the true seek position, which already lies past the
/// buffered bytes (see `Reader.logicalPos`). Consuming `n` buffered bytes must
/// therefore only advance the reader's buffer and leave `pos` alone: seeking
/// to `pos + n` would throw away the rest of the buffer and skip file data
/// that has not been delivered yet.
fn sendFileBuffered(
    io_w: *std.io.Writer,
    file_reader: *Reader,
    reader_buffered: []const u8,
) std.io.Writer.FileError!usize {
    const n = try drain(io_w, &.{reader_buffered}, 1);
    // `n` never exceeds `reader_buffered.len`, so this stays inside the buffer.
    file_reader.interface.toss(n);
    return n;
}
pub fn seekTo(w: *Writer, offset: u64) SeekError!void {
switch (w.mode) {
.positional, .positional_reading => {
@@ -1979,7 +2026,19 @@ pub const Writer = struct {
/// along with other write failures.
pub fn end(w: *Writer) EndError!void {
try w.interface.flush();
return w.file.setEndPos(w.pos);
switch (w.mode) {
.positional,
.positional_reading,
=> w.file.setEndPos(w.pos) catch |err| switch (err) {
error.NonResizable => return,
else => |e| return e,
},
.streaming,
.streaming_reading,
.failure,
=> {},
}
}
};
+57 -29
View File
@@ -1499,32 +1499,18 @@ test "sendfile" {
const header2 = "second header\n";
const trailer1 = "trailer1\n";
const trailer2 = "second trailer\n";
var hdtr = [_]posix.iovec_const{
.{
.base = header1,
.len = header1.len,
},
.{
.base = header2,
.len = header2.len,
},
.{
.base = trailer1,
.len = trailer1.len,
},
.{
.base = trailer2,
.len = trailer2.len,
},
};
var headers: [2][]const u8 = .{ header1, header2 };
var trailers: [2][]const u8 = .{ trailer1, trailer2 };
var written_buf: [100]u8 = undefined;
try dest_file.writeFileAll(src_file, .{
.in_offset = 1,
.in_len = 10,
.headers_and_trailers = &hdtr,
.header_count = 2,
});
var file_reader = src_file.reader(&.{});
var fallback_buffer: [50]u8 = undefined;
var file_writer = dest_file.writer(&fallback_buffer);
try file_writer.interface.writeVecAll(&headers);
try file_reader.seekTo(1);
try testing.expectEqual(10, try file_writer.interface.sendFileAll(&file_reader, .limited(10)));
try file_writer.interface.writeVecAll(&trailers);
try file_writer.interface.flush();
const amt = try dest_file.preadAll(&written_buf, 0);
try testing.expectEqualStrings("header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n", written_buf[0..amt]);
}
@@ -1595,9 +1581,10 @@ test "AtomicFile" {
;
{
var af = try ctx.dir.atomicFile(test_out_file, .{});
var buffer: [100]u8 = undefined;
var af = try ctx.dir.atomicFile(test_out_file, .{ .write_buffer = &buffer });
defer af.deinit();
try af.file.writeAll(test_content);
try af.file_writer.interface.writeAll(test_content);
try af.finish();
}
const content = try ctx.dir.readFileAlloc(allocator, test_out_file, 9999);
@@ -2073,7 +2060,7 @@ test "invalid UTF-8/WTF-8 paths" {
}
test "read file non vectored" {
var tmp_dir = std.testing.tmpDir(.{});
var tmp_dir = testing.tmpDir(.{});
defer tmp_dir.cleanup();
const contents = "hello, world!\n";
@@ -2098,6 +2085,47 @@ test "read file non vectored" {
else => |e| return e,
};
}
try std.testing.expectEqualStrings(contents, w.buffered());
try std.testing.expectEqual(contents.len, i);
try testing.expectEqualStrings(contents, w.buffered());
try testing.expectEqual(contents.len, i);
}
// Checks that `File.Reader.seekTo` to an offset covered by the reader's
// buffered bytes moves only the logical position and leaves `pos` unchanged.
test "seek keeping partial buffer" {
var tmp_dir = testing.tmpDir(.{});
defer tmp_dir.cleanup();
const contents = "0123456789";
const file = try tmp_dir.dir.createFile("input.txt", .{ .read = true });
defer file.close();
// Fill the file with the ten test bytes, using a writer with an empty buffer.
{
var file_writer: std.fs.File.Writer = .init(file, &.{});
try file_writer.interface.writeAll(contents);
try file_writer.interface.flush();
}
// The reader gets a 3-byte buffer.
var read_buffer: [3]u8 = undefined;
var file_reader: std.fs.File.Reader = .init(file, &read_buffer);
try testing.expectEqual(0, file_reader.logicalPos());
// Read 4 bytes; the remaining expectations require that 3 further bytes
// ended up in the reader's buffer.
var buf: [4]u8 = undefined;
try file_reader.interface.readSliceAll(&buf);
if (file_reader.interface.bufferedLen() != 3) {
// Pass the test if the OS doesn't give us vectored reads.
return;
}
// 4 bytes delivered, 3 more buffered: logical position 4, true position 7.
try testing.expectEqual(4, file_reader.logicalPos());
try testing.expectEqual(7, file_reader.pos);
// Offset 6 lies inside the buffered range, so `pos` is expected to stay at 7.
try file_reader.seekTo(6);
try testing.expectEqual(6, file_reader.logicalPos());
try testing.expectEqual(7, file_reader.pos);
// The seek must not have touched the bytes read earlier.
try testing.expectEqualStrings("0123", &buf);
// Reading continues at offset 6 and yields the last four bytes of the file.
const n = try file_reader.interface.readSliceShort(&buf);
try testing.expectEqual(4, n);
try testing.expectEqualStrings("6789", &buf);
}
+1 -2
View File
@@ -129,11 +129,10 @@ pub const Request = struct {
pub const Compression = union(enum) {
pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader);
pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
pub const ZstdDecompressor = std.compress.zstd.Decompressor(std.io.AnyReader);
deflate: DeflateDecompressor,
gzip: GzipDecompressor,
zstd: ZstdDecompressor,
zstd: std.compress.zstd.Decompress,
none: void,
};
-1
View File
@@ -69,7 +69,6 @@ pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap;
pub const Scanner = @import("json/Scanner.zig");
pub const validate = Scanner.validate;
pub const Error = Scanner.Error;
pub const reader = Scanner.reader;
pub const default_buffer_size = Scanner.default_buffer_size;
pub const Token = Scanner.Token;
pub const TokenType = Scanner.TokenType;
+55 -38
View File
@@ -45,6 +45,7 @@ pub const rad_per_deg = 0.017453292519943295769236907684886127134428718885417254
/// 180.0/pi
pub const deg_per_rad = 57.295779513082320876798154814105170332405472466564321549160243861;
pub const Sign = enum(u1) { positive, negative };
pub const FloatRepr = float.FloatRepr;
pub const floatExponentBits = float.floatExponentBits;
pub const floatMantissaBits = float.floatMantissaBits;
@@ -594,27 +595,30 @@ pub fn shlExact(comptime T: type, a: T, shift_amt: Log2Int(T)) !T {
/// Shifts left. Overflowed bits are truncated.
/// A negative shift amount results in a right shift.
pub fn shl(comptime T: type, a: T, shift_amt: anytype) T {
const is_shl = shift_amt >= 0;
const abs_shift_amt = @abs(shift_amt);
const casted_shift_amt = blk: {
if (@typeInfo(T) == .vector) {
const C = @typeInfo(T).vector.child;
const len = @typeInfo(T).vector.len;
if (abs_shift_amt >= @typeInfo(C).int.bits) return @splat(0);
break :blk @as(@Vector(len, Log2Int(C)), @splat(@as(Log2Int(C), @intCast(abs_shift_amt))));
} else {
if (abs_shift_amt >= @typeInfo(T).int.bits) return 0;
break :blk @as(Log2Int(T), @intCast(abs_shift_amt));
}
const casted_shift_amt = casted_shift_amt: switch (@typeInfo(T)) {
.int => |info| {
if (abs_shift_amt < info.bits) break :casted_shift_amt @as(
Log2Int(T),
@intCast(abs_shift_amt),
);
if (info.signedness == .unsigned or is_shl) return 0;
return a >> (info.bits - 1);
},
.vector => |info| {
const Child = info.child;
const child_info = @typeInfo(Child).int;
if (abs_shift_amt < child_info.bits) break :casted_shift_amt @as(
@Vector(info.len, Log2Int(Child)),
@splat(@as(Log2Int(Child), @intCast(abs_shift_amt))),
);
if (child_info.signedness == .unsigned or is_shl) return @splat(0);
return a >> @splat(child_info.bits - 1);
},
else => comptime unreachable,
};
if (@TypeOf(shift_amt) == comptime_int or @typeInfo(@TypeOf(shift_amt)).int.signedness == .signed) {
if (shift_amt < 0) {
return a >> casted_shift_amt;
}
}
return a << casted_shift_amt;
return if (is_shl) a << casted_shift_amt else a >> casted_shift_amt;
}
test shl {
@@ -629,32 +633,40 @@ test shl {
try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, @as(usize, 1))[0] == @as(u32, 42) << 1);
try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, @as(isize, -1))[0] == @as(u32, 42) >> 1);
try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, 33)[0] == 0);
try testing.expect(shl(i8, -1, -100) == -1);
try testing.expect(shl(i8, -1, 100) == 0);
try testing.expect(@reduce(.And, shl(@Vector(2, i8), .{ -1, 1 }, -100) == @Vector(2, i8){ -1, 0 }));
try testing.expect(@reduce(.And, shl(@Vector(2, i8), .{ -1, 1 }, 100) == @Vector(2, i8){ 0, 0 }));
}
/// Shifts right. Overflowed bits are truncated.
/// A negative shift amount results in a left shift.
pub fn shr(comptime T: type, a: T, shift_amt: anytype) T {
const is_shl = shift_amt < 0;
const abs_shift_amt = @abs(shift_amt);
const casted_shift_amt = blk: {
if (@typeInfo(T) == .vector) {
const C = @typeInfo(T).vector.child;
const len = @typeInfo(T).vector.len;
if (abs_shift_amt >= @typeInfo(C).int.bits) return @splat(0);
break :blk @as(@Vector(len, Log2Int(C)), @splat(@as(Log2Int(C), @intCast(abs_shift_amt))));
} else {
if (abs_shift_amt >= @typeInfo(T).int.bits) return 0;
break :blk @as(Log2Int(T), @intCast(abs_shift_amt));
}
const casted_shift_amt = casted_shift_amt: switch (@typeInfo(T)) {
.int => |info| {
if (abs_shift_amt < info.bits) break :casted_shift_amt @as(
Log2Int(T),
@intCast(abs_shift_amt),
);
if (info.signedness == .unsigned or is_shl) return 0;
return a >> (info.bits - 1);
},
.vector => |info| {
const Child = info.child;
const child_info = @typeInfo(Child).int;
if (abs_shift_amt < child_info.bits) break :casted_shift_amt @as(
@Vector(info.len, Log2Int(Child)),
@splat(@as(Log2Int(Child), @intCast(abs_shift_amt))),
);
if (child_info.signedness == .unsigned or is_shl) return @splat(0);
return a >> @splat(child_info.bits - 1);
},
else => comptime unreachable,
};
if (@TypeOf(shift_amt) == comptime_int or @typeInfo(@TypeOf(shift_amt)).int.signedness == .signed) {
if (shift_amt < 0) {
return a << casted_shift_amt;
}
}
return a >> casted_shift_amt;
return if (is_shl) a << casted_shift_amt else a >> casted_shift_amt;
}
test shr {
@@ -669,6 +681,11 @@ test shr {
try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, @as(usize, 1))[0] == @as(u32, 42) >> 1);
try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, @as(isize, -1))[0] == @as(u32, 42) << 1);
try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, 33)[0] == 0);
try testing.expect(shr(i8, -1, -100) == 0);
try testing.expect(shr(i8, -1, 100) == -1);
try testing.expect(@reduce(.And, shr(@Vector(2, i8), .{ -1, 1 }, -100) == @Vector(2, i8){ 0, 0 }));
try testing.expect(@reduce(.And, shr(@Vector(2, i8), .{ -1, 1 }, 100) == @Vector(2, i8){ -1, 0 }));
}
/// Rotates right. Only unsigned values can be rotated. Negative shift
-1
View File
@@ -2774,7 +2774,6 @@ test "bitNotWrap more than two limbs" {
// This test requires int sizes greater than 128 bits.
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
// LLVM: unexpected runtime library name: __umodei4
if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
+2 -4
View File
@@ -4,8 +4,6 @@ const assert = std.debug.assert;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
pub const Sign = enum(u1) { positive, negative };
pub fn FloatRepr(comptime Float: type) type {
const fractional_bits = floatFractionalBits(Float);
const exponent_bits = floatExponentBits(Float);
@@ -14,7 +12,7 @@ pub fn FloatRepr(comptime Float: type) type {
mantissa: StoredMantissa,
exponent: BiasedExponent,
sign: Sign,
sign: std.math.Sign,
pub const StoredMantissa = @Type(.{ .int = .{
.signedness = .unsigned,
@@ -69,7 +67,7 @@ pub fn FloatRepr(comptime Float: type) type {
/// This currently truncates denormal values, which needs to be fixed before this can be used to
/// produce a rounded value.
pub fn reconstruct(normalized: Normalized, sign: Sign) Float {
pub fn reconstruct(normalized: Normalized, sign: std.math.Sign) Float {
if (normalized.exponent > BiasedExponent.max_normal.unbias()) return @bitCast(Repr{
.mantissa = 0,
.exponent = .infinite,
-1
View File
@@ -132,7 +132,6 @@ inline fn less_than_5(x: u32) u32 {
test log10_int {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO
+4 -3
View File
@@ -676,6 +676,7 @@ test lessThan {
const eqlBytes_allowed = switch (builtin.zig_backend) {
// These backends don't support vectors yet.
.stage2_aarch64,
.stage2_powerpc,
.stage2_riscv64,
=> false,
@@ -4482,7 +4483,7 @@ pub fn doNotOptimizeAway(val: anytype) void {
);
asm volatile (""
:
: [val2] "r" (val2),
: [_] "r" (val2),
);
} else doNotOptimizeAway(&val);
},
@@ -4490,7 +4491,7 @@ pub fn doNotOptimizeAway(val: anytype) void {
if ((t.float.bits == 32 or t.float.bits == 64) and builtin.zig_backend != .stage2_c) {
asm volatile (""
:
: [val] "rm" (val),
: [_] "rm" (val),
);
} else doNotOptimizeAway(&val);
},
@@ -4500,7 +4501,7 @@ pub fn doNotOptimizeAway(val: anytype) void {
} else {
asm volatile (""
:
: [val] "m" (val),
: [_] "m" (val),
: .{ .memory = true });
}
},
-1
View File
@@ -503,7 +503,6 @@ pub var elf_aux_maybe: ?[*]std.elf.Auxv = null;
/// Whether an external or internal getauxval implementation is used.
const extern_getauxval = switch (builtin.zig_backend) {
// Calling extern functions is not yet supported with these backends
.stage2_aarch64,
.stage2_arm,
.stage2_powerpc,
.stage2_riscv64,
+24 -295
View File
@@ -192,10 +192,27 @@ pub const iovec_const = extern struct {
len: usize,
};
pub const ACCMODE = enum(u2) {
RDONLY = 0,
WRONLY = 1,
RDWR = 2,
pub const ACCMODE = switch (native_os) {
// POSIX has a note about the access mode values:
//
// In historical implementations the value of O_RDONLY is zero. Because of
// that, it is not possible to detect the presence of O_RDONLY and another
// option. Future implementations should encode O_RDONLY and O_WRONLY as
// bit flags so that: O_RDONLY | O_WRONLY == O_RDWR
//
// In practice SerenityOS is the only system supported by Zig that
// implements this suggestion.
// https://github.com/SerenityOS/serenity/blob/4adc51fdf6af7d50679c48b39362e062f5a3b2cb/Kernel/API/POSIX/fcntl.h#L28-L30
.serenity => enum(u2) {
RDONLY = 1,
WRONLY = 2,
RDWR = 3,
},
else => enum(u2) {
RDONLY = 0,
WRONLY = 1,
RDWR = 2,
},
};
pub const TCSA = enum(c_uint) {
@@ -1035,6 +1052,7 @@ pub const TruncateError = error{
FileBusy,
AccessDenied,
PermissionDenied,
NonResizable,
} || UnexpectedError;
/// Length must be positive when treated as an i64.
@@ -1074,7 +1092,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void {
.PERM => return error.PermissionDenied,
.TXTBSY => return error.FileBusy,
.BADF => unreachable, // Handle not open for writing
.INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle
.INVAL => return error.NonResizable,
.NOTCAPABLE => return error.AccessDenied,
else => |err| return unexpectedErrno(err),
}
@@ -1090,7 +1108,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void {
.PERM => return error.PermissionDenied,
.TXTBSY => return error.FileBusy,
.BADF => unreachable, // Handle not open for writing
.INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle
.INVAL => return error.NonResizable, // This is returned for /dev/null for example.
else => |err| return unexpectedErrno(err),
}
}
@@ -6326,295 +6344,6 @@ pub fn send(
};
}
pub const SendFileError = PReadError || WriteError || SendError;
/// Transfer data between file descriptors, with optional headers and trailers.
///
/// Returns the number of bytes written, which can be zero.
///
/// The `sendfile` call copies `in_len` bytes from one file descriptor to another. When possible,
/// this is done within the operating system kernel, which can provide better performance
/// characteristics than transferring data from kernel to user space and back, such as with
/// `read` and `write` calls. When `in_len` is `0`, it means to copy until the end of the input file has been
/// reached. Note, however, that partial writes are still possible in this case.
///
/// `in_fd` must be a file descriptor opened for reading, and `out_fd` must be a file descriptor
/// opened for writing. They may be any kind of file descriptor; however, if `in_fd` is not a regular
/// file system file, it may cause this function to fall back to calling `read` and `write`, in which case
/// atomicity guarantees no longer apply.
///
/// Copying begins reading at `in_offset`. The input file descriptor seek position is ignored and not updated.
/// If the output file descriptor has a seek position, it is updated as bytes are written. When
/// `in_offset` is past the end of the input file, it successfully reads 0 bytes.
///
/// `flags` has different meanings per operating system; refer to the respective man pages.
///
/// These systems support atomically sending everything, including headers and trailers:
/// * macOS
/// * FreeBSD
///
/// These systems support in-kernel data copying, but headers and trailers are not sent atomically:
/// * Linux
///
/// Other systems fall back to calling `read` / `write`.
///
/// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000`
/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as
/// well as stuffing the errno codes into the last `4096` values. This is noted on the `sendfile` man page.
/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL.
/// The corresponding POSIX limit on this is `maxInt(isize)`.
pub fn sendfile(
out_fd: fd_t,
in_fd: fd_t,
in_offset: u64,
in_len: u64,
headers: []const iovec_const,
trailers: []const iovec_const,
flags: u32,
) SendFileError!usize {
var header_done = false;
var total_written: usize = 0;
// Prevents EOVERFLOW.
const size_t = std.meta.Int(.unsigned, @typeInfo(usize).int.bits - 1);
const max_count = switch (native_os) {
.linux => 0x7ffff000,
.macos, .ios, .watchos, .tvos, .visionos => maxInt(i32),
else => maxInt(size_t),
};
switch (native_os) {
.linux => sf: {
if (headers.len != 0) {
const amt = try writev(out_fd, headers);
total_written += amt;
if (amt < count_iovec_bytes(headers)) return total_written;
header_done = true;
}
// Here we match BSD behavior, making a zero count value send as many bytes as possible.
const adjusted_count = if (in_len == 0) max_count else @min(in_len, max_count);
const sendfile_sym = if (lfs64_abi) system.sendfile64 else system.sendfile;
while (true) {
var offset: off_t = @bitCast(in_offset);
const rc = sendfile_sym(out_fd, in_fd, &offset, adjusted_count);
switch (errno(rc)) {
.SUCCESS => {
const amt: usize = @bitCast(rc);
total_written += amt;
if (in_len == 0 and amt == 0) {
// We have detected EOF from `in_fd`.
break;
} else if (amt < in_len) {
return total_written;
} else {
break;
}
},
.BADF => unreachable, // Always a race condition.
.FAULT => unreachable, // Segmentation fault.
.OVERFLOW => unreachable, // We avoid passing too large of a `count`.
.NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
.INVAL => {
// EINVAL could be any of the following situations:
// * Descriptor is not valid or locked
// * an mmap(2)-like operation is not available for in_fd
// * count is negative
// * out_fd has the APPEND flag set
// Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write
// manually.
break :sf;
},
.AGAIN => return error.WouldBlock,
.IO => return error.InputOutput,
.PIPE => return error.BrokenPipe,
.NOMEM => return error.SystemResources,
.NXIO => return error.Unseekable,
.SPIPE => return error.Unseekable,
else => |err| {
unexpectedErrno(err) catch {};
break :sf;
},
}
}
if (trailers.len != 0) {
total_written += try writev(out_fd, trailers);
}
return total_written;
},
.freebsd => sf: {
var hdtr_data: std.c.sf_hdtr = undefined;
var hdtr: ?*std.c.sf_hdtr = null;
if (headers.len != 0 or trailers.len != 0) {
// Here we carefully avoid `@intCast` by returning partial writes when
// too many io vectors are provided.
const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31);
if (headers.len > hdr_cnt) return writev(out_fd, headers);
const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31);
hdtr_data = std.c.sf_hdtr{
.headers = headers.ptr,
.hdr_cnt = hdr_cnt,
.trailers = trailers.ptr,
.trl_cnt = trl_cnt,
};
hdtr = &hdtr_data;
}
while (true) {
var sbytes: off_t = undefined;
const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), @min(in_len, max_count), hdtr, &sbytes, flags));
const amt: usize = @bitCast(sbytes);
switch (err) {
.SUCCESS => return amt,
.BADF => unreachable, // Always a race condition.
.FAULT => unreachable, // Segmentation fault.
.NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
.INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => {
// EINVAL could be any of the following situations:
// * The fd argument is not a regular file.
// * The s argument is not a SOCK.STREAM type socket.
// * The offset argument is negative.
// Because of some of these possibilities, we fall back to doing read/write
// manually, the same as ENOSYS.
break :sf;
},
.INTR => if (amt != 0) return amt else continue,
.AGAIN => if (amt != 0) {
return amt;
} else {
return error.WouldBlock;
},
.BUSY => if (amt != 0) {
return amt;
} else {
return error.WouldBlock;
},
.IO => return error.InputOutput,
.NOBUFS => return error.SystemResources,
.PIPE => return error.BrokenPipe,
else => {
unexpectedErrno(err) catch {};
if (amt != 0) {
return amt;
} else {
break :sf;
}
},
}
}
},
.macos, .ios, .tvos, .watchos, .visionos => sf: {
var hdtr_data: std.c.sf_hdtr = undefined;
var hdtr: ?*std.c.sf_hdtr = null;
if (headers.len != 0 or trailers.len != 0) {
// Here we carefully avoid `@intCast` by returning partial writes when
// too many io vectors are provided.
const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31);
if (headers.len > hdr_cnt) return writev(out_fd, headers);
const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31);
hdtr_data = std.c.sf_hdtr{
.headers = headers.ptr,
.hdr_cnt = hdr_cnt,
.trailers = trailers.ptr,
.trl_cnt = trl_cnt,
};
hdtr = &hdtr_data;
}
while (true) {
var sbytes: off_t = @min(in_len, max_count);
const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), &sbytes, hdtr, flags));
const amt: usize = @bitCast(sbytes);
switch (err) {
.SUCCESS => return amt,
.BADF => unreachable, // Always a race condition.
.FAULT => unreachable, // Segmentation fault.
.INVAL => unreachable,
.NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket
.OPNOTSUPP, .NOTSOCK, .NOSYS => break :sf,
.INTR => if (amt != 0) return amt else continue,
.AGAIN => if (amt != 0) {
return amt;
} else {
return error.WouldBlock;
},
.IO => return error.InputOutput,
.PIPE => return error.BrokenPipe,
else => {
unexpectedErrno(err) catch {};
if (amt != 0) {
return amt;
} else {
break :sf;
}
},
}
}
},
else => {}, // fall back to read/write
}
if (headers.len != 0 and !header_done) {
const amt = try writev(out_fd, headers);
total_written += amt;
if (amt < count_iovec_bytes(headers)) return total_written;
}
rw: {
var buf: [8 * 4096]u8 = undefined;
// Here we match BSD behavior, making a zero count value send as many bytes as possible.
const adjusted_count = if (in_len == 0) buf.len else @min(buf.len, in_len);
const amt_read = try pread(in_fd, buf[0..adjusted_count], in_offset);
if (amt_read == 0) {
if (in_len == 0) {
// We have detected EOF from `in_fd`.
break :rw;
} else {
return total_written;
}
}
const amt_written = try write(out_fd, buf[0..amt_read]);
total_written += amt_written;
if (amt_written < in_len or in_len == 0) return total_written;
}
if (trailers.len != 0) {
total_written += try writev(out_fd, trailers);
}
return total_written;
}
/// Returns the sum of the `len` fields of every entry in `iovs`.
/// An empty slice yields 0.
fn count_iovec_bytes(iovs: []const iovec_const) usize {
    var total: usize = 0;
    var index: usize = 0;
    while (index < iovs.len) : (index += 1) {
        total += iovs[index].len;
    }
    return total;
}
pub const CopyFileRangeError = error{
FileTooBig,
InputOutput,
+48 -37
View File
@@ -14,6 +14,7 @@ const assert = std.debug.assert;
const native_os = builtin.os.tag;
const Allocator = std.mem.Allocator;
const ChildProcess = @This();
const ArrayList = std.ArrayListUnmanaged;
pub const Id = switch (native_os) {
.windows => windows.HANDLE,
@@ -348,19 +349,6 @@ pub const RunResult = struct {
stderr: []u8,
};
/// Transfers the bytes currently held by `fifo` into `list`.
///
/// When `list` has no capacity yet, `list` takes over the fifo's entire buffer
/// (no bytes are copied) and `fifo` is re-initialized with its own allocator.
/// Otherwise the fifo's bytes are appended to `list` using `allocator`.
///
/// NOTE(review): the hand-off makes `list` the owner of memory that came from
/// `fifo.allocator`; presumably callers pass the same allocator as `allocator`
/// so that a later `list.deinit(allocator)` is valid — confirm against callers.
fn writeFifoDataToArrayList(allocator: Allocator, list: *std.ArrayListUnmanaged(u8), fifo: *std.io.PollFifo) !void {
// The code below addresses the data as `fifo.buf[0..fifo.count]`, so it must
// start at index 0 (presumably what `realign` establishes — TODO confirm).
if (fifo.head != 0) fifo.realign();
if (list.capacity == 0) {
// Steal the fifo's buffer: readable bytes become `items`, the whole
// buffer length becomes the list's capacity.
list.* = .{
.items = fifo.buf[0..fifo.count],
.capacity = fifo.buf.len,
};
// The fifo no longer owns that buffer; replace it with a fresh one.
fifo.* = std.io.PollFifo.init(fifo.allocator);
} else {
try list.appendSlice(allocator, fifo.buf[0..fifo.count]);
}
}
/// Collect the output from the process's stdout and stderr. Will return once all output
/// has been collected. This does not mean that the process has ended. `wait` should still
/// be called to wait for and clean up the process.
@@ -370,28 +358,48 @@ pub fn collectOutput(
child: ChildProcess,
/// Used for `stdout` and `stderr`.
allocator: Allocator,
stdout: *std.ArrayListUnmanaged(u8),
stderr: *std.ArrayListUnmanaged(u8),
stdout: *ArrayList(u8),
stderr: *ArrayList(u8),
max_output_bytes: usize,
) !void {
assert(child.stdout_behavior == .Pipe);
assert(child.stderr_behavior == .Pipe);
var poller = std.io.poll(allocator, enum { stdout, stderr }, .{
var poller = std.Io.poll(allocator, enum { stdout, stderr }, .{
.stdout = child.stdout.?,
.stderr = child.stderr.?,
});
defer poller.deinit();
while (try poller.poll()) {
if (poller.fifo(.stdout).count > max_output_bytes)
return error.StdoutStreamTooLong;
if (poller.fifo(.stderr).count > max_output_bytes)
return error.StderrStreamTooLong;
const stdout_r = poller.reader(.stdout);
stdout_r.buffer = stdout.allocatedSlice();
stdout_r.seek = 0;
stdout_r.end = stdout.items.len;
const stderr_r = poller.reader(.stderr);
stderr_r.buffer = stderr.allocatedSlice();
stderr_r.seek = 0;
stderr_r.end = stderr.items.len;
defer {
stdout.* = .{
.items = stdout_r.buffer[0..stdout_r.end],
.capacity = stdout_r.buffer.len,
};
stderr.* = .{
.items = stderr_r.buffer[0..stderr_r.end],
.capacity = stderr_r.buffer.len,
};
stdout_r.buffer = &.{};
stderr_r.buffer = &.{};
}
try writeFifoDataToArrayList(allocator, stdout, poller.fifo(.stdout));
try writeFifoDataToArrayList(allocator, stderr, poller.fifo(.stderr));
while (try poller.poll()) {
if (stdout_r.bufferedLen() > max_output_bytes)
return error.StdoutStreamTooLong;
if (stderr_r.bufferedLen() > max_output_bytes)
return error.StderrStreamTooLong;
}
}
pub const RunError = posix.GetCwdError || posix.ReadError || SpawnError || posix.PollError || error{
@@ -421,10 +429,10 @@ pub fn run(args: struct {
child.expand_arg0 = args.expand_arg0;
child.progress_node = args.progress_node;
var stdout: std.ArrayListUnmanaged(u8) = .empty;
errdefer stdout.deinit(args.allocator);
var stderr: std.ArrayListUnmanaged(u8) = .empty;
errdefer stderr.deinit(args.allocator);
var stdout: ArrayList(u8) = .empty;
defer stdout.deinit(args.allocator);
var stderr: ArrayList(u8) = .empty;
defer stderr.deinit(args.allocator);
try child.spawn();
errdefer {
@@ -432,7 +440,7 @@ pub fn run(args: struct {
}
try child.collectOutput(args.allocator, &stdout, &stderr, args.max_output_bytes);
return RunResult{
return .{
.stdout = try stdout.toOwnedSlice(args.allocator),
.stderr = try stderr.toOwnedSlice(args.allocator),
.term = try child.wait(),
@@ -878,12 +886,12 @@ fn spawnWindows(self: *ChildProcess) SpawnError!void {
var cmd_line_cache = WindowsCommandLineCache.init(self.allocator, self.argv);
defer cmd_line_cache.deinit();
var app_buf: std.ArrayListUnmanaged(u16) = .empty;
var app_buf: ArrayList(u16) = .empty;
defer app_buf.deinit(self.allocator);
try app_buf.appendSlice(self.allocator, app_name_w);
var dir_buf: std.ArrayListUnmanaged(u16) = .empty;
var dir_buf: ArrayList(u16) = .empty;
defer dir_buf.deinit(self.allocator);
if (cwd_path_w.len > 0) {
@@ -1003,13 +1011,16 @@ fn forkChildErrReport(fd: i32, err: ChildProcess.SpawnError) noreturn {
}
fn writeIntFd(fd: i32, value: ErrInt) !void {
const file: File = .{ .handle = fd };
file.deprecatedWriter().writeInt(u64, @intCast(value), .little) catch return error.SystemResources;
var buffer: [8]u8 = undefined;
var fw: std.fs.File.Writer = .initMode(.{ .handle = fd }, &buffer, .streaming);
fw.interface.writeInt(u64, value, .little) catch unreachable;
fw.interface.flush() catch return error.SystemResources;
}
fn readIntFd(fd: i32) !ErrInt {
const file: File = .{ .handle = fd };
return @intCast(file.deprecatedReader().readInt(u64, .little) catch return error.SystemResources);
var buffer: [8]u8 = undefined;
var fr: std.fs.File.Reader = .initMode(.{ .handle = fd }, &buffer, .streaming);
return @intCast(fr.interface.takeInt(u64, .little) catch return error.SystemResources);
}
const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8);
@@ -1020,8 +1031,8 @@ const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8);
/// Note: If the dir is the cwd, dir_buf should be empty (len = 0).
fn windowsCreateProcessPathExt(
allocator: mem.Allocator,
dir_buf: *std.ArrayListUnmanaged(u16),
app_buf: *std.ArrayListUnmanaged(u16),
dir_buf: *ArrayList(u16),
app_buf: *ArrayList(u16),
pathext: [:0]const u16,
cmd_line_cache: *WindowsCommandLineCache,
envp_ptr: ?[*]u16,
@@ -1504,7 +1515,7 @@ const WindowsCommandLineCache = struct {
/// Returns the absolute path of `cmd.exe` within the Windows system directory.
/// The caller owns the returned slice.
fn windowsCmdExePath(allocator: mem.Allocator) error{ OutOfMemory, Unexpected }![:0]u16 {
var buf = try std.ArrayListUnmanaged(u16).initCapacity(allocator, 128);
var buf = try ArrayList(u16).initCapacity(allocator, 128);
errdefer buf.deinit(allocator);
while (true) {
const unused_slice = buf.unusedCapacitySlice();
+5 -54
View File
@@ -101,17 +101,11 @@ comptime {
// Simplified start code for stage2 until it supports more language features ///
fn main2() callconv(.c) c_int {
root.main();
return 0;
return callMain();
}
fn _start2() callconv(.withStackAlign(.c, 1)) noreturn {
callMain2();
}
fn callMain2() noreturn {
root.main();
exit2(0);
std.posix.exit(callMain());
}
fn spirvMain2() callconv(.kernel) void {
@@ -119,51 +113,7 @@ fn spirvMain2() callconv(.kernel) void {
}
fn wWinMainCRTStartup2() callconv(.c) noreturn {
root.main();
exit2(0);
}
fn exit2(code: usize) noreturn {
switch (native_os) {
.linux => switch (builtin.cpu.arch) {
.x86_64 => {
asm volatile ("syscall"
:
: [number] "{rax}" (231),
[arg1] "{rdi}" (code),
: .{ .rcx = true, .r11 = true, .memory = true });
},
.arm => {
asm volatile ("svc #0"
:
: [number] "{r7}" (1),
[arg1] "{r0}" (code),
: .{ .memory = true });
},
.aarch64 => {
asm volatile ("svc #0"
:
: [number] "{x8}" (93),
[arg1] "{x0}" (code),
: .{ .memory = true });
},
.sparc64 => {
asm volatile ("ta 0x6d"
:
: [number] "{g1}" (1),
[arg1] "{o0}" (code),
: .{ .o0 = true, .o1 = true, .o2 = true, .o3 = true, .o4 = true, .o5 = true, .o6 = true, .o7 = true, .memory = true });
},
else => @compileError("TODO"),
},
// exits(0)
.plan9 => std.os.plan9.exits(null),
.windows => {
std.os.windows.ntdll.RtlExitUserProcess(@truncate(code));
},
else => @compileError("TODO"),
}
unreachable;
std.posix.exit(callMain());
}
////////////////////////////////////////////////////////////////////////////////
@@ -676,10 +626,11 @@ pub inline fn callMain() u8 {
const result = root.main() catch |err| {
switch (builtin.zig_backend) {
.stage2_aarch64,
.stage2_powerpc,
.stage2_riscv64,
=> {
std.debug.print("error: failed with error\n", .{});
_ = std.posix.write(std.posix.STDERR_FILENO, "error: failed with error\n") catch {};
return 1;
},
else => {},
+295 -341
View File
@@ -19,7 +19,7 @@ const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
pub const writer = @import("tar/writer.zig").writer;
pub const Writer = @import("tar/Writer.zig");
/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned
@@ -293,28 +293,6 @@ fn nullStr(str: []const u8) []const u8 {
return str;
}
/// Options for iterator.
/// Buffers should be provided by the caller.
pub const IteratorOptions = struct {
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
link_name_buffer: []u8,
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
};
/// Iterates over files in tar archive.
/// `next` returns each file in tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
};
}
/// Type of the file returned by iterator `next` method.
pub const FileKind = enum {
directory,
@@ -323,206 +301,192 @@ pub const FileKind = enum {
};
/// Iterator over entries in the tar file represented by reader.
pub fn Iterator(comptime ReaderType: type) type {
return struct {
reader: ReaderType,
diagnostics: ?*Diagnostics = null,
pub const Iterator = struct {
reader: *std.Io.Reader,
diagnostics: ?*Diagnostics = null,
// buffers for header and file attributes
header_buffer: [Header.SIZE]u8 = undefined,
// buffers for header and file attributes
header_buffer: [Header.SIZE]u8 = undefined,
file_name_buffer: []u8,
link_name_buffer: []u8,
// bytes of padding to the end of the block
padding: usize = 0,
// not consumed bytes of file from last next iteration
unread_file_bytes: u64 = 0,
/// Options for iterator.
/// Buffers should be provided by the caller.
pub const Options = struct {
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
file_name_buffer: []u8,
/// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
link_name_buffer: []u8,
/// Collects error messages during unpacking
diagnostics: ?*Diagnostics = null,
};
// bytes of padding to the end of the block
padding: usize = 0,
// not consumed bytes of file from last next iteration
unread_file_bytes: u64 = 0,
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
size: u64 = 0, // size of the file in bytes
mode: u32 = 0,
kind: FileKind = .file,
unread_bytes: *u64,
parent_reader: ReaderType,
pub const Reader = std.io.GenericReader(File, ReaderType.Error, File.read);
pub fn reader(self: File) Reader {
return .{ .context = self };
}
pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
const n = try self.parent_reader.read(buf);
self.unread_bytes.* -= n;
return n;
}
// Writes file content to writer.
pub fn writeAll(self: File, out_writer: anytype) !void {
var buffer: [4096]u8 = undefined;
while (self.unread_bytes.* > 0) {
const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
try self.parent_reader.readNoEof(buf);
try out_writer.writeAll(buf);
self.unread_bytes.* -= buf.len;
}
}
/// Iterates over files in tar archive.
/// `next` returns each file in tar archive.
pub fn init(reader: *std.Io.Reader, options: Options) Iterator {
return .{
.reader = reader,
.diagnostics = options.diagnostics,
.file_name_buffer = options.file_name_buffer,
.link_name_buffer = options.link_name_buffer,
};
}
const Self = @This();
/// Reads the next header block from `self.reader` into `self.header_buffer`.
///
/// Any padding recorded in `self.padding` is skipped first. Returns `null`
/// when no bytes could be read, or when `checkChksum` yields 0 (presumably an
/// all-zero block marking the end of the archive — TODO confirm). Returns
/// `error.UnexpectedEndOfStream` when fewer than `Header.SIZE` bytes are read.
fn readHeader(self: *Self) !?Header {
if (self.padding > 0) {
try self.reader.skipBytes(self.padding, .{});
}
const n = try self.reader.readAll(&self.header_buffer);
// Zero bytes read: nothing left in the stream.
if (n == 0) return null;
// A partial header is treated as a truncated stream.
if (n < Header.SIZE) return error.UnexpectedEndOfStream;
const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
if (try header.checkChksum() == 0) return null;
return header;
}
/// Reads exactly `size` bytes from `self.reader` into the start of `buffer`
/// and returns them after passing them through `nullStr`.
///
/// Returns `error.TarInsufficientBuffer` when `size` exceeds `buffer.len`;
/// errors from `readNoEof` are propagated.
fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
if (size > buffer.len) return error.TarInsufficientBuffer;
const buf = buffer[0..size];
try self.reader.readNoEof(buf);
return nullStr(buf);
}
fn newFile(self: *Self) File {
return .{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
.parent_reader = self.reader,
.unread_bytes = &self.unread_file_bytes,
};
}
// Number of padding bytes in the last file block.
// That is the distance from `size` up to the next multiple of `Header.SIZE`
// (0 when `size` is already a multiple).
fn blockPadding(size: u64) usize {
const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
return @intCast(block_rounded - size);
}
/// Iterates through the tar archive as if it is a series of files.
/// Internally, the tar format often uses entries (header with optional
/// content) to add meta data that describes the next file. These
/// entries should not normally be visible to the outside. As such, this
/// loop iterates through one or more entries until it collects all
/// file attributes.
pub fn next(self: *Self) !?File {
if (self.unread_file_bytes > 0) {
// If file content was not consumed by caller
try self.reader.skipBytes(self.unread_file_bytes, .{});
self.unread_file_bytes = 0;
}
var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
const size: u64 = try header.size();
self.padding = blockPadding(size);
switch (kind) {
// File types to return upstream
.directory, .normal, .symbolic_link => {
file.kind = switch (kind) {
.directory => .directory,
.normal => .file,
.symbolic_link => .sym_link,
else => unreachable,
};
file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
if (file.size == 0) {
file.size = size;
}
if (file.link_name.len == 0) {
file.link_name = try header.linkName(self.link_name_buffer);
}
if (file.name.len == 0) {
file.name = try header.fullName(self.file_name_buffer);
}
self.padding = blockPadding(file.size);
self.unread_file_bytes = file.size;
return file;
},
// Prefix header types
.gnu_long_name => {
file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
file = self.newFile();
var rdr = paxIterator(self.reader, @intCast(size));
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
},
// Ignored header type
.global_extended_header => {
self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
},
// All other are unsupported header types
else => {
const d = self.diagnostics orelse return error.TarUnsupportedHeader;
try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
.file_name = try d.allocator.dupe(u8, header.name()),
.file_type = kind,
} });
if (kind == .gnu_sparse) {
try self.skipGnuSparseExtendedHeaders(header);
}
self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
},
}
}
return null;
}
// Reads and drops `Header.SIZE`-byte blocks for as long as the
// continuation byte is non-zero: byte 482 of `header` for the first block,
// byte 504 of each block read after that.
// NOTE(review): these offsets presumably are GNU's `isextended` flags; confirm.
fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
    if (header.bytes[482] == 0) return;
    var block: [Header.SIZE]u8 = undefined;
    while (true) {
        const filled = try self.reader.readAll(&block);
        if (filled < Header.SIZE) return error.UnexpectedEndOfStream;
        if (block[504] == 0) return;
    }
}
/// Description of one archive entry, as returned by `next`.
/// `next` fills `name` and `link_name` from the iterator's file name and
/// link name buffers.
pub const File = struct {
name: []const u8, // name of file, symlink or directory
link_name: []const u8, // target name of symlink
size: u64 = 0, // size of the file in bytes
mode: u32 = 0,
kind: FileKind = .file,
};
}
/// Builds a pax attribute iterator over `reader`.
/// `size` is the length of the pax extended header still to be read.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    const Iter = PaxIterator(@TypeOf(reader));
    return Iter{ .size = size, .reader = reader };
}
// Discards the pending padding, then reads one `Header.SIZE`-byte block.
// Returns null when the stream is exhausted or the block's checksum is 0.
fn readHeader(self: *Iterator) !?Header {
    if (self.padding != 0) try self.reader.discardAll(self.padding);

    const bytes_read = try self.reader.readSliceShort(&self.header_buffer);
    if (bytes_read == 0) return null;
    if (bytes_read < Header.SIZE) return error.UnexpectedEndOfStream;

    const candidate: Header = .{ .bytes = self.header_buffer[0..Header.SIZE] };
    const checksum = try candidate.checkChksum();
    if (checksum == 0) return null;
    return candidate;
}
// Reads exactly `size` bytes into the front of `buffer` and returns them
// after passing them through `nullStr`.
fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 {
    if (buffer.len < size) return error.TarInsufficientBuffer;
    const dest = buffer[0..size];
    try self.reader.readSliceAll(dest);
    return nullStr(dest);
}
// Returns a `File` whose name and link name are empty slices at the start
// of the iterator's buffers; all other fields keep their defaults.
fn newFile(self: *Iterator) File {
    const blank: File = .{
        .link_name = self.link_name_buffer[0..0],
        .name = self.file_name_buffer[0..0],
    };
    return blank;
}
// Number of padding bytes in the last file block, i.e. the distance from
// `size` to the next multiple of `Header.SIZE` (0 when already aligned).
//
// Computed with a remainder instead of rounding `size` up: rounding up
// overflows for sizes close to maxInt(u64), and `size` comes straight from
// (untrusted) archive headers.
fn blockPadding(size: u64) usize {
    const used = size % Header.SIZE; // bytes occupied in the final block
    if (used == 0) return 0;
    return @intCast(Header.SIZE - used);
}
/// Iterates through the tar archive as if it is a series of files.
/// Internally, the tar format often uses entries (header with optional
/// content) to add meta data that describes the next file. These
/// entries should not normally be visible to the outside. As such, this
/// loop iterates through one or more entries until it has collected all
/// file attributes.
///
/// Returns the next directory, regular file or symbolic link, or null once
/// `readHeader` reports that there are no more headers. Content of the
/// previously returned file that was not consumed is discarded first.
pub fn next(self: *Iterator) !?File {
if (self.unread_file_bytes > 0) {
// If file content was not consumed by caller
try self.reader.discardAll64(self.unread_file_bytes);
self.unread_file_bytes = 0;
}
var file: File = self.newFile();
while (try self.readHeader()) |header| {
const kind = header.kind();
const size: u64 = try header.size();
// Padding after this entry's content; the next `readHeader` call discards it.
self.padding = blockPadding(size);
switch (kind) {
// File types to return upstream
.directory, .normal, .symbolic_link => {
file.kind = switch (kind) {
.directory => .directory,
.normal => .file,
.symbolic_link => .sym_link,
else => unreachable,
};
file.mode = try header.mode();
// set file attributes if not already set by prefix/extended headers
if (file.size == 0) {
file.size = size;
}
if (file.link_name.len == 0) {
file.link_name = try header.linkName(self.link_name_buffer);
}
if (file.name.len == 0) {
file.name = try header.fullName(self.file_name_buffer);
}
// `file.size` may come from a pax `size` attribute rather than this
// header, so the padding is recomputed from it.
self.padding = blockPadding(file.size);
self.unread_file_bytes = file.size;
return file;
},
// Prefix header types
.gnu_long_name => {
file.name = try self.readString(@intCast(size), self.file_name_buffer);
},
.gnu_long_link => {
file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
},
.extended_header => {
// Use just attributes from last extended header.
file = self.newFile();
var rdr: PaxIterator = .{
.reader = self.reader,
.size = @intCast(size),
};
while (try rdr.next()) |attr| {
switch (attr.kind) {
.path => {
file.name = try attr.value(self.file_name_buffer);
},
.linkpath => {
file.link_name = try attr.value(self.link_name_buffer);
},
.size => {
var buf: [pax_max_size_attr_len]u8 = undefined;
file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
}
},
// Ignored header type
.global_extended_header => {
self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
},
// All other are unsupported header types
else => {
// Without a diagnostics sink an unsupported header is an error;
// with one, it is recorded and its content is skipped.
const d = self.diagnostics orelse return error.TarUnsupportedHeader;
try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
.file_name = try d.allocator.dupe(u8, header.name()),
.file_type = kind,
} });
if (kind == .gnu_sparse) {
try self.skipGnuSparseExtendedHeaders(header);
}
self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
},
}
}
return null;
}
/// Streams the content of `file` that has not been consumed yet into `w`.
///
/// `file` must be the value most recently returned by `next`. The amount
/// streamed is capped by the iterator's own count of unread content bytes,
/// so calling this a second time for the same file streams nothing instead
/// of reading into the padding and the next header.
pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void {
    const remaining = @min(file.size, it.unread_file_bytes);
    try it.reader.streamExact64(w, remaining);
    it.unread_file_bytes -= remaining;
}
// Reads and drops `Header.SIZE`-byte blocks for as long as the
// continuation byte is non-zero: byte 482 of `header` for the first block,
// byte 504 of each block read after that.
// NOTE(review): these offsets presumably are GNU's `isextended` flags; confirm.
fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
    if (header.bytes[482] == 0) return;
    var block: [Header.SIZE]u8 = undefined;
    while (true) {
        try self.reader.readSliceAll(&block);
        if (block[504] == 0) return;
    }
}
};
const PaxAttributeKind = enum {
path,
@@ -533,108 +497,99 @@ const PaxAttributeKind = enum {
// maxInt(u64) has 20 chars, base 10 in practice we got 24 chars
const pax_max_size_attr_len = 64;
fn PaxIterator(comptime ReaderType: type) type {
return struct {
size: usize, // cumulative size of all pax attributes
reader: ReaderType,
// scratch buffer used for reading attribute length and keyword
scratch: [128]u8 = undefined,
pub const PaxIterator = struct {
size: usize, // cumulative size of all pax attributes
reader: *std.Io.Reader,
const Self = @This();
const Self = @This();
const Attribute = struct {
kind: PaxAttributeKind,
len: usize, // length of the attribute value
reader: ReaderType, // reader positioned at value start
const Attribute = struct {
kind: PaxAttributeKind,
len: usize, // length of the attribute value
reader: *std.Io.Reader, // reader positioned at value start
// Copies pax attribute value into destination buffer.
// Must be called with destination buffer of size at least Attribute.len.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
if (self.len > dst.len) return error.TarInsufficientBuffer;
// assert(self.len <= dst.len);
const buf = dst[0..self.len];
const n = try self.reader.readAll(buf);
if (n < self.len) return error.UnexpectedEndOfStream;
try validateAttributeEnding(self.reader);
if (hasNull(buf)) return error.PaxNullInValue;
return buf;
}
};
// Iterates over pax attributes. Returns known only known attributes.
// Caller has to call value in Attribute, to advance reader across value.
pub fn next(self: *Self) !?Attribute {
// Pax extended header consists of one or more attributes, each constructed as follows:
// "%d %s=%s\n", <length>, <keyword>, <value>
while (self.size > 0) {
const length_buf = try self.readUntil(' ');
const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
const keyword = try self.readUntil('=');
if (hasNull(keyword)) return error.PaxNullInKeyword;
// calculate value_len
const value_start = length_buf.len + keyword.len + 2; // 2 separators
if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
const value_len = length - value_start - 1; // \n separator at end
self.size -= length;
const kind: PaxAttributeKind = if (eql(keyword, "path"))
.path
else if (eql(keyword, "linkpath"))
.linkpath
else if (eql(keyword, "size"))
.size
else {
try self.reader.skipBytes(value_len, .{});
try validateAttributeEnding(self.reader);
continue;
};
if (kind == .size and value_len > pax_max_size_attr_len) {
return error.PaxSizeAttrOverflow;
}
return Attribute{
.kind = kind,
.len = value_len,
.reader = self.reader,
};
}
return null;
}
fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
var fbs = std.io.fixedBufferStream(&self.scratch);
try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
return fbs.getWritten();
}
fn eql(a: []const u8, b: []const u8) bool {
return std.mem.eql(u8, a, b);
}
fn hasNull(str: []const u8) bool {
return (std.mem.indexOfScalar(u8, str, 0)) != null;
}
// Checks that each record ends with new line.
fn validateAttributeEnding(reader: ReaderType) !void {
if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
// Copies the pax attribute value into `dst` and returns the filled part.
// `dst` must hold at least `Attribute.len` bytes, otherwise
// error.TarInsufficientBuffer is returned before anything is read.
// Also consumes and checks the record's trailing new line.
pub fn value(self: Attribute, dst: []u8) ![]const u8 {
    if (dst.len < self.len) return error.TarInsufficientBuffer;
    const out = dst[0..self.len];
    const got = try self.reader.readSliceShort(out);
    if (got < self.len) return error.UnexpectedEndOfStream;
    try validateAttributeEnding(self.reader);
    if (hasNull(out)) return error.PaxNullInValue;
    return out;
}
};
}
// Iterates over pax attributes, returning only the known ones (`path`,
// `linkpath`, `size`); records with any other keyword are skipped.
// The caller has to call `value` on the returned Attribute to advance the
// reader across the value.
pub fn next(self: *Self) !?Attribute {
    // A pax extended header is a sequence of records, each formatted as
    // "%d %s=%s\n", <length>, <keyword>, <value>
    while (self.size > 0) {
        const length_text = try self.reader.takeSentinel(' ');
        const record_len = try std.fmt.parseInt(usize, length_text, 10); // whole record, in bytes
        const keyword = try self.reader.takeSentinel('=');
        if (hasNull(keyword)) return error.PaxNullInKeyword;

        // Bytes before the value: the length digits, the keyword and 2 separators.
        const value_start = length_text.len + keyword.len + 2;
        if (record_len < value_start + 1 or self.size < record_len) return error.UnexpectedEndOfStream;
        const value_len = record_len - value_start - 1; // \n separator at end
        self.size -= record_len;

        const kind = std.meta.stringToEnum(PaxAttributeKind, keyword) orelse {
            // Unknown keyword: step over its value and the terminator.
            try self.reader.discardAll(value_len);
            try validateAttributeEnding(self.reader);
            continue;
        };
        if (kind == .size and value_len > pax_max_size_attr_len) return error.PaxSizeAttrOverflow;

        return .{
            .reader = self.reader,
            .len = value_len,
            .kind = kind,
        };
    }
    return null;
}
// Byte-wise equality of two strings.
fn eql(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;
    for (a, b) |x, y| {
        if (x != y) return false;
    }
    return true;
}
// Reports whether `str` contains a zero byte anywhere.
fn hasNull(str: []const u8) bool {
    for (str) |byte| {
        if (byte == 0) return true;
    }
    return false;
}
// Consumes one byte from `reader` and requires it to be the new line that
// terminates every pax record.
fn validateAttributeEnding(reader: *std.Io.Reader) !void {
    const terminator = try reader.takeByte();
    if (terminator != '\n') return error.PaxInvalidAttributeEnd;
}
};
/// Saves tar file content to the file systems.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.Io.Reader, options: PipeOptions) !void {
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var iter = iterator(reader, .{
var file_contents_buffer: [1024]u8 = undefined;
var it: Iterator = .init(reader, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
.diagnostics = options.diagnostics,
});
while (try iter.next()) |file| {
while (try it.next()) |file| {
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0 and file.kind != .directory) {
const d = options.diagnostics orelse return error.TarComponentsOutsideStrippedPrefix;
@@ -656,7 +611,9 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions)
.file => {
if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| {
defer fs_file.close();
try file.writeAll(fs_file);
var file_writer = fs_file.writer(&file_contents_buffer);
try it.streamRemaining(file, &file_writer.interface);
try file_writer.interface.flush();
} else |err| {
const d = options.diagnostics orelse return err;
try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
@@ -826,11 +783,14 @@ test PaxIterator {
var buffer: [1024]u8 = undefined;
outer: for (cases) |case| {
var stream = std.io.fixedBufferStream(case.data);
var iter = paxIterator(stream.reader(), case.data.len);
var reader: std.Io.Reader = .fixed(case.data);
var it: PaxIterator = .{
.size = case.data.len,
.reader = &reader,
};
var i: usize = 0;
while (iter.next() catch |err| {
while (it.next() catch |err| {
if (case.err) |e| {
try testing.expectEqual(e, err);
continue;
@@ -853,12 +813,6 @@ test PaxIterator {
}
}
test {
_ = @import("tar/test.zig");
_ = @import("tar/writer.zig");
_ = Diagnostics;
}
test "header parse size" {
const cases = [_]struct {
in: []const u8,
@@ -941,7 +895,7 @@ test "create file and symlink" {
file.close();
}
test iterator {
test Iterator {
// Example tar file is created from this tree structure:
// $ tree example
// example
@@ -962,19 +916,19 @@ test iterator {
// example/empty/
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
var reader: std.Io.Reader = .fixed(data);
// User provided buffers to the iterator
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
// Create iterator
var iter = iterator(fbs.reader(), .{
var it: Iterator = .init(&reader, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
// Iterate over files in example.tar
var file_no: usize = 0;
while (try iter.next()) |file| : (file_no += 1) {
while (try it.next()) |file| : (file_no += 1) {
switch (file.kind) {
.directory => {
switch (file_no) {
@@ -987,10 +941,10 @@ test iterator {
},
.file => {
try testing.expectEqualStrings("example/a/file", file.name);
// Read file content
var buf: [16]u8 = undefined;
const n = try file.reader().readAll(&buf);
try testing.expectEqualStrings("content\n", buf[0..n]);
var w: std.Io.Writer = .fixed(&buf);
try it.streamRemaining(file, &w);
try testing.expectEqualStrings("content\n", w.buffered());
},
.sym_link => {
try testing.expectEqualStrings("example/b/symlink", file.name);
@@ -1021,15 +975,14 @@ test pipeToFileSystem {
// example/empty/
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
const dir = tmp.dir;
// Save tar from `reader` to the file system `dir`
pipeToFileSystem(dir, reader, .{
// Save tar from reader to the file system `dir`
pipeToFileSystem(dir, &reader, .{
.mode_mode = .ignore,
.strip_components = 1,
.exclude_empty_directories = true,
@@ -1053,8 +1006,7 @@ test pipeToFileSystem {
test "pipeToFileSystem root_dir" {
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
// with strip_components = 1
{
@@ -1063,7 +1015,7 @@ test "pipeToFileSystem root_dir" {
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
pipeToFileSystem(tmp.dir, reader, .{
pipeToFileSystem(tmp.dir, &reader, .{
.strip_components = 1,
.diagnostics = &diagnostics,
}) catch |err| {
@@ -1079,13 +1031,13 @@ test "pipeToFileSystem root_dir" {
// with strip_components = 0
{
fbs.reset();
reader = .fixed(data);
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
pipeToFileSystem(tmp.dir, reader, .{
pipeToFileSystem(tmp.dir, &reader, .{
.strip_components = 0,
.diagnostics = &diagnostics,
}) catch |err| {
@@ -1102,45 +1054,42 @@ test "pipeToFileSystem root_dir" {
test "findRoot with single file archive" {
const data = @embedFile("tar/testdata/22752.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
var tmp = testing.tmpDir(.{});
defer tmp.cleanup();
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics });
try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics });
try testing.expectEqualStrings("", diagnostics.root_dir);
}
test "findRoot without explicit root dir" {
const data = @embedFile("tar/testdata/19820.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
var tmp = testing.tmpDir(.{});
defer tmp.cleanup();
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics });
try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics });
try testing.expectEqualStrings("root", diagnostics.root_dir);
}
test "pipeToFileSystem strip_components" {
const data = @embedFile("tar/testdata/example.tar");
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
var tmp = testing.tmpDir(.{ .no_follow = true });
defer tmp.cleanup();
var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
defer diagnostics.deinit();
pipeToFileSystem(tmp.dir, reader, .{
pipeToFileSystem(tmp.dir, &reader, .{
.strip_components = 3,
.diagnostics = &diagnostics,
}) catch |err| {
@@ -1194,13 +1143,12 @@ test "executable bit" {
const data = @embedFile("tar/testdata/example.tar");
for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| {
var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();
var reader: std.Io.Reader = .fixed(data);
var tmp = testing.tmpDir(.{ .no_follow = true });
//defer tmp.cleanup();
pipeToFileSystem(tmp.dir, reader, .{
pipeToFileSystem(tmp.dir, &reader, .{
.strip_components = 1,
.exclude_empty_directories = true,
.mode_mode = opt,
@@ -1226,3 +1174,9 @@ test "executable bit" {
}
}
}
// References the tar test file and the `Writer` and `Diagnostics`
// declarations from a test block, so that tests declared inside them are
// included when this file's tests are run.
test {
_ = @import("tar/test.zig");
_ = Writer;
_ = Diagnostics;
}
+462
View File
@@ -0,0 +1,462 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const Writer = @This();
const block_size = @sizeOf(Header);
/// Options for writing file/dir/link. A zero `mode` keeps the default mode
/// that `Header.init` picks for the entry type (0o664 for regular files,
/// 0o775 for directories, 0o777 for symbolic links). `mtime` is written as
/// given, so leaving it at zero stores a modification time of 0.
pub const Options = struct {
/// File system permission mode. Zero means "use the per-type default".
mode: u32 = 0,
/// File system modification time, in seconds since the Unix epoch.
mtime: u64 = 0,
};
/// Destination for every header block, all file content and all padding.
underlying_writer: *std.Io.Writer,
/// Path prefix combined with each `sub_path` when a header's path is set.
/// Assigned by `setRoot`; the slice is stored, not copied.
prefix: []const u8 = "",
/// NOTE(review): no function visible in this file reads or writes this
/// field; confirm whether it is still needed.
mtime_now: u64 = 0,
/// Errors shared by the header-writing functions of this writer.
const Error = error{
// Writing to `underlying_writer` failed.
WriteFailed,
// A numeric value needs more octal digits than its header field holds.
OctalOverflow,
// A path or link name does not fit into the fixed-size header fields.
NameTooLong,
};
/// Makes `root` the prefix for the paths of all subsequent write* calls.
/// A non-empty `root` is first written to the archive as a directory entry.
/// The slice is stored as-is, so it has to outlive the writer's use of it.
pub fn setRoot(w: *Writer, root: []const u8) Error!void {
    if (root.len != 0) {
        // Written before the prefix changes, so the entry is named `root` itself.
        try w.writeDir(root, .{});
    }
    w.prefix = root;
}
/// Writes a directory entry for `sub_path`: a header with size 0 and no content.
pub fn writeDir(w: *Writer, sub_path: []const u8, options: Options) Error!void {
    const no_link_name = "";
    try w.writeHeader(.directory, sub_path, no_link_name, 0, options);
}
pub const WriteFileError = std.Io.Writer.FileError || Error || std.fs.File.GetEndPosError;

/// Writes a regular file entry for `sub_path` whose content is sent from
/// `file_reader`. The header uses the `Header` field defaults (regular file,
/// mode 0o664), the size reported by `file_reader` and `stat_mtime`.
///
/// `stat_mtime` is in nanoseconds since the Unix epoch. The header field is
/// an unsigned number of seconds, so a modification time before the epoch is
/// stored as 0 rather than tripping the integer cast.
pub fn writeFile(
    w: *Writer,
    sub_path: []const u8,
    file_reader: *std.fs.File.Reader,
    stat_mtime: i128,
) WriteFileError!void {
    const size = try file_reader.getSize();
    const mtime_seconds = @divFloor(stat_mtime, std.time.ns_per_s);
    const mtime: u64 = if (mtime_seconds < 0) 0 else @intCast(mtime_seconds);

    var header: Header = .{};
    try w.setPath(&header, sub_path);
    try header.setSize(size);
    try header.setMtime(mtime);
    // Same path as `writeHeader`: finalize the checksum and emit the block.
    try header.write(w.underlying_writer);

    _ = try w.underlying_writer.sendFileAll(file_reader, .unlimited);
    try w.writePadding64(size);
}
pub const WriteFileStreamError = Error || std.Io.Reader.StreamError;

/// Writes a regular file entry for `sub_path`, taking the content from
/// `reader`. Exactly `size` bytes are read from `reader`; if it ends
/// earlier, `error.EndOfStream` is returned. The content is followed by
/// padding up to the next block boundary.
pub fn writeFileStream(
    w: *Writer,
    sub_path: []const u8,
    size: u64,
    reader: *std.Io.Reader,
    options: Options,
) WriteFileStreamError!void {
    try w.writeHeader(.regular, sub_path, "", size, options);
    const out = w.underlying_writer;
    try reader.streamExact64(out, size);
    try w.writePadding64(size);
}
/// Writes a regular file entry for `sub_path` whose content, and therefore
/// size, is the in-memory buffer `content`.
pub fn writeFileBytes(w: *Writer, sub_path: []const u8, content: []const u8, options: Options) Error!void {
    const size = content.len;
    try w.writeHeader(.regular, sub_path, "", size, options);
    try w.underlying_writer.writeAll(content);
    try w.writePadding(size);
}
/// Writes a symbolic link entry named `sub_path` that points at `link_name`.
/// The entry has size 0 and no content.
pub fn writeLink(w: *Writer, sub_path: []const u8, link_name: []const u8, options: Options) Error!void {
    const content_size = 0;
    try w.writeHeader(.symbolic_link, sub_path, link_name, content_size, options);
}
/// Builds the header for one entry and writes it to the underlying writer.
/// `options.mode` is applied only when non-zero; `link_name` is used only
/// for symbolic links. Names that do not fit the header fields are emitted
/// as GNU extended headers ahead of this header.
fn writeHeader(
    w: *Writer,
    typeflag: Header.FileType,
    sub_path: []const u8,
    link_name: []const u8,
    size: u64,
    options: Options,
) Error!void {
    var header: Header = .init(typeflag);
    try w.setPath(&header, sub_path);
    try header.setSize(size);
    try header.setMtime(options.mtime);
    if (options.mode != 0) {
        try header.setMode(options.mode);
    }
    switch (typeflag) {
        .symbolic_link => header.setLinkname(link_name) catch |err| switch (err) {
            // Too long for the linkname field: fall back to a GNU long link record.
            error.NameTooLong => try w.writeExtendedHeader(.gnu_long_link, &.{link_name}),
            else => return err,
        },
        else => {},
    }
    try header.write(w.underlying_writer);
}
/// Stores the path (writer prefix plus `sub_path`) in the posix header. If
/// it does not fit there (name + prefix; 100 + 155 bytes), the full path is
/// written as a GNU long name extended header instead.
fn setPath(w: *Writer, header: *Header, sub_path: []const u8) Error!void {
    header.setPath(w.prefix, sub_path) catch |err| switch (err) {
        error.NameTooLong => {
            // The extended header carries "<prefix>/<sub_path>", or just
            // `sub_path` when there is no prefix.
            const parts: []const []const u8 = if (w.prefix.len != 0)
                &.{ w.prefix, "/", sub_path }
            else
                &.{sub_path};
            try w.writeExtendedHeader(.gnu_long_name, parts);
        },
        else => return err,
    };
}
/// Writes a GNU extended header (gnu_long_name or gnu_long_link): a header
/// block whose size is the combined length of `buffers`, then the buffers
/// back to back, then padding up to the block boundary.
fn writeExtendedHeader(w: *Writer, typeflag: Header.FileType, buffers: []const []const u8) Error!void {
    var total: usize = 0;
    for (buffers) |part| {
        total += part.len;
    }

    var header: Header = .init(typeflag);
    try header.setSize(total);
    try header.write(w.underlying_writer);

    const out = w.underlying_writer;
    for (buffers) |part| {
        try out.writeAll(part);
    }
    try w.writePadding(total);
}
/// Pads with zeros to the next block boundary after `bytes` bytes of content.
fn writePadding(w: *Writer, bytes: usize) std.Io.Writer.Error!void {
    const pos = bytes % block_size;
    return w.writePaddingPos(pos);
}
/// Same as `writePadding`, for a 64-bit content length.
fn writePadding64(w: *Writer, bytes: u64) std.Io.Writer.Error!void {
    // The remainder is below `block_size`, so it always fits in usize.
    const pos: usize = @intCast(bytes % block_size);
    return w.writePaddingPos(pos);
}
/// Writes the zero bytes that complete a block already filled up to `pos`.
/// Writes nothing when `pos` is 0, i.e. the content ended on a boundary.
fn writePaddingPos(w: *Writer, pos: usize) std.Io.Writer.Error!void {
    if (pos != 0) {
        try w.underlying_writer.splatByteAll(0, block_size - pos);
    }
}
/// Writes the two zero blocks with which, according to the specification, a
/// tar archive should end. However, a "reasonable system must not assume
/// that such a block exists when reading an archive", so the Zig standard
/// library recommends not calling this function.
pub fn finishPedantically(w: *Writer) std.Io.Writer.Error!void {
    const trailer_len = 2 * block_size;
    try w.underlying_writer.splatByteAll(0, trailer_len);
}
/// A struct that is exactly 512 bytes and matches tar file format. This is
/// intended to be used for outputting tar files; for parsing there is
/// `std.tar.Header`.
pub const Header = extern struct {
    // This struct was originally copied from
    // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT
    // licensed.
    //
    // The name, linkname, magic, uname, and gname are null-terminated character
    // strings. All other fields are zero-filled octal numbers in ASCII. Each
    // numeric field of width w contains w minus 1 digits, and a null.
    // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
    // POSIX header: byte offset
    name: [100]u8 = [_]u8{0} ** 100, // 0
    mode: [7:0]u8 = default_mode.file, // 100
    uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108
    gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116
    size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124
    mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136
    checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148
    typeflag: FileType = .regular, // 156
    linkname: [100]u8 = [_]u8{0} ** 100, // 157
    magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257
    version: [2]u8 = [_]u8{ '0', '0' }, // 263
    uname: [32]u8 = [_]u8{0} ** 32, // unused 265
    gname: [32]u8 = [_]u8{0} ** 32, // unused 297
    devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329
    devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337
    prefix: [155]u8 = [_]u8{0} ** 155, // 345
    pad: [12]u8 = [_]u8{0} ** 12, // unused 500

    pub const FileType = enum(u8) {
        regular = '0',
        symbolic_link = '2',
        directory = '5',
        gnu_long_name = 'L',
        gnu_long_link = 'K',
    };

    const default_mode = struct {
        const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664
        const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775
        const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777
        const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000
    };

    /// Returns a header of the given type with the default mode for that type.
    pub fn init(typeflag: FileType) Header {
        return .{
            .typeflag = typeflag,
            .mode = switch (typeflag) {
                .directory => default_mode.dir,
                .symbolic_link => default_mode.sym_link,
                .regular => default_mode.file,
                else => default_mode.other,
            },
        };
    }

    /// Stores `size` as zero-filled octal digits in the size field.
    pub fn setSize(w: *Header, size: u64) error{OctalOverflow}!void {
        // `octal` only writes the digits it needs, so clear the field first.
        w.size = [_:0]u8{'0'} ** 11;
        try octal(&w.size, size);
    }

    /// Writes `value` as ASCII octal digits, right-aligned in `buf`. Bytes
    /// to the left of the most significant digit are left untouched (a zero
    /// `value` writes nothing), so callers pre-fill `buf` with the padding
    /// they want. Returns error.OctalOverflow when `buf` is too short.
    fn octal(buf: []u8, value: u64) error{OctalOverflow}!void {
        var remainder: u64 = value;
        var pos: usize = buf.len;
        while (remainder > 0) {
            if (pos == 0) return error.OctalOverflow;
            pos -= 1;
            buf[pos] = @as(u8, @intCast(remainder % 8)) + '0';
            remainder /= 8;
        }
    }

    /// Stores `mode` as zero-filled octal digits in the mode field,
    /// replacing the per-type default set by `init`.
    pub fn setMode(w: *Header, mode: u32) error{OctalOverflow}!void {
        // Without clearing, digits of the default mode would survive to the
        // left of a short value (0o7 would be stored as 0000667).
        w.mode = [_:0]u8{'0'} ** 7;
        try octal(&w.mode, mode);
    }

    /// Stores `mtime` as zero-filled octal digits in the mtime field.
    /// `mtime` is the integer number of seconds since January 1, 1970, 00:00
    /// Coordinated Universal Time. It is written as given; zero is stored
    /// as zero.
    pub fn setMtime(w: *Header, mtime: u64) error{OctalOverflow}!void {
        w.mtime = [_:0]u8{'0'} ** 11;
        try octal(&w.mtime, mtime);
    }

    /// Recomputes the checksum field from the current header content.
    /// May be called any number of times.
    pub fn updateChecksum(w: *Header) !void {
        // The sum is taken with the checksum field counted as spaces, so
        // reset it first; otherwise digits from a previous call would be
        // included in the sum.
        w.checksum = [_:0]u8{' '} ** 7;
        var checksum: usize = ' '; // other 7 w.checksum bytes are initialized to ' '
        for (std.mem.asBytes(w)) |val|
            checksum += val;
        try octal(&w.checksum, checksum);
    }

    /// Finalizes the checksum and writes the whole 512-byte header to `bw`.
    pub fn write(h: *Header, bw: *std.Io.Writer) error{ OctalOverflow, WriteFailed }!void {
        try h.updateChecksum();
        try bw.writeAll(std.mem.asBytes(h));
    }

    /// Copies `link` into the linkname field, or returns error.NameTooLong
    /// when it is longer than the field.
    pub fn setLinkname(w: *Header, link: []const u8) !void {
        if (link.len > w.linkname.len) return error.NameTooLong;
        @memcpy(w.linkname[0..link.len], link);
    }

    /// Distributes `prefix` and `sub_path` over the name and prefix fields.
    /// Expects a header whose name and prefix fields are still zeroed.
    /// Returns error.NameTooLong when the path cannot be represented; in
    /// that case the header is left unmodified.
    pub fn setPath(w: *Header, prefix: []const u8, sub_path: []const u8) !void {
        const max_prefix = w.prefix.len;
        const max_name = w.name.len;
        const sep = std.fs.path.sep_posix;

        if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix)
            return error.NameTooLong;

        // both fit into name
        if (prefix.len > 0 and prefix.len + sub_path.len < max_name) {
            @memcpy(w.name[0..prefix.len], prefix);
            w.name[prefix.len] = sep;
            @memcpy(w.name[prefix.len + 1 ..][0..sub_path.len], sub_path);
            return;
        }

        // sub_path fits into name
        // there is no prefix or prefix fits into prefix
        if (sub_path.len <= max_name) {
            @memcpy(w.name[0..sub_path.len], sub_path);
            @memcpy(w.prefix[0..prefix.len], prefix);
            return;
        }

        // sub_path has to be split between the prefix and name fields.
        // Work out the split completely before touching the header.
        const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0;
        // add as much to prefix as you can, must split at /
        const prefix_remaining = max_prefix - prefix_pos;
        const head = sub_path[0..@min(prefix_remaining, sub_path.len)];
        const sep_pos = std.mem.lastIndexOfScalar(u8, head, sep) orelse return error.NameTooLong;
        const tail = sub_path[sep_pos + 1 ..];
        if (tail.len > max_name) return error.NameTooLong;

        if (prefix.len > 0) {
            @memcpy(w.prefix[0..prefix.len], prefix);
            w.prefix[prefix.len] = sep;
        }
        @memcpy(w.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]);
        @memcpy(w.name[0..tail.len], tail);
    }

    comptime {
        assert(@sizeOf(Header) == 512);
    }

    test "setPath" {
        const cases = [_]struct {
            in: []const []const u8,
            out: []const []const u8,
        }{
            .{
                .in = &.{ "", "123456789" },
                .out = &.{ "", "123456789" },
            },
            // can fit into name
            .{
                .in = &.{ "prefix", "sub_path" },
                .out = &.{ "", "prefix/sub_path" },
            },
            // no more both fits into name
            .{
                .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
                .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
            },
            // put as much as you can into prefix the rest goes into name
            .{
                .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" },
            },
            .{
                .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" },
            },
            .{
                .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" },
            },
            .{
                .in = &.{ "", "012345678/" ** 10 ++ "foo" },
                .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" },
            },
        };

        for (cases) |case| {
            var header = Header.init(.regular);
            try header.setPath(case.in[0], case.in[1]);
            try testing.expectEqualStrings(case.out[0], std.mem.sliceTo(&header.prefix, 0));
            try testing.expectEqualStrings(case.out[1], std.mem.sliceTo(&header.name, 0));
        }

        const error_cases = [_]struct {
            in: []const []const u8,
        }{
            // basename can't fit into name (106 characters)
            .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } },
            // cant fit into 255 + sep
            .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } },
            // can fit but sub_path can't be split (there is no separator)
            .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } },
            .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } },
        };

        for (error_cases) |case| {
            var header = Header.init(.regular);
            try testing.expectError(
                error.NameTooLong,
                header.setPath(case.in[0], case.in[1]),
            );
        }
    }
};
// References `Header` from a test block so that the test declared inside
// it ("setPath") is included when this file's tests are run.
test {
_ = Header;
}
// Round trip: writes a set of files (short names, names needing the prefix
// field, names needing a GNU long name record) and reads them back with
// `std.tar.Iterator`, checking names, content and the number of entries.
test "write files" {
    const files = [_]struct {
        path: []const u8,
        content: []const u8,
    }{
        .{ .path = "foo", .content = "bar" },
        .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 },
        .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 },
        .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 },
        .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 },
        .{ .path = "e123456789" ** 11, .content = "e" },
    };

    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;

    // with root
    {
        const root = "root";

        var output: std.Io.Writer.Allocating = .init(testing.allocator);
        var w: Writer = .{ .underlying_writer = &output.writer };
        defer output.deinit();
        try w.setRoot(root);
        for (files) |file|
            try w.writeFileBytes(file.path, file.content, .{});

        var input: std.Io.Reader = .fixed(output.getWritten());
        var it: std.tar.Iterator = .init(&input, .{
            .file_name_buffer = &file_name_buffer,
            .link_name_buffer = &link_name_buffer,
        });

        // first entry is directory with prefix
        {
            const actual = (try it.next()).?;
            try testing.expectEqualStrings(root, actual.name);
            try testing.expectEqual(std.tar.FileKind.directory, actual.kind);
        }

        var i: usize = 0;
        while (try it.next()) |actual| {
            defer i += 1;
            const expected = files[i];
            try testing.expectEqualStrings(root, actual.name[0..root.len]);
            try testing.expectEqual('/', actual.name[root.len..][0]);
            try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]);

            var content: std.Io.Writer.Allocating = .init(testing.allocator);
            defer content.deinit();
            try it.streamRemaining(actual, &content.writer);
            try testing.expectEqualSlices(u8, expected.content, content.getWritten());
        }
        // Guard against the loop passing vacuously on a truncated archive.
        try testing.expectEqual(files.len, i);
    }

    // without root
    {
        var output: std.Io.Writer.Allocating = .init(testing.allocator);
        var w: Writer = .{ .underlying_writer = &output.writer };
        defer output.deinit();
        for (files) |file| {
            var content: std.Io.Reader = .fixed(file.content);
            try w.writeFileStream(file.path, file.content.len, &content, .{});
        }

        var input: std.Io.Reader = .fixed(output.getWritten());
        var it: std.tar.Iterator = .init(&input, .{
            .file_name_buffer = &file_name_buffer,
            .link_name_buffer = &link_name_buffer,
        });

        var i: usize = 0;
        while (try it.next()) |actual| {
            defer i += 1;
            const expected = files[i];
            try testing.expectEqualStrings(expected.path, actual.name);

            var content: std.Io.Writer.Allocating = .init(testing.allocator);
            defer content.deinit();
            try it.streamRemaining(actual, &content.writer);
            try testing.expectEqualSlices(u8, expected.content, content.getWritten());
        }
        // Guard against the loop passing vacuously on a truncated archive.
        try testing.expectEqual(files.len, i);
        try w.finishPedantically();
    }
}
+173 -177
View File
@@ -18,31 +18,72 @@ const Case = struct {
err: ?anyerror = null, // parsing should fail with this error
};
const cases = [_]Case{
.{
.data = @embedFile("testdata/gnu.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
const gnu_case: Case = .{
.data = @embedFile("testdata/gnu.tar"),
.files = &[_]Case.File{
.{
.name = "small.txt",
.size = 5,
.mode = 0o640,
},
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
.{
.name = "small2.txt",
.size = 11,
.mode = 0o640,
},
},
.{
.chksums = &[_][]const u8{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
};
const gnu_multi_headers_case: Case = .{
.data = @embedFile("testdata/gnu-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "GNU2/GNU2/long-path-name",
.link_name = "GNU4/GNU4/long-linkpath-name",
.kind = .sym_link,
},
},
};
const trailing_slash_case: Case = .{
.data = @embedFile("testdata/trailing-slash.tar"),
.files = &[_]Case.File{
.{
.name = "123456789/" ** 30,
.kind = .directory,
},
},
};
const writer_big_long_case: Case = .{
// Size in gnu extended format, and name in pax attribute.
.data = @embedFile("testdata/writer-big-long.tar"),
.files = &[_]Case.File{
.{
.name = "longname/" ** 15 ++ "16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.mode = 0o644,
.truncated = true,
},
},
};
const fuzz1_case: Case = .{
.data = @embedFile("testdata/fuzz1.tar"),
.err = error.TarInsufficientBuffer,
};
test "run test cases" {
try testCase(gnu_case);
try testCase(.{
.data = @embedFile("testdata/sparse-formats.tar"),
.err = error.TarUnsupportedHeader,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/star.tar"),
.files = &[_]Case.File{
.{
@@ -60,8 +101,8 @@ const cases = [_]Case{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/v7.tar"),
.files = &[_]Case.File{
.{
@@ -79,8 +120,8 @@ const cases = [_]Case{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/pax.tar"),
.files = &[_]Case.File{
.{
@@ -99,13 +140,13 @@ const cases = [_]Case{
.chksums = &[_][]const u8{
"3c382e8f5b6631aa2db52643912ffd4a",
},
},
.{
});
try testCase(.{
// pax attribute don't end with \n
.data = @embedFile("testdata/pax-bad-hdr-file.tar"),
.err = error.PaxInvalidAttributeEnd,
},
.{
});
try testCase(.{
// size is in pax attribute
.data = @embedFile("testdata/pax-pos-size-file.tar"),
.files = &[_]Case.File{
@@ -119,8 +160,8 @@ const cases = [_]Case{
.chksums = &[_][]const u8{
"0afb597b283fe61b5d4879669a350556",
},
},
.{
});
try testCase(.{
// has pax records which we are not interested in
.data = @embedFile("testdata/pax-records.tar"),
.files = &[_]Case.File{
@@ -128,8 +169,8 @@ const cases = [_]Case{
.name = "file",
},
},
},
.{
});
try testCase(.{
// has global records which we are ignoring
.data = @embedFile("testdata/pax-global-records.tar"),
.files = &[_]Case.File{
@@ -146,8 +187,8 @@ const cases = [_]Case{
.name = "file4",
},
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/nil-uid.tar"),
.files = &[_]Case.File{
.{
@@ -160,8 +201,8 @@ const cases = [_]Case{
.chksums = &[_][]const u8{
"08d504674115e77a67244beac19668f5",
},
},
.{
});
try testCase(.{
// has xattrs and pax records which we are ignoring
.data = @embedFile("testdata/xattrs.tar"),
.files = &[_]Case.File{
@@ -182,23 +223,14 @@ const cases = [_]Case{
"e38b27eaccb4391bdec553a7f3ae6b2f",
"c65bd2e50a56a2138bf1716f2fd56fe9",
},
},
.{
.data = @embedFile("testdata/gnu-multi-hdrs.tar"),
.files = &[_]Case.File{
.{
.name = "GNU2/GNU2/long-path-name",
.link_name = "GNU4/GNU4/long-linkpath-name",
.kind = .sym_link,
},
},
},
.{
});
try testCase(gnu_multi_headers_case);
try testCase(.{
// has gnu type D (directory) and S (sparse) blocks
.data = @embedFile("testdata/gnu-incremental.tar"),
.err = error.TarUnsupportedHeader,
},
.{
});
try testCase(.{
// should use values only from last pax header
.data = @embedFile("testdata/pax-multi-hdrs.tar"),
.files = &[_]Case.File{
@@ -208,8 +240,8 @@ const cases = [_]Case{
.kind = .sym_link,
},
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/gnu-long-nul.tar"),
.files = &[_]Case.File{
.{
@@ -217,8 +249,8 @@ const cases = [_]Case{
.mode = 0o644,
},
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/gnu-utf8.tar"),
.files = &[_]Case.File{
.{
@@ -226,8 +258,8 @@ const cases = [_]Case{
.mode = 0o644,
},
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/gnu-not-utf8.tar"),
.files = &[_]Case.File{
.{
@@ -235,33 +267,33 @@ const cases = [_]Case{
.mode = 0o644,
},
},
},
.{
});
try testCase(.{
// null in pax key
.data = @embedFile("testdata/pax-nul-xattrs.tar"),
.err = error.PaxNullInKeyword,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/pax-nul-path.tar"),
.err = error.PaxNullInValue,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/neg-size.tar"),
.err = error.TarHeader,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/issue10968.tar"),
.err = error.TarHeader,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/issue11169.tar"),
.err = error.TarHeader,
},
.{
});
try testCase(.{
.data = @embedFile("testdata/issue12435.tar"),
.err = error.TarHeaderChksum,
},
.{
});
try testCase(.{
// has magic with space at end instead of null
.data = @embedFile("testdata/invalid-go17.tar"),
.files = &[_]Case.File{
@@ -269,8 +301,8 @@ const cases = [_]Case{
.name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
},
},
},
.{
});
try testCase(.{
.data = @embedFile("testdata/ustar-file-devs.tar"),
.files = &[_]Case.File{
.{
@@ -278,17 +310,9 @@ const cases = [_]Case{
.mode = 0o644,
},
},
},
.{
.data = @embedFile("testdata/trailing-slash.tar"),
.files = &[_]Case.File{
.{
.name = "123456789/" ** 30,
.kind = .directory,
},
},
},
.{
});
try testCase(trailing_slash_case);
try testCase(.{
// Has size in gnu extended format. To represent size bigger than 8 GB.
.data = @embedFile("testdata/writer-big.tar"),
.files = &[_]Case.File{
@@ -299,120 +323,92 @@ const cases = [_]Case{
.mode = 0o640,
},
},
},
.{
// Size in gnu extended format, and name in pax attribute.
.data = @embedFile("testdata/writer-big-long.tar"),
.files = &[_]Case.File{
.{
.name = "longname/" ** 15 ++ "16gig.txt",
.size = 16 * 1024 * 1024 * 1024,
.mode = 0o644,
.truncated = true,
},
},
},
.{
.data = @embedFile("testdata/fuzz1.tar"),
.err = error.TarInsufficientBuffer,
},
.{
});
try testCase(writer_big_long_case);
try testCase(fuzz1_case);
try testCase(.{
.data = @embedFile("testdata/fuzz2.tar"),
.err = error.PaxSizeAttrOverflow,
},
};
});
}
// used in test to calculate file chksum
const Md5Writer = struct {
h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),
pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
self.h.update(buf);
}
pub fn writeByte(self: *Md5Writer, byte: u8) !void {
self.h.update(&[_]u8{byte});
}
pub fn chksum(self: *Md5Writer) [32]u8 {
var s = [_]u8{0} ** 16;
self.h.final(&s);
return std.fmt.bytesToHex(s, .lower);
}
};
test "run test cases" {
fn testCase(case: Case) !void {
var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
for (cases) |case| {
var fsb = std.io.fixedBufferStream(case.data);
var iter = tar.iterator(fsb.reader(), .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
var i: usize = 0;
while (iter.next() catch |err| {
if (case.err) |e| {
try testing.expectEqual(e, err);
continue;
} else {
return err;
}
}) |actual| : (i += 1) {
const expected = case.files[i];
try testing.expectEqualStrings(expected.name, actual.name);
try testing.expectEqual(expected.size, actual.size);
try testing.expectEqual(expected.kind, actual.kind);
try testing.expectEqual(expected.mode, actual.mode);
try testing.expectEqualStrings(expected.link_name, actual.link_name);
var br: std.io.Reader = .fixed(case.data);
var it: tar.Iterator = .init(&br, .{
.file_name_buffer = &file_name_buffer,
.link_name_buffer = &link_name_buffer,
});
var i: usize = 0;
while (it.next() catch |err| {
if (case.err) |e| {
try testing.expectEqual(e, err);
return;
} else {
return err;
}
}) |actual| : (i += 1) {
const expected = case.files[i];
try testing.expectEqualStrings(expected.name, actual.name);
try testing.expectEqual(expected.size, actual.size);
try testing.expectEqual(expected.kind, actual.kind);
try testing.expectEqual(expected.mode, actual.mode);
try testing.expectEqualStrings(expected.link_name, actual.link_name);
if (case.chksums.len > i) {
var md5writer = Md5Writer{};
try actual.writeAll(&md5writer);
const chksum = md5writer.chksum();
try testing.expectEqualStrings(case.chksums[i], &chksum);
} else {
if (expected.truncated) {
iter.unread_file_bytes = 0;
}
if (case.chksums.len > i) {
var aw: std.Io.Writer.Allocating = .init(std.testing.allocator);
defer aw.deinit();
try it.streamRemaining(actual, &aw.writer);
const chksum = std.fmt.bytesToHex(std.crypto.hash.Md5.hashResult(aw.getWritten()), .lower);
try testing.expectEqualStrings(case.chksums[i], &chksum);
} else {
if (expected.truncated) {
it.unread_file_bytes = 0;
}
}
try testing.expectEqual(case.files.len, i);
}
try testing.expectEqual(case.files.len, i);
}
test "pax/gnu long names with small buffer" {
try testLongNameCase(gnu_multi_headers_case);
try testLongNameCase(trailing_slash_case);
try testLongNameCase(.{
.data = @embedFile("testdata/fuzz1.tar"),
.err = error.TarInsufficientBuffer,
});
}
fn testLongNameCase(case: Case) !void {
// should fail with insufficient buffer error
var min_file_name_buffer: [256]u8 = undefined;
var min_link_name_buffer: [100]u8 = undefined;
const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] };
for (long_name_cases) |case| {
var fsb = std.io.fixedBufferStream(case.data);
var iter = tar.iterator(fsb.reader(), .{
.file_name_buffer = &min_file_name_buffer,
.link_name_buffer = &min_link_name_buffer,
});
var br: std.io.Reader = .fixed(case.data);
var iter: tar.Iterator = .init(&br, .{
.file_name_buffer = &min_file_name_buffer,
.link_name_buffer = &min_link_name_buffer,
});
var iter_err: ?anyerror = null;
while (iter.next() catch |err| brk: {
iter_err = err;
break :brk null;
}) |_| {}
var iter_err: ?anyerror = null;
while (iter.next() catch |err| brk: {
iter_err = err;
break :brk null;
}) |_| {}
try testing.expect(iter_err != null);
try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
}
try testing.expect(iter_err != null);
try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?);
}
test "insufficient buffer in Header name filed" {
var min_file_name_buffer: [9]u8 = undefined;
var min_link_name_buffer: [100]u8 = undefined;
var fsb = std.io.fixedBufferStream(cases[0].data);
var iter = tar.iterator(fsb.reader(), .{
var br: std.io.Reader = .fixed(gnu_case.data);
var iter: tar.Iterator = .init(&br, .{
.file_name_buffer = &min_file_name_buffer,
.link_name_buffer = &min_link_name_buffer,
});
@@ -466,21 +462,21 @@ test "should not overwrite existing file" {
// This ensures that file is not overwritten.
//
const data = @embedFile("testdata/overwrite_file.tar");
var fsb = std.io.fixedBufferStream(data);
var r: std.io.Reader = .fixed(data);
// Unpack with strip_components = 1 should fail
var root = std.testing.tmpDir(.{});
defer root.cleanup();
try testing.expectError(
error.PathAlreadyExists,
tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }),
tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }),
);
// Unpack with strip_components = 0 should pass
fsb.reset();
r = .fixed(data);
var root2 = std.testing.tmpDir(.{});
defer root2.cleanup();
try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 });
try tar.pipeToFileSystem(root2.dir, &r, .{ .mode_mode = .ignore, .strip_components = 0 });
}
test "case sensitivity" {
@@ -494,12 +490,12 @@ test "case sensitivity" {
// 18089/alacritty/Darkermatrix.yml
//
const data = @embedFile("testdata/18089.tar");
var fsb = std.io.fixedBufferStream(data);
var r: std.io.Reader = .fixed(data);
var root = std.testing.tmpDir(.{});
defer root.cleanup();
tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| {
tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| {
// on case insensitive fs we fail on overwrite existing file
try testing.expectEqual(error.PathAlreadyExists, err);
return;
-497
View File
@@ -1,497 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
/// Creates a tar `Writer` which writes tar content to `underlying_writer`.
/// Use `setRoot` to nest all following entries under a single root. If a path
/// doesn't fit into the posix header (name+prefix: 100+155 bytes), a gnu
/// extended header is used for the long name. `Options` allows setting the
/// file permission mode and mtime. By default the current time is used for
/// mtime and 0o664 for the file mode.
pub fn writer(underlying_writer: anytype) Writer(@TypeOf(underlying_writer)) {
return .{ .underlying_writer = underlying_writer };
}
/// Returns a tar writer type that emits archive bytes to a `WriterType`.
/// Every entry is written as a 512-byte `Header`, followed by the entry's
/// content (if any) padded with zeros up to a multiple of the block size.
pub fn Writer(comptime WriterType: type) type {
return struct {
const block_size = @sizeOf(Header);
// Source of zero bytes for content padding and for the end-of-archive blocks.
const empty_block: [block_size]u8 = [_]u8{0} ** block_size;
/// Options for writing file/dir/link. A zero `mode` keeps the default
/// mode chosen by `Header.init` for the entry kind (0o664 for regular
/// files), and a zero `mtime` means the current time.
pub const Options = struct {
/// File system permission mode.
mode: u32 = 0,
/// File system modification time, in seconds.
mtime: u64 = 0,
};
const Self = @This();
underlying_writer: WriterType,
// Root set by `setRoot`; combined with every sub_path when building headers.
prefix: []const u8 = "",
// Timestamp cached by `mtimeNow`; 0 means "not sampled yet".
mtime_now: u64 = 0,
/// Sets prefix for all other write* method paths. A non-empty `root`
/// is first written to the archive as a directory entry. The slice is
/// stored, not copied.
pub fn setRoot(self: *Self, root: []const u8) !void {
if (root.len > 0)
try self.writeDir(root, .{});
self.prefix = root;
}
/// Writes directory.
pub fn writeDir(self: *Self, sub_path: []const u8, opt: Options) !void {
try self.writeHeader(.directory, sub_path, "", 0, opt);
}
/// Writes file system file. Size and mtime are taken from `file.stat()`;
/// the mode is the `Header` default because no `Options` are accepted.
pub fn writeFile(self: *Self, sub_path: []const u8, file: std.fs.File) !void {
const stat = try file.stat();
// stat.mtime is in nanoseconds; the header stores whole seconds.
const mtime: u64 = @intCast(@divFloor(stat.mtime, std.time.ns_per_s));
var header = Header{};
try self.setPath(&header, sub_path);
try header.setSize(stat.size);
try header.setMtime(mtime);
try header.write(self.underlying_writer);
try self.underlying_writer.writeFile(file);
try self.writePadding(stat.size);
}
/// Writes file reading file content from `reader`. Number of bytes in
/// reader must be equal to `size`; otherwise `error.WrongReaderSize` is
/// returned, after the header and the content have already been written.
pub fn writeFileStream(self: *Self, sub_path: []const u8, size: usize, reader: anytype, opt: Options) !void {
try self.writeHeader(.regular, sub_path, "", @intCast(size), opt);
// Count the bytes that actually flow through so a mismatch with the
// size recorded in the header can be detected.
var counting_reader = std.io.countingReader(reader);
var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
try fifo.pump(counting_reader.reader(), self.underlying_writer);
if (counting_reader.bytes_read != size) return error.WrongReaderSize;
try self.writePadding(size);
}
/// Writes file using bytes buffer `content` for size and file content.
pub fn writeFileBytes(self: *Self, sub_path: []const u8, content: []const u8, opt: Options) !void {
try self.writeHeader(.regular, sub_path, "", @intCast(content.len), opt);
try self.underlying_writer.writeAll(content);
try self.writePadding(content.len);
}
/// Writes symlink.
pub fn writeLink(self: *Self, sub_path: []const u8, link_name: []const u8, opt: Options) !void {
try self.writeHeader(.symbolic_link, sub_path, link_name, 0, opt);
}
/// Writes fs.Dir.Walker.Entry. Uses `mtime` from the file system entry
/// and the default mode for the entry kind. Kinds other than directory,
/// file and symlink fail with `error.UnsupportedWalkerEntryKind`.
pub fn writeEntry(self: *Self, entry: std.fs.Dir.Walker.Entry) !void {
switch (entry.kind) {
.directory => {
try self.writeDir(entry.path, .{ .mtime = try entryMtime(entry) });
},
.file => {
var file = try entry.dir.openFile(entry.basename, .{});
defer file.close();
try self.writeFile(entry.path, file);
},
.sym_link => {
var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
const link_name = try entry.dir.readLink(entry.basename, &link_name_buffer);
try self.writeLink(entry.path, link_name, .{ .mtime = try entryMtime(entry) });
},
else => {
return error.UnsupportedWalkerEntryKind;
},
}
}
// Builds and writes the header block for one entry. Paths or link names
// that don't fit into the posix header are emitted first as gnu extended
// headers.
fn writeHeader(
self: *Self,
typeflag: Header.FileType,
sub_path: []const u8,
link_name: []const u8,
size: u64,
opt: Options,
) !void {
var header = Header.init(typeflag);
try self.setPath(&header, sub_path);
try header.setSize(size);
// Zero in Options means "not set": fall back to the current time and
// to the default mode already placed in the header by Header.init.
try header.setMtime(if (opt.mtime != 0) opt.mtime else self.mtimeNow());
if (opt.mode != 0)
try header.setMode(opt.mode);
if (typeflag == .symbolic_link)
header.setLinkname(link_name) catch |err| switch (err) {
error.NameTooLong => try self.writeExtendedHeader(.gnu_long_link, &.{link_name}),
else => return err,
};
try header.write(self.underlying_writer);
}
// Returns the current time in seconds, sampled once on first use and then
// reused so that entries written by this writer share the same default mtime.
fn mtimeNow(self: *Self) u64 {
if (self.mtime_now == 0)
self.mtime_now = @intCast(std.time.timestamp());
return self.mtime_now;
}
// Modification time of a walker entry, in whole seconds.
fn entryMtime(entry: std.fs.Dir.Walker.Entry) !u64 {
const stat = try entry.dir.statFile(entry.basename);
return @intCast(@divFloor(stat.mtime, std.time.ns_per_s));
}
/// Writes path in posix header; if it doesn't fit (in name+prefix;
/// 100+155 bytes), writes it in a gnu extended header instead.
fn setPath(self: *Self, header: *Header, sub_path: []const u8) !void {
header.setPath(self.prefix, sub_path) catch |err| switch (err) {
error.NameTooLong => {
// write extended header
const buffers: []const []const u8 = if (self.prefix.len == 0)
&.{sub_path}
else
&.{ self.prefix, "/", sub_path };
try self.writeExtendedHeader(.gnu_long_name, buffers);
},
else => return err,
};
}
/// Writes gnu extended header: gnu_long_name or gnu_long_link. The
/// concatenation of `buffers` is written as the content of that header.
fn writeExtendedHeader(self: *Self, typeflag: Header.FileType, buffers: []const []const u8) !void {
var len: usize = 0;
for (buffers) |buf|
len += buf.len;
var header = Header.init(typeflag);
try header.setSize(len);
try header.write(self.underlying_writer);
for (buffers) |buf|
try self.underlying_writer.writeAll(buf);
try self.writePadding(len);
}
// Pads content of length `bytes` with zeros up to the next block boundary;
// writes nothing when the content already ends on a boundary.
fn writePadding(self: *Self, bytes: u64) !void {
const pos: usize = @intCast(bytes % block_size);
if (pos == 0) return;
try self.underlying_writer.writeAll(empty_block[pos..]);
}
/// Tar should finish with two zero blocks, but 'reasonable system must
/// not assume that such a block exists when reading an archive' (from
/// reference). In practice it is safe to skip this finish.
pub fn finish(self: *Self) !void {
try self.underlying_writer.writeAll(&empty_block);
try self.underlying_writer.writeAll(&empty_block);
}
};
}
/// A struct that is exactly 512 bytes and matches tar file format. This is
/// intended to be used for outputting tar files; for parsing there is
/// `std.tar.Header`.
const Header = extern struct {
    // This struct was originally copied from
    // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT
    // licensed.
    //
    // The name, linkname, magic, uname, and gname are null-terminated character
    // strings. All other fields are zero-filled octal numbers in ASCII. Each
    // numeric field of width w contains w minus 1 digits, and a null.
    // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
    // POSIX header: byte offset
    name: [100]u8 = [_]u8{0} ** 100, // 0
    mode: [7:0]u8 = default_mode.file, // 100
    uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108
    gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116
    size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124
    mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136
    checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148
    typeflag: FileType = .regular, // 156
    linkname: [100]u8 = [_]u8{0} ** 100, // 157
    magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257
    version: [2]u8 = [_]u8{ '0', '0' }, // 263
    uname: [32]u8 = [_]u8{0} ** 32, // unused 265
    gname: [32]u8 = [_]u8{0} ** 32, // unused 297
    devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329
    devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337
    prefix: [155]u8 = [_]u8{0} ** 155, // 345
    pad: [12]u8 = [_]u8{0} ** 12, // unused 500

    pub const FileType = enum(u8) {
        regular = '0',
        symbolic_link = '2',
        directory = '5',
        gnu_long_name = 'L',
        gnu_long_link = 'K',
    };

    const default_mode = struct {
        const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664
        const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775
        const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777
        const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000
    };

    /// Creates a header of the given type with the default mode for that
    /// type; all other fields keep their struct defaults.
    pub fn init(typeflag: FileType) Header {
        return .{
            .typeflag = typeflag,
            .mode = switch (typeflag) {
                .directory => default_mode.dir,
                .symbolic_link => default_mode.sym_link,
                .regular => default_mode.file,
                else => default_mode.other,
            },
        };
    }

    /// Stores `size` in the size field as zero-padded octal.
    /// Returns `error.OctalOverflow` if it needs more than 11 digits.
    pub fn setSize(self: *Header, size: u64) !void {
        try octal(&self.size, size);
    }

    /// Writes `value` into `buf` as ASCII octal digits, right-aligned and
    /// zero-padded so that the whole of `buf` is overwritten. Returns
    /// `error.OctalOverflow` when `value` needs more than `buf.len` digits
    /// (in that case `buf` holds the low-order digits only).
    fn octal(buf: []u8, value: u64) !void {
        var remainder: u64 = value;
        var pos: usize = buf.len;
        while (remainder > 0 and pos > 0) {
            pos -= 1;
            buf[pos] = @as(u8, @intCast(remainder % 8)) + '0';
            remainder /= 8;
        }
        if (remainder > 0) return error.OctalOverflow;
        // Overwrite the leading positions too. Without this, digits already in
        // the field (e.g. the default mode "0000664") would leak into the
        // result whenever the new value has fewer digits than the old one.
        @memset(buf[0..pos], '0');
    }

    /// Stores `mode` in the mode field as zero-padded octal, replacing the
    /// default mode completely. Returns `error.OctalOverflow` if it needs
    /// more than 7 digits.
    pub fn setMode(self: *Header, mode: u32) !void {
        try octal(&self.mode, mode);
    }

    // Integer number of seconds since January 1, 1970, 00:00 Coordinated Universal Time.
    // The value is stored verbatim; substituting the current time for an
    // unset (zero) mtime is done by the Writer before calling this.
    pub fn setMtime(self: *Header, mtime: u64) !void {
        try octal(&self.mtime, mtime);
    }

    /// Computes the header checksum (sum of all header bytes, with the
    /// checksum field itself counted as spaces) and stores it in the
    /// checksum field. Safe to call more than once on the same header.
    pub fn updateChecksum(self: *Header) !void {
        // Start from a blank field so that a checksum written by a previous
        // call does not contribute to the new sum.
        self.checksum = [_:0]u8{' '} ** 7;
        // The field is 8 bytes wide: 7 spaces plus the sentinel, which is 0
        // in memory but has to be counted as a space as well.
        var checksum: usize = ' ';
        for (std.mem.asBytes(self)) |val|
            checksum += val;
        try octal(&self.checksum, checksum);
    }

    /// Updates the checksum and writes the 512 header bytes to `output_writer`.
    pub fn write(self: *Header, output_writer: anytype) !void {
        try self.updateChecksum();
        try output_writer.writeAll(std.mem.asBytes(self));
    }

    /// Copies `link` into the linkname field.
    /// Returns `error.NameTooLong` if it is longer than the field.
    pub fn setLinkname(self: *Header, link: []const u8) !void {
        if (link.len > self.linkname.len) return error.NameTooLong;
        @memcpy(self.linkname[0..link.len], link);
    }

    /// Distributes `prefix` + '/' + `sub_path` over the name (100 bytes) and
    /// prefix (155 bytes) fields. `sub_path` is only ever split at a '/'.
    /// Returns `error.NameTooLong` when no valid split exists.
    pub fn setPath(self: *Header, prefix: []const u8, sub_path: []const u8) !void {
        const max_prefix = self.prefix.len;
        const max_name = self.name.len;
        const sep = std.fs.path.sep_posix;

        if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix)
            return error.NameTooLong;

        // both fit into name
        if (prefix.len > 0 and prefix.len + sub_path.len < max_name) {
            @memcpy(self.name[0..prefix.len], prefix);
            self.name[prefix.len] = sep;
            @memcpy(self.name[prefix.len + 1 ..][0..sub_path.len], sub_path);
            return;
        }

        // sub_path fits into name
        // there is no prefix or prefix fits into prefix
        if (sub_path.len <= max_name) {
            @memcpy(self.name[0..sub_path.len], sub_path);
            @memcpy(self.prefix[0..prefix.len], prefix);
            return;
        }

        if (prefix.len > 0) {
            @memcpy(self.prefix[0..prefix.len], prefix);
            self.prefix[prefix.len] = sep;
        }
        const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0;

        // add as much to prefix as you can, must split at /
        const prefix_remaining = max_prefix - prefix_pos;
        if (std.mem.lastIndexOf(u8, sub_path[0..@min(prefix_remaining, sub_path.len)], &.{'/'})) |sep_pos| {
            @memcpy(self.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]);
            if ((sub_path.len - sep_pos - 1) > max_name) return error.NameTooLong;
            @memcpy(self.name[0..][0 .. sub_path.len - sep_pos - 1], sub_path[sep_pos + 1 ..]);
            return;
        }

        return error.NameTooLong;
    }

    comptime {
        assert(@sizeOf(Header) == 512);
    }

    test setPath {
        const cases = [_]struct {
            in: []const []const u8,
            out: []const []const u8,
        }{
            .{
                .in = &.{ "", "123456789" },
                .out = &.{ "", "123456789" },
            },
            // can fit into name
            .{
                .in = &.{ "prefix", "sub_path" },
                .out = &.{ "", "prefix/sub_path" },
            },
            // no more both fits into name
            .{
                .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
                .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
            },
            // put as much as you can into prefix the rest goes into name
            .{
                .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" },
            },
            .{
                .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" },
            },
            .{
                .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" },
                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" },
            },
            .{
                .in = &.{ "", "012345678/" ** 10 ++ "foo" },
                .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" },
            },
        };

        for (cases) |case| {
            var header = Header.init(.regular);
            try header.setPath(case.in[0], case.in[1]);
            try testing.expectEqualStrings(case.out[0], str(&header.prefix));
            try testing.expectEqualStrings(case.out[1], str(&header.name));
        }

        const error_cases = [_]struct {
            in: []const []const u8,
        }{
            // basename can't fit into name (106 characters)
            .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } },
            // can't fit into 255 + sep
            .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } },
            // can fit but sub_path can't be split (there is no separator)
            .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } },
            .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } },
        };

        for (error_cases) |case| {
            var header = Header.init(.regular);
            try testing.expectError(
                error.NameTooLong,
                header.setPath(case.in[0], case.in[1]),
            );
        }
    }

    // Breaks string on first null character.
    fn str(s: []const u8) []const u8 {
        for (s, 0..) |c, i| {
            if (c == 0) return s[0..i];
        }
        return s;
    }
};
// Reference `Header` from a top-level test so that the `test` declarations
// nested inside it (such as `test setPath`) are picked up when this file's
// tests run.
test {
_ = Header;
}
// Writes the same set of files twice -- once nested under a root directory
// using `writeFileBytes`, once without a root using `writeFileStream` -- and
// checks that the tar iterator reads back exactly the same names and
// contents, in the same order.
test "write files" {
    const files = [_]struct {
        path: []const u8,
        content: []const u8,
    }{
        .{ .path = "foo", .content = "bar" },
        .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 },
        .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 },
        .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 },
        .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 },
        .{ .path = "e123456789" ** 11, .content = "e" },
    };

    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;

    // with root
    {
        const root = "root";

        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();
        var wrt = writer(output.writer());
        try wrt.setRoot(root);
        for (files) |file|
            try wrt.writeFileBytes(file.path, file.content, .{});

        var input = std.io.fixedBufferStream(output.items);
        var iter = std.tar.iterator(
            input.reader(),
            .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer },
        );

        // first entry is directory with prefix
        {
            const actual = (try iter.next()).?;
            try testing.expectEqualStrings(root, actual.name);
            try testing.expectEqual(std.tar.FileKind.directory, actual.kind);
        }

        var i: usize = 0;
        while (try iter.next()) |actual| {
            defer i += 1;
            // Fail with a test error instead of indexing past `files` if the
            // archive unexpectedly contains extra entries.
            try testing.expect(i < files.len);
            const expected = files[i];
            try testing.expectEqualStrings(root, actual.name[0..root.len]);
            try testing.expectEqual('/', actual.name[root.len..][0]);
            try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]);

            var content = std.ArrayList(u8).init(testing.allocator);
            defer content.deinit();
            try actual.writeAll(content.writer());
            try testing.expectEqualSlices(u8, expected.content, content.items);
        }
        // Every written file must have been read back; without this check an
        // archive that ends early would still pass.
        try testing.expectEqual(files.len, i);
    }

    // without root
    {
        var output = std.ArrayList(u8).init(testing.allocator);
        defer output.deinit();
        var wrt = writer(output.writer());
        for (files) |file| {
            var content = std.io.fixedBufferStream(file.content);
            try wrt.writeFileStream(file.path, file.content.len, content.reader(), .{});
        }

        var input = std.io.fixedBufferStream(output.items);
        var iter = std.tar.iterator(
            input.reader(),
            .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer },
        );

        var i: usize = 0;
        while (try iter.next()) |actual| {
            defer i += 1;
            // Same guard as above: never index past the expected entries.
            try testing.expect(i < files.len);
            const expected = files[i];
            try testing.expectEqualStrings(expected.path, actual.name);

            var content = std.ArrayList(u8).init(testing.allocator);
            defer content.deinit();
            try actual.writeAll(content.writer());
            try testing.expectEqualSlices(u8, expected.content, content.items);
        }
        // Every written file must have been read back.
        try testing.expectEqual(files.len, i);

        try wrt.finish();
    }
}
+1
View File
@@ -33,6 +33,7 @@ pub var log_level = std.log.Level.warn;
// Disable printing in tests for simple backends.
pub const backend_can_print = switch (builtin.zig_backend) {
.stage2_aarch64,
.stage2_powerpc,
.stage2_riscv64,
.stage2_spirv,
+30 -9
View File
@@ -321,6 +321,27 @@ pub const BuildId = union(enum) {
try std.testing.expectError(error.InvalidCharacter, parse("0xfoobbb"));
try std.testing.expectError(error.InvalidBuildIdStyle, parse("yaddaxxx"));
}
/// Prints the build id to `writer`: the payload-free styles are printed as
/// their tag name, a `.hexstring` as `0x` followed by its bytes in lowercase
/// hexadecimal.
pub fn format(id: BuildId, writer: *std.io.Writer) std.io.Writer.Error!void {
    return switch (id) {
        .hexstring => |hex| writer.print("0x{x}", .{hex.toSlice()}),
        .none, .fast, .uuid, .sha1, .md5 => writer.writeAll(@tagName(id)),
    };
}
// Checks the `{f}` rendering of every `BuildId` variant: the payload-free
// styles print their tag name, hex strings print as "0x" plus lowercase hex
// digits (including the empty hex string, which prints as just "0x").
test format {
try std.testing.expectFmt("none", "{f}", .{@as(BuildId, .none)});
try std.testing.expectFmt("fast", "{f}", .{@as(BuildId, .fast)});
try std.testing.expectFmt("uuid", "{f}", .{@as(BuildId, .uuid)});
try std.testing.expectFmt("sha1", "{f}", .{@as(BuildId, .sha1)});
try std.testing.expectFmt("md5", "{f}", .{@as(BuildId, .md5)});
try std.testing.expectFmt("0x", "{f}", .{BuildId.initHexString("")});
try std.testing.expectFmt("0x1234cdef", "{f}", .{BuildId.initHexString("\x12\x34\xcd\xef")});
}
};
pub const LtoMode = enum { none, full, thin };
@@ -364,23 +385,23 @@ pub fn serializeCpuAlloc(ally: Allocator, cpu: std.Target.Cpu) Allocator.Error![
/// Return a Formatter for a Zig identifier, escaping it with `@""` syntax if needed.
///
/// See also `fmtIdFlags`.
pub fn fmtId(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) {
return .{ .data = .{ .bytes = bytes, .flags = .{} } };
pub fn fmtId(bytes: []const u8) FormatId {
return .{ .bytes = bytes, .flags = .{} };
}
/// Return a Formatter for a Zig identifier, escaping it with `@""` syntax if needed.
///
/// See also `fmtId`.
pub fn fmtIdFlags(bytes: []const u8, flags: FormatId.Flags) std.fmt.Formatter(FormatId, FormatId.render) {
return .{ .data = .{ .bytes = bytes, .flags = flags } };
pub fn fmtIdFlags(bytes: []const u8, flags: FormatId.Flags) FormatId {
return .{ .bytes = bytes, .flags = flags };
}
pub fn fmtIdPU(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) {
return .{ .data = .{ .bytes = bytes, .flags = .{ .allow_primitive = true, .allow_underscore = true } } };
pub fn fmtIdPU(bytes: []const u8) FormatId {
return .{ .bytes = bytes, .flags = .{ .allow_primitive = true, .allow_underscore = true } };
}
pub fn fmtIdP(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) {
return .{ .data = .{ .bytes = bytes, .flags = .{ .allow_primitive = true } } };
pub fn fmtIdP(bytes: []const u8) FormatId {
return .{ .bytes = bytes, .flags = .{ .allow_primitive = true } };
}
test fmtId {
@@ -426,7 +447,7 @@ pub const FormatId = struct {
};
/// Print the string as a Zig identifier, escaping it with `@""` syntax if needed.
fn render(ctx: FormatId, writer: *Writer) Writer.Error!void {
pub fn format(ctx: FormatId, writer: *Writer) Writer.Error!void {
const bytes = ctx.bytes;
if (isValidId(bytes) and
(ctx.flags.allow_primitive or !std.zig.isPrimitive(bytes)) and
+2 -2
View File
@@ -342,9 +342,9 @@ pub fn updateFileOnDisk(file: *File, comp: *Compilation) !void {
}
// `make_path` matters because the dir hasn't actually been created yet.
var af = try root_dir.atomicFile(sub_path, .{ .make_path = true });
var af = try root_dir.atomicFile(sub_path, .{ .make_path = true, .write_buffer = &.{} });
defer af.deinit();
try af.file.writeAll(file.source.?);
try af.file_writer.interface.writeAll(file.source.?);
af.finish() catch |err| switch (err) {
error.AccessDenied => switch (builtin.os.tag) {
.windows => {
+164 -147
View File
@@ -1816,10 +1816,12 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
if (options.skip_linker_dependencies) break :s .none;
const want = options.want_compiler_rt orelse is_exe_or_dyn_lib;
if (!want) break :s .none;
if (have_zcu) {
if (have_zcu and target_util.canBuildLibCompilerRt(target, use_llvm, build_options.have_llvm and use_llvm)) {
if (output_mode == .Obj) break :s .zcu;
if (target.ofmt == .coff and target_util.zigBackend(target, use_llvm) == .stage2_x86_64)
break :s if (is_exe_or_dyn_lib) .dyn_lib else .zcu;
if (switch (target_util.zigBackend(target, use_llvm)) {
else => false,
.stage2_aarch64, .stage2_x86_64 => target.ofmt == .coff,
}) break :s if (is_exe_or_dyn_lib) .dyn_lib else .zcu;
}
if (is_exe_or_dyn_lib) break :s .lib;
break :s .obj;
@@ -1850,11 +1852,11 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
// approach, since the ubsan runtime uses quite a lot of the standard library
// and this reduces unnecessary bloat.
const ubsan_rt_strat: RtStrat = s: {
const can_build_ubsan_rt = target_util.canBuildLibUbsanRt(target);
const can_build_ubsan_rt = target_util.canBuildLibUbsanRt(target, use_llvm, build_options.have_llvm);
const want_ubsan_rt = options.want_ubsan_rt orelse (can_build_ubsan_rt and any_sanitize_c == .full and is_exe_or_dyn_lib);
if (!want_ubsan_rt) break :s .none;
if (options.skip_linker_dependencies) break :s .none;
if (have_zcu) break :s .zcu;
if (have_zcu and target_util.canBuildLibUbsanRt(target, use_llvm, build_options.have_llvm and use_llvm)) break :s .zcu;
if (is_exe_or_dyn_lib) break :s .lib;
break :s .obj;
};
@@ -3382,7 +3384,7 @@ pub fn saveState(comp: *Compilation) !void {
const gpa = comp.gpa;
var bufs = std.ArrayList(std.posix.iovec_const).init(gpa);
var bufs = std.ArrayList([]const u8).init(gpa);
defer bufs.deinit();
var pt_headers = std.ArrayList(Header.PerThread).init(gpa);
@@ -3421,50 +3423,50 @@ pub fn saveState(comp: *Compilation) !void {
try bufs.ensureTotalCapacityPrecise(14 + 8 * pt_headers.items.len);
addBuf(&bufs, mem.asBytes(&header));
addBuf(&bufs, mem.sliceAsBytes(pt_headers.items));
addBuf(&bufs, @ptrCast(pt_headers.items));
addBuf(&bufs, mem.sliceAsBytes(ip.src_hash_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.src_hash_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.nav_val_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.nav_val_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.nav_ty_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.nav_ty_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.interned_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.interned_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.zon_file_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.zon_file_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.embed_file_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.embed_file_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.namespace_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.namespace_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.namespace_name_deps.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.namespace_name_deps.values()));
addBuf(&bufs, @ptrCast(ip.src_hash_deps.keys()));
addBuf(&bufs, @ptrCast(ip.src_hash_deps.values()));
addBuf(&bufs, @ptrCast(ip.nav_val_deps.keys()));
addBuf(&bufs, @ptrCast(ip.nav_val_deps.values()));
addBuf(&bufs, @ptrCast(ip.nav_ty_deps.keys()));
addBuf(&bufs, @ptrCast(ip.nav_ty_deps.values()));
addBuf(&bufs, @ptrCast(ip.interned_deps.keys()));
addBuf(&bufs, @ptrCast(ip.interned_deps.values()));
addBuf(&bufs, @ptrCast(ip.zon_file_deps.keys()));
addBuf(&bufs, @ptrCast(ip.zon_file_deps.values()));
addBuf(&bufs, @ptrCast(ip.embed_file_deps.keys()));
addBuf(&bufs, @ptrCast(ip.embed_file_deps.values()));
addBuf(&bufs, @ptrCast(ip.namespace_deps.keys()));
addBuf(&bufs, @ptrCast(ip.namespace_deps.values()));
addBuf(&bufs, @ptrCast(ip.namespace_name_deps.keys()));
addBuf(&bufs, @ptrCast(ip.namespace_name_deps.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.first_dependency.keys()));
addBuf(&bufs, mem.sliceAsBytes(ip.first_dependency.values()));
addBuf(&bufs, mem.sliceAsBytes(ip.dep_entries.items));
addBuf(&bufs, mem.sliceAsBytes(ip.free_dep_entries.items));
addBuf(&bufs, @ptrCast(ip.first_dependency.keys()));
addBuf(&bufs, @ptrCast(ip.first_dependency.values()));
addBuf(&bufs, @ptrCast(ip.dep_entries.items));
addBuf(&bufs, @ptrCast(ip.free_dep_entries.items));
for (ip.locals, pt_headers.items) |*local, pt_header| {
if (pt_header.intern_pool.limbs_len > 0) {
addBuf(&bufs, mem.sliceAsBytes(local.shared.limbs.view().items(.@"0")[0..pt_header.intern_pool.limbs_len]));
addBuf(&bufs, @ptrCast(local.shared.limbs.view().items(.@"0")[0..pt_header.intern_pool.limbs_len]));
}
if (pt_header.intern_pool.extra_len > 0) {
addBuf(&bufs, mem.sliceAsBytes(local.shared.extra.view().items(.@"0")[0..pt_header.intern_pool.extra_len]));
addBuf(&bufs, @ptrCast(local.shared.extra.view().items(.@"0")[0..pt_header.intern_pool.extra_len]));
}
if (pt_header.intern_pool.items_len > 0) {
addBuf(&bufs, mem.sliceAsBytes(local.shared.items.view().items(.data)[0..pt_header.intern_pool.items_len]));
addBuf(&bufs, mem.sliceAsBytes(local.shared.items.view().items(.tag)[0..pt_header.intern_pool.items_len]));
addBuf(&bufs, @ptrCast(local.shared.items.view().items(.data)[0..pt_header.intern_pool.items_len]));
addBuf(&bufs, @ptrCast(local.shared.items.view().items(.tag)[0..pt_header.intern_pool.items_len]));
}
if (pt_header.intern_pool.string_bytes_len > 0) {
addBuf(&bufs, local.shared.strings.view().items(.@"0")[0..pt_header.intern_pool.string_bytes_len]);
}
if (pt_header.intern_pool.tracked_insts_len > 0) {
addBuf(&bufs, mem.sliceAsBytes(local.shared.tracked_insts.view().items(.@"0")[0..pt_header.intern_pool.tracked_insts_len]));
addBuf(&bufs, @ptrCast(local.shared.tracked_insts.view().items(.@"0")[0..pt_header.intern_pool.tracked_insts_len]));
}
if (pt_header.intern_pool.files_len > 0) {
addBuf(&bufs, mem.sliceAsBytes(local.shared.files.view().items(.bin_digest)[0..pt_header.intern_pool.files_len]));
addBuf(&bufs, mem.sliceAsBytes(local.shared.files.view().items(.root_type)[0..pt_header.intern_pool.files_len]));
addBuf(&bufs, @ptrCast(local.shared.files.view().items(.bin_digest)[0..pt_header.intern_pool.files_len]));
addBuf(&bufs, @ptrCast(local.shared.files.view().items(.root_type)[0..pt_header.intern_pool.files_len]));
}
}
@@ -3482,95 +3484,95 @@ pub fn saveState(comp: *Compilation) !void {
try bufs.ensureUnusedCapacity(85);
addBuf(&bufs, wasm.string_bytes.items);
// TODO make it well-defined memory layout
//addBuf(&bufs, mem.sliceAsBytes(wasm.objects.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.func_types.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_function_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_function_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_functions.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_global_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_global_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_globals.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_table_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_table_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_tables.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_memory_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_memory_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_memories.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations.items(.tag)));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations.items(.offset)));
//addBuf(&bufs, @ptrCast(wasm.objects.items));
addBuf(&bufs, @ptrCast(wasm.func_types.keys()));
addBuf(&bufs, @ptrCast(wasm.object_function_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.object_function_imports.values()));
addBuf(&bufs, @ptrCast(wasm.object_functions.items));
addBuf(&bufs, @ptrCast(wasm.object_global_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.object_global_imports.values()));
addBuf(&bufs, @ptrCast(wasm.object_globals.items));
addBuf(&bufs, @ptrCast(wasm.object_table_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.object_table_imports.values()));
addBuf(&bufs, @ptrCast(wasm.object_tables.items));
addBuf(&bufs, @ptrCast(wasm.object_memory_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.object_memory_imports.values()));
addBuf(&bufs, @ptrCast(wasm.object_memories.items));
addBuf(&bufs, @ptrCast(wasm.object_relocations.items(.tag)));
addBuf(&bufs, @ptrCast(wasm.object_relocations.items(.offset)));
// TODO handle the union safety field
//addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations.items(.pointee)));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations.items(.addend)));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_init_funcs.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_data_segments.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_datas.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_data_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_data_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_custom_segments.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_custom_segments.values()));
//addBuf(&bufs, @ptrCast(wasm.object_relocations.items(.pointee)));
addBuf(&bufs, @ptrCast(wasm.object_relocations.items(.addend)));
addBuf(&bufs, @ptrCast(wasm.object_init_funcs.items));
addBuf(&bufs, @ptrCast(wasm.object_data_segments.items));
addBuf(&bufs, @ptrCast(wasm.object_datas.items));
addBuf(&bufs, @ptrCast(wasm.object_data_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.object_data_imports.values()));
addBuf(&bufs, @ptrCast(wasm.object_custom_segments.keys()));
addBuf(&bufs, @ptrCast(wasm.object_custom_segments.values()));
// TODO make it well-defined memory layout
// addBuf(&bufs, mem.sliceAsBytes(wasm.object_comdats.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations_table.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_relocations_table.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_comdat_symbols.items(.kind)));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_comdat_symbols.items(.index)));
addBuf(&bufs, mem.sliceAsBytes(wasm.out_relocs.items(.tag)));
addBuf(&bufs, mem.sliceAsBytes(wasm.out_relocs.items(.offset)));
// addBuf(&bufs, @ptrCast(wasm.object_comdats.items));
addBuf(&bufs, @ptrCast(wasm.object_relocations_table.keys()));
addBuf(&bufs, @ptrCast(wasm.object_relocations_table.values()));
addBuf(&bufs, @ptrCast(wasm.object_comdat_symbols.items(.kind)));
addBuf(&bufs, @ptrCast(wasm.object_comdat_symbols.items(.index)));
addBuf(&bufs, @ptrCast(wasm.out_relocs.items(.tag)));
addBuf(&bufs, @ptrCast(wasm.out_relocs.items(.offset)));
// TODO handle the union safety field
//addBuf(&bufs, mem.sliceAsBytes(wasm.out_relocs.items(.pointee)));
addBuf(&bufs, mem.sliceAsBytes(wasm.out_relocs.items(.addend)));
addBuf(&bufs, mem.sliceAsBytes(wasm.uav_fixups.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.nav_fixups.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.func_table_fixups.items));
//addBuf(&bufs, @ptrCast(wasm.out_relocs.items(.pointee)));
addBuf(&bufs, @ptrCast(wasm.out_relocs.items(.addend)));
addBuf(&bufs, @ptrCast(wasm.uav_fixups.items));
addBuf(&bufs, @ptrCast(wasm.nav_fixups.items));
addBuf(&bufs, @ptrCast(wasm.func_table_fixups.items));
if (is_obj) {
addBuf(&bufs, mem.sliceAsBytes(wasm.navs_obj.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.navs_obj.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uavs_obj.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uavs_obj.values()));
addBuf(&bufs, @ptrCast(wasm.navs_obj.keys()));
addBuf(&bufs, @ptrCast(wasm.navs_obj.values()));
addBuf(&bufs, @ptrCast(wasm.uavs_obj.keys()));
addBuf(&bufs, @ptrCast(wasm.uavs_obj.values()));
} else {
addBuf(&bufs, mem.sliceAsBytes(wasm.navs_exe.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.navs_exe.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uavs_exe.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uavs_exe.values()));
addBuf(&bufs, @ptrCast(wasm.navs_exe.keys()));
addBuf(&bufs, @ptrCast(wasm.navs_exe.values()));
addBuf(&bufs, @ptrCast(wasm.uavs_exe.keys()));
addBuf(&bufs, @ptrCast(wasm.uavs_exe.values()));
}
addBuf(&bufs, mem.sliceAsBytes(wasm.overaligned_uavs.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.overaligned_uavs.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.zcu_funcs.keys()));
addBuf(&bufs, @ptrCast(wasm.overaligned_uavs.keys()));
addBuf(&bufs, @ptrCast(wasm.overaligned_uavs.values()));
addBuf(&bufs, @ptrCast(wasm.zcu_funcs.keys()));
// TODO handle the union safety field
// addBuf(&bufs, mem.sliceAsBytes(wasm.zcu_funcs.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.nav_exports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.nav_exports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uav_exports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.uav_exports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.missing_exports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.function_exports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.function_exports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.hidden_function_exports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.hidden_function_exports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.global_exports.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.functions.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.function_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.function_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.data_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.data_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.data_segments.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.globals.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.global_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.global_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.tables.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.table_imports.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.table_imports.values()));
addBuf(&bufs, mem.sliceAsBytes(wasm.zcu_indirect_function_set.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_indirect_function_import_set.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.object_indirect_function_set.keys()));
addBuf(&bufs, mem.sliceAsBytes(wasm.mir_instructions.items(.tag)));
// addBuf(&bufs, @ptrCast(wasm.zcu_funcs.values()));
addBuf(&bufs, @ptrCast(wasm.nav_exports.keys()));
addBuf(&bufs, @ptrCast(wasm.nav_exports.values()));
addBuf(&bufs, @ptrCast(wasm.uav_exports.keys()));
addBuf(&bufs, @ptrCast(wasm.uav_exports.values()));
addBuf(&bufs, @ptrCast(wasm.imports.keys()));
addBuf(&bufs, @ptrCast(wasm.missing_exports.keys()));
addBuf(&bufs, @ptrCast(wasm.function_exports.keys()));
addBuf(&bufs, @ptrCast(wasm.function_exports.values()));
addBuf(&bufs, @ptrCast(wasm.hidden_function_exports.keys()));
addBuf(&bufs, @ptrCast(wasm.hidden_function_exports.values()));
addBuf(&bufs, @ptrCast(wasm.global_exports.items));
addBuf(&bufs, @ptrCast(wasm.functions.keys()));
addBuf(&bufs, @ptrCast(wasm.function_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.function_imports.values()));
addBuf(&bufs, @ptrCast(wasm.data_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.data_imports.values()));
addBuf(&bufs, @ptrCast(wasm.data_segments.keys()));
addBuf(&bufs, @ptrCast(wasm.globals.keys()));
addBuf(&bufs, @ptrCast(wasm.global_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.global_imports.values()));
addBuf(&bufs, @ptrCast(wasm.tables.keys()));
addBuf(&bufs, @ptrCast(wasm.table_imports.keys()));
addBuf(&bufs, @ptrCast(wasm.table_imports.values()));
addBuf(&bufs, @ptrCast(wasm.zcu_indirect_function_set.keys()));
addBuf(&bufs, @ptrCast(wasm.object_indirect_function_import_set.keys()));
addBuf(&bufs, @ptrCast(wasm.object_indirect_function_set.keys()));
addBuf(&bufs, @ptrCast(wasm.mir_instructions.items(.tag)));
// TODO handle the union safety field
//addBuf(&bufs, mem.sliceAsBytes(wasm.mir_instructions.items(.data)));
addBuf(&bufs, mem.sliceAsBytes(wasm.mir_extra.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.mir_locals.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.tag_name_bytes.items));
addBuf(&bufs, mem.sliceAsBytes(wasm.tag_name_offs.items));
//addBuf(&bufs, @ptrCast(wasm.mir_instructions.items(.data)));
addBuf(&bufs, @ptrCast(wasm.mir_extra.items));
addBuf(&bufs, @ptrCast(wasm.mir_locals.items));
addBuf(&bufs, @ptrCast(wasm.tag_name_bytes.items));
addBuf(&bufs, @ptrCast(wasm.tag_name_offs.items));
// TODO add as header fields
// entry_resolution: FunctionImport.Resolution
@@ -3596,16 +3598,16 @@ pub fn saveState(comp: *Compilation) !void {
// Using an atomic file prevents a crash or power failure from corrupting
// the previous incremental compilation state.
var af = try lf.emit.root_dir.handle.atomicFile(basename, .{});
var write_buffer: [1024]u8 = undefined;
var af = try lf.emit.root_dir.handle.atomicFile(basename, .{ .write_buffer = &write_buffer });
defer af.deinit();
try af.file.pwritevAll(bufs.items, 0);
try af.file_writer.interface.writeVecAll(bufs.items);
try af.finish();
}
fn addBuf(list: *std.ArrayList(std.posix.iovec_const), buf: []const u8) void {
// Even when len=0, the undefined pointer might cause EFAULT.
fn addBuf(list: *std.ArrayList([]const u8), buf: []const u8) void {
if (buf.len == 0) return;
list.appendAssumeCapacity(.{ .base = buf.ptr, .len = buf.len });
list.appendAssumeCapacity(buf);
}
/// This function is temporally single-threaded.
@@ -4862,6 +4864,9 @@ fn docsCopyFallible(comp: *Compilation) anyerror!void {
};
defer tar_file.close();
var buffer: [1024]u8 = undefined;
var tar_file_writer = tar_file.writer(&buffer);
var seen_table: std.AutoArrayHashMapUnmanaged(*Package.Module, []const u8) = .empty;
defer seen_table.deinit(comp.gpa);
@@ -4871,32 +4876,45 @@ fn docsCopyFallible(comp: *Compilation) anyerror!void {
var i: usize = 0;
while (i < seen_table.count()) : (i += 1) {
const mod = seen_table.keys()[i];
try comp.docsCopyModule(mod, seen_table.values()[i], tar_file);
try comp.docsCopyModule(mod, seen_table.values()[i], &tar_file_writer);
const deps = mod.deps.values();
try seen_table.ensureUnusedCapacity(comp.gpa, deps.len);
for (deps) |dep| seen_table.putAssumeCapacity(dep, dep.fully_qualified_name);
}
tar_file_writer.end() catch |err| {
return comp.lockAndSetMiscFailure(
.docs_copy,
"unable to write '{f}/sources.tar': {t}",
.{ docs_path, err },
);
};
}
fn docsCopyModule(comp: *Compilation, module: *Package.Module, name: []const u8, tar_file: fs.File) !void {
fn docsCopyModule(
comp: *Compilation,
module: *Package.Module,
name: []const u8,
tar_file_writer: *fs.File.Writer,
) !void {
const root = module.root;
var mod_dir = d: {
const root_dir, const sub_path = root.openInfo(comp.dirs);
break :d root_dir.openDir(sub_path, .{ .iterate = true });
} catch |err| {
return comp.lockAndSetMiscFailure(.docs_copy, "unable to open directory '{f}': {s}", .{
root.fmt(comp), @errorName(err),
});
return comp.lockAndSetMiscFailure(.docs_copy, "unable to open directory '{f}': {t}", .{ root.fmt(comp), err });
};
defer mod_dir.close();
var walker = try mod_dir.walk(comp.gpa);
defer walker.deinit();
var archiver = std.tar.writer(tar_file.deprecatedWriter().any());
var archiver: std.tar.Writer = .{ .underlying_writer = &tar_file_writer.interface };
archiver.prefix = name;
var buffer: [1024]u8 = undefined;
while (try walker.next()) |entry| {
switch (entry.kind) {
.file => {
@@ -4907,14 +4925,17 @@ fn docsCopyModule(comp: *Compilation, module: *Package.Module, name: []const u8,
else => continue,
}
var file = mod_dir.openFile(entry.path, .{}) catch |err| {
return comp.lockAndSetMiscFailure(.docs_copy, "unable to open '{f}{s}': {s}", .{
root.fmt(comp), entry.path, @errorName(err),
return comp.lockAndSetMiscFailure(.docs_copy, "unable to open {f}{s}: {t}", .{
root.fmt(comp), entry.path, err,
});
};
defer file.close();
archiver.writeFile(entry.path, file) catch |err| {
return comp.lockAndSetMiscFailure(.docs_copy, "unable to archive '{f}{s}': {s}", .{
root.fmt(comp), entry.path, @errorName(err),
const stat = try file.stat();
var file_reader: fs.File.Reader = .initSize(file, &buffer, stat.size);
archiver.writeFile(entry.path, &file_reader, stat.mtime) catch |err| {
return comp.lockAndSetMiscFailure(.docs_copy, "unable to archive {f}{s}: {t}", .{
root.fmt(comp), entry.path, err,
});
};
}
@@ -4926,9 +4947,7 @@ fn workerDocsWasm(comp: *Compilation, parent_prog_node: std.Progress.Node) void
workerDocsWasmFallible(comp, prog_node) catch |err| switch (err) {
error.SubCompilationFailed => return, // error reported already
else => comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {s}", .{
@errorName(err),
}),
else => comp.lockAndSetMiscFailure(.docs_wasm, "unable to build autodocs: {t}", .{err}),
};
}
@@ -6206,19 +6225,20 @@ fn spawnZigRc(
return comp.failWin32Resource(win32_resource, "unable to spawn {s} rc: {s}", .{ argv[0], @errorName(err) });
};
var poller = std.io.poll(comp.gpa, enum { stdout }, .{
var poller = std.Io.poll(comp.gpa, enum { stdout, stderr }, .{
.stdout = child.stdout.?,
.stderr = child.stderr.?,
});
defer poller.deinit();
const stdout = poller.fifo(.stdout);
const stdout = poller.reader(.stdout);
poll: while (true) {
while (stdout.readableLength() < @sizeOf(std.zig.Server.Message.Header)) if (!try poller.poll()) break :poll;
var header: std.zig.Server.Message.Header = undefined;
assert(stdout.read(std.mem.asBytes(&header)) == @sizeOf(std.zig.Server.Message.Header));
while (stdout.readableLength() < header.bytes_len) if (!try poller.poll()) break :poll;
const body = stdout.readableSliceOfLen(header.bytes_len);
const MessageHeader = std.zig.Server.Message.Header;
while (stdout.buffered().len < @sizeOf(MessageHeader)) if (!try poller.poll()) break :poll;
const header = stdout.takeStruct(MessageHeader, .little) catch unreachable;
while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll;
const body = stdout.take(header.bytes_len) catch unreachable;
switch (header.tag) {
// We expect exactly one ErrorBundle, and if any error_bundle header is
@@ -6241,13 +6261,10 @@ fn spawnZigRc(
},
else => {}, // ignore other messages
}
stdout.discard(body.len);
}
// Just in case there's a failure that didn't send an ErrorBundle (e.g. an error return trace)
const stderr_reader = child.stderr.?.deprecatedReader();
const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024);
const stderr = poller.reader(.stderr);
const term = child.wait() catch |err| {
return comp.failWin32Resource(win32_resource, "unable to wait for {s} rc: {s}", .{ argv[0], @errorName(err) });
@@ -6256,12 +6273,12 @@ fn spawnZigRc(
switch (term) {
.Exited => |code| {
if (code != 0) {
log.err("zig rc failed with stderr:\n{s}", .{stderr});
log.err("zig rc failed with stderr:\n{s}", .{stderr.buffered()});
return comp.failWin32Resource(win32_resource, "zig rc exited with code {d}", .{code});
}
},
else => {
log.err("zig rc terminated with stderr:\n{s}", .{stderr});
log.err("zig rc terminated with stderr:\n{s}", .{stderr.buffered()});
return comp.failWin32Resource(win32_resource, "zig rc terminated unexpectedly", .{});
},
}
+10 -4
View File
@@ -7556,12 +7556,18 @@ fn extraFuncCoerced(ip: *const InternPool, extra: Local.Extra, extra_index: u32)
fn indexToKeyBigInt(ip: *const InternPool, tid: Zcu.PerThread.Id, limb_index: u32, positive: bool) Key {
const limbs_items = ip.getLocalShared(tid).getLimbs().view().items(.@"0");
const int: Int = @bitCast(limbs_items[limb_index..][0..Int.limbs_items_len].*);
const big_int: BigIntConst = .{
.limbs = limbs_items[limb_index + Int.limbs_items_len ..][0..int.limbs_len],
.positive = positive,
};
return .{ .int = .{
.ty = int.ty,
.storage = .{ .big_int = .{
.limbs = limbs_items[limb_index + Int.limbs_items_len ..][0..int.limbs_len],
.positive = positive,
} },
.storage = if (big_int.toInt(u64)) |x|
.{ .u64 = x }
else |_| if (big_int.toInt(i64)) |x|
.{ .i64 = x }
else |_|
.{ .big_int = big_int },
} };
}
+22 -14
View File
@@ -1197,12 +1197,18 @@ fn unpackResource(
};
switch (file_type) {
.tar => return try unpackTarball(f, tmp_directory.handle, resource.reader()),
.tar => {
var adapter_buffer: [1024]u8 = undefined;
var adapter = resource.reader().adaptToNewApi(&adapter_buffer);
return unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
},
.@"tar.gz" => {
const reader = resource.reader();
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
var dcp = std.compress.gzip.decompressor(br.reader());
return try unpackTarball(f, tmp_directory.handle, dcp.reader());
var adapter_buffer: [1024]u8 = undefined;
var adapter = dcp.reader().adaptToNewApi(&adapter_buffer);
return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
},
.@"tar.xz" => {
const gpa = f.arena.child_allocator;
@@ -1215,17 +1221,19 @@ fn unpackResource(
));
};
defer dcp.deinit();
return try unpackTarball(f, tmp_directory.handle, dcp.reader());
var adapter_buffer: [1024]u8 = undefined;
var adapter = dcp.reader().adaptToNewApi(&adapter_buffer);
return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
},
.@"tar.zst" => {
const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len;
const window_size = std.compress.zstd.default_window_len;
const window_buffer = try f.arena.allocator().create([window_size]u8);
const reader = resource.reader();
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
var dcp = std.compress.zstd.decompressor(br.reader(), .{
.window_buffer = window_buffer,
var adapter_buffer: [std.crypto.tls.max_ciphertext_record_len]u8 = undefined;
var adapter = resource.reader().adaptToNewApi(&adapter_buffer);
var decompress: std.compress.zstd.Decompress = .init(&adapter.new_interface, window_buffer, .{
.verify_checksum = false,
});
return try unpackTarball(f, tmp_directory.handle, dcp.reader());
return try unpackTarball(f, tmp_directory.handle, &decompress.reader);
},
.git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) {
error.FetchFailed => return error.FetchFailed,
@@ -1239,7 +1247,7 @@ fn unpackResource(
}
}
fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult {
fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: *std.Io.Reader) RunError!UnpackResult {
const eb = &f.error_bundle;
const arena = f.arena.allocator();
@@ -1250,10 +1258,10 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackRes
.strip_components = 0,
.mode_mode = .ignore,
.exclude_empty_directories = true,
}) catch |err| return f.fail(f.location_tok, try eb.printString(
"unable to unpack tarball to temporary directory: {s}",
.{@errorName(err)},
));
}) catch |err| return f.fail(
f.location_tok,
try eb.printString("unable to unpack tarball to temporary directory: {t}", .{err}),
);
var res: UnpackResult = .{ .root_dir = diagnostics.root_dir };
if (diagnostics.errors.items.len > 0) {
+60 -5
View File
@@ -1281,7 +1281,7 @@ pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, in
}
@memset(fan_out_table[fan_out_index..], count);
var index_hashed_writer = std.compress.hashedWriter(index_writer, Oid.Hasher.init(format));
var index_hashed_writer = hashedWriter(index_writer, Oid.Hasher.init(format));
const writer = index_hashed_writer.writer();
try writer.writeAll(IndexHeader.signature);
try writer.writeInt(u32, IndexHeader.supported_version, .big);
@@ -1331,7 +1331,7 @@ fn indexPackFirstPass(
) !Oid {
var pack_buffered_reader = std.io.bufferedReader(pack.deprecatedReader());
var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
var pack_hashed_reader = hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
const pack_reader = pack_hashed_reader.reader();
const pack_header = try PackHeader.read(pack_reader);
@@ -1339,13 +1339,13 @@ fn indexPackFirstPass(
var current_entry: u32 = 0;
while (current_entry < pack_header.total_objects) : (current_entry += 1) {
const entry_offset = pack_counting_reader.bytes_read;
var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
var entry_crc32_reader = hashedReader(pack_reader, std.hash.Crc32.init());
const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader());
switch (entry_header) {
.commit, .tree, .blob, .tag => |object| {
var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
var entry_hashed_writer = hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
const entry_writer = entry_hashed_writer.writer();
// The object header is not included in the pack data but is
// part of the object's ID
@@ -1432,7 +1432,7 @@ fn indexPackHashDelta(
const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
var entry_hasher: Oid.Hasher = .init(format);
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, &entry_hasher);
var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
entry_hasher.update(base_data);
return entry_hasher.finalResult();
@@ -1703,3 +1703,58 @@ pub fn main() !void {
std.debug.print("Diagnostic: {}\n", .{err});
}
}
/// Deprecated
///
/// Builds a `HashedReader` that reads from `reader` and updates `hasher`
/// with the bytes it reads.
fn hashedReader(reader: anytype, hasher: anytype) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
    const Wrapper = HashedReader(@TypeOf(reader), @TypeOf(hasher));
    return Wrapper{
        .child_reader = reader,
        .hasher = hasher,
    };
}
/// Deprecated
///
/// Returns a reader adapter type that forwards reads to a `ReaderType` and
/// passes every byte actually read to a `HasherType` through `update`.
fn HashedReader(ReaderType: type, HasherType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error;
        pub const Reader = std.io.GenericReader(*Self, Error, read);

        child_reader: ReaderType,
        hasher: HasherType,

        /// Returns a generic reader whose reads go through `read`.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Reads from the child reader into `buf`, hashes only the bytes that
        /// were filled in, and returns how many there were.
        pub fn read(self: *Self, buf: []u8) Error!usize {
            const len = try self.child_reader.read(buf);
            const filled = buf[0..len];
            self.hasher.update(filled);
            return len;
        }
    };
}
/// Deprecated
///
/// Builds a writer adapter type. Each `write` is forwarded to `child_writer`,
/// and only the prefix that the child reported as written is passed to
/// `hasher.update`, so short writes are hashed exactly once.
pub fn HashedWriter(WriterType: type, HasherType: type) type {
    return struct {
        child_writer: WriterType,
        hasher: HasherType,

        const Self = @This();

        pub const Error = WriterType.Error;
        pub const Writer = std.io.GenericWriter(*Self, Error, write);

        /// Writes `buf` through the child writer and hashes the accepted prefix.
        pub fn write(self: *Self, buf: []const u8) Error!usize {
            const accepted = buf[0..try self.child_writer.write(buf)];
            self.hasher.update(accepted);
            return accepted.len;
        }

        /// Returns a generic writer whose context is this adapter.
        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }
    };
}
/// Deprecated
///
/// Convenience constructor for `HashedWriter`: stores `writer` as the child
/// writer and `hasher` as the hasher, inferring both type parameters from
/// the arguments.
pub fn hashedWriter(
    writer: anytype,
    hasher: anytype,
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
    const Wrapper = HashedWriter(@TypeOf(writer), @TypeOf(hasher));
    return Wrapper{ .child_writer = writer, .hasher = hasher };
}
+1 -1
View File
@@ -250,7 +250,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
};
const stack_check = b: {
if (!target_util.supportsStackProbing(target)) {
if (!target_util.supportsStackProbing(target, zig_backend)) {
if (options.inherited.stack_check == true)
return error.StackCheckUnsupportedByTarget;
break :b false;
+19 -12
View File
@@ -16522,7 +16522,7 @@ fn zirAsm(
break :empty try sema.structInitEmpty(block, clobbers_ty, src, src);
} else try sema.resolveInst(extra.data.clobbers); // Already coerced by AstGen.
const clobbers_val = try sema.resolveConstDefinedValue(block, src, clobbers, .{ .simple = .clobber });
needed_capacity += (asm_source.len + 3) / 4;
needed_capacity += asm_source.len / 4 + 1;
const gpa = sema.gpa;
try sema.air_extra.ensureUnusedCapacity(gpa, needed_capacity);
@@ -16562,7 +16562,8 @@ fn zirAsm(
{
const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
@memcpy(buffer[0..asm_source.len], asm_source);
sema.air_extra.items.len += (asm_source.len + 3) / 4;
buffer[asm_source.len] = 0;
sema.air_extra.items.len += asm_source.len / 4 + 1;
}
return asm_air;
}
@@ -22482,11 +22483,18 @@ fn ptrCastFull(
.slice => {},
.many, .c, .one => break :len null,
}
// `null` means the operand is a runtime-known slice (so the length is runtime-known).
const opt_src_len: ?u64 = switch (src_info.flags.size) {
.one => 1,
.slice => src_len: {
const operand_val = try sema.resolveValue(operand) orelse break :src_len null;
// A `null` length means the operand is a runtime-known slice (so the length is runtime-known).
// `src_elem_type` is different from `src_info.child` if the latter is an array, to ensure we ignore sentinels.
const src_elem_ty: Type, const opt_src_len: ?u64 = switch (src_info.flags.size) {
.one => src: {
const true_child: Type = .fromInterned(src_info.child);
break :src switch (true_child.zigTypeTag(zcu)) {
.array => .{ true_child.childType(zcu), true_child.arrayLen(zcu) },
else => .{ true_child, 1 },
};
},
.slice => src: {
const operand_val = try sema.resolveValue(operand) orelse break :src .{ .fromInterned(src_info.child), null };
if (operand_val.isUndef(zcu)) break :len .undef;
const slice_val = switch (operand_ty.zigTypeTag(zcu)) {
.optional => operand_val.optionalValue(zcu) orelse break :len .undef,
@@ -22495,14 +22503,13 @@ fn ptrCastFull(
};
const slice_len_resolved = try sema.resolveLazyValue(.fromInterned(zcu.intern_pool.sliceLen(slice_val.toIntern())));
if (slice_len_resolved.isUndef(zcu)) break :len .undef;
break :src_len slice_len_resolved.toUnsignedInt(zcu);
break :src .{ .fromInterned(src_info.child), slice_len_resolved.toUnsignedInt(zcu) };
},
.many, .c => {
return sema.fail(block, src, "cannot infer length of slice from {s}", .{pointerSizeString(src_info.flags.size)});
},
};
const dest_elem_ty: Type = .fromInterned(dest_info.child);
const src_elem_ty: Type = .fromInterned(src_info.child);
if (dest_elem_ty.toIntern() == src_elem_ty.toIntern()) {
break :len if (opt_src_len) |l| .{ .constant = l } else .equal_runtime_src_slice;
}
@@ -22518,7 +22525,7 @@ fn ptrCastFull(
const bytes = src_len * src_elem_size;
const dest_len = std.math.divExact(u64, bytes, dest_elem_size) catch switch (src_info.flags.size) {
.slice => return sema.fail(block, src, "slice length '{d}' does not divide exactly into destination elements", .{src_len}),
.one => return sema.fail(block, src, "type '{f}' does not divide exactly into destination elements", .{src_elem_ty.fmt(pt)}),
.one => return sema.fail(block, src, "type '{f}' does not divide exactly into destination elements", .{Type.fromInterned(src_info.child).fmt(pt)}),
else => unreachable,
};
break :len .{ .constant = dest_len };
@@ -24846,7 +24853,7 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Ins
},
.@"packed" => {
const byte_offset = std.math.divExact(u32, @abs(@as(i32, actual_parent_ptr_info.packed_offset.bit_offset) +
(if (zcu.typeToStruct(parent_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, field_index) else 0) -
(if (zcu.typeToStruct(parent_ty)) |struct_obj| zcu.structPackedFieldBitOffset(struct_obj, field_index) else 0) -
actual_field_ptr_info.packed_offset.bit_offset), 8) catch
return sema.fail(block, inst_src, "pointer bit-offset mismatch", .{});
actual_parent_ptr_info.flags.alignment = actual_field_ptr_info.flags.alignment.minStrict(if (byte_offset > 0)
@@ -24873,7 +24880,7 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Ins
// Logic lifted from type computation above - I'm just assuming it's correct.
// `catch unreachable` since error case handled above.
const byte_offset = std.math.divExact(u32, @abs(@as(i32, actual_parent_ptr_info.packed_offset.bit_offset) +
pt.structPackedFieldBitOffset(zcu.typeToStruct(parent_ty).?, field_index) -
zcu.structPackedFieldBitOffset(zcu.typeToStruct(parent_ty).?, field_index) -
actual_field_ptr_info.packed_offset.bit_offset), 8) catch unreachable;
const parent_ptr_val = try sema.ptrSubtract(block, field_ptr_src, field_ptr_val, byte_offset, actual_parent_ptr_ty);
break :result Air.internedToRef(parent_ptr_val.toIntern());
+1 -1
View File
@@ -4166,7 +4166,7 @@ pub const generic_poison: Type = .{ .ip_index = .generic_poison_type };
/// Returns the number of bits an unsigned integer type needs in order to
/// represent `max`; zero when `max` is zero.
pub fn smallestUnsignedBits(max: u64) u16 {
    return switch (max) {
        0 => 0,
        // `log2_int(u64, ...)` yields a 6-bit integer. Widening the `1` to
        // `u16` first makes the addition happen in `u16`, so `max >= 1 << 63`
        // (log2 == 63) produces 64 instead of overflowing.
        else => @as(u16, 1) + std.math.log2_int(u64, max),
    };
}
+24 -5
View File
@@ -3891,6 +3891,29 @@ pub fn typeToPackedStruct(zcu: *const Zcu, ty: Type) ?InternPool.LoadedStructTyp
return s;
}
/// Returns the bit offset of field `field_index` within the packed struct
/// `struct_type`, computed as the sum of the bit sizes of all preceding fields.
///
/// Asserts that the struct has packed layout and that its layout is resolved.
/// `field_index` must be a valid field index.
///
/// https://github.com/ziglang/zig/issues/17178 explored storing these bit offsets
/// into the packed struct InternPool data rather than computing this on the
/// fly, however it was found to perform worse when measured on real world
/// projects.
pub fn structPackedFieldBitOffset(
    zcu: *Zcu,
    struct_type: InternPool.LoadedStructType,
    field_index: u32,
) u16 {
    const ip = &zcu.intern_pool;
    assert(struct_type.layout == .@"packed");
    assert(struct_type.haveLayout(ip));

    var offset_bits: u64 = 0;
    var i: usize = 0;
    while (i < struct_type.field_types.len) : (i += 1) {
        if (i == field_index) return @intCast(offset_bits);
        const preceding_ty = Type.fromInterned(struct_type.field_types.get(ip)[i]);
        offset_bits += preceding_ty.bitSize(zcu);
    }
    unreachable; // index out of bounds
}
pub fn typeToUnion(zcu: *const Zcu, ty: Type) ?InternPool.LoadedUnionType {
if (ty.ip_index == .none) return null;
const ip = &zcu.intern_pool;
@@ -4436,11 +4459,7 @@ pub fn callconvSupported(zcu: *Zcu, cc: std.builtin.CallingConvention) union(enu
else => false,
},
.stage2_aarch64 => switch (cc) {
.aarch64_aapcs,
.aarch64_aapcs_darwin,
.aarch64_aapcs_win,
=> |opts| opts.incoming_stack_alignment == null,
.naked => true,
.aarch64_aapcs, .aarch64_aapcs_darwin, .naked => true,
else => false,
},
.stage2_x86 => switch (cc) {
+7 -28
View File
@@ -3737,30 +3737,6 @@ pub fn intBitsForValue(pt: Zcu.PerThread, val: Value, sign: bool) u16 {
}
}
/// Returns the bit offset of field `field_index` within the packed struct
/// `struct_type`, computed as the sum of the bit sizes of all preceding fields.
///
/// Asserts that the struct has packed layout and that its layout is resolved.
/// `field_index` must be a valid field index.
///
/// https://github.com/ziglang/zig/issues/17178 explored storing these bit offsets
/// into the packed struct InternPool data rather than computing this on the
/// fly, however it was found to perform worse when measured on real world
/// projects.
pub fn structPackedFieldBitOffset(
    pt: Zcu.PerThread,
    struct_type: InternPool.LoadedStructType,
    field_index: u32,
) u16 {
    const zcu = pt.zcu;
    const ip = &zcu.intern_pool;
    assert(struct_type.layout == .@"packed");
    assert(struct_type.haveLayout(ip));

    var offset_bits: u64 = 0;
    var i: usize = 0;
    while (i < struct_type.field_types.len) : (i += 1) {
        if (i == field_index) return @intCast(offset_bits);
        const preceding_ty = Type.fromInterned(struct_type.field_types.get(ip)[i]);
        offset_bits += preceding_ty.bitSize(zcu);
    }
    unreachable; // index out of bounds
}
pub fn navPtrType(pt: Zcu.PerThread, nav_id: InternPool.Nav.Index) Allocator.Error!Type {
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -4381,8 +4357,11 @@ fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) e
try air.legalize(pt, features);
}
var liveness: Air.Liveness = try .analyze(zcu, air.*, ip);
defer liveness.deinit(gpa);
var liveness: ?Air.Liveness = if (codegen.wantsLiveness(pt, nav))
try .analyze(zcu, air.*, ip)
else
null;
defer if (liveness) |*l| l.deinit(gpa);
if (build_options.enable_debug_extensions and comp.verbose_air) {
const stderr = std.debug.lockStderrWriter(&.{});
@@ -4392,12 +4371,12 @@ fn runCodegenInner(pt: Zcu.PerThread, func_index: InternPool.Index, air: *Air) e
stderr.print("# End Function AIR: {f}\n\n", .{fqn.fmt(ip)}) catch {};
}
if (std.debug.runtime_safety) {
if (std.debug.runtime_safety) verify_liveness: {
var verify: Air.Liveness.Verify = .{
.gpa = gpa,
.zcu = zcu,
.air = air.*,
.liveness = liveness,
.liveness = liveness orelse break :verify_liveness,
.intern_pool = ip,
};
defer verify.deinit();
-2063
View File
@@ -1,2063 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const testing = std.testing;
/// Disjoint sets of registers. Every register must belong to
/// exactly one register class.
pub const RegisterClass = enum {
/// Class reported by `Register.class` for the `x0`..`xzr` and `w0`..`wzr` registers.
general_purpose,
/// Class reported by `Register.class` for `sp` and `wsp`.
stack_pointer,
/// Class reported by `Register.class` for the `q`, `d`, `s`, `h` and `b` registers.
floating_point,
};
/// Registers in the AArch64 instruction set.
///
/// The declaration order matters: every bank (`x`, `w`, `q`, `d`, `s`, `h`,
/// `b`) is a contiguous run of 32 tags, which the range switches and the
/// alias conversions below rely on.
pub const Register = enum(u8) {
    // zig fmt: off

    // 64-bit general-purpose registers
    x0, x1, x2, x3, x4, x5, x6, x7,
    x8, x9, x10, x11, x12, x13, x14, x15,
    x16, x17, x18, x19, x20, x21, x22, x23,
    x24, x25, x26, x27, x28, x29, x30, xzr,

    // 32-bit general-purpose registers
    w0, w1, w2, w3, w4, w5, w6, w7,
    w8, w9, w10, w11, w12, w13, w14, w15,
    w16, w17, w18, w19, w20, w21, w22, w23,
    w24, w25, w26, w27, w28, w29, w30, wzr,

    // Stack pointer
    sp, wsp,

    // 128-bit floating-point registers
    q0, q1, q2, q3, q4, q5, q6, q7,
    q8, q9, q10, q11, q12, q13, q14, q15,
    q16, q17, q18, q19, q20, q21, q22, q23,
    q24, q25, q26, q27, q28, q29, q30, q31,

    // 64-bit floating-point registers
    d0, d1, d2, d3, d4, d5, d6, d7,
    d8, d9, d10, d11, d12, d13, d14, d15,
    d16, d17, d18, d19, d20, d21, d22, d23,
    d24, d25, d26, d27, d28, d29, d30, d31,

    // 32-bit floating-point registers
    s0, s1, s2, s3, s4, s5, s6, s7,
    s8, s9, s10, s11, s12, s13, s14, s15,
    s16, s17, s18, s19, s20, s21, s22, s23,
    s24, s25, s26, s27, s28, s29, s30, s31,

    // 16-bit floating-point registers
    h0, h1, h2, h3, h4, h5, h6, h7,
    h8, h9, h10, h11, h12, h13, h14, h15,
    h16, h17, h18, h19, h20, h21, h22, h23,
    h24, h25, h26, h27, h28, h29, h30, h31,

    // 8-bit floating-point registers
    b0, b1, b2, b3, b4, b5, b6, b7,
    b8, b9, b10, b11, b12, b13, b14, b15,
    b16, b17, b18, b19, b20, b21, b22, b23,
    b24, b25, b26, b27, b28, b29, b30, b31,

    // zig fmt: on

    /// Returns the register class this register belongs to.
    pub fn class(self: Register) RegisterClass {
        return switch (@intFromEnum(self)) {
            @intFromEnum(Register.x0)...@intFromEnum(Register.xzr) => .general_purpose,
            @intFromEnum(Register.w0)...@intFromEnum(Register.wzr) => .general_purpose,

            @intFromEnum(Register.sp) => .stack_pointer,
            @intFromEnum(Register.wsp) => .stack_pointer,

            @intFromEnum(Register.q0)...@intFromEnum(Register.q31) => .floating_point,
            @intFromEnum(Register.d0)...@intFromEnum(Register.d31) => .floating_point,
            @intFromEnum(Register.s0)...@intFromEnum(Register.s31) => .floating_point,
            @intFromEnum(Register.h0)...@intFromEnum(Register.h31) => .floating_point,
            @intFromEnum(Register.b0)...@intFromEnum(Register.b31) => .floating_point,
            else => unreachable,
        };
    }

    /// Returns an identifier shared by all width aliases of the same register:
    /// 0..31 for general-purpose registers, 32 for the stack pointer, and
    /// 33 + n for floating-point register n.
    ///
    /// NOTE(review): for floating-point register 31 this computes 31 + 33 = 64,
    /// which does not fit in the `u6` result, so the `@intCast` is a checked
    /// failure in safe builds. Confirm whether that register can reach here.
    pub fn id(self: Register) u6 {
        return switch (@intFromEnum(self)) {
            @intFromEnum(Register.x0)...@intFromEnum(Register.xzr) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.x0))),
            @intFromEnum(Register.w0)...@intFromEnum(Register.wzr) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.w0))),

            @intFromEnum(Register.sp) => 32,
            @intFromEnum(Register.wsp) => 32,

            @intFromEnum(Register.q0)...@intFromEnum(Register.q31) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.q0) + 33)),
            @intFromEnum(Register.d0)...@intFromEnum(Register.d31) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.d0) + 33)),
            @intFromEnum(Register.s0)...@intFromEnum(Register.s31) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.s0) + 33)),
            @intFromEnum(Register.h0)...@intFromEnum(Register.h31) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.h0) + 33)),
            @intFromEnum(Register.b0)...@intFromEnum(Register.b31) => @as(u6, @intCast(@intFromEnum(self) - @intFromEnum(Register.b0) + 33)),
            else => unreachable,
        };
    }

    /// Returns the 5-bit register number: the index within the register's
    /// bank, or 31 for `sp` and `wsp`.
    pub fn enc(self: Register) u5 {
        return switch (@intFromEnum(self)) {
            @intFromEnum(Register.x0)...@intFromEnum(Register.xzr) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.x0))),
            @intFromEnum(Register.w0)...@intFromEnum(Register.wzr) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.w0))),

            @intFromEnum(Register.sp) => 31,
            @intFromEnum(Register.wsp) => 31,

            @intFromEnum(Register.q0)...@intFromEnum(Register.q31) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.q0))),
            @intFromEnum(Register.d0)...@intFromEnum(Register.d31) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.d0))),
            @intFromEnum(Register.s0)...@intFromEnum(Register.s31) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.s0))),
            @intFromEnum(Register.h0)...@intFromEnum(Register.h31) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.h0))),
            @intFromEnum(Register.b0)...@intFromEnum(Register.b31) => @as(u5, @intCast(@intFromEnum(self) - @intFromEnum(Register.b0))),
            else => unreachable,
        };
    }

    /// Returns the bit-width of the register.
    pub fn size(self: Register) u8 {
        return switch (@intFromEnum(self)) {
            @intFromEnum(Register.x0)...@intFromEnum(Register.xzr) => 64,
            @intFromEnum(Register.w0)...@intFromEnum(Register.wzr) => 32,

            @intFromEnum(Register.sp) => 64,
            @intFromEnum(Register.wsp) => 32,

            @intFromEnum(Register.q0)...@intFromEnum(Register.q31) => 128,
            @intFromEnum(Register.d0)...@intFromEnum(Register.d31) => 64,
            @intFromEnum(Register.s0)...@intFromEnum(Register.s31) => 32,
            @intFromEnum(Register.h0)...@intFromEnum(Register.h31) => 16,
            @intFromEnum(Register.b0)...@intFromEnum(Register.b31) => 8,
            else => unreachable,
        };
    }

    /// Convert from a general-purpose register to its 64 bit alias.
    pub fn toX(self: Register) Register {
        return self.generalPurposeAlias(.x0);
    }

    /// Convert from a general-purpose register to its 32 bit alias.
    pub fn toW(self: Register) Register {
        return self.generalPurposeAlias(.w0);
    }

    /// Convert from a floating-point register to its 128 bit alias.
    pub fn toQ(self: Register) Register {
        return self.floatingPointAlias(.q0);
    }

    /// Convert from a floating-point register to its 64 bit alias.
    pub fn toD(self: Register) Register {
        return self.floatingPointAlias(.d0);
    }

    /// Convert from a floating-point register to its 32 bit alias.
    pub fn toS(self: Register) Register {
        return self.floatingPointAlias(.s0);
    }

    /// Convert from a floating-point register to its 16 bit alias.
    pub fn toH(self: Register) Register {
        return self.floatingPointAlias(.h0);
    }

    /// Convert from a floating-point register to its 8 bit alias.
    pub fn toB(self: Register) Register {
        return self.floatingPointAlias(.b0);
    }

    /// Returns the DWARF register number, which is the same value as `enc`.
    pub fn dwarfNum(self: Register) u5 {
        return self.enc();
    }

    /// Maps an `x` or `w` register onto the bank starting at `bank_start`,
    /// keeping the register number. Any other register is unreachable
    /// (this includes `sp` and `wsp`).
    fn generalPurposeAlias(self: Register, bank_start: Register) Register {
        switch (@intFromEnum(self)) {
            @intFromEnum(Register.x0)...@intFromEnum(Register.xzr),
            @intFromEnum(Register.w0)...@intFromEnum(Register.wzr),
            => {},
            else => unreachable,
        }
        // Within these banks `enc` is the index inside the bank.
        return @enumFromInt(@intFromEnum(bank_start) + self.enc());
    }

    /// Maps a `q`, `d`, `s`, `h` or `b` register onto the bank starting at
    /// `bank_start`, keeping the register number. Any other register is
    /// unreachable.
    fn floatingPointAlias(self: Register, bank_start: Register) Register {
        switch (@intFromEnum(self)) {
            @intFromEnum(Register.q0)...@intFromEnum(Register.q31),
            @intFromEnum(Register.d0)...@intFromEnum(Register.d31),
            @intFromEnum(Register.s0)...@intFromEnum(Register.s31),
            @intFromEnum(Register.h0)...@intFromEnum(Register.h31),
            @intFromEnum(Register.b0)...@intFromEnum(Register.b31),
            => {},
            else => unreachable,
        }
        // Within these banks `enc` is the index inside the bank.
        return @enumFromInt(@intFromEnum(bank_start) + self.enc());
    }
};
// Checks `enc` for the first and last register of each general-purpose bank
// and for both stack pointer aliases, which share number 31.
test "Register.enc" {
    try testing.expectEqual(@as(u5, 0), Register.x0.enc());
    try testing.expectEqual(@as(u5, 0), Register.w0.enc());

    try testing.expectEqual(@as(u5, 31), Register.xzr.enc());
    try testing.expectEqual(@as(u5, 31), Register.wzr.enc());

    try testing.expectEqual(@as(u5, 31), Register.sp.enc());
    // Previously `sp` was asserted twice, leaving `wsp` uncovered.
    try testing.expectEqual(@as(u5, 31), Register.wsp.enc());
}
// `size` reports the width in bits: 64 for an `x` register, 32 for a `w` register.
test "Register.size" {
try testing.expectEqual(@as(u8, 64), Register.x19.size());
try testing.expectEqual(@as(u8, 32), Register.w3.size());
}
// `toX` / `toW` keep the register number and switch banks; converting a
// register to the bank it is already in returns it unchanged.
test "Register.toX/toW" {
try testing.expectEqual(Register.x0, Register.w0.toX());
try testing.expectEqual(Register.x0, Register.x0.toX());
try testing.expectEqual(Register.w3, Register.w3.toW());
try testing.expectEqual(Register.w3, Register.x3.toW());
}
/// Represents an instruction in the AArch64 instruction set
pub const Instruction = union(enum) {
move_wide_immediate: packed struct {
rd: u5,
imm16: u16,
hw: u2,
fixed: u6 = 0b100101,
opc: u2,
sf: u1,
},
pc_relative_address: packed struct {
rd: u5,
immhi: u19,
fixed: u5 = 0b10000,
immlo: u2,
op: u1,
},
load_store_register: packed struct {
rt: u5,
rn: u5,
offset: u12,
opc: u2,
op1: u2,
v: u1,
fixed: u3 = 0b111,
size: u2,
},
load_store_register_pair: packed struct {
rt1: u5,
rn: u5,
rt2: u5,
imm7: u7,
load: u1,
encoding: u2,
fixed: u5 = 0b101_0_0,
opc: u2,
},
load_literal: packed struct {
rt: u5,
imm19: u19,
fixed: u6 = 0b011_0_00,
opc: u2,
},
exception_generation: packed struct {
ll: u2,
op2: u3,
imm16: u16,
opc: u3,
fixed: u8 = 0b1101_0100,
},
unconditional_branch_register: packed struct {
op4: u5,
rn: u5,
op3: u6,
op2: u5,
opc: u4,
fixed: u7 = 0b1101_011,
},
unconditional_branch_immediate: packed struct {
imm26: u26,
fixed: u5 = 0b00101,
op: u1,
},
no_operation: packed struct {
fixed: u32 = 0b1101010100_0_00_011_0010_0000_000_11111,
},
logical_shifted_register: packed struct {
rd: u5,
rn: u5,
imm6: u6,
rm: u5,
n: u1,
shift: u2,
fixed: u5 = 0b01010,
opc: u2,
sf: u1,
},
add_subtract_immediate: packed struct {
rd: u5,
rn: u5,
imm12: u12,
sh: u1,
fixed: u6 = 0b100010,
s: u1,
op: u1,
sf: u1,
},
logical_immediate: packed struct {
rd: u5,
rn: u5,
imms: u6,
immr: u6,
n: u1,
fixed: u6 = 0b100100,
opc: u2,
sf: u1,
},
bitfield: packed struct {
rd: u5,
rn: u5,
imms: u6,
immr: u6,
n: u1,
fixed: u6 = 0b100110,
opc: u2,
sf: u1,
},
add_subtract_shifted_register: packed struct {
rd: u5,
rn: u5,
imm6: u6,
rm: u5,
fixed_1: u1 = 0b0,
shift: u2,
fixed_2: u5 = 0b01011,
s: u1,
op: u1,
sf: u1,
},
add_subtract_extended_register: packed struct {
rd: u5,
rn: u5,
imm3: u3,
option: u3,
rm: u5,
fixed: u8 = 0b01011_00_1,
s: u1,
op: u1,
sf: u1,
},
conditional_branch: struct {
cond: u4,
o0: u1,
imm19: u19,
o1: u1,
fixed: u7 = 0b0101010,
},
compare_and_branch: struct {
rt: u5,
imm19: u19,
op: u1,
fixed: u6 = 0b011010,
sf: u1,
},
conditional_select: struct {
rd: u5,
rn: u5,
op2: u2,
cond: u4,
rm: u5,
fixed: u8 = 0b11010100,
s: u1,
op: u1,
sf: u1,
},
data_processing_3_source: packed struct {
rd: u5,
rn: u5,
ra: u5,
o0: u1,
rm: u5,
op31: u3,
fixed: u5 = 0b11011,
op54: u2,
sf: u1,
},
data_processing_2_source: packed struct {
rd: u5,
rn: u5,
opcode: u6,
rm: u5,
fixed_1: u8 = 0b11010110,
s: u1,
fixed_2: u1 = 0b0,
sf: u1,
},
/// Condition codes. The tag value of each condition is its position in this
/// list (`eq` = 0 ... `nv` = 15).
pub const Condition = enum(u4) {
    /// Integer: Equal
    /// Floating point: Equal
    eq,
    /// Integer: Not equal
    /// Floating point: Not equal or unordered
    ne,
    /// Integer: Carry set
    /// Floating point: Greater than, equal, or unordered
    cs,
    /// Integer: Carry clear
    /// Floating point: Less than
    cc,
    /// Integer: Minus, negative
    /// Floating point: Less than
    mi,
    /// Integer: Plus, positive or zero
    /// Floating point: Greater than, equal, or unordered
    pl,
    /// Integer: Overflow
    /// Floating point: Unordered
    vs,
    /// Integer: No overflow
    /// Floating point: Ordered
    vc,
    /// Integer: Unsigned higher
    /// Floating point: Greater than, or unordered
    hi,
    /// Integer: Unsigned lower or same
    /// Floating point: Less than or equal
    ls,
    /// Integer: Signed greater than or equal
    /// Floating point: Greater than or equal
    ge,
    /// Integer: Signed less than
    /// Floating point: Less than, or unordered
    lt,
    /// Integer: Signed greater than
    /// Floating point: Greater than
    gt,
    /// Integer: Signed less than or equal
    /// Floating point: Less than, equal, or unordered
    le,
    /// Integer: Always
    /// Floating point: Always
    al,
    /// Integer: Always
    /// Floating point: Always
    nv,

    /// Maps a `std.math.CompareOperator` to the condition that holds exactly
    /// when the comparison is true, for signed operands.
    pub fn fromCompareOperatorSigned(op: std.math.CompareOperator) Condition {
        return switch (op) {
            .eq => .eq,
            .neq => .ne,
            .lt => .lt,
            .lte => .le,
            .gt => .gt,
            .gte => .ge,
        };
    }

    /// Maps a `std.math.CompareOperator` to the condition that holds exactly
    /// when the comparison is true, for unsigned operands.
    pub fn fromCompareOperatorUnsigned(op: std.math.CompareOperator) Condition {
        return switch (op) {
            .eq => .eq,
            .neq => .ne,
            .lt => .cc,
            .lte => .ls,
            .gt => .hi,
            .gte => .cs,
        };
    }

    /// Returns the condition that is true exactly when `cond` is false.
    /// `al` and `nv` have no such counterpart and are unreachable.
    pub fn negate(cond: Condition) Condition {
        return switch (cond) {
            .al, .nv => unreachable,
            // Every other condition sits next to its inverse as an even/odd
            // pair (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls, ge/lt, gt/le), so
            // flipping the lowest bit of the tag value selects the inverse.
            else => @enumFromInt(@intFromEnum(cond) ^ 1),
        };
    }
};
/// Returns the instruction as a `u32`.
///
/// Variants whose payload is a `packed struct` are bit-cast directly. The
/// three payloads declared as plain structs are assembled field by field with
/// explicit shifts.
pub fn toU32(self: Instruction) u32 {
    return switch (self) {
        inline .move_wide_immediate,
        .pc_relative_address,
        .load_store_register,
        .load_store_register_pair,
        .load_literal,
        .exception_generation,
        .unconditional_branch_register,
        .unconditional_branch_immediate,
        .no_operation,
        .logical_shifted_register,
        .add_subtract_immediate,
        .logical_immediate,
        .bitfield,
        .add_subtract_shifted_register,
        .add_subtract_extended_register,
        .data_processing_3_source,
        .data_processing_2_source,
        => |fields| @as(u32, @bitCast(fields)),

        // TODO once packed structs work, this can be refactored
        .conditional_branch => |v| @as(u32, v.cond) |
            (@as(u32, v.o0) << 4) |
            (@as(u32, v.imm19) << 5) |
            (@as(u32, v.o1) << 24) |
            (@as(u32, v.fixed) << 25),
        .compare_and_branch => |v| @as(u32, v.rt) |
            (@as(u32, v.imm19) << 5) |
            (@as(u32, v.op) << 24) |
            (@as(u32, v.fixed) << 25) |
            (@as(u32, v.sf) << 31),
        .conditional_select => |v| @as(u32, v.rd) |
            (@as(u32, v.rn) << 5) |
            (@as(u32, v.op2) << 10) |
            (@as(u32, v.cond) << 12) |
            (@as(u32, v.rm) << 16) |
            (@as(u32, v.fixed) << 21) |
            (@as(u32, v.s) << 29) |
            (@as(u32, v.op) << 30) |
            (@as(u32, v.sf) << 31),
    };
}
/// Builds a `move_wide_immediate` instruction.
///
/// `shift` must be a multiple of 16, at most 16 for a 32-bit `rd` and at most
/// 48 for a 64-bit `rd`; it is stored as `shift / 16` in `hw`. `sf` is 0 for
/// a 32-bit `rd` and 1 for a 64-bit `rd`; any other width is unreachable.
fn moveWideImmediate(
    opc: u2,
    rd: Register,
    imm16: u16,
    shift: u6,
) Instruction {
    assert(shift % 16 == 0);
    const width = rd.size();
    assert(width != 32 or shift <= 16);
    assert(width != 64 or shift <= 48);

    const hw: u2 = @intCast(shift / 16);
    const sf: u1 = switch (width) {
        32 => 0,
        64 => 1,
        else => unreachable, // unexpected register size
    };

    return .{
        .move_wide_immediate = .{
            .rd = rd.enc(),
            .imm16 = imm16,
            .hw = hw,
            .opc = opc,
            .sf = sf,
        },
    };
}
/// Builds a `pc_relative_address` instruction for a 64-bit `rd`.
///
/// The signed 21-bit immediate is split into its low two bits (`immlo`) and
/// the remaining nineteen bits (`immhi`).
fn pcRelativeAddress(rd: Register, imm21: i21, op: u1) Instruction {
    assert(rd.size() == 64);

    const raw: u21 = @bitCast(imm21);
    const low_bits: u2 = @truncate(raw);
    const high_bits: u19 = @truncate(raw >> 2);

    return .{
        .pc_relative_address = .{
            .rd = rd.enc(),
            .immlo = low_bits,
            .immhi = high_bits,
            .op = op,
        },
    };
}
/// Immediate form of a load/store offset. The active tag selects how
/// `LoadStoreOffset.toU12` packs the value into the 12-bit offset field.
pub const LoadStoreOffsetImmediate = union(enum) {
/// Signed 9-bit offset; `toU12` packs it as `(value << 2) + 1`.
post_index: i9,
/// Signed 9-bit offset; `toU12` packs it as `(value << 2) + 3`.
pre_index: i9,
/// Unsigned 12-bit offset; `toU12` returns it unchanged.
unsigned: u12,
};
/// Register form of a load/store offset: the offset register plus the
/// extend/shift applied to it.
pub const LoadStoreOffsetRegister = struct {
/// Number of the offset register (the `LoadStoreOffset.reg*` constructors store `Register.enc()` here).
rm: u5,
/// Extend/shift kind; the payload is the shift value given to the constructor.
shift: union(enum) {
uxtw: u2,
lsl: u2,
sxtw: u2,
sxtx: u2,
},
};
/// Represents the offset operand of a load or store instruction.
/// Data can be loaded from memory with either an immediate offset
/// or an offset that is stored in some register.
pub const LoadStoreOffset = union(enum) {
    immediate: LoadStoreOffsetImmediate,
    register: LoadStoreOffsetRegister,

    /// An unsigned immediate offset of zero.
    pub const none = LoadStoreOffset{
        .immediate = .{ .unsigned = 0 },
    };

    /// Packs the offset into the 12-bit `offset` field of a
    /// `load_store_register` instruction.
    ///
    /// NOTE(review): for register offsets the shift payload is added in as
    /// `payload << 2` without being reduced to a flag first. Confirm that the
    /// payloads accepted by the `reg_*` constructors (0, 2 or 3) produce the
    /// intended bits.
    pub fn toU12(self: LoadStoreOffset) u12 {
        return switch (self) {
            .immediate => |kind| switch (kind) {
                .unsigned => |v| v,
                .post_index => |v| (@as(u12, @as(u9, @bitCast(v))) << 2) + 1,
                .pre_index => |v| (@as(u12, @as(u9, @bitCast(v))) << 2) + 3,
            },
            .register => |r| blk: {
                const rm_bits = @as(u12, r.rm) << 6;
                break :blk switch (r.shift) {
                    .uxtw => |v| rm_bits + (@as(u12, v) << 2) + 16 + 2050,
                    .lsl => |v| rm_bits + (@as(u12, v) << 2) + 24 + 2050,
                    .sxtw => |v| rm_bits + (@as(u12, v) << 2) + 48 + 2050,
                    .sxtx => |v| rm_bits + (@as(u12, v) << 2) + 56 + 2050,
                };
            },
        };
    }

    /// Unsigned immediate offset.
    pub fn imm(offset: u12) LoadStoreOffset {
        return .{ .immediate = .{ .unsigned = offset } };
    }

    /// Post-index immediate offset.
    pub fn imm_post_index(offset: i9) LoadStoreOffset {
        return .{ .immediate = .{ .post_index = offset } };
    }

    /// Pre-index immediate offset.
    pub fn imm_pre_index(offset: i9) LoadStoreOffset {
        return .{ .immediate = .{ .pre_index = offset } };
    }

    /// Register offset using `lsl` with a shift of zero.
    pub fn reg(rm: Register) LoadStoreOffset {
        return .{ .register = .{
            .rm = rm.enc(),
            .shift = .{ .lsl = 0 },
        } };
    }

    /// Register offset using `uxtw`; `rm` must be 32 bits wide and `shift` 0 or 2.
    pub fn reg_uxtw(rm: Register, shift: u2) LoadStoreOffset {
        assert(rm.size() == 32);
        assert(shift == 0 or shift == 2);
        return .{ .register = .{
            .rm = rm.enc(),
            .shift = .{ .uxtw = shift },
        } };
    }

    /// Register offset using `lsl`; `rm` must be 64 bits wide and `shift` 0 or 3.
    pub fn reg_lsl(rm: Register, shift: u2) LoadStoreOffset {
        assert(rm.size() == 64);
        assert(shift == 0 or shift == 3);
        return .{ .register = .{
            .rm = rm.enc(),
            .shift = .{ .lsl = shift },
        } };
    }

    /// Register offset using `sxtw`; `rm` must be 32 bits wide and `shift` 0 or 2.
    pub fn reg_sxtw(rm: Register, shift: u2) LoadStoreOffset {
        assert(rm.size() == 32);
        assert(shift == 0 or shift == 2);
        return .{ .register = .{
            .rm = rm.enc(),
            .shift = .{ .sxtw = shift },
        } };
    }

    /// Register offset using `sxtx`; `rm` must be 64 bits wide and `shift` 0 or 3.
    pub fn reg_sxtx(rm: Register, shift: u2) LoadStoreOffset {
        assert(rm.size() == 64);
        assert(shift == 0 or shift == 3);
        return .{ .register = .{
            .rm = rm.enc(),
            .shift = .{ .sxtx = shift },
        } };
    }
};
/// Which kind of load/store to perform.
/// `loadStoreRegister` derives the `opc` and `size` fields from this value;
/// for `ldr`/`str` the `size` field, and for `ldrsb`/`ldrsh` the `opc` field,
/// additionally depend on the width of the transfer register.
const LoadStoreVariant = enum {
/// 32 bits or 64 bits
str,
/// 8 bits, zero-extended
strb,
/// 16 bits, zero-extended
strh,
/// 32 bits or 64 bits
ldr,
/// 8 bits, zero-extended
ldrb,
/// 16 bits, zero-extended
ldrh,
/// 8 bits, sign extended
ldrsb,
/// 16 bits, sign extended
ldrsh,
/// 32 bits, sign extended
ldrsw,
};
/// Builds a `load_store_register` instruction.
///
/// `rn` must be a 64-bit register whose `id` differs from `xzr`'s. `op1` is
/// `0b01` for an unsigned immediate offset and `0b00` for every other offset
/// form. `opc` and `size` are derived from `variant` and, where the variant
/// allows both widths, from the width of `rt`.
fn loadStoreRegister(
    rt: Register,
    rn: Register,
    offset: LoadStoreOffset,
    variant: LoadStoreVariant,
) Instruction {
    assert(rn.size() == 64);
    assert(rn.id() != Register.xzr.id());

    const packed_offset = offset.toU12();

    const op1: u2 = switch (offset) {
        .immediate => |kind| switch (kind) {
            .unsigned => 0b01,
            .post_index, .pre_index => 0b00,
        },
        .register => 0b00,
    };

    const opc: u2 = switch (variant) {
        .str, .strh, .strb => 0b00,
        .ldr, .ldrh, .ldrb => 0b01,
        .ldrsw => 0b10,
        // Sign-extending byte/halfword loads encode the destination width here.
        .ldrsb, .ldrsh => switch (rt.size()) {
            32 => 0b11,
            64 => 0b10,
            else => unreachable, // unexpected register size
        },
    };

    const size: u2 = switch (variant) {
        .ldrb, .ldrsb, .strb => 0b00,
        .ldrh, .ldrsh, .strh => 0b01,
        .ldrsw => 0b10,
        .ldr, .str => switch (rt.size()) {
            32 => 0b10,
            64 => 0b11,
            else => unreachable, // unexpected register size
        },
    };

    return .{
        .load_store_register = .{
            .rt = rt.enc(),
            .rn = rn.enc(),
            .offset = packed_offset,
            .opc = opc,
            .op1 = op1,
            .v = 0,
            .size = size,
        },
    };
}
/// Encodes a load/store of the register pair `rt1`, `rt2` relative to base
/// `rn`.
///
/// * `rn` must be a 64-bit register other than `xzr`.
/// * `rt1` and `rt2` must have the same width (32 or 64 bits); that width
///   selects the `opc` field and the scale applied to `offset`.
/// * `offset` is a byte offset. It is stored scaled (divided by 4 or 8) in
///   the 7-bit `imm7` field, so it must be a multiple of the register width
///   in bytes. Every such multiple representable in an `i9` fits `imm7`.
/// * `encoding` is written verbatim to the `encoding` field and `load`
///   selects load (true) versus store (false).
fn loadStoreRegisterPair(
    rt1: Register,
    rt2: Register,
    rn: Register,
    offset: i9,
    encoding: u2,
    load: bool,
) Instruction {
    assert(rn.size() == 64);
    assert(rn.id() != Register.xzr.id());
    // A single `opc` field describes both transfer registers.
    assert(rt1.size() == rt2.size());

    const is_64 = switch (rt1.size()) {
        32 => false,
        64 => true,
        else => unreachable, // unexpected register size
    };
    const opc: u2 = if (is_64) 0b10 else 0b00;
    const scale_log2: u2 = if (is_64) 3 else 2;
    const low_bits: i9 = if (is_64) 0b111 else 0b11;

    // The scaling shift below discards the low bits; reject offsets that
    // would otherwise be silently rounded down to a different address.
    assert(offset & low_bits == 0);

    const imm7: u7 = @truncate(@as(u9, @bitCast(offset >> scale_log2)));
    return .{ .load_store_register_pair = .{
        .rt1 = rt1.enc(),
        .rn = rn.enc(),
        .rt2 = rt2.enc(),
        .imm7 = imm7,
        .load = @intFromBool(load),
        .encoding = encoding,
        .opc = opc,
    } };
}
/// Encodes a literal load into `rt`; `imm19` is written verbatim to the
/// instruction's `imm19` field. The width of `rt` (32 or 64) selects `opc`.
fn loadLiteral(rt: Register, imm19: u19) Instruction {
    const opc: u2 = switch (rt.size()) {
        32 => 0b00,
        64 => 0b01,
        else => unreachable, // unexpected register size
    };
    return .{ .load_literal = .{
        .opc = opc,
        .imm19 = imm19,
        .rt = rt.enc(),
    } };
}
/// Builds an exception-generation instruction from its raw fields.
/// All four arguments are stored unchanged in the fields of the same name.
fn exceptionGeneration(
    opc: u3,
    op2: u3,
    ll: u2,
    imm16: u16,
) Instruction {
    return .{ .exception_generation = .{
        .opc = opc,
        .imm16 = imm16,
        .op2 = op2,
        .ll = ll,
    } };
}
/// Builds an unconditional branch to the address held in `rn`, which must
/// be a 64-bit register. The `opc`/`op2`/`op3`/`op4` arguments are stored
/// unchanged in the fields of the same name.
fn unconditionalBranchRegister(
    opc: u4,
    op2: u5,
    op3: u6,
    rn: Register,
    op4: u5,
) Instruction {
    assert(rn.size() == 64);
    const target = rn.enc();
    return .{ .unconditional_branch_register = .{
        .opc = opc,
        .op2 = op2,
        .op3 = op3,
        .rn = target,
        .op4 = op4,
    } };
}
/// Builds an unconditional immediate branch.
///
/// `offset` is a byte offset and must be a multiple of 4: it is stored
/// divided by 4 in the 26-bit `imm26` field. `op` is stored unchanged in
/// the `op` field.
fn unconditionalBranchImmediate(
    op: u1,
    offset: i28,
) Instruction {
    // Same alignment requirement that conditionalBranch/compareAndBranch
    // enforce; without it the two low bits would be silently dropped.
    assert(offset & 0b11 == 0b00);
    const word_offset: i26 = @intCast(offset >> 2);
    return .{ .unconditional_branch_immediate = .{
        .imm26 = @as(u26, @bitCast(word_offset)),
        .op = op,
    } };
}
/// Shift kinds accepted by the logical (shifted register) instructions.
pub const LogicalShiftedRegisterShift = enum(u2) { lsl, lsr, asr, ror };

/// Builds a logical (shifted register) instruction.
///
/// `rd`, `rn` and `rm` must all have the same width. For 32-bit registers
/// `amount` must be below 32. `opc` and `n` are stored unchanged.
fn logicalShiftedRegister(
    opc: u2,
    n: u1,
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const width = rd.size();
    assert(width == rn.size());
    assert(width == rm.size());
    if (width == 32) assert(amount < 32);

    return .{ .logical_shifted_register = .{
        .sf = switch (width) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable,
        },
        .opc = opc,
        .shift = @intFromEnum(shift),
        .n = n,
        .rm = rm.enc(),
        .imm6 = amount,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds an add/subtract (immediate) instruction.
///
/// `rd` and `rn` must have the same width and `rn` must not be `xzr`.
/// `shift` sets the `sh` field; `op` and `s` are stored unchanged.
fn addSubtractImmediate(
    op: u1,
    s: u1,
    rd: Register,
    rn: Register,
    imm12: u12,
    shift: bool,
) Instruction {
    const width = rd.size();
    assert(width == rn.size());
    assert(rn.id() != Register.xzr.id());

    return .{ .add_subtract_immediate = .{
        .sf = switch (width) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .op = op,
        .s = s,
        .sh = @intFromBool(shift),
        .imm12 = imm12,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds a logical (immediate) instruction.
///
/// `rd` and `rn` must have the same width, and `n` must be 0 when they are
/// 32-bit. `opc`, `imms`, `immr` and `n` are stored unchanged.
fn logicalImmediate(
    opc: u2,
    rd: Register,
    rn: Register,
    imms: u6,
    immr: u6,
    n: u1,
) Instruction {
    const width = rd.size();
    assert(width == rn.size());
    assert(width != 32 or n == 0);

    return .{ .logical_immediate = .{
        .sf = switch (width) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .opc = opc,
        .n = n,
        .immr = immr,
        .imms = imms,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds a bitfield-move instruction (the shared encoder behind
/// `sbfm`/`bfm`/`ubfm`).
///
/// Constraints checked here:
/// * `rd` and `rn` have the same width;
/// * for 64-bit registers `n` is 1;
/// * for 32-bit registers `n` is 0 and both `immr` and `imms` are below 32
///   (bit 5 of either field must be clear).
///
/// `opc` is stored unchanged in the `opc` field.
fn initBitfield(
    opc: u2,
    n: u1,
    rd: Register,
    rn: Register,
    immr: u6,
    imms: u6,
) Instruction {
    assert(rd.size() == rn.size());
    switch (rd.size()) {
        64 => assert(n == 1),
        32 => {
            assert(n == 0);
            assert(immr >> 5 == 0);
            // Previously `immr` was tested twice and `imms` not at all.
            assert(imms >> 5 == 0);
        },
        else => {},
    }

    return .{ .bitfield = .{
        .rd = rd.enc(),
        .rn = rn.enc(),
        .imms = imms,
        .immr = immr,
        .n = n,
        .opc = opc,
        .sf = switch (rd.size()) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
    } };
}
/// Shift kinds accepted by the add/subtract (shifted register)
/// instructions. The enum is non-exhaustive, so the remaining `u2` value
/// (0b11) has no name here.
pub const AddSubtractShiftedRegisterShift = enum(u2) { lsl, lsr, asr, _ };

/// Builds an add/subtract (shifted register) instruction.
///
/// `rd`, `rn` and `rm` must all have the same width. For 32-bit registers
/// the shift amount `imm6` must be below 32, matching the check performed
/// by `logicalShiftedRegister`. `op` and `s` are stored unchanged.
fn addSubtractShiftedRegister(
    op: u1,
    s: u1,
    shift: AddSubtractShiftedRegisterShift,
    rd: Register,
    rn: Register,
    rm: Register,
    imm6: u6,
) Instruction {
    assert(rd.size() == rn.size());
    assert(rd.size() == rm.size());
    // A 32-bit operand cannot be shifted by 32 or more.
    if (rd.size() == 32) assert(imm6 < 32);

    return .{ .add_subtract_shifted_register = .{
        .rd = rd.enc(),
        .rn = rn.enc(),
        .imm6 = imm6,
        .rm = rm.enc(),
        .shift = @intFromEnum(shift),
        .s = s,
        .op = op,
        .sf = switch (rd.size()) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
    } };
}
/// Extension options for the add/subtract (extended register) instructions.
pub const AddSubtractExtendedRegisterOption = enum(u3) {
    uxtb,
    uxth,
    uxtw,
    uxtx, // serves also as lsl
    sxtb,
    sxth,
    sxtw,
    sxtx,
};

/// Builds an add/subtract (extended register) instruction.
///
/// The width of `rd` (32 or 64) selects `sf`. `op`, `s` and `imm3` are
/// stored unchanged; `extend` is stored as its integer value in `option`.
fn addSubtractExtendedRegister(
    op: u1,
    s: u1,
    rd: Register,
    rn: Register,
    rm: Register,
    extend: AddSubtractExtendedRegisterOption,
    imm3: u3,
) Instruction {
    return .{ .add_subtract_extended_register = .{
        .sf = switch (rd.size()) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .op = op,
        .s = s,
        .rm = rm.enc(),
        .option = @intFromEnum(extend),
        .imm3 = imm3,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds a conditional branch.
///
/// `offset` is a byte offset that must be a multiple of 4; it is stored
/// divided by 4 in `imm19`. `o0` and `o1` are stored unchanged.
fn conditionalBranch(
    o0: u1,
    o1: u1,
    cond: Condition,
    offset: i21,
) Instruction {
    assert(offset & 0b11 == 0b00);
    const word_offset: i19 = @intCast(offset >> 2);
    const imm19: u19 = @bitCast(word_offset);
    return .{ .conditional_branch = .{
        .o1 = o1,
        .imm19 = imm19,
        .o0 = o0,
        .cond = @intFromEnum(cond),
    } };
}
/// Builds a compare-and-branch instruction on `rt`.
///
/// `offset` is a byte offset that must be a multiple of 4; it is stored
/// divided by 4 in `imm19`. The width of `rt` (32 or 64) selects `sf`.
fn compareAndBranch(
    op: u1,
    rt: Register,
    offset: i21,
) Instruction {
    assert(offset & 0b11 == 0b00);
    const word_offset: i19 = @intCast(offset >> 2);
    const imm19: u19 = @bitCast(word_offset);
    return .{ .compare_and_branch = .{
        .sf = switch (rt.size()) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .op = op,
        .imm19 = imm19,
        .rt = rt.enc(),
    } };
}
/// Builds a conditional-select instruction.
///
/// `rd`, `rn` and `rm` must all have the same width (32 or 64), which
/// selects `sf`. `op2`, `op` and `s` are stored unchanged.
fn conditionalSelect(
    op2: u2,
    op: u1,
    s: u1,
    rd: Register,
    rn: Register,
    rm: Register,
    cond: Condition,
) Instruction {
    const width = rd.size();
    assert(width == rn.size());
    assert(width == rm.size());

    return .{ .conditional_select = .{
        .sf = switch (width) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .op = op,
        .s = s,
        .rm = rm.enc(),
        .cond = @intFromEnum(cond),
        .op2 = op2,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds a data-processing (3 source) instruction.
///
/// The width of `rd` (32 or 64) selects `sf`; no relationship between the
/// operand widths is checked here. `op54`, `op31` and `o0` are stored
/// unchanged.
fn dataProcessing3Source(
    op54: u2,
    op31: u3,
    o0: u1,
    rd: Register,
    rn: Register,
    rm: Register,
    ra: Register,
) Instruction {
    return .{ .data_processing_3_source = .{
        .sf = switch (rd.size()) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .op54 = op54,
        .op31 = op31,
        .rm = rm.enc(),
        .o0 = o0,
        .ra = ra.enc(),
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
/// Builds a data-processing (2 source) instruction.
///
/// `rd`, `rn` and `rm` must all have the same width (32 or 64), which
/// selects `sf`. `s` and `opcode` are stored unchanged.
fn dataProcessing2Source(
    s: u1,
    opcode: u6,
    rd: Register,
    rn: Register,
    rm: Register,
) Instruction {
    const width = rd.size();
    assert(width == rn.size());
    assert(width == rm.size());

    return .{ .data_processing_2_source = .{
        .sf = switch (width) {
            32 => 0b0,
            64 => 0b1,
            else => unreachable, // unexpected register size
        },
        .s = s,
        .rm = rm.enc(),
        .opcode = opcode,
        .rn = rn.enc(),
        .rd = rd.enc(),
    } };
}
// Helper functions for assembly syntax functions
// Move wide (immediate)
/// Encodes `movn rd, #imm16, lsl #shift` (move-wide opc 0b00).
pub fn movn(rd: Register, imm16: u16, shift: u6) Instruction {
    const opc = 0b00;
    return moveWideImmediate(opc, rd, imm16, shift);
}

/// Encodes `movz rd, #imm16, lsl #shift` (move-wide opc 0b10).
pub fn movz(rd: Register, imm16: u16, shift: u6) Instruction {
    const opc = 0b10;
    return moveWideImmediate(opc, rd, imm16, shift);
}

/// Encodes `movk rd, #imm16, lsl #shift` (move-wide opc 0b11).
pub fn movk(rd: Register, imm16: u16, shift: u6) Instruction {
    const opc = 0b11;
    return moveWideImmediate(opc, rd, imm16, shift);
}
// PC relative address
/// Encodes `adr rd, #imm21` (PC-relative address, op 0).
pub fn adr(rd: Register, imm21: i21) Instruction {
    const op = 0b0;
    return pcRelativeAddress(rd, imm21, op);
}

/// Encodes `adrp rd, #imm21` (PC-relative address, op 1).
pub fn adrp(rd: Register, imm21: i21) Instruction {
    const op = 0b1;
    return pcRelativeAddress(rd, imm21, op);
}
// Load or store register
/// Encodes a literal-form `ldr` into `rt`; `literal` becomes the `imm19` field.
pub fn ldrLiteral(rt: Register, literal: u19) Instruction {
    const inst = loadLiteral(rt, literal);
    return inst;
}

/// Encodes `ldr rt, [rn, <offset>]`.
pub fn ldr(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldr;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `ldrh rt, [rn, <offset>]`.
pub fn ldrh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldrh;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `ldrb rt, [rn, <offset>]`.
pub fn ldrb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldrb;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `ldrsb rt, [rn, <offset>]`.
pub fn ldrsb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldrsb;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `ldrsh rt, [rn, <offset>]`.
pub fn ldrsh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldrsh;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `ldrsw rt, [rn, <offset>]`.
pub fn ldrsw(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .ldrsw;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `str rt, [rn, <offset>]`.
pub fn str(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .str;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `strh rt, [rn, <offset>]`.
pub fn strh(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .strh;
    return loadStoreRegister(rt, rn, offset, variant);
}

/// Encodes `strb rt, [rn, <offset>]`.
pub fn strb(rt: Register, rn: Register, offset: LoadStoreOffset) Instruction {
    const variant: LoadStoreVariant = .strb;
    return loadStoreRegister(rt, rn, offset, variant);
}
// Load or store pair of registers
/// Addressing form plus byte offset for the load/store pair instructions
/// (`ldp`/`stp`).
pub const LoadStorePairOffset = struct {
    /// Value written to the instruction's `encoding` field.
    encoding: enum(u2) {
        post_index = 0b01,
        signed = 0b10,
        pre_index = 0b11,
    },
    /// Byte offset; scaled by the encoder before being stored.
    offset: i9,

    /// Signed-offset form with an offset of zero.
    pub fn none() LoadStorePairOffset {
        return .{ .offset = 0, .encoding = .signed };
    }

    /// Post-indexed form with byte offset `imm`.
    pub fn post_index(imm: i9) LoadStorePairOffset {
        return .{ .offset = imm, .encoding = .post_index };
    }

    /// Pre-indexed form with byte offset `imm`.
    pub fn pre_index(imm: i9) LoadStorePairOffset {
        return .{ .offset = imm, .encoding = .pre_index };
    }

    /// Signed-offset form with byte offset `imm`.
    pub fn signed(imm: i9) LoadStorePairOffset {
        return .{ .offset = imm, .encoding = .signed };
    }
};
/// Encodes `ldp rt1, rt2, [rn, ...]` using the addressing form in `offset`.
pub fn ldp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction {
    const encoding = @intFromEnum(offset.encoding);
    return loadStoreRegisterPair(rt1, rt2, rn, offset.offset, encoding, true);
}

/// Encodes `ldnp rt1, rt2, [rn, #offset]` (pair encoding field 0).
pub fn ldnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction {
    const encoding = 0;
    return loadStoreRegisterPair(rt1, rt2, rn, offset, encoding, true);
}

/// Encodes `stp rt1, rt2, [rn, ...]` using the addressing form in `offset`.
pub fn stp(rt1: Register, rt2: Register, rn: Register, offset: LoadStorePairOffset) Instruction {
    const encoding = @intFromEnum(offset.encoding);
    return loadStoreRegisterPair(rt1, rt2, rn, offset.offset, encoding, false);
}

/// Encodes `stnp rt1, rt2, [rn, #offset]` (pair encoding field 0).
pub fn stnp(rt1: Register, rt2: Register, rn: Register, offset: i9) Instruction {
    const encoding = 0;
    return loadStoreRegisterPair(rt1, rt2, rn, offset, encoding, false);
}
// Exception generation
/// Encodes `svc #imm16`.
pub fn svc(imm16: u16) Instruction {
    const opc = 0b000;
    const op2 = 0b000;
    const ll = 0b01;
    return exceptionGeneration(opc, op2, ll, imm16);
}

/// Encodes `hvc #imm16`.
pub fn hvc(imm16: u16) Instruction {
    const opc = 0b000;
    const op2 = 0b000;
    const ll = 0b10;
    return exceptionGeneration(opc, op2, ll, imm16);
}

/// Encodes `smc #imm16`.
pub fn smc(imm16: u16) Instruction {
    const opc = 0b000;
    const op2 = 0b000;
    const ll = 0b11;
    return exceptionGeneration(opc, op2, ll, imm16);
}

/// Encodes `brk #imm16`.
pub fn brk(imm16: u16) Instruction {
    const opc = 0b001;
    const op2 = 0b000;
    const ll = 0b00;
    return exceptionGeneration(opc, op2, ll, imm16);
}

/// Encodes `hlt #imm16`.
pub fn hlt(imm16: u16) Instruction {
    const opc = 0b010;
    const op2 = 0b000;
    const ll = 0b00;
    return exceptionGeneration(opc, op2, ll, imm16);
}
// Unconditional branch (register)
/// Encodes `br rn`.
pub fn br(rn: Register) Instruction {
    const opc = 0b0000;
    return unconditionalBranchRegister(opc, 0b11111, 0b000000, rn, 0b00000);
}

/// Encodes `blr rn`.
pub fn blr(rn: Register) Instruction {
    const opc = 0b0001;
    return unconditionalBranchRegister(opc, 0b11111, 0b000000, rn, 0b00000);
}

/// Encodes `ret`; when `rn` is null the target register defaults to `x30`.
pub fn ret(rn: ?Register) Instruction {
    const opc = 0b0010;
    const target: Register = rn orelse .x30;
    return unconditionalBranchRegister(opc, 0b11111, 0b000000, target, 0b00000);
}
// Unconditional branch (immediate)
/// Encodes `b #offset` (unconditional immediate branch, op 0).
pub fn b(offset: i28) Instruction {
    const op = 0;
    return unconditionalBranchImmediate(op, offset);
}

/// Encodes `bl #offset` (unconditional immediate branch, op 1).
pub fn bl(offset: i28) Instruction {
    const op = 1;
    return unconditionalBranchImmediate(op, offset);
}
// Nop
/// Encodes `nop`; every field of the `no_operation` variant keeps its default.
pub fn nop() Instruction {
    return .{ .no_operation = .{} };
}
// Logical (shifted register)
/// Encodes `and rd, rn, rm, <shift> #amount` (opc 0b00, n 0).
pub fn andShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b00;
    const n = 0b0;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `bic rd, rn, rm, <shift> #amount` (opc 0b00, n 1).
pub fn bicShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b00;
    const n = 0b1;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `orr rd, rn, rm, <shift> #amount` (opc 0b01, n 0).
pub fn orrShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b01;
    const n = 0b0;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `orn rd, rn, rm, <shift> #amount` (opc 0b01, n 1).
pub fn ornShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b01;
    const n = 0b1;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `eor rd, rn, rm, <shift> #amount` (opc 0b10, n 0).
pub fn eorShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b10;
    const n = 0b0;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `eon rd, rn, rm, <shift> #amount` (opc 0b10, n 1).
pub fn eonShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b10;
    const n = 0b1;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `ands rd, rn, rm, <shift> #amount` (opc 0b11, n 0).
pub fn andsShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b11;
    const n = 0b0;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}

/// Encodes `bics rd, rn, rm, <shift> #amount` (opc 0b11, n 1).
pub fn bicsShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: LogicalShiftedRegisterShift,
    amount: u6,
) Instruction {
    const opc = 0b11;
    const n = 0b1;
    return logicalShiftedRegister(opc, n, rd, rn, rm, shift, amount);
}
// Add/subtract (immediate)
/// Encodes `add rd, rn, #imm`; `shift` sets the `sh` field
/// (written `lsl #12` in the test comments).
pub fn add(rd: Register, rn: Register, imm: u12, shift: bool) Instruction {
    const op = 0b0;
    const s = 0b0;
    return addSubtractImmediate(op, s, rd, rn, imm, shift);
}

/// Encodes `adds rd, rn, #imm`; `shift` sets the `sh` field.
pub fn adds(rd: Register, rn: Register, imm: u12, shift: bool) Instruction {
    const op = 0b0;
    const s = 0b1;
    return addSubtractImmediate(op, s, rd, rn, imm, shift);
}

/// Encodes `sub rd, rn, #imm`; `shift` sets the `sh` field.
pub fn sub(rd: Register, rn: Register, imm: u12, shift: bool) Instruction {
    const op = 0b1;
    const s = 0b0;
    return addSubtractImmediate(op, s, rd, rn, imm, shift);
}

/// Encodes `subs rd, rn, #imm`; `shift` sets the `sh` field.
pub fn subs(rd: Register, rn: Register, imm: u12, shift: bool) Instruction {
    const op = 0b1;
    const s = 0b1;
    return addSubtractImmediate(op, s, rd, rn, imm, shift);
}
// Logical (immediate)
/// Encodes `and rd, rn, #<bitmask>`; the bitmask is given pre-encoded as
/// `imms`/`immr`/`n`.
pub fn andImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction {
    const opc = 0b00;
    return logicalImmediate(opc, rd, rn, imms, immr, n);
}

/// Encodes `orr rd, rn, #<bitmask>`; the bitmask is given pre-encoded as
/// `imms`/`immr`/`n`.
pub fn orrImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction {
    const opc = 0b01;
    return logicalImmediate(opc, rd, rn, imms, immr, n);
}

/// Encodes `eor rd, rn, #<bitmask>`; the bitmask is given pre-encoded as
/// `imms`/`immr`/`n`.
pub fn eorImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction {
    const opc = 0b10;
    return logicalImmediate(opc, rd, rn, imms, immr, n);
}

/// Encodes `ands rd, rn, #<bitmask>`; the bitmask is given pre-encoded as
/// `imms`/`immr`/`n`.
pub fn andsImmediate(rd: Register, rn: Register, imms: u6, immr: u6, n: u1) Instruction {
    const opc = 0b11;
    return logicalImmediate(opc, rd, rn, imms, immr, n);
}
// Bitfield
/// Encodes `sbfm rd, rn, #immr, #imms`; `n` is derived from the width of `rd`.
pub fn sbfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction {
    const opc = 0b00;
    const width = rd.size();
    const n_bit: u1 = switch (width) {
        32 => 0b0,
        64 => 0b1,
        else => unreachable, // unexpected register size
    };
    return initBitfield(opc, n_bit, rd, rn, immr, imms);
}

/// Encodes `bfm rd, rn, #immr, #imms`; `n` is derived from the width of `rd`.
pub fn bfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction {
    const opc = 0b01;
    const width = rd.size();
    const n_bit: u1 = switch (width) {
        32 => 0b0,
        64 => 0b1,
        else => unreachable, // unexpected register size
    };
    return initBitfield(opc, n_bit, rd, rn, immr, imms);
}

/// Encodes `ubfm rd, rn, #immr, #imms`; `n` is derived from the width of `rd`.
pub fn ubfm(rd: Register, rn: Register, immr: u6, imms: u6) Instruction {
    const opc = 0b10;
    const width = rd.size();
    const n_bit: u1 = switch (width) {
        32 => 0b0,
        64 => 0b1,
        else => unreachable, // unexpected register size
    };
    return initBitfield(opc, n_bit, rd, rn, immr, imms);
}
/// Encodes `asr rd, rn, #shift` as `sbfm rd, rn, #shift, #(width - 1)`.
pub fn asrImmediate(rd: Register, rn: Register, shift: u6) Instruction {
    const top_bit: u6 = @intCast(rd.size() - 1);
    return sbfm(rd, rn, shift, top_bit);
}

/// Encodes `sbfx rd, rn, #lsb, #width` as
/// `sbfm rd, rn, #lsb, #(lsb + width - 1)`.
pub fn sbfx(rd: Register, rn: Register, lsb: u6, width: u7) Instruction {
    const msb: u6 = @intCast(lsb + width - 1);
    return sbfm(rd, rn, lsb, msb);
}

/// Encodes `sxtb rd, rn` as `sbfm rd, rn, #0, #7`.
pub fn sxtb(rd: Register, rn: Register) Instruction {
    const immr = 0;
    const imms = 7;
    return sbfm(rd, rn, immr, imms);
}

/// Encodes `sxth rd, rn` as `sbfm rd, rn, #0, #15`.
pub fn sxth(rd: Register, rn: Register) Instruction {
    const immr = 0;
    const imms = 15;
    return sbfm(rd, rn, immr, imms);
}

/// Encodes `sxtw rd, rn` as `sbfm rd, rn, #0, #31`; `rd` must be 64-bit.
pub fn sxtw(rd: Register, rn: Register) Instruction {
    assert(rd.size() == 64);
    const immr = 0;
    const imms = 31;
    return sbfm(rd, rn, immr, imms);
}
/// Encodes `lsl rd, rn, #shift` as
/// `ubfm rd, rn, #(-shift mod width), #(width - 1 - shift)`,
/// where `width` is the size of `rd` (32 or 64).
///
/// `shift` must be less than `width`. A shift of zero is valid and yields
/// `immr = 0`; the previous `width - 1 - shift + 1` computation overflowed
/// `u6` for 64-bit registers and produced the out-of-range value 32 for
/// 32-bit registers in that case.
pub fn lslImmediate(rd: Register, rn: Register, shift: u6) Instruction {
    const top_bit: u6 = @intCast(rd.size() - 1);
    assert(shift <= top_bit);
    // `top_bit` is width - 1, i.e. an all-ones mask for `mod width`.
    const immr = (@as(u6, 0) -% shift) & top_bit;
    const imms = top_bit - shift;
    return ubfm(rd, rn, immr, imms);
}
/// Encodes `lsr rd, rn, #shift` as `ubfm rd, rn, #shift, #(width - 1)`.
pub fn lsrImmediate(rd: Register, rn: Register, shift: u6) Instruction {
    const top_bit: u6 = @intCast(rd.size() - 1);
    return ubfm(rd, rn, shift, top_bit);
}

/// Encodes `ubfx rd, rn, #lsb, #width` as
/// `ubfm rd, rn, #lsb, #(lsb + width - 1)`.
pub fn ubfx(rd: Register, rn: Register, lsb: u6, width: u7) Instruction {
    const msb: u6 = @intCast(lsb + width - 1);
    return ubfm(rd, rn, lsb, msb);
}

/// Encodes `uxtb rd, rn` as `ubfm rd, rn, #0, #7`.
pub fn uxtb(rd: Register, rn: Register) Instruction {
    const immr = 0;
    const imms = 7;
    return ubfm(rd, rn, immr, imms);
}

/// Encodes `uxth rd, rn` as `ubfm rd, rn, #0, #15`.
pub fn uxth(rd: Register, rn: Register) Instruction {
    const immr = 0;
    const imms = 15;
    return ubfm(rd, rn, immr, imms);
}
// Add/subtract (shifted register)
/// Encodes `add rd, rn, rm, <shift> #imm6` (op 0, s 0).
pub fn addShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: AddSubtractShiftedRegisterShift,
    imm6: u6,
) Instruction {
    const op = 0b0;
    const s = 0b0;
    return addSubtractShiftedRegister(op, s, shift, rd, rn, rm, imm6);
}

/// Encodes `adds rd, rn, rm, <shift> #imm6` (op 0, s 1).
pub fn addsShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: AddSubtractShiftedRegisterShift,
    imm6: u6,
) Instruction {
    const op = 0b0;
    const s = 0b1;
    return addSubtractShiftedRegister(op, s, shift, rd, rn, rm, imm6);
}

/// Encodes `sub rd, rn, rm, <shift> #imm6` (op 1, s 0).
pub fn subShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: AddSubtractShiftedRegisterShift,
    imm6: u6,
) Instruction {
    const op = 0b1;
    const s = 0b0;
    return addSubtractShiftedRegister(op, s, shift, rd, rn, rm, imm6);
}

/// Encodes `subs rd, rn, rm, <shift> #imm6` (op 1, s 1).
pub fn subsShiftedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    shift: AddSubtractShiftedRegisterShift,
    imm6: u6,
) Instruction {
    const op = 0b1;
    const s = 0b1;
    return addSubtractShiftedRegister(op, s, shift, rd, rn, rm, imm6);
}
// Add/subtract (extended register)
/// Encodes `add rd, rn, rm, <extend> #imm3` (op 0, s 0).
pub fn addExtendedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    extend: AddSubtractExtendedRegisterOption,
    imm3: u3,
) Instruction {
    const op = 0b0;
    const s = 0b0;
    return addSubtractExtendedRegister(op, s, rd, rn, rm, extend, imm3);
}

/// Encodes `adds rd, rn, rm, <extend> #imm3` (op 0, s 1).
pub fn addsExtendedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    extend: AddSubtractExtendedRegisterOption,
    imm3: u3,
) Instruction {
    const op = 0b0;
    const s = 0b1;
    return addSubtractExtendedRegister(op, s, rd, rn, rm, extend, imm3);
}

/// Encodes `sub rd, rn, rm, <extend> #imm3` (op 1, s 0).
pub fn subExtendedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    extend: AddSubtractExtendedRegisterOption,
    imm3: u3,
) Instruction {
    const op = 0b1;
    const s = 0b0;
    return addSubtractExtendedRegister(op, s, rd, rn, rm, extend, imm3);
}

/// Encodes `subs rd, rn, rm, <extend> #imm3` (op 1, s 1).
pub fn subsExtendedRegister(
    rd: Register,
    rn: Register,
    rm: Register,
    extend: AddSubtractExtendedRegisterOption,
    imm3: u3,
) Instruction {
    const op = 0b1;
    const s = 0b1;
    return addSubtractExtendedRegister(op, s, rd, rn, rm, extend, imm3);
}
// Conditional branch
/// Encodes `b.<cond> #offset`; `offset` is a byte offset (multiple of 4).
pub fn bCond(cond: Condition, offset: i21) Instruction {
    const o0 = 0b0;
    const o1 = 0b0;
    return conditionalBranch(o0, o1, cond, offset);
}
// Compare and branch
/// Encodes `cbz rt, #offset`; `offset` is a byte offset (multiple of 4).
pub fn cbz(rt: Register, offset: i21) Instruction {
    const op = 0b0;
    return compareAndBranch(op, rt, offset);
}

/// Encodes `cbnz rt, #offset`; `offset` is a byte offset (multiple of 4).
pub fn cbnz(rt: Register, offset: i21) Instruction {
    const op = 0b1;
    return compareAndBranch(op, rt, offset);
}
// Conditional select
/// Encodes `csel rd, rn, rm, <cond>` (op2 0b00, op 0).
pub fn csel(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction {
    const op2 = 0b00;
    const op = 0b0;
    const s = 0b0;
    return conditionalSelect(op2, op, s, rd, rn, rm, cond);
}

/// Encodes `csinc rd, rn, rm, <cond>` (op2 0b01, op 0).
pub fn csinc(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction {
    const op2 = 0b01;
    const op = 0b0;
    const s = 0b0;
    return conditionalSelect(op2, op, s, rd, rn, rm, cond);
}

/// Encodes `csinv rd, rn, rm, <cond>` (op2 0b00, op 1).
pub fn csinv(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction {
    const op2 = 0b00;
    const op = 0b1;
    const s = 0b0;
    return conditionalSelect(op2, op, s, rd, rn, rm, cond);
}

/// Encodes `csneg rd, rn, rm, <cond>` (op2 0b01, op 1).
pub fn csneg(rd: Register, rn: Register, rm: Register, cond: Condition) Instruction {
    const op2 = 0b01;
    const op = 0b1;
    const s = 0b0;
    return conditionalSelect(op2, op, s, rd, rn, rm, cond);
}
// Data processing (3 source)
/// Encodes `madd rd, rn, rm, ra`.
pub fn madd(rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
    const op31 = 0b000;
    const o0 = 0b0;
    return dataProcessing3Source(0b00, op31, o0, rd, rn, rm, ra);
}

/// Encodes `smaddl rd, rn, rm, ra`.
/// `rd` and `ra` must be 64-bit; `rn` and `rm` must be 32-bit.
pub fn smaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
    assert(rd.size() == 64);
    assert(rn.size() == 32);
    assert(rm.size() == 32);
    assert(ra.size() == 64);
    const op31 = 0b001;
    const o0 = 0b0;
    return dataProcessing3Source(0b00, op31, o0, rd, rn, rm, ra);
}

/// Encodes `umaddl rd, rn, rm, ra`.
/// `rd` and `ra` must be 64-bit; `rn` and `rm` must be 32-bit.
pub fn umaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
    assert(rd.size() == 64);
    assert(rn.size() == 32);
    assert(rm.size() == 32);
    assert(ra.size() == 64);
    const op31 = 0b101;
    const o0 = 0b0;
    return dataProcessing3Source(0b00, op31, o0, rd, rn, rm, ra);
}

/// Encodes `msub rd, rn, rm, ra`.
pub fn msub(rd: Register, rn: Register, rm: Register, ra: Register) Instruction {
    const op31 = 0b000;
    const o0 = 0b1;
    return dataProcessing3Source(0b00, op31, o0, rd, rn, rm, ra);
}

/// Encodes `mul rd, rn, rm` as `madd` with `xzr` as the addend register.
pub fn mul(rd: Register, rn: Register, rm: Register) Instruction {
    const zero: Register = .xzr;
    return madd(rd, rn, rm, zero);
}

/// Encodes `smull rd, rn, rm` as `smaddl` with `xzr` as the addend register.
pub fn smull(rd: Register, rn: Register, rm: Register) Instruction {
    const zero: Register = .xzr;
    return smaddl(rd, rn, rm, zero);
}

/// Encodes `smulh rd, rn, rm`; `rd` must be 64-bit.
pub fn smulh(rd: Register, rn: Register, rm: Register) Instruction {
    assert(rd.size() == 64);
    const zero: Register = .xzr;
    return dataProcessing3Source(0b00, 0b010, 0b0, rd, rn, rm, zero);
}

/// Encodes `umull rd, rn, rm` as `umaddl` with `xzr` as the addend register.
pub fn umull(rd: Register, rn: Register, rm: Register) Instruction {
    const zero: Register = .xzr;
    return umaddl(rd, rn, rm, zero);
}

/// Encodes `umulh rd, rn, rm`; `rd` must be 64-bit.
pub fn umulh(rd: Register, rn: Register, rm: Register) Instruction {
    assert(rd.size() == 64);
    const zero: Register = .xzr;
    return dataProcessing3Source(0b00, 0b110, 0b0, rd, rn, rm, zero);
}

/// Encodes `mneg rd, rn, rm` as `msub` with `xzr` as the minuend register.
pub fn mneg(rd: Register, rn: Register, rm: Register) Instruction {
    const zero: Register = .xzr;
    return msub(rd, rn, rm, zero);
}
// Data processing (2 source)
/// Encodes `udiv rd, rn, rm`.
pub fn udiv(rd: Register, rn: Register, rm: Register) Instruction {
    const opcode = 0b000010;
    return dataProcessing2Source(0b0, opcode, rd, rn, rm);
}

/// Encodes `sdiv rd, rn, rm`.
pub fn sdiv(rd: Register, rn: Register, rm: Register) Instruction {
    const opcode = 0b000011;
    return dataProcessing2Source(0b0, opcode, rd, rn, rm);
}

/// Encodes `lslv rd, rn, rm`.
pub fn lslv(rd: Register, rn: Register, rm: Register) Instruction {
    const opcode = 0b001000;
    return dataProcessing2Source(0b0, opcode, rd, rn, rm);
}

/// Encodes `lsrv rd, rn, rm`.
pub fn lsrv(rd: Register, rn: Register, rm: Register) Instruction {
    const opcode = 0b001001;
    return dataProcessing2Source(0b0, opcode, rd, rn, rm);
}

/// Encodes `asrv rd, rn, rm`.
pub fn asrv(rd: Register, rn: Register, rm: Register) Instruction {
    const opcode = 0b001010;
    return dataProcessing2Source(0b0, opcode, rd, rn, rm);
}

/// Alias of `asrv`.
pub const asrRegister = asrv;
/// Alias of `lslv`.
pub const lslRegister = lslv;
/// Alias of `lsrv`.
pub const lsrRegister = lsrv;
};
// Reference every declaration in this file so that each one is
// semantically analyzed (and nested tests are picked up) under `zig test`.
test {
testing.refAllDecls(@This());
}
// Table-driven check of the instruction encoders: every case builds an
// `Instruction` through one of the public constructors and compares
// `toU32()` against a hand-written bit pattern. The binary literals are
// grouped with `_` along the instruction's field boundaries.
test "serialize instructions" {
const Testcase = struct {
inst: Instruction,
expected: u32,
};
const testcases = [_]Testcase{
.{ // orr x0, xzr, x1
.inst = Instruction.orrShiftedRegister(.x0, .xzr, .x1, .lsl, 0),
.expected = 0b1_01_01010_00_0_00001_000000_11111_00000,
},
.{ // orn x0, xzr, x1
.inst = Instruction.ornShiftedRegister(.x0, .xzr, .x1, .lsl, 0),
.expected = 0b1_01_01010_00_1_00001_000000_11111_00000,
},
.{ // movz x1, #4
.inst = Instruction.movz(.x1, 4, 0),
.expected = 0b1_10_100101_00_0000000000000100_00001,
},
.{ // movz x1, #4, lsl 16
.inst = Instruction.movz(.x1, 4, 16),
.expected = 0b1_10_100101_01_0000000000000100_00001,
},
.{ // movz x1, #4, lsl 32
.inst = Instruction.movz(.x1, 4, 32),
.expected = 0b1_10_100101_10_0000000000000100_00001,
},
.{ // movz x1, #4, lsl 48
.inst = Instruction.movz(.x1, 4, 48),
.expected = 0b1_10_100101_11_0000000000000100_00001,
},
.{ // movz w1, #4
.inst = Instruction.movz(.w1, 4, 0),
.expected = 0b0_10_100101_00_0000000000000100_00001,
},
.{ // movz w1, #4, lsl 16
.inst = Instruction.movz(.w1, 4, 16),
.expected = 0b0_10_100101_01_0000000000000100_00001,
},
.{ // svc #0
.inst = Instruction.svc(0),
.expected = 0b1101_0100_000_0000000000000000_00001,
},
.{ // svc #0x80 ; typical on Darwin
.inst = Instruction.svc(0x80),
.expected = 0b1101_0100_000_0000000010000000_00001,
},
.{ // ret (defaults to x30)
.inst = Instruction.ret(null),
.expected = 0b1101_011_00_10_11111_0000_00_11110_00000,
},
.{ // bl #0x10
.inst = Instruction.bl(0x10),
.expected = 0b1_00101_00_0000_0000_0000_0000_0000_0100,
},
.{ // ldr x2, [x1]
.inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.none),
.expected = 0b11_111_0_01_01_000000000000_00001_00010,
},
.{ // ldr x2, [x1, #1]!
.inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.imm_pre_index(1)),
.expected = 0b11_111_0_00_01_0_000000001_11_00001_00010,
},
.{ // ldr x2, [x1], #-1
.inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.imm_post_index(-1)),
.expected = 0b11_111_0_00_01_0_111111111_01_00001_00010,
},
.{ // ldr x2, [x1, x3] (register offset)
.inst = Instruction.ldr(.x2, .x1, Instruction.LoadStoreOffset.reg(.x3)),
.expected = 0b11_111_0_00_01_1_00011_011_0_10_00001_00010,
},
.{ // ldr x2, label
.inst = Instruction.ldrLiteral(.x2, 0x1),
.expected = 0b01_011_0_00_0000000000000000001_00010,
},
.{ // ldrh x7, [x4], #0xaa
.inst = Instruction.ldrh(.x7, .x4, Instruction.LoadStoreOffset.imm_post_index(0xaa)),
.expected = 0b01_111_0_00_01_0_010101010_01_00100_00111,
},
.{ // ldrb x9, [x15, #0xff]!
.inst = Instruction.ldrb(.x9, .x15, Instruction.LoadStoreOffset.imm_pre_index(0xff)),
.expected = 0b00_111_0_00_01_0_011111111_11_01111_01001,
},
.{ // str x2, [x1]
.inst = Instruction.str(.x2, .x1, Instruction.LoadStoreOffset.none),
.expected = 0b11_111_0_01_00_000000000000_00001_00010,
},
.{ // str x2, [x1, x3] (register offset)
.inst = Instruction.str(.x2, .x1, Instruction.LoadStoreOffset.reg(.x3)),
.expected = 0b11_111_0_00_00_1_00011_011_0_10_00001_00010,
},
.{ // strh w0, [x1]
.inst = Instruction.strh(.w0, .x1, Instruction.LoadStoreOffset.none),
.expected = 0b01_111_0_01_00_000000000000_00001_00000,
},
.{ // strb w8, [x9]
.inst = Instruction.strb(.w8, .x9, Instruction.LoadStoreOffset.none),
.expected = 0b00_111_0_01_00_000000000000_01001_01000,
},
.{ // adr x2, #0x8
.inst = Instruction.adr(.x2, 0x8),
.expected = 0b0_00_10000_0000000000000000010_00010,
},
.{ // adr x2, #-0x8
.inst = Instruction.adr(.x2, -0x8),
.expected = 0b0_00_10000_1111111111111111110_00010,
},
.{ // adrp x2, #0x8
.inst = Instruction.adrp(.x2, 0x8),
.expected = 0b1_00_10000_0000000000000000010_00010,
},
.{ // adrp x2, #-0x8
.inst = Instruction.adrp(.x2, -0x8),
.expected = 0b1_00_10000_1111111111111111110_00010,
},
.{ // stp x1, x2, [sp, #8]
.inst = Instruction.stp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.signed(8)),
.expected = 0b10_101_0_010_0_0000001_00010_11111_00001,
},
.{ // ldp x1, x2, [sp, #8]
.inst = Instruction.ldp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.signed(8)),
.expected = 0b10_101_0_010_1_0000001_00010_11111_00001,
},
.{ // stp x1, x2, [sp, #-16]!
.inst = Instruction.stp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.pre_index(-16)),
.expected = 0b10_101_0_011_0_1111110_00010_11111_00001,
},
.{ // ldp x1, x2, [sp], #16
.inst = Instruction.ldp(.x1, .x2, .sp, Instruction.LoadStorePairOffset.post_index(16)),
.expected = 0b10_101_0_001_1_0000010_00010_11111_00001,
},
.{ // and x0, x4, x2
.inst = Instruction.andShiftedRegister(.x0, .x4, .x2, .lsl, 0),
.expected = 0b1_00_01010_00_0_00010_000000_00100_00000,
},
.{ // and x0, x4, x2, lsl #0x8
.inst = Instruction.andShiftedRegister(.x0, .x4, .x2, .lsl, 0x8),
.expected = 0b1_00_01010_00_0_00010_001000_00100_00000,
},
.{ // add x0, x10, #10
.inst = Instruction.add(.x0, .x10, 10, false),
.expected = 0b1_0_0_100010_0_0000_0000_1010_01010_00000,
},
.{ // subs x0, x5, #11, lsl #12
.inst = Instruction.subs(.x0, .x5, 11, true),
.expected = 0b1_1_1_100010_1_0000_0000_1011_00101_00000,
},
.{ // b.hi #-4
.inst = Instruction.bCond(.hi, -4),
.expected = 0b0101010_0_1111111111111111111_0_1000,
},
.{ // cbz x10, #40
.inst = Instruction.cbz(.x10, 40),
.expected = 0b1_011010_0_0000000000000001010_01010,
},
.{ // add x0, x1, x2, lsl #5
.inst = Instruction.addShiftedRegister(.x0, .x1, .x2, .lsl, 5),
.expected = 0b1_0_0_01011_00_0_00010_000101_00001_00000,
},
.{ // csinc x1, x2, x4, eq
.inst = Instruction.csinc(.x1, .x2, .x4, .eq),
.expected = 0b1_0_0_11010100_00100_0000_0_1_00010_00001,
},
.{ // mul x1, x4, x9
.inst = Instruction.mul(.x1, .x4, .x9),
.expected = 0b1_00_11011_000_01001_0_11111_00100_00001,
},
.{ // eor x3, x5, #1
.inst = Instruction.eorImmediate(.x3, .x5, 0b000000, 0b000000, 0b1),
.expected = 0b1_10_100100_1_000000_000000_00101_00011,
},
.{ // lslv x6, x9, x10
.inst = Instruction.lslv(.x6, .x9, .x10),
.expected = 0b1_0_0_11010110_01010_0010_00_01001_00110,
},
.{ // lsl x4, x2, #42
.inst = Instruction.lslImmediate(.x4, .x2, 42),
.expected = 0b1_10_100110_1_010110_010101_00010_00100,
},
.{ // lsl x4, x2, #63
.inst = Instruction.lslImmediate(.x4, .x2, 63),
.expected = 0b1_10_100110_1_000001_000000_00010_00100,
},
.{ // lsr x4, x2, #42
.inst = Instruction.lsrImmediate(.x4, .x2, 42),
.expected = 0b1_10_100110_1_101010_111111_00010_00100,
},
.{ // lsr x4, x2, #63
.inst = Instruction.lsrImmediate(.x4, .x2, 63),
.expected = 0b1_10_100110_1_111111_111111_00010_00100,
},
.{ // umull x0, w0, w1
.inst = Instruction.umull(.x0, .w0, .w1),
.expected = 0b1_00_11011_1_01_00001_0_11111_00000_00000,
},
.{ // smull x0, w0, w1
.inst = Instruction.smull(.x0, .w0, .w1),
.expected = 0b1_00_11011_0_01_00001_0_11111_00000_00000,
},
.{ // tst x0, #0xffffffff00000000 (ands with xzr as destination)
.inst = Instruction.andsImmediate(.xzr, .x0, 0b011111, 0b100000, 0b1),
.expected = 0b1_11_100100_1_100000_011111_00000_11111,
},
.{ // umulh x0, x1, x2
.inst = Instruction.umulh(.x0, .x1, .x2),
.expected = 0b1_00_11011_1_10_00010_0_11111_00001_00000,
},
.{ // smulh x0, x1, x2
.inst = Instruction.smulh(.x0, .x1, .x2),
.expected = 0b1_00_11011_0_10_00010_0_11111_00001_00000,
},
.{ // adds x0, x1, x2, sxtx
.inst = Instruction.addsExtendedRegister(.x0, .x1, .x2, .sxtx, 0),
.expected = 0b1_0_1_01011_00_1_00010_111_000_00001_00000,
},
};
// Encode each instruction and compare against the expected machine word.
for (testcases) |case| {
const actual = case.inst.toU32();
try testing.expectEqual(case.expected, actual);
}
}
+5 -5
View File
@@ -744,7 +744,7 @@ pub fn generate(
src_loc: Zcu.LazySrcLoc,
func_index: InternPool.Index,
air: *const Air,
liveness: *const Air.Liveness,
liveness: *const ?Air.Liveness,
) CodeGenError!Mir {
const zcu = pt.zcu;
const gpa = zcu.gpa;
@@ -767,7 +767,7 @@ pub fn generate(
.pt = pt,
.mod = mod,
.bin_file = bin_file,
.liveness = liveness.*,
.liveness = liveness.*.?,
.target = &mod.resolved_target.result,
.owner = .{ .nav_index = func.owner_nav },
.args = undefined, // populated after `resolveCallingConventionValues`
@@ -4584,7 +4584,7 @@ fn structFieldPtr(func: *Func, inst: Air.Inst.Index, operand: Air.Inst.Ref, inde
const field_offset: i32 = switch (container_ty.containerLayout(zcu)) {
.auto, .@"extern" => @intCast(container_ty.structFieldOffset(index, zcu)),
.@"packed" => @divExact(@as(i32, ptr_container_ty.ptrInfo(zcu).packed_offset.bit_offset) +
(if (zcu.typeToStruct(container_ty)) |struct_obj| pt.structPackedFieldBitOffset(struct_obj, index) else 0) -
(if (zcu.typeToStruct(container_ty)) |struct_obj| zcu.structPackedFieldBitOffset(struct_obj, index) else 0) -
ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8),
};
@@ -4615,7 +4615,7 @@ fn airStructFieldVal(func: *Func, inst: Air.Inst.Index) !void {
const field_off: u32 = switch (struct_ty.containerLayout(zcu)) {
.auto, .@"extern" => @intCast(struct_ty.structFieldOffset(index, zcu) * 8),
.@"packed" => if (zcu.typeToStruct(struct_ty)) |struct_type|
pt.structPackedFieldBitOffset(struct_type, index)
zcu.structPackedFieldBitOffset(struct_type, index)
else
0,
};
@@ -8059,7 +8059,7 @@ fn airAggregateInit(func: *Func, inst: Air.Inst.Index) !void {
const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
const elem_abi_bits = elem_abi_size * 8;
const elem_off = pt.structPackedFieldBitOffset(struct_obj, elem_i);
const elem_off = zcu.structPackedFieldBitOffset(struct_obj, elem_i);
const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
const elem_bit_off = elem_off % elem_abi_bits;
const elem_mcv = try func.resolveInst(elem);
+2 -2
View File
@@ -267,7 +267,7 @@ pub fn generate(
src_loc: Zcu.LazySrcLoc,
func_index: InternPool.Index,
air: *const Air,
liveness: *const Air.Liveness,
liveness: *const ?Air.Liveness,
) CodeGenError!Mir {
const zcu = pt.zcu;
const gpa = zcu.gpa;
@@ -288,7 +288,7 @@ pub fn generate(
.gpa = gpa,
.pt = pt,
.air = air.*,
.liveness = liveness.*,
.liveness = liveness.*.?,
.target = target,
.bin_file = lf,
.func_index = func_index,
+14 -17
View File
@@ -1173,7 +1173,7 @@ pub fn generate(
src_loc: Zcu.LazySrcLoc,
func_index: InternPool.Index,
air: *const Air,
liveness: *const Air.Liveness,
liveness: *const ?Air.Liveness,
) Error!Mir {
_ = src_loc;
_ = bin_file;
@@ -1194,7 +1194,7 @@ pub fn generate(
.gpa = gpa,
.pt = pt,
.air = air.*,
.liveness = liveness.*,
.liveness = liveness.*.?,
.owner_nav = cg.owner_nav,
.target = target,
.ptr_size = switch (target.cpu.arch) {
@@ -1886,8 +1886,10 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.call_never_tail => cg.airCall(inst, .never_tail),
.call_never_inline => cg.airCall(inst, .never_inline),
.is_err => cg.airIsErr(inst, .i32_ne),
.is_non_err => cg.airIsErr(inst, .i32_eq),
.is_err => cg.airIsErr(inst, .i32_ne, .value),
.is_non_err => cg.airIsErr(inst, .i32_eq, .value),
.is_err_ptr => cg.airIsErr(inst, .i32_ne, .ptr),
.is_non_err_ptr => cg.airIsErr(inst, .i32_eq, .ptr),
.is_null => cg.airIsNull(inst, .i32_eq, .value),
.is_non_null => cg.airIsNull(inst, .i32_ne, .value),
@@ -1970,8 +1972,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.runtime_nav_ptr => cg.airRuntimeNavPtr(inst),
.assembly,
.is_err_ptr,
.is_non_err_ptr,
.err_return_trace,
.set_err_return_trace,
@@ -3776,7 +3776,7 @@ fn structFieldPtr(
break :offset @as(u32, 0);
}
const struct_type = zcu.typeToStruct(struct_ty).?;
break :offset @divExact(pt.structPackedFieldBitOffset(struct_type, index) + struct_ptr_ty_info.packed_offset.bit_offset, 8);
break :offset @divExact(zcu.structPackedFieldBitOffset(struct_type, index) + struct_ptr_ty_info.packed_offset.bit_offset, 8);
},
.@"union" => 0,
else => unreachable,
@@ -3812,7 +3812,7 @@ fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.@"packed" => switch (struct_ty.zigTypeTag(zcu)) {
.@"struct" => result: {
const packed_struct = zcu.typeToPackedStruct(struct_ty).?;
const offset = pt.structPackedFieldBitOffset(packed_struct, field_index);
const offset = zcu.structPackedFieldBitOffset(packed_struct, field_index);
const backing_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip));
const host_bits = backing_ty.intInfo(zcu).bits;
@@ -4105,7 +4105,7 @@ fn airSwitchDispatch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
return cg.finishAir(inst, .none, &.{br.operand});
}
fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode) InnerError!void {
fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void {
const zcu = cg.pt.zcu;
const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
const operand = try cg.resolveInst(un_op);
@@ -4122,7 +4122,7 @@ fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode) InnerEr
}
try cg.emitWValue(operand);
if (pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
if (op_kind == .ptr or pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
try cg.addMemArg(.i32_load16_u, .{
.offset = operand.offset() + @as(u32, @intCast(errUnionErrorOffset(pl_ty, zcu))),
.alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?),
@@ -5696,7 +5696,7 @@ fn airFieldParentPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
.auto, .@"extern" => parent_ty.structFieldOffset(field_index, zcu),
.@"packed" => offset: {
const parent_ptr_offset = parent_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset;
const field_offset = if (zcu.typeToStruct(parent_ty)) |loaded_struct| pt.structPackedFieldBitOffset(loaded_struct, field_index) else 0;
const field_offset = if (zcu.typeToStruct(parent_ty)) |loaded_struct| zcu.structPackedFieldBitOffset(loaded_struct, field_index) else 0;
const field_ptr_offset = field_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset;
break :offset @divExact(parent_ptr_offset + field_offset - field_ptr_offset, 8);
},
@@ -6462,9 +6462,6 @@ fn lowerTry(
operand_is_ptr: bool,
) InnerError!WValue {
const zcu = cg.pt.zcu;
if (operand_is_ptr) {
return cg.fail("TODO: lowerTry for pointers", .{});
}
const pl_ty = err_union_ty.errorUnionPayload(zcu);
const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(zcu);
@@ -6475,7 +6472,7 @@ fn lowerTry(
// check if the error tag is set for the error union.
try cg.emitWValue(err_union);
if (pl_has_bits) {
if (pl_has_bits or operand_is_ptr) {
const err_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu));
try cg.addMemArg(.i32_load16_u, .{
.offset = err_union.offset() + err_offset,
@@ -6497,12 +6494,12 @@ fn lowerTry(
}
// if we reach here it means error was not set, and we want the payload
if (!pl_has_bits) {
if (!pl_has_bits and !operand_is_ptr) {
return .none;
}
const pl_offset: u32 = @intCast(errUnionPayloadOffset(pl_ty, zcu));
if (isByRef(pl_ty, zcu, cg.target)) {
if (operand_is_ptr or isByRef(pl_ty, zcu, cg.target)) {
return buildPointerOffset(cg, err_union, pl_offset, .new);
}
const payload = try cg.load(err_union, pl_ty, pl_offset);
+53 -55
View File
@@ -878,7 +878,7 @@ pub fn generate(
src_loc: Zcu.LazySrcLoc,
func_index: InternPool.Index,
air: *const Air,
liveness: *const Air.Liveness,
liveness: *const ?Air.Liveness,
) codegen.CodeGenError!Mir {
_ = bin_file;
const zcu = pt.zcu;
@@ -894,7 +894,7 @@ pub fn generate(
.gpa = gpa,
.pt = pt,
.air = air.*,
.liveness = liveness.*,
.liveness = liveness.*.?,
.target = &mod.resolved_target.result,
.mod = mod,
.owner = .{ .nav_index = func.owner_nav },
@@ -1103,11 +1103,7 @@ const FormatAirData = struct {
inst: Air.Inst.Index,
};
fn formatAir(data: FormatAirData, w: *std.io.Writer) Writer.Error!void {
// not acceptable implementation because it ignores `w`:
//data.self.air.dumpInst(data.inst, data.self.pt, data.self.liveness);
_ = data;
_ = w;
@panic("TODO: unimplemented");
data.self.air.writeInst(w, data.inst, data.self.pt, data.self.liveness);
}
fn fmtAir(self: *CodeGen, inst: Air.Inst.Index) std.fmt.Formatter(FormatAirData, formatAir) {
return .{ .data = .{ .self = self, .inst = inst } };
@@ -100674,11 +100670,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const struct_field = cg.air.extraData(Air.StructField, ty_pl.payload).data;
var ops = try cg.tempsFromOperands(inst, .{struct_field.struct_operand});
try ops[0].toOffset(cg.fieldOffset(
try ops[0].toOffset(@intCast(codegen.fieldOffset(
cg.typeOf(struct_field.struct_operand),
ty_pl.ty.toType(),
struct_field.field_index,
), cg);
zcu,
)), cg);
try ops[0].finish(inst, &.{struct_field.struct_operand}, &ops, cg);
},
.struct_field_ptr_index_0,
@@ -100688,7 +100685,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
=> |air_tag| {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
try ops[0].toOffset(cg.fieldOffset(
try ops[0].toOffset(@intCast(codegen.fieldOffset(
cg.typeOf(ty_op.operand),
ty_op.ty.toType(),
switch (air_tag) {
@@ -100698,7 +100695,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.struct_field_ptr_index_2 => 2,
.struct_field_ptr_index_3 => 3,
},
), cg);
zcu,
)), cg);
try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg);
},
.struct_field_val => {
@@ -168108,11 +168106,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
const field_parent_ptr = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data;
var ops = try cg.tempsFromOperands(inst, .{field_parent_ptr.field_ptr});
try ops[0].toOffset(-cg.fieldOffset(
try ops[0].toOffset(-@as(i32, @intCast(codegen.fieldOffset(
ty_pl.ty.toType(),
cg.typeOf(field_parent_ptr.field_ptr),
field_parent_ptr.field_index,
), cg);
zcu,
))), cg);
try ops[0].finish(inst, &.{field_parent_ptr.field_ptr}, &ops, cg);
},
.wasm_memory_size, .wasm_memory_grow => unreachable,
@@ -168138,7 +168137,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
.dst_temps = .{ .{ .cc = .b }, .unused },
.dst_temps = .{ .{ .cc = .be }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp1p, .lea(.tmp0), ._, ._ },
@@ -168162,7 +168161,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
.dst_temps = .{ .{ .cc = .b }, .unused },
.dst_temps = .{ .{ .cc = .be }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp1p, .lea(.tmp0), ._, ._ },
@@ -168186,7 +168185,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
.unused,
.unused,
},
.dst_temps = .{ .{ .cc = .b }, .unused },
.dst_temps = .{ .{ .cc = .be }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .lea, .tmp1p, .lea(.tmp0), ._, ._ },
@@ -174809,18 +174808,6 @@ fn airStore(self: *CodeGen, inst: Air.Inst.Index, safety: bool) !void {
return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn fieldOffset(self: *CodeGen, ptr_agg_ty: Type, ptr_field_ty: Type, field_index: u32) i32 {
const pt = self.pt;
const zcu = pt.zcu;
const agg_ty = ptr_agg_ty.childType(zcu);
return switch (agg_ty.containerLayout(zcu)) {
.auto, .@"extern" => @intCast(agg_ty.structFieldOffset(field_index, zcu)),
.@"packed" => @divExact(@as(i32, ptr_agg_ty.ptrInfo(zcu).packed_offset.bit_offset) +
(if (zcu.typeToStruct(agg_ty)) |loaded_struct| pt.structPackedFieldBitOffset(loaded_struct, field_index) else 0) -
ptr_field_ty.ptrInfo(zcu).packed_offset.bit_offset, 8),
};
}
fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue {
const pt = self.pt;
const zcu = pt.zcu;
@@ -179309,10 +179296,13 @@ fn lowerSwitchBr(
} else undefined;
const table_start: u31 = @intCast(cg.mir_table.items.len);
{
const condition_index_reg = if (condition_index.isRegister())
condition_index.getReg().?
else
try cg.copyToTmpRegister(.usize, condition_index);
const condition_index_reg = condition_index_reg: {
if (condition_index.isRegister()) {
const condition_index_reg = condition_index.getReg().?;
if (condition_index_reg.isClass(.general_purpose)) break :condition_index_reg condition_index_reg;
}
break :condition_index_reg try cg.copyToTmpRegister(.usize, condition_index);
};
const condition_index_lock = cg.register_manager.lockReg(condition_index_reg);
defer if (condition_index_lock) |lock| cg.register_manager.unlockReg(lock);
try cg.truncateRegister(condition_ty, condition_index_reg);
@@ -184575,7 +184565,7 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void {
}
const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
const elem_abi_bits = elem_abi_size * 8;
const elem_off = pt.structPackedFieldBitOffset(loaded_struct, elem_i);
const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i);
const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
const elem_bit_off = elem_off % elem_abi_bits;
const elem_mcv = try self.resolveInst(elem);
@@ -185625,21 +185615,19 @@ fn resolveCallingConventionValues(
fn fail(cg: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } {
@branchHint(.cold);
const zcu = cg.pt.zcu;
switch (cg.owner) {
.nav_index => |i| return zcu.codegenFail(i, format, args),
.lazy_sym => |s| return zcu.codegenFailType(s.ty, format, args),
}
return error.CodegenFail;
return switch (cg.owner) {
.nav_index => |i| zcu.codegenFail(i, format, args),
.lazy_sym => |s| zcu.codegenFailType(s.ty, format, args),
};
}
fn failMsg(cg: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } {
@branchHint(.cold);
const zcu = cg.pt.zcu;
switch (cg.owner) {
.nav_index => |i| return zcu.codegenFailMsg(i, msg),
.lazy_sym => |s| return zcu.codegenFailTypeMsg(s.ty, msg),
}
return error.CodegenFail;
return switch (cg.owner) {
.nav_index => |i| zcu.codegenFailMsg(i, msg),
.lazy_sym => |s| zcu.codegenFailTypeMsg(s.ty, msg),
};
}
fn parseRegName(name: []const u8) ?Register {
@@ -191932,18 +191920,15 @@ const Select = struct {
error.InvalidInstruction => {
const fixes = @tagName(mir_tag[0]);
const fixes_blank = std.mem.indexOfScalar(u8, fixes, '_').?;
return s.cg.fail(
"invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'",
.{
fixes[0..fixes_blank],
@tagName(mir_tag[1]),
fixes[fixes_blank + 1 ..],
@tagName(mir_ops[0]),
@tagName(mir_ops[1]),
@tagName(mir_ops[2]),
@tagName(mir_ops[3]),
},
);
return s.cg.fail("invalid instruction: '{s}{s}{s} {s} {s} {s} {s}'", .{
fixes[0..fixes_blank],
@tagName(mir_tag[1]),
fixes[fixes_blank + 1 ..],
@tagName(mir_ops[0]),
@tagName(mir_ops[1]),
@tagName(mir_ops[2]),
@tagName(mir_ops[3]),
});
},
else => |e| return e,
};
@@ -194435,6 +194420,18 @@ fn select(
while (true) for (pattern.src[0..src_temps.len], src_temps) |src_pattern, *src_temp| {
if (try src_pattern.convert(src_temp, cg)) break;
} else break;
var src_locks: [s_src_temps.len][2]?RegisterLock = @splat(@splat(null));
for (src_locks[0..src_temps.len], src_temps) |*locks, src_temp| {
const regs: [2]Register = switch (src_temp.tracking(cg).short) {
else => continue,
.register => |reg| .{ reg, .none },
.register_pair => |regs| regs,
};
for (regs, locks) |reg, *lock| {
if (reg == .none) continue;
lock.* = cg.register_manager.lockRegIndex(RegisterManager.indexOfRegIntoTracked(reg) orelse continue);
}
}
@memcpy(s_src_temps[0..src_temps.len], src_temps);
std.mem.swap(Temp, &s_src_temps[pattern.commute[0]], &s_src_temps[pattern.commute[1]]);
@@ -194453,6 +194450,7 @@ fn select(
}
assert(s.top == 0);
for (src_locks) |locks| for (locks) |lock| if (lock) |reg| cg.register_manager.unlockReg(reg);
for (tmp_locks) |locks| for (locks) |lock| if (lock) |reg| cg.register_manager.unlockReg(reg);
for (dst_locks) |locks| for (locks) |lock| if (lock) |reg| cg.register_manager.unlockReg(reg);
caller_preserved: {

Some files were not shown because too many files have changed in this diff Show More