std.compress.zstd.Decompress fixes

* std.Io.Reader: appendRemaining no longer supports alignment and has
  different rules about how exceeding limit. Fixed bug where it would
  return success instead of error.StreamTooLong like it was supposed to.

* std.Io.Reader: simplify appendRemaining and appendRemainingUnlimited
  to be implemented based on std.Io.Writer.Allocating

* std.Io.Writer: introduce unreachableRebase

* std.Io.Writer: remove minimum_unused_capacity from Allocating. maybe
  that flexibility could have been handy, but let's see if anyone
  actually needs it. The field is redundant with the superlinear growth
  of ArrayList capacity.

* std.Io.Writer: growingRebase also ensures total capacity on the
  preserve parameter, making it no longer necessary to do
  ensureTotalCapacity at the usage site of decompression streams.

* std.compress.flate.Decompress: fix rebase not taking into account seek

* std.compress.zstd.Decompress: split into "direct" and "indirect" usage
  patterns depending on whether a buffer is provided to init, matching
  how flate works. Remove some overzealous asserts that prevented buffer
  expansion from within rebase implementation.

* std.zig: fix readSourceFileToAlloc returning an overaligned slice
  which was difficult to free correctly.

fixes #24608
This commit is contained in:
Andrew Kelley
2025-08-14 20:34:44 -07:00
parent 6d7c6a0f4e
commit 30b41dc510
10 changed files with 168 additions and 154 deletions
+16 -15
View File
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
const testing = std.testing;
const mem = std.mem;
const native_endian = builtin.cpu.arch.endian();
const Allocator = std.mem.Allocator;
/// Use this to replace an unknown, unrecognized, or unrepresentable character.
///
@@ -921,7 +922,7 @@ fn utf16LeToUtf8ArrayListImpl(
comptime surrogates: Surrogates,
) (switch (surrogates) {
.cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
.can_encode_surrogate_half => mem.Allocator.Error,
.can_encode_surrogate_half => Allocator.Error,
})!void {
assert(result.unusedCapacitySlice().len >= utf16le.len);
@@ -965,15 +966,15 @@ fn utf16LeToUtf8ArrayListImpl(
}
}
pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
pub const Utf16LeToUtf8AllocError = Allocator.Error || Utf16LeToUtf8Error;
pub fn utf16LeToUtf8ArrayList(result: *std.array_list.Managed(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
try result.ensureUnusedCapacity(utf16le.len);
return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
}
/// Caller must free returned memory.
pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 {
/// Caller owns returned memory.
pub fn utf16LeToUtf8Alloc(allocator: Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 {
// optimistically guess that it will all be ascii.
var result = try std.array_list.Managed(u8).initCapacity(allocator, utf16le.len);
errdefer result.deinit();
@@ -982,8 +983,8 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L
return result.toOwnedSlice();
}
/// Caller must free returned memory.
pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
/// Caller owns returned memory.
pub fn utf16LeToUtf8AllocZ(allocator: Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
// optimistically guess that it will all be ascii (and allocate space for the null terminator)
var result = try std.array_list.Managed(u8).initCapacity(allocator, utf16le.len + 1);
errdefer result.deinit();
@@ -1160,7 +1161,7 @@ pub fn utf8ToUtf16LeArrayList(result: *std.array_list.Managed(u16), utf8: []cons
return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half);
}
pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {
pub fn utf8ToUtf16LeAlloc(allocator: Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {
// optimistically guess that it will not require surrogate pairs
var result = try std.array_list.Managed(u16).initCapacity(allocator, utf8.len);
errdefer result.deinit();
@@ -1169,7 +1170,7 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ Inv
return result.toOwnedSlice();
}
pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 {
pub fn utf8ToUtf16LeAllocZ(allocator: Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 {
// optimistically guess that it will not require surrogate pairs
var result = try std.array_list.Managed(u16).initCapacity(allocator, utf8.len + 1);
errdefer result.deinit();
@@ -1750,13 +1751,13 @@ pub const Wtf8Iterator = struct {
}
};
pub fn wtf16LeToWtf8ArrayList(result: *std.array_list.Managed(u8), utf16le: []const u16) mem.Allocator.Error!void {
pub fn wtf16LeToWtf8ArrayList(result: *std.array_list.Managed(u8), utf16le: []const u16) Allocator.Error!void {
try result.ensureUnusedCapacity(utf16le.len);
return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
}
/// Caller must free returned memory.
pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![]u8 {
pub fn wtf16LeToWtf8Alloc(allocator: Allocator, wtf16le: []const u16) Allocator.Error![]u8 {
// optimistically guess that it will all be ascii.
var result = try std.array_list.Managed(u8).initCapacity(allocator, wtf16le.len);
errdefer result.deinit();
@@ -1766,7 +1767,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al
}
/// Caller must free returned memory.
pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![:0]u8 {
pub fn wtf16LeToWtf8AllocZ(allocator: Allocator, wtf16le: []const u16) Allocator.Error![:0]u8 {
// optimistically guess that it will all be ascii (and allocate space for the null terminator)
var result = try std.array_list.Managed(u8).initCapacity(allocator, wtf16le.len + 1);
errdefer result.deinit();
@@ -1784,7 +1785,7 @@ pub fn wtf8ToWtf16LeArrayList(result: *std.array_list.Managed(u16), wtf8: []cons
return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half);
}
pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
pub fn wtf8ToWtf16LeAlloc(allocator: Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
// optimistically guess that it will not require surrogate pairs
var result = try std.array_list.Managed(u16).initCapacity(allocator, wtf8.len);
errdefer result.deinit();
@@ -1793,7 +1794,7 @@ pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ Inv
return result.toOwnedSlice();
}
pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 {
pub fn wtf8ToWtf16LeAllocZ(allocator: Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 {
// optimistically guess that it will not require surrogate pairs
var result = try std.array_list.Managed(u16).initCapacity(allocator, wtf8.len + 1);
errdefer result.deinit();
@@ -1870,7 +1871,7 @@ pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) error{InvalidWtf8}!void {
}
}
pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 {
pub fn wtf8ToUtf8LossyAlloc(allocator: Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 {
const utf8 = try allocator.alloc(u8, wtf8.len);
errdefer allocator.free(utf8);
@@ -1879,7 +1880,7 @@ pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ I
return utf8;
}
pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 {
pub fn wtf8ToUtf8LossyAllocZ(allocator: Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 {
const utf8 = try allocator.allocSentinel(u8, wtf8.len, 0);
errdefer allocator.free(utf8);