mirror of
https://codeberg.org/ziglang/zig.git
synced 2026-04-27 19:09:47 +03:00
zstd.Decompress: smarter rebase when discarding (#30891)
The call to `rebase` in `discardIndirect` and `discardDirect` was inappropriate. As `rebase` expects the `capacity` parameter to exclude the sliding window, this call was asking for ANOTHER `d.window_len` bytes. This was impossible to fulfill with a buffer smaller than 2*`d.window_len`, and caused [#25764](https://github.com/ziglang/zig/issues/25764). This PR adds a basic test to do a discard (which does trigger [#25764](https://github.com/ziglang/zig/issues/25764)), and rebases only as much as is required to make the discard succeed ([or no rebase at all](https://github.com/ziglang/zig/issues/25764#issuecomment-3484716253)). That means: ideally rebase to fit `limit`, or if the buffer is too small, as much as possible. I must say, `discardDirect` does not make much sense to me, but I replaced it anyway. `rebaseForDiscard` works fine with `d.reader.buffer.len == 0`. Let me know if anything should be changed. Reviewed-on: https://codeberg.org/ziglang/zig/pulls/30891 Reviewed-by: Andrew Kelley <andrew@ziglang.org> Co-authored-by: mercenary <mercenary@noreply.codeberg.org> Co-committed-by: mercenary <mercenary@noreply.codeberg.org>
This commit is contained in:
@@ -728,6 +728,14 @@ pub const Limit = enum(usize) {
|
||||
return @enumFromInt(@min(@intFromEnum(a), @intFromEnum(b)));
|
||||
}
|
||||
|
||||
/// Return the larger of two limits. `.unlimited` dominates: if either
/// operand is unlimited, the result is unlimited.
pub fn max(a: Limit, b: Limit) Limit {
    if (a == .unlimited or b == .unlimited) return .unlimited;
    const larger = @max(@intFromEnum(a), @intFromEnum(b));
    return @enumFromInt(larger);
}
|
||||
|
||||
/// Clamp `n` to the limit `l`, returning the smaller of the two as a
/// plain integer. An `.unlimited` limit never reduces `n`.
pub fn minInt(l: Limit, n: usize) usize {
    const raw = @intFromEnum(l);
    return if (raw < n) raw else n;
}
|
||||
|
||||
@@ -88,6 +88,17 @@ fn testDecompress(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
|
||||
return out.toOwnedSlice();
|
||||
}
|
||||
|
||||
/// Create a `Decompress` from `compressed` and immediately discard all output. Returns the number
/// of discarded bytes.
fn testDiscard(gpa: std.mem.Allocator, compressed: []const u8) !usize {
    // Buffer must hold the sliding window plus at least one full block so the
    // decompressor can make progress while discarding.
    const window_buf = try gpa.alloc(u8, default_window_len + block_size_max);
    defer gpa.free(window_buf);

    var input: std.Io.Reader = .fixed(compressed);
    var decompress: Decompress = .init(&input, window_buf, .{});
    return try decompress.reader.discardRemaining();
}
|
||||
|
||||
fn testExpectDecompress(uncompressed: []const u8, compressed: []const u8) !void {
|
||||
const gpa = std.testing.allocator;
|
||||
const result = try testDecompress(gpa, compressed);
|
||||
@@ -117,6 +128,8 @@ test Decompress {
|
||||
|
||||
try testExpectDecompress(uncompressed, compressed3);
|
||||
try testExpectDecompress(uncompressed, compressed19);
|
||||
try std.testing.expectEqual(uncompressed.len, testDiscard(std.testing.allocator, compressed3));
|
||||
try std.testing.expectEqual(uncompressed.len, testDiscard(std.testing.allocator, compressed19));
|
||||
}
|
||||
|
||||
test "partial magic number" {
|
||||
|
||||
@@ -123,9 +123,13 @@ fn rebaseFallible(r: *Reader, capacity: usize) Reader.RebaseError!void {
|
||||
rebase(r, capacity);
|
||||
}
|
||||
|
||||
// Rebase the buffer, keeping at least the sliding window (`d.window_len` bytes) buffered
|
||||
fn rebase(r: *Reader, capacity: usize) void {
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
// `capacity` must fit in the buffer along with the required sliding window
|
||||
assert(capacity <= r.buffer.len - d.window_len);
|
||||
// According to the vtable contract, this function will only be called if the free space in the
|
||||
// buffer cannot already fit `capacity` bytes
|
||||
assert(r.end + capacity > r.buffer.len);
|
||||
const discard_n = @min(r.seek, r.end - d.window_len);
|
||||
const keep = r.buffer[discard_n..r.end];
|
||||
@@ -134,11 +138,27 @@ fn rebase(r: *Reader, capacity: usize) void {
|
||||
r.seek -= discard_n;
|
||||
}
|
||||
|
||||
/// Rebase `d.reader.buffer` as much as needed for a discard limited by `limit`.
/// If the buffer is too small to satisfy `limit` while keeping the sliding
/// window, rebase as much as possible instead; if the free tail already has
/// enough room, do not rebase at all.
fn rebaseForDiscard(d: *Decompress, limit: std.Io.Limit) void {
    // Always want room for at least one full block, even under a tiny limit.
    const wanted = limit.max(Limit.limited(zstd.block_size_max));
    // Capacity that can be reclaimed once the sliding window is reserved
    // (saturating: a buffer smaller than the window yields zero).
    const reclaimable = d.reader.buffer.len -| d.window_len;
    // Bytes we will actually ask `rebase` for.
    const needed = wanted.minInt(reclaimable);

    // Only rebase when the current free tail cannot already hold `needed` --
    // `rebase` asserts it is called only in that situation.
    const free_space = d.reader.buffer.len - d.reader.end;
    if (free_space < needed) rebase(&d.reader, needed);
}
|
||||
|
||||
/// This could be improved so that when an amount is discarded that includes an
|
||||
/// entire frame, skip decoding that frame.
|
||||
fn discardDirect(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
rebase(r, d.window_len);
|
||||
rebaseForDiscard(d, limit);
|
||||
var writer: Writer = .{
|
||||
.vtable = &.{
|
||||
.drain = std.Io.Writer.Discarding.drain,
|
||||
@@ -162,7 +182,7 @@ fn discardDirect(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
|
||||
|
||||
fn discardIndirect(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
rebase(r, d.window_len);
|
||||
rebaseForDiscard(d, limit);
|
||||
var writer: Writer = .{
|
||||
.buffer = r.buffer,
|
||||
.end = r.end,
|
||||
|
||||
Reference in New Issue
Block a user