Merge pull request #23416 from gooncreeper/improved-fuzzer

greatly improve capabilities of the fuzzer
This commit is contained in:
Andrew Kelley
2025-09-19 09:27:25 -07:00
committed by GitHub
9 changed files with 1367 additions and 552 deletions
+18 -29
View File
@@ -4,6 +4,7 @@ const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;
const assert = std.debug.assert;
const fuzz_abi = std.Build.abi.fuzz;
pub const std_options: std.Options = .{
.logFn = log,
@@ -57,7 +58,7 @@ pub fn main() void {
fba.reset();
if (builtin.fuzz) {
const cache_dir = opt_cache_dir orelse @panic("missing --cache-dir=[path] argument");
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
fuzz_abi.fuzzer_init(.fromSlice(cache_dir));
}
if (listen) {
@@ -78,7 +79,7 @@ fn mainServer() !void {
});
if (builtin.fuzz) {
const coverage_id = fuzzer_coverage_id();
const coverage_id = fuzz_abi.fuzzer_coverage_id();
try server.serveU64Message(.coverage_id, coverage_id);
}
@@ -152,14 +153,19 @@ fn mainServer() !void {
});
},
.start_fuzzing => {
// This ensures that this code won't be analyzed and hence reference fuzzer symbols
// since they are not present.
if (!builtin.fuzz) unreachable;
const index = try server.receiveBody_u32();
const test_fn = builtin.test_functions[index];
const entry_addr = @intFromPtr(test_fn.func);
try server.serveU64Message(.fuzz_start_addr, entry_addr);
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
is_fuzz_test = false;
fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
fuzz_test_index = index;
test_fn.func() catch |err| switch (err) {
error.SkipZigTest => return,
else => {
@@ -184,6 +190,8 @@ fn mainServer() !void {
fn mainTerminal() void {
@disableInstrumentation();
if (builtin.fuzz) @panic("fuzz test requires server");
const test_fn_list = builtin.test_functions;
var ok_count: usize = 0;
var skip_count: usize = 0;
@@ -333,28 +341,8 @@ pub fn mainSimple() anyerror!void {
if (failed != 0) std.process.exit(1);
}
const FuzzerSlice = extern struct {
ptr: [*]const u8,
len: usize,
/// Inline to avoid fuzzer instrumentation.
inline fn toSlice(s: FuzzerSlice) []const u8 {
return s.ptr[0..s.len];
}
/// Inline to avoid fuzzer instrumentation.
inline fn fromSlice(s: []const u8) FuzzerSlice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
var is_fuzz_test: bool = undefined;
extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.c) void) void;
extern fn fuzzer_coverage_id() u64;
var fuzz_test_index: u32 = undefined;
pub fn fuzz(
context: anytype,
@@ -385,12 +373,12 @@ pub fn fuzz(
const global = struct {
var ctx: @TypeOf(context) = undefined;
fn fuzzer_one(input_ptr: [*]const u8, input_len: usize) callconv(.c) void {
fn test_one(input: fuzz_abi.Slice) callconv(.c) void {
@disableInstrumentation();
testing.allocator_instance = .{};
defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
log_err_count = 0;
testOne(ctx, input_ptr[0..input_len]) catch |err| switch (err) {
testOne(ctx, input.toSlice()) catch |err| switch (err) {
error.SkipZigTest => return,
else => {
std.debug.lockStdErr();
@@ -411,10 +399,11 @@ pub fn fuzz(
testing.allocator_instance = .{};
defer testing.allocator_instance = prev_allocator_state;
for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len);
global.ctx = context;
fuzzer_start(&global.fuzzer_one);
fuzz_abi.fuzzer_init_test(&global.test_one, .fromSlice(builtin.test_functions[fuzz_test_index].name));
for (options.corpus) |elem|
fuzz_abi.fuzzer_new_input(.fromSlice(elem));
fuzz_abi.fuzzer_main();
return;
}
+1242 -484
View File
@@ -1,531 +1,1274 @@
const builtin = @import("builtin");
const std = @import("std");
const Allocator = std.mem.Allocator;
const mem = std.mem;
const math = std.math;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
const fatal = std.process.fatal;
const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader;
const panic = std.debug.panic;
const abi = std.Build.abi.fuzz;
const native_endian = builtin.cpu.arch.endian();
pub const std_options = std.Options{
.logFn = logOverride,
};
var log_file_buffer: [256]u8 = undefined;
var log_file_writer: ?std.fs.File.Writer = null;
fn logOverride(
comptime level: std.log.Level,
comptime scope: @Type(.enum_literal),
comptime format: []const u8,
args: anytype,
) void {
const fw = if (log_file_writer) |*f| f else f: {
const f = fuzzer.cache_dir.createFile("tmp/libfuzzer.log", .{}) catch
@panic("failed to open fuzzer log file");
log_file_writer = f.writer(&log_file_buffer);
break :f &log_file_writer.?;
};
const f = log_f orelse
panic("attempt to use log before initialization, message:\n" ++ format, args);
f.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
defer f.unlock();
var buf: [256]u8 = undefined;
var fw = f.writer(&buf);
const end = f.getEndPos() catch |e| panic("failed to get fuzzer log file end: {t}", .{e});
fw.seekTo(end) catch |e| panic("failed to seek to fuzzer log file end: {t}", .{e});
const prefix1 = comptime level.asText();
const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
fw.interface.print(prefix1 ++ prefix2 ++ format ++ "\n", args) catch
@panic("failed to write to fuzzer log");
fw.interface.flush() catch @panic("failed to flush fuzzer log");
fw.interface.print(
"[{s}] " ++ prefix1 ++ prefix2 ++ format ++ "\n",
.{current_test_name orelse "setup"} ++ args,
) catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
fw.interface.flush() catch panic("failed to write to fuzzer log: {t}", .{fw.err.?});
}
/// Helps determine run uniqueness in the face of recursion.
export threadlocal var __sancov_lowest_stack: usize = 0;
export fn __sanitizer_cov_trace_const_cmp1(arg1: u8, arg2: u8) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_const_cmp2(arg1: u16, arg2: u16) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_const_cmp4(arg1: u32, arg2: u32) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_const_cmp8(arg1: u64, arg2: u64) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
handleCmp(@returnAddress(), arg1, arg2);
}
export fn __sanitizer_cov_trace_switch(val: u64, cases_ptr: [*]u64) void {
const pc = @returnAddress();
const len = cases_ptr[0];
const val_size_in_bits = cases_ptr[1];
const cases = cases_ptr[2..][0..len];
fuzzer.traceValue(pc ^ val);
_ = val_size_in_bits;
_ = cases;
//std.log.debug("0x{x}: switch on value {d} ({d} bits) with {d} cases", .{
// pc, val, val_size_in_bits, cases.len,
//});
}
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
// Not valuable because we already have pc tracing via 8bit counters.
_ = callee;
//const pc = @returnAddress();
//fuzzer.traceValue(pc ^ callee);
//std.log.debug("0x{x}: indirect call to 0x{x}", .{ pc, callee });
}
export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however fuzzer_init() does not need this information, since it directly reads from the symbol table.
_ = start;
_ = end;
}
export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however fuzzer_init() does not need this information, since it directly reads from the symbol table.
_ = start;
_ = end;
}
fn handleCmp(pc: usize, arg1: u64, arg2: u64) void {
fuzzer.traceValue(pc ^ arg1 ^ arg2);
//std.log.debug("0x{x}: comparison of {d} and {d}", .{ pc, arg1, arg2 });
}
const Fuzzer = struct {
rng: std.Random.DefaultPrng,
pcs: []const usize,
pc_counters: []u8,
n_runs: usize,
traced_comparisons: std.AutoArrayHashMapUnmanaged(usize, void),
/// Tracks which PCs have been seen across all runs that do not crash the fuzzer process.
/// Stored in a memory-mapped file so that it can be shared with other
/// processes and viewed while the fuzzer is running.
seen_pcs: MemoryMappedList,
cache_dir: std.fs.Dir,
/// Identifies the file name that will be used to store coverage
/// information, available to other processes.
coverage_id: u64,
unit_test_name: []const u8,
/// The index corresponds to the file name within the f/ subdirectory.
/// The string is the input.
/// This data is read-only; it caches what is on the filesystem.
corpus: std.ArrayListUnmanaged(Input),
corpus_directory: std.Build.Cache.Directory,
/// The next input that will be given to the testOne function. When the
/// current process crashes, this memory-mapped file is used to recover the
/// input.
///
/// The file size corresponds to the capacity. The length is not stored
/// and that is the next thing to work on!
input: MemoryMappedList,
const Input = struct {
bytes: []u8,
last_traced_comparison: usize,
};
const Slice = extern struct {
ptr: [*]const u8,
len: usize,
fn toZig(s: Slice) []const u8 {
return s.ptr[0..s.len];
}
fn fromZig(s: []const u8) Slice {
return .{
.ptr = s.ptr,
.len = s.len,
};
}
};
fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void {
f.cache_dir = cache_dir;
f.pc_counters = pc_counters;
f.pcs = pcs;
// Choose a file name for the coverage based on a hash of the PCs that will be stored within.
const pc_digest = std.hash.Wyhash.hash(0, std.mem.sliceAsBytes(pcs));
f.coverage_id = pc_digest;
const hex_digest = std.fmt.hex(pc_digest);
const coverage_file_path = "v/" ++ hex_digest;
// Layout of this file:
// - Header
// - list of PC addresses (usize elements)
// - list of hit flag, 1 bit per address (stored in u8 elements)
const coverage_file = createFileBail(cache_dir, coverage_file_path, .{
.read = true,
.truncate = false,
});
const n_bitset_elems = (pcs.len + @bitSizeOf(usize) - 1) / @bitSizeOf(usize);
comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
comptime assert(SeenPcsHeader.trailing[1] == .pc_addr);
const bytes_len = @sizeOf(SeenPcsHeader) +
n_bitset_elems * @sizeOf(usize) +
pcs.len * @sizeOf(usize);
const existing_len = coverage_file.getEndPos() catch |err| {
fatal("unable to check len of coverage file: {s}", .{@errorName(err)});
};
if (existing_len == 0) {
coverage_file.setEndPos(bytes_len) catch |err| {
fatal("unable to set len of coverage file: {s}", .{@errorName(err)});
};
} else if (existing_len != bytes_len) {
fatal("incompatible existing coverage file (differing lengths)", .{});
}
f.seen_pcs = MemoryMappedList.init(coverage_file, existing_len, bytes_len) catch |err| {
fatal("unable to init coverage memory map: {s}", .{@errorName(err)});
};
if (existing_len != 0) {
const existing_pcs_bytes = f.seen_pcs.items[@sizeOf(SeenPcsHeader) + @sizeOf(usize) * n_bitset_elems ..][0 .. pcs.len * @sizeOf(usize)];
const existing_pcs = std.mem.bytesAsSlice(usize, existing_pcs_bytes);
for (existing_pcs, pcs, 0..) |old, new, i| {
if (old != new) {
fatal("incompatible existing coverage file (differing PC at index {d}: {x} != {x})", .{
i, old, new,
});
}
}
} else {
const header: SeenPcsHeader = .{
.n_runs = 0,
.unique_runs = 0,
.pcs_len = pcs.len,
};
f.seen_pcs.appendSliceAssumeCapacity(std.mem.asBytes(&header));
f.seen_pcs.appendNTimesAssumeCapacity(0, n_bitset_elems * @sizeOf(usize));
f.seen_pcs.appendSliceAssumeCapacity(std.mem.sliceAsBytes(pcs));
}
}
fn initNextInput(f: *Fuzzer) void {
while (true) {
const i = f.corpus.items.len;
var buf: [30]u8 = undefined;
const input_sub_path = std.fmt.bufPrint(&buf, "{d}", .{i}) catch unreachable;
const input = f.corpus_directory.handle.readFileAlloc(input_sub_path, gpa, .limited(1 << 31)) catch |err| switch (err) {
error.FileNotFound => {
// Make this one the next input.
const input_file = f.corpus_directory.handle.createFile(input_sub_path, .{
.exclusive = true,
.truncate = false,
.read = true,
}) catch |e| switch (e) {
error.PathAlreadyExists => continue,
else => fatal("unable to create '{f}{d}: {s}", .{ f.corpus_directory, i, @errorName(err) }),
};
errdefer input_file.close();
// Initialize the mmap for the current input.
f.input = MemoryMappedList.create(input_file, 0, std.heap.page_size_max) catch |e| {
fatal("unable to init memory map for input at '{f}{d}': {s}", .{
f.corpus_directory, i, @errorName(e),
});
};
break;
},
else => fatal("unable to read '{f}{d}': {s}", .{ f.corpus_directory, i, @errorName(err) }),
};
errdefer gpa.free(input);
f.corpus.append(gpa, .{
.bytes = input,
.last_traced_comparison = 0,
}) catch |err| oom(err);
}
}
fn addCorpusElem(f: *Fuzzer, input: []const u8) !void {
try f.corpus.append(gpa, .{
.bytes = try gpa.dupe(u8, input),
.last_traced_comparison = 0,
});
}
fn start(f: *Fuzzer) !void {
const rng = fuzzer.rng.random();
// Grab the corpus which is namespaced based on `unit_test_name`.
{
if (f.unit_test_name.len == 0) fatal("test runner never set unit test name", .{});
const sub_path = try std.fmt.allocPrint(gpa, "f/{s}", .{f.unit_test_name});
f.corpus_directory = .{
.handle = f.cache_dir.makeOpenPath(sub_path, .{}) catch |err|
fatal("unable to open corpus directory 'f/{s}': {t}", .{ sub_path, err }),
.path = sub_path,
};
initNextInput(f);
}
assert(f.n_runs == 0);
// If the corpus is empty, synthesize one input.
if (f.corpus.items.len == 0) {
const len = rng.uintLessThanBiased(usize, 200);
const slice = try gpa.alloc(u8, len);
rng.bytes(slice);
f.input.appendSliceAssumeCapacity(slice);
try f.corpus.append(gpa, .{
.bytes = slice,
.last_traced_comparison = 0,
});
runOne(f, 0);
}
while (true) {
const chosen_index = rng.uintLessThanBiased(usize, f.corpus.items.len);
const modification = rng.enumValue(Mutation);
f.mutateAndRunOne(chosen_index, modification);
}
}
/// `x` represents a possible branch. It is the PC address of the possible
/// branch site, hashed together with the value(s) used that determine to
/// where it branches.
fn traceValue(f: *Fuzzer, x: usize) void {
errdefer |err| oom(err);
try f.traced_comparisons.put(gpa, x, {});
}
const Mutation = enum {
remove_byte,
modify_byte,
add_byte,
};
fn mutateAndRunOne(f: *Fuzzer, corpus_index: usize, mutation: Mutation) void {
const rng = fuzzer.rng.random();
f.input.clearRetainingCapacity();
const old_input = f.corpus.items[corpus_index].bytes;
f.input.ensureTotalCapacity(old_input.len + 1) catch @panic("mmap file resize failed");
switch (mutation) {
.remove_byte => {
const omitted_index = rng.uintLessThanBiased(usize, old_input.len);
f.input.appendSliceAssumeCapacity(old_input[0..omitted_index]);
f.input.appendSliceAssumeCapacity(old_input[omitted_index + 1 ..]);
},
.modify_byte => {
const modified_index = rng.uintLessThanBiased(usize, old_input.len);
f.input.appendSliceAssumeCapacity(old_input);
f.input.items[modified_index] = rng.int(u8);
},
.add_byte => {
const modified_index = rng.uintLessThanBiased(usize, old_input.len);
f.input.appendSliceAssumeCapacity(old_input[0..modified_index]);
f.input.appendAssumeCapacity(rng.int(u8));
f.input.appendSliceAssumeCapacity(old_input[modified_index..]);
},
}
runOne(f, corpus_index);
}
fn runOne(f: *Fuzzer, corpus_index: usize) void {
const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
f.traced_comparisons.clearRetainingCapacity();
@memset(f.pc_counters, 0);
__sancov_lowest_stack = std.math.maxInt(usize);
fuzzer_one(@volatileCast(f.input.items.ptr), f.input.items.len);
f.n_runs += 1;
_ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
// Track code coverage from all runs.
comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]);
const remainder = f.pcs.len % @bitSizeOf(usize);
const aligned_len = f.pcs.len - remainder;
const seen_pcs = header_end_ptr[0..aligned_len];
const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]);
const V = @Vector(@bitSizeOf(usize), u8);
const zero_v: V = @splat(0);
var fresh = false;
var superset = true;
for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| {
const v: V = array.*;
const mask: usize = @bitCast(v != zero_v);
const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
fresh = fresh or (prev | mask) != prev;
superset = superset and (prev | mask) != mask;
}
if (remainder > 0) {
const i = pc_counters.len;
const elem = &seen_pcs[i];
var mask: usize = 0;
for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| {
mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index);
}
const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
fresh = fresh or (prev | mask) != prev;
superset = superset and (prev | mask) != mask;
}
// First check if this is a better version of an already existing
// input, replacing that input.
if (superset or f.traced_comparisons.entries.len >= f.corpus.items[corpus_index].last_traced_comparison) {
const new_input = gpa.realloc(f.corpus.items[corpus_index].bytes, f.input.items.len) catch |err| oom(err);
f.corpus.items[corpus_index] = .{
.bytes = new_input,
.last_traced_comparison = f.traced_comparisons.count(),
};
@memcpy(new_input, @volatileCast(f.input.items));
_ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
return;
}
if (!fresh) return;
// Input is already committed to the file system, we just need to open a new file
// for the next input.
// Pre-add it to the corpus list so that it does not get redundantly picked up.
f.corpus.append(gpa, .{
.bytes = gpa.dupe(u8, @volatileCast(f.input.items)) catch |err| oom(err),
.last_traced_comparison = f.traced_comparisons.entries.len,
}) catch |err| oom(err);
f.input.deinit();
initNextInput(f);
// TODO: also mark input as "hot" so it gets prioritized for checking mutations above others.
_ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
}
};
fn createFileBail(dir: std.fs.Dir, sub_path: []const u8, flags: std.fs.File.CreateFlags) std.fs.File {
return dir.createFile(sub_path, flags) catch |err| switch (err) {
error.FileNotFound => {
const dir_name = std.fs.path.dirname(sub_path).?;
dir.makePath(dir_name) catch |e| {
fatal("unable to make path '{s}': {s}", .{ dir_name, @errorName(e) });
};
return dir.createFile(sub_path, flags) catch |e| {
fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(e) });
};
},
else => fatal("unable to create file '{s}': {s}", .{ sub_path, @errorName(err) }),
};
}
fn oom(err: anytype) noreturn {
switch (err) {
error.OutOfMemory => @panic("out of memory"),
}
}
var debug_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init;
var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
const gpa = switch (builtin.mode) {
.Debug => debug_allocator.allocator(),
.ReleaseFast, .ReleaseSmall, .ReleaseSafe => std.heap.smp_allocator,
};
var fuzzer: Fuzzer = .{
.rng = std.Random.DefaultPrng.init(0),
.input = undefined,
.pcs = undefined,
.pc_counters = undefined,
.n_runs = 0,
.cache_dir = undefined,
.seen_pcs = undefined,
.coverage_id = undefined,
.unit_test_name = &.{},
.corpus = .empty,
.corpus_directory = undefined,
.traced_comparisons = .empty,
/// Part of `exec`, however seperate to allow it to be set before `exec` is.
var log_f: ?std.fs.File = null;
var exec: Executable = .preinit;
var inst: Instrumentation = .preinit;
var fuzzer: Fuzzer = undefined;
var current_test_name: ?[]const u8 = null;
fn bitsetUsizes(elems: usize) usize {
return math.divCeil(usize, elems, @bitSizeOf(usize)) catch unreachable;
}
const Executable = struct {
/// Tracks the hit count for each pc as updated by the process's instrumentation.
pc_counters: []u8,
cache_f: std.fs.Dir,
/// Shared copy of all pcs that have been hit stored in a memory-mapped file that can viewed
/// while the fuzzer is running.
shared_seen_pcs: MemoryMappedList,
/// Hash of pcs used to uniquely identify the shared coverage file
pc_digest: u64,
/// A minimal state for this struct which instrumentation can function on.
/// Used before this structure is initialized to avoid illegal behavior
/// from instrumentation functions being called and using undefined values.
pub const preinit: Executable = .{
.pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
.cache_f = undefined,
.shared_seen_pcs = undefined,
.pc_digest = undefined,
};
fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
const pc_bitset_usizes = bitsetUsizes(pcs.len);
const coverage_file_name = std.fmt.hex(pc_digest);
comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);
var v = cache_dir.makeOpenPath("v", .{}) catch |e|
panic("failed to create directory 'v': {t}", .{e});
defer v.close();
const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
.read = true,
// If we create the file, we want to block other processes while we populate it
.lock = .exclusive,
.exclusive = true,
})) |f|
.{ f, true }
else |e| switch (e) {
error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
.mode = .read_write,
.lock = .shared,
}) catch |e2| panic(
"failed to open existing coverage file '{s}': {t}",
.{ &coverage_file_name, e2 },
), false },
else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
};
const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
pc_bitset_usizes * @sizeOf(usize) +
pcs.len * @sizeOf(usize);
if (populate) {
defer coverage_file.lock(.shared) catch |e| panic(
"failed to demote lock for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
"failed to init memory map for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{
.n_runs = 0,
.unique_runs = 0,
.pcs_len = pcs.len,
}));
map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs));
return map;
} else {
const size = coverage_file.getEndPos() catch |e| panic(
"failed to stat coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
if (size != coverage_file_len) panic(
"incompatible existing coverage file '{s}' (differing lengths: {} != {})",
.{ &coverage_file_name, size, coverage_file_len },
);
const map = MemoryMappedList.init(
coverage_file,
coverage_file_len,
coverage_file_len,
) catch |e| panic(
"failed to init memory map for coverage file '{s}': {t}",
.{ &coverage_file_name, e },
);
const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items));
if (seen_pcs_header.pcs_len != pcs.len) panic(
"incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
.{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
);
if (mem.indexOfDiff(usize, seen_pcs_header.pcAddrs(), pcs)) |i| panic(
"incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
.{ &coverage_file_name, i, seen_pcs_header.pcAddrs()[i], pcs[i] },
);
return map;
}
}
pub fn init(cache_dir_path: []const u8) Executable {
var self: Executable = undefined;
const cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
"failed to open directory '{s}': {t}",
.{ cache_dir_path, e },
);
log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
panic("failed to open directory 'f': {t}", .{e});
// Linkers are expected to automatically add symbols prefixed with these for the start and
// end of sections whose names are valid C identifiers.
const ofmt = builtin.object_format;
const section_start_prefix, const section_end_prefix = switch (ofmt) {
.elf => .{ "__start_", "__stop_" },
.macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
};
self.pc_counters = blk: {
const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
const pc_counters_start = @extern([*]u8, .{
.name = pc_counters_start_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pc_counters_start_name});
const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
const pc_counters_end = @extern([*]u8, .{
.name = pc_counters_end_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pc_counters_end_name});
break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
};
const pcs = blk: {
const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
const pcs_start = @extern([*]usize, .{
.name = pcs_start_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pcs_start_name});
const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
const pcs_end = @extern([*]usize, .{
.name = pcs_end_name,
.linkage = .weak,
}) orelse panic("missing {s} symbol", .{pcs_end_name});
break :blk pcs_start[0 .. pcs_end - pcs_start];
};
if (self.pc_counters.len != pcs.len) panic(
"pc counters length and pcs length do not match ({} != {})",
.{ self.pc_counters.len, pcs.len },
);
self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs));
self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
return self;
}
pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
return .{ .pc_counters = self.pc_counters };
}
/// Iterates over pc_counters returning a bitset for if each of them have been hit
pub const PcBitsetIterator = struct {
index: usize = 0,
pc_counters: []u8,
pub fn next(self: *PcBitsetIterator) usize {
const rest = self.pc_counters[self.index..];
if (rest.len >= @bitSizeOf(usize)) {
defer self.index += @bitSizeOf(usize);
const V = @Vector(@bitSizeOf(usize), u8);
return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
} else if (rest.len != 0) {
defer self.index += rest.len;
var res: usize = 0;
for (0.., rest) |bit_index, byte| {
res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
}
return res;
} else unreachable;
}
};
};
/// Data gathered from instrumentation functions.
/// Seperate from Executable since its state is resetable and changes.
/// Seperate from Fuzzer since it may be needed before fuzzing starts.
const Instrumentation = struct {
/// Bitset of seen pcs across all runs excluding fresh pcs.
/// This is seperate then shared_seen_pcs because multiple fuzzing processes are likely using
/// it which causes contention and unrelated pcs to our campaign being set.
seen_pcs: []usize,
/// Stores a fresh input's new pcs
fresh_pcs: []usize,
/// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx
/// have been called from and have had their already been added to const_x_vals
const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
/// Values that have been constant operands in comparisons and switch cases.
/// There may be duplicates in this array if they came from different addresses, which is
/// fine as they are likely more important and hence more likely to be selected.
const_vals2: std.ArrayListUnmanaged(u16) = .empty,
const_vals4: std.ArrayListUnmanaged(u32) = .empty,
const_vals8: std.ArrayListUnmanaged(u64) = .empty,
const_vals16: std.ArrayListUnmanaged(u128) = .empty,
/// A minimal state for this struct which instrumentation can function on.
/// Used before this structure is initialized to avoid illegal behavior
/// from instrumentation functions being called and using undefined values.
pub const preinit: Instrumentation = .{
.seen_pcs = undefined, // currently only updated by `Fuzzer`
.fresh_pcs = undefined,
};
pub fn depreinit(self: *Instrumentation) void {
self.const_vals2.deinit(gpa);
self.const_vals4.deinit(gpa);
self.const_vals8.deinit(gpa);
self.const_vals16.deinit(gpa);
self.* = undefined;
}
pub fn init() Instrumentation {
const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
const alloc_usizes = pc_bitset_usizes * 2;
const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
const fba = fba_ctx.allocator();
var self: Instrumentation = .{
.seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
.fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
};
self.reset();
return self;
}
pub fn reset(self: *Instrumentation) void {
@memset(self.seen_pcs, 0);
@memset(self.fresh_pcs, 0);
self.const_pcs.clearRetainingCapacity();
self.const_vals2.clearRetainingCapacity();
self.const_vals4.clearRetainingCapacity();
self.const_vals8.clearRetainingCapacity();
self.const_vals16.clearRetainingCapacity();
}
/// If false is returned, then the pc is marked as seen
pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
}
pub fn isFresh(self: *Instrumentation) bool {
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs) |seen_pcs| {
if (hit_pcs.next() & ~seen_pcs != 0) return true;
}
return false;
}
/// Updates `fresh_pcs`
pub fn setFresh(self: *Instrumentation) void {
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
}
}
/// Returns if `exec.pc_counters` is a superset of `fresh_pcs`.
pub fn atleastFresh(self: *Instrumentation) bool {
var hit_pcs = exec.pcBitsetIterator();
for (self.fresh_pcs) |fresh_pcs| {
if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
}
return true;
}
/// Updates based off `fresh_pcs`
fn updateSeen(self: *Instrumentation) void {
comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
const shared_seen_pcs: [*]volatile usize = @ptrCast(
exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
);
for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
seen.* |= fresh;
if (fresh != 0)
_ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
}
}
};
const Fuzzer = struct {
arena_ctx: std.heap.ArenaAllocator = .init(gpa),
rng: std.Random.DefaultPrng = .init(0),
test_one: abi.TestOne,
/// The next input that will be given to the testOne function. When the
/// current process crashes, this memory-mapped file is used to recover the
/// input.
input: MemoryMappedList,
/// Minimized past inputs leading to new pc hits.
/// These are randomly mutated in round-robin fashion
/// Element zero is always an empty input. It is gauraunteed no other elements are empty.
corpus: std.ArrayListUnmanaged([]const u8),
corpus_pos: usize,
/// List of past mutations that have led to new inputs. This way, the mutations that are the
/// most effective are the most likely to be selected again. Starts with one of each mutation.
mutations: std.ArrayListUnmanaged(Mutation) = .empty,
/// Filesystem directory containing found inputs for future runs
corpus_dir: std.fs.Dir,
corpus_dir_idx: usize = 0,
pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
var self: Fuzzer = .{
.test_one = test_one,
.input = undefined,
.corpus = .empty,
.corpus_pos = 0,
.mutations = .empty,
.corpus_dir = undefined,
};
const arena = self.arena_ctx.allocator();
self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
self.input = in: {
const f = self.corpus_dir.createFile("in", .{
.read = true,
.truncate = false,
// In case any other fuzz tests are running under the same test name,
// the input file is exclusively locked to ensures only one proceeds.
.lock = .exclusive,
.lock_nonblocking = true,
}) catch |e| switch (e) {
error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
else => panic("failed to create input file 'in': {t}", .{e}),
};
const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
const map = (if (size < std.heap.page_size_max)
MemoryMappedList.create(f, 8, std.heap.page_size_max)
else
MemoryMappedList.init(f, size, size)) catch |e|
panic("failed to memory map input file 'in': {t}", .{e});
// Perform a dry-run of the stored input if there was one in case it might reproduce a
// crash.
const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
if (size >= 8 and old_in_len != 0 and map.items.len - 8 < old_in_len) {
test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
}
break :in map;
};
inst.reset();
self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
// Ensure there is never an empty corpus. Additionally, an empty input usually leads to
// new inputs.
self.addInput(&.{});
while (true) {
var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
const bytes = self.corpus_dir.readFileAlloc(
std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
arena,
.unlimited,
) catch |e| switch (e) {
error.FileNotFound => break,
else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
};
// No corpus file of length zero will ever be created
if (bytes.len == 0)
panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
self.addInput(bytes);
self.corpus_dir_idx += 1;
}
return self;
}
pub fn deinit(self: *Fuzzer) void {
self.input.deinit();
self.corpus.deinit(gpa);
self.mutations.deinit(gpa);
self.corpus_dir.close();
self.arena_ctx.deinit();
self.* = undefined;
}
pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
self.corpus.append(gpa, bytes) catch @panic("OOM");
self.input.clearRetainingCapacity();
self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
panic("could not resize shared input file: {t}", .{e});
self.input.items.len = 8;
self.input.appendSliceAssumeCapacity(bytes);
self.run();
inst.setFresh();
inst.updateSeen();
}
/// Assumes `fresh_pcs` correspond to the input
fn minimizeInput(self: *Fuzzer) void {
// The minimization technique is kept relatively simple, we sequentially try to remove each
// byte and check that the new pcs and memory loads are still hit.
var i = self.input.items.len;
while (i != 8) {
i -= 1;
const old = self.input.orderedRemove(i);
@memset(exec.pc_counters, 0);
self.run();
if (!inst.atleastFresh()) {
self.input.insertAssumeCapacity(i, old);
} else {
// This removal may have led to new pcs or memory loads being hit, so we need to
// update them to avoid duplicates.
inst.setFresh();
}
}
}
fn run(self: *Fuzzer) void {
// `pc_counters` is not cleared since only new hits are relevant.
mem.bytesAsValue(usize, self.input.items[0..8]).* =
mem.nativeToLittle(usize, self.input.items.len - 8);
self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));
const header = mem.bytesAsValue(
abi.SeenPcsHeader,
exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
);
_ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
}
pub fn cycle(self: *Fuzzer) void {
const input = self.corpus.items[self.corpus_pos];
self.corpus_pos += 1;
if (self.corpus_pos == self.corpus.items.len)
self.corpus_pos = 0;
const rng = self.rng.random();
while (true) {
const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
if (!m.mutate(
rng,
input,
&self.input,
self.corpus.items,
inst.const_vals2.items,
inst.const_vals4.items,
inst.const_vals8.items,
inst.const_vals16.items,
)) continue;
self.run();
if (inst.isFresh()) {
@branchHint(.unlikely);
const header = mem.bytesAsValue(
abi.SeenPcsHeader,
exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
);
_ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
inst.setFresh();
self.minimizeInput();
inst.updateSeen();
// An empty-input has always been tried, so if an empty input is fresh then the
// test has to be non-deterministic. This has to be checked as duplicate empty
// entries are not allowed.
if (self.input.items.len - 8 == 0) {
std.log.warn("non-deterministic test (empty input produces different hits)", .{});
_ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
return;
}
const arena = self.arena_ctx.allocator();
const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");
self.corpus.append(gpa, bytes) catch @panic("OOM");
self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");
// Write new corpus to cache
var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
self.corpus_dir.writeFile(.{
.sub_path = std.fmt.bufPrint(
&name_buf,
"{x}",
.{self.corpus_dir_idx},
) catch unreachable,
.data = bytes,
}) catch |e| panic(
"failed to write corpus file '{x}': {t}",
.{ self.corpus_dir_idx, e },
);
self.corpus_dir_idx += 1;
}
break;
}
}
};
/// Instrumentation must not be triggered before this function is called
export fn fuzzer_init(cache_dir_path: abi.Slice) void {
inst.depreinit();
exec = .init(cache_dir_path.toSlice());
inst = .init();
}
/// Invalid until `fuzzer_init` is called.
export fn fuzzer_coverage_id() u64 {
return fuzzer.coverage_id;
return exec.pc_digest;
}
var fuzzer_one: *const fn (input_ptr: [*]const u8, input_len: usize) callconv(.c) void = undefined;
export fn fuzzer_start(testOne: @TypeOf(fuzzer_one)) void {
fuzzer_one = testOne;
fuzzer.start() catch |err| oom(err);
/// fuzzer_init must be called beforehand
export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
current_test_name = unit_test_name.toSlice();
fuzzer = .init(test_one, unit_test_name.toSlice());
}
export fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void {
fuzzer.unit_test_name = name_ptr[0..name_len];
/// fuzzer_init_test must be called beforehand
/// The callee owns the memory of bytes and must not free it until the fuzzer is finished.
export fn fuzzer_new_input(bytes: abi.Slice) void {
// An entry of length zero is always added and duplicates of it are not allowed.
if (bytes.len != 0)
fuzzer.addInput(bytes.toSlice());
}
export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void {
// Linkers are expected to automatically add `__start_<section>` and
// `__stop_<section>` symbols when section names are valid C identifiers.
/// fuzzer_init_test must be called first
export fn fuzzer_main() void {
while (true) {
fuzzer.cycle();
}
}
const ofmt = builtin.object_format;
/// Helps determine run uniqueness in the face of recursion.
/// Currently not used by the fuzzer.
export threadlocal var __sancov_lowest_stack: usize = 0;
const start_symbol_prefix: []const u8 = if (ofmt == .macho)
"\x01section$start$__DATA$__"
else
"__start___";
const end_symbol_prefix: []const u8 = if (ofmt == .macho)
"\x01section$end$__DATA$__"
else
"__stop___";
/// Inline since the return address of the callee is required
inline fn genericConstCmp(T: anytype, val: T, comptime const_vals_field: []const u8) void {
if (!inst.constPcSeen(@returnAddress())) {
@branchHint(.unlikely);
@field(inst, const_vals_field).append(gpa, val) catch @panic("OOM");
}
}
const pc_counters_start_name = start_symbol_prefix ++ "sancov_cntrs";
const pc_counters_start = @extern([*]u8, .{
.name = pc_counters_start_name,
.linkage = .weak,
}) orelse fatal("missing {s} symbol", .{pc_counters_start_name});
export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
_ = const_arg;
_ = arg;
}
const pc_counters_end_name = end_symbol_prefix ++ "sancov_cntrs";
const pc_counters_end = @extern([*]u8, .{
.name = pc_counters_end_name,
.linkage = .weak,
}) orelse fatal("missing {s} symbol", .{pc_counters_end_name});
export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
_ = arg;
genericConstCmp(u16, const_arg, "const_vals2");
}
const pc_counters = pc_counters_start[0 .. pc_counters_end - pc_counters_start];
export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
_ = arg;
genericConstCmp(u32, const_arg, "const_vals4");
}
const pcs_start_name = start_symbol_prefix ++ "sancov_pcs1";
const pcs_start = @extern([*]usize, .{
.name = pcs_start_name,
.linkage = .weak,
}) orelse fatal("missing {s} symbol", .{pcs_start_name});
export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
_ = arg;
genericConstCmp(u64, const_arg, "const_vals8");
}
const pcs_end_name = end_symbol_prefix ++ "sancov_pcs1";
const pcs_end = @extern([*]usize, .{
.name = pcs_end_name,
.linkage = .weak,
}) orelse fatal("missing {s} symbol", .{pcs_end_name});
export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
_ = val;
if (!inst.constPcSeen(@returnAddress())) {
@branchHint(.unlikely);
const case_bits = cases[1];
const cases_slice = cases[2..][0..cases[0]];
switch (case_bits) {
// 8-bit cases are ignored because they are likely to be randomly generated
0...8 => {},
9...16 => for (cases_slice) |c|
inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM"),
17...32 => for (cases_slice) |c|
inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM"),
33...64 => for (cases_slice) |c|
inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM"),
else => {}, // Should be impossible
}
}
}
const pcs = pcs_start[0 .. pcs_end - pcs_start];
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
_ = arg1;
_ = arg2;
}
const cache_dir_path = cache_dir_struct.toZig();
const cache_dir = if (cache_dir_path.len == 0)
std.fs.cwd()
else
std.fs.cwd().makeOpenPath(cache_dir_path, .{ .iterate = true }) catch |err| {
fatal("unable to open fuzz directory '{s}': {s}", .{ cache_dir_path, @errorName(err) });
export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
_ = arg1;
_ = arg2;
}
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
// Not valuable because we already have pc tracing via 8bit counters.
_ = callee;
}
export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however, fuzzer_init() does not need this information since it directly reads from the
// symbol table.
_ = start;
_ = end;
}
export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
// clang will emit a call to this function when compiling with code coverage instrumentation.
// however, fuzzer_init() does not need this information since it directly reads from the
// symbol table.
_ = start;
_ = end;
}
/// Copy all of source into dest at position 0.
/// If the slices overlap, dest.ptr must be <= src.ptr.
fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
for (dest, source) |*d, s| d.* = s;
}
/// Copy all of source into dest at position 0.
/// If the slices overlap, dest.ptr must be >= src.ptr.
fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
var i = source.len;
while (i > 0) {
i -= 1;
dest[i] = source[i];
}
}
const Mutation = enum {
/// Applies .insert_*_span, .push_*_span
/// For wtf-8, this limits code units, not code points
const max_insert_len = 12;
/// Applies to .insert_large_*_span and .push_large_*_span
/// 4096 is used as it is a common sector size
const max_large_insert_len = 4096;
/// Applies to .delete_span and .pop_span
const max_delete_len = 16;
/// Applies to .set_*span, .move_span, .set_existing_span
const max_set_len = 12;
const max_replicate_len = 64;
const AddValue = i6;
const SmallValue = i10;
delete_byte,
delete_span,
/// Removes the last byte from the input
pop_byte,
pop_span,
/// Inserts a group of bytes which is already in the input and removes the original copy.
move_span,
/// Replaces a group of bytes in the input with another group of bytes in the input
set_existing_span,
insert_existing_span,
push_existing_span,
set_rng_byte,
set_rng_span,
insert_rng_byte,
insert_rng_span,
/// Adds a byte to the end of the input
push_rng_byte,
push_rng_span,
set_zero_byte,
set_zero_span,
insert_zero_byte,
insert_zero_span,
push_zero_byte,
push_zero_span,
/// Inserts a lot of zeros to the end of the input
/// This is intended to work with fuzz tests that require data in (large) blocks
push_large_zero_span,
/// Inserts a group of ascii printable character
insert_print_span,
/// Inserts a group of character from a...z, A...Z, 0...9, _, and ' '
insert_common_span,
/// Inserts a group of ascii digits possibly preceded by a `-`
insert_integer,
/// Code units are evenly distributed between one to four
insert_wtf8_char,
insert_wtf8_span,
/// Inserts a group of bytes from another input
insert_splice_span,
// utf16 is not yet included since insertion of random bytes should adaquetly check
// BMP character, surrogate handling, and occasionally chacters outside of the BMP.
set_print_span,
set_common_span,
set_splice_span,
/// Similar to set_splice_span, but the bytes are copied to the same index instead of a random
replicate_splice_span,
push_print_span,
push_common_span,
push_integer,
push_wtf8_char,
push_wtf8_span,
push_splice_span,
/// Clears a random amount of high bits of a byte
truncate_8,
truncate_16le,
truncate_16be,
truncate_32le,
truncate_32be,
truncate_64le,
truncate_64be,
/// Flips a random bit
xor_1,
/// Swaps up to three bits of a byte biased to less bits
xor_few_8,
/// Swaps up to six bits of a 16-bit value biased to less bits
xor_few_16,
/// Swaps up to nine bits of a 32-bit value biased to less bits
xor_few_32,
/// Swaps up to twelve bits of 64-bit value biased to less bits
xor_few_64,
/// Adds to a byte a value of type AddValue
add_8,
add_16le,
add_16be,
add_32le,
add_32be,
add_64le,
add_64be,
/// Sets a 16-bit little-endian value to a value of type SmallValue
set_small_16le,
set_small_16be,
set_small_32le,
set_small_32be,
set_small_64le,
set_small_64be,
insert_small_16le,
insert_small_16be,
insert_small_32le,
insert_small_32be,
insert_small_64le,
insert_small_64be,
push_small_16le,
push_small_16be,
push_small_32le,
push_small_32be,
push_small_64le,
push_small_64be,
set_const_16,
set_const_32,
set_const_64,
set_const_128,
insert_const_16,
insert_const_32,
insert_const_64,
insert_const_128,
push_const_16,
push_const_32,
push_const_64,
push_const_128,
/// Sets a byte with up to three bits set biased to less bits
set_few_8,
/// Sets a 16-bit value with up to six bits set biased to less bits
set_few_16,
/// Sets a 32-bit value with up to nine bits set biased to less bits
set_few_32,
/// Sets a 64-bit value with up to twelve bits set biased to less bits
set_few_64,
insert_few_8,
insert_few_16,
insert_few_32,
insert_few_64,
push_few_8,
push_few_16,
push_few_32,
push_few_64,
/// Randomizes a random contigous group of bits in a byte
packed_set_rng_8,
packed_set_rng_16le,
packed_set_rng_16be,
packed_set_rng_32le,
packed_set_rng_32be,
packed_set_rng_64le,
packed_set_rng_64be,
fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
var result: T = 0;
var remaining_bits = rng.intRangeAtMostBiased(u16, 1, bits);
while (remaining_bits > 0) {
result |= @shlExact(@as(T, 1), rng.int(math.Log2Int(T)));
remaining_bits -= 1;
}
return result;
}
/// Returns if the mutation was applicable to the input
pub fn mutate(
mutation: Mutation,
rng: std.Random,
in: []const u8,
out: *MemoryMappedList,
corpus: []const []const u8,
const_vals2: []const u16,
const_vals4: []const u32,
const_vals8: []const u64,
const_vals16: []const u128,
) bool {
out.clearRetainingCapacity();
const new_capacity = 8 + in.len + @max(
16, // builtin 128 value
Mutation.max_insert_len,
Mutation.max_large_insert_len,
);
out.ensureTotalCapacity(new_capacity) catch |e|
panic("could not resize shared input file: {t}", .{e});
out.items.len = 8; // Length field
const applied = switch (mutation) {
inline else => |m| m.comptimeMutate(
rng,
in,
out,
corpus,
const_vals2,
const_vals4,
const_vals8,
const_vals16,
),
};
if (!applied)
assert(out.items.len == 8)
else
assert(out.items.len <= new_capacity);
return applied;
}
/// Assumes out has already been cleared
fn comptimeMutate(
comptime mutation: Mutation,
rng: std.Random,
in: []const u8,
out: *MemoryMappedList,
corpus: []const []const u8,
const_vals2: []const u16,
const_vals4: []const u32,
const_vals8: []const u64,
const_vals16: []const u128,
) bool {
const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
const class: Class, const class_ctx = switch (mutation) {
// zig fmt: off
.move_span => .{ .move_span, null },
.replicate_splice_span => .{ .replicate_splice_span, null },
.delete_byte => .{ .remove, .{ .delete, 1 } },
.delete_span => .{ .remove, .{ .delete, max_delete_len } },
.pop_byte => .{ .remove, .{ .pop, 1 } },
.pop_span => .{ .remove, .{ .pop, max_delete_len } },
.set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
.set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
.set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
.set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
.set_common_span => .{ .new, .{ .set , 1, .common , .many } },
.set_print_span => .{ .new, .{ .set , 1, .print , .many } },
.set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
.set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
.set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
.set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
.set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
.set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
.set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
.set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
.set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
.set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
.set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
.set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
.set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
.set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
.set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
.set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
.insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
.insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
.insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
.insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
.insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
.insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
.insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
.insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
.insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
.insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
.insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
.insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
.insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
.insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
.insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
.insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
.insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
.insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
.insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
.insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
.insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
.insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
.insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
.insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
.insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
.push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
.push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
.push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
.push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
.push_print_span => .{ .new, .{ .push , 0, .print , .many } },
.push_common_span => .{ .new, .{ .push , 0, .common , .many } },
.push_integer => .{ .new, .{ .push , 0, .integer , .many } },
.push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
.push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
.push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
.push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
.push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
.push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
.push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
.push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
.push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
.push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
.push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
.push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
.push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
.push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
.push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
.push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
.push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
.push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
.push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
.xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
.xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
.xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
.xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
.xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
.truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
.truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
.truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
.truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
.truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
.truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
.truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
.add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
.add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
.add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
.add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
.add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
.add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
.add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
.packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
.packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
.packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
.packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
.packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
.packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
.packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
// zig fmt: on
};
fuzzer.init(cache_dir, pc_counters, pcs) catch |err|
fatal("unable to init fuzzer: {s}", .{@errorName(err)});
}
switch (class) {
.new => {
const op: enum {
set,
insert,
push,
export fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void {
fuzzer.addCorpusElem(input_ptr[0..input_len]) catch |err|
fatal("failed to add corpus element: {s}", .{@errorName(err)});
}
pub fn maxLen(comptime op: @This(), in_len: usize) usize {
return switch (op) {
.set => @min(in_len, max_set_len),
.insert, .push => max_insert_len,
};
}
}, const min_in_len, const data: enum {
rng,
zero,
common,
print,
integer,
wtf8,
existing,
splice,
@"const",
small,
few,
}, const data_ctx = class_ctx;
const Size = enum { one, many, large };
if (in.len < min_in_len) return false;
if (data == .@"const" and data_ctx.len == 0) return false;
const splice_i = if (data == .splice) blk: {
// Element zero always holds an empty input, so we do not select it
if (corpus.len == 1) return false;
break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
} else undefined;
// Only needs to be followed for set
const len = switch (data) {
else => switch (@as(Size, data_ctx)) {
.one => 1,
.many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
.large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
},
.wtf8 => undefined, // varies by size of each code unit
.splice => rng.intRangeAtMostBiased(usize, 1, @min(
corpus[splice_i].len,
op.maxLen(in.len),
)),
.existing => rng.intRangeAtMostBiased(usize, 1, @min(
in.len,
op.maxLen(in.len),
)),
.@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
.small, .few => @sizeOf(data_ctx[0]),
};
const i = switch (op) {
.set => rng.uintAtMostBiased(usize, in.len - len),
.insert => rng.uintAtMostBiased(usize, in.len),
.push => in.len,
};
out.appendSliceAssumeCapacity(in[0..i]);
switch (data) {
.rng => {
var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
rng.bytes(bytes[0..len]);
out.appendSliceAssumeCapacity(bytes[0..len]);
},
.zero => out.appendNTimesAssumeCapacity(0, len),
.common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
c.* = switch (rng.int(u6)) {
0 => ' ',
1...10 => |x| '0' + (@as(u8, x) - 1),
11...36 => |x| 'A' + (@as(u8, x) - 11),
37 => '_',
38...63 => |x| 'a' + (@as(u8, x) - 38),
};
},
.print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
},
.integer => {
const negative = len != 0 and rng.boolean();
if (negative) {
out.appendAssumeCapacity('-');
}
for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
c.* = rng.intRangeAtMostBiased(u8, '0', '9');
}
},
.wtf8 => {
comptime assert(op != .set);
var codepoints: usize = if (data_ctx == .one)
1
else
rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
while (true) {
const units1 = rng.int(u2);
const value = switch (units1) {
0 => rng.int(u7),
1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
};
const units = @as(u3, units1) + 1;
var buf: [4]u8 = undefined;
assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
out.appendSliceAssumeCapacity(buf[0..units]);
codepoints -= 1;
if (codepoints == 0) break;
}
},
.existing => {
const j = rng.uintAtMostBiased(usize, in.len - len);
out.appendSliceAssumeCapacity(in[j..][0..len]);
},
.splice => {
const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
},
.@"const" => out.appendSliceAssumeCapacity(mem.asBytes(
&data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
)),
.small => out.appendSliceAssumeCapacity(mem.asBytes(
&mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
)),
.few => out.appendSliceAssumeCapacity(mem.asBytes(
&fewValue(rng, data_ctx[0], data_ctx[1]),
)),
}
switch (op) {
.set => out.appendSliceAssumeCapacity(in[i + len ..]),
.insert => out.appendSliceAssumeCapacity(in[i..]),
.push => {},
}
},
.remove => {
if (in.len == 0) return false;
const Op = enum { delete, pop };
const op: Op, const max_len = class_ctx;
// LessThan is used so we don't delete the entire span (which is unproductive since
// an empty input has always been tried)
const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
usize,
@min(max_len + 1, in.len),
);
switch (op) {
.delete => {
const i = rng.uintAtMostBiased(usize, in.len - len);
out.appendSliceAssumeCapacity(in[0..i]);
out.appendSliceAssumeCapacity(in[i + len ..]);
},
.pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
}
},
.rmw => {
const Op = enum { xor, truncate, add, packed_rng };
const op: Op, const T, const endian, const xor_bits = class_ctx;
if (in.len < @sizeOf(T)) return false;
const Log2T = math.Log2Int(T);
const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
const new = switch (op) {
.xor => old ^ fewValue(rng, T, xor_bits),
.truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
.add => old +% addend: {
const val = rng.int(Mutation.AddValue);
break :addend if (val == 0) 1 else val;
},
.packed_rng => blk: {
const bits = rng.int(math.Log2Int(T)) +| 1;
break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
},
};
out.appendSliceAssumeCapacity(in);
mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
mem.nativeTo(T, new, endian);
},
.move_span => {
if (in.len < 2) return false;
// One less since moving whole output will never change anything
const len = rng.intRangeAtMostBiased(usize, 1, @min(
in.len - 1,
Mutation.max_set_len,
));
const src = rng.uintAtMostBiased(usize, in.len - len);
// This indexes into the final input
const dst = blk: {
const res = rng.uintAtMostBiased(usize, in.len - len - 1);
break :blk res + @intFromBool(res >= src);
};
if (src < dst) {
out.appendSliceAssumeCapacity(in[0..src]);
out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
out.appendSliceAssumeCapacity(in[src..][0..len]);
out.appendSliceAssumeCapacity(in[dst + len ..]);
} else {
out.appendSliceAssumeCapacity(in[0..dst]);
out.appendSliceAssumeCapacity(in[src..][0..len]);
out.appendSliceAssumeCapacity(in[dst..src]);
out.appendSliceAssumeCapacity(in[src + len ..]);
}
},
.replicate_splice_span => {
if (in.len == 0) return false;
if (corpus.len == 1) return false;
const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
out.appendSliceAssumeCapacity(in[0..i]);
out.appendSliceAssumeCapacity(from[i..][0..len]);
out.appendSliceAssumeCapacity(in[i + len ..]);
},
}
return true;
}
};
/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
pub const MemoryMappedList = struct {
@@ -654,8 +1397,23 @@ pub const MemoryMappedList = struct {
fn growCapacity(current: usize, minimum: usize) usize {
var new = current;
while (true) {
new = std.mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
new = mem.alignForward(usize, new + new / 2, std.heap.page_size_max);
if (new >= minimum) return new;
}
}
pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
assert(l.items.len + 1 <= l.capacity);
l.items.len += 1;
volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
l.items[i] = item;
}
pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
assert(l.items.len + 1 <= l.capacity);
const old = l.items[i];
volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
l.items.len -= 1;
return old;
}
};
+2 -3
View File
@@ -252,9 +252,8 @@ pub fn sendUpdate(
const seen_pcs = cov_header.seenBits();
const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic);
const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic);
if (prev.unique_runs != unique_runs) {
// There has been an update.
if (prev.unique_runs == 0) {
{
if (unique_runs != 0 and prev.unique_runs == 0) {
// We need to send initial context.
const header: abi.SourceIndexHeader = .{
.directories_len = @intCast(coverage_map.coverage.directories.entries.len),
+20
View File
@@ -138,6 +138,26 @@ pub const Rebuild = extern struct {
/// ABI bits specifically relating to the fuzzer interface.
pub const fuzz = struct {
pub const TestOne = *const fn (Slice) callconv(.c) void;
pub extern fn fuzzer_init(cache_dir_path: Slice) void;
pub extern fn fuzzer_coverage_id() u64;
pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
pub extern fn fuzzer_new_input(bytes: Slice) void;
pub extern fn fuzzer_main() void;
pub const Slice = extern struct {
ptr: [*]const u8,
len: usize,
pub fn toSlice(s: Slice) []const u8 {
return s.ptr[0..s.len];
}
pub fn fromSlice(s: []const u8) Slice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
/// libfuzzer uses this and its usize is the one that counts. To match the ABI,
/// make the ints be the size of the target used with libfuzzer.
///
+17
View File
@@ -517,3 +517,20 @@ test isNumberFormattedLikeAnInteger {
try std.testing.expect(!isNumberFormattedLikeAnInteger("1e10"));
try std.testing.expect(!isNumberFormattedLikeAnInteger("1E10"));
}
test "fuzz" {
try std.testing.fuzz({}, fuzzTestOne, .{});
}
fn fuzzTestOne(_: void, input: []const u8) !void {
var buf: [16384]u8 = undefined;
var fba: std.heap.FixedBufferAllocator = .init(&buf);
var scanner = Scanner.initCompleteInput(fba.allocator(), input);
// Property: There are at most input.len tokens
var tokens: usize = 0;
while ((scanner.next() catch return) != .end_of_document) {
tokens += 1;
if (tokens > input.len) return error.Overflow;
}
}
+16
View File
@@ -6451,3 +6451,19 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void {
try std.testing.expectEqual(expected, tree.errors[i].tag);
}
}
test "fuzz ast parse" {
try std.testing.fuzz({}, fuzzTestOneParse, .{});
}
fn fuzzTestOneParse(_: void, input: []const u8) !void {
// The first byte holds if zig / zon
if (input.len == 0) return;
const mode: std.zig.Ast.Mode = if (input[0] & 1 == 0) .zig else .zon;
const bytes = input[1..];
var fba: std.heap.FixedBufferAllocator = .init(&fixed_buffer_mem);
const allocator = fba.allocator();
const source = allocator.dupeZ(u8, bytes) catch return;
_ = std.zig.Ast.parse(allocator, source, mode) catch return;
}
+15 -14
View File
@@ -1721,10 +1721,14 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
try std.testing.expectEqual(source.len, last_token.loc.end);
}
fn testPropertiesUpheld(context: void, source: []const u8) anyerror!void {
_ = context;
const source0 = try std.testing.allocator.dupeZ(u8, source);
defer std.testing.allocator.free(source0);
fn testPropertiesUpheld(_: void, source: []const u8) !void {
var source0_buf: [512]u8 = undefined;
if (source.len + 1 > source0_buf.len)
return;
@memcpy(source0_buf[0..source.len], source);
source0_buf[source.len] = 0;
const source0 = source0_buf[0..source.len :0];
var tokenizer = Tokenizer.init(source0);
var tokenization_failed = false;
while (true) {
@@ -1750,18 +1754,15 @@ fn testPropertiesUpheld(context: void, source: []const u8) anyerror!void {
}
}
if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| {
if (tokenization_failed) return;
for (source0) |cur| {
// Property: No null byte allowed except at end.
if (cur == 0) {
try std.testing.expect(tokenization_failed);
return error.TestUnexpectedResult;
}
// Property: No ASCII control characters other than \n and \t are allowed.
if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') {
try std.testing.expect(tokenization_failed);
// Property: No ASCII control characters other than \n, \t, and \r are allowed.
if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t' and cur != '\r') {
return error.TestUnexpectedResult;
}
// Property: All '\r' must be followed by '\n'.
if (cur == '\r' and next != '\n') {
try std.testing.expect(tokenization_failed);
}
};
}
}
+1
View File
@@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.fuzz = true,
}),
.use_llvm = true, // #23423
});
b.installArtifact(exe);
+36 -22
View File
@@ -1,29 +1,43 @@
const std = @import("std");
const abi = std.Build.abi.fuzz;
const native_endian = @import("builtin").cpu.arch.endian();
const FuzzerSlice = extern struct {
ptr: [*]const u8,
len: usize,
fn fromSlice(s: []const u8) FuzzerSlice {
return .{ .ptr = s.ptr, .len = s.len };
}
};
extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
extern fn fuzzer_coverage_id() u64;
fn testOne(in: abi.Slice) callconv(.c) void {
std.debug.assertReadable(in.toSlice());
}
pub fn main() !void {
var gpa: std.heap.GeneralPurposeAllocator(.{}) = .init;
defer _ = gpa.deinit();
const args = try std.process.argsAlloc(gpa.allocator());
defer std.process.argsFree(gpa.allocator(), args);
var debug_gpa_ctx: std.heap.DebugAllocator(.{}) = .init;
defer _ = debug_gpa_ctx.deinit();
const gpa = debug_gpa_ctx.allocator();
const cache_dir = args[1];
var args = try std.process.argsWithAllocator(gpa);
defer args.deinit();
_ = args.skip(); // executable name
fuzzer_init(FuzzerSlice.fromSlice(cache_dir));
fuzzer_init_corpus_elem("hello".ptr, "hello".len);
fuzzer_set_name("test".ptr, "test".len);
_ = fuzzer_coverage_id();
const cache_dir_path = args.next() orelse @panic("expected cache directory path argument");
var cache_dir = try std.fs.cwd().openDir(cache_dir_path, .{});
defer cache_dir.close();
abi.fuzzer_init(.fromSlice(cache_dir_path));
abi.fuzzer_init_test(testOne, .fromSlice("test"));
abi.fuzzer_new_input(.fromSlice(""));
abi.fuzzer_new_input(.fromSlice("hello"));
const pc_digest = abi.fuzzer_coverage_id();
const coverage_file_path = "v/" ++ std.fmt.hex(pc_digest);
const coverage_file = try cache_dir.openFile(coverage_file_path, .{});
defer coverage_file.close();
var read_buf: [@sizeOf(abi.SeenPcsHeader)]u8 = undefined;
var r = coverage_file.reader(&read_buf);
const pcs_header = r.interface.takeStruct(abi.SeenPcsHeader, native_endian) catch return r.err.?;
if (pcs_header.pcs_len == 0)
return error.ZeroPcs;
const expected_len = @sizeOf(abi.SeenPcsHeader) +
try std.math.divCeil(usize, pcs_header.pcs_len, @bitSizeOf(usize)) * @sizeOf(usize) +
pcs_header.pcs_len * @sizeOf(usize);
if (try coverage_file.getEndPos() != expected_len)
return error.WrongEnd;
}