From ebca8c2dbba5b4c0db9910d4cbd854d679114a8d Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Mon, 23 Mar 2026 18:47:56 -0400 Subject: [PATCH 01/11] x86_64: fix runtime array concat with comptime slice --- src/codegen/x86_64/CodeGen.zig | 1 + test/behavior/array.zig | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 9ba98c3e83..50b5bdc150 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -170801,6 +170801,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => |e| return e, }; try ops[0].toSlicePtr(cg); + try ops[1].toSlicePtr(cg); cg.select(&.{}, &.{}, &ops, switch (air_tag) { else => unreachable, inline .memcpy, .memmove => |symbol| comptime &.{.{ diff --git a/test/behavior/array.zig b/test/behavior/array.zig index 2bd8555529..2f3aa841bc 100644 --- a/test/behavior/array.zig +++ b/test/behavior/array.zig @@ -45,6 +45,18 @@ fn getArrayLen(a: []const u32) usize { return a.len; } +test "runtime array concat with comptime slice" { + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; + + var a: [1]u8 = .{1}; + const b = (comptime @as([]const u8, &.{0})) ++ &a; + const c = &a ++ (comptime @as([]const u8, &.{0})); + try std.testing.expectEqualSlices(u8, &.{ 0, 1 }, b); + try std.testing.expectEqualSlices(u8, &.{ 1, 0 }, c); +} + test "array concat with undefined" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; From 2aee0cd6b95d8154a8245d2d1d7be3f6058c659c Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 16:34:04 -0400 Subject: [PATCH 02/11] add an ast smith This generates zig ASTs from `testing.Smith` and is based off the langref's PEG. 
The choice to not build the Ast while generating and instead parsing it afterwards makes the smith more versatile by not being tied to a single implementation at a cost of efficiency. Additionally, a new function `boolWeighted` was added to `Smith` due to its frequent use in `AstSmith`. --- lib/std/Build/abi.zig | 12 +- lib/std/testing/Smith.zig | 39 +- lib/std/zig.zig | 2 + lib/std/zig/AstSmith.zig | 2602 +++++++++++++++++++++++++++++++++++++ 4 files changed, 2649 insertions(+), 6 deletions(-) create mode 100644 lib/std/zig/AstSmith.zig diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig index 28f455d73f..ed560bffa7 100644 --- a/lib/std/Build/abi.zig +++ b/lib/std/Build/abi.zig @@ -235,7 +235,8 @@ pub const fuzz = struct { max: u64, weight: u64, - fn intFromValue(x: anytype) u64 { + /// `inline` to propogate comptimeness + inline fn intFromValue(x: anytype) u64 { const T = @TypeOf(x); return switch (@typeInfo(T)) { .comptime_int => x, @@ -269,11 +270,13 @@ pub const fuzz = struct { }; } - pub fn value(T: type, x: T, weight: u64) Weight { + /// `inline` to propogate comptimeness + pub inline fn value(T: type, x: T, weight: u64) Weight { return .{ .min = intFromValue(x), .max = intFromValue(x), .weight = weight }; } - pub fn rangeAtMost(T: type, at_least: T, at_most: T, weight: u64) Weight { + /// `inline` to propogate comptimeness + pub inline fn rangeAtMost(T: type, at_least: T, at_most: T, weight: u64) Weight { std.debug.assert(intFromValue(at_least) <= intFromValue(at_most)); return .{ .min = intFromValue(at_least), @@ -282,7 +285,8 @@ pub const fuzz = struct { }; } - pub fn rangeLessThan(T: type, at_least: T, less_than: T, weight: u64) Weight { + /// `inline` to propogate comptimeness + pub inline fn rangeLessThan(T: type, at_least: T, less_than: T, weight: u64) Weight { std.debug.assert(intFromValue(at_least) < intFromValue(less_than)); return .{ .min = intFromValue(at_least), diff --git a/lib/std/testing/Smith.zig b/lib/std/testing/Smith.zig index 
9b1574282b..e36c97c50e 100644 --- a/lib/std/testing/Smith.zig +++ b/lib/std/testing/Smith.zig @@ -205,6 +205,24 @@ pub noinline fn valueRangeLessThan(s: *Smith, T: type, at_least: T, less_than: T return s.valueRangeLessThanWithHash(T, at_least, less_than, firstHash()); } +/// It is asserted `len` is nonzero. +/// It is asserted `len` fits within 64 bits. +// +// `noinline` to capture a unique return address +pub noinline fn index(s: *Smith, len: usize) usize { + @disableInstrumentation(); + return s.indexWithHash(len, firstHash()); +} + +/// It is asserted that the weight of `false` is non-zero. +/// It is asserted that the weight of `true` is non-zero. +// +// `noinline` to capture a unique return address +pub noinline fn boolWeighted(s: *Smith, false_weight: u64, true_weight: u64) bool { + @disableInstrumentation(); + return s.boolWeightedWithHash(false_weight, true_weight, firstHash()); +} + /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. // @@ -228,6 +246,7 @@ pub noinline fn eosWeighted(s: *Smith, weights: []const Weight) bool { /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. /// +/// It is asserted that the weight of `false` is non-zero. /// It is asserted that the weight of `true` is non-zero. // // `noinline` to capture a unique return address @@ -463,6 +482,24 @@ pub fn valueRangeLessThanWithHash(s: *Smith, T: type, at_least: T, less_than: T, return s.valueWeightedWithHash(T, &.{.rangeLessThan(T, at_least, less_than, 1)}, hash); } +/// It is asserted `len` is nonzero. +/// It is asserted `len` fits within 64 bits. 
+pub fn indexWithHash(s: *Smith, len: usize, hash: u32) usize { + @disableInstrumentation(); + assert(len != 0); + return @intCast(s.valueWeightedWithHash(u64, &.{.rangeLessThan(u64, 0, @intCast(len), 1)}, hash)); +} + +/// It is asserted that the weight of `false` is non-zero. +/// It is asserted that the weight of `true` is non-zero. +pub fn boolWeightedWithHash(s: *Smith, false_weight: u64, true_weight: u64, hash: u32) bool { + @disableInstrumentation(); + return s.valueWeightedWithHash(bool, &.{ + .value(bool, false, false_weight), + .value(bool, true, true_weight), + }, hash); +} + /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. pub fn eosWithHash(s: *Smith, hash: u32) bool { @@ -504,8 +541,6 @@ pub fn eosWeightedWithHash(s: *Smith, weights: []const Weight, hash: u32) bool { /// /// It is asserted that the weight of `false` is non-zero. /// It is asserted that the weight of `true` is non-zero. 
-// -// `noinline` to capture a unique return address pub fn eosWeightedSimpleWithHash(s: *Smith, false_weight: u64, true_weight: u64, hash: u32) bool { @disableInstrumentation(); return s.eosWeightedWithHash(&.{ diff --git a/lib/std/zig.zig b/lib/std/zig.zig index fdb945c7ce..c2250c2651 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -23,6 +23,7 @@ pub const primitives = @import("zig/primitives.zig"); pub const isPrimitive = primitives.isPrimitive; pub const Ast = @import("zig/Ast.zig"); pub const AstGen = @import("zig/AstGen.zig"); +pub const AstSmith = @import("zig/AstSmith.zig"); pub const Zir = @import("zig/Zir.zig"); pub const Zoir = @import("zig/Zoir.zig"); pub const ZonGen = @import("zig/ZonGen.zig"); @@ -1165,6 +1166,7 @@ pub const ClangCliParam = struct { test { _ = Ast; _ = AstRlAnnotate; + _ = AstSmith; _ = BuiltinFn; _ = Client; _ = ErrorBundle; diff --git a/lib/std/zig/AstSmith.zig b/lib/std/zig/AstSmith.zig new file mode 100644 index 0000000000..e849ed21d0 --- /dev/null +++ b/lib/std/zig/AstSmith.zig @@ -0,0 +1,2602 @@ +//! Generates a valid AST and corresponding source. +//! +//! 
This is based directly off grammer.peg + +const std = @import("../std.zig"); +const assert = std.debug.assert; +const Token = std.zig.Token; +const Smith = std.testing.Smith; +const Weight = Smith.Weight; +const AstSmith = @This(); + +smith: *Smith, + +source_buf: [16384]u8, +source_len: usize, + +token_tag_buf: [2048]Token.Tag, +token_start_buf: [2048]std.zig.Ast.ByteOffset, +tokens_len: usize, + +/// For `.asterisk`, this also includes `.asterisk2` +not_token: ?Token.Tag, +not_token_comptime: bool, +/// ExprSuffix +/// <- KEYWORD_or +/// / KEYWORD_and +/// / CompareOp +/// / BitwiseOp +/// / BitShiftOp +/// / AdditionOp +/// / MultiplyOp +/// / EXCLAMATIONMARK +/// / SuffixOp +/// / FnCallArguments +not_expr_suffix: bool, +/// LabelableExpr +/// <- Block +/// / SwitchExpr +/// / LoopExpr +not_labelable_expr: ?enum { colon, expr }, +not_label: bool, +not_break_label: bool, +not_block_expr: bool, +not_expr_statement: bool, + +prev_ids_buf: [256]struct { start: u16, len: u16 }, +/// This may be larger than `prev_ids` in which case, +/// x % prev_ids.len = next index +/// @min(x, prev_ids) = length +prev_ids_len: usize, + +/// `generate` must be called on the returned value before any other methods +pub fn init(smith: *Smith) AstSmith { + return .{ + .smith = smith, + + .source_buf = undefined, + .source_len = 0, + + .token_tag_buf = undefined, + .token_start_buf = undefined, + .tokens_len = 0, + + .not_token = null, + .not_token_comptime = false, + .not_expr_suffix = false, + .not_labelable_expr = null, + .not_label = false, + .not_break_label = false, + .not_block_expr = false, + .not_expr_statement = false, + + .prev_ids_buf = undefined, + .prev_ids_len = 0, + }; +} + +pub fn source(t: *AstSmith) [:0]u8 { + return t.source_buf[0..t.source_len :0]; +} + +/// The Slice is not backed by a MultiArrayList, so calling deinit or toMultiArrayList is illegal. 
+pub fn tokens(t: *AstSmith) std.zig.Ast.TokenList.Slice { + var slice: std.zig.Ast.TokenList.Slice = .{ + .ptrs = undefined, + .len = t.tokens_len, + .capacity = t.tokens_len, + }; + comptime assert(slice.ptrs.len == 2); + slice.ptrs[@intFromEnum(std.zig.Ast.TokenList.Field.tag)] = @ptrCast(&t.token_tag_buf); + slice.ptrs[@intFromEnum(std.zig.Ast.TokenList.Field.start)] = @ptrCast(&t.token_start_buf); + return slice; +} + +pub const Error = error{ OutOfMemory, SkipZigTest }; +const SourceError = error{SkipZigTest}; + +pub fn generate(a: *AstSmith, gpa: std.mem.Allocator) Error!std.zig.Ast { + try a.generateSource(); + const ast = try std.zig.Ast.parseTokens(gpa, a.source(), a.tokens(), .zig); + assert(ast.errors.len == 0); + return ast; +} + +pub fn generateSource(a: *AstSmith) SourceError!void { + try a.pegRoot(); + try a.ensureSourceCapacity(1); + a.source_buf[a.source_len] = 0; + try a.addTokenTag(.eof); +} + +/// For choices which can introduce a variable number of expressions, this should be used to reduce +/// unbounded recursion. +// +// `inline` to propogate caller's return address +inline fn smithListItemBool(a: *AstSmith) bool { + return a.smith.boolWeighted(63, 1); +} + +/// For choices which can introduce a variable number of expressions, this should be used to reduce +/// unbounded recursion. 
+// +// `inline` to propogate caller's return address +inline fn smithListItemEos(a: *AstSmith) bool { + return a.smith.eosWeightedSimple(1, 63); +} + +fn sourceCapacity(a: *AstSmith) []u8 { + return a.source_buf[a.source_len..]; +} + +fn sourceCapacityLen(a: *AstSmith) usize { + return a.source_buf.len - a.source_len; +} + +fn ensureSourceCapacity(a: *AstSmith, n: usize) SourceError!void { + if (a.sourceCapacityLen() < n) return error.SkipZigTest; +} + +fn addSourceByte(a: *AstSmith, byte: u8) SourceError!void { + try a.ensureSourceCapacity(1); + a.addSourceByteAssumeCapacity(byte); +} + +fn addSourceByteAssumeCapacity(a: *AstSmith, byte: u8) void { + a.sourceCapacity()[0] = byte; + a.source_len += 1; +} + +fn addSource(a: *AstSmith, bytes: []const u8) SourceError!void { + try a.ensureSourceCapacity(bytes.len); + a.addSourceAssumeCapacity(bytes); +} + +fn addSourceAssumeCapacity(a: *AstSmith, bytes: []const u8) void { + @memcpy(a.sourceCapacity()[0..bytes.len], bytes); + a.source_len += bytes.len; +} + +fn addSourceAsSlice(a: *AstSmith, len: usize) SourceError![]u8 { + try a.ensureSourceCapacity(len); + return a.addSourceAsSliceAssumeCapacity(len); +} + +fn addSourceAsSliceAssumeCapacity(a: *AstSmith, len: usize) []u8 { + const slice = a.sourceCapacity()[0..len]; + a.source_len += len; + return slice; +} + +fn tokenCapacityLen(a: *AstSmith) usize { + return a.token_tag_buf.len - a.tokens_len; +} + +fn ensureTokenCapacity(a: *AstSmith, n: usize) SourceError!void { + if (a.tokenCapacityLen() < n) return error.SkipZigTest; +} + +fn isAlphanumeric(c: u8) bool { + return switch (c) { + '_', 'a'...'z', 'A'...'Z', '0'...'9' => true, + else => false, + }; +} + +/// For tokens starting with alphanumerics, this ensures +/// previous tokens followed by end_of_word aren't altered. 
+/// +/// end_of_word <- ![a-zA-Z0-9_] skip +fn preservePegEndOfWord(a: *AstSmith) SourceError!void { + if (a.source_len > 0 and isAlphanumeric(a.source_buf[a.source_len - 1])) { + try a.addSourceByte(' '); + } +} + +/// Assumes the token has not been written yet +fn addTokenTag(a: *AstSmith, tag: Token.Tag) SourceError!void { + assert(tag != a.not_token); + if (a.not_token == .asterisk) assert(tag != .asterisk_asterisk); + a.not_token = null; + + if (a.not_token_comptime) assert(tag != .keyword_comptime); + a.not_token_comptime = false; + + if (a.not_label and tag == .identifier) { + a.not_token = .colon; + } + a.not_label = false; + + if (a.not_break_label and tag == .colon) { + a.not_token = .identifier; + } + a.not_break_label = false; + + if (a.not_labelable_expr) |part| switch (part) { + .colon => a.not_labelable_expr = if (tag == .colon) .expr else null, + .expr => switch (tag) { + .l_brace => unreachable, + .keyword_inline => {}, + .keyword_for => unreachable, + .keyword_while => unreachable, + .keyword_switch => unreachable, + else => a.not_labelable_expr = null, + }, + }; + + a.not_expr_suffix = false; + a.not_block_expr = false; + a.not_expr_statement = false; + + try a.ensureTokenCapacity(1); + a.token_tag_buf[a.tokens_len] = tag; + a.token_start_buf[a.tokens_len] = @intCast(a.source_len); + a.tokens_len += 1; +} + +/// Asserts the token has a lexeme (those without have corresponding methods) +fn pegToken(a: *AstSmith, tag: Token.Tag) SourceError!void { + const lexeme = tag.lexeme().?; + + switch (lexeme[0]) { + '_', 'a'...'z', 'A'...'Z', '0'...'9' => try a.preservePegEndOfWord(), + '*' => if (a.tokens_len > 0 and a.source_buf[a.source_len - 1] == '*' and + a.token_tag_buf[a.tokens_len - 1] != .asterisk_asterisk) + { + try a.addSourceByte(' '); + }, + '.' => if (a.tokens_len > 0 and switch (a.source_buf[a.source_len - 1]) { + '.' 
=> true, + '0'...'9', 'a'...'z', 'A'...'Z' => a.token_tag_buf[a.tokens_len - 1] == .number_literal, + else => false, + }) { + try a.addSourceByte(' '); + }, + '+', '-' => if (a.tokens_len > 0 and a.token_tag_buf[a.tokens_len - 1] == .number_literal and + switch (a.source_buf[a.source_len - 1]) { + 'e', 'E', 'p', 'P' => true, + else => false, + }) + { + // Would otherwise be tokenized as the sign of a float's exponent + // + // e.g. "0xFE" ++ "+" ++ "2" (number_literal, plus, number_literal) + try a.addSourceByte(' '); + }, + else => {}, + } + + if (isAlphanumeric(lexeme[0])) try a.preservePegEndOfWord(); + + try a.addTokenTag(tag); + try a.addSource(lexeme); + try a.pegSkip(); +} + +/// Asserts `a.source_len != 0` +fn pegTokenWhitespaceAround(a: *AstSmith, tag: Token.Tag) SourceError!void { + switch (a.source_buf[a.source_len - 1]) { + ' ', '\n' => {}, + else => try a.addSourceByte(' '), + } + try a.addTokenTag(tag); + try a.addSource(tag.lexeme().?); + switch (a.smith.value(enum { space, line_break, cr_line_break })) { + // This is not the same as 'skip' since comments are not whitespace + .space => try a.addSourceByte(' '), + .line_break => try a.addSourceByte('\n'), + .cr_line_break => try a.addSource("\r\n"), + } + try a.pegSkip(); +} + +/// Root <- skip ContainerMembers eof +fn pegRoot(a: *AstSmith) SourceError!void { + try a.pegSkip(); + try a.pegContainerMembers(); +} + +/// ContainerMembers <- container_doc_comment? 
ContainerDeclaration* (ContainerField COMMA)* +/// (ContainerField / ContainerDeclaration*) +fn pegContainerMembers(a: *AstSmith) SourceError!void { + if (a.smith.boolWeighted(63, 1)) { + try a.pegContainerDocComment(); + } + while (!a.smithListItemEos()) { + try a.pegContainerDeclaration(); + } + while (!a.smithListItemEos()) { + try a.pegContainerField(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + if (a.smith.value(bool)) { + try a.pegContainerField(); + } else while (true) { + try a.pegContainerDeclaration(); + if (a.smithListItemEos()) break; + } + } +} + +/// ContainerDeclaration <- TestDecl / ComptimeDecl / doc_comment? KEYWORD_pub? Decl +fn pegContainerDeclaration(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { TestDecl, ComptimeDecl, Decl })) { + .TestDecl => try a.pegTestDecl(), + .ComptimeDecl => try a.pegComptimeDecl(), + .Decl => { + try a.pegMaybeDocComment(); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_pub); + } + try a.pegDecl(); + }, + } +} + +/// KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block +fn pegTestDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_test); + switch (a.smith.value(enum { none, string, id })) { + .none => {}, + .string => try a.pegStringLiteralSingle(), + .id => try a.pegIdentifier(), + } + try a.pegBlock(); +} + +/// ComptimeDecl <- KEYWORD_comptime Block +fn pegComptimeDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_comptime); + try a.pegBlock(); +} + +/// Decl +/// <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON +/// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? 
+/// GlobalVarDecl +fn pegDecl(a: *AstSmith) SourceError!void { + const Modifier = enum(u8) { + none, + @"export", + @"extern", + extern_library, + @"inline", + @"noinline", + }; + const is_fn = a.smith.value(bool); + const fn_modifiers = Smith.baselineWeights(Modifier); + const var_modifiers: []const Weight = &.{.rangeAtMost(Modifier, .none, .extern_library, 1)}; + const modifier = a.smith.valueWeighted(Modifier, if (is_fn) fn_modifiers else var_modifiers); + + switch (modifier) { + .none => {}, + .@"export" => try a.pegToken(.keyword_export), + .@"extern" => try a.pegToken(.keyword_extern), + .extern_library => { + try a.pegToken(.keyword_extern); + try a.pegStringLiteralSingle(); + }, + .@"inline" => try a.pegToken(.keyword_inline), + .@"noinline" => try a.pegToken(.keyword_noinline), + } + + if (is_fn) { + try a.pegFnProto(); + if (modifier == .@"extern" or modifier == .extern_library or a.smith.value(bool)) { + try a.pegToken(.semicolon); + } else { + try a.pegBlock(); + } + } else { + if (a.smith.value(bool)) try a.pegToken(.keyword_threadlocal); + try a.pegGlobalVarDecl(); + } +} + +/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? +/// LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix +fn pegFnProto(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_fn); + if (a.smith.value(bool)) { + try a.pegIdentifier(); + } + try a.pegToken(.l_paren); + try a.pegParamDeclList(); + try a.pegToken(.r_paren); + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + if (a.smith.value(bool)) { + try a.pegLinkSection(); + } + if (a.smith.value(bool)) { + try a.pegCallConv(); + } + if (a.smith.value(bool)) { + try a.pegToken(.bang); + } + try a.pegTypeExpr(); + a.not_expr_suffix = true; +} + +/// VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? +/// AddrSpace? LinkSection? 
+fn pegVarDeclProto(a: *AstSmith) SourceError!void { + try a.pegToken(if (a.smith.value(bool)) .keyword_var else .keyword_const); + try a.pegIdentifier(); + + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegTypeExpr(); + } + + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + + if (a.smith.value(bool)) { + try a.pegLinkSection(); + } +} + +/// GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON +fn pegGlobalVarDecl(a: *AstSmith) SourceError!void { + try a.pegVarDeclProto(); + if (a.smithListItemBool()) { + try a.pegToken(.equal); + try a.pegExpr(); + } + try a.pegToken(.semicolon); +} + +/// ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn +/// (IDENTIFIER COLON !(IDENTIFIER COLON)) TypeExpr ByteAlign? (EQUAL Expr)? +fn pegContainerField(a: *AstSmith) SourceError!void { + try a.pegMaybeDocComment(); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_comptime); + } + if (a.smith.value(bool)) { + try a.pegIdentifier(); + try a.pegToken(.colon); + } else { + a.not_token = .keyword_fn; + a.not_token_comptime = true; + a.not_label = true; + } + try a.pegTypeExpr(); + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + if (a.smith.value(bool)) { + try a.pegToken(.equal); + try a.pegExpr(); + } +} + +/// BlockStatement +/// <- Statement +/// / KEYWORD_defer BlockExprStatement +/// / KEYWORD_errdefer Payload? BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? 
VarAssignStatement +fn pegBlockStatement(a: *AstSmith) SourceError!void { + const Kind = enum { + statement, + defer_statement, + errdefer_statement, + var_assign, + comptime_var_assign, + }; + const weights = Smith.baselineWeights(Kind) ++ &[1]Weight{.value(Kind, .statement, 4)}; + switch (a.smith.valueWeighted(Kind, weights)) { + .statement => try a.pegStatement(), + .defer_statement, .errdefer_statement => |kind| { + try a.pegToken(switch (kind) { + .defer_statement => .keyword_defer, + .errdefer_statement => .keyword_errdefer, + else => unreachable, + }); + try a.pegBlockExprStatement(); + }, + .var_assign, .comptime_var_assign => |kind| { + a.not_expr_statement = true; + if (kind == .comptime_var_assign) { + try a.pegToken(.keyword_comptime); + a.not_block_expr = true; + } + try a.pegVarAssignStatement(); + }, + } +} + +/// Statement +/// <- ExprStatement +/// / KEYWORD_suspend BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? AssignExpr SEMICOLON +/// +/// ExprStatement +/// <- IfStatement +/// / LabeledStatement +/// / KEYWORD_nosuspend BlockExprStatement +/// / KEYWORD_comptime BlockExpr +fn pegStatement(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { + if_statement, + labeled_statement, + comptime_block_expr, + + nosuspend_statement, + suspend_statement, + assign_expr, + comptime_assign_expr, + })) { + .if_statement => try a.pegIfStatement(), + .labeled_statement => try a.pegLabeledStatement(), + .comptime_block_expr => { + try a.pegToken(.keyword_comptime); + try a.pegBlockExpr(); + }, + + .nosuspend_statement, + .suspend_statement, + => |kind| { + try a.pegToken(switch (kind) { + .nosuspend_statement => .keyword_nosuspend, + .suspend_statement => .keyword_suspend, + else => unreachable, + }); + try a.pegBlockExprStatement(); + }, + .assign_expr, .comptime_assign_expr => |kind| { + a.not_expr_statement = true; + if (kind == .comptime_assign_expr) { + try a.pegToken(.keyword_comptime); + a.not_block_expr = true; + } + 
try a.pegAssignExpr(); + try a.pegToken(.semicolon); + }, + } +} + +/// IfStatement +/// <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? +/// / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +fn pegIfStatement(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + if (a.smith.value(bool)) { + try a.pegPayload(); + } + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// LabeledStatement <- BlockLabel? (Block / LoopStatement / SwitchExpr) +fn pegLabeledStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockLabel(); + } + switch (a.smith.value(enum { block, loop_statement, switch_expr })) { + .block => try a.pegBlock(), + .loop_statement => try a.pegLoopStatement(), + .switch_expr => try a.pegSwitchExpr(), + } +} + +/// LoopStatement <- KEYWORD_inline? 
(ForStatement / WhileStatement) +fn pegLoopStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + if (a.smith.value(bool)) { + try a.pegForStatement(); + } else { + try a.pegWhileStatement(); + } +} + +/// ForStatement +/// <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) +/// / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) +fn pegForStatement(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// WhileStatement +/// <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? +/// / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +fn pegWhileStatement(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + if (a.smith.value(bool)) { + try a.pegPayload(); + } + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// BlockExprStatement +/// <- BlockExpr +/// / !BlockExpr AssignExpr SEMICOLON +fn pegBlockExprStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + try a.pegToken(.semicolon); + } +} + +/// BlockExpr <- BlockLabel? 
Block +fn pegBlockExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegBlock(); +} + +/// VarAssignStatement <- (Expr / VarDeclProto) (COMMA (Expr / VarDeclProto))* EQUAL Expr SEMICOLON +fn pegVarAssignStatement(a: *AstSmith) SourceError!void { + while (true) { + if (a.smith.value(bool)) { + try a.pegVarDeclProto(); + } else { + try a.pegExpr(); + } + + if (a.smithListItemEos()) { + break; + } else { + try a.pegToken(.comma); + } + } + + try a.pegToken(.equal); + try a.pegExpr(); + try a.pegToken(.semicolon); +} + +/// AssignExpr <- Expr (AssignOp Expr / (COMMA Expr)+ EQUAL Expr)? +fn pegAssignExpr(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + if (!a.smithListItemBool()) { + try a.pegAssignOp(); + } else { + while (true) { + try a.pegToken(.comma); + try a.pegExpr(); + if (a.smithListItemEos()) break; + } + try a.pegToken(.equal); + } + try a.pegExpr(); + } +} + +/// SingleAssignExpr <- Expr (AssignOp Expr)? +fn pegSingleAssignExpr(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegAssignOp(); + try a.pegExpr(); + } +} + +/// Expr <- BoolOrExpr +const pegExpr = pegBoolOrExpr; + +/// BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)* +fn pegBoolOrExpr(a: *AstSmith) SourceError!void { + try a.pegBoolAndExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegTokenWhitespaceAround(.keyword_or); + try a.pegBoolAndExpr(); + } +} + +/// BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)* +fn pegBoolAndExpr(a: *AstSmith) SourceError!void { + try a.pegCompareExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegTokenWhitespaceAround(.keyword_and); + try a.pegCompareExpr(); + } +} + +/// CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)? 
+fn pegCompareExpr(a: *AstSmith) SourceError!void { + try a.pegBitwiseExpr(); + if (!a.not_expr_suffix and a.smithListItemBool()) { + try a.pegCompareOp(); + try a.pegBitwiseExpr(); + } +} + +/// BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)* +fn pegBitwiseExpr(a: *AstSmith) SourceError!void { + try a.pegBitShiftExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegBitwiseOp(); + try a.pegBitShiftExpr(); + } +} + +/// BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)* +fn pegBitShiftExpr(a: *AstSmith) SourceError!void { + try a.pegAdditionExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegBitShiftOp(); + try a.pegAdditionExpr(); + } +} + +/// AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)* +fn pegAdditionExpr(a: *AstSmith) SourceError!void { + try a.pegMultiplyExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegAdditionOp(); + try a.pegMultiplyExpr(); + } +} + +/// MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)* +fn pegMultiplyExpr(a: *AstSmith) SourceError!void { + try a.pegPrefixExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegMultiplyOp(); + try a.pegPrefixExpr(); + } +} + +/// PrefixExpr <- PrefixOp* PrimaryExpr +fn pegPrefixExpr(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegPrefixOp(); + } + try a.pegPrimaryExpr(); +} + +/// PrimaryExpr +/// <- AsmExpr +/// / IfExpr +/// / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_comptime Expr !ExprSuffix +/// / KEYWORD_nosuspend Expr !ExprSuffix +/// / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_resume Expr !ExprSuffix +/// / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / BlockLabel? 
LoopExpr +/// / Block +/// / CurlySuffixExpr +fn pegPrimaryExpr(a: *AstSmith) SourceError!void { + const Kind = enum(u8) { + curly_suffix_expr, + @"return", + @"continue", + @"break", + block, + asm_expr, + // Always contain more expressions + if_expr, + loop_expr, + @"resume", + @"comptime", + @"nosuspend", + }; + + switch (a.smith.valueWeighted(Kind, &.{ + .value(Kind, .curly_suffix_expr, 75), + .rangeAtMost(Kind, .@"return", .asm_expr, 4), + .rangeAtMost(Kind, .if_expr, .@"nosuspend", 1), + })) { + .curly_suffix_expr => try a.pegCurlySuffixExpr(), + + .block => if (a.not_labelable_expr != .expr and !a.not_block_expr and !a.not_expr_statement) { + try a.pegBlock(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegBlock(); + try a.pegToken(.r_paren); + }, + .asm_expr => try a.pegAsmExpr(), + .if_expr => if (!a.not_expr_statement) { + try a.pegIfExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIfExpr(); + try a.pegToken(.r_paren); + }, + .loop_expr => { + const group = a.not_labelable_expr == .expr or a.not_expr_statement; + if (group) try a.pegToken(.l_paren); + if (!a.not_label and a.not_token != .identifier and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegLoopExpr(); + if (group) try a.pegToken(.r_paren); + }, + + .@"return", + .@"comptime", + .@"nosuspend", + .@"resume", + .@"break", + .@"continue", + => |t| { + const group = a.not_expr_statement and (t == .@"nosuspend" or t == .@"comptime"); + if (group) try a.pegToken(.l_paren); + + const kw: Token.Tag, const label, const expr = switch (t) { + .@"return" => .{ .keyword_return, false, a.smithListItemBool() }, + .@"comptime" => .{ .keyword_comptime, false, true }, + .@"nosuspend" => .{ .keyword_nosuspend, false, true }, + .@"resume" => .{ .keyword_resume, false, true }, + .@"break" => .{ .keyword_break, a.smith.value(bool), a.smithListItemBool() }, + .@"continue" => .{ .keyword_continue, a.smith.value(bool), a.smithListItemBool() }, + else => unreachable, + }; + 
try a.pegToken(kw); + if (label) { + try a.pegBreakLabel(); + } else { + a.not_break_label = true; + } + if (expr) { + try a.pegExpr(); + a.not_expr_suffix = true; + } else { + a.not_token = .asterisk; + } + + if (group) try a.pegToken(.r_paren); + }, + } +} + +/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix +fn pegIfExpr(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + try a.pegExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// Block <- LBRACE Statement* RBRACE +fn pegBlock(a: *AstSmith) SourceError!void { + try a.pegToken(.l_brace); + while (!a.smithListItemEos()) { + try a.pegBlockStatement(); + } + try a.pegToken(.r_brace); +} + +/// LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr) +fn pegLoopExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + + if (a.smith.value(bool)) { + try a.pegForExpr(); + } else { + try a.pegWhileExpr(); + } +} + +/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix +fn pegForExpr(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + try a.pegExpr(); + if (a.not_token != .keyword_else and a.smith.value(bool)) { + try a.pegToken(.keyword_else); + try a.pegExpr(); + } else { + a.not_token = .keyword_else; + } + a.not_expr_suffix = true; +} + +/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? 
!ExprSuffix +fn pegWhileExpr(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + try a.pegExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// CurlySuffixExpr <- TypeExpr InitList? +fn pegCurlySuffixExpr(a: *AstSmith) SourceError!void { + try a.pegTypeExpr(); + if (!a.not_expr_suffix and a.smith.value(bool)) { + try a.pegInitList(); + } +} + +/// InitList +/// <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE +/// / LBRACE Expr (COMMA Expr)* COMMA? RBRACE +/// / LBRACE RBRACE +fn pegInitList(a: *AstSmith) SourceError!void { + try a.pegToken(.l_brace); + if (a.smithListItemBool()) { + if (a.smith.value(bool)) { + try a.pegFieldInit(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegFieldInit(); + } + } else { + try a.pegExpr(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegExpr(); + } + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + } + try a.pegToken(.r_brace); +} + +/// PrefixTypeOp* ErrorUnionExpr +fn pegTypeExpr(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegPrefixTypeOp(); + } + try a.pegErrorUnionExpr(); +} + +/// ErrorUnionExpr <- SuffixExpr (EXCLAMATIONMARK TypeExpr)? 
+fn pegErrorUnionExpr(a: *AstSmith) SourceError!void { + try a.pegSuffixExpr(); + if (!a.not_expr_suffix and a.smithListItemBool()) { + try a.pegToken(.bang); + try a.pegTypeExpr(); + } +} + +/// SuffixExpr +/// <- PrimaryTypeExpr (SuffixOp / FnCallArguments)* +fn pegSuffixExpr(a: *AstSmith) SourceError!void { + try a.pegPrimaryTypeExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + if (a.smith.value(bool)) { + try a.pegSuffixOp(); + } else { + try a.pegFnCallArguments(); + } + } +} + +/// PrimaryTypeExpr +/// <- BUILTINIDENTIFIER FnCallArguments +/// / CHAR_LITERAL +/// / ContainerDecl +/// / DOT IDENTIFIER +/// / DOT InitList +/// / ErrorSetDecl +/// / FLOAT +/// / FnProto +/// / GroupedExpr +/// / LabeledTypeExpr +/// / IDENTIFIER !(COLON LabelableExpr) +/// / IfTypeExpr +/// / INTEGER +/// / KEYWORD_comptime TypeExpr !ExprSuffix +/// / KEYWORD_error DOT IDENTIFIER +/// / KEYWORD_anyframe +/// / KEYWORD_unreachable +/// / STRINGLITERAL +fn pegPrimaryTypeExpr(a: *AstSmith) SourceError!void { + const Kind = enum(u8) { + identifier, + float, + integer, + char_literal, + string_literal, + enum_literal, + error_literal, + unreachable_type, + anyframe_type, + + // Containing zero or more expressions + builtin_call, + array_literal, + container_decl, + fn_proto, + error_set, + + // Containing one or more epressions + grouped, + labeled_type_expr, + if_type_expr, + comptime_expr, + }; + + switch (a.smith.valueWeighted(Kind, &.{ + .rangeAtMost(Kind, .identifier, .anyframe_type, 5), + .rangeAtMost(Kind, .builtin_call, .error_set, 2), + .rangeAtMost(Kind, .grouped, .comptime_expr, 1), + })) { + .identifier => if (a.not_token != .identifier) { + try a.pegIdentifier(); + a.not_labelable_expr = .colon; + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIdentifier(); + try a.pegToken(.r_paren); + }, + .float => try a.pegFloat(), + .integer => try a.pegInteger(), + .char_literal => try a.pegCharLiteral(), + .string_literal => try a.pegStringLiteral(), + 
.enum_literal => { + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .error_literal => { + try a.pegToken(.keyword_error); + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .unreachable_type => try a.pegToken(.keyword_unreachable), + .anyframe_type => try a.pegToken(.keyword_anyframe), + + .builtin_call => { + try a.pegBuiltinIdentifier(); + try a.pegFnCallArguments(); + }, + .array_literal => { + try a.pegToken(.period); + try a.pegInitList(); + }, + .container_decl => try a.pegContainerDecl(), + .fn_proto => if (a.not_token != .keyword_fn) { + try a.pegFnProto(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegFnProto(); + try a.pegToken(.r_paren); + }, + .error_set => try a.pegErrorSetDecl(), + + .grouped => try a.pegGroupedExpr(), + .labeled_type_expr => try a.pegLabeledTypeExpr(), + .if_type_expr => if (!a.not_expr_statement) { + try a.pegIfTypeExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIfTypeExpr(); + try a.pegToken(.r_paren); + }, + .comptime_expr => if (!a.not_token_comptime and !a.not_expr_statement) { + try a.pegToken(.keyword_comptime); + try a.pegTypeExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegToken(.keyword_comptime); + try a.pegTypeExpr(); + try a.pegToken(.r_paren); + }, + } +} + +/// ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? 
ContainerDeclAuto +fn pegContainerDecl(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { auto, @"extern", @"packed" })) { + .auto => {}, + .@"extern" => try a.pegToken(.keyword_extern), + .@"packed" => try a.pegToken(.keyword_packed), + } + try a.pegContainerDeclAuto(); +} + +/// ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE +fn pegErrorSetDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_error); + try a.pegToken(.l_brace); + try a.pegIdentifierList(); + try a.pegToken(.r_brace); +} + +/// GroupedExpr <- LPAREN Expr RPAREN +fn pegGroupedExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix +fn pegIfTypeExpr(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + try a.pegTypeExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegTypeExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// LabeledTypeExpr +/// <- BlockLabel Block +/// / BlockLabel? LoopTypeExpr +/// / BlockLabel? 
SwitchExpr +fn pegLabeledTypeExpr(a: *AstSmith) SourceError!void { + const kind = a.smith.value(enum { block, loop, @"switch" }); + const not_any = a.not_labelable_expr == .expr or a.not_expr_statement; + const no_label = a.not_label or a.not_token == .identifier; + const no_block = no_label or a.not_block_expr; + const group = not_any or (kind == .block and no_block); + if (group) try a.pegToken(.l_paren); + + switch (kind) { + .block => { + try a.pegBlockLabel(); + try a.pegBlock(); + }, + .loop => { + if (!no_label and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegLoopTypeExpr(); + }, + .@"switch" => { + if (!no_label and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegSwitchExpr(); + }, + } + + if (group) try a.pegToken(.r_paren); +} + +/// LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) +fn pegLoopTypeExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + + if (a.smith.value(bool)) { + try a.pegForTypeExpr(); + } else { + try a.pegWhileTypeExpr(); + } +} + +/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix +fn pegForTypeExpr(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + try a.pegTypeExpr(); + if (a.not_token != .keyword_else and a.smith.value(bool)) { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + } else { + a.not_token = .keyword_else; + } + a.not_expr_suffix = true; +} + +/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 
!ExprSuffix +fn pegWhileTypeExpr(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + try a.pegTypeExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegTypeExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE +fn pegSwitchExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_switch); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + + try a.pegToken(.l_brace); + try a.pegSwitchProngList(); + try a.pegToken(.r_brace); +} + +/// AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN +fn pegAsmExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_asm); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_volatile); + } + try a.pegToken(.l_paren); + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegAsmOutput(); + } + try a.pegToken(.r_paren); +} + +/// AsmOutput <- COLON AsmOutputList AsmInput? +fn pegAsmOutput(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegAsmOutputList(); + if (a.smith.value(bool)) { + try a.pegAsmInput(); + } +} + +/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +fn pegAsmOutputItem(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegIdentifier(); + try a.pegToken(.r_bracket); + try a.pegStringLiteralSingle(); + try a.pegToken(.l_paren); + if (a.smith.value(bool)) { + try a.pegToken(.arrow); + try a.pegTypeExpr(); + } else { + try a.pegIdentifier(); + } + try a.pegToken(.r_paren); +} + +/// AsmInput <- COLON AsmInputList AsmClobbers? 
+fn pegAsmInput(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegAsmInputList(); + if (a.smith.value(bool)) { + try a.pegAsmClobbers(); + } +} + +/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN +fn pegAsmInputItem(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegIdentifier(); + try a.pegToken(.r_bracket); + try a.pegStringLiteralSingle(); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// AsmClobbers <- COLON Expr +fn pegAsmClobbers(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegExpr(); +} + +/// BreakLabel <- COLON IDENTIFIER +fn pegBreakLabel(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegIdentifier(); +} + +/// BlockLabel <- IDENTIFIER COLON +fn pegBlockLabel(a: *AstSmith) SourceError!void { + try a.pegIdentifier(); + try a.pegToken(.colon); +} + +/// FieldInit <- DOT IDENTIFIER EQUAL Expr +fn pegFieldInit(a: *AstSmith) SourceError!void { + try a.pegToken(.period); + try a.pegIdentifier(); + try a.pegToken(.equal); + try a.pegExpr(); +} + +/// WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN +fn pegWhileContinueExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegToken(.l_paren); + try a.pegAssignExpr(); + try a.pegToken(.r_paren); +} + +/// LinkSection <- KEYWORD_linksection LPAREN Expr RPAREN +fn pegLinkSection(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_linksection); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN +fn pegAddrSpace(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_addrspace); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// CallConv <- KEYWORD_callconv LPAREN Expr RPAREN +fn pegCallConv(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_callconv); + try a.pegToken(.l_paren); + try a.pegExpr(); + try 
a.pegToken(.r_paren); +} + +/// ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? +/// ((IDENTIFIER COLON) / !KEYWORD_comptime !(IDENTIFIER COLON)) +/// ParamType +fn pegParamDecl(a: *AstSmith) SourceError!void { + try a.pegMaybeDocComment(); + const modifier = a.smith.value(enum { none, @"noalias", @"comptime" }); + switch (modifier) { + .none => a.not_token_comptime = true, + .@"noalias" => try a.pegToken(.keyword_noalias), + .@"comptime" => try a.pegToken(.keyword_comptime), + } + if (a.smith.value(bool)) { + try a.pegIdentifier(); + try a.pegToken(.colon); + } else { + a.not_label = true; + } + try a.pegParamType(); +} + +/// ParamType +/// <- KEYWORD_anytype +/// / TypeExpr +fn pegParamType(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_anytype); + } else { + try a.pegTypeExpr(); + } +} + +/// IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? +fn pegIfPrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_if); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + try a.pegPtrPayload(); +} + +/// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? +fn pegWhilePrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_while); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + + if (a.smith.value(bool)) { + try a.pegPtrPayload(); + } + + if (a.smith.value(bool)) { + try a.pegWhileContinueExpr(); + } +} + +/// ForPrefix <- KEYWORD_for LPAREN ForArgumentsList RPAREN PtrListPayload +/// +/// An additional requirement checked in the Parser is that the number of +/// arguments and payload elements are the same. 
+fn pegForPrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_for); + try a.pegToken(.l_paren); + const n = try a.pegForArgumentsList(); + try a.pegToken(.r_paren); + try a.pegPtrListPayload(n); +} + +/// Payload <- PIPE IDENTIFIER PIPE +fn pegPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + try a.pegIdentifier(); + try a.pegToken(.pipe); +} + +/// PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE +fn pegPtrPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + try a.pegToken(.pipe); +} + +/// PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE +fn pegPtrIndexPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + if (a.smith.value(bool)) { + try a.pegToken(.comma); + try a.pegIdentifier(); + } + try a.pegToken(.pipe); +} + +/// PtrListPayload <- PIPE ASTERISK? IDENTIFIER (COMMA ASTERISK? IDENTIFIER)* COMMA? PIPE +fn pegPtrListPayload(a: *AstSmith, n: usize) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + + for (1..n) |_| { + try a.pegToken(.comma); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + } + + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + try a.pegToken(.pipe); +} + +/// SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? SingleAssignExpr +fn pegSwitchProng(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + try a.pegSwitchCase(); + try a.pegToken(.equal_angle_bracket_right); + if (a.smith.value(bool)) { + try a.pegPtrIndexPayload(); + } + try a.pegSingleAssignExpr(); +} + +/// SwitchCase +/// <- SwitchItem (COMMA SwitchItem)* COMMA? 
+/// / KEYWORD_else +fn pegSwitchCase(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegSwitchItem(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegSwitchItem(); + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + } else { + try a.pegToken(.keyword_else); + } +} + +/// SwitchItem <- Expr (DOT3 Expr)? +fn pegSwitchItem(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.ellipsis3); + try a.pegExpr(); + } +} + +/// ForArgumentsList <- ForItem (COMMA ForItem)* COMMA? +fn pegForArgumentsList(a: *AstSmith) SourceError!usize { + try a.pegForItem(); + var n: usize = 1; + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegForItem(); + n += 1; + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + return n; +} + +/// ForItem <- Expr (DOT2 Expr?)? +fn pegForItem(a: *AstSmith) SourceError!void { + try a.pegExpr(); + const components = a.smith.valueRangeAtMost(u2, 0, 2); + if (components >= 1) try a.pegToken(.ellipsis2); + if (components >= 2) try a.pegExpr(); +} + +/// AssignOp +/// <- ASTERISKEQUAL +/// / ASTERISKPIPEEQUAL +/// / SLASHEQUAL +/// / PERCENTEQUAL +/// / PLUSEQUAL +/// / PLUSPIPEEQUAL +/// / MINUSEQUAL +/// / MINUSPIPEEQUAL +/// / LARROW2EQUAL +/// / LARROW2PIPEEQUAL +/// / RARROW2EQUAL +/// / AMPERSANDEQUAL +/// / CARETEQUAL +/// / PIPEEQUAL +/// / ASTERISKPERCENTEQUAL +/// / PLUSPERCENTEQUAL +/// / MINUSPERCENTEQUAL +/// / EQUAL +fn pegAssignOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .asterisk_equal, + .asterisk_pipe_equal, + .slash_equal, + .percent_equal, + .plus_equal, + .plus_pipe_equal, + .minus_equal, + .minus_pipe_equal, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_angle_bracket_left_pipe_equal, + .angle_bracket_angle_bracket_right_equal, + .ampersand_equal, + .caret_equal, + .pipe_equal, + .asterisk_percent_equal, + .plus_percent_equal, + .minus_percent_equal, + .equal, + }; + try 
a.pegToken(tags[a.smith.index(tags.len)]); +} + +/// CompareOp +/// <- EQUALEQUAL +/// / EXCLAMATIONMARKEQUAL +/// / LARROW +/// / RARROW +/// / LARROWEQUAL +/// / RARROWEQUAL +fn pegCompareOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .equal_equal, + .bang_equal, + .angle_bracket_left, + .angle_bracket_right, + .angle_bracket_left_equal, + .angle_bracket_right_equal, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// BitwiseOp +/// <- AMPERSAND +/// / CARET +/// / PIPE +/// / KEYWORD_orelse +/// / KEYWORD_catch Payload? +fn pegBitwiseOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .ampersand, + .caret, + .pipe, + .keyword_orelse, + .keyword_catch, + }; + const tag = tags[a.smith.index(tags.len)]; + try a.pegTokenWhitespaceAround(tag); + if (tag == .keyword_catch and a.smith.value(bool)) { + try a.pegPayload(); + } +} + +/// BitShiftOp +/// <- LARROW2 +/// / RARROW2 +/// / LARROW2PIPE +fn pegBitShiftOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .angle_bracket_angle_bracket_left, + .angle_bracket_angle_bracket_right, + .angle_bracket_angle_bracket_left_pipe, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// AdditionOp +/// <- PLUS +/// / MINUS +/// / PLUS2 +/// / PLUSPERCENT +/// / MINUSPERCENT +/// / PLUSPIPE +/// / MINUSPIPE +fn pegAdditionOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .plus, + .minus, + .plus_plus, + .plus_percent, + .minus_percent, + .plus_pipe, + .minus_pipe, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// MultiplyOp +/// <- PIPE2 +/// / ASTERISK +/// / SLASH +/// / PERCENT +/// / ASTERISK2 +/// / ASTERISKPERCENT +/// / ASTERISKPIPE +fn pegMultiplyOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .asterisk, + .asterisk_asterisk, + .pipe_pipe, + .slash, + .percent, + .asterisk_percent, + .asterisk_pipe, + }; + const start = @as(u8, 2) * 
@intFromBool(a.not_token == .asterisk); + try a.pegTokenWhitespaceAround(tags[a.smith.valueRangeLessThan(u8, start, tags.len)]); +} + +/// PrefixOp +/// <- EXCLAMATIONMARK +/// / MINUS +/// / TILDE +/// / MINUSPERCENT +/// / AMPERSAND +/// / KEYWORD_try +fn pegPrefixOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .bang, + .minus, + .tilde, + .minus_percent, + .ampersand, + .keyword_try, + }; + try a.pegToken(tags[a.smith.index(tags.len)]); +} + +/// PrefixTypeOp +/// <- QUESTIONMARK +/// / KEYWORD_anyframe MINUSRARROW +/// / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? +/// KEYWORD_const? KEYWORD_volatile? +/// / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? +/// KEYWORD_const? KEYWORD_volatile? +/// / ArrayTypeStart +fn pegPrefixTypeOp(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { + optional, + anyframe_arrow, + array, + single_pointer, + many_pointer, + slice, + })) { + .optional => try a.pegToken(.question_mark), + .anyframe_arrow => { + try a.pegToken(.keyword_anyframe); + try a.pegToken(.arrow); + }, + .array => try a.pegArrayTypeStart(), + .single_pointer, .many_pointer, .slice => |kind| { + const is_single = kind == .single_pointer and a.not_token != .asterisk; + if (is_single) { + try a.pegSinglePtrTypeStart(); + } else if (kind == .many_pointer) { + try a.pegManyPtrTypeStart(); + } else { + try a.pegSliceTypeStart(); + } + + if (a.smith.value(bool)) { + try a.pegToken(.keyword_allowzero); + } + if (a.smith.value(bool)) { + if (is_single) { + try a.pegBitAlign(); + } else { + try a.pegByteAlign(); + } + } + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + if (a.smith.value(bool)) { + try a.pegToken(.keyword_const); + } + if (a.smith.value(bool)) { + try a.pegToken(.keyword_volatile); + } + }, + } +} + +/// SuffixOp +/// <- LBRACKET Expr (DOT2 (Expr? (COLON Expr)?)?)? 
RBRACKET +/// / DOT IDENTIFIER +/// / DOTASTERISK +/// / DOTQUESTIONMARK +fn pegSuffixOp(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { slice, field, deref, unwrap })) { + .slice => { + try a.pegToken(.l_bracket); + try a.pegExpr(); + + const components = a.smith.value(u2); + if (components >= 1) try a.pegToken(.ellipsis2); + if (components >= 2) try a.pegExpr(); + if (components >= 3) { + try a.pegToken(.colon); + try a.pegExpr(); + } + + try a.pegToken(.r_bracket); + }, + .field => { + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .deref => try a.pegToken(.period_asterisk), + .unwrap => { + try a.pegToken(.period); + try a.pegToken(.question_mark); + }, + } +} + +/// FnCallArguments <- LPAREN ExprList RPAREN +fn pegFnCallArguments(a: *AstSmith) SourceError!void { + try a.pegToken(.l_paren); + try a.pegExprList(); + try a.pegToken(.r_paren); +} + +/// SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET +fn pegSliceTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_bracket); +} + +/// SinglePtrTypeStart <- ASTERISK / ASTERISK2 +fn pegSinglePtrTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(if (!a.smith.value(bool)) .asterisk else .asterisk_asterisk); +} + +/// ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +fn pegManyPtrTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegToken(.asterisk); + switch (a.smith.value(enum { many, many_c, many_sentinel })) { + .many => {}, + .many_c => { + // No need for `preservePegEndOfWord` because the previous token is an asterisk + try a.addTokenTag(.identifier); + try a.addSourceByte('c'); + }, + .many_sentinel => { + try a.pegToken(.colon); + try a.pegExpr(); + }, + } + try a.pegToken(.r_bracket); +} + +/// ArrayTypeStart <- LBRACKET !(ASTERISK / ASTERISK2) Expr (COLON Expr)? 
RBRACKET +fn pegArrayTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + a.not_token = .asterisk; + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_bracket); +} + +/// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE +fn pegContainerDeclAuto(a: *AstSmith) SourceError!void { + try a.pegContainerDeclType(); + try a.pegToken(.l_brace); + try a.pegContainerMembers(); + try a.pegToken(.r_brace); +} + +/// ContainerDeclType +/// <- KEYWORD_struct (LPAREN Expr RPAREN)? +/// / KEYWORD_opaque +/// / KEYWORD_enum (LPAREN Expr RPAREN)? +/// / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / !KEYWORD_enum Expr) RPAREN)? +fn pegContainerDeclType(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { @"struct", @"opaque", @"enum", @"union" })) { + .@"struct", .@"enum" => |c| { + const is_struct = c == .@"struct" or a.not_token == .keyword_enum; + try a.pegToken(if (is_struct) .keyword_struct else .keyword_enum); + if (a.smith.value(bool)) { + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + } + }, + .@"opaque" => try a.pegToken(.keyword_opaque), + .@"union" => { + try a.pegToken(.keyword_union); + switch (a.smith.value(enum { no_tag, expr_tag, enum_tag, enum_expr_tag })) { + .no_tag => {}, + .expr_tag => { + try a.pegToken(.l_paren); + a.not_token = .keyword_enum; + try a.pegExpr(); + try a.pegToken(.r_paren); + }, + .enum_tag => { + try a.pegToken(.l_paren); + try a.pegToken(.keyword_enum); + try a.pegToken(.r_paren); + }, + .enum_expr_tag => { + try a.pegToken(.l_paren); + try a.pegToken(.keyword_enum); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + try a.pegToken(.r_paren); + }, + } + }, + } +} + +/// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN +fn pegByteAlign(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_align); + try a.pegToken(.l_paren); + try a.pegExpr(); + try 
a.pegToken(.r_paren); +} + +/// BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN +fn pegBitAlign(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_align); + try a.pegToken(.l_paren); + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_paren); +} + +/// IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? +fn pegIdentifierList(a: *AstSmith) SourceError!void { + while (!a.smith.eos()) { + try a.pegMaybeDocComment(); + try a.pegIdentifier(); + try a.pegToken(.comma); + } + if (a.smith.value(bool)) { + try a.pegMaybeDocComment(); + try a.pegIdentifier(); + } +} + +/// SwitchProngList <- (SwitchProng COMMA)* SwitchProng? +fn pegSwitchProngList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegSwitchProng(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegSwitchProng(); + } +} + +/// AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? +fn pegAsmOutputList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegAsmOutputItem(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegAsmOutputItem(); + } +} + +/// AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? +fn pegAsmInputList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegAsmInputItem(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegAsmInputItem(); + } +} + +/// ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? 
+fn pegParamDeclList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegParamDecl(); + try a.pegToken(.comma); + } + const Final = enum { none, dot3, dot3_comma, param }; + switch (a.smith.valueWeighted(Final, &.{ + .rangeLessThan(Final, .none, .param, 2), + .value(Final, .param, 1), + })) { + .none => {}, + .dot3 => try a.pegToken(.ellipsis3), + .dot3_comma => { + try a.pegToken(.ellipsis3); + try a.pegToken(.comma); + }, + .param => try a.pegParamDecl(), + } +} + +/// ExprList <- (Expr COMMA)* Expr? +fn pegExprList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegExpr(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegExpr(); + } +} + +/// container_doc_comment <- ('//!' non_control_utf8* [ \n]* skip)+ +fn pegContainerDocComment(a: *AstSmith) SourceError!void { + while (true) { + try a.addTokenTag(.container_doc_comment); + try a.pegGenericLine("//!", .any); + try a.pegSkip(); + if (a.smith.eos()) break; + } +} + +/// doc_comment? +fn pegMaybeDocComment(a: *AstSmith) SourceError!void { + // A specific hash is provided here since this function is likely to be inlined, + // however having all doc comments with the same uid is beneficial. 
+ if (a.smith.boolWeightedWithHash(63, 1, 0x39b94392)) { + try a.pegDocComment(); + } +} + +/// doc_comment <- ('///' non_control_utf8* [ \n]* skip)+ +fn pegDocComment(a: *AstSmith) SourceError!void { + if (a.source_len > 0 and a.source_buf[a.source_len - 1] != '\n') { + try a.addSourceByte('\n'); + } + while (true) { + try a.addTokenTag(.doc_comment); + try a.pegGenericLine("///", .doc_comment); + try a.pegSkip(); + if (a.smith.eosWeightedSimple(1, 3)) break; + } +} + +/// line_comment <- '//' ![!/] non_control_utf8* / '////' non_control_utf8* +fn pegLineComment(a: *AstSmith) SourceError!void { + return a.pegGenericLine("//", .line_comment); +} + +/// line_string <- '\\\\' non_control_utf8* [ \n]* +fn pegLineString(a: *AstSmith) SourceError!void { + try a.addTokenTag(.multiline_string_literal_line); + return a.pegGenericLine("\\\\", .any); +} + +/// non_control_utf8 <- [\040-\377] +/// +/// Used for line, doc, and container comments as well as +/// multiline string literal lines. +fn pegGenericLine( + a: *AstSmith, + prefix: []const u8, + /// Adds constraints to what the line contains + prefix_kind: enum { any, line_comment, doc_comment }, +) SourceError!void { + const cr = a.smith.value(bool); + const newline_len = @intFromBool(cr) + @as(usize, 1); + + try a.ensureSourceCapacity(prefix.len + newline_len); + a.addSourceAssumeCapacity(prefix); + + const line = a.variableChar(newline_len, 0, &.{ + .rangeAtMost(u8, ' ', 0x7f - 1, 1), + .rangeAtMost(u8, 0x7f + 1, 0xff, 1), + }); + if (line.len >= 1) switch (prefix_kind) { + .any => {}, + .line_comment => { + // Convert doc comments to quadruple slashes when possible; + // Otherwise, and for container doc comments, erase the '/' or '!' 
+ if (line[0] == '/' and line.len >= 2) { + line[1] = '/'; + } else if (line[0] == '/' or line[0] == '!') { + line[0] = ' '; + } + }, + .doc_comment => { + // Avoid quadruple slashes + if (line[0] == '/') { + line[0] = ' '; + } + }, + }; + + if (cr) a.addSourceByteAssumeCapacity('\r'); + a.addSourceByteAssumeCapacity('\n'); +} + +/// skip <- ([ \n] / line_comment)* +fn pegSkip(a: *AstSmith) SourceError!void { + if (a.smith.boolWeighted(63, 1)) { + while (true) { + const Kind = enum { + space, + line_break, + cr_line_break, + line_comment, + line_comment_zig_fmt_off, + line_comment_zig_fmt_on, + }; + + const weights = Smith.baselineWeights(Kind) ++ + [_]Weight{.value(Kind, .space, 11)}; + switch (a.smith.valueWeighted(Kind, weights)) { + .space => try a.addSourceByte(' '), + .line_break => try a.addSourceByte('\n'), + .cr_line_break => try a.addSource("\r\n"), + .line_comment => try a.pegLineComment(), + .line_comment_zig_fmt_off => try a.addSource("//zig fmt: off\n"), + .line_comment_zig_fmt_on => try a.addSource("//zig fmt: on\n"), + } + + if (a.smith.eos()) break; + } + } +} + +const bin_weights: []const Weight = &.{.rangeAtMost(u8, '0', '1', 1)}; +const oct_weights: []const Weight = &.{.rangeAtMost(u8, '0', '7', 1)}; +const dec_weights: []const Weight = &.{.rangeAtMost(u8, '0', '9', 1)}; +const hex_weights: []const Weight = &.{ + .rangeAtMost(u8, '0', '9', 1), + .rangeAtMost(u8, 'a', 'f', 1), + .rangeAtMost(u8, 'A', 'F', 1), +}; + +/// Asserts enough capacity for at `min + reserved_capacity` +fn variableChar( + a: *AstSmith, + reserved_capacity: usize, + min: usize, + weights: []const Weight, +) []u8 { + const capacity = a.sourceCapacity(); + const max_out = capacity.len - reserved_capacity; + + const len_weights: [3]Weight = .{ + .rangeAtMost(u32, @intCast(min), @min(2, max_out), 32678), + // For the below `.rangeAtMost` is not used because max may be less than min. + // In this case, the weights are omitted. 
+ .{ .min = 3, .max = @min(16, max_out), .weight = 512 }, + // Still allow much longer sequences to test parsing overflows + .{ .min = 17, .max = @min(256, max_out), .weight = 1 }, + }; + const n_weights = @as(usize, 1) + @intFromBool(max_out >= 3) + @intFromBool(max_out >= 17); + + const len = a.smith.sliceWeighted(capacity, len_weights[0..n_weights], weights); + a.source_len += len; + return capacity[0..len]; +} + +/// char_escape +/// <- "\\x" hex hex +/// / "\\u{" hex+ "}" +/// / "\\" [nr\\t'"] +/// char_char +/// <- multibyte_utf8 +/// / char_escape +/// / ![\\'\n] non_control_ascii +/// +/// string_char +/// <- multibyte_utf8 +/// / char_escape +/// / ![\\"\n] non_control_ascii +fn pegChar(a: *AstSmith, quote: u8) SourceError!void { + const Char = enum(u8) { + ascii, + unicode_2, + unicode_3, + unicode_4, + hex_escape, + unicode_escape, + char_escape, + }; + const weights = Smith.baselineWeights(Char) ++ &[_]Weight{.value(Char, .ascii, 32)}; + switch (a.smith.valueWeighted(Char, weights)) { + .ascii => try a.addSourceByte(a.smith.valueWeighted(u8, &.{ + .rangeAtMost(u8, ' ', quote - 1, 1), + .rangeAtMost(u8, quote + 1, '\\' - 1, 1), + .rangeAtMost(u8, '\\' + 1, 0x7e, 1), + })), + .unicode_2 => assert(2 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x80, 0x800), + try a.addSourceAsSlice(2), + ) catch unreachable), + .unicode_3 => assert(3 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x800, 0x10000), + try a.addSourceAsSlice(3), + ) catch unreachable), + .unicode_4 => assert(4 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x10000, 0x110000), + try a.addSourceAsSlice(4), + ) catch unreachable), + .hex_escape => { + try a.ensureSourceCapacity(4); + a.addSourceAssumeCapacity("\\x"); + a.smith.bytesWeighted(a.addSourceAsSliceAssumeCapacity(2), hex_weights); + }, + .unicode_escape => { + try a.ensureSourceCapacity(5); + a.addSourceAssumeCapacity("\\u{"); + _ = a.variableChar(1, 1, hex_weights); + 
a.addSourceByteAssumeCapacity('}'); + }, + .char_escape => { + try a.ensureSourceCapacity(2); + a.addSourceByteAssumeCapacity('\\'); + a.addSourceByteAssumeCapacity(a.smith.valueWeighted(u8, &.{ + .value(u8, 'n', 1), + .value(u8, 'r', 1), + .value(u8, 't', 1), + .value(u8, '\\', 1), + .value(u8, '\'', 1), + .value(u8, '"', 1), + })); + }, + } +} + +/// CHAR_LITERAL <- ['] char_char ['] skip +fn pegCharLiteral(a: *AstSmith) SourceError!void { + try a.addTokenTag(.char_literal); + try a.addSourceByte('\''); + try a.pegChar('\''); + try a.addSourceByte('\''); + try a.pegSkip(); +} + +///FLOAT +/// <- '0x' hex_int '.' hex_int ([pP] [-+]? dec_int)? skip +/// / dec_int '.' dec_int ([eE] [-+]? dec_int)? skip +/// / '0x' hex_int [pP] [-+]? dec_int skip +/// / dec_int [eE] [-+]? dec_int skip +fn pegFloat(a: *AstSmith) SourceError!void { + try a.preservePegEndOfWord(); + try a.addTokenTag(.number_literal); + + const hex = a.smith.value(bool); + const exp = a.smith.value(packed struct(u3) { + kind: enum(u2) { none, no_sign, minus, plus }, + upper: bool, + }); + const dot = exp.kind == .none or a.smith.value(bool); + + var reserved: usize = @intFromBool(hex) * "0x".len + "0".len + @intFromBool(dot) * ".0".len + + switch (exp.kind) { + .none => 0, + .no_sign => "e0".len, + .minus => "e-0".len, + .plus => "e+0".len, + }; + try a.ensureSourceCapacity(reserved); + + if (hex) { + reserved -= 2; + a.addSourceAssumeCapacity("0x"); + } + const digits = if (hex) hex_weights else dec_weights; + + reserved -= 1; + _ = a.variableChar(reserved, 1, digits); + + if (dot) { + reserved -= 2; + a.addSourceByteAssumeCapacity('.'); + _ = a.variableChar(reserved, 1, digits); + } + + if (exp.kind != .none) { + reserved -= 1; + const case_diff = @as(u8, 'a' - 'A') * @intFromBool(exp.upper); + a.addSourceByteAssumeCapacity(@as(u8, if (hex) 'p' else 'e') - case_diff); + + if (exp.kind != .no_sign) { + reserved -= 1; + a.addSourceByteAssumeCapacity(if (exp.kind == .plus) '+' else '-'); + } + + reserved 
-= 1; + assert(reserved == 0); + _ = a.variableChar(reserved, 1, dec_weights); + } +} + +///INTEGER +/// <- '0b' bin_int skip +/// / '0o' oct_int skip +/// / '0x' hex_int skip +/// / dec_int skip +fn pegInteger(a: *AstSmith) SourceError!void { + try a.preservePegEndOfWord(); + try a.addTokenTag(.number_literal); + const Base = enum { bin, dec, oct, hex }; + const base_weights: []const Weight = Smith.baselineWeights(Base) ++ + &[_]Weight{ .value(Base, .dec, 6), .value(Base, .hex, 2) }; + const digits, const prefix = switch (a.smith.valueWeighted(Base, base_weights)) { + .bin => .{ bin_weights, "0b" }, + .oct => .{ oct_weights, "0o" }, + .dec => .{ dec_weights, "" }, + .hex => .{ hex_weights, "0x" }, + }; + try a.ensureSourceCapacity(prefix.len + 1); + if (prefix.len != 0) a.addSourceAssumeCapacity(prefix); + _ = a.variableChar(0, 1, digits); +} + +/// Does not include 'skip'. Does not add any token tag. +fn stringLiteralSingleInner(a: *AstSmith) SourceError!void { + try a.addSourceByte('"'); + while (!a.smith.eosWeightedSimple(3, 1)) { + try a.pegChar('"'); + } + try a.addSourceByte('"'); +} + +/// STRINGLITERALSINGLE <- ["] string_char* ["] skip +fn pegStringLiteralSingle(a: *AstSmith) SourceError!void { + try a.addTokenTag(.string_literal); + try a.stringLiteralSingleInner(); + try a.pegSkip(); +} + +/// STRINGLITERAL +/// <- STRINGLITERALSINGLE +/// / (line_string skip)+ +fn pegStringLiteral(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegStringLiteralSingle(); + } else { + while (true) { + try a.pegLineString(); + try a.pegSkip(); + if (a.smith.eos()) break; + } + } +} + +const alphanumeric_weights: [4]Weight = .{ + .rangeAtMost(u8, '0', '9', 1), + .rangeAtMost(u8, 'A', 'Z', 1), + .rangeAtMost(u8, 'a', 'z', 1), + .value(u8, '_', 1), +}; + +/// IDENTIFIER +/// <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip +/// / '@' STRINGLITERALSINGLE +fn pegIdentifier(a: *AstSmith) SourceError!void { + const Kind = enum(u2) { underscore, regular_identifier, 
quoted_identifier, copy_identifier }; + const kind_weights: [4]Weight = .{ + .value(Kind, .underscore, 6), + .value(Kind, .regular_identifier, 3), + .value(Kind, .quoted_identifier, 1), + .value(Kind, .copy_identifier, 6), + }; + const n_weights = @as(usize, kind_weights.len) - @intFromBool(a.prev_ids_len == 0); + const kind = a.smith.valueWeighted(Kind, kind_weights[0..n_weights]); + + switch (kind) { + .underscore => { + try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + try a.addSourceByte('_'); + }, + .regular_identifier => { + try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + + const start = a.source_len; + try a.addSourceByte(a.smith.valueWeighted(u8, alphanumeric_weights[1..])); + _ = a.variableChar(0, 0, &alphanumeric_weights); + + if (Token.getKeyword(a.source_buf[start..a.source_len]) != null) { + a.source_buf[start] = '_'; // No keywords start with '_' + } + }, + .quoted_identifier => { + try a.addTokenTag(.identifier); + try a.addSourceByte('@'); + try a.stringLiteralSingleInner(); + }, + .copy_identifier => { + const n_prev = @min(a.prev_ids_len, a.prev_ids_buf.len); + const prev_i = a.smith.valueRangeLessThan(u16, 0, n_prev); + const prev = a.prev_ids_buf[prev_i]; + + if (a.source_buf[prev.start] != '@') try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + try a.addSource(a.source_buf[prev.start..][0..prev.len]); + }, + } + try a.pegSkip(); + if (kind != .copy_identifier) { + const start = a.token_start_buf[a.tokens_len - 1]; + a.prev_ids_buf[a.prev_ids_len % a.prev_ids_buf.len] = .{ + .start = @intCast(start), + .len = @intCast(a.source_len - start), + }; + a.prev_ids_len += 1; + } +} + +/// BUILTINIDENTIFIER <- '@'[A-Za-z_][A-Za-z0-9_]* skip +fn pegBuiltinIdentifier(a: *AstSmith) SourceError!void { + try a.addTokenTag(.builtin); + if (a.smith.boolWeighted(1, 31)) { + if (a.smith.boolWeighted(1, 8)) { + // Pointer cast (reordable with zig fmt) + const ids = [_][]const u8{ + "@ptrCast", + "@addrspaceCast", + 
"@alignCast", + "@constCast", + "@volatileCast", + }; + try a.addSource(ids[a.smith.index(ids.len)]); + } else { + const ids = std.zig.BuiltinFn.list.keys(); + try a.addSource(ids[a.smith.index(ids.len)]); + } + } else { + try a.ensureSourceCapacity(2); + a.addSourceByteAssumeCapacity('@'); + a.addSourceByteAssumeCapacity(a.smith.valueWeighted(u8, alphanumeric_weights[1..])); + _ = a.variableChar(0, 0, &alphanumeric_weights); + } + try a.pegSkip(); +} + +test AstSmith { + try std.testing.fuzz({}, checkGenerated, .{}); +} + +fn checkGenerated(_: void, smith: *Smith) !void { + var a: AstSmith = .init(smith); + try a.generateSource(); + + { // Check tokenization matches source + errdefer a.logBadSource(null); + + const token_tags = a.token_tag_buf[0..a.tokens_len]; + const token_starts = a.token_start_buf[0..a.tokens_len]; + try std.testing.expectEqual(Token.Tag.eof, token_tags[token_tags.len - 1]); + + var tokenizer: std.zig.Tokenizer = .init(a.source()); + for (token_tags, token_starts) |tag, start| { + const tok = tokenizer.next(); + try std.testing.expectEqual(tok.tag, tag); + try std.testing.expectEqual(tok.loc.start, start); + if (tag == .invalid) return error.InvalidToken; + } + } + + var fba_buf: [1 << 18]u8 = undefined; + var fba: std.heap.FixedBufferAllocator = .init(&fba_buf); + const ast = std.zig.Ast.parseTokens(fba.allocator(), a.source(), a.tokens(), .zig) catch + return error.SkipZigTest; + + errdefer a.logBadSource(ast); + try std.testing.expectEqual(0, ast.errors.len); +} + +fn logBadSource(a: *AstSmith, ast: ?std.zig.Ast) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + a.logBadSourceInner(ls.terminal(), ast) catch {}; +} + +fn logBadSourceInner(a: *AstSmith, t: std.Io.Terminal, ast: ?std.zig.Ast) std.Io.Writer.Error!void { + try a.logSourceInner(t); + const w = t.writer; + + if (ast) |bad_ast| { + try w.writeAll("=== Parse Errors ===\n"); + for (bad_ast.errors) |err| { + const loc = 
bad_ast.tokenLocation(0, err.token); + try w.print("{}:{}: ", .{ loc.line + 1, loc.column + 1 }); + try bad_ast.renderError(err, w); + try w.writeByte('\n'); + } + } else { + t.setColor(.dim) catch {}; + try w.writeAll("=== Tokens ===\n"); + t.setColor(.reset) catch {}; + for ( + 0.., + a.token_tag_buf[0..a.tokens_len], + a.token_start_buf[0..a.tokens_len], + ) |i, tag, start| { + try w.print("#{} @{}: {t}\n", .{ i, start, tag }); + } + + t.setColor(.dim) catch {}; + try w.writeAll("\n=== Expected Tokens ===\n"); + t.setColor(.reset) catch {}; + + var tokenizer: std.zig.Tokenizer = .init(a.source()); + var i: usize = 0; + while (true) { + const tok = tokenizer.next(); + try w.print("#{} @{}-{}: {t}\n", .{ i, tok.loc.start, tok.loc.end, tok.tag }); + i += 1; + if (tok.tag == .invalid or tok.tag == .eof) break; + } + } +} + +pub fn logSource(a: *AstSmith) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + a.logSourceInner(ls.terminal()) catch {}; +} + +fn logSourceInner(a: *AstSmith, t: std.Io.Terminal) std.Io.Writer.Error!void { + const w = t.writer; + + t.setColor(.dim) catch {}; + try w.writeAll("=== Source ===\n"); + t.setColor(.reset) catch {}; + + var line: usize = 1; + try w.print("{: >5} ", .{line}); + for (a.source()) |c| switch (c) { + ' '...0x7e => try w.writeByte(c), + '\n' => { + line += 1; + try w.print("\n{: >5} ", .{line}); + }, + '\r' => { + t.setColor(.cyan) catch {}; + try w.writeAll("\\r"); + t.setColor(.reset) catch {}; + }, + '\t' => { + t.setColor(.cyan) catch {}; + try w.writeAll("\\t"); + t.setColor(.reset) catch {}; + }, + else => { + t.setColor(.cyan) catch {}; + try w.print("\\x{x:0>2}", .{c}); + t.setColor(.reset) catch {}; + }, + }; + try w.writeByte('\n'); +} From 785fb1be111186525bf288fa3460945404f676eb Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:13:44 -0400 Subject: [PATCH 03/11] fix several inconsistencies between parser and PEG - PEG / 
Parser Changes All the changes made here are to places where the PEG was more permissive than the parser. Changes to the parser make it more permissive and changes to the PEG make it more strict. When choosing between these two options for discrepancies, I opted for the choice that was more natural and increased code readability. Changes to the Parser * Tuple types can now be `inline` and `extern` (e.g. `extern struct`). * Break labels are now only consumed if both the colon and identifier are present instead of failing if there is only a colon. * Labeled blocks are no longer parsed in PrimaryExpr (so they are now allowed to have CurlySuffixExpr) as in the PEG. * While expressions can now be grouped on the same line. * Added distinction in error messages for "a multiline string literal" so places where only single string literals are allowed do not give "expected 'a string literal', found 'a string literal'". Changes to the PEG * Made it so extern functions cannot have a body * Made it so ... can be only the last function argument * Made it so many item pointers can't have bit alignment * Made it so asm inputs / outputs cannot be multiline string literals * Added distinction between block-level statements and regular statements -- Pointer Qualifier Order The PEG allowed for duplicated qualifiers, which the parser did not. The simplest fix for this was to make each be allowed zero or one times which required giving them an order similar to how FnProto already works. The chosen order is the same as used by zig fmt. The parser still accepts them in any order similar to functions. -- Backtracking Made it so several places could not backtrack in the PEG. A common pattern for this was (A / !A). --- !ExprSuffix Expressions ending with expressions now have !ExprSuffix after. This change prevents expressions such as `if (a) T else U{}` being parsable as `(if (a) T else U){}`. 
It also stops some backtracking, take for example: `if (a) for (b) |c| d else |e| f` It may seem at first that the else clause belongs to the `for`, however it actually belongs to the `if` because `for` else-clauses cannot have a payload. This is fixed by a new `KEYWORD_else / !KEYWORD_else`, however this alone does not fix more complex cases such as: `if (a) for (b) |c| d() else |e| f` The PEG would first attempt to parse it as expected but fail due to the new guard. It will then backtrack to `if (a) (for (b) |c| d)() else |e| f` which is surprising but avoids the new guard. So, !ExprSuffix is required to disallow this type of backtracking. --- !LabelableExpr For identifiers, excluding labels is necessary despite ordered choice due to pointer bit alignment. For example `*align(a : b: for (c) e) T` could backtrack to `*align(a : b : (for (c) e)) T`. --- !SinglePtrTypeStart Prevents expressions like `break * break` which is parsed as `break (*break)` backtracking to `(break) * (break)` --- !BlockExpr Prevents expressions like `test { {} = a; }` being backtracked to and parsed as `test { ({} = a); }` (the parentheses are just for demonstration, that expression is not legal either) --- !ExprStatement In addition to splitting up block level statements, statements that are also parsable as expressions are now part of ExprStatement to disallow backtracking. --- doc/langref.html.in | 129 ++++++++++++++++------------- lib/std/zig/Ast.zig | 4 - lib/std/zig/Parse.zig | 157 +++++++++++++++++++----------------- lib/std/zig/parser_test.zig | 39 ++++++--- lib/std/zig/tokenizer.zig | 3 +- 5 files changed, 189 insertions(+), 143 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 3642e9bc18..a084a57825 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -7944,58 +7944,60 @@ TestDecl <- KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block ComptimeDecl <- KEYWORD_comptime Block Decl - <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? 
/ KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) + <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) + / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? GlobalVarDecl -FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr +FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? AddrSpace? LinkSection? GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON -ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? +ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn (IDENTIFIER COLON / !(IDENTIFIER COLON))? TypeExpr ByteAlign? (EQUAL Expr)? # *** Block Level *** -Statement - <- KEYWORD_comptime ComptimeStatement - / KEYWORD_nosuspend BlockExprStatement - / KEYWORD_suspend BlockExprStatement +BlockStatement + <- Statement / KEYWORD_defer BlockExprStatement / KEYWORD_errdefer Payload? BlockExprStatement - / IfStatement - / LabeledStatement - / VarDeclExprStatement + / !ExprStatement (KEYWORD_comptime !BlockExpr)? VarAssignStatement -ComptimeStatement - <- BlockExpr - / VarDeclExprStatement +Statement + <- ExprStatement + / KEYWORD_suspend BlockExprStatement + / !ExprStatement (KEYWORD_comptime !BlockExpr)? AssignExpr SEMICOLON + +ExprStatement + <- IfStatement + / LabeledStatement + / KEYWORD_nosuspend BlockExprStatement + / KEYWORD_comptime BlockExpr IfStatement <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? - / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? 
Statement ) LabeledStatement <- BlockLabel? (Block / LoopStatement / SwitchExpr) LoopStatement <- KEYWORD_inline? (ForStatement / WhileStatement) ForStatement - <- ForPrefix BlockExpr ( KEYWORD_else Statement )? - / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) + <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) + / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) WhileStatement <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? - / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) BlockExprStatement <- BlockExpr - / AssignExpr SEMICOLON + / !BlockExpr AssignExpr SEMICOLON BlockExpr <- BlockLabel? Block -# An expression, assignment, or any destructure, as a statement. -VarDeclExprStatement - <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON - / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON +# An assignment or a destructure whose LHS are all lvalue expressions or variable declarations. +VarAssignStatement <- (VarDeclProto / Expr) (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON # *** Expression Level *** @@ -8025,25 +8027,25 @@ PrefixExpr <- PrefixOp* PrimaryExpr PrimaryExpr <- AsmExpr / IfExpr - / KEYWORD_break BreakLabel? Expr? - / KEYWORD_comptime Expr - / KEYWORD_nosuspend Expr - / KEYWORD_continue BreakLabel? Expr? - / KEYWORD_resume Expr - / KEYWORD_return Expr? + / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) + / KEYWORD_comptime Expr !ExprSuffix + / KEYWORD_nosuspend Expr !ExprSuffix + / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) + / KEYWORD_resume Expr !ExprSuffix + / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) / BlockLabel? LoopExpr / Block / CurlySuffixExpr -IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? 
+IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix -Block <- LBRACE Statement* RBRACE +Block <- LBRACE BlockStatement* RBRACE LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr) -ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? +ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix -WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? +WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix CurlySuffixExpr <- TypeExpr InitList? @@ -8070,10 +8072,10 @@ PrimaryTypeExpr / FnProto / GroupedExpr / LabeledTypeExpr - / IDENTIFIER + / IDENTIFIER !(COLON LabelableExpr) / IfTypeExpr / INTEGER - / KEYWORD_comptime TypeExpr + / KEYWORD_comptime TypeExpr !ExprSuffix / KEYWORD_error DOT IDENTIFIER / KEYWORD_anyframe / KEYWORD_unreachable @@ -8085,7 +8087,7 @@ ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE GroupedExpr <- LPAREN Expr RPAREN -IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix LabeledTypeExpr <- BlockLabel Block @@ -8094,9 +8096,9 @@ LabeledTypeExpr LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) -ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? +ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix -WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE @@ -8105,11 +8107,11 @@ AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN AsmOutput <- COLON AsmOutputList AsmInput? -AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN AsmInput <- COLON AsmInputList AsmClobbers? 
-AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN +AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN AsmClobbers <- COLON Expr @@ -8129,9 +8131,7 @@ AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN # Fn specific CallConv <- KEYWORD_callconv LPAREN Expr RPAREN -ParamDecl - <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType - / DOT3 +ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime / !KEYWORD_comptime) (IDENTIFIER COLON / !(IDENTIFIER_COLON)) ParamType ParamType <- KEYWORD_anytype @@ -8237,8 +8237,8 @@ PrefixOp PrefixTypeOp <- QUESTIONMARK / KEYWORD_anyframe MINUSRARROW - / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* - / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* + / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? + / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? / ArrayTypeStart SuffixOp @@ -8249,15 +8249,31 @@ SuffixOp FnCallArguments <- LPAREN ExprList RPAREN +ExprSuffix + <- KEYWORD_or + / KEYWORD_and + / CompareOp + / BitwiseOp + / BitShiftOp + / AdditionOp + / MultiplyOp + / EXCLAMATIONMARK + / SuffixOp + / FnCallArguments + +LabelableExpr + <- Block + / SwitchExpr + / LoopExpr + # Ptr specific SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET -PtrTypeStart - <- ASTERISK - / ASTERISK2 - / LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +SinglePtrTypeStart <- ASTERISK / ASTERISK2 -ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET +ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET + +ArrayTypeStart <- LBRACKET Expr !(ASTERISK / ASTERISK2) (COLON Expr)? 
RBRACKET # ContainerDecl specific ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE @@ -8266,11 +8282,13 @@ ContainerDeclType <- KEYWORD_struct (LPAREN Expr RPAREN)? / KEYWORD_opaque / KEYWORD_enum (LPAREN Expr RPAREN)? - / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)? + / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / !KEYWORD_enum Expr) RPAREN)? # Alignment ByteAlign <- KEYWORD_align LPAREN Expr RPAREN +BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN + # Lists IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? @@ -8280,7 +8298,7 @@ AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? -ParamDeclList <- (ParamDecl COMMA)* ParamDecl? +ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? ExprList <- (Expr COMMA)* Expr? @@ -8337,6 +8355,7 @@ multibyte_utf8 <- / oxC2_oxDF ox80_oxBF non_control_ascii <- [\040-\176] +non_control_utf8 <- [\040-\377] char_escape <- "\\x" hex hex @@ -8352,10 +8371,10 @@ string_char / char_escape / ![\\"\n] non_control_ascii -container_doc_comment <- ('//!' [^\n]* [ \n]* skip)+ -doc_comment <- ('///' [^\n]* [ \n]* skip)+ -line_comment <- '//' ![!/][^\n]* / '////' [^\n]* -line_string <- ('\\\\' [^\n]* [ \n]*)+ +container_doc_comment <- ('//!' 
non_control_utf8* [ \n]* skip)+ +doc_comment <- ('///' non_control_utf8* [ \n]* skip)+ +line_comment <- '//' ![!/] non_control_utf8* / '////' non_control_utf8* +line_string <- '\\\\' non_control_utf8* [ \n]* skip <- ([ \n] / line_comment)* CHAR_LITERAL <- ['] char_char ['] skip diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index 7c73c3d4d7..3e253543f1 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -504,9 +504,6 @@ pub fn renderError(tree: Ast, parse_error: Error, w: *Writer) Writer.Error!void .varargs_nonfinal => { return w.writeAll("function prototype has parameter after varargs"); }, - .expected_continue_expr => { - return w.writeAll("expected ':' before while continue expression"); - }, .expected_semi_after_decl => { return w.writeAll("expected ';' after declaration"); @@ -2888,7 +2885,6 @@ pub const Error = struct { test_doc_comment, comptime_doc_comment, varargs_nonfinal, - expected_continue_expr, expected_semi_after_decl, expected_semi_after_stmt, expected_comma_after_field, diff --git a/lib/std/zig/Parse.zig b/lib/std/zig/Parse.zig index 5dcc183a21..541a74c3ea 100644 --- a/lib/std/zig/Parse.zig +++ b/lib/std/zig/Parse.zig @@ -257,7 +257,7 @@ fn parseContainerMembers(p: *Parse) Allocator.Error!Members { while (true) { const doc_comment = try p.eatDocComments(); - switch (p.tokenTag(p.tok_i)) { + sw: switch (p.tokenTag(p.tok_i)) { .keyword_test => { if (doc_comment) |some| { try p.warnMsg(.{ .tag = .test_doc_comment, .token = some }); @@ -348,17 +348,7 @@ fn parseContainerMembers(p: *Parse) Allocator.Error!Members { p.findNextContainerMember(); }, }, - .keyword_pub => { - p.tok_i += 1; - const opt_top_level_decl = try p.expectTopLevelDeclRecoverable(); - if (opt_top_level_decl) |top_level_decl| { - if (field_state == .seen) { - field_state = .{ .end = top_level_decl }; - } - try p.scratch.append(p.gpa, top_level_decl); - } - trailing = p.tokenTag(p.tok_i - 1) == .semicolon; - }, + .keyword_pub, .keyword_const, .keyword_var, 
.keyword_threadlocal, @@ -367,7 +357,27 @@ fn parseContainerMembers(p: *Parse) Allocator.Error!Members { .keyword_inline, .keyword_noinline, .keyword_fn, - => { + => |t| { + if (t == .keyword_extern) { + switch (p.tokenTag(p.tok_i + 1)) { + .keyword_struct, + .keyword_union, + .keyword_enum, + .keyword_opaque, + => |ct| continue :sw ct, + else => {}, + } + } + if (t == .keyword_inline) { + switch (p.tokenTag(p.tok_i + 1)) { + .keyword_for, + .keyword_while, + => |ct| continue :sw ct, + else => {}, + } + } + + p.tok_i += @intFromBool(t == .keyword_pub); const opt_top_level_decl = try p.expectTopLevelDeclRecoverable(); if (opt_top_level_decl) |top_level_decl| { if (field_state == .seen) { @@ -588,7 +598,8 @@ fn expectTestDeclRecoverable(p: *Parse) error{OutOfMemory}!?Node.Index { } /// Decl -/// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON /// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? VarDecl fn expectTopLevelDecl(p: *Parse) !?Node.Index { const extern_export_inline_token = p.nextToken(); @@ -665,7 +676,7 @@ fn expectTopLevelDeclRecoverable(p: *Parse) error{OutOfMemory}!?Node.Index { }; } -/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr +/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix fn parseFnProto(p: *Parse) !?Node.Index { const fn_token = p.eatToken(.keyword_fn) orelse return null; @@ -853,7 +864,7 @@ fn parseGlobalVarDecl(p: *Parse) !?Node.Index { return var_decl; } -/// ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? 
+/// ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn (IDENTIFIER COLON / !(IDENTIFIER COLON))? TypeExpr ByteAlign? (EQUAL Expr)? fn expectContainerField(p: *Parse) !Node.Index { _ = p.eatToken(.keyword_comptime); const main_token = p.tok_i; @@ -895,16 +906,23 @@ fn expectContainerField(p: *Parse) !Node.Index { } } -/// Statement -/// <- KEYWORD_comptime ComptimeStatement -/// / KEYWORD_nosuspend BlockExprStatement -/// / KEYWORD_suspend BlockExprStatement +/// BlockStatement +/// <- Statement /// / KEYWORD_defer BlockExprStatement /// / KEYWORD_errdefer Payload? BlockExprStatement -/// / IfStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? VarAssignStatement +/// +/// Statement +/// <- ExprStatement +/// / KEYWORD_suspend BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? AssignExpr SEMICOLON +/// +/// ExprStatement +/// <- IfStatement /// / LabeledStatement -/// / VarDeclExprStatement -fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { +/// / KEYWORD_nosuspend BlockExprStatement +/// / KEYWORD_comptime BlockExpr +fn expectStatement(p: *Parse, is_block_level: bool) Error!Node.Index { if (p.eatToken(.keyword_comptime)) |comptime_token| { const opt_block_expr = try p.parseBlockExpr(); if (opt_block_expr) |block_expr| { @@ -915,7 +933,7 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { }); } - if (allow_defer_var) { + if (is_block_level) { return p.expectVarDeclExprStatement(comptime_token); } else { const assign = try p.expectAssignExpr(); @@ -949,12 +967,12 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { .data = .{ .node = block_expr }, }); }, - .keyword_defer => if (allow_defer_var) return p.addNode(.{ + .keyword_defer => if (is_block_level) return p.addNode(.{ .tag = .@"defer", .main_token = p.nextToken(), .data = .{ .node = try p.expectBlockExprStatement() }, }), - .keyword_errdefer => if (allow_defer_var) return p.addNode(.{ 
+ .keyword_errdefer => if (is_block_level) return p.addNode(.{ .tag = .@"errdefer", .main_token = p.nextToken(), .data = .{ .opt_token_and_node = .{ @@ -979,7 +997,7 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { if (try p.parseLabeledStatement()) |labeled_statement| return labeled_statement; - if (allow_defer_var) { + if (is_block_level) { return p.expectVarDeclExprStatement(null); } else { const assign = try p.expectAssignExpr(); @@ -1007,8 +1025,10 @@ fn expectComptimeStatement(p: *Parse, comptime_token: TokenIndex) !Node.Index { } /// VarDeclExprStatement -/// <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON -/// / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON +/// <- Expr +/// / VarAssignStatement +/// +/// VarAssignStatement <- (VarDeclProto / Expr) (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON fn expectVarDeclExprStatement(p: *Parse, comptime_token: ?TokenIndex) !Node.Index { const scratch_top = p.scratch.items.len; defer p.scratch.shrinkRetainingCapacity(scratch_top); @@ -1140,7 +1160,7 @@ fn expectStatementRecoverable(p: *Parse) Error!?Node.Index { /// IfStatement /// <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? -/// / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +/// / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) fn expectIfStatement(p: *Parse) !Node.Index { const if_token = p.assertToken(.keyword_if); _ = try p.expectToken(.l_paren); @@ -1235,8 +1255,8 @@ fn parseLoopStatement(p: *Parse) !?Node.Index { } /// ForStatement -/// <- ForPrefix BlockExpr ( KEYWORD_else Statement )? 
-/// / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) +/// <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) +/// / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) fn parseForStatement(p: *Parse) !?Node.Index { const for_token = p.eatToken(.keyword_for) orelse return null; @@ -1293,7 +1313,7 @@ fn parseForStatement(p: *Parse) !?Node.Index { /// /// WhileStatement /// <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? -/// / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +/// / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) fn parseWhileStatement(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -1383,7 +1403,7 @@ fn parseWhileStatement(p: *Parse) !?Node.Index { /// BlockExprStatement /// <- BlockExpr -/// / AssignExpr SEMICOLON +/// / !BlockExpr AssignExpr SEMICOLON fn parseBlockExprStatement(p: *Parse) !?Node.Index { const block_expr = try p.parseBlockExpr(); if (block_expr) |expr| return expr; @@ -1685,18 +1705,20 @@ fn expectPrefixExpr(p: *Parse) Error!Node.Index { /// PrefixTypeOp /// <- QUESTIONMARK /// / KEYWORD_anyframe MINUSRARROW -/// / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* +/// / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? +/// / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? /// / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* /// / ArrayTypeStart /// /// SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET /// -/// PtrTypeStart -/// <- ASTERISK -/// / ASTERISK2 -/// / LBRACKET ASTERISK (LETTERC / COLON Expr)? 
RBRACKET +/// SinglePtrTypeStart <- ASTERISK / ASTERISK2 /// -/// ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET +/// ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +/// +/// ArrayTypeStart <- LBRACKET Expr !(ASTERISK / ASTERISK2) (COLON Expr)? RBRACKET +/// +/// BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN fn parseTypeExpr(p: *Parse) Error!?Node.Index { switch (p.tokenTag(p.tok_i)) { .question_mark => return try p.addNode(.{ @@ -1962,12 +1984,12 @@ fn expectTypeExpr(p: *Parse) Error!Node.Index { /// PrimaryExpr /// <- AsmExpr /// / IfExpr -/// / KEYWORD_break BreakLabel? Expr? -/// / KEYWORD_comptime Expr -/// / KEYWORD_nosuspend Expr -/// / KEYWORD_continue BreakLabel? Expr? -/// / KEYWORD_resume Expr -/// / KEYWORD_return Expr? +/// / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_comptime Expr !ExprSuffix +/// / KEYWORD_nosuspend Expr !ExprSuffix +/// / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_resume Expr !ExprSuffix +/// / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) /// / BlockLabel? LoopExpr /// / Block /// / CurlySuffixExpr @@ -2042,10 +2064,6 @@ fn parsePrimaryExpr(p: *Parse) !?Node.Index { p.tok_i += 2; return try p.parseWhileExpr(); }, - .l_brace => { - p.tok_i += 2; - return try p.parseBlock(); - }, else => return try p.parseCurlySuffixExpr(), } } else { @@ -2067,12 +2085,12 @@ fn parsePrimaryExpr(p: *Parse) !?Node.Index { } } -/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? +/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? 
!ExprSuffix fn parseIfExpr(p: *Parse) !?Node.Index { return try p.parseIf(expectExpr); } -/// Block <- LBRACE Statement* RBRACE +/// Block <- LBRACE BlockStatement* RBRACE fn parseBlock(p: *Parse) !?Node.Index { const lbrace = p.eatToken(.l_brace) orelse return null; const scratch_top = p.scratch.items.len; @@ -2177,7 +2195,7 @@ fn forPrefix(p: *Parse) Error!usize { /// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? /// -/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? +/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix fn parseWhileExpr(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -2409,10 +2427,10 @@ fn parseSuffixExpr(p: *Parse) !?Node.Index { /// / FnProto /// / GroupedExpr /// / LabeledTypeExpr -/// / IDENTIFIER +/// / IDENTIFIER !(COLON LabelableExpr) /// / IfTypeExpr /// / INTEGER -/// / KEYWORD_comptime TypeExpr +/// / KEYWORD_comptime TypeExpr !ExprSuffix /// / KEYWORD_error DOT IDENTIFIER /// / KEYWORD_anyframe /// / KEYWORD_unreachable @@ -2431,7 +2449,7 @@ fn parseSuffixExpr(p: *Parse) !?Node.Index { /// /// GroupedExpr <- LPAREN Expr RPAREN /// -/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix /// /// LabeledTypeExpr /// <- BlockLabel Block @@ -2711,7 +2729,7 @@ fn expectPrimaryTypeExpr(p: *Parse) !Node.Index { /// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? /// -/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 
!ExprSuffix fn parseWhileTypeExpr(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -2876,7 +2894,7 @@ fn expectAsmExpr(p: *Parse) !Node.Index { }); } -/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN fn parseAsmOutputItem(p: *Parse) !?Node.Index { _ = p.eatToken(.l_bracket) orelse return null; const identifier = try p.expectToken(.identifier); @@ -2902,7 +2920,7 @@ fn parseAsmOutputItem(p: *Parse) !?Node.Index { }); } -/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN +/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN fn parseAsmInputItem(p: *Parse) !?Node.Index { _ = p.eatToken(.l_bracket) orelse return null; const identifier = try p.expectToken(.identifier); @@ -2923,9 +2941,7 @@ fn parseAsmInputItem(p: *Parse) !?Node.Index { /// BreakLabel <- COLON IDENTIFIER fn parseBreakLabel(p: *Parse) Error!OptionalTokenIndex { - _ = p.eatToken(.colon) orelse return .none; - const next_token = try p.expectToken(.identifier); - return .fromToken(next_token); + return if (p.eatTokens(&.{ .colon, .identifier })) |i| .fromToken(i + 1) else .none; } /// BlockLabel <- IDENTIFIER COLON @@ -2950,12 +2966,7 @@ fn expectFieldInit(p: *Parse) !Node.Index { /// WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN fn parseWhileContinueExpr(p: *Parse) !?Node.Index { - _ = p.eatToken(.colon) orelse { - if (p.tokenTag(p.tok_i) == .l_paren and - p.tokensOnSameLine(p.tok_i - 1, p.tok_i)) - return p.fail(.expected_continue_expr); - return null; - }; + _ = p.eatToken(.colon) orelse return null; _ = try p.expectToken(.l_paren); const node = try p.parseAssignExpr() orelse return p.fail(.expected_expr_or_assignment); _ = try p.expectToken(.r_paren); @@ -2993,9 +3004,7 
@@ fn parseAddrSpace(p: *Parse) !?Node.Index { /// such as in the case of anytype and `...`. Caller must look for rparen to find /// out when there are no more param decls left. /// -/// ParamDecl -/// <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType -/// / DOT3 +/// ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime / !KEYWORD_comptime) (IDENTIFIER COLON / !(IDENTIFIER_COLON)) ParamType /// /// ParamType /// <- KEYWORD_anytype @@ -3482,7 +3491,7 @@ fn parseSwitchProngList(p: *Parse) !Node.SubRange { return p.listToSpan(p.scratch.items[scratch_top..]); } -/// ParamDeclList <- (ParamDecl COMMA)* ParamDecl? +/// ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? fn parseParamDeclList(p: *Parse) !SmallSpan { _ = try p.expectToken(.l_paren); const scratch_top = p.scratch.items.len; @@ -3604,9 +3613,9 @@ fn parseIf(p: *Parse, comptime bodyParseFn: fn (p: *Parse) Error!Node.Index) !?N }); } -/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? +/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix /// -/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? 
+/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix fn parseFor(p: *Parse, comptime bodyParseFn: fn (p: *Parse) Error!Node.Index) !?Node.Index { const for_token = p.eatToken(.keyword_for) orelse return null; diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index b98207f904..3fa3064459 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -5472,17 +5472,11 @@ test "zig fmt: while continue expr" { \\ while (i > 0) \\ (i * 2); \\} + \\T: (while (true) ({ + \\ break usize; + \\})), \\ ); - try testError( - \\test { - \\ while (i > 0) (i -= 1) { - \\ print("test123", .{}); - \\ } - \\} - , &[_]Error{ - .expected_continue_expr, - }); } test "zig fmt: canonicalize symbols (simple)" { @@ -6838,6 +6832,33 @@ test "zig fmt: error set with extra newline before comma" { ); } +test "zig fmt: extern container in tuple" { + try testCanonical( + \\const T = struct { + \\ extern struct {}, + \\ extern union {}, + \\ extern enum {}, + \\}; + \\ + ); +} + +test "zig fmt: break followed by colon" { + try testCanonical( + \\const a = [if (cond) len else break:0]u8; + \\ + ); +} + +test "zig fmt: array init of labeled block" { + try testCanonical( + \\const a = blk: { + \\ break :blk T; + \\}{ .a = false }; + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index c296b6f533..cd74a66ce5 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -313,7 +313,8 @@ pub const Token = struct { return tag.lexeme() orelse switch (tag) { .invalid => "invalid token", .identifier => "an identifier", - .string_literal, .multiline_string_literal_line => "a string literal", + .string_literal => "a string literal", + .multiline_string_literal_line => "a multiline string literal", .char_literal => "a character literal", .eof => "EOF", .builtin => "a builtin function", From 23db3090436c6f8391474342cda6e73ed3c2b3d4 
Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:19:41 -0400 Subject: [PATCH 04/11] zig fmt: use AstSmith in fuzz test --- lib/std/zig/parser_test.zig | 207 ++++++++++++++++++------------------ 1 file changed, 105 insertions(+), 102 deletions(-) diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 3fa3064459..ca49aa2b13 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -7279,83 +7279,61 @@ test "zig fmt: fuzz" { try std.testing.fuzz({}, fuzzRender, .{}); } -fn parseTokens( - fba: Allocator, - source: [:0]const u8, -) error{ SkipZigTest, OutOfMemory }!struct { - toks: std.zig.Ast.TokenList, - maybe_rewritable: bool, - skip_idempotency: bool, -} { +fn isRewritable(source: []const u8, tokens: std.zig.Ast.TokenList.Slice) !bool { @disableInstrumentation(); + // Byte-order marker is stripped var maybe_rewritable = std.mem.startsWith(u8, source, "\xEF\xBB\xBF"); - var skip_idempotency = false; // This should be able to be removed once all the bugs are fixed + // The above variable can not yet be replaced by returns since error.SkipZigTest still needs to + // be checked for. - var tokens: std.zig.Ast.TokenList = .{}; - try tokens.ensureTotalCapacity(fba, source.len / 2); - var tokenizer: std.zig.Tokenizer = .init(source); - while (true) { - const tok = tokenizer.next(); - switch (tok.tag) { - .invalid, - .invalid_periodasterisks, - => return error.SkipZigTest, - // Extra colons can be removed - .keyword_asm, - // Qualifiers can be reordered - // keyword_const is intentionally excluded since it is used in other contexts and - // having only one qualifier will never lead to reordering. 
- .keyword_addrspace, - .keyword_align, - .keyword_allowzero, - .keyword_callconv, - .keyword_linksection, - .keyword_volatile, - => maybe_rewritable = true, - .builtin, - // Pointer casts can be reordered - => for ([_][]const u8{ - "ptrCast", - "alignCast", - "addrSpaceCast", - "constCast", - "volatileCast", - }) |id| { - if (std.mem.eql(u8, source[tok.loc.start + 1 .. tok.loc.end], id)) { - maybe_rewritable = false; - } - }, - // Quoted identifiers can be unquoted - .identifier => maybe_rewritable = maybe_rewritable or source[tok.loc.start] == '@', - else => {}, - // #23754 - .container_doc_comment, - => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{.l_brace})) { - return error.SkipZigTest; - }, - // #24507 - .keyword_inline, - .keyword_for, - .keyword_while, - .l_brace, - => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{ .identifier, .colon })) { + for (0.., tokens.items(.tag), tokens.items(.start)) |i, tag, start| switch (tag) { + // Extra colons can be removed + .keyword_asm, + // Qualifiers can be reordered + // keyword_const is intentionally excluded since it is used in other contexts and + // having only one qualifier will never lead to reordering. 
+ .keyword_addrspace, + .keyword_align, + .keyword_allowzero, + .keyword_callconv, + .keyword_linksection, + .keyword_volatile, + => maybe_rewritable = true, + .builtin, + // Pointer casts can be reordered + => for ([_][]const u8{ + "ptrCast", + "alignCast", + "addrSpaceCast", + "constCast", + "volatileCast", + }) |id| { + if (std.mem.startsWith(u8, source[start + 1 ..], id)) { maybe_rewritable = true; - skip_idempotency = true; - }, - } - try tokens.append(fba, .{ - .tag = tok.tag, - .start = @intCast(tok.loc.start), - }); - if (tok.tag == .eof) - break; - } - return .{ - .toks = tokens, - .maybe_rewritable = maybe_rewritable, - .skip_idempotency = skip_idempotency, + } + }, + // Quoted identifiers can be unquoted + .identifier => if (source[start] == '@') { + maybe_rewritable = true; + }, + else => {}, + // #23754 + .container_doc_comment, + => if (std.mem.endsWith(Token.Tag, tokens.items(.tag)[0..i], &.{.l_brace})) { + return error.SkipZigTest; // Can cause I.B. + }, + // #24507 + .keyword_inline, + .keyword_for, + .keyword_while, + .l_brace, + => if (std.mem.endsWith(Token.Tag, tokens.items(.tag)[0..i], &.{ .identifier, .colon })) { + return error.SkipZigTest; // Can cause I.B. due to double rendering of zig fmt on/off + }, }; + + return maybe_rewritable; } /// Checks equivelence of non-whitespace characters. 
@@ -7468,34 +7446,29 @@ fn reparseTokens( fn fuzzRender(_: void, smith: *std.testing.Smith) !void { @disableInstrumentation(); - var src_buf: [512]u8 = undefined; - const src_len = smith.sliceWeighted(&src_buf, &.{ - .rangeLessThan(u32, 0, 32, 256), - .rangeLessThan(u32, 32, 64, 64), - .rangeLessThan(u32, 64, src_buf.len, 1), - }, &.{ - .rangeAtMost(u8, 0x20, 0x7e, 8), - .value(u8, '\n', 32), - .value(u8, '\t', 8), - .value(u8, '\r', 4), - .rangeAtMost(u8, 0x7f, 0xff, 1), - }); - src_buf[src_len] = 0; - + var ast_smith: std.zig.AstSmith = .init(smith); + try ast_smith.generateSource(); var fba_ctx = std.heap.FixedBufferAllocator.init(&fixed_buffer_mem); - fuzzRenderInner(src_buf[0..src_len :0], fba_ctx.allocator()) catch |e| return switch (e) { - error.OutOfMemory => {}, - else => e, + var opt_rendered: ?[]const u8 = null; + fuzzRenderInner(&ast_smith, fba_ctx.allocator(), &opt_rendered) catch |e| switch (e) { + error.SkipZigTest, error.OutOfMemory, error.WriteFailed => return error.SkipZigTest, + else => |failure| { + ast_smith.logSource(); + if (opt_rendered) |rendered| { + logRenderedSource(rendered); + } + return failure; + }, }; } -fn fuzzRenderInner(source: [:0]const u8, fba: Allocator) !void { +fn fuzzRenderInner(ast_smith: *std.zig.AstSmith, fba: Allocator, opt_rendered: *?[]const u8) !void { @disableInstrumentation(); - const src_toks = try parseTokens(fba, source); - const src_tree = try std.zig.Ast.parseTokens(fba, source, src_toks.toks.slice(), .zig); - if (src_tree.errors.len != 0) - return; + const source = ast_smith.source(); + const src_rewritable = try isRewritable(source, ast_smith.tokens()); + const src_tree = try std.zig.Ast.parseTokens(fba, source, ast_smith.tokens(), .zig); + std.debug.assert(src_tree.errors.len == 0); for (src_tree.nodes.items(.tag)) |tag| switch (tag) { // #24507 (`switch(x) { inline for (a) |a| a => {} }` to // `switch(x) { { inline for (a) |a| a => {} }` since @@ -7511,15 +7484,16 @@ fn fuzzRenderInner(source: [:0]const 
u8, fba: Allocator) !void { // list to save space which is useless for fixed buffer allocators. try rendered_w.writer.writeByte(0); const rendered = rendered_w.written()[0 .. rendered_w.written().len - 1 :0]; + opt_rendered.* = rendered; // First check that the non-whitespace characters match. This ensures that // identifier names, numbers, comments, et cetera are preserved. - if (!src_toks.maybe_rewritable and isRewritten(source, rendered)) + if (!src_rewritable and isRewritten(source, rendered)) return error.Rewritten; // Next check that the tokens are the same since whitespace removal can change the tokens - const src_tags = src_toks.toks.items(.tag); + const src_tags = ast_smith.tokens().items(.tag); const rendered_toks = try reparseTokens(fba, rendered, src_tags[0 .. src_tags.len - 1 :.eof]); - if (!src_toks.maybe_rewritable and rendered_toks.rewritten) + if (!src_rewritable and rendered_toks.rewritten) return error.Rewritten; // Rerender the tree to check idempotency and that new commas @@ -7527,10 +7501,39 @@ fn fuzzRenderInner(source: [:0]const u8, fba: Allocator) !void { const rendered_tree = try std.zig.Ast.parseTokens(fba, rendered, rendered_toks.toks.slice(), .zig); if (rendered_tree.errors.len != 0) return error.Rewritten; - if (!src_toks.skip_idempotency) { - var rerendered_w: std.Io.Writer.Allocating = .init(fba); - try rerendered_w.ensureUnusedCapacity(source.len); - try rendered_tree.render(fba, &rerendered_w.writer, .{}); - try std.testing.expectEqualStrings(rendered, rerendered_w.written()); - } + var rerendered_w: std.Io.Writer.Allocating = .init(fba); + try rerendered_w.ensureUnusedCapacity(source.len); + try rendered_tree.render(fba, &rerendered_w.writer, .{}); + try std.testing.expectEqualStrings(rendered, rerendered_w.written()); +} + +fn logRenderedSource(source: []const u8) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + logRenderedSourceInner(source, ls.terminal()) catch 
{}; +} + +fn logRenderedSourceInner(source: []const u8, t: std.Io.Terminal) std.Io.Writer.Error!void { + const w = t.writer; + + t.setColor(.dim) catch {}; + try w.writeAll("=== Rendered Source ===\n"); + t.setColor(.reset) catch {}; + + for (0.., source) |i, c| switch (c) { + ' '...0x7e => try w.writeByte(c), + '\n' => { + if (i != 0 and source[i - 1] == ' ') { + try w.writeAll("⏎"); + } + try w.writeByte('\n'); + }, + else => { + t.setColor(.cyan) catch {}; + try w.print("\\x{x:0>2}", .{c}); + t.setColor(.reset) catch {}; + }, + }; + try w.writeAll("␃\n"); } From d70a9ea5d759fd5e61c8cd7404343c34280fe87e Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:26:40 -0400 Subject: [PATCH 05/11] zig fmt: properly revert nested asm indentation --- lib/std/zig/Ast/Render.zig | 4 +++- lib/std/zig/parser_test.zig | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index d61707b36d..19abada490 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -2775,6 +2775,8 @@ fn renderAsm( ais.forceLastIndent(); // Might have been dedented by a multiline string literal assert(ais.current_line_empty); + const prev_indent_delta = ais.indent_delta; // May be part of another asm expression + // so indent_delta can't be unconditionally used ais.setIndentDelta(asm_indent_delta); const colon1 = tree.lastToken(asm_node.ast.template) + 1; @@ -2856,7 +2858,7 @@ fn renderAsm( const clobbers = asm_node.ast.clobbers.unwrap().?; try renderExpression(r, clobbers, .none); ais.forceLastIndent(); // Might have been dedented by a multiline string literal - ais.setIndentDelta(indent_delta); + ais.setIndentDelta(prev_indent_delta); ais.popIndent(); return renderToken(r, asm_node.ast.rparen, space); // rparen } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index ca49aa2b13..bc9256f1dd 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig 
@@ -6859,6 +6859,18 @@ test "zig fmt: array init of labeled block" { ); } +test "zig fmt: nested asm indentation" { + try testCanonical( + \\const A = asm ("" + \\ : [_] "" (_), + \\ : + \\ : asm ("" + \\ : [_] "" (_), + \\ )); + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From ed3ca0f570e052adbed291a79e9a11bba5d3f7d0 Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:29:03 -0400 Subject: [PATCH 06/11] zig fmt: call ais.resetLine after "zig fmt: on" --- lib/std/zig/Ast/Render.zig | 1 + lib/std/zig/parser_test.zig | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index 19abada490..7d367805d4 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -3326,6 +3326,7 @@ fn renderComments(r: *Render, start: usize, end: usize) Error!bool { // Write with the canonical single space. try ais.underlying_writer.writeAll("// zig fmt: on\n"); ais.disabled_offset = null; + ais.resetLine(); } else if (ais.disabled_offset == null and mem.eql(u8, comment_content, "zig fmt: off")) { // Write with the canonical single space. 
try ais.writeAll("// zig fmt: off\n"); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index bc9256f1dd..7a6dc3f2f2 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6871,6 +6871,16 @@ test "zig fmt: nested asm indentation" { ); } +test "zig fmt: asm with zig fmt on" { + try testCanonical( + \\// zig fmt: off + \\const A = asm("a" // zig fmt: on + \\ : [_] "" (_), + \\); + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From c818a1e61f702e1990af3e0773b3954d0e29d0ca Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:31:02 -0400 Subject: [PATCH 07/11] zig fmt: handle `skip` space for multiline strings --- lib/std/zig/Ast/Render.zig | 9 ++++++++- lib/std/zig/parser_test.zig | 11 +++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index 7d367805d4..8b2c284256 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -338,9 +338,16 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { try ais.maybeInsertNewline(); const first_tok, const last_tok = tree.nodeData(node).token_and_token; - for (first_tok..last_tok + 1) |i| { + for (first_tok..last_tok) |i| { try renderToken(r, @intCast(i), .newline); } + if (space != .skip) { + try renderToken(r, last_tok, .newline); + } else { + try renderToken(r, last_tok, .skip); + try ais.insertNewline(); // A newline is part of the token, so it still needs + // rendered here. 
+ } const next_token = last_tok + 1; const next_token_tag = tree.tokenTag(next_token); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 7a6dc3f2f2..8d1863b0b0 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6881,6 +6881,17 @@ test "zig fmt: asm with zig fmt on" { ); } +test "zig fmt: array init with multiline string literal with fmt on/off" { + try testCanonical( + \\const array = .{ + \\ \\ + \\ // zig fmt: on + \\ // zig fmt: off + \\}; + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From ab237855b8df3f91f232c7e63eba7d21f166f75f Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:34:56 -0400 Subject: [PATCH 08/11] zig fmt: render asm colons with trailing comments Previously, the comments would be lost with the colons. This required a substantial rewrite of renderAsm to determine how many colons should be rendered. --- lib/std/zig/Ast/Render.zig | 154 ++++++++++++++++++++---------------- lib/std/zig/parser_test.zig | 17 ++++ 2 files changed, 102 insertions(+), 69 deletions(-) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index 8b2c284256..41cd1e2260 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -2756,8 +2756,47 @@ fn renderAsm( try renderToken(r, asm_node.ast.asm_token + 1, .none); // lparen } + const render_colons: [3]?Ast.TokenIndex = colons: { + var colons: [3]Ast.TokenIndex = undefined; + var render: u2 = 0; + + const rparen = asm_node.ast.rparen; + filled: { + colons[0] = tree.lastToken(asm_node.ast.template) + 1; + if (colons[0] == rparen) break :filled; + + if (asm_node.outputs.len != 0) { + colons[1] = tree.lastToken(asm_node.outputs[asm_node.outputs.len - 1]) + 1; + colons[1] += @intFromBool(tree.tokenTag(colons[1]) == .comma); + render = 1; + } else { + colons[1] = colons[0] + 1; + if (hasComment(tree, colons[0], colons[1])) render = 1; + } + if (colons[1] == rparen) break :filled; + + // Next 
colon is not checked for here since it cannot present without clobbers + if (asm_node.inputs.len != 0) { + render = 2; + } else { + const colon_or_rparen = colons[1] + 1; + if (hasComment(tree, colons[1], colon_or_rparen)) render = 2; + } + + if (asm_node.ast.clobbers.unwrap()) |clobbers| { + colons[2] = tree.firstToken(clobbers) - 1; + render = 3; + } + } + + var opt_colons: [3]?Ast.TokenIndex = @splat(null); + for (0..render) |i| opt_colons[i] = colons[i]; + break :colons opt_colons; + }; + + try ais.forcePushIndent(.normal); + if (asm_node.ast.items.len == 0) { - try ais.forcePushIndent(.normal); if (asm_node.ast.clobbers.unwrap()) |clobbers| { // asm ("foo" ::: clobbers) try renderExpression(r, asm_node.ast.template, .space); @@ -2771,13 +2810,14 @@ fn renderAsm( return renderToken(r, asm_node.ast.rparen, space); // rparen } - // asm ("foo") - try renderExpression(r, asm_node.ast.template, .none); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen + if (render_colons[0] == null) { + // asm ("foo") + try renderExpression(r, asm_node.ast.template, .none); + ais.popIndent(); + return renderToken(r, asm_node.ast.rparen, space); // rparen + } } - try ais.forcePushIndent(.normal); try renderExpression(r, asm_node.ast.template, .newline); ais.forceLastIndent(); // Might have been dedented by a multiline string literal assert(ais.current_line_empty); @@ -2785,86 +2825,62 @@ fn renderAsm( const prev_indent_delta = ais.indent_delta; // May be part of another asm expression // so indent_delta can't be unconditionally used ais.setIndentDelta(asm_indent_delta); - const colon1 = tree.lastToken(asm_node.ast.template) + 1; - const colon2 = if (asm_node.outputs.len == 0) colon2: { - try renderToken(r, colon1, .newline); // : - break :colon2 colon1 + 1; - } else colon2: { - try renderToken(r, colon1, .space); // : + rendered: { + if (render_colons[0]) |colon1| { + if (asm_node.outputs.len != 0) { + try renderToken(r, colon1, .space); + try 
ais.forcePushIndent(.normal); - try ais.forcePushIndent(.normal); - for (asm_node.outputs, 0..) |asm_output, i| { - if (i + 1 < asm_node.outputs.len) { - const next_asm_output = asm_node.outputs[i + 1]; - try renderAsmOutput(r, asm_output, .none); + const final = asm_node.outputs.len - 1; + for (asm_node.outputs[0..final], 0..) |asm_output, i| { + try renderAsmOutput(r, asm_output, .none); + + const next_start = tree.firstToken(asm_node.outputs[i + 1]); + try renderToken(r, next_start - 1, .newline); // , + try renderExtraNewlineToken(r, next_start); + } - const comma = tree.firstToken(next_asm_output) - 1; - try renderToken(r, comma, .newline); // , - try renderExtraNewlineToken(r, tree.firstToken(next_asm_output)); - } else if (asm_node.inputs.len == 0 and asm_node.ast.clobbers == .none) { try ais.pushSpace(.comma); - try renderAsmOutput(r, asm_output, .comma); + try renderAsmOutput(r, asm_node.outputs[final], .comma); ais.popSpace(); ais.popIndent(); - ais.setIndentDelta(indent_delta); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen } else { - try ais.pushSpace(.comma); - try renderAsmOutput(r, asm_output, .comma); - ais.popSpace(); - const comma_or_colon = tree.lastToken(asm_output) + 1; - ais.popIndent(); - break :colon2 switch (tree.tokenTag(comma_or_colon)) { - .comma => comma_or_colon + 1, - else => comma_or_colon, - }; + try renderToken(r, colon1, .newline); } } else unreachable; - }; - const colon3 = if (asm_node.inputs.len == 0) colon3: { - try renderToken(r, colon2, .newline); // : - break :colon3 colon2 + 1; - } else colon3: { - try renderToken(r, colon2, .space); // : - try ais.forcePushIndent(.normal); - for (asm_node.inputs, 0..) 
|asm_input, i| { - if (i + 1 < asm_node.inputs.len) { - const next_asm_input = asm_node.inputs[i + 1]; - try renderAsmInput(r, asm_input, .none); + if (render_colons[1]) |colon2| { + if (asm_node.inputs.len != 0) { + try renderToken(r, colon2, .space); + try ais.forcePushIndent(.normal); + + const final = asm_node.inputs.len - 1; + for (asm_node.inputs[0..final], 0..) |asm_input, i| { + try renderAsmInput(r, asm_input, .none); + + const next_start = tree.firstToken(asm_node.inputs[i + 1]); + try renderToken(r, next_start - 1, .newline); // , + try renderExtraNewlineToken(r, next_start); + } - const first_token = tree.firstToken(next_asm_input); - try renderToken(r, first_token - 1, .newline); // , - try renderExtraNewlineToken(r, first_token); - } else if (asm_node.ast.clobbers == .none) { try ais.pushSpace(.comma); - try renderAsmInput(r, asm_input, .comma); + try renderAsmInput(r, asm_node.inputs[final], .comma); ais.popSpace(); ais.popIndent(); - ais.setIndentDelta(indent_delta); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen } else { - try ais.pushSpace(.comma); - try renderAsmInput(r, asm_input, .comma); - ais.popSpace(); - const comma_or_colon = tree.lastToken(asm_input) + 1; - ais.popIndent(); - break :colon3 switch (tree.tokenTag(comma_or_colon)) { - .comma => comma_or_colon + 1, - else => comma_or_colon, - }; + try renderToken(r, colon2, .newline); } - } - unreachable; - }; + } else break :rendered; + + if (render_colons[2]) |colon3| { + const clobbers = asm_node.ast.clobbers.unwrap().?; + try renderToken(r, colon3, .maybe_space); + try renderExpression(r, clobbers, .none); + ais.forceLastIndent(); // Might have been dedented by a multiline string literal + } + } - try renderToken(r, colon3, .maybe_space); // : - const clobbers = asm_node.ast.clobbers.unwrap().?; - try renderExpression(r, clobbers, .none); - ais.forceLastIndent(); // Might have been dedented by a multiline string literal 
ais.setIndentDelta(prev_indent_delta); ais.popIndent(); return renderToken(r, asm_node.ast.rparen, space); // rparen diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 8d1863b0b0..dd8b199dc2 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6892,6 +6892,23 @@ test "zig fmt: array init with multiline string literal with fmt on/off" { ); } +test "zig fmt: render extra colons with comments" { + try testCanonical( + \\const a = asm ("" + \\ : // testing + \\); + \\const b = asm ("" + \\ : // testing + \\ : // testing + \\); + \\const c = asm ("" + \\ : + \\ : // testing + \\); + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From 92915a42b55f2916f36ed6b1a2d51363331f47f8 Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:39:43 -0400 Subject: [PATCH 09/11] zig fmt: do not reorder pointer casts with comments This was especially bad as it could crash or duplicate them due to them being moved around "zig fmt: on/off". As a bonus this fixes a crash from reordering at the start of the file. 
--- lib/std/zig/Ast/Render.zig | 37 +++++++++++++++++++++++++------------ lib/std/zig/parser_test.zig | 21 +++++++++++++++++++++ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index 41cd1e2260..3a420cd3b2 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -800,34 +800,47 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { canonicalize: { if (params.len != 1) break :canonicalize; - const CastKind = enum { + const CastKind = enum(u8) { ptrCast, alignCast, addrSpaceCast, constCast, volatileCast, }; - const kind = meta.stringToEnum(CastKind, tree.tokenSlice(builtin_token)[1..]) orelse break :canonicalize; + const kind = meta.stringToEnum( + CastKind, + tree.tokenSlice(builtin_token)[1..], + ) orelse break :canonicalize; var cast_map = std.EnumMap(CastKind, Ast.TokenIndex).init(.{}); cast_map.put(kind, builtin_token); var casts_before: usize = 0; - if (builtin_token >= 2) { - var prev_builtin_token = builtin_token - 2; - while (tree.tokenTag(prev_builtin_token) == .builtin) : (prev_builtin_token -= 2) { - const prev_kind = meta.stringToEnum(CastKind, tree.tokenSlice(prev_builtin_token)[1..]) orelse break; - if (cast_map.contains(prev_kind)) break :canonicalize; - cast_map.put(prev_kind, prev_builtin_token); - casts_before += 1; - } + var prev_builtin_token = builtin_token; + while (prev_builtin_token >= 2) { + prev_builtin_token -= 2; + if (tree.tokenTag(prev_builtin_token) != .builtin) break; + const builtin_name = tree.tokenSlice(prev_builtin_token)[1..]; + const prev_kind = meta.stringToEnum(CastKind, builtin_name) orelse break; + if (cast_map.contains(prev_kind)) break :canonicalize; + // This must be checked after so that cast builtins as arguments to other + // builtins containing comments are reordered. 
+ if (hasComment(tree, prev_builtin_token, prev_builtin_token + 2)) + break :canonicalize; + cast_map.put(prev_kind, prev_builtin_token); + casts_before += 1; } var next_builtin_token = builtin_token + 2; - while (tree.tokenTag(next_builtin_token) == .builtin) : (next_builtin_token += 2) { - const next_kind = meta.stringToEnum(CastKind, tree.tokenSlice(next_builtin_token)[1..]) orelse break; + while (true) { + if (hasComment(tree, next_builtin_token - 2, next_builtin_token)) + break :canonicalize; + if (tree.tokenTag(next_builtin_token) != .builtin) break; + const builtin_name = tree.tokenSlice(next_builtin_token)[1..]; + const next_kind = meta.stringToEnum(CastKind, builtin_name) orelse break; if (cast_map.contains(next_kind)) break :canonicalize; cast_map.put(next_kind, next_builtin_token); + next_builtin_token += 2; } var it = cast_map.iterator(); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index dd8b199dc2..f44e9ae544 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6133,6 +6133,16 @@ test "zig fmt: do not canonicalize invalid cast builtins" { ); } +test "zig fmt: canonicalize cast builtins at file start" { + try testTransform( + \\@alignCast(@ptrCast(a)), + \\ + , + \\@ptrCast(@alignCast(a)), + \\ + ); +} + test "zig fmt: extern addrspace in struct" { try testCanonical( \\const namespace = struct { @@ -6909,6 +6919,17 @@ test "zig fmt: render extra colons with comments" { ); } +test "zig fmt: cast builtins are not reordered with comments" { + try testCanonical( + \\const a = @volatileCast(@constCast( // ... 
+ \\ @alignCast(@ptrCast(a)))); + \\ + \\const b = @alignCast(@ptrCast( // zig fmt: off + \\ c)); + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From 09e523bd51185ae8d71c347ce606e3b74dea1d5b Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:42:57 -0400 Subject: [PATCH 10/11] zig fmt: fix overindent tracking in sub-renders This problem also affected determining if an expression became multiline as that depends on if the line is overindented. As such, `becomesMultilineExpr` has been replaced by `rendersMultiline` which constructs a temporary writer which returns `error.WriteFailed` when newlines are written. This new approach also has the advantage of being more maintainable. --- lib/std/zig/Ast/Render.zig | 439 ++++++------------------------------ lib/std/zig/parser_test.zig | 29 +++ 2 files changed, 93 insertions(+), 375 deletions(-) diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index 3a420cd3b2..f2efd5beb9 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -652,10 +652,12 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { const lhs, const rhs = tree.nodeData(node).node_and_node; const lbracket = tree.firstToken(rhs) - 1; const rbracket = tree.lastToken(rhs) + 1; - const one_line = tree.tokensOnSameLine(lbracket, rbracket) and - !becomesMultilineExpr(tree, rhs); - const inner_space = if (one_line) Space.none else Space.newline; try renderExpression(r, lhs, .none); + // One-line check must come after rendering lhs since it can influence + // isLineOverIndented + const one_line = tree.tokensOnSameLine(lbracket, rbracket) and + !try rendersMultiline(r, rhs); + const inner_space = if (one_line) Space.none else Space.newline; try ais.pushIndent(.normal); try renderToken(r, lbracket, inner_space); // [ try renderExpression(r, rhs, inner_space); @@ -951,380 +953,61 @@ fn renderExpressionFixup(r: *Render, node: Ast.Node.Index, space: Space)
Error!v } } -/// Same as becomesMultilineExpr, but returns false when `node == .none` -fn optBecomesMultilineExpr(tree: Ast, node: Ast.Node.OptionalIndex) bool { - return if (node.unwrap()) |payload| becomesMultilineExpr(tree, payload) else false; -} - -/// May return false if `node` is already multiline -fn becomesMultilineExpr(tree: Ast, node: Ast.Node.Index) bool { - // Conditions related to comments, doc comments, and multiline string literals are ignored - // since they always go to the end of the line, which already make them a multi-line - // expression (since they contain a newline). - switch (tree.nodeTag(node)) { - .identifier, - .number_literal, - .char_literal, - .unreachable_literal, - .anyframe_literal, - .string_literal, - .multiline_string_literal, - .error_value, - .enum_literal, - => return false, - .container_decl_trailing, - .container_decl_arg_trailing, - .container_decl_two_trailing, - .tagged_union_trailing, - .tagged_union_enum_tag_trailing, - .tagged_union_two_trailing, - .switch_comma, - .builtin_call_two_comma, - .builtin_call_comma, - .call_one_comma, - .call_comma, - .struct_init_one_comma, - .struct_init_dot_two_comma, - .struct_init_dot_comma, - .struct_init_comma, - .array_init_one_comma, - .array_init_dot_two_comma, - .array_init_dot_comma, - .array_init_comma, - // The following always have a non-zero amount of members - // which is also the condition for them to be multi-line. 
- .block, - .block_semicolon, - => return true, - .block_two, - .block_two_semicolon, - => return tree.nodeData(node).opt_node_and_opt_node[0] != .none, - .container_decl, - .container_decl_arg, - .container_decl_two, - .tagged_union, - .tagged_union_enum_tag, - .tagged_union_two, - => { - var buf: [2]Ast.Node.Index = undefined; - const full = tree.fullContainerDecl(&buf, node).?; - if (full.ast.arg.unwrap()) |arg| { - if (becomesMultilineExpr(tree, arg)) - return true; - } - // This does the same checks as `isOneLineContainerDecl`, however it avoids unnecessary - // checks related to comments and multiline strings, which would mean the container is - // already multiple lines. - for (full.ast.members) |member| { - if (tree.fullContainerField(member)) |field_full| { - for ([_]Ast.Node.OptionalIndex{ - field_full.ast.type_expr, - field_full.ast.align_expr, - field_full.ast.value_expr, - }) |opt_expr| { - if (opt_expr.unwrap()) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - } - } else return true; - } - return false; - }, - .error_set_decl => { - const lbrace, const rbrace = tree.nodeData(node).token_and_token; - return !isOneLineErrorSetDecl(tree, lbrace, rbrace); - }, - .@"switch" => { - const op, const extra_index = tree.nodeData(node).node_and_extra; - const case_range = tree.extraData(extra_index, Ast.Node.SubRange); - return @intFromEnum(case_range.end) - @intFromEnum(case_range.start) != 0 or - becomesMultilineExpr(tree, op); - }, - .for_simple, .@"for" => { - const full = tree.fullFor(node).?; - if (becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr)) - return true; - - for (full.ast.inputs) |expr| { - if (if (tree.nodeTag(expr) == .for_range) blk: { - const lhs, const rhs = tree.nodeData(expr).node_and_opt_node; - break :blk becomesMultilineExpr(tree, lhs) or optBecomesMultilineExpr(tree, rhs); - } else becomesMultilineExpr(tree, expr)) - return true; - } - const final_input_expr = 
full.ast.inputs[full.ast.inputs.len - 1]; - if (tree.tokenTag(tree.lastToken(final_input_expr) + 1) == .comma) - return true; - - const token_tags = tree.tokens.items(.tag); - const payload = full.payload_token; - const pipe = std.mem.indexOfScalarPos(Token.Tag, token_tags, payload, .pipe).?; - return token_tags[@intCast(pipe - 1)] == .comma; - }, - .while_simple, - .while_cont, - .@"while", - => { - const full = tree.fullWhile(node).?; - return becomesMultilineExpr(tree, full.ast.cond_expr) or - becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.cont_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr); - }, - .if_simple, - .@"if", - => { - const full = tree.fullIf(node).?; - return becomesMultilineExpr(tree, full.ast.cond_expr) or - becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr); - }, - .fn_proto_simple, - .fn_proto_multi, - .fn_proto_one, - .fn_proto, - => { - var buf: [1]Ast.Node.Index = undefined; - const fn_proto = tree.fullFnProto(&buf, node).?; - - for ([_]Ast.Node.OptionalIndex{ - fn_proto.ast.return_type, - fn_proto.ast.align_expr, - fn_proto.ast.addrspace_expr, - fn_proto.ast.section_expr, - fn_proto.ast.callconv_expr, - }) |opt_expr| { - if (opt_expr.unwrap()) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - } - for (fn_proto.ast.params) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - - const lparen = fn_proto.ast.fn_token + 1; - const return_type = fn_proto.ast.return_type.unwrap().?; - const maybe_bang = tree.firstToken(return_type) - 1; - const rparen = fnProtoRparen(tree, fn_proto, maybe_bang); - return !isOneLineFnProto(tree, fn_proto, lparen, rparen); - }, - .asm_simple, - => { - const lhs = tree.nodeData(node).node_and_token[0]; - return becomesMultilineExpr(tree, lhs); - }, - .@"asm", - => { - const lhs, const extra_index = tree.nodeData(node).node_and_extra; - const asm_extra = 
tree.extraData(extra_index, Ast.Node.Asm); - return @intFromEnum(asm_extra.items_end) - @intFromEnum(asm_extra.items_start) != 0 or - becomesMultilineExpr(tree, lhs) or optBecomesMultilineExpr(tree, asm_extra.clobbers); - }, - .array_type, .array_type_sentinel => { - const array_type = tree.fullArrayType(node).?; - const rbracket = tree.firstToken(array_type.ast.elem_type) - 1; - return !isOneLineArrayType(tree, array_type, rbracket) or - becomesMultilineExpr(tree, array_type.ast.elem_type); - }, - .array_access => { - const lhs, const rhs = tree.nodeData(node).node_and_node; - const lbracket = tree.firstToken(rhs) - 1; - const rbracket = tree.lastToken(rhs) + 1; - return !tree.tokensOnSameLine(lbracket, rbracket) or - becomesMultilineExpr(tree, lhs) or - becomesMultilineExpr(tree, rhs); - }, - .call_one, - .call, - .builtin_call_two, - .builtin_call, - .array_init_one, - .array_init_dot_two, - .array_init_dot, - .array_init, - .struct_init_one, - .struct_init_dot_two, - .struct_init_dot, - .struct_init, - => |tag| { - var buf: [2]Ast.Node.Index = undefined; - const opt_lhs: Ast.Node.OptionalIndex, const items = switch (tag) { - .call_one, .call => blk: { - const full = tree.fullCall(buf[0..1], node).?; - break :blk .{ full.ast.fn_expr.toOptional(), full.ast.params }; - }, - .builtin_call_two, .builtin_call => .{ .none, tree.builtinCallParams(&buf, node).? 
}, - .array_init_one, - .array_init_dot_two, - .array_init_dot, - .array_init, - => blk: { - const full = tree.fullArrayInit(&buf, node).?; - break :blk .{ full.ast.type_expr, full.ast.elements }; - }, - .struct_init_one, - .struct_init_dot_two, - .struct_init_dot, - .struct_init, - => blk: { - const full = tree.fullStructInit(&buf, node).?; - break :blk .{ full.ast.type_expr, full.ast.fields }; - }, - else => unreachable, - }; - if (opt_lhs.unwrap()) |lhs| { - if (becomesMultilineExpr(tree, lhs)) - return true; - } - for (items) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - return false; - }, - .assign_destructure => { - const full = tree.assignDestructure(node); - for (full.ast.variables) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - return becomesMultilineExpr(tree, full.ast.value_expr); - }, - .ptr_type_aligned, - .ptr_type_sentinel, - .ptr_type, - .ptr_type_bit_range, - => { - const full = tree.fullPtrType(node).?; - return becomesMultilineExpr(tree, full.ast.child_type) or - optBecomesMultilineExpr(tree, full.ast.sentinel) or - optBecomesMultilineExpr(tree, full.ast.align_node) or - optBecomesMultilineExpr(tree, full.ast.addrspace_node) or - optBecomesMultilineExpr(tree, full.ast.bit_range_start) or - optBecomesMultilineExpr(tree, full.ast.bit_range_end); - }, - .slice_open, - .slice, - .slice_sentinel, - => { - const full = tree.fullSlice(node).?; - return becomesMultilineExpr(tree, full.ast.sliced) or - becomesMultilineExpr(tree, full.ast.start) or - optBecomesMultilineExpr(tree, full.ast.end) or - optBecomesMultilineExpr(tree, full.ast.sentinel); - }, - .@"comptime", - .@"nosuspend", - .@"suspend", - .@"resume", - .bit_not, - .bool_not, - .negation, - .negation_wrap, - .optional_type, - .address_of, - .deref, - .@"try", - => return becomesMultilineExpr(tree, tree.nodeData(node).node), - .@"return" => return optBecomesMultilineExpr(tree, tree.nodeData(node).opt_node), - .field_access, - .unwrap_optional, - 
.grouped_expression, - => return becomesMultilineExpr(tree, tree.nodeData(node).node_and_token[0]), - .add, - .add_wrap, - .add_sat, - .array_cat, - .array_mult, - .bang_equal, - .bit_and, - .bit_or, - .shl, - .shl_sat, - .shr, - .bit_xor, - .bool_and, - .bool_or, - .div, - .equal_equal, - .greater_or_equal, - .greater_than, - .less_or_equal, - .less_than, - .merge_error_sets, - .mod, - .mul, - .mul_wrap, - .mul_sat, - .sub, - .sub_wrap, - .sub_sat, - .@"orelse", - .@"catch", - .error_union, - .assign, - .assign_bit_and, - .assign_bit_or, - .assign_shl, - .assign_shl_sat, - .assign_shr, - .assign_bit_xor, - .assign_div, - .assign_sub, - .assign_sub_wrap, - .assign_sub_sat, - .assign_mod, - .assign_add, - .assign_add_wrap, - .assign_add_sat, - .assign_mul, - .assign_mul_wrap, - .assign_mul_sat, - => { - const lhs, const rhs = tree.nodeData(node).node_and_node; - return becomesMultilineExpr(tree, lhs) or becomesMultilineExpr(tree, rhs); - }, - .@"break", .@"continue" => { - const opt_expr = tree.nodeData(node).opt_token_and_opt_node[1]; - return optBecomesMultilineExpr(tree, opt_expr); - }, - .anyframe_type => return becomesMultilineExpr(tree, tree.nodeData(node).token_and_node[1]), - .@"errdefer", - .@"defer", - .for_range, - .switch_range, - .switch_case_one, - .switch_case_inline_one, - .switch_case, - .switch_case_inline, - .asm_output, - .asm_input, - .fn_decl, - .container_field, - .container_field_init, - .container_field_align, - .root, - .global_var_decl, - .local_var_decl, - .simple_var_decl, - .aligned_var_decl, - .test_decl, - => unreachable, +fn drainNoNewline(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + if (std.mem.indexOfScalar(u8, w.buffered(), '\n') != null) { + return error.WriteFailed; } + + var n: usize = 0; + for (data[0 .. 
data.len - 1]) |v| { + if (std.mem.indexOfScalar(u8, v, '\n') != null) { + return error.WriteFailed; + } + n += v.len; + } + + const pattern = data[data.len - 1]; + if (splat != 0 and std.mem.indexOfScalar(u8, pattern, '\n') != null) { + return error.WriteFailed; + } + n += pattern.len * splat; + + w.end = 0; + return n; } -fn isOneLineArrayType( - tree: Ast, - array_type: Ast.full.ArrayType, - rbracket: Ast.TokenIndex, -) bool { - return tree.tokensOnSameLine(array_type.ast.lbracket, rbracket) and - !becomesMultilineExpr(tree, array_type.ast.elem_count) and - !optBecomesMultilineExpr(tree, array_type.ast.sentinel); +fn rendersMultiline(r: *const Render, node: Ast.Node.Index) error{OutOfMemory}!bool { + var no_nl_buf: [64]u8 = undefined; + var no_nl_w: Writer = .{ + .vtable = &.{ .drain = drainNoNewline }, + .buffer = &no_nl_buf, + }; + + if (r.ais.disabled_offset != null) return true; + var sub_ais: AutoIndentingStream = .init(r.gpa, &no_nl_w, r.ais.indent_delta); + defer sub_ais.deinit(); + // The following are needed to make sure isLineOverIndented is correct + sub_ais.indent_count = r.ais.indent_count; + sub_ais.applied_indent = r.ais.applied_indent; + sub_ais.current_line_empty = r.ais.current_line_empty; + + var sub_r: Render = .{ + .gpa = r.gpa, + .ais = &sub_ais, + .tree = r.tree, + .fixups = r.fixups, + }; + + renderExpression(&sub_r, node, .none) catch |e| return switch (e) { + error.OutOfMemory => return error.OutOfMemory, + error.WriteFailed => return true, + }; + if (sub_ais.disabled_offset != null) return true; + if (std.mem.indexOfScalar(u8, no_nl_w.buffered(), '\n') != null) { + return true; + } + + return false; } fn renderArrayType( @@ -1335,7 +1018,9 @@ fn renderArrayType( const tree = r.tree; const ais = r.ais; const rbracket = tree.firstToken(array_type.ast.elem_type) - 1; - const one_line = isOneLineArrayType(tree, array_type, rbracket); + const one_line = tree.tokensOnSameLine(array_type.ast.lbracket, rbracket) and + !try rendersMultiline(r, 
array_type.ast.elem_count) and + (if (array_type.ast.sentinel.unwrap()) |s| !try rendersMultiline(r, s) else true); const inner_space = if (one_line) Space.none else Space.newline; try ais.pushIndent(.normal); try renderToken(r, array_type.ast.lbracket, inner_space); // lbracket @@ -2524,6 +2209,10 @@ fn renderArrayInit( try renderSpace(&sub_r, after_expr, tokenSliceForRender(tree, after_expr).len, .none); buf.clearRetainingCapacity(); + // The following are needed to make sure isLineOverIndented is not influenced by + // the previous element. + sub_ais.indent_count = 0; + sub_ais.applied_indent = 0; } } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index f44e9ae544..5f1e38239d 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6930,6 +6930,35 @@ test "zig fmt: cast builtins are not reordered with comments" { ); } +test "zig fmt: inner over-indented if expressions becoming multiline" { + try testTransform( + \\const a = (b or + \\c) and [if (d) {}]T; // If the if-statement is kept on the same line it becomes multiline + \\const a = (b or + \\c)[if (d) {}]; // If the if-statement is kept on the same line it becomes multiline + \\const a = .{a, b, (c or + \\d), if (d) {}, e, f, g,}; + \\ + , + \\const a = (b or + \\ c) and [ + \\ if (d) {} + \\]T; // If the if-statement is kept on the same line it becomes multiline + \\const a = (b or + \\ c)[ + \\ if (d) {} + \\]; // If the if-statement is kept on the same line it becomes multiline + \\const a = .{ + \\ a, b, + \\ (c or + \\ d), + \\ if (d) {}, e, + \\ f, g, + \\}; + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} From 0b52cac43465bdd920a506e43b716a8754bb3426 Mon Sep 17 00:00:00 2001 From: Kendall Condon Date: Sun, 22 Mar 2026 17:52:33 -0400 Subject: [PATCH 11/11] zig fmt: add missed `maybe_space`s --- lib/std/zig/Ast/Render.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/std/zig/Ast/Render.zig 
b/lib/std/zig/Ast/Render.zig index f2efd5beb9..e60d2ab74c 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -1799,7 +1799,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi break; }, .keyword_noalias, .keyword_comptime => { - try renderToken(r, last_param_token, .space); + try renderToken(r, last_param_token, .maybe_space); last_param_token += 1; }, .identifier => {}, @@ -1850,7 +1850,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi break; }, .keyword_noalias, .keyword_comptime => { - try renderToken(r, last_param_token, .space); + try renderToken(r, last_param_token, .maybe_space); last_param_token += 1; }, .identifier => {}, @@ -1868,7 +1868,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi { try renderIdentifier(r, last_param_token, .none, .preserve_when_shadowing); // name last_param_token += 1; - try renderToken(r, last_param_token, .space); // : + try renderToken(r, last_param_token, .maybe_space); // : last_param_token += 1; } if (tree.tokenTag(last_param_token) == .keyword_anytype) {