diff --git a/doc/langref.html.in b/doc/langref.html.in index 3642e9bc18..a084a57825 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -7944,58 +7944,60 @@ TestDecl <- KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block ComptimeDecl <- KEYWORD_comptime Block Decl - <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) + <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) + / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? GlobalVarDecl -FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr +FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? AddrSpace? LinkSection? GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON -ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? +ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn (IDENTIFIER COLON / !(IDENTIFIER COLON))? TypeExpr ByteAlign? (EQUAL Expr)? # *** Block Level *** -Statement - <- KEYWORD_comptime ComptimeStatement - / KEYWORD_nosuspend BlockExprStatement - / KEYWORD_suspend BlockExprStatement +BlockStatement + <- Statement / KEYWORD_defer BlockExprStatement / KEYWORD_errdefer Payload? BlockExprStatement - / IfStatement - / LabeledStatement - / VarDeclExprStatement + / !ExprStatement (KEYWORD_comptime !BlockExpr)? VarAssignStatement -ComptimeStatement - <- BlockExpr - / VarDeclExprStatement +Statement + <- ExprStatement + / KEYWORD_suspend BlockExprStatement + / !ExprStatement (KEYWORD_comptime !BlockExpr)? 
AssignExpr SEMICOLON + +ExprStatement + <- IfStatement + / LabeledStatement + / KEYWORD_nosuspend BlockExprStatement + / KEYWORD_comptime BlockExpr IfStatement <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? - / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) LabeledStatement <- BlockLabel? (Block / LoopStatement / SwitchExpr) LoopStatement <- KEYWORD_inline? (ForStatement / WhileStatement) ForStatement - <- ForPrefix BlockExpr ( KEYWORD_else Statement )? - / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) + <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) + / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) WhileStatement <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? - / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) BlockExprStatement <- BlockExpr - / AssignExpr SEMICOLON + / !BlockExpr AssignExpr SEMICOLON BlockExpr <- BlockLabel? Block -# An expression, assignment, or any destructure, as a statement. -VarDeclExprStatement - <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON - / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? SEMICOLON +# An assignment or a destructure whose LHS are all lvalue expressions or variable declarations. +VarAssignStatement <- (VarDeclProto / Expr) (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON # *** Expression Level *** @@ -8025,25 +8027,25 @@ PrefixExpr <- PrefixOp* PrimaryExpr PrimaryExpr <- AsmExpr / IfExpr - / KEYWORD_break BreakLabel? Expr? - / KEYWORD_comptime Expr - / KEYWORD_nosuspend Expr - / KEYWORD_continue BreakLabel? Expr? - / KEYWORD_resume Expr - / KEYWORD_return Expr? 
+ / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) + / KEYWORD_comptime Expr !ExprSuffix + / KEYWORD_nosuspend Expr !ExprSuffix + / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) + / KEYWORD_resume Expr !ExprSuffix + / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) / BlockLabel? LoopExpr / Block / CurlySuffixExpr -IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? +IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix -Block <- LBRACE Statement* RBRACE +Block <- LBRACE BlockStatement* RBRACE LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr) -ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? +ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix -WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? +WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix CurlySuffixExpr <- TypeExpr InitList? @@ -8070,10 +8072,10 @@ PrimaryTypeExpr / FnProto / GroupedExpr / LabeledTypeExpr - / IDENTIFIER + / IDENTIFIER !(COLON LabelableExpr) / IfTypeExpr / INTEGER - / KEYWORD_comptime TypeExpr + / KEYWORD_comptime TypeExpr !ExprSuffix / KEYWORD_error DOT IDENTIFIER / KEYWORD_anyframe / KEYWORD_unreachable @@ -8085,7 +8087,7 @@ ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE GroupedExpr <- LPAREN Expr RPAREN -IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix LabeledTypeExpr <- BlockLabel Block @@ -8094,9 +8096,9 @@ LabeledTypeExpr LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) -ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? +ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix -WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 
!ExprSuffix SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE @@ -8105,11 +8107,11 @@ AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN AsmOutput <- COLON AsmOutputList AsmInput? -AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN AsmInput <- COLON AsmInputList AsmClobbers? -AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN +AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN AsmClobbers <- COLON Expr @@ -8129,9 +8131,7 @@ AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN # Fn specific CallConv <- KEYWORD_callconv LPAREN Expr RPAREN -ParamDecl - <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType - / DOT3 +ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime / !KEYWORD_comptime) (IDENTIFIER COLON / !(IDENTIFIER_COLON)) ParamType ParamType <- KEYWORD_anytype @@ -8237,8 +8237,8 @@ PrefixOp PrefixTypeOp <- QUESTIONMARK / KEYWORD_anyframe MINUSRARROW - / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* - / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* + / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? + / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? 
/ ArrayTypeStart SuffixOp @@ -8249,15 +8249,31 @@ SuffixOp FnCallArguments <- LPAREN ExprList RPAREN +ExprSuffix + <- KEYWORD_or + / KEYWORD_and + / CompareOp + / BitwiseOp + / BitShiftOp + / AdditionOp + / MultiplyOp + / EXCLAMATIONMARK + / SuffixOp + / FnCallArguments + +LabelableExpr + <- Block + / SwitchExpr + / LoopExpr + # Ptr specific SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET -PtrTypeStart - <- ASTERISK - / ASTERISK2 - / LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +SinglePtrTypeStart <- ASTERISK / ASTERISK2 -ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET +ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET + +ArrayTypeStart <- LBRACKET Expr !(ASTERISK / ASTERISK2) (COLON Expr)? RBRACKET # ContainerDecl specific ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE @@ -8266,11 +8282,13 @@ ContainerDeclType <- KEYWORD_struct (LPAREN Expr RPAREN)? / KEYWORD_opaque / KEYWORD_enum (LPAREN Expr RPAREN)? - / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / Expr) RPAREN)? + / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / !KEYWORD_enum Expr) RPAREN)? # Alignment ByteAlign <- KEYWORD_align LPAREN Expr RPAREN +BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN + # Lists IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? @@ -8280,7 +8298,7 @@ AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? -ParamDeclList <- (ParamDecl COMMA)* ParamDecl? +ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? ExprList <- (Expr COMMA)* Expr? @@ -8337,6 +8355,7 @@ multibyte_utf8 <- / oxC2_oxDF ox80_oxBF non_control_ascii <- [\040-\176] +non_control_utf8 <- [\040-\377] char_escape <- "\\x" hex hex @@ -8352,10 +8371,10 @@ string_char / char_escape / ![\\"\n] non_control_ascii -container_doc_comment <- ('//!' 
[^\n]* [ \n]* skip)+ -doc_comment <- ('///' [^\n]* [ \n]* skip)+ -line_comment <- '//' ![!/][^\n]* / '////' [^\n]* -line_string <- ('\\\\' [^\n]* [ \n]*)+ +container_doc_comment <- ('//!' non_control_utf8* [ \n]* skip)+ +doc_comment <- ('///' non_control_utf8* [ \n]* skip)+ +line_comment <- '//' ![!/] non_control_utf8* / '////' non_control_utf8* +line_string <- '\\\\' non_control_utf8* [ \n]* skip <- ([ \n] / line_comment)* CHAR_LITERAL <- ['] char_char ['] skip diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig index 28f455d73f..ed560bffa7 100644 --- a/lib/std/Build/abi.zig +++ b/lib/std/Build/abi.zig @@ -235,7 +235,8 @@ pub const fuzz = struct { max: u64, weight: u64, - fn intFromValue(x: anytype) u64 { + /// `inline` to propagate comptimeness + inline fn intFromValue(x: anytype) u64 { const T = @TypeOf(x); return switch (@typeInfo(T)) { .comptime_int => x, @@ -269,11 +270,13 @@ pub const fuzz = struct { }; } - pub fn value(T: type, x: T, weight: u64) Weight { + /// `inline` to propagate comptimeness + pub inline fn value(T: type, x: T, weight: u64) Weight { return .{ .min = intFromValue(x), .max = intFromValue(x), .weight = weight }; } - pub fn rangeAtMost(T: type, at_least: T, at_most: T, weight: u64) Weight { + /// `inline` to propagate comptimeness + pub inline fn rangeAtMost(T: type, at_least: T, at_most: T, weight: u64) Weight { std.debug.assert(intFromValue(at_least) <= intFromValue(at_most)); return .{ .min = intFromValue(at_least), @@ -282,7 +285,8 @@ pub const fuzz = struct { }; } - pub fn rangeLessThan(T: type, at_least: T, less_than: T, weight: u64) Weight { + /// `inline` to propagate comptimeness + pub inline fn rangeLessThan(T: type, at_least: T, less_than: T, weight: u64) Weight { std.debug.assert(intFromValue(at_least) < intFromValue(less_than)); return .{ .min = intFromValue(at_least), diff --git a/lib/std/testing/Smith.zig b/lib/std/testing/Smith.zig index 9b1574282b..e36c97c50e 100644 --- a/lib/std/testing/Smith.zig +++ 
b/lib/std/testing/Smith.zig @@ -205,6 +205,24 @@ pub noinline fn valueRangeLessThan(s: *Smith, T: type, at_least: T, less_than: T return s.valueRangeLessThanWithHash(T, at_least, less_than, firstHash()); } +/// It is asserted `len` is nonzero. +/// It is asserted `len` fits within 64 bits. +// +// `noinline` to capture a unique return address +pub noinline fn index(s: *Smith, len: usize) usize { + @disableInstrumentation(); + return s.indexWithHash(len, firstHash()); +} + +/// It is asserted that the weight of `false` is non-zero. +/// It is asserted that the weight of `true` is non-zero. +// +// `noinline` to capture a unique return address +pub noinline fn boolWeighted(s: *Smith, false_weight: u64, true_weight: u64) bool { + @disableInstrumentation(); + return s.boolWeightedWithHash(false_weight, true_weight, firstHash()); +} + /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. // @@ -228,6 +246,7 @@ pub noinline fn eosWeighted(s: *Smith, weights: []const Weight) bool { /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. /// +/// It is asserted that the weight of `false` is non-zero. /// It is asserted that the weight of `true` is non-zero. // // `noinline` to capture a unique return address @@ -463,6 +482,24 @@ pub fn valueRangeLessThanWithHash(s: *Smith, T: type, at_least: T, less_than: T, return s.valueWeightedWithHash(T, &.{.rangeLessThan(T, at_least, less_than, 1)}, hash); } +/// It is asserted `len` is nonzero. +/// It is asserted `len` fits within 64 bits. +pub fn indexWithHash(s: *Smith, len: usize, hash: u32) usize { + @disableInstrumentation(); + assert(len != 0); + return @intCast(s.valueWeightedWithHash(u64, &.{.rangeLessThan(u64, 0, @intCast(len), 1)}, hash)); +} + +/// It is asserted that the weight of `false` is non-zero. 
+/// It is asserted that the weight of `true` is non-zero. +pub fn boolWeightedWithHash(s: *Smith, false_weight: u64, true_weight: u64, hash: u32) bool { + @disableInstrumentation(); + return s.valueWeightedWithHash(bool, &.{ + .value(bool, false, false_weight), + .value(bool, true, true_weight), + }, hash); +} + /// This is similar to `value(bool)` however it is gauraunteed to eventually /// return `true` and provides the fuzzer with an extra hint about the data. pub fn eosWithHash(s: *Smith, hash: u32) bool { @@ -504,8 +541,6 @@ pub fn eosWeightedWithHash(s: *Smith, weights: []const Weight, hash: u32) bool { /// /// It is asserted that the weight of `false` is non-zero. /// It is asserted that the weight of `true` is non-zero. -// -// `noinline` to capture a unique return address pub fn eosWeightedSimpleWithHash(s: *Smith, false_weight: u64, true_weight: u64, hash: u32) bool { @disableInstrumentation(); return s.eosWeightedWithHash(&.{ diff --git a/lib/std/zig.zig b/lib/std/zig.zig index 919bd9485b..1650bbff26 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -23,6 +23,7 @@ pub const primitives = @import("zig/primitives.zig"); pub const isPrimitive = primitives.isPrimitive; pub const Ast = @import("zig/Ast.zig"); pub const AstGen = @import("zig/AstGen.zig"); +pub const AstSmith = @import("zig/AstSmith.zig"); pub const Zir = @import("zig/Zir.zig"); pub const Zoir = @import("zig/Zoir.zig"); pub const ZonGen = @import("zig/ZonGen.zig"); @@ -1166,6 +1167,7 @@ pub const ClangCliParam = struct { test { _ = Ast; _ = AstRlAnnotate; + _ = AstSmith; _ = BuiltinFn; _ = Client; _ = ErrorBundle; diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index 7c73c3d4d7..3e253543f1 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -504,9 +504,6 @@ pub fn renderError(tree: Ast, parse_error: Error, w: *Writer) Writer.Error!void .varargs_nonfinal => { return w.writeAll("function prototype has parameter after varargs"); }, - .expected_continue_expr => { - return 
w.writeAll("expected ':' before while continue expression"); - }, .expected_semi_after_decl => { return w.writeAll("expected ';' after declaration"); @@ -2888,7 +2885,6 @@ pub const Error = struct { test_doc_comment, comptime_doc_comment, varargs_nonfinal, - expected_continue_expr, expected_semi_after_decl, expected_semi_after_stmt, expected_comma_after_field, diff --git a/lib/std/zig/Ast/Render.zig b/lib/std/zig/Ast/Render.zig index d61707b36d..e60d2ab74c 100644 --- a/lib/std/zig/Ast/Render.zig +++ b/lib/std/zig/Ast/Render.zig @@ -338,9 +338,16 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { try ais.maybeInsertNewline(); const first_tok, const last_tok = tree.nodeData(node).token_and_token; - for (first_tok..last_tok + 1) |i| { + for (first_tok..last_tok) |i| { try renderToken(r, @intCast(i), .newline); } + if (space != .skip) { + try renderToken(r, last_tok, .newline); + } else { + try renderToken(r, last_tok, .skip); + try ais.insertNewline(); // A newline is part of the token, so it still needs + // rendered here. 
+ } const next_token = last_tok + 1; const next_token_tag = tree.tokenTag(next_token); @@ -645,10 +652,12 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { const lhs, const rhs = tree.nodeData(node).node_and_node; const lbracket = tree.firstToken(rhs) - 1; const rbracket = tree.lastToken(rhs) + 1; - const one_line = tree.tokensOnSameLine(lbracket, rbracket) and - !becomesMultilineExpr(tree, rhs); - const inner_space = if (one_line) Space.none else Space.newline; try renderExpression(r, lhs, .none); + // The one-line check must come after rendering lhs since it can influence + // isLineOverIndented + const one_line = tree.tokensOnSameLine(lbracket, rbracket) and + !try rendersMultiline(r, rhs); + const inner_space = if (one_line) Space.none else Space.newline; try ais.pushIndent(.normal); try renderToken(r, lbracket, inner_space); // [ try renderExpression(r, rhs, inner_space); @@ -793,34 +802,47 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { canonicalize: { if (params.len != 1) break :canonicalize; - const CastKind = enum { + const CastKind = enum(u8) { ptrCast, alignCast, addrSpaceCast, constCast, volatileCast, }; - const kind = meta.stringToEnum(CastKind, tree.tokenSlice(builtin_token)[1..]) orelse break :canonicalize; + const kind = meta.stringToEnum( + CastKind, + tree.tokenSlice(builtin_token)[1..], + ) orelse break :canonicalize; var cast_map = std.EnumMap(CastKind, Ast.TokenIndex).init(.{}); cast_map.put(kind, builtin_token); var casts_before: usize = 0; - if (builtin_token >= 2) { - var prev_builtin_token = builtin_token - 2; - while (tree.tokenTag(prev_builtin_token) == .builtin) : (prev_builtin_token -= 2) { - const prev_kind = meta.stringToEnum(CastKind, tree.tokenSlice(prev_builtin_token)[1..]) orelse break; - if (cast_map.contains(prev_kind)) break :canonicalize; - cast_map.put(prev_kind, prev_builtin_token); - casts_before += 1; - } + var prev_builtin_token = builtin_token; + while 
(prev_builtin_token >= 2) { + prev_builtin_token -= 2; + if (tree.tokenTag(prev_builtin_token) != .builtin) break; + const builtin_name = tree.tokenSlice(prev_builtin_token)[1..]; + const prev_kind = meta.stringToEnum(CastKind, builtin_name) orelse break; + if (cast_map.contains(prev_kind)) break :canonicalize; + // This must be checked after so that cast builtins as arguments to other + // builtins containing comments are reordered. + if (hasComment(tree, prev_builtin_token, prev_builtin_token + 2)) + break :canonicalize; + cast_map.put(prev_kind, prev_builtin_token); + casts_before += 1; } var next_builtin_token = builtin_token + 2; - while (tree.tokenTag(next_builtin_token) == .builtin) : (next_builtin_token += 2) { - const next_kind = meta.stringToEnum(CastKind, tree.tokenSlice(next_builtin_token)[1..]) orelse break; + while (true) { + if (hasComment(tree, next_builtin_token - 2, next_builtin_token)) + break :canonicalize; + if (tree.tokenTag(next_builtin_token) != .builtin) break; + const builtin_name = tree.tokenSlice(next_builtin_token)[1..]; + const next_kind = meta.stringToEnum(CastKind, builtin_name) orelse break; if (cast_map.contains(next_kind)) break :canonicalize; cast_map.put(next_kind, next_builtin_token); + next_builtin_token += 2; } var it = cast_map.iterator(); @@ -931,380 +953,61 @@ fn renderExpressionFixup(r: *Render, node: Ast.Node.Index, space: Space) Error!v } } -/// Same as becomesMultilineExpr, but returns false when `node == .none` -fn optBecomesMultilineExpr(tree: Ast, node: Ast.Node.OptionalIndex) bool { - return if (node.unwrap()) |payload| becomesMultilineExpr(tree, payload) else false; -} - -/// May return false if `node` is already multiline -fn becomesMultilineExpr(tree: Ast, node: Ast.Node.Index) bool { - // Conditions related to comments, doc comments, and multiline string literals are ignored - // since they always go to the end of the line, which already make them a multi-line - // expression (since they contain a newline). 
- switch (tree.nodeTag(node)) { - .identifier, - .number_literal, - .char_literal, - .unreachable_literal, - .anyframe_literal, - .string_literal, - .multiline_string_literal, - .error_value, - .enum_literal, - => return false, - .container_decl_trailing, - .container_decl_arg_trailing, - .container_decl_two_trailing, - .tagged_union_trailing, - .tagged_union_enum_tag_trailing, - .tagged_union_two_trailing, - .switch_comma, - .builtin_call_two_comma, - .builtin_call_comma, - .call_one_comma, - .call_comma, - .struct_init_one_comma, - .struct_init_dot_two_comma, - .struct_init_dot_comma, - .struct_init_comma, - .array_init_one_comma, - .array_init_dot_two_comma, - .array_init_dot_comma, - .array_init_comma, - // The following always have a non-zero amount of members - // which is also the condition for them to be multi-line. - .block, - .block_semicolon, - => return true, - .block_two, - .block_two_semicolon, - => return tree.nodeData(node).opt_node_and_opt_node[0] != .none, - .container_decl, - .container_decl_arg, - .container_decl_two, - .tagged_union, - .tagged_union_enum_tag, - .tagged_union_two, - => { - var buf: [2]Ast.Node.Index = undefined; - const full = tree.fullContainerDecl(&buf, node).?; - if (full.ast.arg.unwrap()) |arg| { - if (becomesMultilineExpr(tree, arg)) - return true; - } - // This does the same checks as `isOneLineContainerDecl`, however it avoids unnecessary - // checks related to comments and multiline strings, which would mean the container is - // already multiple lines. 
- for (full.ast.members) |member| { - if (tree.fullContainerField(member)) |field_full| { - for ([_]Ast.Node.OptionalIndex{ - field_full.ast.type_expr, - field_full.ast.align_expr, - field_full.ast.value_expr, - }) |opt_expr| { - if (opt_expr.unwrap()) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - } - } else return true; - } - return false; - }, - .error_set_decl => { - const lbrace, const rbrace = tree.nodeData(node).token_and_token; - return !isOneLineErrorSetDecl(tree, lbrace, rbrace); - }, - .@"switch" => { - const op, const extra_index = tree.nodeData(node).node_and_extra; - const case_range = tree.extraData(extra_index, Ast.Node.SubRange); - return @intFromEnum(case_range.end) - @intFromEnum(case_range.start) != 0 or - becomesMultilineExpr(tree, op); - }, - .for_simple, .@"for" => { - const full = tree.fullFor(node).?; - if (becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr)) - return true; - - for (full.ast.inputs) |expr| { - if (if (tree.nodeTag(expr) == .for_range) blk: { - const lhs, const rhs = tree.nodeData(expr).node_and_opt_node; - break :blk becomesMultilineExpr(tree, lhs) or optBecomesMultilineExpr(tree, rhs); - } else becomesMultilineExpr(tree, expr)) - return true; - } - const final_input_expr = full.ast.inputs[full.ast.inputs.len - 1]; - if (tree.tokenTag(tree.lastToken(final_input_expr) + 1) == .comma) - return true; - - const token_tags = tree.tokens.items(.tag); - const payload = full.payload_token; - const pipe = std.mem.indexOfScalarPos(Token.Tag, token_tags, payload, .pipe).?; - return token_tags[@intCast(pipe - 1)] == .comma; - }, - .while_simple, - .while_cont, - .@"while", - => { - const full = tree.fullWhile(node).?; - return becomesMultilineExpr(tree, full.ast.cond_expr) or - becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.cont_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr); - }, - .if_simple, - .@"if", - 
=> { - const full = tree.fullIf(node).?; - return becomesMultilineExpr(tree, full.ast.cond_expr) or - becomesMultilineExpr(tree, full.ast.then_expr) or - optBecomesMultilineExpr(tree, full.ast.else_expr); - }, - .fn_proto_simple, - .fn_proto_multi, - .fn_proto_one, - .fn_proto, - => { - var buf: [1]Ast.Node.Index = undefined; - const fn_proto = tree.fullFnProto(&buf, node).?; - - for ([_]Ast.Node.OptionalIndex{ - fn_proto.ast.return_type, - fn_proto.ast.align_expr, - fn_proto.ast.addrspace_expr, - fn_proto.ast.section_expr, - fn_proto.ast.callconv_expr, - }) |opt_expr| { - if (opt_expr.unwrap()) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - } - for (fn_proto.ast.params) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - - const lparen = fn_proto.ast.fn_token + 1; - const return_type = fn_proto.ast.return_type.unwrap().?; - const maybe_bang = tree.firstToken(return_type) - 1; - const rparen = fnProtoRparen(tree, fn_proto, maybe_bang); - return !isOneLineFnProto(tree, fn_proto, lparen, rparen); - }, - .asm_simple, - => { - const lhs = tree.nodeData(node).node_and_token[0]; - return becomesMultilineExpr(tree, lhs); - }, - .@"asm", - => { - const lhs, const extra_index = tree.nodeData(node).node_and_extra; - const asm_extra = tree.extraData(extra_index, Ast.Node.Asm); - return @intFromEnum(asm_extra.items_end) - @intFromEnum(asm_extra.items_start) != 0 or - becomesMultilineExpr(tree, lhs) or optBecomesMultilineExpr(tree, asm_extra.clobbers); - }, - .array_type, .array_type_sentinel => { - const array_type = tree.fullArrayType(node).?; - const rbracket = tree.firstToken(array_type.ast.elem_type) - 1; - return !isOneLineArrayType(tree, array_type, rbracket) or - becomesMultilineExpr(tree, array_type.ast.elem_type); - }, - .array_access => { - const lhs, const rhs = tree.nodeData(node).node_and_node; - const lbracket = tree.firstToken(rhs) - 1; - const rbracket = tree.lastToken(rhs) + 1; - return !tree.tokensOnSameLine(lbracket, 
rbracket) or - becomesMultilineExpr(tree, lhs) or - becomesMultilineExpr(tree, rhs); - }, - .call_one, - .call, - .builtin_call_two, - .builtin_call, - .array_init_one, - .array_init_dot_two, - .array_init_dot, - .array_init, - .struct_init_one, - .struct_init_dot_two, - .struct_init_dot, - .struct_init, - => |tag| { - var buf: [2]Ast.Node.Index = undefined; - const opt_lhs: Ast.Node.OptionalIndex, const items = switch (tag) { - .call_one, .call => blk: { - const full = tree.fullCall(buf[0..1], node).?; - break :blk .{ full.ast.fn_expr.toOptional(), full.ast.params }; - }, - .builtin_call_two, .builtin_call => .{ .none, tree.builtinCallParams(&buf, node).? }, - .array_init_one, - .array_init_dot_two, - .array_init_dot, - .array_init, - => blk: { - const full = tree.fullArrayInit(&buf, node).?; - break :blk .{ full.ast.type_expr, full.ast.elements }; - }, - .struct_init_one, - .struct_init_dot_two, - .struct_init_dot, - .struct_init, - => blk: { - const full = tree.fullStructInit(&buf, node).?; - break :blk .{ full.ast.type_expr, full.ast.fields }; - }, - else => unreachable, - }; - if (opt_lhs.unwrap()) |lhs| { - if (becomesMultilineExpr(tree, lhs)) - return true; - } - for (items) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - return false; - }, - .assign_destructure => { - const full = tree.assignDestructure(node); - for (full.ast.variables) |expr| { - if (becomesMultilineExpr(tree, expr)) - return true; - } - return becomesMultilineExpr(tree, full.ast.value_expr); - }, - .ptr_type_aligned, - .ptr_type_sentinel, - .ptr_type, - .ptr_type_bit_range, - => { - const full = tree.fullPtrType(node).?; - return becomesMultilineExpr(tree, full.ast.child_type) or - optBecomesMultilineExpr(tree, full.ast.sentinel) or - optBecomesMultilineExpr(tree, full.ast.align_node) or - optBecomesMultilineExpr(tree, full.ast.addrspace_node) or - optBecomesMultilineExpr(tree, full.ast.bit_range_start) or - optBecomesMultilineExpr(tree, full.ast.bit_range_end); - }, 
- .slice_open, - .slice, - .slice_sentinel, - => { - const full = tree.fullSlice(node).?; - return becomesMultilineExpr(tree, full.ast.sliced) or - becomesMultilineExpr(tree, full.ast.start) or - optBecomesMultilineExpr(tree, full.ast.end) or - optBecomesMultilineExpr(tree, full.ast.sentinel); - }, - .@"comptime", - .@"nosuspend", - .@"suspend", - .@"resume", - .bit_not, - .bool_not, - .negation, - .negation_wrap, - .optional_type, - .address_of, - .deref, - .@"try", - => return becomesMultilineExpr(tree, tree.nodeData(node).node), - .@"return" => return optBecomesMultilineExpr(tree, tree.nodeData(node).opt_node), - .field_access, - .unwrap_optional, - .grouped_expression, - => return becomesMultilineExpr(tree, tree.nodeData(node).node_and_token[0]), - .add, - .add_wrap, - .add_sat, - .array_cat, - .array_mult, - .bang_equal, - .bit_and, - .bit_or, - .shl, - .shl_sat, - .shr, - .bit_xor, - .bool_and, - .bool_or, - .div, - .equal_equal, - .greater_or_equal, - .greater_than, - .less_or_equal, - .less_than, - .merge_error_sets, - .mod, - .mul, - .mul_wrap, - .mul_sat, - .sub, - .sub_wrap, - .sub_sat, - .@"orelse", - .@"catch", - .error_union, - .assign, - .assign_bit_and, - .assign_bit_or, - .assign_shl, - .assign_shl_sat, - .assign_shr, - .assign_bit_xor, - .assign_div, - .assign_sub, - .assign_sub_wrap, - .assign_sub_sat, - .assign_mod, - .assign_add, - .assign_add_wrap, - .assign_add_sat, - .assign_mul, - .assign_mul_wrap, - .assign_mul_sat, - => { - const lhs, const rhs = tree.nodeData(node).node_and_node; - return becomesMultilineExpr(tree, lhs) or becomesMultilineExpr(tree, rhs); - }, - .@"break", .@"continue" => { - const opt_expr = tree.nodeData(node).opt_token_and_opt_node[1]; - return optBecomesMultilineExpr(tree, opt_expr); - }, - .anyframe_type => return becomesMultilineExpr(tree, tree.nodeData(node).token_and_node[1]), - .@"errdefer", - .@"defer", - .for_range, - .switch_range, - .switch_case_one, - .switch_case_inline_one, - .switch_case, - 
.switch_case_inline, - .asm_output, - .asm_input, - .fn_decl, - .container_field, - .container_field_init, - .container_field_align, - .root, - .global_var_decl, - .local_var_decl, - .simple_var_decl, - .aligned_var_decl, - .test_decl, - => unreachable, +fn drainNoNewline(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + if (std.mem.indexOfScalar(u8, w.buffered(), '\n') != null) { + return error.WriteFailed; } + + var n: usize = 0; + for (data[0 .. data.len - 1]) |v| { + if (std.mem.indexOfScalar(u8, v, '\n') != null) { + return error.WriteFailed; + } + n += v.len; + } + + const pattern = data[data.len - 1]; + if (splat != 0 and std.mem.indexOfScalar(u8, pattern, '\n') != null) { + return error.WriteFailed; + } + n += pattern.len * splat; + + w.end = 0; + return n; } -fn isOneLineArrayType( - tree: Ast, - array_type: Ast.full.ArrayType, - rbracket: Ast.TokenIndex, -) bool { - return tree.tokensOnSameLine(array_type.ast.lbracket, rbracket) and - !becomesMultilineExpr(tree, array_type.ast.elem_count) and - !optBecomesMultilineExpr(tree, array_type.ast.sentinel); +fn rendersMultiline(r: *const Render, node: Ast.Node.Index) error{OutOfMemory}!bool { + var no_nl_buf: [64]u8 = undefined; + var no_nl_w: Writer = .{ + .vtable = &.{ .drain = drainNoNewline }, + .buffer = &no_nl_buf, + }; + + if (r.ais.disabled_offset != null) return true; + var sub_ais: AutoIndentingStream = .init(r.gpa, &no_nl_w, r.ais.indent_delta); + defer sub_ais.deinit(); + // The following are needed to make sure isLineOverIndented is correct + sub_ais.indent_count = r.ais.indent_count; + sub_ais.applied_indent = r.ais.applied_indent; + sub_ais.current_line_empty = r.ais.current_line_empty; + + var sub_r: Render = .{ + .gpa = r.gpa, + .ais = &sub_ais, + .tree = r.tree, + .fixups = r.fixups, + }; + + renderExpression(&sub_r, node, .none) catch |e| return switch (e) { + error.OutOfMemory => return error.OutOfMemory, + error.WriteFailed => return true, + }; + if 
(sub_ais.disabled_offset != null) return true; + if (std.mem.indexOfScalar(u8, no_nl_w.buffered(), '\n') != null) { + return true; + } + + return false; } fn renderArrayType( @@ -1315,7 +1018,9 @@ fn renderArrayType( const tree = r.tree; const ais = r.ais; const rbracket = tree.firstToken(array_type.ast.elem_type) - 1; - const one_line = isOneLineArrayType(tree, array_type, rbracket); + const one_line = tree.tokensOnSameLine(array_type.ast.lbracket, rbracket) and + !try rendersMultiline(r, array_type.ast.elem_count) and + (if (array_type.ast.sentinel.unwrap()) |s| !try rendersMultiline(r, s) else true); const inner_space = if (one_line) Space.none else Space.newline; try ais.pushIndent(.normal); try renderToken(r, array_type.ast.lbracket, inner_space); // lbracket @@ -2094,7 +1799,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi break; }, .keyword_noalias, .keyword_comptime => { - try renderToken(r, last_param_token, .space); + try renderToken(r, last_param_token, .maybe_space); last_param_token += 1; }, .identifier => {}, @@ -2145,7 +1850,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi break; }, .keyword_noalias, .keyword_comptime => { - try renderToken(r, last_param_token, .space); + try renderToken(r, last_param_token, .maybe_space); last_param_token += 1; }, .identifier => {}, @@ -2163,7 +1868,7 @@ fn renderFnProto(r: *Render, fn_proto: Ast.full.FnProto, space: Space) Error!voi { try renderIdentifier(r, last_param_token, .none, .preserve_when_shadowing); // name last_param_token += 1; - try renderToken(r, last_param_token, .space); // : + try renderToken(r, last_param_token, .maybe_space); // : last_param_token += 1; } if (tree.tokenTag(last_param_token) == .keyword_anytype) { @@ -2504,6 +2209,10 @@ fn renderArrayInit( try renderSpace(&sub_r, after_expr, tokenSliceForRender(tree, after_expr).len, .none); buf.clearRetainingCapacity(); + // The following are needed to make sure 
isLineOverIndented is not influenced by + // the previous element. + sub_ais.indent_count = 0; + sub_ais.applied_indent = 0; } } @@ -2749,8 +2458,47 @@ fn renderAsm( try renderToken(r, asm_node.ast.asm_token + 1, .none); // lparen } + const render_colons: [3]?Ast.TokenIndex = colons: { + var colons: [3]Ast.TokenIndex = undefined; + var render: u2 = 0; + + const rparen = asm_node.ast.rparen; + filled: { + colons[0] = tree.lastToken(asm_node.ast.template) + 1; + if (colons[0] == rparen) break :filled; + + if (asm_node.outputs.len != 0) { + colons[1] = tree.lastToken(asm_node.outputs[asm_node.outputs.len - 1]) + 1; + colons[1] += @intFromBool(tree.tokenTag(colons[1]) == .comma); + render = 1; + } else { + colons[1] = colons[0] + 1; + if (hasComment(tree, colons[0], colons[1])) render = 1; + } + if (colons[1] == rparen) break :filled; + + // Next colon is not checked for here since it cannot present without clobbers + if (asm_node.inputs.len != 0) { + render = 2; + } else { + const colon_or_rparen = colons[1] + 1; + if (hasComment(tree, colons[1], colon_or_rparen)) render = 2; + } + + if (asm_node.ast.clobbers.unwrap()) |clobbers| { + colons[2] = tree.firstToken(clobbers) - 1; + render = 3; + } + } + + var opt_colons: [3]?Ast.TokenIndex = @splat(null); + for (0..render) |i| opt_colons[i] = colons[i]; + break :colons opt_colons; + }; + + try ais.forcePushIndent(.normal); + if (asm_node.ast.items.len == 0) { - try ais.forcePushIndent(.normal); if (asm_node.ast.clobbers.unwrap()) |clobbers| { // asm ("foo" ::: clobbers) try renderExpression(r, asm_node.ast.template, .space); @@ -2764,99 +2512,78 @@ fn renderAsm( return renderToken(r, asm_node.ast.rparen, space); // rparen } - // asm ("foo") - try renderExpression(r, asm_node.ast.template, .none); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen + if (render_colons[0] == null) { + // asm ("foo") + try renderExpression(r, asm_node.ast.template, .none); + ais.popIndent(); + return 
renderToken(r, asm_node.ast.rparen, space); // rparen + } } - try ais.forcePushIndent(.normal); try renderExpression(r, asm_node.ast.template, .newline); ais.forceLastIndent(); // Might have been dedented by a multiline string literal assert(ais.current_line_empty); + const prev_indent_delta = ais.indent_delta; // May be part of another asm expression + // so indent_delta can't be unconditionally used ais.setIndentDelta(asm_indent_delta); - const colon1 = tree.lastToken(asm_node.ast.template) + 1; - const colon2 = if (asm_node.outputs.len == 0) colon2: { - try renderToken(r, colon1, .newline); // : - break :colon2 colon1 + 1; - } else colon2: { - try renderToken(r, colon1, .space); // : + rendered: { + if (render_colons[0]) |colon1| { + if (asm_node.outputs.len != 0) { + try renderToken(r, colon1, .space); + try ais.forcePushIndent(.normal); - try ais.forcePushIndent(.normal); - for (asm_node.outputs, 0..) |asm_output, i| { - if (i + 1 < asm_node.outputs.len) { - const next_asm_output = asm_node.outputs[i + 1]; - try renderAsmOutput(r, asm_output, .none); + const final = asm_node.outputs.len - 1; + for (asm_node.outputs[0..final], 0..) 
|asm_output, i| { + try renderAsmOutput(r, asm_output, .none); + + const next_start = tree.firstToken(asm_node.outputs[i + 1]); + try renderToken(r, next_start - 1, .newline); // , + try renderExtraNewlineToken(r, next_start); + } - const comma = tree.firstToken(next_asm_output) - 1; - try renderToken(r, comma, .newline); // , - try renderExtraNewlineToken(r, tree.firstToken(next_asm_output)); - } else if (asm_node.inputs.len == 0 and asm_node.ast.clobbers == .none) { try ais.pushSpace(.comma); - try renderAsmOutput(r, asm_output, .comma); + try renderAsmOutput(r, asm_node.outputs[final], .comma); ais.popSpace(); ais.popIndent(); - ais.setIndentDelta(indent_delta); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen } else { - try ais.pushSpace(.comma); - try renderAsmOutput(r, asm_output, .comma); - ais.popSpace(); - const comma_or_colon = tree.lastToken(asm_output) + 1; - ais.popIndent(); - break :colon2 switch (tree.tokenTag(comma_or_colon)) { - .comma => comma_or_colon + 1, - else => comma_or_colon, - }; + try renderToken(r, colon1, .newline); } } else unreachable; - }; - const colon3 = if (asm_node.inputs.len == 0) colon3: { - try renderToken(r, colon2, .newline); // : - break :colon3 colon2 + 1; - } else colon3: { - try renderToken(r, colon2, .space); // : - try ais.forcePushIndent(.normal); - for (asm_node.inputs, 0..) |asm_input, i| { - if (i + 1 < asm_node.inputs.len) { - const next_asm_input = asm_node.inputs[i + 1]; - try renderAsmInput(r, asm_input, .none); + if (render_colons[1]) |colon2| { + if (asm_node.inputs.len != 0) { + try renderToken(r, colon2, .space); + try ais.forcePushIndent(.normal); + + const final = asm_node.inputs.len - 1; + for (asm_node.inputs[0..final], 0..) 
|asm_input, i| { + try renderAsmInput(r, asm_input, .none); + + const next_start = tree.firstToken(asm_node.inputs[i + 1]); + try renderToken(r, next_start - 1, .newline); // , + try renderExtraNewlineToken(r, next_start); + } - const first_token = tree.firstToken(next_asm_input); - try renderToken(r, first_token - 1, .newline); // , - try renderExtraNewlineToken(r, first_token); - } else if (asm_node.ast.clobbers == .none) { try ais.pushSpace(.comma); - try renderAsmInput(r, asm_input, .comma); + try renderAsmInput(r, asm_node.inputs[final], .comma); ais.popSpace(); ais.popIndent(); - ais.setIndentDelta(indent_delta); - ais.popIndent(); - return renderToken(r, asm_node.ast.rparen, space); // rparen } else { - try ais.pushSpace(.comma); - try renderAsmInput(r, asm_input, .comma); - ais.popSpace(); - const comma_or_colon = tree.lastToken(asm_input) + 1; - ais.popIndent(); - break :colon3 switch (tree.tokenTag(comma_or_colon)) { - .comma => comma_or_colon + 1, - else => comma_or_colon, - }; + try renderToken(r, colon2, .newline); } - } - unreachable; - }; + } else break :rendered; - try renderToken(r, colon3, .maybe_space); // : - const clobbers = asm_node.ast.clobbers.unwrap().?; - try renderExpression(r, clobbers, .none); - ais.forceLastIndent(); // Might have been dedented by a multiline string literal - ais.setIndentDelta(indent_delta); + if (render_colons[2]) |colon3| { + const clobbers = asm_node.ast.clobbers.unwrap().?; + try renderToken(r, colon3, .maybe_space); + try renderExpression(r, clobbers, .none); + ais.forceLastIndent(); // Might have been dedented by a multiline string literal + } + } + + ais.setIndentDelta(prev_indent_delta); ais.popIndent(); return renderToken(r, asm_node.ast.rparen, space); // rparen } @@ -3324,6 +3051,7 @@ fn renderComments(r: *Render, start: usize, end: usize) Error!bool { // Write with the canonical single space. 
try ais.underlying_writer.writeAll("// zig fmt: on\n"); ais.disabled_offset = null; + ais.resetLine(); } else if (ais.disabled_offset == null and mem.eql(u8, comment_content, "zig fmt: off")) { // Write with the canonical single space. try ais.writeAll("// zig fmt: off\n"); diff --git a/lib/std/zig/AstSmith.zig b/lib/std/zig/AstSmith.zig new file mode 100644 index 0000000000..e849ed21d0 --- /dev/null +++ b/lib/std/zig/AstSmith.zig @@ -0,0 +1,2602 @@ +//! Generates a valid AST and corresponding source. +//! +//! This is based directly off grammer.peg + +const std = @import("../std.zig"); +const assert = std.debug.assert; +const Token = std.zig.Token; +const Smith = std.testing.Smith; +const Weight = Smith.Weight; +const AstSmith = @This(); + +smith: *Smith, + +source_buf: [16384]u8, +source_len: usize, + +token_tag_buf: [2048]Token.Tag, +token_start_buf: [2048]std.zig.Ast.ByteOffset, +tokens_len: usize, + +/// For `.asterisk`, this also includes `.asterisk2` +not_token: ?Token.Tag, +not_token_comptime: bool, +/// ExprSuffix +/// <- KEYWORD_or +/// / KEYWORD_and +/// / CompareOp +/// / BitwiseOp +/// / BitShiftOp +/// / AdditionOp +/// / MultiplyOp +/// / EXCLAMATIONMARK +/// / SuffixOp +/// / FnCallArguments +not_expr_suffix: bool, +/// LabelableExpr +/// <- Block +/// / SwitchExpr +/// / LoopExpr +not_labelable_expr: ?enum { colon, expr }, +not_label: bool, +not_break_label: bool, +not_block_expr: bool, +not_expr_statement: bool, + +prev_ids_buf: [256]struct { start: u16, len: u16 }, +/// This may be larger than `prev_ids` in which case, +/// x % prev_ids.len = next index +/// @min(x, prev_ids) = length +prev_ids_len: usize, + +/// `generate` must be called on the returned value before any other methods +pub fn init(smith: *Smith) AstSmith { + return .{ + .smith = smith, + + .source_buf = undefined, + .source_len = 0, + + .token_tag_buf = undefined, + .token_start_buf = undefined, + .tokens_len = 0, + + .not_token = null, + .not_token_comptime = false, + 
.not_expr_suffix = false, + .not_labelable_expr = null, + .not_label = false, + .not_break_label = false, + .not_block_expr = false, + .not_expr_statement = false, + + .prev_ids_buf = undefined, + .prev_ids_len = 0, + }; +} + +pub fn source(t: *AstSmith) [:0]u8 { + return t.source_buf[0..t.source_len :0]; +} + +/// The Slice is not backed by a MultiArrayList, so calling deinit or toMultiArrayList is illegal. +pub fn tokens(t: *AstSmith) std.zig.Ast.TokenList.Slice { + var slice: std.zig.Ast.TokenList.Slice = .{ + .ptrs = undefined, + .len = t.tokens_len, + .capacity = t.tokens_len, + }; + comptime assert(slice.ptrs.len == 2); + slice.ptrs[@intFromEnum(std.zig.Ast.TokenList.Field.tag)] = @ptrCast(&t.token_tag_buf); + slice.ptrs[@intFromEnum(std.zig.Ast.TokenList.Field.start)] = @ptrCast(&t.token_start_buf); + return slice; +} + +pub const Error = error{ OutOfMemory, SkipZigTest }; +const SourceError = error{SkipZigTest}; + +pub fn generate(a: *AstSmith, gpa: std.mem.Allocator) Error!std.zig.Ast { + try a.generateSource(); + const ast = try std.zig.Ast.parseTokens(gpa, a.source(), a.tokens(), .zig); + assert(ast.errors.len == 0); + return ast; +} + +pub fn generateSource(a: *AstSmith) SourceError!void { + try a.pegRoot(); + try a.ensureSourceCapacity(1); + a.source_buf[a.source_len] = 0; + try a.addTokenTag(.eof); +} + +/// For choices which can introduce a variable number of expressions, this should be used to reduce +/// unbounded recursion. +// +// `inline` to propogate caller's return address +inline fn smithListItemBool(a: *AstSmith) bool { + return a.smith.boolWeighted(63, 1); +} + +/// For choices which can introduce a variable number of expressions, this should be used to reduce +/// unbounded recursion. 
+// +// `inline` to propogate caller's return address +inline fn smithListItemEos(a: *AstSmith) bool { + return a.smith.eosWeightedSimple(1, 63); +} + +fn sourceCapacity(a: *AstSmith) []u8 { + return a.source_buf[a.source_len..]; +} + +fn sourceCapacityLen(a: *AstSmith) usize { + return a.source_buf.len - a.source_len; +} + +fn ensureSourceCapacity(a: *AstSmith, n: usize) SourceError!void { + if (a.sourceCapacityLen() < n) return error.SkipZigTest; +} + +fn addSourceByte(a: *AstSmith, byte: u8) SourceError!void { + try a.ensureSourceCapacity(1); + a.addSourceByteAssumeCapacity(byte); +} + +fn addSourceByteAssumeCapacity(a: *AstSmith, byte: u8) void { + a.sourceCapacity()[0] = byte; + a.source_len += 1; +} + +fn addSource(a: *AstSmith, bytes: []const u8) SourceError!void { + try a.ensureSourceCapacity(bytes.len); + a.addSourceAssumeCapacity(bytes); +} + +fn addSourceAssumeCapacity(a: *AstSmith, bytes: []const u8) void { + @memcpy(a.sourceCapacity()[0..bytes.len], bytes); + a.source_len += bytes.len; +} + +fn addSourceAsSlice(a: *AstSmith, len: usize) SourceError![]u8 { + try a.ensureSourceCapacity(len); + return a.addSourceAsSliceAssumeCapacity(len); +} + +fn addSourceAsSliceAssumeCapacity(a: *AstSmith, len: usize) []u8 { + const slice = a.sourceCapacity()[0..len]; + a.source_len += len; + return slice; +} + +fn tokenCapacityLen(a: *AstSmith) usize { + return a.token_tag_buf.len - a.tokens_len; +} + +fn ensureTokenCapacity(a: *AstSmith, n: usize) SourceError!void { + if (a.tokenCapacityLen() < n) return error.SkipZigTest; +} + +fn isAlphanumeric(c: u8) bool { + return switch (c) { + '_', 'a'...'z', 'A'...'Z', '0'...'9' => true, + else => false, + }; +} + +/// For tokens starting with alphanumerics, this ensures +/// previous tokens followed by end_of_word aren't altered. 
+/// +/// end_of_word <- ![a-zA-Z0-9_] skip +fn preservePegEndOfWord(a: *AstSmith) SourceError!void { + if (a.source_len > 0 and isAlphanumeric(a.source_buf[a.source_len - 1])) { + try a.addSourceByte(' '); + } +} + +/// Assumes the token has not been written yet +fn addTokenTag(a: *AstSmith, tag: Token.Tag) SourceError!void { + assert(tag != a.not_token); + if (a.not_token == .asterisk) assert(tag != .asterisk_asterisk); + a.not_token = null; + + if (a.not_token_comptime) assert(tag != .keyword_comptime); + a.not_token_comptime = false; + + if (a.not_label and tag == .identifier) { + a.not_token = .colon; + } + a.not_label = false; + + if (a.not_break_label and tag == .colon) { + a.not_token = .identifier; + } + a.not_break_label = false; + + if (a.not_labelable_expr) |part| switch (part) { + .colon => a.not_labelable_expr = if (tag == .colon) .expr else null, + .expr => switch (tag) { + .l_brace => unreachable, + .keyword_inline => {}, + .keyword_for => unreachable, + .keyword_while => unreachable, + .keyword_switch => unreachable, + else => a.not_labelable_expr = null, + }, + }; + + a.not_expr_suffix = false; + a.not_block_expr = false; + a.not_expr_statement = false; + + try a.ensureTokenCapacity(1); + a.token_tag_buf[a.tokens_len] = tag; + a.token_start_buf[a.tokens_len] = @intCast(a.source_len); + a.tokens_len += 1; +} + +/// Asserts the token has a lexeme (those without have corresponding methods) +fn pegToken(a: *AstSmith, tag: Token.Tag) SourceError!void { + const lexeme = tag.lexeme().?; + + switch (lexeme[0]) { + '_', 'a'...'z', 'A'...'Z', '0'...'9' => try a.preservePegEndOfWord(), + '*' => if (a.tokens_len > 0 and a.source_buf[a.source_len - 1] == '*' and + a.token_tag_buf[a.tokens_len - 1] != .asterisk_asterisk) + { + try a.addSourceByte(' '); + }, + '.' => if (a.tokens_len > 0 and switch (a.source_buf[a.source_len - 1]) { + '.' 
=> true, + '0'...'9', 'a'...'z', 'A'...'Z' => a.token_tag_buf[a.tokens_len - 1] == .number_literal, + else => false, + }) { + try a.addSourceByte(' '); + }, + '+', '-' => if (a.tokens_len > 0 and a.token_tag_buf[a.tokens_len - 1] == .number_literal and + switch (a.source_buf[a.source_len - 1]) { + 'e', 'E', 'p', 'P' => true, + else => false, + }) + { + // Would otherwise be tokenized as the sign of a float's exponent + // + // e.g. "0xFE" ++ "+" ++ "2" (number_literal, plus, number_literal) + try a.addSourceByte(' '); + }, + else => {}, + } + + if (isAlphanumeric(lexeme[0])) try a.preservePegEndOfWord(); + + try a.addTokenTag(tag); + try a.addSource(lexeme); + try a.pegSkip(); +} + +/// Asserts `a.source_len != 0` +fn pegTokenWhitespaceAround(a: *AstSmith, tag: Token.Tag) SourceError!void { + switch (a.source_buf[a.source_len - 1]) { + ' ', '\n' => {}, + else => try a.addSourceByte(' '), + } + try a.addTokenTag(tag); + try a.addSource(tag.lexeme().?); + switch (a.smith.value(enum { space, line_break, cr_line_break })) { + // This is not the same as 'skip' since comments are not whitespace + .space => try a.addSourceByte(' '), + .line_break => try a.addSourceByte('\n'), + .cr_line_break => try a.addSource("\r\n"), + } + try a.pegSkip(); +} + +/// Root <- skip ContainerMembers eof +fn pegRoot(a: *AstSmith) SourceError!void { + try a.pegSkip(); + try a.pegContainerMembers(); +} + +/// ContainerMembers <- container_doc_comment? 
ContainerDeclaration* (ContainerField COMMA)* +/// (ContainerField / ContainerDeclaration*) +fn pegContainerMembers(a: *AstSmith) SourceError!void { + if (a.smith.boolWeighted(63, 1)) { + try a.pegContainerDocComment(); + } + while (!a.smithListItemEos()) { + try a.pegContainerDeclaration(); + } + while (!a.smithListItemEos()) { + try a.pegContainerField(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + if (a.smith.value(bool)) { + try a.pegContainerField(); + } else while (true) { + try a.pegContainerDeclaration(); + if (a.smithListItemEos()) break; + } + } +} + +/// ContainerDeclaration <- TestDecl / ComptimeDecl / doc_comment? KEYWORD_pub? Decl +fn pegContainerDeclaration(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { TestDecl, ComptimeDecl, Decl })) { + .TestDecl => try a.pegTestDecl(), + .ComptimeDecl => try a.pegComptimeDecl(), + .Decl => { + try a.pegMaybeDocComment(); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_pub); + } + try a.pegDecl(); + }, + } +} + +/// KEYWORD_test (STRINGLITERALSINGLE / IDENTIFIER)? Block +fn pegTestDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_test); + switch (a.smith.value(enum { none, string, id })) { + .none => {}, + .string => try a.pegStringLiteralSingle(), + .id => try a.pegIdentifier(), + } + try a.pegBlock(); +} + +/// ComptimeDecl <- KEYWORD_comptime Block +fn pegComptimeDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_comptime); + try a.pegBlock(); +} + +/// Decl +/// <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON +/// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? 
+/// GlobalVarDecl +fn pegDecl(a: *AstSmith) SourceError!void { + const Modifier = enum(u8) { + none, + @"export", + @"extern", + extern_library, + @"inline", + @"noinline", + }; + const is_fn = a.smith.value(bool); + const fn_modifiers = Smith.baselineWeights(Modifier); + const var_modifiers: []const Weight = &.{.rangeAtMost(Modifier, .none, .extern_library, 1)}; + const modifier = a.smith.valueWeighted(Modifier, if (is_fn) fn_modifiers else var_modifiers); + + switch (modifier) { + .none => {}, + .@"export" => try a.pegToken(.keyword_export), + .@"extern" => try a.pegToken(.keyword_extern), + .extern_library => { + try a.pegToken(.keyword_extern); + try a.pegStringLiteralSingle(); + }, + .@"inline" => try a.pegToken(.keyword_inline), + .@"noinline" => try a.pegToken(.keyword_noinline), + } + + if (is_fn) { + try a.pegFnProto(); + if (modifier == .@"extern" or modifier == .extern_library or a.smith.value(bool)) { + try a.pegToken(.semicolon); + } else { + try a.pegBlock(); + } + } else { + if (a.smith.value(bool)) try a.pegToken(.keyword_threadlocal); + try a.pegGlobalVarDecl(); + } +} + +/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? +/// LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix +fn pegFnProto(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_fn); + if (a.smith.value(bool)) { + try a.pegIdentifier(); + } + try a.pegToken(.l_paren); + try a.pegParamDeclList(); + try a.pegToken(.r_paren); + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + if (a.smith.value(bool)) { + try a.pegLinkSection(); + } + if (a.smith.value(bool)) { + try a.pegCallConv(); + } + if (a.smith.value(bool)) { + try a.pegToken(.bang); + } + try a.pegTypeExpr(); + a.not_expr_suffix = true; +} + +/// VarDeclProto <- (KEYWORD_const / KEYWORD_var) IDENTIFIER (COLON TypeExpr)? ByteAlign? +/// AddrSpace? LinkSection? 
+fn pegVarDeclProto(a: *AstSmith) SourceError!void { + try a.pegToken(if (a.smith.value(bool)) .keyword_var else .keyword_const); + try a.pegIdentifier(); + + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegTypeExpr(); + } + + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + + if (a.smith.value(bool)) { + try a.pegLinkSection(); + } +} + +/// GlobalVarDecl <- VarDeclProto (EQUAL Expr)? SEMICOLON +fn pegGlobalVarDecl(a: *AstSmith) SourceError!void { + try a.pegVarDeclProto(); + if (a.smithListItemBool()) { + try a.pegToken(.equal); + try a.pegExpr(); + } + try a.pegToken(.semicolon); +} + +/// ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn +/// (IDENTIFIER COLON !(IDENTIFIER COLON)) TypeExpr ByteAlign? (EQUAL Expr)? +fn pegContainerField(a: *AstSmith) SourceError!void { + try a.pegMaybeDocComment(); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_comptime); + } + if (a.smith.value(bool)) { + try a.pegIdentifier(); + try a.pegToken(.colon); + } else { + a.not_token = .keyword_fn; + a.not_token_comptime = true; + a.not_label = true; + } + try a.pegTypeExpr(); + if (a.smith.value(bool)) { + try a.pegByteAlign(); + } + if (a.smith.value(bool)) { + try a.pegToken(.equal); + try a.pegExpr(); + } +} + +/// BlockStatement +/// <- Statement +/// / KEYWORD_defer BlockExprStatement +/// / KEYWORD_errdefer Payload? BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? 
VarAssignStatement +fn pegBlockStatement(a: *AstSmith) SourceError!void { + const Kind = enum { + statement, + defer_statement, + errdefer_statement, + var_assign, + comptime_var_assign, + }; + const weights = Smith.baselineWeights(Kind) ++ &[1]Weight{.value(Kind, .statement, 4)}; + switch (a.smith.valueWeighted(Kind, weights)) { + .statement => try a.pegStatement(), + .defer_statement, .errdefer_statement => |kind| { + try a.pegToken(switch (kind) { + .defer_statement => .keyword_defer, + .errdefer_statement => .keyword_errdefer, + else => unreachable, + }); + try a.pegBlockExprStatement(); + }, + .var_assign, .comptime_var_assign => |kind| { + a.not_expr_statement = true; + if (kind == .comptime_var_assign) { + try a.pegToken(.keyword_comptime); + a.not_block_expr = true; + } + try a.pegVarAssignStatement(); + }, + } +} + +/// Statement +/// <- ExprStatement +/// / KEYWORD_suspend BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? AssignExpr SEMICOLON +/// +/// ExprStatement +/// <- IfStatement +/// / LabeledStatement +/// / KEYWORD_nosuspend BlockExprStatement +/// / KEYWORD_comptime BlockExpr +fn pegStatement(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { + if_statement, + labeled_statement, + comptime_block_expr, + + nosuspend_statement, + suspend_statement, + assign_expr, + comptime_assign_expr, + })) { + .if_statement => try a.pegIfStatement(), + .labeled_statement => try a.pegLabeledStatement(), + .comptime_block_expr => { + try a.pegToken(.keyword_comptime); + try a.pegBlockExpr(); + }, + + .nosuspend_statement, + .suspend_statement, + => |kind| { + try a.pegToken(switch (kind) { + .nosuspend_statement => .keyword_nosuspend, + .suspend_statement => .keyword_suspend, + else => unreachable, + }); + try a.pegBlockExprStatement(); + }, + .assign_expr, .comptime_assign_expr => |kind| { + a.not_expr_statement = true; + if (kind == .comptime_assign_expr) { + try a.pegToken(.keyword_comptime); + a.not_block_expr = true; + } + 
try a.pegAssignExpr(); + try a.pegToken(.semicolon); + }, + } +} + +/// IfStatement +/// <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? +/// / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +fn pegIfStatement(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + if (a.smith.value(bool)) { + try a.pegPayload(); + } + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// LabeledStatement <- BlockLabel? (Block / LoopStatement / SwitchExpr) +fn pegLabeledStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockLabel(); + } + switch (a.smith.value(enum { block, loop_statement, switch_expr })) { + .block => try a.pegBlock(), + .loop_statement => try a.pegLoopStatement(), + .switch_expr => try a.pegSwitchExpr(), + } +} + +/// LoopStatement <- KEYWORD_inline? 
(ForStatement / WhileStatement) +fn pegLoopStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + if (a.smith.value(bool)) { + try a.pegForStatement(); + } else { + try a.pegWhileStatement(); + } +} + +/// ForStatement +/// <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) +/// / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) +fn pegForStatement(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// WhileStatement +/// <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? +/// / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +fn pegWhileStatement(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + const is_assign = a.smith.value(bool); + if (!is_assign) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + } + if (a.not_token != .keyword_else and a.smithListItemBool()) { + try a.pegToken(.keyword_else); + if (a.smith.value(bool)) { + try a.pegPayload(); + } + try a.pegStatement(); + } else if (is_assign) { + try a.pegToken(.semicolon); + } else { + a.not_token = .keyword_else; + } +} + +/// BlockExprStatement +/// <- BlockExpr +/// / !BlockExpr AssignExpr SEMICOLON +fn pegBlockExprStatement(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockExpr(); + } else { + a.not_block_expr = true; + try a.pegAssignExpr(); + try a.pegToken(.semicolon); + } +} + +/// BlockExpr <- BlockLabel? 
Block +fn pegBlockExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegBlock(); +} + +/// VarAssignStatement <- (Expr / VarDeclProto) (COMMA (Expr / VarDeclProto))* EQUAL Expr SEMICOLON +fn pegVarAssignStatement(a: *AstSmith) SourceError!void { + while (true) { + if (a.smith.value(bool)) { + try a.pegVarDeclProto(); + } else { + try a.pegExpr(); + } + + if (a.smithListItemEos()) { + break; + } else { + try a.pegToken(.comma); + } + } + + try a.pegToken(.equal); + try a.pegExpr(); + try a.pegToken(.semicolon); +} + +/// AssignExpr <- Expr (AssignOp Expr / (COMMA Expr)+ EQUAL Expr)? +fn pegAssignExpr(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + if (!a.smithListItemBool()) { + try a.pegAssignOp(); + } else { + while (true) { + try a.pegToken(.comma); + try a.pegExpr(); + if (a.smithListItemEos()) break; + } + try a.pegToken(.equal); + } + try a.pegExpr(); + } +} + +/// SingleAssignExpr <- Expr (AssignOp Expr)? +fn pegSingleAssignExpr(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegAssignOp(); + try a.pegExpr(); + } +} + +/// Expr <- BoolOrExpr +const pegExpr = pegBoolOrExpr; + +/// BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)* +fn pegBoolOrExpr(a: *AstSmith) SourceError!void { + try a.pegBoolAndExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegTokenWhitespaceAround(.keyword_or); + try a.pegBoolAndExpr(); + } +} + +/// BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)* +fn pegBoolAndExpr(a: *AstSmith) SourceError!void { + try a.pegCompareExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegTokenWhitespaceAround(.keyword_and); + try a.pegCompareExpr(); + } +} + +/// CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)? 
+fn pegCompareExpr(a: *AstSmith) SourceError!void { + try a.pegBitwiseExpr(); + if (!a.not_expr_suffix and a.smithListItemBool()) { + try a.pegCompareOp(); + try a.pegBitwiseExpr(); + } +} + +/// BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)* +fn pegBitwiseExpr(a: *AstSmith) SourceError!void { + try a.pegBitShiftExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegBitwiseOp(); + try a.pegBitShiftExpr(); + } +} + +/// BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)* +fn pegBitShiftExpr(a: *AstSmith) SourceError!void { + try a.pegAdditionExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegBitShiftOp(); + try a.pegAdditionExpr(); + } +} + +/// AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)* +fn pegAdditionExpr(a: *AstSmith) SourceError!void { + try a.pegMultiplyExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegAdditionOp(); + try a.pegMultiplyExpr(); + } +} + +/// MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)* +fn pegMultiplyExpr(a: *AstSmith) SourceError!void { + try a.pegPrefixExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + try a.pegMultiplyOp(); + try a.pegPrefixExpr(); + } +} + +/// PrefixExpr <- PrefixOp* PrimaryExpr +fn pegPrefixExpr(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegPrefixOp(); + } + try a.pegPrimaryExpr(); +} + +/// PrimaryExpr +/// <- AsmExpr +/// / IfExpr +/// / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_comptime Expr !ExprSuffix +/// / KEYWORD_nosuspend Expr !ExprSuffix +/// / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_resume Expr !ExprSuffix +/// / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / BlockLabel? 
LoopExpr +/// / Block +/// / CurlySuffixExpr +fn pegPrimaryExpr(a: *AstSmith) SourceError!void { + const Kind = enum(u8) { + curly_suffix_expr, + @"return", + @"continue", + @"break", + block, + asm_expr, + // Always contain more expressions + if_expr, + loop_expr, + @"resume", + @"comptime", + @"nosuspend", + }; + + switch (a.smith.valueWeighted(Kind, &.{ + .value(Kind, .curly_suffix_expr, 75), + .rangeAtMost(Kind, .@"return", .asm_expr, 4), + .rangeAtMost(Kind, .if_expr, .@"nosuspend", 1), + })) { + .curly_suffix_expr => try a.pegCurlySuffixExpr(), + + .block => if (a.not_labelable_expr != .expr and !a.not_block_expr and !a.not_expr_statement) { + try a.pegBlock(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegBlock(); + try a.pegToken(.r_paren); + }, + .asm_expr => try a.pegAsmExpr(), + .if_expr => if (!a.not_expr_statement) { + try a.pegIfExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIfExpr(); + try a.pegToken(.r_paren); + }, + .loop_expr => { + const group = a.not_labelable_expr == .expr or a.not_expr_statement; + if (group) try a.pegToken(.l_paren); + if (!a.not_label and a.not_token != .identifier and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegLoopExpr(); + if (group) try a.pegToken(.r_paren); + }, + + .@"return", + .@"comptime", + .@"nosuspend", + .@"resume", + .@"break", + .@"continue", + => |t| { + const group = a.not_expr_statement and (t == .@"nosuspend" or t == .@"comptime"); + if (group) try a.pegToken(.l_paren); + + const kw: Token.Tag, const label, const expr = switch (t) { + .@"return" => .{ .keyword_return, false, a.smithListItemBool() }, + .@"comptime" => .{ .keyword_comptime, false, true }, + .@"nosuspend" => .{ .keyword_nosuspend, false, true }, + .@"resume" => .{ .keyword_resume, false, true }, + .@"break" => .{ .keyword_break, a.smith.value(bool), a.smithListItemBool() }, + .@"continue" => .{ .keyword_continue, a.smith.value(bool), a.smithListItemBool() }, + else => unreachable, + }; + 
try a.pegToken(kw); + if (label) { + try a.pegBreakLabel(); + } else { + a.not_break_label = true; + } + if (expr) { + try a.pegExpr(); + a.not_expr_suffix = true; + } else { + a.not_token = .asterisk; + } + + if (group) try a.pegToken(.r_paren); + }, + } +} + +/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix +fn pegIfExpr(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + try a.pegExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// Block <- LBRACE Statement* RBRACE +fn pegBlock(a: *AstSmith) SourceError!void { + try a.pegToken(.l_brace); + while (!a.smithListItemEos()) { + try a.pegBlockStatement(); + } + try a.pegToken(.r_brace); +} + +/// LoopExpr <- KEYWORD_inline? (ForExpr / WhileExpr) +fn pegLoopExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + + if (a.smith.value(bool)) { + try a.pegForExpr(); + } else { + try a.pegWhileExpr(); + } +} + +/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix +fn pegForExpr(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + try a.pegExpr(); + if (a.not_token != .keyword_else and a.smith.value(bool)) { + try a.pegToken(.keyword_else); + try a.pegExpr(); + } else { + a.not_token = .keyword_else; + } + a.not_expr_suffix = true; +} + +/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? 
!ExprSuffix +fn pegWhileExpr(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + try a.pegExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// CurlySuffixExpr <- TypeExpr InitList? +fn pegCurlySuffixExpr(a: *AstSmith) SourceError!void { + try a.pegTypeExpr(); + if (!a.not_expr_suffix and a.smith.value(bool)) { + try a.pegInitList(); + } +} + +/// InitList +/// <- LBRACE FieldInit (COMMA FieldInit)* COMMA? RBRACE +/// / LBRACE Expr (COMMA Expr)* COMMA? RBRACE +/// / LBRACE RBRACE +fn pegInitList(a: *AstSmith) SourceError!void { + try a.pegToken(.l_brace); + if (a.smithListItemBool()) { + if (a.smith.value(bool)) { + try a.pegFieldInit(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegFieldInit(); + } + } else { + try a.pegExpr(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegExpr(); + } + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + } + try a.pegToken(.r_brace); +} + +/// PrefixTypeOp* ErrorUnionExpr +fn pegTypeExpr(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegPrefixTypeOp(); + } + try a.pegErrorUnionExpr(); +} + +/// ErrorUnionExpr <- SuffixExpr (EXCLAMATIONMARK TypeExpr)? 
+fn pegErrorUnionExpr(a: *AstSmith) SourceError!void { + try a.pegSuffixExpr(); + if (!a.not_expr_suffix and a.smithListItemBool()) { + try a.pegToken(.bang); + try a.pegTypeExpr(); + } +} + +/// SuffixExpr +/// <- PrimaryTypeExpr (SuffixOp / FnCallArguments)* +fn pegSuffixExpr(a: *AstSmith) SourceError!void { + try a.pegPrimaryTypeExpr(); + while (!a.not_expr_suffix and !a.smithListItemEos()) { + if (a.smith.value(bool)) { + try a.pegSuffixOp(); + } else { + try a.pegFnCallArguments(); + } + } +} + +/// PrimaryTypeExpr +/// <- BUILTINIDENTIFIER FnCallArguments +/// / CHAR_LITERAL +/// / ContainerDecl +/// / DOT IDENTIFIER +/// / DOT InitList +/// / ErrorSetDecl +/// / FLOAT +/// / FnProto +/// / GroupedExpr +/// / LabeledTypeExpr +/// / IDENTIFIER !(COLON LabelableExpr) +/// / IfTypeExpr +/// / INTEGER +/// / KEYWORD_comptime TypeExpr !ExprSuffix +/// / KEYWORD_error DOT IDENTIFIER +/// / KEYWORD_anyframe +/// / KEYWORD_unreachable +/// / STRINGLITERAL +fn pegPrimaryTypeExpr(a: *AstSmith) SourceError!void { + const Kind = enum(u8) { + identifier, + float, + integer, + char_literal, + string_literal, + enum_literal, + error_literal, + unreachable_type, + anyframe_type, + + // Containing zero or more expressions + builtin_call, + array_literal, + container_decl, + fn_proto, + error_set, + + // Containing one or more epressions + grouped, + labeled_type_expr, + if_type_expr, + comptime_expr, + }; + + switch (a.smith.valueWeighted(Kind, &.{ + .rangeAtMost(Kind, .identifier, .anyframe_type, 5), + .rangeAtMost(Kind, .builtin_call, .error_set, 2), + .rangeAtMost(Kind, .grouped, .comptime_expr, 1), + })) { + .identifier => if (a.not_token != .identifier) { + try a.pegIdentifier(); + a.not_labelable_expr = .colon; + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIdentifier(); + try a.pegToken(.r_paren); + }, + .float => try a.pegFloat(), + .integer => try a.pegInteger(), + .char_literal => try a.pegCharLiteral(), + .string_literal => try a.pegStringLiteral(), + 
.enum_literal => { + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .error_literal => { + try a.pegToken(.keyword_error); + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .unreachable_type => try a.pegToken(.keyword_unreachable), + .anyframe_type => try a.pegToken(.keyword_anyframe), + + .builtin_call => { + try a.pegBuiltinIdentifier(); + try a.pegFnCallArguments(); + }, + .array_literal => { + try a.pegToken(.period); + try a.pegInitList(); + }, + .container_decl => try a.pegContainerDecl(), + .fn_proto => if (a.not_token != .keyword_fn) { + try a.pegFnProto(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegFnProto(); + try a.pegToken(.r_paren); + }, + .error_set => try a.pegErrorSetDecl(), + + .grouped => try a.pegGroupedExpr(), + .labeled_type_expr => try a.pegLabeledTypeExpr(), + .if_type_expr => if (!a.not_expr_statement) { + try a.pegIfTypeExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegIfTypeExpr(); + try a.pegToken(.r_paren); + }, + .comptime_expr => if (!a.not_token_comptime and !a.not_expr_statement) { + try a.pegToken(.keyword_comptime); + try a.pegTypeExpr(); + } else { + // Group + try a.pegToken(.l_paren); + try a.pegToken(.keyword_comptime); + try a.pegTypeExpr(); + try a.pegToken(.r_paren); + }, + } +} + +/// ContainerDecl <- (KEYWORD_extern / KEYWORD_packed)? 
ContainerDeclAuto +fn pegContainerDecl(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { auto, @"extern", @"packed" })) { + .auto => {}, + .@"extern" => try a.pegToken(.keyword_extern), + .@"packed" => try a.pegToken(.keyword_packed), + } + try a.pegContainerDeclAuto(); +} + +/// ErrorSetDecl <- KEYWORD_error LBRACE IdentifierList RBRACE +fn pegErrorSetDecl(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_error); + try a.pegToken(.l_brace); + try a.pegIdentifierList(); + try a.pegToken(.r_brace); +} + +/// GroupedExpr <- LPAREN Expr RPAREN +fn pegGroupedExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix +fn pegIfTypeExpr(a: *AstSmith) SourceError!void { + try a.pegIfPrefix(); + try a.pegTypeExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegTypeExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// LabeledTypeExpr +/// <- BlockLabel Block +/// / BlockLabel? LoopTypeExpr +/// / BlockLabel? 
SwitchExpr +fn pegLabeledTypeExpr(a: *AstSmith) SourceError!void { + const kind = a.smith.value(enum { block, loop, @"switch" }); + const not_any = a.not_labelable_expr == .expr or a.not_expr_statement; + const no_label = a.not_label or a.not_token == .identifier; + const no_block = no_label or a.not_block_expr; + const group = not_any or (kind == .block and no_block); + if (group) try a.pegToken(.l_paren); + + switch (kind) { + .block => { + try a.pegBlockLabel(); + try a.pegBlock(); + }, + .loop => { + if (!no_label and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegLoopTypeExpr(); + }, + .@"switch" => { + if (!no_label and a.smith.value(bool)) { + try a.pegBlockLabel(); + } + try a.pegSwitchExpr(); + }, + } + + if (group) try a.pegToken(.r_paren); +} + +/// LoopTypeExpr <- KEYWORD_inline? (ForTypeExpr / WhileTypeExpr) +fn pegLoopTypeExpr(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + + if (a.smith.value(bool)) { + try a.pegForTypeExpr(); + } else { + try a.pegWhileTypeExpr(); + } +} + +/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix +fn pegForTypeExpr(a: *AstSmith) SourceError!void { + try a.pegForPrefix(); + try a.pegTypeExpr(); + if (a.not_token != .keyword_else and a.smith.value(bool)) { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + } else { + a.not_token = .keyword_else; + } + a.not_expr_suffix = true; +} + +/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 
!ExprSuffix +fn pegWhileTypeExpr(a: *AstSmith) SourceError!void { + try a.pegWhilePrefix(); + try a.pegTypeExpr(); + const Else = enum { none, @"else", else_payload }; + switch (if (a.not_token != .keyword_else) a.smith.value(Else) else .none) { + .none => a.not_token = .keyword_else, + .@"else" => { + try a.pegToken(.keyword_else); + try a.pegTypeExpr(); + }, + .else_payload => { + try a.pegToken(.keyword_else); + try a.pegPayload(); + try a.pegTypeExpr(); + }, + } + a.not_expr_suffix = true; +} + +/// SwitchExpr <- KEYWORD_switch LPAREN Expr RPAREN LBRACE SwitchProngList RBRACE +fn pegSwitchExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_switch); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + + try a.pegToken(.l_brace); + try a.pegSwitchProngList(); + try a.pegToken(.r_brace); +} + +/// AsmExpr <- KEYWORD_asm KEYWORD_volatile? LPAREN Expr AsmOutput? RPAREN +fn pegAsmExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_asm); + if (a.smith.value(bool)) { + try a.pegToken(.keyword_volatile); + } + try a.pegToken(.l_paren); + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegAsmOutput(); + } + try a.pegToken(.r_paren); +} + +/// AsmOutput <- COLON AsmOutputList AsmInput? +fn pegAsmOutput(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegAsmOutputList(); + if (a.smith.value(bool)) { + try a.pegAsmInput(); + } +} + +/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +fn pegAsmOutputItem(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegIdentifier(); + try a.pegToken(.r_bracket); + try a.pegStringLiteralSingle(); + try a.pegToken(.l_paren); + if (a.smith.value(bool)) { + try a.pegToken(.arrow); + try a.pegTypeExpr(); + } else { + try a.pegIdentifier(); + } + try a.pegToken(.r_paren); +} + +/// AsmInput <- COLON AsmInputList AsmClobbers? 
+fn pegAsmInput(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegAsmInputList(); + if (a.smith.value(bool)) { + try a.pegAsmClobbers(); + } +} + +/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN +fn pegAsmInputItem(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegIdentifier(); + try a.pegToken(.r_bracket); + try a.pegStringLiteralSingle(); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// AsmClobbers <- COLON Expr +fn pegAsmClobbers(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegExpr(); +} + +/// BreakLabel <- COLON IDENTIFIER +fn pegBreakLabel(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegIdentifier(); +} + +/// BlockLabel <- IDENTIFIER COLON +fn pegBlockLabel(a: *AstSmith) SourceError!void { + try a.pegIdentifier(); + try a.pegToken(.colon); +} + +/// FieldInit <- DOT IDENTIFIER EQUAL Expr +fn pegFieldInit(a: *AstSmith) SourceError!void { + try a.pegToken(.period); + try a.pegIdentifier(); + try a.pegToken(.equal); + try a.pegExpr(); +} + +/// WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN +fn pegWhileContinueExpr(a: *AstSmith) SourceError!void { + try a.pegToken(.colon); + try a.pegToken(.l_paren); + try a.pegAssignExpr(); + try a.pegToken(.r_paren); +} + +/// LinkSection <- KEYWORD_linksection LPAREN Expr RPAREN +fn pegLinkSection(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_linksection); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// AddrSpace <- KEYWORD_addrspace LPAREN Expr RPAREN +fn pegAddrSpace(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_addrspace); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); +} + +/// CallConv <- KEYWORD_callconv LPAREN Expr RPAREN +fn pegCallConv(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_callconv); + try a.pegToken(.l_paren); + try a.pegExpr(); + try 
a.pegToken(.r_paren); +} + +/// ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? +/// ((IDENTIFIER COLON) / !KEYWORD_comptime !(IDENTIFIER COLON)) +/// ParamType +fn pegParamDecl(a: *AstSmith) SourceError!void { + try a.pegMaybeDocComment(); + const modifier = a.smith.value(enum { none, @"noalias", @"comptime" }); + switch (modifier) { + .none => a.not_token_comptime = true, + .@"noalias" => try a.pegToken(.keyword_noalias), + .@"comptime" => try a.pegToken(.keyword_comptime), + } + if (a.smith.value(bool)) { + try a.pegIdentifier(); + try a.pegToken(.colon); + } else { + a.not_label = true; + } + try a.pegParamType(); +} + +/// ParamType +/// <- KEYWORD_anytype +/// / TypeExpr +fn pegParamType(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_anytype); + } else { + try a.pegTypeExpr(); + } +} + +/// IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? +fn pegIfPrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_if); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + try a.pegPtrPayload(); +} + +/// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? +fn pegWhilePrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_while); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + + if (a.smith.value(bool)) { + try a.pegPtrPayload(); + } + + if (a.smith.value(bool)) { + try a.pegWhileContinueExpr(); + } +} + +/// ForPrefix <- KEYWORD_for LPAREN ForArgumentsList RPAREN PtrListPayload +/// +/// An additional requirement checked in the Parser is that the number of +/// arguments and payload elements are the same. 
+fn pegForPrefix(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_for); + try a.pegToken(.l_paren); + const n = try a.pegForArgumentsList(); + try a.pegToken(.r_paren); + try a.pegPtrListPayload(n); +} + +/// Payload <- PIPE IDENTIFIER PIPE +fn pegPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + try a.pegIdentifier(); + try a.pegToken(.pipe); +} + +/// PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE +fn pegPtrPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + try a.pegToken(.pipe); +} + +/// PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE +fn pegPtrIndexPayload(a: *AstSmith) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + if (a.smith.value(bool)) { + try a.pegToken(.comma); + try a.pegIdentifier(); + } + try a.pegToken(.pipe); +} + +/// PtrListPayload <- PIPE ASTERISK? IDENTIFIER (COMMA ASTERISK? IDENTIFIER)* COMMA? PIPE +fn pegPtrListPayload(a: *AstSmith, n: usize) SourceError!void { + try a.pegToken(.pipe); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + + for (1..n) |_| { + try a.pegToken(.comma); + if (a.smith.value(bool)) { + try a.pegToken(.asterisk); + } + try a.pegIdentifier(); + } + + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + try a.pegToken(.pipe); +} + +/// SwitchProng <- KEYWORD_inline? SwitchCase EQUALRARROW PtrIndexPayload? SingleAssignExpr +fn pegSwitchProng(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegToken(.keyword_inline); + } + try a.pegSwitchCase(); + try a.pegToken(.equal_angle_bracket_right); + if (a.smith.value(bool)) { + try a.pegPtrIndexPayload(); + } + try a.pegSingleAssignExpr(); +} + +/// SwitchCase +/// <- SwitchItem (COMMA SwitchItem)* COMMA? 
+/// / KEYWORD_else +fn pegSwitchCase(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegSwitchItem(); + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegSwitchItem(); + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + } else { + try a.pegToken(.keyword_else); + } +} + +/// SwitchItem <- Expr (DOT3 Expr)? +fn pegSwitchItem(a: *AstSmith) SourceError!void { + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.ellipsis3); + try a.pegExpr(); + } +} + +/// ForArgumentsList <- ForItem (COMMA ForItem)* COMMA? +fn pegForArgumentsList(a: *AstSmith) SourceError!usize { + try a.pegForItem(); + var n: usize = 1; + while (!a.smithListItemEos()) { + try a.pegToken(.comma); + try a.pegForItem(); + n += 1; + } + if (a.smith.value(bool)) { + try a.pegToken(.comma); + } + return n; +} + +/// ForItem <- Expr (DOT2 Expr?)? +fn pegForItem(a: *AstSmith) SourceError!void { + try a.pegExpr(); + const components = a.smith.valueRangeAtMost(u2, 0, 2); + if (components >= 1) try a.pegToken(.ellipsis2); + if (components >= 2) try a.pegExpr(); +} + +/// AssignOp +/// <- ASTERISKEQUAL +/// / ASTERISKPIPEEQUAL +/// / SLASHEQUAL +/// / PERCENTEQUAL +/// / PLUSEQUAL +/// / PLUSPIPEEQUAL +/// / MINUSEQUAL +/// / MINUSPIPEEQUAL +/// / LARROW2EQUAL +/// / LARROW2PIPEEQUAL +/// / RARROW2EQUAL +/// / AMPERSANDEQUAL +/// / CARETEQUAL +/// / PIPEEQUAL +/// / ASTERISKPERCENTEQUAL +/// / PLUSPERCENTEQUAL +/// / MINUSPERCENTEQUAL +/// / EQUAL +fn pegAssignOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .asterisk_equal, + .asterisk_pipe_equal, + .slash_equal, + .percent_equal, + .plus_equal, + .plus_pipe_equal, + .minus_equal, + .minus_pipe_equal, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_angle_bracket_left_pipe_equal, + .angle_bracket_angle_bracket_right_equal, + .ampersand_equal, + .caret_equal, + .pipe_equal, + .asterisk_percent_equal, + .plus_percent_equal, + .minus_percent_equal, + .equal, + }; + try 
a.pegToken(tags[a.smith.index(tags.len)]); +} + +/// CompareOp +/// <- EQUALEQUAL +/// / EXCLAMATIONMARKEQUAL +/// / LARROW +/// / RARROW +/// / LARROWEQUAL +/// / RARROWEQUAL +fn pegCompareOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .equal_equal, + .bang_equal, + .angle_bracket_left, + .angle_bracket_right, + .angle_bracket_left_equal, + .angle_bracket_right_equal, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// BitwiseOp +/// <- AMPERSAND +/// / CARET +/// / PIPE +/// / KEYWORD_orelse +/// / KEYWORD_catch Payload? +fn pegBitwiseOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .ampersand, + .caret, + .pipe, + .keyword_orelse, + .keyword_catch, + }; + const tag = tags[a.smith.index(tags.len)]; + try a.pegTokenWhitespaceAround(tag); + if (tag == .keyword_catch and a.smith.value(bool)) { + try a.pegPayload(); + } +} + +/// BitShiftOp +/// <- LARROW2 +/// / RARROW2 +/// / LARROW2PIPE +fn pegBitShiftOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .angle_bracket_angle_bracket_left, + .angle_bracket_angle_bracket_right, + .angle_bracket_angle_bracket_left_pipe, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// AdditionOp +/// <- PLUS +/// / MINUS +/// / PLUS2 +/// / PLUSPERCENT +/// / MINUSPERCENT +/// / PLUSPIPE +/// / MINUSPIPE +fn pegAdditionOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .plus, + .minus, + .plus_plus, + .plus_percent, + .minus_percent, + .plus_pipe, + .minus_pipe, + }; + try a.pegTokenWhitespaceAround(tags[a.smith.index(tags.len)]); +} + +/// MultiplyOp +/// <- PIPE2 +/// / ASTERISK +/// / SLASH +/// / PERCENT +/// / ASTERISK2 +/// / ASTERISKPERCENT +/// / ASTERISKPIPE +fn pegMultiplyOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .asterisk, + .asterisk_asterisk, + .pipe_pipe, + .slash, + .percent, + .asterisk_percent, + .asterisk_pipe, + }; + const start = @as(u8, 2) * 
@intFromBool(a.not_token == .asterisk); + try a.pegTokenWhitespaceAround(tags[a.smith.valueRangeLessThan(u8, start, tags.len)]); +} + +/// PrefixOp +/// <- EXCLAMATIONMARK +/// / MINUS +/// / TILDE +/// / MINUSPERCENT +/// / AMPERSAND +/// / KEYWORD_try +fn pegPrefixOp(a: *AstSmith) SourceError!void { + const tags = [_]Token.Tag{ + .bang, + .minus, + .tilde, + .minus_percent, + .ampersand, + .keyword_try, + }; + try a.pegToken(tags[a.smith.index(tags.len)]); +} + +/// PrefixTypeOp +/// <- QUESTIONMARK +/// / KEYWORD_anyframe MINUSRARROW +/// / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? +/// KEYWORD_const? KEYWORD_volatile? +/// / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? +/// KEYWORD_const? KEYWORD_volatile? +/// / ArrayTypeStart +fn pegPrefixTypeOp(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { + optional, + anyframe_arrow, + array, + single_pointer, + many_pointer, + slice, + })) { + .optional => try a.pegToken(.question_mark), + .anyframe_arrow => { + try a.pegToken(.keyword_anyframe); + try a.pegToken(.arrow); + }, + .array => try a.pegArrayTypeStart(), + .single_pointer, .many_pointer, .slice => |kind| { + const is_single = kind == .single_pointer and a.not_token != .asterisk; + if (is_single) { + try a.pegSinglePtrTypeStart(); + } else if (kind == .many_pointer) { + try a.pegManyPtrTypeStart(); + } else { + try a.pegSliceTypeStart(); + } + + if (a.smith.value(bool)) { + try a.pegToken(.keyword_allowzero); + } + if (a.smith.value(bool)) { + if (is_single) { + try a.pegBitAlign(); + } else { + try a.pegByteAlign(); + } + } + if (a.smith.value(bool)) { + try a.pegAddrSpace(); + } + if (a.smith.value(bool)) { + try a.pegToken(.keyword_const); + } + if (a.smith.value(bool)) { + try a.pegToken(.keyword_volatile); + } + }, + } +} + +/// SuffixOp +/// <- LBRACKET Expr (DOT2 (Expr? (COLON Expr)?)?)? 
RBRACKET +/// / DOT IDENTIFIER +/// / DOTASTERISK +/// / DOTQUESTIONMARK +fn pegSuffixOp(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { slice, field, deref, unwrap })) { + .slice => { + try a.pegToken(.l_bracket); + try a.pegExpr(); + + const components = a.smith.value(u2); + if (components >= 1) try a.pegToken(.ellipsis2); + if (components >= 2) try a.pegExpr(); + if (components >= 3) { + try a.pegToken(.colon); + try a.pegExpr(); + } + + try a.pegToken(.r_bracket); + }, + .field => { + try a.pegToken(.period); + try a.pegIdentifier(); + }, + .deref => try a.pegToken(.period_asterisk), + .unwrap => { + try a.pegToken(.period); + try a.pegToken(.question_mark); + }, + } +} + +/// FnCallArguments <- LPAREN ExprList RPAREN +fn pegFnCallArguments(a: *AstSmith) SourceError!void { + try a.pegToken(.l_paren); + try a.pegExprList(); + try a.pegToken(.r_paren); +} + +/// SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET +fn pegSliceTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_bracket); +} + +/// SinglePtrTypeStart <- ASTERISK / ASTERISK2 +fn pegSinglePtrTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(if (!a.smith.value(bool)) .asterisk else .asterisk_asterisk); +} + +/// ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +fn pegManyPtrTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + try a.pegToken(.asterisk); + switch (a.smith.value(enum { many, many_c, many_sentinel })) { + .many => {}, + .many_c => { + // No need for `preservePegEndOfWord` because the previous token is an asterisk + try a.addTokenTag(.identifier); + try a.addSourceByte('c'); + }, + .many_sentinel => { + try a.pegToken(.colon); + try a.pegExpr(); + }, + } + try a.pegToken(.r_bracket); +} + +/// ArrayTypeStart <- LBRACKET !(ASTERISK / ASTERISK2) Expr (COLON Expr)? 
RBRACKET +fn pegArrayTypeStart(a: *AstSmith) SourceError!void { + try a.pegToken(.l_bracket); + a.not_token = .asterisk; + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_bracket); +} + +/// ContainerDeclAuto <- ContainerDeclType LBRACE ContainerMembers RBRACE +fn pegContainerDeclAuto(a: *AstSmith) SourceError!void { + try a.pegContainerDeclType(); + try a.pegToken(.l_brace); + try a.pegContainerMembers(); + try a.pegToken(.r_brace); +} + +/// ContainerDeclType +/// <- KEYWORD_struct (LPAREN Expr RPAREN)? +/// / KEYWORD_opaque +/// / KEYWORD_enum (LPAREN Expr RPAREN)? +/// / KEYWORD_union (LPAREN (KEYWORD_enum (LPAREN Expr RPAREN)? / !KEYWORD_enum Expr) RPAREN)? +fn pegContainerDeclType(a: *AstSmith) SourceError!void { + switch (a.smith.value(enum { @"struct", @"opaque", @"enum", @"union" })) { + .@"struct", .@"enum" => |c| { + const is_struct = c == .@"struct" or a.not_token == .keyword_enum; + try a.pegToken(if (is_struct) .keyword_struct else .keyword_enum); + if (a.smith.value(bool)) { + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + } + }, + .@"opaque" => try a.pegToken(.keyword_opaque), + .@"union" => { + try a.pegToken(.keyword_union); + switch (a.smith.value(enum { no_tag, expr_tag, enum_tag, enum_expr_tag })) { + .no_tag => {}, + .expr_tag => { + try a.pegToken(.l_paren); + a.not_token = .keyword_enum; + try a.pegExpr(); + try a.pegToken(.r_paren); + }, + .enum_tag => { + try a.pegToken(.l_paren); + try a.pegToken(.keyword_enum); + try a.pegToken(.r_paren); + }, + .enum_expr_tag => { + try a.pegToken(.l_paren); + try a.pegToken(.keyword_enum); + try a.pegToken(.l_paren); + try a.pegExpr(); + try a.pegToken(.r_paren); + try a.pegToken(.r_paren); + }, + } + }, + } +} + +/// ByteAlign <- KEYWORD_align LPAREN Expr RPAREN +fn pegByteAlign(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_align); + try a.pegToken(.l_paren); + try a.pegExpr(); + try 
a.pegToken(.r_paren); +} + +/// BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN +fn pegBitAlign(a: *AstSmith) SourceError!void { + try a.pegToken(.keyword_align); + try a.pegToken(.l_paren); + try a.pegExpr(); + if (a.smith.value(bool)) { + try a.pegToken(.colon); + try a.pegExpr(); + try a.pegToken(.colon); + try a.pegExpr(); + } + try a.pegToken(.r_paren); +} + +/// IdentifierList <- (doc_comment? IDENTIFIER COMMA)* (doc_comment? IDENTIFIER)? +fn pegIdentifierList(a: *AstSmith) SourceError!void { + while (!a.smith.eos()) { + try a.pegMaybeDocComment(); + try a.pegIdentifier(); + try a.pegToken(.comma); + } + if (a.smith.value(bool)) { + try a.pegMaybeDocComment(); + try a.pegIdentifier(); + } +} + +/// SwitchProngList <- (SwitchProng COMMA)* SwitchProng? +fn pegSwitchProngList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegSwitchProng(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegSwitchProng(); + } +} + +/// AsmOutputList <- (AsmOutputItem COMMA)* AsmOutputItem? +fn pegAsmOutputList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegAsmOutputItem(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegAsmOutputItem(); + } +} + +/// AsmInputList <- (AsmInputItem COMMA)* AsmInputItem? +fn pegAsmInputList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegAsmInputItem(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegAsmInputItem(); + } +} + +/// ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? 
+fn pegParamDeclList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegParamDecl(); + try a.pegToken(.comma); + } + const Final = enum { none, dot3, dot3_comma, param }; + switch (a.smith.valueWeighted(Final, &.{ + .rangeLessThan(Final, .none, .param, 2), + .value(Final, .param, 1), + })) { + .none => {}, + .dot3 => try a.pegToken(.ellipsis3), + .dot3_comma => { + try a.pegToken(.ellipsis3); + try a.pegToken(.comma); + }, + .param => try a.pegParamDecl(), + } +} + +/// ExprList <- (Expr COMMA)* Expr? +fn pegExprList(a: *AstSmith) SourceError!void { + while (!a.smithListItemEos()) { + try a.pegExpr(); + try a.pegToken(.comma); + } + if (a.smithListItemBool()) { + try a.pegExpr(); + } +} + +/// container_doc_comment <- ('//!' non_control_utf8* [ \n]* skip)+ +fn pegContainerDocComment(a: *AstSmith) SourceError!void { + while (true) { + try a.addTokenTag(.container_doc_comment); + try a.pegGenericLine("//!", .any); + try a.pegSkip(); + if (a.smith.eos()) break; + } +} + +/// doc_comment? +fn pegMaybeDocComment(a: *AstSmith) SourceError!void { + // A specific hash is provided here since this function is likely to be inlined, + // however having all doc comments with the same uid is beneficial. 
+ if (a.smith.boolWeightedWithHash(63, 1, 0x39b94392)) { + try a.pegDocComment(); + } +} + +/// doc_comment <- ('///' non_control_utf8* [ \n]* skip)+ +fn pegDocComment(a: *AstSmith) SourceError!void { + if (a.source_len > 0 and a.source_buf[a.source_len - 1] != '\n') { + try a.addSourceByte('\n'); + } + while (true) { + try a.addTokenTag(.doc_comment); + try a.pegGenericLine("///", .doc_comment); + try a.pegSkip(); + if (a.smith.eosWeightedSimple(1, 3)) break; + } +} + +/// line_comment <- '//' ![!/] non_control_utf8* / '////' non_control_utf8* +fn pegLineComment(a: *AstSmith) SourceError!void { + return a.pegGenericLine("//", .line_comment); +} + +/// line_string <- '\\\\' non_control_utf8* [ \n]* +fn pegLineString(a: *AstSmith) SourceError!void { + try a.addTokenTag(.multiline_string_literal_line); + return a.pegGenericLine("\\\\", .any); +} + +/// non_control_utf8 <- [\040-\377] +/// +/// Used for line, doc, and container comments as well as +/// multiline string literal lines. +fn pegGenericLine( + a: *AstSmith, + prefix: []const u8, + /// Adds constraints to what the line contains + prefix_kind: enum { any, line_comment, doc_comment }, +) SourceError!void { + const cr = a.smith.value(bool); + const newline_len = @intFromBool(cr) + @as(usize, 1); + + try a.ensureSourceCapacity(prefix.len + newline_len); + a.addSourceAssumeCapacity(prefix); + + const line = a.variableChar(newline_len, 0, &.{ + .rangeAtMost(u8, ' ', 0x7f - 1, 1), + .rangeAtMost(u8, 0x7f + 1, 0xff, 1), + }); + if (line.len >= 1) switch (prefix_kind) { + .any => {}, + .line_comment => { + // Convert doc comments to quadruple slashes when possible; + // Otherwise, and for container doc comments, erase the '/' or '!' 
+ if (line[0] == '/' and line.len >= 2) { + line[1] = '/'; + } else if (line[0] == '/' or line[0] == '!') { + line[0] = ' '; + } + }, + .doc_comment => { + // Avoid quadruple slashes + if (line[0] == '/') { + line[0] = ' '; + } + }, + }; + + if (cr) a.addSourceByteAssumeCapacity('\r'); + a.addSourceByteAssumeCapacity('\n'); +} + +/// skip <- ([ \n] / line_comment)* +fn pegSkip(a: *AstSmith) SourceError!void { + if (a.smith.boolWeighted(63, 1)) { + while (true) { + const Kind = enum { + space, + line_break, + cr_line_break, + line_comment, + line_comment_zig_fmt_off, + line_comment_zig_fmt_on, + }; + + const weights = Smith.baselineWeights(Kind) ++ + [_]Weight{.value(Kind, .space, 11)}; + switch (a.smith.valueWeighted(Kind, weights)) { + .space => try a.addSourceByte(' '), + .line_break => try a.addSourceByte('\n'), + .cr_line_break => try a.addSource("\r\n"), + .line_comment => try a.pegLineComment(), + .line_comment_zig_fmt_off => try a.addSource("//zig fmt: off\n"), + .line_comment_zig_fmt_on => try a.addSource("//zig fmt: on\n"), + } + + if (a.smith.eos()) break; + } + } +} + +const bin_weights: []const Weight = &.{.rangeAtMost(u8, '0', '1', 1)}; +const oct_weights: []const Weight = &.{.rangeAtMost(u8, '0', '7', 1)}; +const dec_weights: []const Weight = &.{.rangeAtMost(u8, '0', '9', 1)}; +const hex_weights: []const Weight = &.{ + .rangeAtMost(u8, '0', '9', 1), + .rangeAtMost(u8, 'a', 'f', 1), + .rangeAtMost(u8, 'A', 'F', 1), +}; + +/// Asserts enough capacity for at `min + reserved_capacity` +fn variableChar( + a: *AstSmith, + reserved_capacity: usize, + min: usize, + weights: []const Weight, +) []u8 { + const capacity = a.sourceCapacity(); + const max_out = capacity.len - reserved_capacity; + + const len_weights: [3]Weight = .{ + .rangeAtMost(u32, @intCast(min), @min(2, max_out), 32678), + // For the below `.rangeAtMost` is not used because max may be less than min. + // In this case, the weights are omitted. 
+ .{ .min = 3, .max = @min(16, max_out), .weight = 512 }, + // Still allow much longer sequences to test parsing overflows + .{ .min = 17, .max = @min(256, max_out), .weight = 1 }, + }; + const n_weights = @as(usize, 1) + @intFromBool(max_out >= 3) + @intFromBool(max_out >= 17); + + const len = a.smith.sliceWeighted(capacity, len_weights[0..n_weights], weights); + a.source_len += len; + return capacity[0..len]; +} + +/// char_escape +/// <- "\\x" hex hex +/// / "\\u{" hex+ "}" +/// / "\\" [nr\\t'"] +/// char_char +/// <- multibyte_utf8 +/// / char_escape +/// / ![\\'\n] non_control_ascii +/// +/// string_char +/// <- multibyte_utf8 +/// / char_escape +/// / ![\\"\n] non_control_ascii +fn pegChar(a: *AstSmith, quote: u8) SourceError!void { + const Char = enum(u8) { + ascii, + unicode_2, + unicode_3, + unicode_4, + hex_escape, + unicode_escape, + char_escape, + }; + const weights = Smith.baselineWeights(Char) ++ &[_]Weight{.value(Char, .ascii, 32)}; + switch (a.smith.valueWeighted(Char, weights)) { + .ascii => try a.addSourceByte(a.smith.valueWeighted(u8, &.{ + .rangeAtMost(u8, ' ', quote - 1, 1), + .rangeAtMost(u8, quote + 1, '\\' - 1, 1), + .rangeAtMost(u8, '\\' + 1, 0x7e, 1), + })), + .unicode_2 => assert(2 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x80, 0x800), + try a.addSourceAsSlice(2), + ) catch unreachable), + .unicode_3 => assert(3 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x800, 0x10000), + try a.addSourceAsSlice(3), + ) catch unreachable), + .unicode_4 => assert(4 == std.unicode.wtf8Encode( + a.smith.valueRangeLessThan(u21, 0x10000, 0x110000), + try a.addSourceAsSlice(4), + ) catch unreachable), + .hex_escape => { + try a.ensureSourceCapacity(4); + a.addSourceAssumeCapacity("\\x"); + a.smith.bytesWeighted(a.addSourceAsSliceAssumeCapacity(2), hex_weights); + }, + .unicode_escape => { + try a.ensureSourceCapacity(5); + a.addSourceAssumeCapacity("\\u{"); + _ = a.variableChar(1, 1, hex_weights); + 
a.addSourceByteAssumeCapacity('}'); + }, + .char_escape => { + try a.ensureSourceCapacity(2); + a.addSourceByteAssumeCapacity('\\'); + a.addSourceByteAssumeCapacity(a.smith.valueWeighted(u8, &.{ + .value(u8, 'n', 1), + .value(u8, 'r', 1), + .value(u8, 't', 1), + .value(u8, '\\', 1), + .value(u8, '\'', 1), + .value(u8, '"', 1), + })); + }, + } +} + +/// CHAR_LITERAL <- ['] char_char ['] skip +fn pegCharLiteral(a: *AstSmith) SourceError!void { + try a.addTokenTag(.char_literal); + try a.addSourceByte('\''); + try a.pegChar('\''); + try a.addSourceByte('\''); + try a.pegSkip(); +} + +///FLOAT +/// <- '0x' hex_int '.' hex_int ([pP] [-+]? dec_int)? skip +/// / dec_int '.' dec_int ([eE] [-+]? dec_int)? skip +/// / '0x' hex_int [pP] [-+]? dec_int skip +/// / dec_int [eE] [-+]? dec_int skip +fn pegFloat(a: *AstSmith) SourceError!void { + try a.preservePegEndOfWord(); + try a.addTokenTag(.number_literal); + + const hex = a.smith.value(bool); + const exp = a.smith.value(packed struct(u3) { + kind: enum(u2) { none, no_sign, minus, plus }, + upper: bool, + }); + const dot = exp.kind == .none or a.smith.value(bool); + + var reserved: usize = @intFromBool(hex) * "0x".len + "0".len + @intFromBool(dot) * ".0".len + + switch (exp.kind) { + .none => 0, + .no_sign => "e0".len, + .minus => "e-0".len, + .plus => "e+0".len, + }; + try a.ensureSourceCapacity(reserved); + + if (hex) { + reserved -= 2; + a.addSourceAssumeCapacity("0x"); + } + const digits = if (hex) hex_weights else dec_weights; + + reserved -= 1; + _ = a.variableChar(reserved, 1, digits); + + if (dot) { + reserved -= 2; + a.addSourceByteAssumeCapacity('.'); + _ = a.variableChar(reserved, 1, digits); + } + + if (exp.kind != .none) { + reserved -= 1; + const case_diff = @as(u8, 'a' - 'A') * @intFromBool(exp.upper); + a.addSourceByteAssumeCapacity(@as(u8, if (hex) 'p' else 'e') - case_diff); + + if (exp.kind != .no_sign) { + reserved -= 1; + a.addSourceByteAssumeCapacity(if (exp.kind == .plus) '+' else '-'); + } + + reserved 
-= 1; + assert(reserved == 0); + _ = a.variableChar(reserved, 1, dec_weights); + } +} + +///INTEGER +/// <- '0b' bin_int skip +/// / '0o' oct_int skip +/// / '0x' hex_int skip +/// / dec_int skip +fn pegInteger(a: *AstSmith) SourceError!void { + try a.preservePegEndOfWord(); + try a.addTokenTag(.number_literal); + const Base = enum { bin, dec, oct, hex }; + const base_weights: []const Weight = Smith.baselineWeights(Base) ++ + &[_]Weight{ .value(Base, .dec, 6), .value(Base, .hex, 2) }; + const digits, const prefix = switch (a.smith.valueWeighted(Base, base_weights)) { + .bin => .{ bin_weights, "0b" }, + .oct => .{ oct_weights, "0o" }, + .dec => .{ dec_weights, "" }, + .hex => .{ hex_weights, "0x" }, + }; + try a.ensureSourceCapacity(prefix.len + 1); + if (prefix.len != 0) a.addSourceAssumeCapacity(prefix); + _ = a.variableChar(0, 1, digits); +} + +/// Does not include 'skip'. Does not add any token tag. +fn stringLiteralSingleInner(a: *AstSmith) SourceError!void { + try a.addSourceByte('"'); + while (!a.smith.eosWeightedSimple(3, 1)) { + try a.pegChar('"'); + } + try a.addSourceByte('"'); +} + +/// STRINGLITERALSINGLE <- ["] string_char* ["] skip +fn pegStringLiteralSingle(a: *AstSmith) SourceError!void { + try a.addTokenTag(.string_literal); + try a.stringLiteralSingleInner(); + try a.pegSkip(); +} + +/// STRINGLITERAL +/// <- STRINGLITERALSINGLE +/// / (line_string skip)+ +fn pegStringLiteral(a: *AstSmith) SourceError!void { + if (a.smith.value(bool)) { + try a.pegStringLiteralSingle(); + } else { + while (true) { + try a.pegLineString(); + try a.pegSkip(); + if (a.smith.eos()) break; + } + } +} + +const alphanumeric_weights: [4]Weight = .{ + .rangeAtMost(u8, '0', '9', 1), + .rangeAtMost(u8, 'A', 'Z', 1), + .rangeAtMost(u8, 'a', 'z', 1), + .value(u8, '_', 1), +}; + +/// IDENTIFIER +/// <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip +/// / '@' STRINGLITERALSINGLE +fn pegIdentifier(a: *AstSmith) SourceError!void { + const Kind = enum(u2) { underscore, regular_identifier, 
quoted_identifier, copy_identifier }; + const kind_weights: [4]Weight = .{ + .value(Kind, .underscore, 6), + .value(Kind, .regular_identifier, 3), + .value(Kind, .quoted_identifier, 1), + .value(Kind, .copy_identifier, 6), + }; + const n_weights = @as(usize, kind_weights.len) - @intFromBool(a.prev_ids_len == 0); + const kind = a.smith.valueWeighted(Kind, kind_weights[0..n_weights]); + + switch (kind) { + .underscore => { + try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + try a.addSourceByte('_'); + }, + .regular_identifier => { + try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + + const start = a.source_len; + try a.addSourceByte(a.smith.valueWeighted(u8, alphanumeric_weights[1..])); + _ = a.variableChar(0, 0, &alphanumeric_weights); + + if (Token.getKeyword(a.source_buf[start..a.source_len]) != null) { + a.source_buf[start] = '_'; // No keywords start with '_' + } + }, + .quoted_identifier => { + try a.addTokenTag(.identifier); + try a.addSourceByte('@'); + try a.stringLiteralSingleInner(); + }, + .copy_identifier => { + const n_prev = @min(a.prev_ids_len, a.prev_ids_buf.len); + const prev_i = a.smith.valueRangeLessThan(u16, 0, n_prev); + const prev = a.prev_ids_buf[prev_i]; + + if (a.source_buf[prev.start] != '@') try a.preservePegEndOfWord(); + try a.addTokenTag(.identifier); + try a.addSource(a.source_buf[prev.start..][0..prev.len]); + }, + } + try a.pegSkip(); + if (kind != .copy_identifier) { + const start = a.token_start_buf[a.tokens_len - 1]; + a.prev_ids_buf[a.prev_ids_len % a.prev_ids_buf.len] = .{ + .start = @intCast(start), + .len = @intCast(a.source_len - start), + }; + a.prev_ids_len += 1; + } +} + +/// BUILTINIDENTIFIER <- '@'[A-Za-z_][A-Za-z0-9_]* skip +fn pegBuiltinIdentifier(a: *AstSmith) SourceError!void { + try a.addTokenTag(.builtin); + if (a.smith.boolWeighted(1, 31)) { + if (a.smith.boolWeighted(1, 8)) { + // Pointer cast (reordable with zig fmt) + const ids = [_][]const u8{ + "@ptrCast", + "@addrspaceCast", + 
"@alignCast", + "@constCast", + "@volatileCast", + }; + try a.addSource(ids[a.smith.index(ids.len)]); + } else { + const ids = std.zig.BuiltinFn.list.keys(); + try a.addSource(ids[a.smith.index(ids.len)]); + } + } else { + try a.ensureSourceCapacity(2); + a.addSourceByteAssumeCapacity('@'); + a.addSourceByteAssumeCapacity(a.smith.valueWeighted(u8, alphanumeric_weights[1..])); + _ = a.variableChar(0, 0, &alphanumeric_weights); + } + try a.pegSkip(); +} + +test AstSmith { + try std.testing.fuzz({}, checkGenerated, .{}); +} + +fn checkGenerated(_: void, smith: *Smith) !void { + var a: AstSmith = .init(smith); + try a.generateSource(); + + { // Check tokenization matches source + errdefer a.logBadSource(null); + + const token_tags = a.token_tag_buf[0..a.tokens_len]; + const token_starts = a.token_start_buf[0..a.tokens_len]; + try std.testing.expectEqual(Token.Tag.eof, token_tags[token_tags.len - 1]); + + var tokenizer: std.zig.Tokenizer = .init(a.source()); + for (token_tags, token_starts) |tag, start| { + const tok = tokenizer.next(); + try std.testing.expectEqual(tok.tag, tag); + try std.testing.expectEqual(tok.loc.start, start); + if (tag == .invalid) return error.InvalidToken; + } + } + + var fba_buf: [1 << 18]u8 = undefined; + var fba: std.heap.FixedBufferAllocator = .init(&fba_buf); + const ast = std.zig.Ast.parseTokens(fba.allocator(), a.source(), a.tokens(), .zig) catch + return error.SkipZigTest; + + errdefer a.logBadSource(ast); + try std.testing.expectEqual(0, ast.errors.len); +} + +fn logBadSource(a: *AstSmith, ast: ?std.zig.Ast) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + a.logBadSourceInner(ls.terminal(), ast) catch {}; +} + +fn logBadSourceInner(a: *AstSmith, t: std.Io.Terminal, ast: ?std.zig.Ast) std.Io.Writer.Error!void { + try a.logSourceInner(t); + const w = t.writer; + + if (ast) |bad_ast| { + try w.writeAll("=== Parse Errors ===\n"); + for (bad_ast.errors) |err| { + const loc = 
bad_ast.tokenLocation(0, err.token); + try w.print("{}:{}: ", .{ loc.line + 1, loc.column + 1 }); + try bad_ast.renderError(err, w); + try w.writeByte('\n'); + } + } else { + t.setColor(.dim) catch {}; + try w.writeAll("=== Tokens ===\n"); + t.setColor(.reset) catch {}; + for ( + 0.., + a.token_tag_buf[0..a.tokens_len], + a.token_start_buf[0..a.tokens_len], + ) |i, tag, start| { + try w.print("#{} @{}: {t}\n", .{ i, start, tag }); + } + + t.setColor(.dim) catch {}; + try w.writeAll("\n=== Expected Tokens ===\n"); + t.setColor(.reset) catch {}; + + var tokenizer: std.zig.Tokenizer = .init(a.source()); + var i: usize = 0; + while (true) { + const tok = tokenizer.next(); + try w.print("#{} @{}-{}: {t}\n", .{ i, tok.loc.start, tok.loc.end, tok.tag }); + i += 1; + if (tok.tag == .invalid or tok.tag == .eof) break; + } + } +} + +pub fn logSource(a: *AstSmith) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + a.logSourceInner(ls.terminal()) catch {}; +} + +fn logSourceInner(a: *AstSmith, t: std.Io.Terminal) std.Io.Writer.Error!void { + const w = t.writer; + + t.setColor(.dim) catch {}; + try w.writeAll("=== Source ===\n"); + t.setColor(.reset) catch {}; + + var line: usize = 1; + try w.print("{: >5} ", .{line}); + for (a.source()) |c| switch (c) { + ' '...0x7e => try w.writeByte(c), + '\n' => { + line += 1; + try w.print("\n{: >5} ", .{line}); + }, + '\r' => { + t.setColor(.cyan) catch {}; + try w.writeAll("\\r"); + t.setColor(.reset) catch {}; + }, + '\t' => { + t.setColor(.cyan) catch {}; + try w.writeAll("\\t"); + t.setColor(.reset) catch {}; + }, + else => { + t.setColor(.cyan) catch {}; + try w.print("\\x{x:0>2}", .{c}); + t.setColor(.reset) catch {}; + }, + }; + try w.writeByte('\n'); +} diff --git a/lib/std/zig/Parse.zig b/lib/std/zig/Parse.zig index 5dcc183a21..541a74c3ea 100644 --- a/lib/std/zig/Parse.zig +++ b/lib/std/zig/Parse.zig @@ -257,7 +257,7 @@ fn parseContainerMembers(p: *Parse) 
Allocator.Error!Members { while (true) { const doc_comment = try p.eatDocComments(); - switch (p.tokenTag(p.tok_i)) { + sw: switch (p.tokenTag(p.tok_i)) { .keyword_test => { if (doc_comment) |some| { try p.warnMsg(.{ .tag = .test_doc_comment, .token = some }); @@ -348,17 +348,7 @@ fn parseContainerMembers(p: *Parse) Allocator.Error!Members { p.findNextContainerMember(); }, }, - .keyword_pub => { - p.tok_i += 1; - const opt_top_level_decl = try p.expectTopLevelDeclRecoverable(); - if (opt_top_level_decl) |top_level_decl| { - if (field_state == .seen) { - field_state = .{ .end = top_level_decl }; - } - try p.scratch.append(p.gpa, top_level_decl); - } - trailing = p.tokenTag(p.tok_i - 1) == .semicolon; - }, + .keyword_pub, .keyword_const, .keyword_var, .keyword_threadlocal, @@ -367,7 +357,27 @@ fn parseContainerMembers(p: *Parse) Allocator.Error!Members { .keyword_inline, .keyword_noinline, .keyword_fn, - => { + => |t| { + if (t == .keyword_extern) { + switch (p.tokenTag(p.tok_i + 1)) { + .keyword_struct, + .keyword_union, + .keyword_enum, + .keyword_opaque, + => |ct| continue :sw ct, + else => {}, + } + } + if (t == .keyword_inline) { + switch (p.tokenTag(p.tok_i + 1)) { + .keyword_for, + .keyword_while, + => |ct| continue :sw ct, + else => {}, + } + } + + p.tok_i += @intFromBool(t == .keyword_pub); const opt_top_level_decl = try p.expectTopLevelDeclRecoverable(); if (opt_top_level_decl) |top_level_decl| { if (field_state == .seen) { @@ -588,7 +598,8 @@ fn expectTestDeclRecoverable(p: *Parse) error{OutOfMemory}!?Node.Index { } /// Decl -/// <- (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE? / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// <- (KEYWORD_export / KEYWORD_inline / KEYWORD_noinline)? FnProto (SEMICOLON / Block) +/// / KEYWORD_extern STRINGLITERALSINGLE? FnProto SEMICOLON /// / (KEYWORD_export / KEYWORD_extern STRINGLITERALSINGLE?)? KEYWORD_threadlocal? 
VarDecl fn expectTopLevelDecl(p: *Parse) !?Node.Index { const extern_export_inline_token = p.nextToken(); @@ -665,7 +676,7 @@ fn expectTopLevelDeclRecoverable(p: *Parse) error{OutOfMemory}!?Node.Index { }; } -/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr +/// FnProto <- KEYWORD_fn IDENTIFIER? LPAREN ParamDeclList RPAREN ByteAlign? AddrSpace? LinkSection? CallConv? EXCLAMATIONMARK? TypeExpr !ExprSuffix fn parseFnProto(p: *Parse) !?Node.Index { const fn_token = p.eatToken(.keyword_fn) orelse return null; @@ -853,7 +864,7 @@ fn parseGlobalVarDecl(p: *Parse) !?Node.Index { return var_decl; } -/// ContainerField <- doc_comment? KEYWORD_comptime? !KEYWORD_fn (IDENTIFIER COLON)? TypeExpr ByteAlign? (EQUAL Expr)? +/// ContainerField <- doc_comment? (KEYWORD_comptime / !KEYWORD_comptime) !KEYWORD_fn (IDENTIFIER COLON / !(IDENTIFIER COLON))? TypeExpr ByteAlign? (EQUAL Expr)? fn expectContainerField(p: *Parse) !Node.Index { _ = p.eatToken(.keyword_comptime); const main_token = p.tok_i; @@ -895,16 +906,23 @@ fn expectContainerField(p: *Parse) !Node.Index { } } -/// Statement -/// <- KEYWORD_comptime ComptimeStatement -/// / KEYWORD_nosuspend BlockExprStatement -/// / KEYWORD_suspend BlockExprStatement +/// BlockStatement +/// <- Statement /// / KEYWORD_defer BlockExprStatement /// / KEYWORD_errdefer Payload? BlockExprStatement -/// / IfStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? VarAssignStatement +/// +/// Statement +/// <- ExprStatement +/// / KEYWORD_suspend BlockExprStatement +/// / !ExprStatement (KEYWORD_comptime !BlockExpr)? 
AssignExpr SEMICOLON +/// +/// ExprStatement +/// <- IfStatement /// / LabeledStatement -/// / VarDeclExprStatement -fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { +/// / KEYWORD_nosuspend BlockExprStatement +/// / KEYWORD_comptime BlockExpr +fn expectStatement(p: *Parse, is_block_level: bool) Error!Node.Index { if (p.eatToken(.keyword_comptime)) |comptime_token| { const opt_block_expr = try p.parseBlockExpr(); if (opt_block_expr) |block_expr| { @@ -915,7 +933,7 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { }); } - if (allow_defer_var) { + if (is_block_level) { return p.expectVarDeclExprStatement(comptime_token); } else { const assign = try p.expectAssignExpr(); @@ -949,12 +967,12 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { .data = .{ .node = block_expr }, }); }, - .keyword_defer => if (allow_defer_var) return p.addNode(.{ + .keyword_defer => if (is_block_level) return p.addNode(.{ .tag = .@"defer", .main_token = p.nextToken(), .data = .{ .node = try p.expectBlockExprStatement() }, }), - .keyword_errdefer => if (allow_defer_var) return p.addNode(.{ + .keyword_errdefer => if (is_block_level) return p.addNode(.{ .tag = .@"errdefer", .main_token = p.nextToken(), .data = .{ .opt_token_and_node = .{ @@ -979,7 +997,7 @@ fn expectStatement(p: *Parse, allow_defer_var: bool) Error!Node.Index { if (try p.parseLabeledStatement()) |labeled_statement| return labeled_statement; - if (allow_defer_var) { + if (is_block_level) { return p.expectVarDeclExprStatement(null); } else { const assign = try p.expectAssignExpr(); @@ -1007,8 +1025,10 @@ fn expectComptimeStatement(p: *Parse, comptime_token: TokenIndex) !Node.Index { } /// VarDeclExprStatement -/// <- VarDeclProto (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON -/// / Expr (AssignOp Expr / (COMMA (VarDeclProto / Expr))+ EQUAL Expr)? 
SEMICOLON +/// <- Expr +/// / VarAssignStatement +/// +/// VarAssignStatement <- (VarDeclProto / Expr) (COMMA (VarDeclProto / Expr))* EQUAL Expr SEMICOLON fn expectVarDeclExprStatement(p: *Parse, comptime_token: ?TokenIndex) !Node.Index { const scratch_top = p.scratch.items.len; defer p.scratch.shrinkRetainingCapacity(scratch_top); @@ -1140,7 +1160,7 @@ fn expectStatementRecoverable(p: *Parse) Error!?Node.Index { /// IfStatement /// <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? -/// / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +/// / IfPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) fn expectIfStatement(p: *Parse) !Node.Index { const if_token = p.assertToken(.keyword_if); _ = try p.expectToken(.l_paren); @@ -1235,8 +1255,8 @@ fn parseLoopStatement(p: *Parse) !?Node.Index { } /// ForStatement -/// <- ForPrefix BlockExpr ( KEYWORD_else Statement )? -/// / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) +/// <- ForPrefix BlockExpr ( KEYWORD_else Statement / !KEYWORD_else ) +/// / ForPrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Statement ) fn parseForStatement(p: *Parse) !?Node.Index { const for_token = p.eatToken(.keyword_for) orelse return null; @@ -1293,7 +1313,7 @@ fn parseForStatement(p: *Parse) !?Node.Index { /// /// WhileStatement /// <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? -/// / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) +/// / WhilePrefix !BlockExpr AssignExpr ( SEMICOLON / KEYWORD_else Payload? 
Statement ) fn parseWhileStatement(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -1383,7 +1403,7 @@ fn parseWhileStatement(p: *Parse) !?Node.Index { /// BlockExprStatement /// <- BlockExpr -/// / AssignExpr SEMICOLON +/// / !BlockExpr AssignExpr SEMICOLON fn parseBlockExprStatement(p: *Parse) !?Node.Index { const block_expr = try p.parseBlockExpr(); if (block_expr) |expr| return expr; @@ -1685,18 +1705,20 @@ fn expectPrefixExpr(p: *Parse) Error!Node.Index { /// PrefixTypeOp /// <- QUESTIONMARK /// / KEYWORD_anyframe MINUSRARROW -/// / SliceTypeStart (ByteAlign / AddrSpace / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* +/// / (ManyPtrTypeStart / SliceTypeStart) KEYWORD_allowzero? ByteAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? +/// / SinglePtrTypeStart KEYWORD_allowzero? BitAlign? AddrSpace? KEYWORD_const? KEYWORD_volatile? /// / PtrTypeStart (AddrSpace / KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN / KEYWORD_const / KEYWORD_volatile / KEYWORD_allowzero)* /// / ArrayTypeStart /// /// SliceTypeStart <- LBRACKET (COLON Expr)? RBRACKET /// -/// PtrTypeStart -/// <- ASTERISK -/// / ASTERISK2 -/// / LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +/// SinglePtrTypeStart <- ASTERISK / ASTERISK2 /// -/// ArrayTypeStart <- LBRACKET Expr (COLON Expr)? RBRACKET +/// ManyPtrTypeStart <- LBRACKET ASTERISK (LETTERC / COLON Expr)? RBRACKET +/// +/// ArrayTypeStart <- LBRACKET Expr !(ASTERISK / ASTERISK2) (COLON Expr)? RBRACKET +/// +/// BitAlign <- KEYWORD_align LPAREN Expr (COLON Expr COLON Expr)? RPAREN fn parseTypeExpr(p: *Parse) Error!?Node.Index { switch (p.tokenTag(p.tok_i)) { .question_mark => return try p.addNode(.{ @@ -1962,12 +1984,12 @@ fn expectTypeExpr(p: *Parse) Error!Node.Index { /// PrimaryExpr /// <- AsmExpr /// / IfExpr -/// / KEYWORD_break BreakLabel? Expr? 
-/// / KEYWORD_comptime Expr -/// / KEYWORD_nosuspend Expr -/// / KEYWORD_continue BreakLabel? Expr? -/// / KEYWORD_resume Expr -/// / KEYWORD_return Expr? +/// / KEYWORD_break (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_comptime Expr !ExprSuffix +/// / KEYWORD_nosuspend Expr !ExprSuffix +/// / KEYWORD_continue (BreakLabel / !BreakLabel) (Expr !ExprSuffix / !SinglePtrTypeStart) +/// / KEYWORD_resume Expr !ExprSuffix +/// / KEYWORD_return (Expr !ExprSuffix / !SinglePtrTypeStart) /// / BlockLabel? LoopExpr /// / Block /// / CurlySuffixExpr @@ -2042,10 +2064,6 @@ fn parsePrimaryExpr(p: *Parse) !?Node.Index { p.tok_i += 2; return try p.parseWhileExpr(); }, - .l_brace => { - p.tok_i += 2; - return try p.parseBlock(); - }, else => return try p.parseCurlySuffixExpr(), } } else { @@ -2067,12 +2085,12 @@ fn parsePrimaryExpr(p: *Parse) !?Node.Index { } } -/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? +/// IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? !ExprSuffix fn parseIfExpr(p: *Parse) !?Node.Index { return try p.parseIf(expectExpr); } -/// Block <- LBRACE Statement* RBRACE +/// Block <- LBRACE BlockStatement* RBRACE fn parseBlock(p: *Parse) !?Node.Index { const lbrace = p.eatToken(.l_brace) orelse return null; const scratch_top = p.scratch.items.len; @@ -2177,7 +2195,7 @@ fn forPrefix(p: *Parse) Error!usize { /// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? /// -/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? +/// WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? 
!ExprSuffi fn parseWhileExpr(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -2409,10 +2427,10 @@ fn parseSuffixExpr(p: *Parse) !?Node.Index { /// / FnProto /// / GroupedExpr /// / LabeledTypeExpr -/// / IDENTIFIER +/// / IDENTIFIER !(COLON LabelableExpr) /// / IfTypeExpr /// / INTEGER -/// / KEYWORD_comptime TypeExpr +/// / KEYWORD_comptime TypeExpr !ExprSuffix /// / KEYWORD_error DOT IDENTIFIER /// / KEYWORD_anyframe /// / KEYWORD_unreachable @@ -2431,7 +2449,7 @@ fn parseSuffixExpr(p: *Parse) !?Node.Index { /// /// GroupedExpr <- LPAREN Expr RPAREN /// -/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +/// IfTypeExpr <- IfPrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? !ExprSuffix /// /// LabeledTypeExpr /// <- BlockLabel Block @@ -2711,7 +2729,7 @@ fn expectPrimaryTypeExpr(p: *Parse) !Node.Index { /// WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? /// -/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? +/// WhileTypeExpr <- WhilePrefix TypeExpr (KEYWORD_else Payload? TypeExpr)? 
!ExprSuffix fn parseWhileTypeExpr(p: *Parse) !?Node.Index { const while_token = p.eatToken(.keyword_while) orelse return null; _ = try p.expectToken(.l_paren); @@ -2876,7 +2894,7 @@ fn expectAsmExpr(p: *Parse) !Node.Index { }); } -/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN +/// AsmOutputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN (MINUSRARROW TypeExpr / IDENTIFIER) RPAREN fn parseAsmOutputItem(p: *Parse) !?Node.Index { _ = p.eatToken(.l_bracket) orelse return null; const identifier = try p.expectToken(.identifier); @@ -2902,7 +2920,7 @@ fn parseAsmOutputItem(p: *Parse) !?Node.Index { }); } -/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERAL LPAREN Expr RPAREN +/// AsmInputItem <- LBRACKET IDENTIFIER RBRACKET STRINGLITERALSINGLE LPAREN Expr RPAREN fn parseAsmInputItem(p: *Parse) !?Node.Index { _ = p.eatToken(.l_bracket) orelse return null; const identifier = try p.expectToken(.identifier); @@ -2923,9 +2941,7 @@ fn parseAsmInputItem(p: *Parse) !?Node.Index { /// BreakLabel <- COLON IDENTIFIER fn parseBreakLabel(p: *Parse) Error!OptionalTokenIndex { - _ = p.eatToken(.colon) orelse return .none; - const next_token = try p.expectToken(.identifier); - return .fromToken(next_token); + return if (p.eatTokens(&.{ .colon, .identifier })) |i| .fromToken(i + 1) else .none; } /// BlockLabel <- IDENTIFIER COLON @@ -2950,12 +2966,7 @@ fn expectFieldInit(p: *Parse) !Node.Index { /// WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN fn parseWhileContinueExpr(p: *Parse) !?Node.Index { - _ = p.eatToken(.colon) orelse { - if (p.tokenTag(p.tok_i) == .l_paren and - p.tokensOnSameLine(p.tok_i - 1, p.tok_i)) - return p.fail(.expected_continue_expr); - return null; - }; + _ = p.eatToken(.colon) orelse return null; _ = try p.expectToken(.l_paren); const node = try p.parseAssignExpr() orelse return p.fail(.expected_expr_or_assignment); _ = try p.expectToken(.r_paren); @@ -2993,9 +3004,7 
@@ fn parseAddrSpace(p: *Parse) !?Node.Index { /// such as in the case of anytype and `...`. Caller must look for rparen to find /// out when there are no more param decls left. /// -/// ParamDecl -/// <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime)? (IDENTIFIER COLON)? ParamType -/// / DOT3 +/// ParamDecl <- doc_comment? (KEYWORD_noalias / KEYWORD_comptime / !KEYWORD_comptime) (IDENTIFIER COLON / !(IDENTIFIER_COLON)) ParamType /// /// ParamType /// <- KEYWORD_anytype @@ -3482,7 +3491,7 @@ fn parseSwitchProngList(p: *Parse) !Node.SubRange { return p.listToSpan(p.scratch.items[scratch_top..]); } -/// ParamDeclList <- (ParamDecl COMMA)* ParamDecl? +/// ParamDeclList <- (ParamDecl COMMA)* (ParamDecl / DOT3 COMMA?)? fn parseParamDeclList(p: *Parse) !SmallSpan { _ = try p.expectToken(.l_paren); const scratch_top = p.scratch.items.len; @@ -3604,9 +3613,9 @@ fn parseIf(p: *Parse, comptime bodyParseFn: fn (p: *Parse) Error!Node.Index) !?N }); } -/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? +/// ForExpr <- ForPrefix Expr (KEYWORD_else Expr / !KEYWORD_else) !ExprSuffix /// -/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr)? 
+/// ForTypeExpr <- ForPrefix TypeExpr (KEYWORD_else TypeExpr / !KEYWORD_else) !ExprSuffix fn parseFor(p: *Parse, comptime bodyParseFn: fn (p: *Parse) Error!Node.Index) !?Node.Index { const for_token = p.eatToken(.keyword_for) orelse return null; diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index b98207f904..5f1e38239d 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -5472,17 +5472,11 @@ test "zig fmt: while continue expr" { \\ while (i > 0) \\ (i * 2); \\} + \\T: (while (true) ({ + \\ break usize; + \\})), \\ ); - try testError( - \\test { - \\ while (i > 0) (i -= 1) { - \\ print("test123", .{}); - \\ } - \\} - , &[_]Error{ - .expected_continue_expr, - }); } test "zig fmt: canonicalize symbols (simple)" { @@ -6139,6 +6133,16 @@ test "zig fmt: do not canonicalize invalid cast builtins" { ); } +test "zig fmt: canonicalize cast builtins at file start" { + try testTransform( + \\@alignCast(@ptrCast(a)), + \\ + , + \\@ptrCast(@alignCast(a)), + \\ + ); +} + test "zig fmt: extern addrspace in struct" { try testCanonical( \\const namespace = struct { @@ -6838,6 +6842,123 @@ test "zig fmt: error set with extra newline before comma" { ); } +test "zig fmt: extern container in tuple" { + try testCanonical( + \\const T = struct { + \\ extern struct {}, + \\ extern union {}, + \\ extern enum {}, + \\}; + \\ + ); +} + +test "zig fmt: break followed by colon" { + try testCanonical( + \\const a = [if (cond) len else break:0]u8; + \\ + ); +} + +test "zig fmt: array init of labeled block" { + try testCanonical( + \\const a = blk: { + \\ break :blk T; + \\}{ .a = false }; + \\ + ); +} + +test "zig fmt: nested asm indentation" { + try testCanonical( + \\const A = asm ("" + \\ : [_] "" (_), + \\ : + \\ : asm ("" + \\ : [_] "" (_), + \\ )); + \\ + ); +} + +test "zig fmt: asm with zig fmt on" { + try testCanonical( + \\// zig fmt: off + \\const A = asm("a" // zig fmt: on + \\ : [_] "" (_), + \\); + \\ + ); +} + +test "zig fmt: array 
init with multiline string literal with fmt on/off" { + try testCanonical( + \\const array = .{ + \\ \\ + \\ // zig fmt: on + \\ // zig fmt: off + \\}; + \\ + ); +} + +test "zig fmt: render extra colons with comments" { + try testCanonical( + \\const a = asm ("" + \\ : // testing + \\); + \\const b = asm ("" + \\ : // testing + \\ : // testing + \\); + \\const c = asm ("" + \\ : + \\ : // testing + \\); + \\ + ); +} + +test "zig fmt: cast builtins are not reordered with comments" { + try testCanonical( + \\const a = @volatileCast(@constCast( // ... + \\ @alignCast(@ptrCast(a)))); + \\ + \\const b = @alignCast(@ptrCast( // zig fmt: off + \\ c)); + \\ + ); +} + +test "zig fmt: inner over-indented if expressions becoming multiline" { + try testTransform( + \\const a = (b or + \\c) and [if (d) {}]T; // If the if-statement is kept on the same line it becomes multiline + \\const a = (b or + \\c)[if (d) {}]; // If the if-statement is kept on the same line it becomes multiline + \\const a = .{a, b, (c or + \\d), if (d) {}, e, f, g,}; + \\ + , + \\const a = (b or + \\ c) and [ + \\ if (d) {} + \\]T; // If the if-statement is kept on the same line it becomes multiline + \\const a = (b or + \\ c)[ + \\ if (d) {} + \\]; // If the if-statement is kept on the same line it becomes multiline + \\const a = .{ + \\ a, b, + \\ (c or + \\ d), + \\ if (d) {}, e, + \\ f, g, + \\}; + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} @@ -7258,83 +7379,61 @@ test "zig fmt: fuzz" { try std.testing.fuzz({}, fuzzRender, .{}); } -fn parseTokens( - fba: Allocator, - source: [:0]const u8, -) error{ SkipZigTest, OutOfMemory }!struct { - toks: std.zig.Ast.TokenList, - maybe_rewritable: bool, - skip_idempotency: bool, -} { +fn isRewritable(source: []const u8, tokens: std.zig.Ast.TokenList.Slice) !bool { @disableInstrumentation(); + // Byte-order marker is stripped var maybe_rewritable = std.mem.startsWith(u8, source, "\xEF\xBB\xBF"); - var skip_idempotency = false; // This 
should be able to be removed once all the bugs are fixed + // The above variable can not yet be replaced by returns since error.SkipZigTest still needs to + // be checked for. - var tokens: std.zig.Ast.TokenList = .{}; - try tokens.ensureTotalCapacity(fba, source.len / 2); - var tokenizer: std.zig.Tokenizer = .init(source); - while (true) { - const tok = tokenizer.next(); - switch (tok.tag) { - .invalid, - .invalid_periodasterisks, - => return error.SkipZigTest, - // Extra colons can be removed - .keyword_asm, - // Qualifiers can be reordered - // keyword_const is intentionally excluded since it is used in other contexts and - // having only one qualifier will never lead to reordering. - .keyword_addrspace, - .keyword_align, - .keyword_allowzero, - .keyword_callconv, - .keyword_linksection, - .keyword_volatile, - => maybe_rewritable = true, - .builtin, - // Pointer casts can be reordered - => for ([_][]const u8{ - "ptrCast", - "alignCast", - "addrSpaceCast", - "constCast", - "volatileCast", - }) |id| { - if (std.mem.eql(u8, source[tok.loc.start + 1 .. tok.loc.end], id)) { - maybe_rewritable = false; - } - }, - // Quoted identifiers can be unquoted - .identifier => maybe_rewritable = maybe_rewritable or source[tok.loc.start] == '@', - else => {}, - // #23754 - .container_doc_comment, - => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{.l_brace})) { - return error.SkipZigTest; - }, - // #24507 - .keyword_inline, - .keyword_for, - .keyword_while, - .l_brace, - => if (std.mem.endsWith(Token.Tag, tokens.items(.tag), &.{ .identifier, .colon })) { + for (0.., tokens.items(.tag), tokens.items(.start)) |i, tag, start| switch (tag) { + // Extra colons can be removed + .keyword_asm, + // Qualifiers can be reordered + // keyword_const is intentionally excluded since it is used in other contexts and + // having only one qualifier will never lead to reordering. 
+ .keyword_addrspace, + .keyword_align, + .keyword_allowzero, + .keyword_callconv, + .keyword_linksection, + .keyword_volatile, + => maybe_rewritable = true, + .builtin, + // Pointer casts can be reordered + => for ([_][]const u8{ + "ptrCast", + "alignCast", + "addrSpaceCast", + "constCast", + "volatileCast", + }) |id| { + if (std.mem.startsWith(u8, source[start + 1 ..], id)) { maybe_rewritable = true; - skip_idempotency = true; - }, - } - try tokens.append(fba, .{ - .tag = tok.tag, - .start = @intCast(tok.loc.start), - }); - if (tok.tag == .eof) - break; - } - return .{ - .toks = tokens, - .maybe_rewritable = maybe_rewritable, - .skip_idempotency = skip_idempotency, + } + }, + // Quoted identifiers can be unquoted + .identifier => if (source[start] == '@') { + maybe_rewritable = true; + }, + else => {}, + // #23754 + .container_doc_comment, + => if (std.mem.endsWith(Token.Tag, tokens.items(.tag)[0..i], &.{.l_brace})) { + return error.SkipZigTest; // Can cause I.B. + }, + // #24507 + .keyword_inline, + .keyword_for, + .keyword_while, + .l_brace, + => if (std.mem.endsWith(Token.Tag, tokens.items(.tag)[0..i], &.{ .identifier, .colon })) { + return error.SkipZigTest; // Can cause I.B. due to double rendering of zig fmt on/off + }, }; + + return maybe_rewritable; } /// Checks equivelence of non-whitespace characters. 
@@ -7447,34 +7546,29 @@ fn reparseTokens( fn fuzzRender(_: void, smith: *std.testing.Smith) !void { @disableInstrumentation(); - var src_buf: [512]u8 = undefined; - const src_len = smith.sliceWeighted(&src_buf, &.{ - .rangeLessThan(u32, 0, 32, 256), - .rangeLessThan(u32, 32, 64, 64), - .rangeLessThan(u32, 64, src_buf.len, 1), - }, &.{ - .rangeAtMost(u8, 0x20, 0x7e, 8), - .value(u8, '\n', 32), - .value(u8, '\t', 8), - .value(u8, '\r', 4), - .rangeAtMost(u8, 0x7f, 0xff, 1), - }); - src_buf[src_len] = 0; - + var ast_smith: std.zig.AstSmith = .init(smith); + try ast_smith.generateSource(); var fba_ctx = std.heap.FixedBufferAllocator.init(&fixed_buffer_mem); - fuzzRenderInner(src_buf[0..src_len :0], fba_ctx.allocator()) catch |e| return switch (e) { - error.OutOfMemory => {}, - else => e, + var opt_rendered: ?[]const u8 = null; + fuzzRenderInner(&ast_smith, fba_ctx.allocator(), &opt_rendered) catch |e| switch (e) { + error.SkipZigTest, error.OutOfMemory, error.WriteFailed => return error.SkipZigTest, + else => |failure| { + ast_smith.logSource(); + if (opt_rendered) |rendered| { + logRenderedSource(rendered); + } + return failure; + }, }; } -fn fuzzRenderInner(source: [:0]const u8, fba: Allocator) !void { +fn fuzzRenderInner(ast_smith: *std.zig.AstSmith, fba: Allocator, opt_rendered: *?[]const u8) !void { @disableInstrumentation(); - const src_toks = try parseTokens(fba, source); - const src_tree = try std.zig.Ast.parseTokens(fba, source, src_toks.toks.slice(), .zig); - if (src_tree.errors.len != 0) - return; + const source = ast_smith.source(); + const src_rewritable = try isRewritable(source, ast_smith.tokens()); + const src_tree = try std.zig.Ast.parseTokens(fba, source, ast_smith.tokens(), .zig); + std.debug.assert(src_tree.errors.len == 0); for (src_tree.nodes.items(.tag)) |tag| switch (tag) { // #24507 (`switch(x) { inline for (a) |a| a => {} }` to // `switch(x) { { inline for (a) |a| a => {} }` since @@ -7490,15 +7584,16 @@ fn fuzzRenderInner(source: [:0]const 
u8, fba: Allocator) !void { // list to save space which is useless for fixed buffer allocators. try rendered_w.writer.writeByte(0); const rendered = rendered_w.written()[0 .. rendered_w.written().len - 1 :0]; + opt_rendered.* = rendered; // First check that the non-whitespace characters match. This ensures that // identifier names, numbers, comments, et cetera are preserved. - if (!src_toks.maybe_rewritable and isRewritten(source, rendered)) + if (!src_rewritable and isRewritten(source, rendered)) return error.Rewritten; // Next check that the tokens are the same since whitespace removal can change the tokens - const src_tags = src_toks.toks.items(.tag); + const src_tags = ast_smith.tokens().items(.tag); const rendered_toks = try reparseTokens(fba, rendered, src_tags[0 .. src_tags.len - 1 :.eof]); - if (!src_toks.maybe_rewritable and rendered_toks.rewritten) + if (!src_rewritable and rendered_toks.rewritten) return error.Rewritten; // Rerender the tree to check idempotency and that new commas @@ -7506,10 +7601,39 @@ fn fuzzRenderInner(source: [:0]const u8, fba: Allocator) !void { const rendered_tree = try std.zig.Ast.parseTokens(fba, rendered, rendered_toks.toks.slice(), .zig); if (rendered_tree.errors.len != 0) return error.Rewritten; - if (!src_toks.skip_idempotency) { - var rerendered_w: std.Io.Writer.Allocating = .init(fba); - try rerendered_w.ensureUnusedCapacity(source.len); - try rendered_tree.render(fba, &rerendered_w.writer, .{}); - try std.testing.expectEqualStrings(rendered, rerendered_w.written()); - } + var rerendered_w: std.Io.Writer.Allocating = .init(fba); + try rerendered_w.ensureUnusedCapacity(source.len); + try rendered_tree.render(fba, &rerendered_w.writer, .{}); + try std.testing.expectEqualStrings(rendered, rerendered_w.written()); +} + +fn logRenderedSource(source: []const u8) void { + var buf: [256]u8 = undefined; + const ls = std.debug.lockStderr(&buf); + defer std.debug.unlockStderr(); + logRenderedSourceInner(source, ls.terminal()) catch 
{}; +} + +fn logRenderedSourceInner(source: []const u8, t: std.Io.Terminal) std.Io.Writer.Error!void { + const w = t.writer; + + t.setColor(.dim) catch {}; + try w.writeAll("=== Rendered Source ===\n"); + t.setColor(.reset) catch {}; + + for (0.., source) |i, c| switch (c) { + ' '...0x7e => try w.writeByte(c), + '\n' => { + if (i != 0 and source[i - 1] == ' ') { + try w.writeAll("⏎"); + } + try w.writeByte('\n'); + }, + else => { + t.setColor(.cyan) catch {}; + try w.print("\\x{x:0>2}", .{c}); + t.setColor(.reset) catch {}; + }, + }; + try w.writeAll("␃\n"); } diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index c296b6f533..cd74a66ce5 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -313,7 +313,8 @@ pub const Token = struct { return tag.lexeme() orelse switch (tag) { .invalid => "invalid token", .identifier => "an identifier", - .string_literal, .multiline_string_literal_line => "a string literal", + .string_literal => "a string literal", + .multiline_string_literal_line => "a multiline string literal", .char_literal => "a character literal", .eof => "EOF", .builtin => "a builtin function", diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index 266aaffeab..3d08433df9 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -170801,6 +170801,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { else => |e| return e, }; try ops[0].toSlicePtr(cg); + try ops[1].toSlicePtr(cg); cg.select(&.{}, &.{}, &ops, switch (air_tag) { else => unreachable, inline .memcpy, .memmove => |symbol| comptime &.{.{ diff --git a/test/behavior/array.zig b/test/behavior/array.zig index 2bd8555529..2f3aa841bc 100644 --- a/test/behavior/array.zig +++ b/test/behavior/array.zig @@ -45,6 +45,18 @@ fn getArrayLen(a: []const u32) usize { return a.len; } +test "runtime array concat with comptime slice" { + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if 
(builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; + + var a: [1]u8 = .{1}; + const b = (comptime @as([]const u8, &.{0})) ++ &a; + const c = &a ++ (comptime @as([]const u8, &.{0})); + try std.testing.expectEqualSlices(u8, &.{ 0, 1 }, b); + try std.testing.expectEqualSlices(u8, &.{ 1, 0 }, c); +} + test "array concat with undefined" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;