mirror of
https://codeberg.org/ziglang/zig.git
synced 2026-04-27 19:09:47 +03:00
Merge pull request #23815 from alichraghi/master
spirv: unroll all vector operations
This commit is contained in:
+1
-1
@@ -2014,7 +2014,7 @@ pub const Cpu = struct {
|
||||
.global, .local, .shared => is_gpu,
|
||||
.constant => is_gpu and (context == null or context == .constant),
|
||||
.param => is_nvptx,
|
||||
.input, .output, .uniform, .push_constant, .storage_buffer => is_spirv,
|
||||
.input, .output, .uniform, .push_constant, .storage_buffer, .physical_storage_buffer => is_spirv,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
+131
-80
@@ -1,8 +1,21 @@
|
||||
//! This file is auto-generated by tools/update_cpu_features.zig.
|
||||
|
||||
const std = @import("../std.zig");
|
||||
const CpuFeature = std.Target.Cpu.Feature;
|
||||
const CpuModel = std.Target.Cpu.Model;
|
||||
|
||||
pub const Feature = enum {
|
||||
addresses,
|
||||
arbitrary_precision_integers,
|
||||
float16,
|
||||
float64,
|
||||
generic_pointer,
|
||||
int64,
|
||||
kernel,
|
||||
matrix,
|
||||
physical_storage_buffer,
|
||||
shader,
|
||||
storage_push_constant16,
|
||||
v1_0,
|
||||
v1_1,
|
||||
v1_2,
|
||||
@@ -10,18 +23,8 @@ pub const Feature = enum {
|
||||
v1_4,
|
||||
v1_5,
|
||||
v1_6,
|
||||
int64,
|
||||
float16,
|
||||
float64,
|
||||
matrix,
|
||||
storage_push_constant16,
|
||||
arbitrary_precision_integers,
|
||||
kernel,
|
||||
addresses,
|
||||
generic_pointer,
|
||||
variable_pointers,
|
||||
vector16,
|
||||
shader,
|
||||
physical_storage_buffer,
|
||||
};
|
||||
|
||||
pub const featureSet = CpuFeature.FeatureSetFns(Feature).featureSet;
|
||||
@@ -34,6 +37,83 @@ pub const all_features = blk: {
|
||||
const len = @typeInfo(Feature).@"enum".fields.len;
|
||||
std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
|
||||
var result: [len]CpuFeature = undefined;
|
||||
result[@intFromEnum(Feature.addresses)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Addresses capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.arbitrary_precision_integers)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_5,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.float16)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Float16 capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.float64)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Float64 capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.generic_pointer)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable GenericPointer capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.addresses,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.int64)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Int64 capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.kernel)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Kernel capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.matrix)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Matrix capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.physical_storage_buffer)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.shader)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Shader capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.matrix,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.storage_push_constant16)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_3,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_0)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.0",
|
||||
@@ -42,92 +122,58 @@ pub const all_features = blk: {
|
||||
result[@intFromEnum(Feature.v1_1)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.1",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_2)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.2",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_1}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_1,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_3)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.3",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_2}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_2,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_4)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.4",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_3}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_3,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_5)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.5",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_4}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_4,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.v1_6)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable version 1.6",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_5}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_5,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.int64)] = .{
|
||||
result[@intFromEnum(Feature.variable_pointers)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Int64 capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.float16)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Float16 capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.float64)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Float64 capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.matrix)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Matrix capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.storage_push_constant16)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_3}),
|
||||
};
|
||||
result[@intFromEnum(Feature.arbitrary_precision_integers)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_5}),
|
||||
};
|
||||
result[@intFromEnum(Feature.kernel)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Kernel capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.addresses)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Addresses capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
};
|
||||
result[@intFromEnum(Feature.generic_pointer)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable GenericPointer capability",
|
||||
.dependencies = featureSet(&[_]Feature{ .v1_0, .addresses }),
|
||||
.description = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.v1_0,
|
||||
}),
|
||||
};
|
||||
result[@intFromEnum(Feature.vector16)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Vector16 capability",
|
||||
.dependencies = featureSet(&[_]Feature{ .v1_0, .kernel }),
|
||||
};
|
||||
result[@intFromEnum(Feature.shader)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable Shader capability",
|
||||
.dependencies = featureSet(&[_]Feature{ .v1_0, .matrix }),
|
||||
};
|
||||
result[@intFromEnum(Feature.physical_storage_buffer)] = .{
|
||||
.llvm_name = null,
|
||||
.description = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
|
||||
.dependencies = featureSet(&[_]Feature{.v1_0}),
|
||||
.dependencies = featureSet(&[_]Feature{
|
||||
.kernel,
|
||||
}),
|
||||
};
|
||||
const ti = @typeInfo(Feature);
|
||||
for (&result, 0..) |*elem, i| {
|
||||
@@ -141,18 +187,23 @@ pub const cpu = struct {
|
||||
pub const generic: CpuModel = .{
|
||||
.name = "generic",
|
||||
.llvm_name = "generic",
|
||||
.features = featureSet(&[_]Feature{.v1_0}),
|
||||
.features = featureSet(&[_]Feature{}),
|
||||
};
|
||||
|
||||
pub const vulkan_v1_2: CpuModel = .{
|
||||
.name = "vulkan_v1_2",
|
||||
.llvm_name = null,
|
||||
.features = featureSet(&[_]Feature{ .v1_5, .shader, .physical_storage_buffer }),
|
||||
};
|
||||
|
||||
pub const opencl_v2: CpuModel = .{
|
||||
.name = "opencl_v2",
|
||||
.llvm_name = null,
|
||||
.features = featureSet(&[_]Feature{ .v1_2, .kernel, .addresses, .generic_pointer }),
|
||||
.features = featureSet(&[_]Feature{
|
||||
.generic_pointer,
|
||||
.kernel,
|
||||
.v1_2,
|
||||
}),
|
||||
};
|
||||
pub const vulkan_v1_2: CpuModel = .{
|
||||
.name = "vulkan_v1_2",
|
||||
.llvm_name = null,
|
||||
.features = featureSet(&[_]Feature{
|
||||
.shader,
|
||||
.v1_5,
|
||||
}),
|
||||
};
|
||||
};
|
||||
|
||||
@@ -531,6 +531,7 @@ pub const AddressSpace = enum(u5) {
|
||||
uniform,
|
||||
push_constant,
|
||||
storage_buffer,
|
||||
physical_storage_buffer,
|
||||
|
||||
// AVR address spaces.
|
||||
flash,
|
||||
|
||||
+92
-138
@@ -1,81 +1,24 @@
|
||||
const std = @import("std.zig");
|
||||
|
||||
/// Will make `ptr` contain the location of the current invocation within the
|
||||
/// global workgroup. Each component is equal to the index of the local workgroup
|
||||
/// multiplied by the size of the local workgroup plus `localInvocationId`.
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn globalInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn GlobalInvocationId
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Will make that variable contain the location of the current cluster
|
||||
/// culling, task, mesh, or compute shader invocation within the local
|
||||
/// workgroup. Each component ranges from zero through to the size of the
|
||||
/// workgroup in that dimension minus one.
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn localInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn LocalInvocationId
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Output vertex position from a `Vertex` entrypoint
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn position(comptime ptr: *addrspace(.output) @Vector(4, f32)) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn Position
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Will make `ptr` contain the index of the vertex that is
|
||||
/// being processed by the current vertex shader invocation.
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn vertexIndex(comptime ptr: *addrspace(.input) u32) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn VertexIndex
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Will make `ptr` contain the index of the instance that is
|
||||
/// being processed by the current vertex shader invocation.
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn instanceIndex(comptime ptr: *addrspace(.input) u32) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn InstanceIndex
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Output fragment depth from a `Fragment` entrypoint
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn fragmentCoord(comptime ptr: *addrspace(.input) @Vector(4, f32)) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn FragCoord
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
|
||||
/// Output fragment depth from a `Fragment` entrypoint
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
pub fn fragmentDepth(comptime ptr: *addrspace(.output) f32) void {
|
||||
asm volatile (
|
||||
\\OpDecorate %ptr BuiltIn FragDepth
|
||||
:
|
||||
: [ptr] "" (ptr),
|
||||
);
|
||||
}
|
||||
pub const position_in = @extern(*addrspace(.input) @Vector(4, f32), .{ .name = "position" });
|
||||
pub const position_out = @extern(*addrspace(.output) @Vector(4, f32), .{ .name = "position" });
|
||||
pub const point_size_in = @extern(*addrspace(.input) f32, .{ .name = "point_size" });
|
||||
pub const point_size_out = @extern(*addrspace(.output) f32, .{ .name = "point_size" });
|
||||
pub extern const invocation_id: u32 addrspace(.input);
|
||||
pub extern const frag_coord: @Vector(4, f32) addrspace(.input);
|
||||
pub extern const point_coord: @Vector(2, f32) addrspace(.input);
|
||||
// TODO: direct/indirect values
|
||||
// pub extern const front_facing: bool addrspace(.input);
|
||||
// TODO: runtime array
|
||||
// pub extern const sample_mask;
|
||||
pub extern var frag_depth: f32 addrspace(.output);
|
||||
pub extern const num_workgroups: @Vector(3, u32) addrspace(.input);
|
||||
pub extern const workgroup_size: @Vector(3, u32) addrspace(.input);
|
||||
pub extern const workgroup_id: @Vector(3, u32) addrspace(.input);
|
||||
pub extern const local_invocation_id: @Vector(3, u32) addrspace(.input);
|
||||
pub extern const global_invocation_id: @Vector(3, u32) addrspace(.input);
|
||||
pub extern const vertex_index: u32 addrspace(.input);
|
||||
pub extern const instance_index: u32 addrspace(.input);
|
||||
|
||||
/// Forms the main linkage for `input` and `output` address spaces.
|
||||
/// `ptr` must be a reference to variable or struct field.
|
||||
@@ -101,74 +44,85 @@ pub fn binding(comptime ptr: anytype, comptime set: u32, comptime bind: u32) voi
|
||||
);
|
||||
}
|
||||
|
||||
pub const Origin = enum(u32) {
|
||||
/// Increase toward the right and downward
|
||||
upper_left = 7,
|
||||
/// Increase toward the right and upward
|
||||
lower_left = 8,
|
||||
};
|
||||
|
||||
/// The coordinates appear to originate in the specified `origin`.
|
||||
/// Only valid with the `Fragment` calling convention.
|
||||
pub fn fragmentOrigin(comptime entry_point: anytype, comptime origin: Origin) void {
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point $origin
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[origin] "c" (@intFromEnum(origin)),
|
||||
);
|
||||
}
|
||||
|
||||
pub const DepthMode = enum(u32) {
|
||||
/// Declares that this entry point dynamically writes the
|
||||
/// `fragmentDepth` built in-decorated variable.
|
||||
replacing = 12,
|
||||
pub const ExecutionMode = union(Tag) {
|
||||
/// Sets origin of the framebuffer to the upper-left corner
|
||||
origin_upper_left,
|
||||
/// Sets origin of the framebuffer to the lower-left corner
|
||||
origin_lower_left,
|
||||
/// Indicates that the fragment shader writes to `frag_depth`,
|
||||
/// replacing the fixed-function depth value.
|
||||
depth_replacing,
|
||||
/// Indicates that per-fragment tests may assume that
|
||||
/// any `fragmentDepth` built in-decorated value written by the shader is
|
||||
/// any `frag_depth` built in-decorated value written by the shader is
|
||||
/// greater-than-or-equal to the fragment’s interpolated depth value
|
||||
greater = 14,
|
||||
depth_greater,
|
||||
/// Indicates that per-fragment tests may assume that
|
||||
/// any `fragmentDepth` built in-decorated value written by the shader is
|
||||
/// any `frag_depth` built in-decorated value written by the shader is
|
||||
/// less-than-or-equal to the fragment’s interpolated depth value
|
||||
less = 15,
|
||||
depth_less,
|
||||
/// Indicates that per-fragment tests may assume that
|
||||
/// any `fragmentDepth` built in-decorated value written by the shader is
|
||||
/// any `frag_depth` built in-decorated value written by the shader is
|
||||
/// the same as the fragment’s interpolated depth value
|
||||
unchanged = 16,
|
||||
depth_unchanged,
|
||||
/// Indicates the workgroup size in the x, y, and z dimensions.
|
||||
local_size: LocalSize,
|
||||
|
||||
pub const Tag = enum(u32) {
|
||||
origin_upper_left = 7,
|
||||
origin_lower_left = 8,
|
||||
depth_replacing = 12,
|
||||
depth_greater = 14,
|
||||
depth_less = 15,
|
||||
depth_unchanged = 16,
|
||||
local_size = 17,
|
||||
};
|
||||
|
||||
pub const LocalSize = struct { x: u32, y: u32, z: u32 };
|
||||
};
|
||||
|
||||
/// Only valid with the `Fragment` calling convention.
|
||||
pub fn depthMode(comptime entry_point: anytype, comptime mode: DepthMode) void {
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point $mode
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[mode] "c" (mode),
|
||||
);
|
||||
}
|
||||
|
||||
/// Indicates the workgroup size in the `x`, `y`, and `z` dimensions.
|
||||
/// Only valid with the `GLCompute` or `Kernel` calling conventions.
|
||||
pub fn workgroupSize(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point LocalSize %x %y %z
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[x] "c" (size[0]),
|
||||
[y] "c" (size[1]),
|
||||
[z] "c" (size[2]),
|
||||
);
|
||||
}
|
||||
|
||||
/// A hint to the client, which indicates the workgroup size in the `x`, `y`, and `z` dimensions.
|
||||
/// Only valid with the `GLCompute` or `Kernel` calling conventions.
|
||||
pub fn workgroupSizeHint(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point LocalSizeHint %x %y %z
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[x] "c" (size[0]),
|
||||
[y] "c" (size[1]),
|
||||
[z] "c" (size[2]),
|
||||
);
|
||||
/// Declare the mode entry point executes in.
|
||||
pub fn executionMode(comptime entry_point: anytype, comptime mode: ExecutionMode) void {
|
||||
const cc = @typeInfo(@TypeOf(entry_point)).@"fn".calling_convention;
|
||||
switch (mode) {
|
||||
.origin_upper_left,
|
||||
.origin_lower_left,
|
||||
.depth_replacing,
|
||||
.depth_greater,
|
||||
.depth_less,
|
||||
.depth_unchanged,
|
||||
=> {
|
||||
if (cc != .spirv_fragment) {
|
||||
@compileError(
|
||||
\\invalid execution mode '
|
||||
++ @tagName(mode) ++
|
||||
\\' for function with '
|
||||
++ @tagName(cc) ++
|
||||
\\' calling convention
|
||||
);
|
||||
}
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point $mode
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[mode] "c" (@intFromEnum(mode)),
|
||||
);
|
||||
},
|
||||
.local_size => |size| {
|
||||
if (cc != .spirv_kernel) {
|
||||
@compileError(
|
||||
\\invalid execution mode 'local_size' for function with '
|
||||
++ @tagName(cc) ++
|
||||
\\' calling convention
|
||||
);
|
||||
}
|
||||
asm volatile (
|
||||
\\OpExecutionMode %entry_point LocalSize $x $y $z
|
||||
:
|
||||
: [entry_point] "" (entry_point),
|
||||
[x] "c" (size.x),
|
||||
[y] "c" (size.y),
|
||||
[z] "c" (size.z),
|
||||
);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
+1
-1
@@ -3693,7 +3693,7 @@ pub fn errorSetBits(zcu: *const Zcu) u16 {
|
||||
const target = zcu.getTarget();
|
||||
|
||||
if (zcu.error_limit == 0) return 0;
|
||||
if (target.cpu.arch == .spirv64) {
|
||||
if (target.cpu.arch.isSpirV()) {
|
||||
if (!std.Target.spirv.featureSetHas(target.cpu.features, .storage_push_constant16)) {
|
||||
return 32;
|
||||
}
|
||||
|
||||
+286
-440
@@ -30,6 +30,7 @@ const SpvAssembler = @import("spirv/Assembler.zig");
|
||||
const InstMap = std.AutoHashMapUnmanaged(Air.Inst.Index, IdRef);
|
||||
|
||||
pub const zig_call_abi_ver = 3;
|
||||
pub const big_int_bits = 32;
|
||||
|
||||
const InternMap = std.AutoHashMapUnmanaged(struct { InternPool.Index, NavGen.Repr }, IdResult);
|
||||
const PtrTypeMap = std.AutoHashMapUnmanaged(
|
||||
@@ -169,12 +170,10 @@ pub const Object = struct {
|
||||
/// via the usual `intern_map` mechanism.
|
||||
ptr_types: PtrTypeMap = .{},
|
||||
|
||||
/// For test declarations for Vulkan, we have to add a push constant with a pointer to a
|
||||
/// buffer that we can use. We only need to generate this once, this holds the link information
|
||||
/// For test declarations for Vulkan, we have to add a buffer.
|
||||
/// We only need to generate this once, this holds the link information
|
||||
/// related to that.
|
||||
error_push_constant: ?struct {
|
||||
push_constant_ptr: SpvModule.Decl.Index,
|
||||
} = null,
|
||||
error_buffer: ?SpvModule.Decl.Index = null,
|
||||
|
||||
pub fn init(gpa: Allocator, target: std.Target) Object {
|
||||
return .{
|
||||
@@ -344,8 +343,7 @@ const NavGen = struct {
|
||||
|
||||
/// This structure is used to return information about a type typically used for
|
||||
/// arithmetic operations. These types may either be integers, floats, or a vector
|
||||
/// of these. Most scalar operations also work on vectors, so we can easily represent
|
||||
/// those as arithmetic types. If the type is a scalar, 'inner type' refers to the
|
||||
/// of these. If the type is a scalar, 'inner type' refers to the
|
||||
/// scalar type. Otherwise, if its a vector, it refers to the vector's element type.
|
||||
const ArithmeticTypeInfo = struct {
|
||||
/// A classification of the inner type.
|
||||
@@ -379,7 +377,7 @@ const NavGen = struct {
|
||||
/// The number of bits required to store the type.
|
||||
/// For `integer` and `float`, this is equal to `bits`.
|
||||
/// For `strange_integer` and `bool` this is the size of the backing integer.
|
||||
/// For `composite_integer` this is 0 (TODO)
|
||||
/// For `composite_integer` this is the elements count.
|
||||
backing_bits: u16,
|
||||
|
||||
/// Null if this type is a scalar, or the length
|
||||
@@ -582,11 +580,13 @@ const NavGen = struct {
|
||||
/// The backing type will be chosen as the smallest supported integer larger or equal to it in number of bits.
|
||||
/// The result is valid to be used with OpTypeInt.
|
||||
/// TODO: Should the result of this function be cached?
|
||||
fn backingIntBits(self: *NavGen, bits: u16) ?u16 {
|
||||
fn backingIntBits(self: *NavGen, bits: u16) struct { u16, bool } {
|
||||
// The backend will never be asked to compiler a 0-bit integer, so we won't have to handle those in this function.
|
||||
assert(bits != 0);
|
||||
|
||||
if (self.spv.hasFeature(.arbitrary_precision_integers) and bits <= 32) return bits;
|
||||
if (self.spv.hasFeature(.arbitrary_precision_integers) and bits <= 32) {
|
||||
return .{ bits, false };
|
||||
}
|
||||
|
||||
// We require Int8 and Int16 capabilities and benefit Int64 when available.
|
||||
// 32-bit integers are always supported (see spec, 2.16.1, Data rules).
|
||||
@@ -599,10 +599,11 @@ const NavGen = struct {
|
||||
|
||||
for (ints) |int| {
|
||||
const has_feature = if (int.feature) |feature| self.spv.hasFeature(feature) else true;
|
||||
if (bits <= int.bits and has_feature) return int.bits;
|
||||
if (bits <= int.bits and has_feature) return .{ int.bits, false };
|
||||
}
|
||||
|
||||
return null;
|
||||
// Big int
|
||||
return .{ std.mem.alignForward(u16, bits, big_int_bits), true };
|
||||
}
|
||||
|
||||
/// Return the amount of bits in the largest supported integer type. This is either 32 (always supported), or 64 (if
|
||||
@@ -615,11 +616,48 @@ const NavGen = struct {
|
||||
return if (self.spv.hasFeature(.int64)) 64 else 32;
|
||||
}
|
||||
|
||||
/// Checks whether the type is "composite int", an integer consisting of multiple native integers. These are represented by
|
||||
/// arrays of largestSupportedIntBits().
|
||||
/// Asserts `ty` is an integer.
|
||||
fn isCompositeInt(self: *NavGen, ty: Type) bool {
|
||||
return self.backingIntBits(ty) == null;
|
||||
fn arithmeticTypeInfo(self: *NavGen, ty: Type) ArithmeticTypeInfo {
|
||||
const zcu = self.pt.zcu;
|
||||
const target = self.spv.target;
|
||||
var scalar_ty = ty.scalarType(zcu);
|
||||
if (scalar_ty.zigTypeTag(zcu) == .@"enum") {
|
||||
scalar_ty = scalar_ty.intTagType(zcu);
|
||||
}
|
||||
const vector_len = if (ty.isVector(zcu)) ty.vectorLen(zcu) else null;
|
||||
return switch (scalar_ty.zigTypeTag(zcu)) {
|
||||
.bool => .{
|
||||
.bits = 1, // Doesn't matter for this class.
|
||||
.backing_bits = self.backingIntBits(1).@"0",
|
||||
.vector_len = vector_len,
|
||||
.signedness = .unsigned, // Technically, but doesn't matter for this class.
|
||||
.class = .bool,
|
||||
},
|
||||
.float => .{
|
||||
.bits = scalar_ty.floatBits(target),
|
||||
.backing_bits = scalar_ty.floatBits(target), // TODO: F80?
|
||||
.vector_len = vector_len,
|
||||
.signedness = .signed, // Technically, but doesn't matter for this class.
|
||||
.class = .float,
|
||||
},
|
||||
.int => blk: {
|
||||
const int_info = scalar_ty.intInfo(zcu);
|
||||
// TODO: Maybe it's useful to also return this value.
|
||||
const backing_bits, const big_int = self.backingIntBits(int_info.bits);
|
||||
break :blk .{
|
||||
.bits = int_info.bits,
|
||||
.backing_bits = backing_bits,
|
||||
.vector_len = vector_len,
|
||||
.signedness = int_info.signedness,
|
||||
.class = class: {
|
||||
if (big_int) break :class .composite_integer;
|
||||
break :class if (backing_bits == int_info.bits) .integer else .strange_integer;
|
||||
},
|
||||
};
|
||||
},
|
||||
.@"enum" => unreachable,
|
||||
.vector => unreachable,
|
||||
else => unreachable, // Unhandled arithmetic type
|
||||
};
|
||||
}
|
||||
|
||||
/// Checks whether the type can be directly translated to SPIR-V vectors
|
||||
@@ -650,53 +688,6 @@ const NavGen = struct {
|
||||
return false;
|
||||
}
|
||||
|
||||
fn arithmeticTypeInfo(self: *NavGen, ty: Type) ArithmeticTypeInfo {
|
||||
const zcu = self.pt.zcu;
|
||||
const target = self.spv.target;
|
||||
var scalar_ty = ty.scalarType(zcu);
|
||||
if (scalar_ty.zigTypeTag(zcu) == .@"enum") {
|
||||
scalar_ty = scalar_ty.intTagType(zcu);
|
||||
}
|
||||
const vector_len = if (ty.isVector(zcu)) ty.vectorLen(zcu) else null;
|
||||
return switch (scalar_ty.zigTypeTag(zcu)) {
|
||||
.bool => ArithmeticTypeInfo{
|
||||
.bits = 1, // Doesn't matter for this class.
|
||||
.backing_bits = self.backingIntBits(1).?,
|
||||
.vector_len = vector_len,
|
||||
.signedness = .unsigned, // Technically, but doesn't matter for this class.
|
||||
.class = .bool,
|
||||
},
|
||||
.float => ArithmeticTypeInfo{
|
||||
.bits = scalar_ty.floatBits(target),
|
||||
.backing_bits = scalar_ty.floatBits(target), // TODO: F80?
|
||||
.vector_len = vector_len,
|
||||
.signedness = .signed, // Technically, but doesn't matter for this class.
|
||||
.class = .float,
|
||||
},
|
||||
.int => blk: {
|
||||
const int_info = scalar_ty.intInfo(zcu);
|
||||
// TODO: Maybe it's useful to also return this value.
|
||||
const maybe_backing_bits = self.backingIntBits(int_info.bits);
|
||||
break :blk ArithmeticTypeInfo{
|
||||
.bits = int_info.bits,
|
||||
.backing_bits = maybe_backing_bits orelse 0,
|
||||
.vector_len = vector_len,
|
||||
.signedness = int_info.signedness,
|
||||
.class = if (maybe_backing_bits) |backing_bits|
|
||||
if (backing_bits == int_info.bits)
|
||||
ArithmeticTypeInfo.Class.integer
|
||||
else
|
||||
ArithmeticTypeInfo.Class.strange_integer
|
||||
else
|
||||
.composite_integer,
|
||||
};
|
||||
},
|
||||
.@"enum" => unreachable,
|
||||
.vector => unreachable,
|
||||
else => unreachable, // Unhandled arithmetic type
|
||||
};
|
||||
}
|
||||
|
||||
/// Emits a bool constant in a particular representation.
|
||||
fn constBool(self: *NavGen, value: bool, repr: Repr) !IdRef {
|
||||
return switch (repr) {
|
||||
@@ -713,14 +704,26 @@ const NavGen = struct {
|
||||
const scalar_ty = ty.scalarType(zcu);
|
||||
const int_info = scalar_ty.intInfo(zcu);
|
||||
// Use backing bits so that negatives are sign extended
|
||||
const backing_bits = self.backingIntBits(int_info.bits).?; // Assertion failure means big int
|
||||
const backing_bits, const big_int = self.backingIntBits(int_info.bits);
|
||||
assert(backing_bits != 0); // u0 is comptime
|
||||
|
||||
const result_ty_id = try self.resolveType(scalar_ty, .indirect);
|
||||
const signedness: Signedness = switch (@typeInfo(@TypeOf(value))) {
|
||||
.int => |int| int.signedness,
|
||||
.comptime_int => if (value < 0) .signed else .unsigned,
|
||||
else => unreachable,
|
||||
};
|
||||
if (@sizeOf(@TypeOf(value)) >= 4 and big_int) {
|
||||
const value64: u64 = switch (signedness) {
|
||||
.signed => @bitCast(@as(i64, @intCast(value))),
|
||||
.unsigned => @as(u64, @intCast(value)),
|
||||
};
|
||||
assert(backing_bits == 64);
|
||||
return self.constructComposite(result_ty_id, &.{
|
||||
try self.constInt(.u32, @as(u32, @truncate(value64))),
|
||||
try self.constInt(.u32, @as(u32, @truncate(value64 << 32))),
|
||||
});
|
||||
}
|
||||
|
||||
const final_value: spec.LiteralContextDependentNumber = blk: {
|
||||
if (self.spv.hasFeature(.kernel)) {
|
||||
@@ -738,18 +741,17 @@ const NavGen = struct {
|
||||
break :blk switch (backing_bits) {
|
||||
1...32 => .{ .uint32 = @truncate(truncated_value) },
|
||||
33...64 => .{ .uint64 = truncated_value },
|
||||
else => unreachable, // TODO: Large integer constants
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
|
||||
break :blk switch (backing_bits) {
|
||||
1...32 => if (signedness == .signed) .{ .int32 = @intCast(value) } else .{ .uint32 = @intCast(value) },
|
||||
33...64 => if (signedness == .signed) .{ .int64 = value } else .{ .uint64 = value },
|
||||
else => unreachable, // TODO: Large integer constants
|
||||
else => unreachable,
|
||||
};
|
||||
};
|
||||
|
||||
const result_ty_id = try self.resolveType(scalar_ty, .indirect);
|
||||
const result_id = try self.spv.constant(result_ty_id, final_value);
|
||||
|
||||
if (!ty.isVector(zcu)) return result_id;
|
||||
@@ -987,7 +989,7 @@ const NavGen = struct {
|
||||
// TODO: composite int
|
||||
// TODO: endianness
|
||||
const bits: u16 = @intCast(ty.bitSize(zcu));
|
||||
const bytes = std.mem.alignForward(u16, self.backingIntBits(bits).?, 8) / 8;
|
||||
const bytes = std.mem.alignForward(u16, self.backingIntBits(bits).@"0", 8) / 8;
|
||||
var limbs: [8]u8 = undefined;
|
||||
@memset(&limbs, 0);
|
||||
val.writeToPackedMemory(ty, pt, limbs[0..bytes], 0) catch unreachable;
|
||||
@@ -1106,19 +1108,11 @@ const NavGen = struct {
|
||||
const parent_ptr_id = try self.derivePtr(oac.parent.*);
|
||||
const parent_ptr_ty = try oac.parent.ptrType(pt);
|
||||
const result_ty_id = try self.resolveType(oac.new_ptr_ty, .direct);
|
||||
const child_size = oac.new_ptr_ty.childType(zcu).abiSize(zcu);
|
||||
|
||||
if (oac.byte_offset != 0) {
|
||||
const child_size = oac.new_ptr_ty.childType(zcu).abiSize(zcu);
|
||||
if (oac.byte_offset % child_size != 0) {
|
||||
return self.fail("cannot perform pointer cast: '{}' to '{}'", .{
|
||||
parent_ptr_ty.fmt(pt),
|
||||
oac.new_ptr_ty.fmt(pt),
|
||||
});
|
||||
}
|
||||
|
||||
if (parent_ptr_ty.childType(zcu).isVector(zcu) and oac.byte_offset % child_size == 0) {
|
||||
// Vector element ptr accesses are derived as offset_and_cast.
|
||||
// We can just use OpAccessChain.
|
||||
assert(parent_ptr_ty.childType(zcu).zigTypeTag(zcu) == .vector);
|
||||
return self.accessChain(
|
||||
result_ty_id,
|
||||
parent_ptr_id,
|
||||
@@ -1126,15 +1120,22 @@ const NavGen = struct {
|
||||
);
|
||||
}
|
||||
|
||||
// Allow changing the pointer type child only to restructure arrays.
|
||||
// e.g. [3][2]T to T is fine, as is [2]T -> [2][1]T.
|
||||
const result_ptr_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpBitcast, .{
|
||||
.id_result_type = result_ty_id,
|
||||
.id_result = result_ptr_id,
|
||||
.operand = parent_ptr_id,
|
||||
if (oac.byte_offset == 0) {
|
||||
// Allow changing the pointer type child only to restructure arrays.
|
||||
// e.g. [3][2]T to T is fine, as is [2]T -> [2][1]T.
|
||||
const result_ptr_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpBitcast, .{
|
||||
.id_result_type = result_ty_id,
|
||||
.id_result = result_ptr_id,
|
||||
.operand = parent_ptr_id,
|
||||
});
|
||||
return result_ptr_id;
|
||||
}
|
||||
|
||||
return self.fail("cannot perform pointer cast: '{}' to '{}'", .{
|
||||
parent_ptr_ty.fmt(pt),
|
||||
oac.new_ptr_ty.fmt(pt),
|
||||
});
|
||||
return result_ptr_id;
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -1255,11 +1256,14 @@ const NavGen = struct {
|
||||
/// actual operations (as well as store) a Zig type of a particular number of bits. To create
|
||||
/// a type with an exact size, use SpvModule.intType.
|
||||
fn intType(self: *NavGen, signedness: std.builtin.Signedness, bits: u16) !IdRef {
|
||||
const backing_bits = self.backingIntBits(bits) orelse {
|
||||
// TODO: Integers too big for any native type are represented as "composite integers":
|
||||
// An array of largestSupportedIntBits.
|
||||
return self.todo("Implement {s} composite int type of {} bits", .{ @tagName(signedness), bits });
|
||||
};
|
||||
const backing_bits, const big_int = self.backingIntBits(bits);
|
||||
if (big_int) {
|
||||
if (backing_bits > 64) {
|
||||
return self.fail("composite integers larger than 64bit aren't supported", .{});
|
||||
}
|
||||
const int_ty = try self.resolveType(.u32, .direct);
|
||||
return self.arrayType(backing_bits / big_int_bits, int_ty);
|
||||
}
|
||||
|
||||
// Kernel only supports unsigned ints.
|
||||
if (self.spv.hasFeature(.kernel)) {
|
||||
@@ -1338,19 +1342,6 @@ const NavGen = struct {
|
||||
return self.spv.functionType(return_ty_id, param_ids);
|
||||
}
|
||||
|
||||
fn zigScalarOrVectorTypeLike(self: *NavGen, new_ty: Type, base_ty: Type) !Type {
|
||||
const pt = self.pt;
|
||||
const new_scalar_ty = new_ty.scalarType(pt.zcu);
|
||||
if (!base_ty.isVector(pt.zcu)) {
|
||||
return new_scalar_ty;
|
||||
}
|
||||
|
||||
return try pt.vectorType(.{
|
||||
.len = base_ty.vectorLen(pt.zcu),
|
||||
.child = new_scalar_ty.toIntern(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Generate a union type. Union types are always generated with the
|
||||
/// most aligned field active. If the tag alignment is greater
|
||||
/// than that of the payload, a regular union (non-packed, with both tag and
|
||||
@@ -1560,6 +1551,17 @@ const NavGen = struct {
|
||||
return result_id;
|
||||
}
|
||||
},
|
||||
.vector => {
|
||||
const elem_ty = ty.childType(zcu);
|
||||
const elem_ty_id = try self.resolveType(elem_ty, repr);
|
||||
const len = ty.vectorLen(zcu);
|
||||
|
||||
if (self.isSpvVector(ty)) {
|
||||
return try self.spv.vectorType(len, elem_ty_id);
|
||||
} else {
|
||||
return try self.arrayType(len, elem_ty_id);
|
||||
}
|
||||
},
|
||||
.@"fn" => switch (repr) {
|
||||
.direct => {
|
||||
const fn_info = zcu.typeToFunc(ty).?;
|
||||
@@ -1628,17 +1630,6 @@ const NavGen = struct {
|
||||
);
|
||||
return result_id;
|
||||
},
|
||||
.vector => {
|
||||
const elem_ty = ty.childType(zcu);
|
||||
const elem_ty_id = try self.resolveType(elem_ty, repr);
|
||||
const len = ty.vectorLen(zcu);
|
||||
|
||||
if (self.isSpvVector(ty)) {
|
||||
return try self.spv.vectorType(len, elem_ty_id);
|
||||
} else {
|
||||
return try self.arrayType(len, elem_ty_id);
|
||||
}
|
||||
},
|
||||
.@"struct" => {
|
||||
const struct_type = switch (ip.indexToKey(ty.toIntern())) {
|
||||
.tuple_type => |tuple| {
|
||||
@@ -1793,15 +1784,34 @@ const NavGen = struct {
|
||||
fn spvStorageClass(self: *NavGen, as: std.builtin.AddressSpace) StorageClass {
|
||||
return switch (as) {
|
||||
.generic => if (self.spv.hasFeature(.generic_pointer)) .Generic else .Function,
|
||||
.global => {
|
||||
if (self.spv.hasFeature(.kernel)) return .CrossWorkgroup;
|
||||
return .StorageBuffer;
|
||||
},
|
||||
.push_constant => {
|
||||
assert(self.spv.hasFeature(.shader));
|
||||
return .PushConstant;
|
||||
},
|
||||
.output => {
|
||||
assert(self.spv.hasFeature(.shader));
|
||||
return .Output;
|
||||
},
|
||||
.uniform => {
|
||||
assert(self.spv.hasFeature(.shader));
|
||||
return .Uniform;
|
||||
},
|
||||
.storage_buffer => {
|
||||
assert(self.spv.hasFeature(.shader));
|
||||
return .StorageBuffer;
|
||||
},
|
||||
.physical_storage_buffer => {
|
||||
assert(self.spv.hasFeature(.physical_storage_buffer));
|
||||
return .PhysicalStorageBuffer;
|
||||
},
|
||||
.constant => .UniformConstant,
|
||||
.shared => .Workgroup,
|
||||
.local => .Function,
|
||||
.global => if (self.spv.hasFeature(.shader)) .PhysicalStorageBuffer else .CrossWorkgroup,
|
||||
.constant => .UniformConstant,
|
||||
.push_constant => .PushConstant,
|
||||
.input => .Input,
|
||||
.output => .Output,
|
||||
.uniform => .Uniform,
|
||||
.storage_buffer => .StorageBuffer,
|
||||
.gs,
|
||||
.fs,
|
||||
.ss,
|
||||
@@ -2035,69 +2045,32 @@ const NavGen = struct {
|
||||
const Vectorization = union(enum) {
|
||||
/// This is an operation between scalars.
|
||||
scalar,
|
||||
/// This is an operation between SPIR-V vectors.
|
||||
/// Value is number of components.
|
||||
spv_vectorized: u32,
|
||||
/// This operation is unrolled into separate operations.
|
||||
/// Inputs may still be SPIR-V vectors, for example,
|
||||
/// when the operation can't be vectorized in SPIR-V.
|
||||
/// Value is number of components.
|
||||
unrolled: u32,
|
||||
|
||||
/// Derive a vectorization from a particular type. This usually
|
||||
/// only checks the size, but the source-of-truth is implemented
|
||||
/// by `isSpvVector()`.
|
||||
/// Derive a vectorization from a particular type
|
||||
fn fromType(ty: Type, ng: *NavGen) Vectorization {
|
||||
const zcu = ng.pt.zcu;
|
||||
if (!ty.isVector(zcu)) {
|
||||
return .scalar;
|
||||
} else if (ng.isSpvVector(ty)) {
|
||||
return .{ .spv_vectorized = ty.vectorLen(zcu) };
|
||||
} else {
|
||||
return .{ .unrolled = ty.vectorLen(zcu) };
|
||||
}
|
||||
if (!ty.isVector(zcu)) return .scalar;
|
||||
return .{ .unrolled = ty.vectorLen(zcu) };
|
||||
}
|
||||
|
||||
/// Given two vectorization methods, compute a "unification": a fallback
|
||||
/// that works for both, according to the following rules:
|
||||
/// - Scalars may broadcast
|
||||
/// - SPIR-V vectorized operations may unroll
|
||||
/// - Prefer scalar > SPIR-V vectorized > unrolled
|
||||
/// - SPIR-V vectorized operations will unroll
|
||||
/// - Prefer scalar > unrolled
|
||||
fn unify(a: Vectorization, b: Vectorization) Vectorization {
|
||||
if (a == .scalar and b == .scalar) {
|
||||
return .scalar;
|
||||
} else if (a == .spv_vectorized and b == .spv_vectorized) {
|
||||
assert(a.components() == b.components());
|
||||
return .{ .spv_vectorized = a.components() };
|
||||
} else if (a == .unrolled or b == .unrolled) {
|
||||
if (a == .unrolled and b == .unrolled) {
|
||||
assert(a.components() == b.components());
|
||||
return .{ .unrolled = a.components() };
|
||||
} else if (a == .unrolled) {
|
||||
return .{ .unrolled = a.components() };
|
||||
} else if (b == .unrolled) {
|
||||
return .{ .unrolled = b.components() };
|
||||
} else {
|
||||
unreachable;
|
||||
}
|
||||
} else {
|
||||
if (a == .spv_vectorized) {
|
||||
return .{ .spv_vectorized = a.components() };
|
||||
} else if (b == .spv_vectorized) {
|
||||
return .{ .spv_vectorized = b.components() };
|
||||
} else {
|
||||
unreachable;
|
||||
}
|
||||
if (a == .scalar and b == .scalar) return .scalar;
|
||||
if (a == .unrolled or b == .unrolled) {
|
||||
if (a == .unrolled and b == .unrolled) assert(a.components() == b.components());
|
||||
if (a == .unrolled) return .{ .unrolled = a.components() };
|
||||
return .{ .unrolled = b.components() };
|
||||
}
|
||||
}
|
||||
|
||||
/// Force this vectorization to be unrolled, if its
|
||||
/// an operation involving vectors.
|
||||
fn unroll(self: Vectorization) Vectorization {
|
||||
return switch (self) {
|
||||
.scalar, .unrolled => self,
|
||||
.spv_vectorized => |n| .{ .unrolled = n },
|
||||
};
|
||||
unreachable;
|
||||
}
|
||||
|
||||
/// Query the number of components that inputs of this operation have.
|
||||
@@ -2106,35 +2079,10 @@ const NavGen = struct {
|
||||
fn components(self: Vectorization) u32 {
|
||||
return switch (self) {
|
||||
.scalar => 1,
|
||||
.spv_vectorized => |n| n,
|
||||
.unrolled => |n| n,
|
||||
};
|
||||
}
|
||||
|
||||
/// Query the number of operations involving this vectorization.
|
||||
/// This is basically the number of components, except that SPIR-V vectorized
|
||||
/// operations only need a single SPIR-V instruction.
|
||||
fn operations(self: Vectorization) u32 {
|
||||
return switch (self) {
|
||||
.scalar, .spv_vectorized => 1,
|
||||
.unrolled => |n| n,
|
||||
};
|
||||
}
|
||||
|
||||
/// Turns `ty` into the result-type of an individual vector operation.
|
||||
/// `ty` may be a scalar or vector, it doesn't matter.
|
||||
fn operationType(self: Vectorization, ng: *NavGen, ty: Type) !Type {
|
||||
const pt = ng.pt;
|
||||
const scalar_ty = ty.scalarType(pt.zcu);
|
||||
return switch (self) {
|
||||
.scalar, .unrolled => scalar_ty,
|
||||
.spv_vectorized => |n| try pt.vectorType(.{
|
||||
.len = n,
|
||||
.child = scalar_ty.toIntern(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
/// Turns `ty` into the result-type of the entire operation.
|
||||
/// `ty` may be a scalar or vector, it doesn't matter.
|
||||
fn resultType(self: Vectorization, ng: *NavGen, ty: Type) !Type {
|
||||
@@ -2142,10 +2090,7 @@ const NavGen = struct {
|
||||
const scalar_ty = ty.scalarType(pt.zcu);
|
||||
return switch (self) {
|
||||
.scalar => scalar_ty,
|
||||
.unrolled, .spv_vectorized => |n| try pt.vectorType(.{
|
||||
.len = n,
|
||||
.child = scalar_ty.toIntern(),
|
||||
}),
|
||||
.unrolled => |n| try pt.vectorType(.{ .len = n, .child = scalar_ty.toIntern() }),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -2155,51 +2100,19 @@ const NavGen = struct {
|
||||
fn prepare(self: Vectorization, ng: *NavGen, tmp: Temporary) !PreparedOperand {
|
||||
const pt = ng.pt;
|
||||
const is_vector = tmp.ty.isVector(pt.zcu);
|
||||
const is_spv_vector = ng.isSpvVector(tmp.ty);
|
||||
const value: PreparedOperand.Value = switch (tmp.value) {
|
||||
.singleton => |id| switch (self) {
|
||||
.scalar => blk: {
|
||||
assert(!is_vector);
|
||||
break :blk .{ .scalar = id };
|
||||
},
|
||||
.spv_vectorized => blk: {
|
||||
if (is_vector) {
|
||||
assert(is_spv_vector);
|
||||
break :blk .{ .spv_vectorwise = id };
|
||||
}
|
||||
|
||||
// Broadcast scalar into vector.
|
||||
const vector_ty = try pt.vectorType(.{
|
||||
.len = self.components(),
|
||||
.child = tmp.ty.toIntern(),
|
||||
});
|
||||
|
||||
const vector = try ng.constructCompositeSplat(vector_ty, id);
|
||||
return .{
|
||||
.ty = vector_ty,
|
||||
.value = .{ .spv_vectorwise = vector },
|
||||
};
|
||||
},
|
||||
.unrolled => blk: {
|
||||
if (is_vector) {
|
||||
break :blk .{ .vector_exploded = try tmp.explode(ng) };
|
||||
} else {
|
||||
break :blk .{ .scalar_broadcast = id };
|
||||
}
|
||||
if (is_vector) break :blk .{ .vector_exploded = try tmp.explode(ng) };
|
||||
break :blk .{ .scalar_broadcast = id };
|
||||
},
|
||||
},
|
||||
.exploded_vector => |range| switch (self) {
|
||||
.scalar => unreachable,
|
||||
.spv_vectorized => |n| blk: {
|
||||
// We can vectorize this operation, but we have an exploded vector. This can happen
|
||||
// when a vectorizable operation succeeds a non-vectorizable operation. In this case,
|
||||
// pack up the IDs into a SPIR-V vector. This path should not be able to be hit with
|
||||
// a type that cannot do that.
|
||||
assert(is_spv_vector);
|
||||
assert(range.len == n);
|
||||
const vec = try tmp.materialize(ng);
|
||||
break :blk .{ .spv_vectorwise = vec };
|
||||
},
|
||||
.unrolled => |n| blk: {
|
||||
assert(range.len == n);
|
||||
break :blk .{ .vector_exploded = range };
|
||||
@@ -2216,17 +2129,14 @@ const NavGen = struct {
|
||||
/// Finalize the results of an operation back into a temporary. `results` is
|
||||
/// a list of result-ids of the operation.
|
||||
fn finalize(self: Vectorization, ty: Type, results: IdRange) Temporary {
|
||||
assert(self.operations() == results.len);
|
||||
const value: Temporary.Value = switch (self) {
|
||||
.scalar, .spv_vectorized => blk: {
|
||||
break :blk .{ .singleton = results.at(0) };
|
||||
},
|
||||
.unrolled => blk: {
|
||||
break :blk .{ .exploded_vector = results };
|
||||
assert(self.components() == results.len);
|
||||
return .{
|
||||
.ty = ty,
|
||||
.value = switch (self) {
|
||||
.scalar => .{ .singleton = results.at(0) },
|
||||
.unrolled => .{ .exploded_vector = results },
|
||||
},
|
||||
};
|
||||
|
||||
return .{ .ty = ty, .value = value };
|
||||
}
|
||||
|
||||
/// This struct represents an operand that has gone through some setup, and is
|
||||
@@ -2242,32 +2152,20 @@ const NavGen = struct {
|
||||
scalar: IdResult,
|
||||
/// A single scalar that is broadcasted in an unrolled operation.
|
||||
scalar_broadcast: IdResult,
|
||||
/// A SPIR-V vector that is used in SPIR-V vectorize operation.
|
||||
spv_vectorwise: IdResult,
|
||||
/// A vector represented by a consecutive list of IDs that is used in an unrolled operation.
|
||||
vector_exploded: IdRange,
|
||||
};
|
||||
|
||||
/// Query the value at a particular index of the operation. Note that
|
||||
/// the index is *not* the component/lane, but the index of the *operation*. When
|
||||
/// this operation is vectorized, the return value of this function is a SPIR-V vector.
|
||||
/// See also `Vectorization.operations()`.
|
||||
/// the index is *not* the component/lane, but the index of the *operation*.
|
||||
fn at(self: PreparedOperand, i: usize) IdResult {
|
||||
switch (self.value) {
|
||||
.scalar => |id| {
|
||||
assert(i == 0);
|
||||
return id;
|
||||
},
|
||||
.scalar_broadcast => |id| {
|
||||
return id;
|
||||
},
|
||||
.spv_vectorwise => |id| {
|
||||
assert(i == 0);
|
||||
return id;
|
||||
},
|
||||
.vector_exploded => |range| {
|
||||
return range.at(i);
|
||||
},
|
||||
.scalar_broadcast => |id| return id,
|
||||
.vector_exploded => |range| return range.at(i),
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -2299,7 +2197,7 @@ const NavGen = struct {
|
||||
|
||||
/// This function builds an OpSConvert of OpUConvert depending on the
|
||||
/// signedness of the types.
|
||||
fn buildIntConvert(self: *NavGen, dst_ty: Type, src: Temporary) !Temporary {
|
||||
fn buildConvert(self: *NavGen, dst_ty: Type, src: Temporary) !Temporary {
|
||||
const zcu = self.pt.zcu;
|
||||
|
||||
const dst_ty_id = try self.resolveType(dst_ty.scalarType(zcu), .direct);
|
||||
@@ -2318,13 +2216,17 @@ const NavGen = struct {
|
||||
return src.pun(result_ty);
|
||||
}
|
||||
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, dst_ty);
|
||||
const op_result_ty = dst_ty.scalarType(zcu);
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
|
||||
const opcode: Opcode = if (dst_ty.isSignedInt(zcu)) .OpSConvert else .OpUConvert;
|
||||
const opcode: Opcode = blk: {
|
||||
if (dst_ty.scalarType(zcu).isAnyFloat()) break :blk .OpFConvert;
|
||||
if (dst_ty.scalarType(zcu).isSignedInt(zcu)) break :blk .OpSConvert;
|
||||
break :blk .OpUConvert;
|
||||
};
|
||||
|
||||
const op_src = try v.prepare(self, src);
|
||||
|
||||
@@ -2339,13 +2241,14 @@ const NavGen = struct {
|
||||
}
|
||||
|
||||
fn buildFma(self: *NavGen, a: Temporary, b: Temporary, c: Temporary) !Temporary {
|
||||
const zcu = self.pt.zcu;
|
||||
const target = self.spv.target;
|
||||
|
||||
const v = self.vectorization(.{ a, b, c });
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, a.ty);
|
||||
const op_result_ty = a.ty.scalarType(zcu);
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
const result_ty = try v.resultType(self, a.ty);
|
||||
|
||||
@@ -2382,10 +2285,10 @@ const NavGen = struct {
|
||||
const zcu = self.pt.zcu;
|
||||
|
||||
const v = self.vectorization(.{ condition, lhs, rhs });
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, lhs.ty);
|
||||
const op_result_ty = lhs.ty.scalarType(zcu);
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
const result_ty = try v.resultType(self, lhs.ty);
|
||||
|
||||
@@ -2431,10 +2334,10 @@ const NavGen = struct {
|
||||
|
||||
fn buildCmp(self: *NavGen, pred: CmpPredicate, lhs: Temporary, rhs: Temporary) !Temporary {
|
||||
const v = self.vectorization(.{ lhs, rhs });
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, Type.bool);
|
||||
const op_result_ty: Type = .bool;
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
const result_ty = try v.resultType(self, Type.bool);
|
||||
|
||||
@@ -2498,22 +2401,12 @@ const NavGen = struct {
|
||||
};
|
||||
|
||||
fn buildUnary(self: *NavGen, op: UnaryOp, operand: Temporary) !Temporary {
|
||||
const zcu = self.pt.zcu;
|
||||
const target = self.spv.target;
|
||||
const v = blk: {
|
||||
const v = self.vectorization(.{operand});
|
||||
break :blk switch (op) {
|
||||
// TODO: These instructions don't seem to be working
|
||||
// properly for LLVM-based backends on OpenCL for 8- and
|
||||
// 16-component vectors.
|
||||
.i_abs => if (self.spv.hasFeature(.vector16) and v.components() >= 8) v.unroll() else v,
|
||||
else => v,
|
||||
};
|
||||
};
|
||||
|
||||
const ops = v.operations();
|
||||
const v = self.vectorization(.{operand});
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, operand.ty);
|
||||
const op_result_ty = operand.ty.scalarType(zcu);
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
const result_ty = try v.resultType(self, operand.ty);
|
||||
|
||||
@@ -2628,13 +2521,14 @@ const NavGen = struct {
|
||||
};
|
||||
|
||||
fn buildBinary(self: *NavGen, op: BinaryOp, lhs: Temporary, rhs: Temporary) !Temporary {
|
||||
const zcu = self.pt.zcu;
|
||||
const target = self.spv.target;
|
||||
|
||||
const v = self.vectorization(.{ lhs, rhs });
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
const results = self.spv.allocIds(ops);
|
||||
|
||||
const op_result_ty = try v.operationType(self, lhs.ty);
|
||||
const op_result_ty = lhs.ty.scalarType(zcu);
|
||||
const op_result_ty_id = try self.resolveType(op_result_ty, .direct);
|
||||
const result_ty = try v.resultType(self, lhs.ty);
|
||||
|
||||
@@ -2730,9 +2624,9 @@ const NavGen = struct {
|
||||
const ip = &zcu.intern_pool;
|
||||
|
||||
const v = lhs.vectorization(self).unify(rhs.vectorization(self));
|
||||
const ops = v.operations();
|
||||
const ops = v.components();
|
||||
|
||||
const arith_op_ty = try v.operationType(self, lhs.ty);
|
||||
const arith_op_ty = lhs.ty.scalarType(zcu);
|
||||
const arith_op_ty_id = try self.resolveType(arith_op_ty, .direct);
|
||||
|
||||
const lhs_op = try v.prepare(self, lhs);
|
||||
@@ -2883,38 +2777,32 @@ const NavGen = struct {
|
||||
});
|
||||
},
|
||||
.vulkan, .opengl => {
|
||||
const ptr_ptr_anyerror_ty_id = self.spv.allocId();
|
||||
try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpTypePointer, .{
|
||||
.id_result = ptr_ptr_anyerror_ty_id,
|
||||
.storage_class = .PushConstant,
|
||||
.type = ptr_anyerror_ty_id,
|
||||
});
|
||||
|
||||
if (self.object.error_push_constant == null) {
|
||||
if (self.object.error_buffer == null) {
|
||||
const spv_err_decl_index = try self.spv.allocDecl(.global);
|
||||
try self.spv.declareDeclDeps(spv_err_decl_index, &.{});
|
||||
|
||||
const push_constant_struct_ty_id = self.spv.allocId();
|
||||
try self.spv.structType(push_constant_struct_ty_id, &.{ptr_anyerror_ty_id}, &.{"error_out_ptr"});
|
||||
try self.spv.decorate(push_constant_struct_ty_id, .Block);
|
||||
try self.spv.decorateMember(push_constant_struct_ty_id, 0, .{ .Offset = .{ .byte_offset = 0 } });
|
||||
const buffer_struct_ty_id = self.spv.allocId();
|
||||
try self.spv.structType(buffer_struct_ty_id, &.{anyerror_ty_id}, &.{"error_out"});
|
||||
try self.spv.decorate(buffer_struct_ty_id, .Block);
|
||||
try self.spv.decorateMember(buffer_struct_ty_id, 0, .{ .Offset = .{ .byte_offset = 0 } });
|
||||
|
||||
const ptr_push_constant_struct_ty_id = self.spv.allocId();
|
||||
const ptr_buffer_struct_ty_id = self.spv.allocId();
|
||||
try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpTypePointer, .{
|
||||
.id_result = ptr_push_constant_struct_ty_id,
|
||||
.storage_class = .PushConstant,
|
||||
.type = push_constant_struct_ty_id,
|
||||
.id_result = ptr_buffer_struct_ty_id,
|
||||
.storage_class = self.spvStorageClass(.global),
|
||||
.type = buffer_struct_ty_id,
|
||||
});
|
||||
|
||||
const buffer_struct_id = self.spv.declPtr(spv_err_decl_index).result_id;
|
||||
try self.spv.sections.types_globals_constants.emit(self.spv.gpa, .OpVariable, .{
|
||||
.id_result_type = ptr_push_constant_struct_ty_id,
|
||||
.id_result = self.spv.declPtr(spv_err_decl_index).result_id,
|
||||
.storage_class = .PushConstant,
|
||||
.id_result_type = ptr_buffer_struct_ty_id,
|
||||
.id_result = buffer_struct_id,
|
||||
.storage_class = self.spvStorageClass(.global),
|
||||
});
|
||||
try self.spv.decorate(buffer_struct_id, .{ .DescriptorSet = .{ .descriptor_set = 0 } });
|
||||
try self.spv.decorate(buffer_struct_id, .{ .Binding = .{ .binding_point = 0 } });
|
||||
|
||||
self.object.error_push_constant = .{
|
||||
.push_constant_ptr = spv_err_decl_index,
|
||||
};
|
||||
self.object.error_buffer = spv_err_decl_index;
|
||||
}
|
||||
|
||||
try self.spv.sections.execution_modes.emit(self.spv.gpa, .OpExecutionMode, .{
|
||||
@@ -2937,24 +2825,16 @@ const NavGen = struct {
|
||||
.id_result = self.spv.allocId(),
|
||||
});
|
||||
|
||||
const spv_err_decl_index = self.object.error_push_constant.?.push_constant_ptr;
|
||||
const push_constant_id = self.spv.declPtr(spv_err_decl_index).result_id;
|
||||
const spv_err_decl_index = self.object.error_buffer.?;
|
||||
const buffer_id = self.spv.declPtr(spv_err_decl_index).result_id;
|
||||
try decl_deps.append(spv_err_decl_index);
|
||||
|
||||
const zero_id = try self.constInt(Type.u32, 0);
|
||||
// We cannot use OpInBoundsAccessChain to dereference cross-storage class, so we have to use
|
||||
// a load.
|
||||
const tmp = self.spv.allocId();
|
||||
try section.emit(self.spv.gpa, .OpInBoundsAccessChain, .{
|
||||
.id_result_type = ptr_ptr_anyerror_ty_id,
|
||||
.id_result = tmp,
|
||||
.base = push_constant_id,
|
||||
.indexes = &.{zero_id},
|
||||
});
|
||||
try section.emit(self.spv.gpa, .OpLoad, .{
|
||||
.id_result_type = ptr_anyerror_ty_id,
|
||||
.id_result = p_error_id,
|
||||
.pointer = tmp,
|
||||
.base = buffer_id,
|
||||
.indexes = &.{zero_id},
|
||||
});
|
||||
},
|
||||
else => unreachable,
|
||||
@@ -2990,7 +2870,7 @@ const NavGen = struct {
|
||||
};
|
||||
|
||||
try self.spv.declareDeclDeps(spv_decl_index, decl_deps.items);
|
||||
try self.spv.declareEntryPoint(spv_decl_index, test_name, execution_mode);
|
||||
try self.spv.declareEntryPoint(spv_decl_index, test_name, execution_mode, null);
|
||||
}
|
||||
|
||||
fn genNav(self: *NavGen, do_codegen: bool) !void {
|
||||
@@ -3092,6 +2972,40 @@ const NavGen = struct {
|
||||
.storage_class = storage_class,
|
||||
});
|
||||
|
||||
if (nav.fqn.eqlSlice("position", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .Position } });
|
||||
} else if (nav.fqn.eqlSlice("point_size", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .PointSize } });
|
||||
} else if (nav.fqn.eqlSlice("invocation_id", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InvocationId } });
|
||||
} else if (nav.fqn.eqlSlice("frag_coord", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FragCoord } });
|
||||
} else if (nav.fqn.eqlSlice("point_coord", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .PointCoord } });
|
||||
} else if (nav.fqn.eqlSlice("front_facing", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FrontFacing } });
|
||||
} else if (nav.fqn.eqlSlice("sample_mask", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .SampleMask } });
|
||||
} else if (nav.fqn.eqlSlice("frag_depth", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .FragDepth } });
|
||||
} else if (nav.fqn.eqlSlice("num_workgroups", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .NumWorkgroups } });
|
||||
} else if (nav.fqn.eqlSlice("workgroup_size", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .WorkgroupSize } });
|
||||
} else if (nav.fqn.eqlSlice("workgroup_id", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .WorkgroupId } });
|
||||
} else if (nav.fqn.eqlSlice("local_invocation_id", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .LocalInvocationId } });
|
||||
} else if (nav.fqn.eqlSlice("global_invocation_id", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .GlobalInvocationId } });
|
||||
} else if (nav.fqn.eqlSlice("local_invocation_index", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .LocalInvocationIndex } });
|
||||
} else if (nav.fqn.eqlSlice("vertex_index", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .VertexIndex } });
|
||||
} else if (nav.fqn.eqlSlice("instance_index", ip)) {
|
||||
try self.spv.decorate(result_id, .{ .BuiltIn = .{ .built_in = .InstanceIndex } });
|
||||
}
|
||||
|
||||
try self.spv.debugName(result_id, nav.fqn.toSlice(ip));
|
||||
try self.spv.declareDeclDeps(spv_decl_index, &.{});
|
||||
},
|
||||
@@ -3175,17 +3089,18 @@ const NavGen = struct {
|
||||
/// Convert representation from indirect (in memory) to direct (in 'register')
|
||||
/// This converts the argument type from resolveType(ty, .indirect) to resolveType(ty, .direct).
|
||||
fn convertToDirect(self: *NavGen, ty: Type, operand_id: IdRef) !IdRef {
|
||||
const zcu = self.pt.zcu;
|
||||
const pt = self.pt;
|
||||
const zcu = pt.zcu;
|
||||
switch (ty.scalarType(zcu).zigTypeTag(zcu)) {
|
||||
.bool => {
|
||||
const false_id = try self.constBool(false, .indirect);
|
||||
// The operation below requires inputs in direct representation, but the operand
|
||||
// is actually in indirect representation.
|
||||
// Cheekily swap out the type to the direct equivalent of the indirect type here, they have the
|
||||
// same representation when converted to SPIR-V.
|
||||
const operand_ty = try self.zigScalarOrVectorTypeLike(Type.u1, ty);
|
||||
// Note: We can guarantee that these are the same ID due to the SPIR-V Module's `vector_types` cache!
|
||||
assert(try self.resolveType(operand_ty, .direct) == try self.resolveType(ty, .indirect));
|
||||
const operand_ty = blk: {
|
||||
if (!ty.isVector(pt.zcu)) break :blk Type.u1;
|
||||
break :blk try pt.vectorType(.{
|
||||
.len = ty.vectorLen(pt.zcu),
|
||||
.child = Type.u1.toIntern(),
|
||||
});
|
||||
};
|
||||
|
||||
const result = try self.buildCmp(
|
||||
.i_ne,
|
||||
@@ -3226,7 +3141,6 @@ const NavGen = struct {
|
||||
}
|
||||
|
||||
fn extractVectorComponent(self: *NavGen, result_ty: Type, vector_id: IdRef, field: u32) !IdRef {
|
||||
// Whether this is an OpTypeVector or OpTypeArray, we need to emit the same instruction regardless.
|
||||
const result_ty_id = try self.resolveType(result_ty, .direct);
|
||||
const result_id = self.spv.allocId();
|
||||
const indexes = [_]u32{field};
|
||||
@@ -3485,7 +3399,7 @@ const NavGen = struct {
|
||||
// Note: The sign may differ here between the shift and the base type, in case
|
||||
// of an arithmetic right shift. SPIR-V still expects the same type,
|
||||
// so in that case we have to cast convert to signed.
|
||||
const casted_shift = try self.buildIntConvert(base.ty.scalarType(zcu), shift);
|
||||
const casted_shift = try self.buildConvert(base.ty.scalarType(zcu), shift);
|
||||
|
||||
const shifted = switch (info.signedness) {
|
||||
.unsigned => try self.buildBinary(unsigned, base, casted_shift),
|
||||
@@ -3545,8 +3459,7 @@ const NavGen = struct {
|
||||
const zcu = self.pt.zcu;
|
||||
const ty = value.ty;
|
||||
switch (info.class) {
|
||||
.integer, .bool, .float => return value,
|
||||
.composite_integer => unreachable, // TODO
|
||||
.composite_integer, .integer, .bool, .float => return value,
|
||||
.strange_integer => switch (info.signedness) {
|
||||
.unsigned => {
|
||||
const mask_value = if (info.bits == 64) 0xFFFF_FFFF_FFFF_FFFF else (@as(u64, 1) << @as(u6, @intCast(info.bits))) - 1;
|
||||
@@ -3815,12 +3728,12 @@ const NavGen = struct {
|
||||
.unsigned => blk: {
|
||||
if (maybe_op_ty_bits) |op_ty_bits| {
|
||||
const op_ty = try pt.intType(.unsigned, op_ty_bits);
|
||||
const casted_lhs = try self.buildIntConvert(op_ty, lhs);
|
||||
const casted_rhs = try self.buildIntConvert(op_ty, rhs);
|
||||
const casted_lhs = try self.buildConvert(op_ty, lhs);
|
||||
const casted_rhs = try self.buildConvert(op_ty, rhs);
|
||||
|
||||
const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs);
|
||||
|
||||
const low_bits = try self.buildIntConvert(lhs.ty, full_result);
|
||||
const low_bits = try self.buildConvert(lhs.ty, full_result);
|
||||
const result = try self.normalize(low_bits, info);
|
||||
|
||||
// Shift the result bits away to get the overflow bits.
|
||||
@@ -3846,9 +3759,7 @@ const NavGen = struct {
|
||||
const high_overflowed = try self.buildCmp(.i_ne, zero, high_bits);
|
||||
|
||||
// If no overflow bits in low_bits, no extra work needs to be done.
|
||||
if (info.backing_bits == info.bits) {
|
||||
break :blk .{ result, high_overflowed };
|
||||
}
|
||||
if (info.backing_bits == info.bits) break :blk .{ result, high_overflowed };
|
||||
|
||||
// Shift the result bits away to get the overflow bits.
|
||||
const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits));
|
||||
@@ -3886,13 +3797,13 @@ const NavGen = struct {
|
||||
if (maybe_op_ty_bits) |op_ty_bits| {
|
||||
const op_ty = try pt.intType(.signed, op_ty_bits);
|
||||
// Assume normalized; sign bit is set. We want a sign extend.
|
||||
const casted_lhs = try self.buildIntConvert(op_ty, lhs);
|
||||
const casted_rhs = try self.buildIntConvert(op_ty, rhs);
|
||||
const casted_lhs = try self.buildConvert(op_ty, lhs);
|
||||
const casted_rhs = try self.buildConvert(op_ty, rhs);
|
||||
|
||||
const full_result = try self.buildBinary(.i_mul, casted_lhs, casted_rhs);
|
||||
|
||||
// Truncate to the result type.
|
||||
const low_bits = try self.buildIntConvert(lhs.ty, full_result);
|
||||
const low_bits = try self.buildConvert(lhs.ty, full_result);
|
||||
const result = try self.normalize(low_bits, info);
|
||||
|
||||
// Now, we need to check the overflow bits AND the sign
|
||||
@@ -3929,9 +3840,7 @@ const NavGen = struct {
|
||||
// If no overflow bits in low_bits, no extra work needs to be done.
|
||||
// Careful, we still have to check the sign bit, so this branch
|
||||
// only goes for i33 and such.
|
||||
if (info.backing_bits == info.bits + 1) {
|
||||
break :blk .{ result, high_overflowed };
|
||||
}
|
||||
if (info.backing_bits == info.bits + 1) break :blk .{ result, high_overflowed };
|
||||
|
||||
// Shift the result bits away to get the overflow bits.
|
||||
const shift = Temporary.init(lhs.ty, try self.constInt(lhs.ty, info.bits - 1));
|
||||
@@ -3972,7 +3881,7 @@ const NavGen = struct {
|
||||
|
||||
// Sometimes Zig doesn't make both of the arguments the same types here. SPIR-V expects that,
|
||||
// so just manually upcast it if required.
|
||||
const casted_shift = try self.buildIntConvert(base.ty.scalarType(zcu), shift);
|
||||
const casted_shift = try self.buildConvert(base.ty.scalarType(zcu), shift);
|
||||
|
||||
const left = try self.buildBinary(.sll, base, casted_shift);
|
||||
const result = try self.normalize(left, info);
|
||||
@@ -4026,7 +3935,7 @@ const NavGen = struct {
|
||||
// Result of OpenCL ctz/clz returns operand.ty, and we want result_ty.
|
||||
// result_ty is always large enough to hold the result, so we might have to down
|
||||
// cast it.
|
||||
const result = try self.buildIntConvert(scalar_result_ty, count);
|
||||
const result = try self.buildConvert(scalar_result_ty, count);
|
||||
return try result.materialize(self);
|
||||
}
|
||||
|
||||
@@ -4057,11 +3966,8 @@ const NavGen = struct {
|
||||
const operand_ty = self.typeOf(reduce.operand);
|
||||
const scalar_ty = operand_ty.scalarType(zcu);
|
||||
const scalar_ty_id = try self.resolveType(scalar_ty, .direct);
|
||||
|
||||
const info = self.arithmeticTypeInfo(operand_ty);
|
||||
|
||||
const len = operand_ty.vectorLen(zcu);
|
||||
|
||||
const first = try self.extractVectorComponent(scalar_ty, operand, 0);
|
||||
|
||||
switch (reduce.operation) {
|
||||
@@ -4136,51 +4042,9 @@ const NavGen = struct {
|
||||
|
||||
// Note: number of components in the result, a, and b may differ.
|
||||
const result_ty = self.typeOfIndex(inst);
|
||||
const a_ty = self.typeOf(extra.a);
|
||||
const b_ty = self.typeOf(extra.b);
|
||||
|
||||
const scalar_ty = result_ty.scalarType(zcu);
|
||||
const scalar_ty_id = try self.resolveType(scalar_ty, .direct);
|
||||
|
||||
// If all of the types are SPIR-V vectors, we can use OpVectorShuffle.
|
||||
if (self.isSpvVector(result_ty) and self.isSpvVector(a_ty) and self.isSpvVector(b_ty)) {
|
||||
// The SPIR-V shuffle instruction is similar to the Air instruction, except that the elements are
|
||||
// numbered consecutively instead of using negatives.
|
||||
|
||||
const components = try self.gpa.alloc(Word, result_ty.vectorLen(zcu));
|
||||
defer self.gpa.free(components);
|
||||
|
||||
const a_len = a_ty.vectorLen(zcu);
|
||||
|
||||
for (components, 0..) |*component, i| {
|
||||
const elem = try mask.elemValue(pt, i);
|
||||
if (elem.isUndef(zcu)) {
|
||||
// This is explicitly valid for OpVectorShuffle, it indicates undefined.
|
||||
component.* = 0xFFFF_FFFF;
|
||||
continue;
|
||||
}
|
||||
|
||||
const index = elem.toSignedInt(zcu);
|
||||
if (index >= 0) {
|
||||
component.* = @intCast(index);
|
||||
} else {
|
||||
component.* = @intCast(~index + a_len);
|
||||
}
|
||||
}
|
||||
|
||||
const result_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpVectorShuffle, .{
|
||||
.id_result_type = try self.resolveType(result_ty, .direct),
|
||||
.id_result = result_id,
|
||||
.vector_1 = a,
|
||||
.vector_2 = b,
|
||||
.components = components,
|
||||
});
|
||||
return result_id;
|
||||
}
|
||||
|
||||
// Fall back to manually extracting and inserting components.
|
||||
|
||||
const constituents = try self.gpa.alloc(IdRef, result_ty.vectorLen(zcu));
|
||||
defer self.gpa.free(constituents);
|
||||
|
||||
@@ -4535,9 +4399,7 @@ const NavGen = struct {
|
||||
const dst_ty_id = try self.resolveType(dst_ty, .direct);
|
||||
|
||||
const result_id = blk: {
|
||||
if (src_ty_id == dst_ty_id) {
|
||||
break :blk src_id;
|
||||
}
|
||||
if (src_ty_id == dst_ty_id) break :blk src_id;
|
||||
|
||||
// TODO: Some more cases are missing here
|
||||
// See fn bitCast in llvm.zig
|
||||
@@ -4618,7 +4480,7 @@ const NavGen = struct {
|
||||
return try src.materialize(self);
|
||||
}
|
||||
|
||||
const converted = try self.buildIntConvert(dst_ty, src);
|
||||
const converted = try self.buildConvert(dst_ty, src);
|
||||
|
||||
// Make sure to normalize the result if shrinking.
|
||||
// Because strange ints are sign extended in their backing
|
||||
@@ -4698,17 +4560,10 @@ const NavGen = struct {
|
||||
|
||||
fn airFloatCast(self: *NavGen, inst: Air.Inst.Index) !?IdRef {
|
||||
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
const operand_id = try self.resolve(ty_op.operand);
|
||||
const operand = try self.temporary(ty_op.operand);
|
||||
const dest_ty = self.typeOfIndex(inst);
|
||||
const dest_ty_id = try self.resolveType(dest_ty, .direct);
|
||||
|
||||
const result_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpFConvert, .{
|
||||
.id_result_type = dest_ty_id,
|
||||
.id_result = result_id,
|
||||
.float_value = operand_id,
|
||||
});
|
||||
return result_id;
|
||||
const result = try self.buildConvert(dest_ty, operand);
|
||||
return try result.materialize(self);
|
||||
}
|
||||
|
||||
fn airNot(self: *NavGen, inst: Air.Inst.Index) !?IdRef {
|
||||
@@ -4790,13 +4645,14 @@ const NavGen = struct {
|
||||
const field_int_id = blk: {
|
||||
if (field_ty.isPtrAtRuntime(zcu)) {
|
||||
assert(self.spv.hasFeature(.addresses) or
|
||||
(self.spv.hasFeature(.physical_storage_buffer) and field_ty.ptrAddressSpace(zcu) == .storage_buffer));
|
||||
(self.spv.hasFeature(.physical_storage_buffer) and
|
||||
field_ty.ptrAddressSpace(zcu) == .storage_buffer));
|
||||
break :blk try self.intFromPtr(field_id);
|
||||
}
|
||||
break :blk try self.bitCast(field_int_ty, field_ty, field_id);
|
||||
};
|
||||
const shift_rhs = try self.constInt(backing_int_ty, running_bits);
|
||||
const extended_int_conv = try self.buildIntConvert(backing_int_ty, .{
|
||||
const extended_int_conv = try self.buildConvert(backing_int_ty, .{
|
||||
.ty = field_int_ty,
|
||||
.value = .{ .singleton = field_int_id },
|
||||
});
|
||||
@@ -5016,17 +4872,6 @@ const NavGen = struct {
|
||||
const array_id = try self.resolve(bin_op.lhs);
|
||||
const index_id = try self.resolve(bin_op.rhs);
|
||||
|
||||
if (self.isSpvVector(array_ty)) {
|
||||
const result_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpVectorExtractDynamic, .{
|
||||
.id_result_type = try self.resolveType(elem_ty, .direct),
|
||||
.id_result = result_id,
|
||||
.vector = array_id,
|
||||
.index = index_id,
|
||||
});
|
||||
return result_id;
|
||||
}
|
||||
|
||||
// SPIR-V doesn't have an array indexing function for some damn reason.
|
||||
// For now, just generate a temporary and use that.
|
||||
// TODO: This backend probably also should use isByRef from llvm...
|
||||
@@ -5173,7 +5018,7 @@ const NavGen = struct {
|
||||
return self.bitCast(ty, payload_ty, payload.?);
|
||||
}
|
||||
|
||||
const trunc = try self.buildIntConvert(ty, .{ .ty = payload_ty, .value = .{ .singleton = payload.? } });
|
||||
const trunc = try self.buildConvert(ty, .{ .ty = payload_ty, .value = .{ .singleton = payload.? } });
|
||||
return try trunc.materialize(self);
|
||||
}
|
||||
|
||||
@@ -5182,7 +5027,7 @@ const NavGen = struct {
|
||||
try self.convertToIndirect(payload_ty, payload.?)
|
||||
else
|
||||
try self.bitCast(payload_int_ty, payload_ty, payload.?);
|
||||
const trunc = try self.buildIntConvert(ty, .{ .ty = payload_int_ty, .value = .{ .singleton = payload_int } });
|
||||
const trunc = try self.buildConvert(ty, .{ .ty = payload_int_ty, .value = .{ .singleton = payload_int } });
|
||||
return try trunc.materialize(self);
|
||||
}
|
||||
|
||||
@@ -5208,13 +5053,16 @@ const NavGen = struct {
|
||||
if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
|
||||
const pl_ptr_ty_id = try self.ptrType(layout.payload_ty, .Function, .indirect);
|
||||
const pl_ptr_id = try self.accessChain(pl_ptr_ty_id, tmp_id, &.{layout.payload_index});
|
||||
const active_pl_ptr_ty_id = try self.ptrType(payload_ty, .Function, .indirect);
|
||||
const active_pl_ptr_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpBitcast, .{
|
||||
.id_result_type = active_pl_ptr_ty_id,
|
||||
.id_result = active_pl_ptr_id,
|
||||
.operand = pl_ptr_id,
|
||||
});
|
||||
const active_pl_ptr_id = if (!layout.payload_ty.eql(payload_ty, zcu)) blk: {
|
||||
const active_pl_ptr_ty_id = try self.ptrType(payload_ty, .Function, .indirect);
|
||||
const active_pl_ptr_id = self.spv.allocId();
|
||||
try self.func.body.emit(self.spv.gpa, .OpBitcast, .{
|
||||
.id_result_type = active_pl_ptr_ty_id,
|
||||
.id_result = active_pl_ptr_id,
|
||||
.operand = pl_ptr_id,
|
||||
});
|
||||
break :blk active_pl_ptr_id;
|
||||
} else pl_ptr_id;
|
||||
|
||||
try self.store(payload_ty, active_pl_ptr_id, payload.?, .{});
|
||||
} else {
|
||||
@@ -5271,9 +5119,9 @@ const NavGen = struct {
|
||||
const mask_id = try self.constInt(object_ty, (@as(u64, 1) << @as(u6, @intCast(field_bit_size))) - 1);
|
||||
const masked = try self.buildBinary(.bit_and, shift, .{ .ty = object_ty, .value = .{ .singleton = mask_id } });
|
||||
const result_id = blk: {
|
||||
if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(object_ty.bitSize(zcu))).?)
|
||||
if (self.backingIntBits(field_bit_size).@"0" == self.backingIntBits(@intCast(object_ty.bitSize(zcu))).@"0")
|
||||
break :blk try self.bitCast(field_int_ty, object_ty, try masked.materialize(self));
|
||||
const trunc = try self.buildIntConvert(field_int_ty, masked);
|
||||
const trunc = try self.buildConvert(field_int_ty, masked);
|
||||
break :blk try trunc.materialize(self);
|
||||
};
|
||||
if (field_ty.ip_index == .bool_type) return try self.convertToDirect(.bool, result_id);
|
||||
@@ -5295,9 +5143,9 @@ const NavGen = struct {
|
||||
.{ .ty = backing_int_ty, .value = .{ .singleton = mask_id } },
|
||||
);
|
||||
const result_id = blk: {
|
||||
if (self.backingIntBits(field_bit_size).? == self.backingIntBits(@intCast(backing_int_ty.bitSize(zcu))).?)
|
||||
if (self.backingIntBits(field_bit_size).@"0" == self.backingIntBits(@intCast(backing_int_ty.bitSize(zcu))).@"0")
|
||||
break :blk try self.bitCast(int_ty, backing_int_ty, try masked.materialize(self));
|
||||
const trunc = try self.buildIntConvert(int_ty, masked);
|
||||
const trunc = try self.buildConvert(int_ty, masked);
|
||||
break :blk try trunc.materialize(self);
|
||||
};
|
||||
if (field_ty.ip_index == .bool_type) return try self.convertToDirect(.bool, result_id);
|
||||
@@ -6332,17 +6180,15 @@ const NavGen = struct {
|
||||
.bool, .error_set => 1,
|
||||
.int => blk: {
|
||||
const bits = cond_ty.intInfo(zcu).bits;
|
||||
const backing_bits = self.backingIntBits(bits) orelse {
|
||||
return self.todo("implement composite int switch", .{});
|
||||
};
|
||||
const backing_bits, const big_int = self.backingIntBits(bits);
|
||||
if (big_int) return self.todo("implement composite int switch", .{});
|
||||
break :blk if (backing_bits <= 32) 1 else 2;
|
||||
},
|
||||
.@"enum" => blk: {
|
||||
const int_ty = cond_ty.intTagType(zcu);
|
||||
const int_info = int_ty.intInfo(zcu);
|
||||
const backing_bits = self.backingIntBits(int_info.bits) orelse {
|
||||
return self.todo("implement composite int switch", .{});
|
||||
};
|
||||
const backing_bits, const big_int = self.backingIntBits(int_info.bits);
|
||||
if (big_int) return self.todo("implement composite int switch", .{});
|
||||
break :blk if (backing_bits <= 32) 1 else 2;
|
||||
},
|
||||
.pointer => blk: {
|
||||
@@ -6752,7 +6598,7 @@ const NavGen = struct {
|
||||
// TODO: Should we make these builtins return usize?
|
||||
const result_id = try self.builtin3D(Type.u64, .LocalInvocationId, dimension, 0);
|
||||
const tmp = Temporary.init(Type.u64, result_id);
|
||||
const result = try self.buildIntConvert(Type.u32, tmp);
|
||||
const result = try self.buildConvert(Type.u32, tmp);
|
||||
return try result.materialize(self);
|
||||
}
|
||||
|
||||
@@ -6763,7 +6609,7 @@ const NavGen = struct {
|
||||
// TODO: Should we make these builtins return usize?
|
||||
const result_id = try self.builtin3D(Type.u64, .WorkgroupSize, dimension, 0);
|
||||
const tmp = Temporary.init(Type.u64, result_id);
|
||||
const result = try self.buildIntConvert(Type.u32, tmp);
|
||||
const result = try self.buildConvert(Type.u32, tmp);
|
||||
return try result.materialize(self);
|
||||
}
|
||||
|
||||
@@ -6774,7 +6620,7 @@ const NavGen = struct {
|
||||
// TODO: Should we make these builtins return usize?
|
||||
const result_id = try self.builtin3D(Type.u64, .WorkgroupId, dimension, 0);
|
||||
const tmp = Temporary.init(Type.u64, result_id);
|
||||
const result = try self.buildIntConvert(Type.u32, tmp);
|
||||
const result = try self.buildConvert(Type.u32, tmp);
|
||||
return try result.materialize(self);
|
||||
}
|
||||
|
||||
|
||||
@@ -296,12 +296,26 @@ fn processInstruction(self: *Assembler) !void {
|
||||
};
|
||||
break :blk .{ .value = try self.spv.importInstructionSet(set_tag) };
|
||||
},
|
||||
.OpExecutionMode, .OpExecutionModeId => {
|
||||
assert(try self.processGenericInstruction() == null);
|
||||
const entry_point_id = try self.resolveRefId(self.inst.operands.items[0].ref_id);
|
||||
const exec_mode: spec.ExecutionMode = @enumFromInt(self.inst.operands.items[1].value);
|
||||
const gop = try self.spv.entry_points.getOrPut(self.gpa, entry_point_id);
|
||||
if (!gop.found_existing) {
|
||||
gop.value_ptr.* = .{};
|
||||
} else if (gop.value_ptr.exec_mode != null) {
|
||||
return self.fail(
|
||||
self.currentToken().start,
|
||||
"cannot set execution mode more than once to any entry point",
|
||||
.{},
|
||||
);
|
||||
}
|
||||
gop.value_ptr.exec_mode = exec_mode;
|
||||
return;
|
||||
},
|
||||
else => switch (self.inst.opcode.class()) {
|
||||
.TypeDeclaration => try self.processTypeInstruction(),
|
||||
else => if (try self.processGenericInstruction()) |result|
|
||||
result
|
||||
else
|
||||
return,
|
||||
else => (try self.processGenericInstruction()) orelse return,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -92,11 +92,12 @@ pub const Decl = struct {
|
||||
/// This models a kernel entry point.
|
||||
pub const EntryPoint = struct {
|
||||
/// The declaration that should be exported.
|
||||
decl_index: Decl.Index,
|
||||
decl_index: ?Decl.Index = null,
|
||||
/// The name of the kernel to be exported.
|
||||
name: []const u8,
|
||||
name: ?[]const u8 = null,
|
||||
/// Calling Convention
|
||||
execution_model: spec.ExecutionModel,
|
||||
exec_model: ?spec.ExecutionModel = null,
|
||||
exec_mode: ?spec.ExecutionMode = null,
|
||||
};
|
||||
|
||||
/// A general-purpose allocator which may be used to allocate resources for this module
|
||||
@@ -164,8 +165,6 @@ cache: struct {
|
||||
void_type: ?IdRef = null,
|
||||
int_types: std.AutoHashMapUnmanaged(std.builtin.Type.Int, IdRef) = .empty,
|
||||
float_types: std.AutoHashMapUnmanaged(std.builtin.Type.Float, IdRef) = .empty,
|
||||
// This cache is required so that @Vector(X, u1) in direct representation has the
|
||||
// same ID as @Vector(X, bool) in indirect representation.
|
||||
vector_types: std.AutoHashMapUnmanaged(struct { IdRef, u32 }, IdRef) = .empty,
|
||||
array_types: std.AutoHashMapUnmanaged(struct { IdRef, IdRef }, IdRef) = .empty,
|
||||
|
||||
@@ -186,7 +185,7 @@ decls: std.ArrayListUnmanaged(Decl) = .empty,
|
||||
decl_deps: std.ArrayListUnmanaged(Decl.Index) = .empty,
|
||||
|
||||
/// The list of entry points that should be exported from this module.
|
||||
entry_points: std.ArrayListUnmanaged(EntryPoint) = .empty,
|
||||
entry_points: std.AutoArrayHashMapUnmanaged(IdRef, EntryPoint) = .empty,
|
||||
|
||||
pub fn init(gpa: Allocator, target: std.Target) Module {
|
||||
const version_minor: u8 = blk: {
|
||||
@@ -306,19 +305,30 @@ fn entryPoints(self: *Module) !Section {
|
||||
var seen = try std.DynamicBitSetUnmanaged.initEmpty(self.gpa, self.decls.items.len);
|
||||
defer seen.deinit(self.gpa);
|
||||
|
||||
for (self.entry_points.items) |entry_point| {
|
||||
for (self.entry_points.keys(), self.entry_points.values()) |entry_point_id, entry_point| {
|
||||
interface.items.len = 0;
|
||||
seen.setRangeValue(.{ .start = 0, .end = self.decls.items.len }, false);
|
||||
|
||||
try self.addEntryPointDeps(entry_point.decl_index, &seen, &interface);
|
||||
|
||||
const entry_point_id = self.declPtr(entry_point.decl_index).result_id;
|
||||
try self.addEntryPointDeps(entry_point.decl_index.?, &seen, &interface);
|
||||
try entry_points.emit(self.gpa, .OpEntryPoint, .{
|
||||
.execution_model = entry_point.execution_model,
|
||||
.execution_model = entry_point.exec_model.?,
|
||||
.entry_point = entry_point_id,
|
||||
.name = entry_point.name,
|
||||
.name = entry_point.name.?,
|
||||
.interface = interface.items,
|
||||
});
|
||||
|
||||
if (entry_point.exec_mode == null and entry_point.exec_model == .Fragment) {
|
||||
switch (self.target.os.tag) {
|
||||
.vulkan, .opengl => |tag| {
|
||||
try self.sections.execution_modes.emit(self.gpa, .OpExecutionMode, .{
|
||||
.entry_point = entry_point_id,
|
||||
.mode = if (tag == .vulkan) .OriginUpperLeft else .OriginLowerLeft,
|
||||
});
|
||||
},
|
||||
.opencl => {},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return entry_points;
|
||||
@@ -352,6 +362,11 @@ pub fn finalize(self: *Module, a: Allocator) ![]Word {
|
||||
.vector16 => try self.addCapability(.Vector16),
|
||||
// Shader
|
||||
.shader => try self.addCapability(.Shader),
|
||||
.variable_pointers => {
|
||||
try self.addExtension("SPV_KHR_variable_pointers");
|
||||
try self.addCapability(.VariablePointersStorageBuffer);
|
||||
try self.addCapability(.VariablePointers);
|
||||
},
|
||||
.physical_storage_buffer => {
|
||||
try self.addExtension("SPV_KHR_physical_storage_buffer");
|
||||
try self.addCapability(.PhysicalStorageBufferAddresses);
|
||||
@@ -366,20 +381,20 @@ pub fn finalize(self: *Module, a: Allocator) ![]Word {
|
||||
// Emit memory model
|
||||
const addressing_model: spec.AddressingModel = blk: {
|
||||
if (self.hasFeature(.shader)) {
|
||||
break :blk switch (self.target.cpu.arch) {
|
||||
.spirv32 => .Logical, // TODO: I don't think this will ever be implemented.
|
||||
.spirv64 => .PhysicalStorageBuffer64,
|
||||
else => unreachable,
|
||||
};
|
||||
} else if (self.hasFeature(.kernel)) {
|
||||
break :blk switch (self.target.cpu.arch) {
|
||||
.spirv32 => .Physical32,
|
||||
.spirv64 => .Physical64,
|
||||
else => unreachable,
|
||||
};
|
||||
if (self.hasFeature(.physical_storage_buffer)) {
|
||||
assert(self.target.cpu.arch == .spirv64);
|
||||
break :blk .PhysicalStorageBuffer64;
|
||||
}
|
||||
assert(self.target.cpu.arch == .spirv);
|
||||
break :blk .Logical;
|
||||
}
|
||||
|
||||
unreachable;
|
||||
assert(self.hasFeature(.kernel));
|
||||
break :blk switch (self.target.cpu.arch) {
|
||||
.spirv32 => .Physical32,
|
||||
.spirv64 => .Physical64,
|
||||
else => unreachable,
|
||||
};
|
||||
};
|
||||
try self.sections.memory_model.emit(self.gpa, .OpMemoryModel, .{
|
||||
.addressing_model = addressing_model,
|
||||
@@ -746,13 +761,15 @@ pub fn declareEntryPoint(
|
||||
self: *Module,
|
||||
decl_index: Decl.Index,
|
||||
name: []const u8,
|
||||
execution_model: spec.ExecutionModel,
|
||||
exec_model: spec.ExecutionModel,
|
||||
exec_mode: ?spec.ExecutionMode,
|
||||
) !void {
|
||||
try self.entry_points.append(self.gpa, .{
|
||||
.decl_index = decl_index,
|
||||
.name = try self.arena.allocator().dupe(u8, name),
|
||||
.execution_model = execution_model,
|
||||
});
|
||||
const gop = try self.entry_points.getOrPut(self.gpa, self.declPtr(decl_index).result_id);
|
||||
gop.value_ptr.decl_index = decl_index;
|
||||
gop.value_ptr.name = try self.arena.allocator().dupe(u8, name);
|
||||
gop.value_ptr.exec_model = exec_model;
|
||||
// Might've been set by assembler
|
||||
if (!gop.found_existing) gop.value_ptr.exec_mode = exec_mode;
|
||||
}
|
||||
|
||||
pub fn debugName(self: *Module, target: IdResult, name: []const u8) !void {
|
||||
|
||||
+3
-2
@@ -162,7 +162,7 @@ pub fn updateExports(
|
||||
if (ip.isFunctionType(nav_ty)) {
|
||||
const spv_decl_index = try self.object.resolveNav(zcu, nav_index);
|
||||
const cc = Type.fromInterned(nav_ty).fnCallingConvention(zcu);
|
||||
const execution_model: spec.ExecutionModel = switch (target.os.tag) {
|
||||
const exec_model: spec.ExecutionModel = switch (target.os.tag) {
|
||||
.vulkan, .opengl => switch (cc) {
|
||||
.spirv_vertex => .Vertex,
|
||||
.spirv_fragment => .Fragment,
|
||||
@@ -185,7 +185,8 @@ pub fn updateExports(
|
||||
try self.object.spv.declareEntryPoint(
|
||||
spv_decl_index,
|
||||
exp.opts.name.toSlice(ip),
|
||||
execution_model,
|
||||
exec_model,
|
||||
null,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
+15
-9
@@ -501,21 +501,26 @@ pub fn addrSpaceCastIsValid(
|
||||
/// part of a merge (result of a branch) and may not be stored in memory at all. This function returns
|
||||
/// for a particular architecture and address space wether such pointers are logical.
|
||||
pub fn arePointersLogical(target: std.Target, as: AddressSpace) bool {
|
||||
if (target.os.tag != .vulkan) {
|
||||
return false;
|
||||
}
|
||||
if (target.os.tag != .vulkan) return false;
|
||||
|
||||
return switch (as) {
|
||||
// TODO: Vulkan doesn't support pointers in the generic address space, we
|
||||
// should remove this case but this requires a change in defaultAddressSpace().
|
||||
// For now, at least disable them from being regarded as physical.
|
||||
.generic => true,
|
||||
// For now, all global pointers are represented using PhysicalStorageBuffer, so these are real
|
||||
// pointers.
|
||||
// For now, all global pointers are represented using StorageBuffer or CrossWorkgroup,
|
||||
// so these are real pointers.
|
||||
.global => false,
|
||||
// TODO: Allowed with VK_KHR_variable_pointers.
|
||||
.shared => true,
|
||||
.constant, .local, .input, .output, .uniform, .push_constant, .storage_buffer => true,
|
||||
.physical_storage_buffer => false,
|
||||
.shared => !target.cpu.features.isEnabled(@intFromEnum(std.Target.spirv.Feature.variable_pointers)),
|
||||
.constant,
|
||||
.local,
|
||||
.input,
|
||||
.output,
|
||||
.uniform,
|
||||
.push_constant,
|
||||
.storage_buffer,
|
||||
=> true,
|
||||
else => unreachable,
|
||||
};
|
||||
}
|
||||
@@ -802,7 +807,8 @@ pub fn zigBackend(target: std.Target, use_llvm: bool) std.builtin.CompilerBacken
|
||||
.powerpc, .powerpcle, .powerpc64, .powerpc64le => .stage2_powerpc,
|
||||
.riscv64 => .stage2_riscv64,
|
||||
.sparc64 => .stage2_sparc64,
|
||||
.spirv64 => .stage2_spirv64,
|
||||
.spirv32 => if (target.os.tag == .opencl) .stage2_spirv64 else .other,
|
||||
.spirv, .spirv64 => .stage2_spirv64,
|
||||
.wasm32, .wasm64 => .stage2_wasm,
|
||||
.x86 => .stage2_x86,
|
||||
.x86_64 => .stage2_x86_64,
|
||||
|
||||
@@ -117,9 +117,9 @@ export fn testMutablePointer() void {
|
||||
// tmp.zig:37:38: note: imported here
|
||||
// neg_inf.zon:1:1: error: expected type '?u8'
|
||||
// tmp.zig:57:28: note: imported here
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_499'
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testNonExhaustiveEnum__enum_501'
|
||||
// tmp.zig:62:39: note: imported here
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_501'
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testUntaggedUnion__union_503'
|
||||
// tmp.zig:67:44: note: imported here
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_504'
|
||||
// neg_inf.zon:1:1: error: expected type 'tmp.testTaggedUnionVoid__union_506'
|
||||
// tmp.zig:72:50: note: imported here
|
||||
|
||||
@@ -15,6 +15,6 @@ pub export fn entry() void {
|
||||
// error
|
||||
//
|
||||
// :7:25: error: unable to resolve comptime value
|
||||
// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_473.C' must be comptime-known
|
||||
// :7:25: note: initializer of comptime-only struct 'tmp.S.foo__anon_475.C' must be comptime-known
|
||||
// :4:16: note: struct requires comptime because of this field
|
||||
// :4:16: note: types are not available at runtime
|
||||
|
||||
@@ -16,5 +16,5 @@ pub export fn entry2() void {
|
||||
//
|
||||
// :3:6: error: no field or member function named 'copy' in '[]const u8'
|
||||
// :9:8: error: no field or member function named 'bar' in '@TypeOf(.{})'
|
||||
// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_477'
|
||||
// :12:18: error: no field or member function named 'bar' in 'tmp.entry2__struct_479'
|
||||
// :12:6: note: struct declared here
|
||||
|
||||
@@ -6,6 +6,6 @@ export fn foo() void {
|
||||
|
||||
// error
|
||||
//
|
||||
// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_466'
|
||||
// :4:16: error: expected type 'tmp.T', found 'tmp.foo__struct_468'
|
||||
// :3:16: note: struct declared here
|
||||
// :1:11: note: struct declared here
|
||||
|
||||
@@ -44,9 +44,9 @@ comptime {
|
||||
//
|
||||
// :5:23: error: expected error union type, found 'comptime_int'
|
||||
// :10:23: error: expected error union type, found '@TypeOf(.{})'
|
||||
// :15:23: error: expected error union type, found 'tmp.test2__struct_503'
|
||||
// :15:23: error: expected error union type, found 'tmp.test2__struct_505'
|
||||
// :15:23: note: struct declared here
|
||||
// :20:27: error: expected error union type, found 'tmp.test3__struct_505'
|
||||
// :20:27: error: expected error union type, found 'tmp.test3__struct_507'
|
||||
// :20:27: note: struct declared here
|
||||
// :25:23: error: expected error union type, found 'struct { comptime *const [5:0]u8 = "hello" }'
|
||||
// :31:13: error: expected error union type, found 'u32'
|
||||
|
||||
+1
-1
@@ -145,7 +145,7 @@ const test_targets = blk: {
|
||||
.{
|
||||
.target = std.Target.Query.parse(.{
|
||||
.arch_os_abi = "spirv64-vulkan",
|
||||
.cpu_features = "vulkan_v1_2+int64+float16+float64",
|
||||
.cpu_features = "vulkan_v1_2+physical_storage_buffer+int64+float16+float64",
|
||||
}) catch unreachable,
|
||||
.use_llvm = false,
|
||||
.use_lld = false,
|
||||
|
||||
@@ -1047,6 +1047,128 @@ const targets = [_]ArchTarget{
|
||||
},
|
||||
},
|
||||
},
|
||||
.{
|
||||
.zig_name = "spirv",
|
||||
.llvm = .{
|
||||
.name = "SPIRV",
|
||||
.td_name = "SPIRV",
|
||||
},
|
||||
.branch_quota = 2000,
|
||||
.extra_features = &.{
|
||||
.{
|
||||
.zig_name = "v1_0",
|
||||
.desc = "Enable version 1.0",
|
||||
.deps = &.{},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_1",
|
||||
.desc = "Enable version 1.1",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_2",
|
||||
.desc = "Enable version 1.2",
|
||||
.deps = &.{"v1_1"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_3",
|
||||
.desc = "Enable version 1.3",
|
||||
.deps = &.{"v1_2"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_4",
|
||||
.desc = "Enable version 1.4",
|
||||
.deps = &.{"v1_3"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_5",
|
||||
.desc = "Enable version 1.5",
|
||||
.deps = &.{"v1_4"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "v1_6",
|
||||
.desc = "Enable version 1.6",
|
||||
.deps = &.{"v1_5"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "int64",
|
||||
.desc = "Enable Int64 capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "float16",
|
||||
.desc = "Enable Float16 capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "float64",
|
||||
.desc = "Enable Float64 capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "matrix",
|
||||
.desc = "Enable Matrix capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "storage_push_constant16",
|
||||
.desc = "Enable SPV_KHR_16bit_storage extension and the StoragePushConstant16 capability",
|
||||
.deps = &.{"v1_3"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "arbitrary_precision_integers",
|
||||
.desc = "Enable SPV_INTEL_arbitrary_precision_integers extension and the ArbitraryPrecisionIntegersINTEL capability",
|
||||
.deps = &.{"v1_5"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "kernel",
|
||||
.desc = "Enable Kernel capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "addresses",
|
||||
.desc = "Enable Addresses capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "generic_pointer",
|
||||
.desc = "Enable GenericPointer capability",
|
||||
.deps = &.{ "v1_0", "addresses" },
|
||||
},
|
||||
.{
|
||||
.zig_name = "vector16",
|
||||
.desc = "Enable Vector16 capability",
|
||||
.deps = &.{ "v1_0", "kernel" },
|
||||
},
|
||||
.{
|
||||
.zig_name = "shader",
|
||||
.desc = "Enable Shader capability",
|
||||
.deps = &.{ "v1_0", "matrix" },
|
||||
},
|
||||
.{
|
||||
.zig_name = "variable_pointers",
|
||||
.desc = "Enable SPV_KHR_physical_storage_buffer extension and the PhysicalStorageBufferAddresses capability",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
.{
|
||||
.zig_name = "physical_storage_buffer",
|
||||
.desc = "Enable SPV_KHR_variable_pointers extension and the (VariablePointers, VariablePointersStorageBuffer) capabilities",
|
||||
.deps = &.{"v1_0"},
|
||||
},
|
||||
},
|
||||
.extra_cpus = &.{
|
||||
.{
|
||||
.llvm_name = null,
|
||||
.zig_name = "vulkan_v1_2",
|
||||
.features = &.{ "v1_5", "shader" },
|
||||
},
|
||||
.{
|
||||
.llvm_name = null,
|
||||
.zig_name = "opencl_v2",
|
||||
.features = &.{ "v1_2", "kernel", "addresses", "generic_pointer" },
|
||||
},
|
||||
},
|
||||
},
|
||||
.{
|
||||
.zig_name = "riscv",
|
||||
.llvm = .{
|
||||
|
||||
Reference in New Issue
Block a user