Merge pull request #25772 from mlugg/kill-dead-code
compiler: rewrite some legalizations, and remove a bunch of dead code
+1 -2
@@ -211,10 +211,10 @@ set(ZIG_STAGE2_SOURCES
     lib/compiler_rt/absvti2.zig
     lib/compiler_rt/adddf3.zig
     lib/compiler_rt/addf3.zig
-    lib/compiler_rt/addo.zig
     lib/compiler_rt/addsf3.zig
     lib/compiler_rt/addtf3.zig
     lib/compiler_rt/addvsi3.zig
+    lib/compiler_rt/addvdi3.zig
     lib/compiler_rt/addxf3.zig
     lib/compiler_rt/arm.zig
     lib/compiler_rt/atomics.zig
@@ -354,7 +354,6 @@ set(ZIG_STAGE2_SOURCES
     lib/compiler_rt/sqrt.zig
     lib/compiler_rt/stack_probe.zig
     lib/compiler_rt/subdf3.zig
-    lib/compiler_rt/subo.zig
     lib/compiler_rt/subsf3.zig
     lib/compiler_rt/subtf3.zig
     lib/compiler_rt/subvdi3.zig
+3 -2
@@ -28,12 +28,13 @@ comptime {
    _ = @import("compiler_rt/negv.zig");

    _ = @import("compiler_rt/addvsi3.zig");
    _ = @import("compiler_rt/addvdi3.zig");

    _ = @import("compiler_rt/subvsi3.zig");
    _ = @import("compiler_rt/subvdi3.zig");

    _ = @import("compiler_rt/mulvsi3.zig");

    _ = @import("compiler_rt/addo.zig");
    _ = @import("compiler_rt/subo.zig");
    _ = @import("compiler_rt/mulo.zig");

    // Float routines
@@ -1,46 +0,0 @@
const std = @import("std");
const common = @import("./common.zig");
pub const panic = @import("common.zig").panic;

comptime {
    @export(&__addosi4, .{ .name = "__addosi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__addodi4, .{ .name = "__addodi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__addoti4, .{ .name = "__addoti4", .linkage = common.linkage, .visibility = common.visibility });
}

// addo - add overflow
// * return a+%b.
// * set overflow to 1 if a+b overflows, else to 0
// - addoXi4_generic as default

inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    @setRuntimeSafety(common.test_safety);
    overflow.* = 0;
    const sum: ST = a +% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a +% b (wraparound addition).
    // Overflow in a+b occurs iff a and b have the same sign
    // and the sign of the sum differs from a (or, equivalently, from b).
    // Slower routine: res = ~(a ^ b) & (sum ^ a)
    // Faster routine: res = (sum ^ a) & (sum ^ b)
    // Overflow occurred iff (res < 0).
    if (((sum ^ a) & (sum ^ b)) < 0)
        overflow.* = 1;
    return sum;
}

pub fn __addosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 {
    return addoXi4_generic(i32, a, b, overflow);
}
pub fn __addodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 {
    return addoXi4_generic(i64, a, b, overflow);
}
pub fn __addoti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 {
    return addoXi4_generic(i128, a, b, overflow);
}

test {
    _ = @import("addosi4_test.zig");
    _ = @import("addodi4_test.zig");
    _ = @import("addoti4_test.zig");
}
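Aside (not part of the commit): the sign-bit check used by the deleted __addoXi4 helpers, and reused inline by the new __addv* code below, can be exercised on its own. The following is a minimal illustrative sketch; the helper name addOverflows is made up for this example and does not exist in compiler_rt.

    const std = @import("std");

    /// For sum = a +% b, signed overflow happened iff ((sum ^ a) & (sum ^ b))
    /// has its sign bit set (sum differs in sign from both operands).
    fn addOverflows(comptime T: type, a: T, b: T) bool {
        const sum = a +% b;
        return ((sum ^ a) & (sum ^ b)) < 0;
    }

    test "sign-bit overflow check matches @addWithOverflow" {
        const cases = [_][2]i8{ .{ 100, 100 }, .{ -100, -100 }, .{ 100, -100 }, .{ 127, 1 }, .{ -128, -1 }, .{ 0, 0 } };
        for (cases) |c| {
            const expected = @addWithOverflow(c[0], c[1])[1] == 1;
            try std.testing.expectEqual(expected, addOverflows(i8, c[0], c[1]));
        }
    }
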
@@ -1,77 +0,0 @@
|
||||
const addv = @import("addo.zig");
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const math = std.math;
|
||||
|
||||
fn test__addodi4(a: i64, b: i64) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = addv.__addodi4(a, b, &result_ov);
|
||||
const expected: i64 = simple_addodi4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
fn simple_addodi4(a: i64, b: i64, overflow: *c_int) i64 {
|
||||
overflow.* = 0;
|
||||
const min: i64 = math.minInt(i64);
|
||||
const max: i64 = math.maxInt(i64);
|
||||
if (((a > 0) and (b > max - a)) or
|
||||
((a < 0) and (b < min - a)))
|
||||
overflow.* = 1;
|
||||
return a +% b;
|
||||
}
|
||||
|
||||
test "addodi4" {
|
||||
const min: i64 = math.minInt(i64);
|
||||
const max: i64 = math.maxInt(i64);
|
||||
var i: i64 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__addodi4(i, i);
|
||||
try test__addodi4(-i, -i);
|
||||
try test__addodi4(i, -i);
|
||||
try test__addodi4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 + 0 = 0
|
||||
// MIN + MIN overflow
|
||||
// MAX + MAX overflow
|
||||
// 0 + MIN MIN
|
||||
// 0 + MAX MAX
|
||||
// MIN + 0 MIN
|
||||
// MAX + 0 MAX
|
||||
// MIN + MAX -1
|
||||
// MAX + MIN -1
|
||||
try test__addodi4(0, 0);
|
||||
try test__addodi4(min, min);
|
||||
try test__addodi4(max, max);
|
||||
try test__addodi4(0, min);
|
||||
try test__addodi4(0, max);
|
||||
try test__addodi4(min, 0);
|
||||
try test__addodi4(max, 0);
|
||||
try test__addodi4(min, max);
|
||||
try test__addodi4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 + MIN overflow
|
||||
// MAX-1 + MAX overflow
|
||||
// 1 + MIN = MIN+1
|
||||
// -1 + MIN overflow
|
||||
// -1 + MAX = MAX-1
|
||||
// +1 + MAX overflow
|
||||
// MIN + 1 = MIN+1
|
||||
// MIN + -1 overflow
|
||||
// MAX + 1 overflow
|
||||
// MAX + -1 = MAX-1
|
||||
try test__addodi4(min + 1, min);
|
||||
try test__addodi4(max - 1, max);
|
||||
try test__addodi4(1, min);
|
||||
try test__addodi4(-1, min);
|
||||
try test__addodi4(-1, max);
|
||||
try test__addodi4(1, max);
|
||||
try test__addodi4(min, 1);
|
||||
try test__addodi4(min, -1);
|
||||
try test__addodi4(max, -1);
|
||||
try test__addodi4(max, 1);
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
const addv = @import("addo.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__addosi4(a: i32, b: i32) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = addv.__addosi4(a, b, &result_ov);
|
||||
const expected: i32 = simple_addosi4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
fn simple_addosi4(a: i32, b: i32, overflow: *c_int) i32 {
|
||||
overflow.* = 0;
|
||||
const min: i32 = -2147483648;
|
||||
const max: i32 = 2147483647;
|
||||
if (((a > 0) and (b > max - a)) or
|
||||
((a < 0) and (b < min - a)))
|
||||
overflow.* = 1;
|
||||
return a +% b;
|
||||
}
|
||||
|
||||
test "addosi4" {
|
||||
// -2^31 <= i32 <= 2^31-1
|
||||
// 2^31 = 2147483648
|
||||
// 2^31-1 = 2147483647
|
||||
const min: i32 = -2147483648;
|
||||
const max: i32 = 2147483647;
|
||||
var i: i32 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__addosi4(i, i);
|
||||
try test__addosi4(-i, -i);
|
||||
try test__addosi4(i, -i);
|
||||
try test__addosi4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 + 0 = 0
|
||||
// MIN + MIN overflow
|
||||
// MAX + MAX overflow
|
||||
// 0 + MIN MIN
|
||||
// 0 + MAX MAX
|
||||
// MIN + 0 MIN
|
||||
// MAX + 0 MAX
|
||||
// MIN + MAX -1
|
||||
// MAX + MIN -1
|
||||
try test__addosi4(0, 0);
|
||||
try test__addosi4(min, min);
|
||||
try test__addosi4(max, max);
|
||||
try test__addosi4(0, min);
|
||||
try test__addosi4(0, max);
|
||||
try test__addosi4(min, 0);
|
||||
try test__addosi4(max, 0);
|
||||
try test__addosi4(min, max);
|
||||
try test__addosi4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 + MIN overflow
|
||||
// MAX-1 + MAX overflow
|
||||
// 1 + MIN = MIN+1
|
||||
// -1 + MIN overflow
|
||||
// -1 + MAX = MAX-1
|
||||
// +1 + MAX overflow
|
||||
// MIN + 1 = MIN+1
|
||||
// MIN + -1 overflow
|
||||
// MAX + 1 overflow
|
||||
// MAX + -1 = MAX-1
|
||||
try test__addosi4(min + 1, min);
|
||||
try test__addosi4(max - 1, max);
|
||||
try test__addosi4(1, min);
|
||||
try test__addosi4(-1, min);
|
||||
try test__addosi4(-1, max);
|
||||
try test__addosi4(1, max);
|
||||
try test__addosi4(min, 1);
|
||||
try test__addosi4(min, -1);
|
||||
try test__addosi4(max, -1);
|
||||
try test__addosi4(max, 1);
|
||||
}
|
||||
@@ -1,80 +0,0 @@
|
||||
const addv = @import("addo.zig");
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const math = std.math;
|
||||
|
||||
fn test__addoti4(a: i128, b: i128) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = addv.__addoti4(a, b, &result_ov);
|
||||
const expected: i128 = simple_addoti4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
fn simple_addoti4(a: i128, b: i128, overflow: *c_int) i128 {
|
||||
overflow.* = 0;
|
||||
const min: i128 = math.minInt(i128);
|
||||
const max: i128 = math.maxInt(i128);
|
||||
if (((a > 0) and (b > max - a)) or
|
||||
((a < 0) and (b < min - a)))
|
||||
overflow.* = 1;
|
||||
return a +% b;
|
||||
}
|
||||
|
||||
test "addoti4" {
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
|
||||
|
||||
const min: i128 = math.minInt(i128);
|
||||
const max: i128 = math.maxInt(i128);
|
||||
var i: i128 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__addoti4(i, i);
|
||||
try test__addoti4(-i, -i);
|
||||
try test__addoti4(i, -i);
|
||||
try test__addoti4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 + 0 = 0
|
||||
// MIN + MIN overflow
|
||||
// MAX + MAX overflow
|
||||
// 0 + MIN MIN
|
||||
// 0 + MAX MAX
|
||||
// MIN + 0 MIN
|
||||
// MAX + 0 MAX
|
||||
// MIN + MAX -1
|
||||
// MAX + MIN -1
|
||||
try test__addoti4(0, 0);
|
||||
try test__addoti4(min, min);
|
||||
try test__addoti4(max, max);
|
||||
try test__addoti4(0, min);
|
||||
try test__addoti4(0, max);
|
||||
try test__addoti4(min, 0);
|
||||
try test__addoti4(max, 0);
|
||||
try test__addoti4(min, max);
|
||||
try test__addoti4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 + MIN overflow
|
||||
// MAX-1 + MAX overflow
|
||||
// 1 + MIN = MIN+1
|
||||
// -1 + MIN overflow
|
||||
// -1 + MAX = MAX-1
|
||||
// +1 + MAX overflow
|
||||
// MIN + 1 = MIN+1
|
||||
// MIN + -1 overflow
|
||||
// MAX + 1 overflow
|
||||
// MAX + -1 = MAX-1
|
||||
try test__addoti4(min + 1, min);
|
||||
try test__addoti4(max - 1, max);
|
||||
try test__addoti4(1, min);
|
||||
try test__addoti4(-1, min);
|
||||
try test__addoti4(-1, max);
|
||||
try test__addoti4(1, max);
|
||||
try test__addoti4(min, 1);
|
||||
try test__addoti4(min, -1);
|
||||
try test__addoti4(max, -1);
|
||||
try test__addoti4(max, 1);
|
||||
}
|
||||
@@ -0,0 +1,26 @@
const common = @import("./common.zig");
const testing = @import("std").testing;

pub const panic = common.panic;

comptime {
    @export(&__addvdi3, .{ .name = "__addvdi3", .linkage = common.linkage, .visibility = common.visibility });
}

pub fn __addvdi3(a: i64, b: i64) callconv(.c) i64 {
    const sum = a +% b;
    // Overflow occurred iff both operands have the same sign, and the sign of the sum does
    // not match it. In other words, iff the sum sign is not the sign of either operand.
    if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow");
    return sum;
}

test "addvdi3" {
    // const min: i64 = -9223372036854775808
    // const max: i64 = 9223372036854775807
    // TODO write panic handler for testing panics
    // try test__addvdi3(-9223372036854775808, -1, -1); // panic
    // try test__addvdi3(9223372036854775807, 1, 1); // panic
    try testing.expectEqual(-9223372036854775808, __addvdi3(-9223372036854775807, -1));
    try testing.expectEqual(9223372036854775807, __addvdi3(9223372036854775806, 1));
}
@@ -1,4 +1,3 @@
-const addv = @import("addo.zig");
 const common = @import("./common.zig");
 const testing = @import("std").testing;

@@ -9,9 +8,10 @@ comptime {
 }

 pub fn __addvsi3(a: i32, b: i32) callconv(.c) i32 {
-    var overflow: c_int = 0;
-    const sum = addv.__addosi4(a, b, &overflow);
-    if (overflow != 0) @panic("compiler-rt: integer overflow");
+    const sum = a +% b;
+    // Overflow occurred iff both operands have the same sign, and the sign of the sum does
+    // not match it. In other words, iff the sum sign is not the sign of either operand.
+    if (((sum ^ a) & (sum ^ b)) < 0) @panic("compiler-rt: integer overflow");
     return sum;
 }

@@ -1,47 +0,0 @@
//! subo - subtract overflow
//! * return a-%b.
//! * set overflow to 1 if a-b overflows, else to 0
//! - suboXi4_generic as default

const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");

pub const panic = common.panic;

comptime {
    @export(&__subosi4, .{ .name = "__subosi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__subodi4, .{ .name = "__subodi4", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__suboti4, .{ .name = "__suboti4", .linkage = common.linkage, .visibility = common.visibility });
}

pub fn __subosi4(a: i32, b: i32, overflow: *c_int) callconv(.c) i32 {
    return suboXi4_generic(i32, a, b, overflow);
}
pub fn __subodi4(a: i64, b: i64, overflow: *c_int) callconv(.c) i64 {
    return suboXi4_generic(i64, a, b, overflow);
}
pub fn __suboti4(a: i128, b: i128, overflow: *c_int) callconv(.c) i128 {
    return suboXi4_generic(i128, a, b, overflow);
}

inline fn suboXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const sum: ST = a -% b;
    // Hacker's Delight: section Overflow Detection, subsection Signed Add/Subtract.
    // Let sum = a -% b (wraparound subtraction).
    // Overflow in a-b occurs iff a and b have opposite signs
    // and the sign of the result is opposite to a's (or, equivalently, the same as b's).
    // Faster routine: res = (a ^ b) & (sum ^ a)
    // Slower routine: res = (sum ^ a) & ~(sum ^ b)
    // Overflow occurred iff (res < 0).
    if (((a ^ b) & (sum ^ a)) < 0)
        overflow.* = 1;
    return sum;
}

test {
    _ = @import("subosi4_test.zig");
    _ = @import("subodi4_test.zig");
    _ = @import("suboti4_test.zig");
}
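Aside (not part of the commit): the subtraction variant uses a slightly different identity, (a ^ b) & (diff ^ a). A minimal sketch with a made-up helper name, assuming current @subWithOverflow semantics:

    const std = @import("std");

    /// For diff = a -% b, signed overflow happened iff a and b have opposite
    /// signs and the result's sign differs from a's.
    fn subOverflows(comptime T: type, a: T, b: T) bool {
        const diff = a -% b;
        return ((a ^ b) & (diff ^ a)) < 0;
    }

    test "sign-bit overflow check matches @subWithOverflow" {
        const cases = [_][2]i8{ .{ -128, 1 }, .{ 127, -1 }, .{ 0, -128 }, .{ -1, -128 }, .{ 50, 60 } };
        for (cases) |c| {
            const expected = @subWithOverflow(c[0], c[1])[1] == 1;
            try std.testing.expectEqual(expected, subOverflows(i8, c[0], c[1]));
        }
    }
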
@@ -1,81 +0,0 @@
|
||||
const subo = @import("subo.zig");
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const math = std.math;
|
||||
|
||||
fn test__subodi4(a: i64, b: i64) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = subo.__subodi4(a, b, &result_ov);
|
||||
const expected: i64 = simple_subodi4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
// 2 cases on evaluating `a-b`:
|
||||
// 1. `a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
|
||||
// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
|
||||
// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
|
||||
pub fn simple_subodi4(a: i64, b: i64, overflow: *c_int) i64 {
|
||||
overflow.* = 0;
|
||||
const min: i64 = math.minInt(i64);
|
||||
const max: i64 = math.maxInt(i64);
|
||||
if (((b > 0) and (a < min + b)) or
|
||||
((b < 0) and (a > max + b)))
|
||||
overflow.* = 1;
|
||||
return a -% b;
|
||||
}
|
||||
|
||||
test "subodi3" {
|
||||
const min: i64 = math.minInt(i64);
|
||||
const max: i64 = math.maxInt(i64);
|
||||
var i: i64 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__subodi4(i, i);
|
||||
try test__subodi4(-i, -i);
|
||||
try test__subodi4(i, -i);
|
||||
try test__subodi4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 - 0 = 0
|
||||
// MIN - MIN = 0
|
||||
// MAX - MAX = 0
|
||||
// 0 - MIN overflow
|
||||
// 0 - MAX = MIN+1
|
||||
// MIN - 0 = MIN
|
||||
// MAX - 0 = MAX
|
||||
// MIN - MAX overflow
|
||||
// MAX - MIN overflow
|
||||
try test__subodi4(0, 0);
|
||||
try test__subodi4(min, min);
|
||||
try test__subodi4(max, max);
|
||||
try test__subodi4(0, min);
|
||||
try test__subodi4(0, max);
|
||||
try test__subodi4(min, 0);
|
||||
try test__subodi4(max, 0);
|
||||
try test__subodi4(min, max);
|
||||
try test__subodi4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 - MIN = 1
|
||||
// MAX-1 - MAX = -1
|
||||
// 1 - MIN overflow
|
||||
// -1 - MIN = MAX
|
||||
// -1 - MAX = MIN
|
||||
// +1 - MAX = MIN+2
|
||||
// MIN - 1 overflow
|
||||
// MIN - -1 = MIN+1
|
||||
// MAX - 1 = MAX-1
|
||||
// MAX - -1 overflow
|
||||
try test__subodi4(min + 1, min);
|
||||
try test__subodi4(max - 1, max);
|
||||
try test__subodi4(1, min);
|
||||
try test__subodi4(-1, min);
|
||||
try test__subodi4(-1, max);
|
||||
try test__subodi4(1, max);
|
||||
try test__subodi4(min, 1);
|
||||
try test__subodi4(min, -1);
|
||||
try test__subodi4(max, -1);
|
||||
try test__subodi4(max, 1);
|
||||
}
|
||||
@@ -1,82 +0,0 @@
|
||||
const subo = @import("subo.zig");
|
||||
const testing = @import("std").testing;
|
||||
|
||||
fn test__subosi4(a: i32, b: i32) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = subo.__subosi4(a, b, &result_ov);
|
||||
const expected: i32 = simple_subosi4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
// 2 cases on evaluating `a-b`:
|
||||
// 1. `a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
|
||||
// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
|
||||
// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
|
||||
pub fn simple_subosi4(a: i32, b: i32, overflow: *c_int) i32 {
|
||||
overflow.* = 0;
|
||||
const min: i32 = -2147483648;
|
||||
const max: i32 = 2147483647;
|
||||
if (((b > 0) and (a < min + b)) or
|
||||
((b < 0) and (a > max + b)))
|
||||
overflow.* = 1;
|
||||
return a -% b;
|
||||
}
|
||||
|
||||
test "subosi3" {
|
||||
// -2^31 <= i32 <= 2^31-1
|
||||
// 2^31 = 2147483648
|
||||
// 2^31-1 = 2147483647
|
||||
const min: i32 = -2147483648;
|
||||
const max: i32 = 2147483647;
|
||||
var i: i32 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__subosi4(i, i);
|
||||
try test__subosi4(-i, -i);
|
||||
try test__subosi4(i, -i);
|
||||
try test__subosi4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 - 0 = 0
|
||||
// MIN - MIN = 0
|
||||
// MAX - MAX = 0
|
||||
// 0 - MIN overflow
|
||||
// 0 - MAX = MIN+1
|
||||
// MIN - 0 = MIN
|
||||
// MAX - 0 = MAX
|
||||
// MIN - MAX overflow
|
||||
// MAX - MIN overflow
|
||||
try test__subosi4(0, 0);
|
||||
try test__subosi4(min, min);
|
||||
try test__subosi4(max, max);
|
||||
try test__subosi4(0, min);
|
||||
try test__subosi4(0, max);
|
||||
try test__subosi4(min, 0);
|
||||
try test__subosi4(max, 0);
|
||||
try test__subosi4(min, max);
|
||||
try test__subosi4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 - MIN = 1
|
||||
// MAX-1 - MAX = -1
|
||||
// 1 - MIN overflow
|
||||
// -1 - MIN = MAX
|
||||
// -1 - MAX = MIN
|
||||
// +1 - MAX = MIN+2
|
||||
// MIN - 1 overflow
|
||||
// MIN - -1 = MIN+1
|
||||
// MAX - 1 = MAX-1
|
||||
// MAX - -1 overflow
|
||||
try test__subosi4(min + 1, min);
|
||||
try test__subosi4(max - 1, max);
|
||||
try test__subosi4(1, min);
|
||||
try test__subosi4(-1, min);
|
||||
try test__subosi4(-1, max);
|
||||
try test__subosi4(1, max);
|
||||
try test__subosi4(min, 1);
|
||||
try test__subosi4(min, -1);
|
||||
try test__subosi4(max, -1);
|
||||
try test__subosi4(max, 1);
|
||||
}
|
||||
@@ -1,84 +0,0 @@
|
||||
const subo = @import("subo.zig");
|
||||
const builtin = @import("builtin");
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const math = std.math;
|
||||
|
||||
fn test__suboti4(a: i128, b: i128) !void {
|
||||
var result_ov: c_int = undefined;
|
||||
var expected_ov: c_int = undefined;
|
||||
const result = subo.__suboti4(a, b, &result_ov);
|
||||
const expected: i128 = simple_suboti4(a, b, &expected_ov);
|
||||
try testing.expectEqual(expected, result);
|
||||
try testing.expectEqual(expected_ov, result_ov);
|
||||
}
|
||||
|
||||
// 2 cases on evaluating `a-b`:
|
||||
// 1. `a-b` may underflow, iff b>0 && a<0 and a-b < min <=> a<min+b
|
||||
// 2. `a-b` may overflow, iff b<0 && a>0 and a-b > max <=> a>max+b
|
||||
// `-b` evaluation may overflow, iff b==min, but this is handled by the hardware
|
||||
pub fn simple_suboti4(a: i128, b: i128, overflow: *c_int) i128 {
|
||||
overflow.* = 0;
|
||||
const min: i128 = math.minInt(i128);
|
||||
const max: i128 = math.maxInt(i128);
|
||||
if (((b > 0) and (a < min + b)) or
|
||||
((b < 0) and (a > max + b)))
|
||||
overflow.* = 1;
|
||||
return a -% b;
|
||||
}
|
||||
|
||||
test "suboti3" {
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
|
||||
|
||||
const min: i128 = math.minInt(i128);
|
||||
const max: i128 = math.maxInt(i128);
|
||||
var i: i128 = 1;
|
||||
while (i < max) : (i *|= 2) {
|
||||
try test__suboti4(i, i);
|
||||
try test__suboti4(-i, -i);
|
||||
try test__suboti4(i, -i);
|
||||
try test__suboti4(-i, i);
|
||||
}
|
||||
|
||||
// edge cases
|
||||
// 0 - 0 = 0
|
||||
// MIN - MIN = 0
|
||||
// MAX - MAX = 0
|
||||
// 0 - MIN overflow
|
||||
// 0 - MAX = MIN+1
|
||||
// MIN - 0 = MIN
|
||||
// MAX - 0 = MAX
|
||||
// MIN - MAX overflow
|
||||
// MAX - MIN overflow
|
||||
try test__suboti4(0, 0);
|
||||
try test__suboti4(min, min);
|
||||
try test__suboti4(max, max);
|
||||
try test__suboti4(0, min);
|
||||
try test__suboti4(0, max);
|
||||
try test__suboti4(min, 0);
|
||||
try test__suboti4(max, 0);
|
||||
try test__suboti4(min, max);
|
||||
try test__suboti4(max, min);
|
||||
|
||||
// derived edge cases
|
||||
// MIN+1 - MIN = 1
|
||||
// MAX-1 - MAX = -1
|
||||
// 1 - MIN overflow
|
||||
// -1 - MIN = MAX
|
||||
// -1 - MAX = MIN
|
||||
// +1 - MAX = MIN+2
|
||||
// MIN - 1 overflow
|
||||
// MIN - -1 = MIN+1
|
||||
// MAX - 1 = MAX-1
|
||||
// MAX - -1 overflow
|
||||
try test__suboti4(min + 1, min);
|
||||
try test__suboti4(max - 1, max);
|
||||
try test__suboti4(1, min);
|
||||
try test__suboti4(-1, min);
|
||||
try test__suboti4(-1, max);
|
||||
try test__suboti4(1, max);
|
||||
try test__suboti4(min, 1);
|
||||
try test__suboti4(min, -1);
|
||||
try test__suboti4(max, -1);
|
||||
try test__suboti4(max, 1);
|
||||
}
|
||||
@@ -1,4 +1,3 @@
-const subv = @import("subo.zig");
 const common = @import("./common.zig");
 const testing = @import("std").testing;

@@ -9,9 +8,10 @@ comptime {
 }

 pub fn __subvdi3(a: i64, b: i64) callconv(.c) i64 {
-    var overflow: c_int = 0;
-    const sum = subv.__subodi4(a, b, &overflow);
-    if (overflow != 0) @panic("compiler-rt: integer overflow");
+    const sum = a -% b;
+    // Overflow occurred iff the operands have opposite signs, and the sign of the
+    // sum is the opposite of the lhs sign.
+    if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow");
     return sum;
 }

@@ -1,4 +1,3 @@
-const subv = @import("subo.zig");
 const common = @import("./common.zig");
 const testing = @import("std").testing;

@@ -9,9 +8,10 @@ comptime {
 }

 pub fn __subvsi3(a: i32, b: i32) callconv(.c) i32 {
-    var overflow: c_int = 0;
-    const sum = subv.__subosi4(a, b, &overflow);
-    if (overflow != 0) @panic("compiler-rt: integer overflow");
+    const sum = a -% b;
+    // Overflow occurred iff the operands have opposite signs, and the sign of the
+    // sum is the opposite of the lhs sign.
+    if (((a ^ b) & (sum ^ a)) < 0) @panic("compiler-rt: integer overflow");
     return sum;
 }

@@ -809,15 +809,13 @@ static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
 #endif
 }

-zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow);
 static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow) || defined(zig_gcc)
     int32_t full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int32_t full_res = __addosi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i32(full_res, bits);
     return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -835,15 +833,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
 #endif
 }

-zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow);
 static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow) || defined(zig_gcc)
     int64_t full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int64_t full_res = __addodi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i64(full_res, bits);
     return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -917,15 +913,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
 #endif
 }

-zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow);
 static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     int32_t full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int32_t full_res = __subosi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i32(full_res, bits);
     return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -943,15 +937,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
 #endif
 }

-zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow);
 static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow) || defined(zig_gcc)
     int64_t full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    int64_t full_res = __subodi4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i64(full_res, bits);
     return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -1755,15 +1747,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
 #endif
 }

-zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
 static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
 #if zig_has_builtin(add_overflow)
     zig_i128 full_res;
     bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs);
+    bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -1781,15 +1771,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
 #endif
 }

-zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
 static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
 #if zig_has_builtin(sub_overflow)
     zig_i128 full_res;
     bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
 #else
-    int overflow_int;
-    zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int);
-    bool overflow = overflow_int != 0;
+    zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs);
+    bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
 #endif
     *res = zig_wrap_i128(full_res, bits);
     return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
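Aside (not part of the commit): besides the sign-bit check, the C helpers still range-check full_res against zig_minInt_i/zig_maxInt_i because the Zig-level integer may be narrower than the 32/64/128-bit carrier word. The same two-step idea, sketched in Zig with a made-up helper for an i7 carried in an i32:

    const std = @import("std");

    // Illustrative sketch: detect overflow for a narrow signed integer (i7)
    // by doing the addition in a wider carrier and range-checking the result.
    fn addNarrowOverflows(a: i7, b: i7) bool {
        const wide = @as(i32, a) + @as(i32, b); // cannot overflow i32
        return wide < std.math.minInt(i7) or wide > std.math.maxInt(i7);
    }

    test "narrow add overflow via wide carrier" {
        try std.testing.expect(addNarrowOverflows(63, 1)); // 64 > maxInt(i7)
        try std.testing.expect(!addNarrowOverflows(-64, 63));
        try std.testing.expect(addNarrowOverflows(-64, -1)); // -65 < minInt(i7)
    }
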
+26 -14
@@ -660,8 +660,8 @@ pub const Inst = struct {
     /// Given a pointer to a slice, return a pointer to the pointer of the slice.
     /// Uses the `ty_op` field.
     ptr_slice_ptr_ptr,
-    /// Given an (array value or vector value) and element index,
-    /// return the element value at that index.
+    /// Given an (array value or vector value) and element index, return the element value at
+    /// that index. If the lhs is a vector value, the index is guaranteed to be comptime-known.
     /// Result type is the element type of the array operand.
     /// Uses the `bin_op` field.
     array_elem_val,
@@ -874,10 +874,6 @@ pub const Inst = struct {
     /// Uses the `ty_pl` field.
     save_err_return_trace_index,

-    /// Store an element to a vector pointer at an index.
-    /// Uses the `vector_store_elem` field.
-    vector_store_elem,
-
     /// Compute a pointer to a `Nav` at runtime, always one of:
     ///
     /// * `threadlocal var`
@@ -919,6 +915,26 @@ pub const Inst = struct {
     /// Operand is unused and set to Ref.none
     work_group_id,

+    // The remaining instructions are not emitted by Sema. They are only emitted by `Legalize`,
+    // depending on the enabled features. As such, backends can consider them `unreachable` if
+    // they do not enable the relevant legalizations.
+
+    /// Given a pointer to a vector, a runtime-known index, and a scalar value, store the value
+    /// into the vector at the given index. Zig does not support this operation, but `Legalize`
+    /// may emit it when scalarizing vector operations.
+    ///
+    /// Uses the `pl_op` field with payload `Bin`. `operand` is the vector pointer. `lhs` is the
+    /// element index of type `usize`. `rhs` is the element value. Result is always void.
+    legalize_vec_store_elem,
+    /// Given a vector value and a runtime-known index, return the element value at that index.
+    /// This instruction is similar to `array_elem_val`; the only difference is that the index
+    /// here is runtime-known, which is usually not allowed for vectors. `Legalize` may emit
+    /// this instruction when scalarizing vector operations.
+    ///
+    /// Uses the `bin_op` field. `lhs` is the vector pointer. `rhs` is the element index. Result
+    /// type is the vector element type.
+    legalize_vec_elem_val,
+
     pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
         switch (op) {
             .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
@@ -1220,11 +1236,6 @@ pub const Inst = struct {
         operand: Ref,
         operation: std.builtin.ReduceOp,
     },
-    vector_store_elem: struct {
-        vector_ptr: Ref,
-        // Index into a different array.
-        payload: u32,
-    },
     ty_nav: struct {
         ty: InternPool.Index,
         nav: InternPool.Nav.Index,
@@ -1689,8 +1700,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
         .set_union_tag,
         .prefetch,
         .set_err_return_trace,
-        .vector_store_elem,
         .c_va_end,
+        .legalize_vec_store_elem,
         => return .void,

         .slice_len,
@@ -1709,7 +1720,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
             return .fromInterned(ip.funcTypeReturnType(callee_ty.toIntern()));
         },

-        .slice_elem_val, .ptr_elem_val, .array_elem_val => {
+        .slice_elem_val, .ptr_elem_val, .array_elem_val, .legalize_vec_elem_val => {
             const ptr_ty = air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip);
             return ptr_ty.childTypeIp(ip);
         },
@@ -1857,7 +1868,6 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .prefetch,
         .wasm_memory_grow,
         .set_err_return_trace,
-        .vector_store_elem,
         .c_va_arg,
         .c_va_copy,
         .c_va_end,
@@ -1868,6 +1878,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .intcast_safe,
         .int_from_float_safe,
         .int_from_float_optimized_safe,
+        .legalize_vec_store_elem,
         => true,

         .add,
@@ -2013,6 +2024,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .work_item_id,
         .work_group_size,
         .work_group_id,
+        .legalize_vec_elem_val,
         => false,

         .is_non_null_ptr, .is_null_ptr, .is_non_err_ptr, .is_err_ptr => air.typeOf(data.un_op, ip).isVolatilePtrIp(ip),
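Aside (not part of the commit): the new legalize_vec_elem_val / legalize_vec_store_elem instructions let Legalize rewrite a whole-vector operation as a per-element loop with a runtime index. A rough source-level analogy of that scalarization (illustrative only; the function names are made up and this is not the compiler's actual lowering):

    const std = @import("std");

    fn addVec4(a: @Vector(4, i32), b: @Vector(4, i32)) @Vector(4, i32) {
        return a + b; // whole-vector form, as emitted for backends with vector support
    }

    fn addVec4Scalarized(a: @Vector(4, i32), b: @Vector(4, i32)) @Vector(4, i32) {
        const a_arr: [4]i32 = a; // vectors coerce to same-length arrays
        const b_arr: [4]i32 = b;
        var out: [4]i32 = undefined;
        var i: usize = 0;
        while (i < 4) : (i += 1) {
            // reading a_arr[i]/b_arr[i] with a runtime index plays the role of
            // legalize_vec_elem_val; storing out[i] plays the role of legalize_vec_store_elem.
            out[i] = a_arr[i] + b_arr[i];
        }
        return out; // arrays coerce back to vectors
    }

    test "scalarized form matches the vector form" {
        const a: @Vector(4, i32) = .{ 1, 2, 3, 4 };
        const b: @Vector(4, i32) = .{ 10, 20, 30, 40 };
        try std.testing.expect(@reduce(.And, addVec4(a, b) == addVec4Scalarized(a, b)));
    }
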
+1016 -1389
@@ -14,7 +14,7 @@ features: if (switch (dev.env) {
         return comptime bootstrap_features.contains(feature);
     }
     /// `inline` to propagate comptime-known result.
-    fn hasAny(_: @This(), comptime features: []const Feature) bool {
+    inline fn hasAny(_: @This(), comptime features: []const Feature) bool {
         return comptime !bootstrap_features.intersectWith(.initMany(features)).eql(.initEmpty());
     }
 } else struct {
@@ -154,9 +154,9 @@ pub const Feature = enum {
     /// Currently assumes little endian and a specific integer layout where the lsb of every integer is the lsb of the
     /// first byte of memory until bit pointers know their backing type.
     expand_packed_store,
-    /// Replace `struct_field_val` of a packed field with a `store` and packed `load`.
+    /// Replace `struct_field_val` of a packed field with a `bitcast` to integer, `shr`, `trunc`, and `bitcast` to field type.
     expand_packed_struct_field_val,
-    /// Replace `aggregate_init` of a packed aggregate with a series of packed `store`s followed by a `load`.
+    /// Replace `aggregate_init` of a packed struct with a sequence of `shl_exact`, `bitcast`, `intcast`, and `bit_or`.
     expand_packed_aggregate_init,

fn scalarize(tag: Air.Inst.Tag) Feature {
|
||||
@@ -320,28 +320,36 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.xor,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.add_safe => if (l.features.has(.expand_add_safe)) {
|
||||
assert(!l.features.has(.scalarize_add_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .add_with_overflow));
|
||||
} else if (l.features.has(.scalarize_add_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.sub_safe => if (l.features.has(.expand_sub_safe)) {
|
||||
assert(!l.features.has(.scalarize_sub_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .sub_with_overflow));
|
||||
} else if (l.features.has(.scalarize_sub_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.mul_safe => if (l.features.has(.expand_mul_safe)) {
|
||||
assert(!l.features.has(.scalarize_mul_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .mul_with_overflow));
|
||||
} else if (l.features.has(.scalarize_mul_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.ptr_add, .ptr_sub => {},
|
||||
inline .add_with_overflow,
|
||||
@@ -350,7 +358,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.shl_with_overflow,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
|
||||
if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
|
||||
}
|
||||
},
|
||||
.alloc => {},
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
@@ -387,7 +397,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (l.features.has(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
}
|
||||
},
|
||||
inline .not,
|
||||
@@ -406,64 +418,41 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.float_from_int,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.bitcast => if (l.features.has(.scalarize_bitcast)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
|
||||
const to_ty = ty_op.ty.toType();
|
||||
const to_ty_tag = to_ty.zigTypeTag(zcu);
|
||||
const to_ty_legal = legal: switch (to_ty_tag) {
|
||||
else => true,
|
||||
.array, .vector => {
|
||||
if (to_ty.arrayLen(zcu) == 1) break :legal true;
|
||||
const to_elem_ty = to_ty.childType(zcu);
|
||||
break :legal to_elem_ty.bitSize(zcu) == 8 * to_elem_ty.abiSize(zcu);
|
||||
},
|
||||
};
|
||||
|
||||
const from_ty = l.typeOf(ty_op.operand);
|
||||
const from_ty_legal = legal: switch (from_ty.zigTypeTag(zcu)) {
|
||||
else => true,
|
||||
.array, .vector => {
|
||||
if (from_ty.arrayLen(zcu) == 1) break :legal true;
|
||||
const from_elem_ty = from_ty.childType(zcu);
|
||||
break :legal from_elem_ty.bitSize(zcu) == 8 * from_elem_ty.abiSize(zcu);
|
||||
},
|
||||
};
|
||||
|
||||
if (!to_ty_legal and !from_ty_legal and to_ty.arrayLen(zcu) == from_ty.arrayLen(zcu)) switch (to_ty_tag) {
|
||||
else => unreachable,
|
||||
.array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastToArrayBlockPayload(inst)),
|
||||
.vector => continue :inst try l.scalarize(inst, .bitcast),
|
||||
};
|
||||
if (!to_ty_legal) switch (to_ty_tag) {
|
||||
else => unreachable,
|
||||
.array => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultArrayBlockPayload(inst)),
|
||||
.vector => continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastResultVectorBlockPayload(inst)),
|
||||
};
|
||||
if (!from_ty_legal) continue :inst l.replaceInst(inst, .block, try l.scalarizeBitcastOperandBlockPayload(inst));
|
||||
if (try l.scalarizeBitcastBlockPayload(inst)) |payload| {
|
||||
continue :inst l.replaceInst(inst, .block, payload);
|
||||
}
|
||||
},
|
||||
.intcast_safe => if (l.features.has(.expand_intcast_safe)) {
|
||||
assert(!l.features.has(.scalarize_intcast_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntcastBlockPayload(inst));
|
||||
} else if (l.features.has(.scalarize_intcast_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.int_from_float_safe => if (l.features.has(.expand_int_from_float_safe)) {
|
||||
assert(!l.features.has(.scalarize_int_from_float_safe));
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, false));
|
||||
} else if (l.features.has(.scalarize_int_from_float_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.int_from_float_optimized_safe => if (l.features.has(.expand_int_from_float_optimized_safe)) {
|
||||
assert(!l.features.has(.scalarize_int_from_float_optimized_safe));
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, true));
|
||||
} else if (l.features.has(.scalarize_int_from_float_optimized_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.block, .loop => {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
@@ -498,7 +487,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.neg_optimized,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op;
|
||||
if (l.typeOf(un_op).isVector(zcu)) continue :inst try l.scalarize(inst, .un_op);
|
||||
if (l.typeOf(un_op).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op));
|
||||
}
|
||||
},
|
||||
.cmp_lt,
|
||||
.cmp_lt_optimized,
|
||||
@@ -515,7 +506,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
=> {},
|
||||
inline .cmp_vector, .cmp_vector_optimized => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
if (ty_pl.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .cmp_vector);
|
||||
if (ty_pl.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector));
|
||||
}
|
||||
},
|
||||
.cond_br => {
|
||||
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
@@ -570,13 +563,17 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.load => if (l.features.has(.expand_packed_load)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
const ptr_info = l.typeOf(ty_op.operand).ptrInfo(zcu);
|
||||
if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst));
|
||||
if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.packedLoadBlockPayload(inst));
|
||||
}
|
||||
},
|
||||
.ret, .ret_safe, .ret_load => {},
|
||||
.store, .store_safe => if (l.features.has(.expand_packed_store)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
const ptr_info = l.typeOf(bin_op.lhs).ptrInfo(zcu);
|
||||
if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst));
|
||||
if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.packedStoreBlockPayload(inst));
|
||||
}
|
||||
},
|
||||
.unreach,
|
||||
.optional_payload,
|
||||
@@ -624,7 +621,7 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
switch (vector_ty.vectorLen(zcu)) {
|
||||
0 => unreachable,
|
||||
1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(vector_ty.childType(zcu).toIntern()),
|
||||
.ty = .fromType(vector_ty.childType(zcu)),
|
||||
.operand = reduce.operand,
|
||||
} }),
|
||||
else => {},
|
||||
@@ -641,9 +638,15 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
else => {},
|
||||
}
|
||||
},
|
||||
.shuffle_one => if (l.features.has(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one),
|
||||
.shuffle_two => if (l.features.has(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two),
|
||||
.select => if (l.features.has(.scalarize_select)) continue :inst try l.scalarize(inst, .select),
|
||||
.shuffle_one => if (l.features.has(.scalarize_shuffle_one)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst));
|
||||
},
|
||||
.shuffle_two => if (l.features.has(.scalarize_shuffle_two)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst));
|
||||
},
|
||||
.select => if (l.features.has(.scalarize_select)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select));
|
||||
},
|
||||
.memset,
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
@@ -666,16 +669,27 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
const agg_ty = ty_pl.ty.toType();
|
||||
switch (agg_ty.zigTypeTag(zcu)) {
|
||||
else => {},
|
||||
.@"struct", .@"union" => switch (agg_ty.containerLayout(zcu)) {
|
||||
.@"union" => unreachable,
|
||||
.@"struct" => switch (agg_ty.containerLayout(zcu)) {
|
||||
.auto, .@"extern" => {},
|
||||
.@"packed" => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)),
|
||||
.@"packed" => switch (agg_ty.structFieldCount(zcu)) {
|
||||
0 => unreachable,
|
||||
// An `aggregate_init` of a packed struct with 1 field is just a fancy bitcast.
|
||||
1 => continue :inst l.replaceInst(inst, .bitcast, .{ .ty_op = .{
|
||||
.ty = .fromType(agg_ty),
|
||||
.operand = @enumFromInt(l.air_extra.items[ty_pl.payload]),
|
||||
} }),
|
||||
else => continue :inst l.replaceInst(inst, .block, try l.packedAggregateInitBlockPayload(inst)),
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
.union_init, .prefetch => {},
|
||||
.mul_add => if (l.features.has(.scalarize_mul_add)) {
|
||||
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
if (l.typeOf(pl_op.operand).isVector(zcu)) continue :inst try l.scalarize(inst, .pl_op_bin);
|
||||
if (l.typeOf(pl_op.operand).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin));
|
||||
}
|
||||
},
|
||||
.field_parent_ptr,
|
||||
.wasm_memory_size,
|
||||
@@ -685,7 +699,6 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.set_err_return_trace,
|
||||
.addrspace_cast,
|
||||
.save_err_return_trace_index,
|
||||
.vector_store_elem,
|
||||
.runtime_nav_ptr,
|
||||
.c_va_arg,
|
||||
.c_va_copy,
|
||||
@@ -694,1003 +707,757 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.work_item_id,
|
||||
.work_group_size,
|
||||
.work_group_id,
|
||||
.legalize_vec_elem_val,
|
||||
.legalize_vec_store_elem,
|
||||
=> {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, bitcast, cmp_vector, shuffle_one, shuffle_two, select };
|
||||
/// inline to propagate comptime-known `replaceInst` result.
|
||||
inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag {
|
||||
return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form));
|
||||
}
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data {
|
||||
const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, select };
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, form: ScalarizeForm) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig = l.air_instructions.get(@intFromEnum(orig_inst));
|
||||
const res_ty = l.typeOfIndex(orig_inst);
|
||||
const res_len = res_ty.vectorLen(zcu);
|
||||
|
||||
const extra_insts = switch (form) {
|
||||
.un_op, .ty_op, .bitcast => 1,
|
||||
.bin_op, .cmp_vector => 2,
|
||||
.pl_op_bin => 3,
|
||||
.shuffle_one, .shuffle_two => 13,
|
||||
.select => 6,
|
||||
const result_is_array = switch (res_ty.zigTypeTag(zcu)) {
|
||||
.vector => false,
|
||||
.array => true,
|
||||
else => unreachable,
|
||||
};
|
||||
var inst_buf: [5 + extra_insts + 9]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const res_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
|
||||
});
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
} },
|
||||
});
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .vector_store_elem,
|
||||
.data = .{ .vector_store_elem = .{
|
||||
.vector_ptr = res_alloc_inst.toRef(),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = res_elem: switch (form) {
|
||||
.un_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .un_op = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.un_op,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef() },
|
||||
}).toRef(),
|
||||
.ty_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.childType(zcu).toIntern()),
|
||||
.operand = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.ty_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.bin_op => loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.bin_op.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.bin_op.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.pl_op_bin => {
|
||||
const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
break :res_elem loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .pl_op = .{
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
}),
|
||||
.operand = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.pl_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef();
|
||||
},
|
||||
.bitcast => loop.block.addBitCast(l, res_ty.childType(zcu), loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.ty_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef()),
|
||||
.cmp_vector => {
|
||||
const extra = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data;
|
||||
break :res_elem (try loop.block.addCmp(
|
||||
l,
|
||||
extra.compareOperator(),
|
||||
loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.{ .optimized = switch (orig.tag) {
|
||||
else => unreachable,
|
||||
.cmp_vector => false,
|
||||
.cmp_vector_optimized => true,
|
||||
} },
|
||||
)).toRef();
|
||||
},
|
||||
.shuffle_one, .shuffle_two => {
|
||||
const ip = &zcu.intern_pool;
|
||||
const unwrapped = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => l.getTmpAir().unwrapShuffleOne(zcu, orig_inst),
|
||||
.shuffle_two => l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst),
|
||||
};
|
||||
const operand_a = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => unwrapped.operand,
|
||||
.shuffle_two => unwrapped.operand_a,
|
||||
};
|
||||
const operand_a_len = l.typeOf(operand_a).vectorLen(zcu);
|
||||
const elem_ty = res_ty.childType(zcu);
|
||||
var res_elem: Result = .init(l, elem_ty, &loop.block);
|
||||
res_elem.block = .init(loop.block.stealCapacity(extra_insts));
|
||||
{
|
||||
const ExpectedContents = extern struct {
|
||||
mask_elems: [128]InternPool.Index,
|
||||
ct_elems: switch (form) {
|
||||
else => unreachable,
|
||||
.shuffle_one => extern struct {
|
||||
keys: [152]InternPool.Index,
|
||||
header: u8 align(@alignOf(u32)),
|
||||
index: [256][2]u8,
|
||||
},
|
||||
.shuffle_two => void,
|
||||
},
|
||||
};
|
||||
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa);
|
||||
const gpa = stack.get();
|
||||
|
||||
const mask_elems = try gpa.alloc(InternPool.Index, res_len);
|
||||
defer gpa.free(mask_elems);
|
||||
|
||||
var ct_elems: switch (form) {
|
||||
else => unreachable,
|
||||
.shuffle_one => std.AutoArrayHashMapUnmanaged(InternPool.Index, void),
|
||||
.shuffle_two => struct {
|
||||
const empty: @This() = .{};
|
||||
inline fn deinit(_: @This(), _: std.mem.Allocator) void {}
|
||||
inline fn ensureTotalCapacity(_: @This(), _: std.mem.Allocator, _: usize) error{}!void {}
|
||||
},
|
||||
} = .empty;
|
||||
defer ct_elems.deinit(gpa);
|
||||
try ct_elems.ensureTotalCapacity(gpa, res_len);
|
||||
|
||||
const mask_elem_ty = try pt.intType(.signed, 1 + Type.smallestUnsignedBits(@max(operand_a_len, switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => res_len,
|
||||
.shuffle_two => l.typeOf(unwrapped.operand_b).vectorLen(zcu),
|
||||
})));
|
||||
for (mask_elems, unwrapped.mask) |*mask_elem_val, mask_elem| mask_elem_val.* = (try pt.intValue(mask_elem_ty, switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => switch (mask_elem.unwrap()) {
|
||||
.elem => |index| index,
|
||||
.value => |elem_val| if (ip.isUndef(elem_val))
|
||||
operand_a_len
|
||||
else
|
||||
~@as(i33, @intCast((ct_elems.getOrPutAssumeCapacity(elem_val)).index)),
|
||||
},
|
||||
.shuffle_two => switch (mask_elem.unwrap()) {
|
||||
.a_elem => |a_index| a_index,
|
||||
.b_elem => |b_index| ~@as(i33, b_index),
|
||||
.undef => operand_a_len,
|
||||
},
|
||||
})).toIntern();
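// Note on the encoding: a mask value in [0, operand_a_len) selects that element of the
// first operand, operand_a_len itself marks an undefined element, and a negative value
// `~x` selects element `x` of the second source (the comptime-known element array for
// `shuffle_one`, or `operand_b` for `shuffle_two`); `mask_elem_ty` is made one bit wider
// than the larger index needs so that every case fits.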
|
||||
const mask_ty = try pt.arrayType(.{
|
||||
.len = res_len,
|
||||
.child = mask_elem_ty.toIntern(),
|
||||
});
|
||||
const mask_elem_inst = res_elem.block.add(l, .{
|
||||
.tag = .ptr_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = Air.internedToRef(try pt.intern(.{ .ptr = .{
|
||||
.ty = (try pt.manyConstPtrType(mask_elem_ty)).toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = (try pt.aggregateValue(mask_ty, mask_elems)).toIntern(),
|
||||
.orig_ty = (try pt.singleConstPtrType(mask_ty)).toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} })),
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
var def_cond_br: CondBr = .init(l, (try res_elem.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
mask_elem_inst.toRef(),
|
||||
try pt.intRef(mask_elem_ty, operand_a_len),
|
||||
.{},
|
||||
)).toRef(), &res_elem.block, .{});
|
||||
def_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity());
|
||||
{
|
||||
const operand_b_used = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => ct_elems.count() > 0,
|
||||
.shuffle_two => true,
|
||||
};
|
||||
var operand_cond_br: CondBr = undefined;
|
||||
operand_cond_br.then_block = if (operand_b_used) then_block: {
|
||||
operand_cond_br = .init(l, (try def_cond_br.then_block.addCmp(
|
||||
l,
|
||||
.gte,
|
||||
mask_elem_inst.toRef(),
|
||||
try pt.intRef(mask_elem_ty, 0),
|
||||
.{},
|
||||
)).toRef(), &def_cond_br.then_block, .{});
|
||||
break :then_block .init(def_cond_br.then_block.stealRemainingCapacity());
|
||||
} else def_cond_br.then_block;
|
||||
_ = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = operand_a,
|
||||
.rhs = operand_cond_br.then_block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = mask_elem_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
if (operand_b_used) {
|
||||
operand_cond_br.else_block = .init(operand_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = if (switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => ct_elems.count() > 1,
|
||||
.shuffle_two => true,
|
||||
}) operand_cond_br.else_block.add(l, .{
|
||||
.tag = switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => .ptr_elem_val,
|
||||
.shuffle_two => .array_elem_val,
|
||||
},
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = operand_b: switch (form) {
|
||||
else => comptime unreachable,
|
||||
.shuffle_one => {
|
||||
const ct_elems_ty = try pt.arrayType(.{
|
||||
.len = ct_elems.count(),
|
||||
.child = elem_ty.toIntern(),
|
||||
});
|
||||
break :operand_b Air.internedToRef(try pt.intern(.{ .ptr = .{
|
||||
.ty = (try pt.manyConstPtrType(elem_ty)).toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = (try pt.aggregateValue(ct_elems_ty, ct_elems.keys())).toIntern(),
|
||||
.orig_ty = (try pt.singleConstPtrType(ct_elems_ty)).toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} }));
|
||||
},
|
||||
.shuffle_two => unwrapped.operand_b,
|
||||
},
|
||||
.rhs = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = operand_cond_br.else_block.add(l, .{
|
||||
.tag = .not,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(mask_elem_ty.toIntern()),
|
||||
.operand = mask_elem_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef() else res_elem_br: {
|
||||
_ = operand_cond_br.else_block.stealCapacity(3);
|
||||
break :res_elem_br Air.internedToRef(ct_elems.keys()[0]);
|
||||
},
|
||||
} },
|
||||
});
|
||||
def_cond_br.else_block = .init(operand_cond_br.else_block.stealRemainingCapacity());
|
||||
try operand_cond_br.finish(l);
|
||||
} else {
|
||||
def_cond_br.then_block = operand_cond_br.then_block;
|
||||
_ = def_cond_br.then_block.stealCapacity(6);
|
||||
def_cond_br.else_block = .init(def_cond_br.then_block.stealRemainingCapacity());
|
||||
}
|
||||
}
|
||||
_ = def_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = try pt.undefRef(elem_ty),
|
||||
} },
|
||||
});
|
||||
try def_cond_br.finish(l);
|
||||
}
|
||||
try res_elem.finish(l);
|
||||
break :res_elem res_elem.inst.toRef();
|
||||
},
|
||||
.select => {
|
||||
const extra = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
var res_elem: Result = .init(l, l.typeOf(extra.lhs).childType(zcu), &loop.block);
|
||||
res_elem.block = .init(loop.block.stealCapacity(extra_insts));
|
||||
{
|
||||
var select_cond_br: CondBr = .init(l, res_elem.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig.data.pl_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(), &res_elem.block, .{});
|
||||
select_cond_br.then_block = .init(res_elem.block.stealRemainingCapacity());
|
||||
_ = select_cond_br.then_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.then_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
select_cond_br.else_block = .init(select_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = select_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = res_elem.inst,
|
||||
.operand = select_cond_br.else_block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try select_cond_br.finish(l);
|
||||
}
|
||||
try res_elem.finish(l);
|
||||
break :res_elem res_elem.inst.toRef();
|
||||
},
|
||||
},
|
||||
}),
|
||||
} },
|
||||
});
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_len - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
}
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeBitcastToArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
const res_ty = orig_ty_op.ty.toType();
|
||||
const res_elem_ty = res_ty.childType(zcu);
|
||||
const res_len = res_ty.arrayLen(zcu);
|
||||
|
||||
var inst_buf: [16]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const res_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
|
||||
});
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
} },
|
||||
});
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = res_alloc_inst.toRef(),
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
}),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig_ty_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef()),
|
||||
} },
|
||||
});
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_len - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
}
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeBitcastOperandBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
const res_ty = orig_ty_op.ty.toType();
|
||||
const operand_ty = l.typeOf(orig_ty_op.operand);
|
||||
const int_bits: u16 = @intCast(operand_ty.bitSize(zcu));
|
||||
const int_ty = try pt.intType(.unsigned, int_bits);
|
||||
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits));
|
||||
const elem_bits: u16 = @intCast(operand_ty.childType(zcu).bitSize(zcu));
|
||||
const elem_int_ty = try pt.intType(.unsigned, elem_bits);
|
||||
|
||||
var inst_buf: [22]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const int_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(int_ty) },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = int_alloc_inst.toRef(),
|
||||
.rhs = try pt.intRef(int_ty, 0),
|
||||
} },
|
||||
});
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
} },
|
||||
});
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
const cur_int_inst = loop.block.add(l, .{
|
||||
.tag = .bit_or,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .shl_exact,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(int_ty.toIntern()),
|
||||
.operand = loop.block.addBitCast(l, elem_int_ty, loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = orig_ty_op.operand,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef()),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .mul,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(shift_ty.toIntern()),
|
||||
.operand = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = try pt.intRef(shift_ty, elem_bits),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(int_ty.toIntern()),
|
||||
.operand = int_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, operand_ty.arrayLen(zcu) - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = int_alloc_inst.toRef(),
|
||||
.rhs = cur_int_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.addBitCast(l, res_ty, cur_int_inst.toRef()),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
}
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeBitcastResultArrayBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
const res_ty = orig_ty_op.ty.toType();
|
||||
const int_bits: u16 = @intCast(res_ty.bitSize(zcu));
|
||||
const int_ty = try pt.intType(.unsigned, int_bits);
|
||||
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits));
|
||||
const res_elem_ty = res_ty.childType(zcu);
|
||||
const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu));
|
||||
const elem_int_ty = try pt.intType(.unsigned, elem_bits);
|
||||
|
||||
var inst_buf: [20]Air.Inst.Index = undefined;
|
||||
if (result_is_array) {
|
||||
// This is only allowed when legalizing an elementwise bitcast.
|
||||
assert(orig.tag == .bitcast);
|
||||
assert(form == .ty_op);
|
||||
}
|
||||
|
||||
// Our output will be a loop doing elementwise stores:
//
// %1 = block(@Vector(N, Scalar), {
// %2 = alloc(*usize)
// %3 = alloc(*@Vector(N, Scalar))
// %4 = store(%2, @zero_usize)
// %5 = loop({
// %6 = load(%2)
// %7 = <scalar result of operation at index %5>
// %8 = legalize_vec_store_elem(%3, %5, %6)
// %9 = cmp_eq(%6, <usize, N-1>)
// %10 = cond_br(%9, {
// %11 = load(%3)
// %12 = br(%1, %11)
// }, {
// %13 = add(%6, @one_usize)
// %14 = store(%2, %13)
// %15 = repeat(%5)
// })
// })
// })
//
// If scalarizing an elementwise bitcast, the result might be an array, in which case
// `legalize_vec_store_elem` becomes two instructions (`ptr_elem_ptr` and `store`).
// Therefore, there are 13 or 14 instructions in the block, plus however many are
// needed to compute each result element for `form`.
const inst_per_form: usize = switch (form) {
.un_op, .ty_op => 2,
.bin_op, .cmp_vector => 3,
.pl_op_bin => 4,
.select => 7,
};
const max_inst_per_form = 7; // maximum value in the above switch
var inst_buf: [14 + max_inst_per_form]Air.Inst.Index = undefined;
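// Sizing note: the loop skeleton above is at most 14 instructions (13 when the result is
// a vector), and computing one result element needs `inst_per_form` more, up to 7 for
// `.select`. The buffer is sized for the worst case; forms that need fewer give the
// surplus back later via `stealCapacity(max_inst_per_form - inst_per_form)`.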
|
||||
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const res_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
|
||||
});
|
||||
const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand);
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(res_ty)).toRef();
|
||||
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const elem_val: Air.Inst.Ref = switch (form) {
|
||||
.un_op => elem: {
|
||||
const orig_operand = orig.data.un_op;
|
||||
const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
|
||||
break :elem loop.block.addUnOp(l, orig.tag, operand).toRef();
|
||||
},
|
||||
.ty_op => elem: {
|
||||
const orig_operand = orig.data.ty_op.operand;
|
||||
const operand_is_array = switch (l.typeOf(orig_operand).zigTypeTag(zcu)) {
|
||||
.vector => false,
|
||||
.array => true,
|
||||
else => unreachable,
|
||||
};
|
||||
const operand = loop.block.addBinOp(
|
||||
l,
|
||||
if (operand_is_array) .array_elem_val else .legalize_vec_elem_val,
|
||||
orig_operand,
|
||||
index_val,
|
||||
).toRef();
|
||||
break :elem loop.block.addTyOp(l, orig.tag, res_elem_ty, operand).toRef();
|
||||
},
|
||||
.bin_op => elem: {
|
||||
const orig_bin = orig.data.bin_op;
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
break :elem loop.block.addBinOp(l, orig.tag, lhs, rhs).toRef();
|
||||
},
|
||||
.pl_op_bin => elem: {
|
||||
const orig_operand = orig.data.pl_op.operand;
|
||||
const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
break :elem loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = operand,
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
} },
|
||||
}).toRef();
|
||||
},
|
||||
.cmp_vector => elem: {
|
||||
const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data;
|
||||
const cmp_op = orig_payload.compareOperator();
|
||||
const optimized = switch (orig.tag) {
|
||||
.cmp_vector => false,
|
||||
.cmp_vector_optimized => true,
|
||||
else => unreachable,
|
||||
};
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.rhs, index_val).toRef();
|
||||
break :elem loop.block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef();
|
||||
},
|
||||
.select => elem: {
|
||||
const orig_cond = orig.data.pl_op.operand;
|
||||
const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
|
||||
const elem_block_inst = loop.block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(res_elem_ty),
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
var elem_block: Block = .init(loop.block.stealCapacity(2));
|
||||
const cond = elem_block.addBinOp(l, .legalize_vec_elem_val, orig_cond, index_val).toRef();
|
||||
|
||||
var condbr: CondBr = .init(l, cond, &elem_block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealCapacity(2));
|
||||
const lhs = condbr.then_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
condbr.then_block.addBr(l, elem_block_inst, lhs);
|
||||
|
||||
condbr.else_block = .init(loop.block.stealCapacity(2));
|
||||
const rhs = condbr.else_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
condbr.else_block.addBr(l, elem_block_inst, rhs);
|
||||
|
||||
try condbr.finish(l);
|
||||
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body());
|
||||
|
||||
break :elem elem_block_inst.toRef();
|
||||
},
|
||||
};
|
||||
_ = loop.block.stealCapacity(max_inst_per_form - inst_per_form);
|
||||
if (result_is_array) {
|
||||
const elem_ptr = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(try pt.singleMutPtrType(res_elem_ty)),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = result_ptr,
|
||||
.rhs = index_val,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
|
||||
} else {
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = elem_val,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef((try pt.singleMutPtrType(res_elem_ty)).toIntern()),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = res_alloc_inst.toRef(),
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
}),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{
|
||||
.tag = .trunc,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(elem_int_ty.toIntern()),
|
||||
.operand = loop.block.add(l, .{
|
||||
.tag = .shr,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = int_ref,
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .mul,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(shift_ty.toIntern()),
|
||||
.operand = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = try pt.intRef(shift_ty, elem_bits),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef()),
|
||||
} },
|
||||
});
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_ty.arrayLen(zcu) - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
_ = loop.block.stealCapacity(1);
|
||||
}
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, res_len - 1))).toRef();
|
||||
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, res_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
|
||||
try loop.finish(l);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
.ty = .fromType(res_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeBitcastResultVectorBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
fn scalarizeShuffleOneBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const gpa = zcu.gpa;
|
||||
|
||||
const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);
|
||||
|
||||
// We're going to emit something like this:
//
// var x: @Vector(N, T) = all_comptime_known_elems;
// for (out_idxs, in_idxs) |i, j| x[i] = operand[j];
//
// So we must first compute `out_idxs` and `in_idxs`.
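// For instance (hypothetical mask, for illustration only): given
//   { .value = c0, .elem = 2, .value = c1, .elem = 0 }
// the initial value is { c0, undef, c1, undef }, and the loop performs
//   x[1] = operand[2]; x[3] = operand[0];
// so out_idxs = { 1, 3 } and in_idxs = { 2, 0 }.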
|
||||
|
||||
var sfba_state = std.heap.stackFallback(512, gpa);
|
||||
const sfba = sfba_state.get();
|
||||
|
||||
const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
|
||||
defer sfba.free(out_idxs_buf);
|
||||
|
||||
const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
|
||||
defer sfba.free(in_idxs_buf);
|
||||
|
||||
var n: usize = 0;
|
||||
for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
|
||||
.value => {},
|
||||
.elem => |in_idx| {
|
||||
out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
|
||||
in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
|
||||
n += 1;
|
||||
},
|
||||
};
|
||||
|
||||
const init_val: Value = init: {
|
||||
const undef_val = try pt.undefValue(shuffle.result_ty.childType(zcu));
|
||||
const elems = try sfba.alloc(InternPool.Index, shuffle.mask.len);
|
||||
defer sfba.free(elems);
|
||||
for (shuffle.mask, elems) |mask, *elem| elem.* = switch (mask.unwrap()) {
|
||||
.value => |ip_index| ip_index,
|
||||
.elem => undef_val.toIntern(),
|
||||
};
|
||||
break :init try pt.aggregateValue(shuffle.result_ty, elems);
|
||||
};
|
||||
|
||||
// %1 = block(@Vector(N, T), {
// %2 = alloc(*@Vector(N, T))
// %3 = alloc(*usize)
// %4 = store(%2, <init_val>)
// %5 = [addScalarizedShuffle]
// %6 = load(%2)
// %7 = br(%1, %6)
// })
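//
// Capacity note: the block above is 6 instructions (%2 through %7), matching `inst_buf`
// below; the reservation of 19 is presumably that plus the 13 instructions
// `addScalarizedShuffle` appends beyond the one it adds to this block.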
|
||||
|
||||
var inst_buf: [6]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, 19);
|
||||
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
|
||||
_ = main_block.addBinOp(l, .store, result_ptr, .fromValue(init_val));
|
||||
|
||||
try l.addScalarizedShuffle(
|
||||
&main_block,
|
||||
shuffle.operand,
|
||||
result_ptr,
|
||||
index_ptr,
|
||||
out_idxs_buf[0..n],
|
||||
in_idxs_buf[0..n],
|
||||
);
|
||||
|
||||
const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
|
||||
main_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = .fromType(shuffle.result_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeShuffleTwoBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const gpa = zcu.gpa;
|
||||
|
||||
const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst);
|
||||
|
||||
// We're going to emit something like this:
//
// var x: @Vector(N, T) = undefined;
// for (out_idxs_a, in_idxs_a) |i, j| x[i] = operand_a[j];
// for (out_idxs_b, in_idxs_b) |i, j| x[i] = operand_b[j];
//
// The AIR will look like this:
//
// %1 = block(@Vector(N, T), {
// %2 = alloc(*@Vector(N, T))
// %3 = alloc(*usize)
// %4 = store(%2, <@Vector(N, T), undefined>)
// %5 = [addScalarizedShuffle]
// %6 = [addScalarizedShuffle]
// %7 = load(%2)
// %8 = br(%1, %7)
// })
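//
// Capacity note: the block above is at most 7 instructions, matching `inst_buf` below, and
// the reservation of 33 appears to be 7 plus 13 for each of the two possible
// `addScalarizedShuffle` calls; a side with no runtime elements instead releases its
// unused slot with `stealCapacity(1)`.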
|
||||
|
||||
var sfba_state = std.heap.stackFallback(512, gpa);
|
||||
const sfba = sfba_state.get();
|
||||
|
||||
const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
|
||||
defer sfba.free(out_idxs_buf);
|
||||
|
||||
const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
|
||||
defer sfba.free(in_idxs_buf);
|
||||
|
||||
// Iterate `shuffle.mask` before doing anything, because modifying AIR invalidates it.
|
||||
const out_idxs_a, const in_idxs_a, const out_idxs_b, const in_idxs_b = idxs: {
|
||||
var n: usize = 0;
|
||||
for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
|
||||
.undef, .b_elem => {},
|
||||
.a_elem => |in_idx| {
|
||||
out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
|
||||
in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
|
||||
n += 1;
|
||||
},
|
||||
};
|
||||
const a_len = n;
|
||||
for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
|
||||
.undef, .a_elem => {},
|
||||
.b_elem => |in_idx| {
|
||||
out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
|
||||
in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
|
||||
n += 1;
|
||||
},
|
||||
};
|
||||
break :idxs .{
|
||||
out_idxs_buf[0..a_len],
|
||||
in_idxs_buf[0..a_len],
|
||||
out_idxs_buf[a_len..n],
|
||||
in_idxs_buf[a_len..n],
|
||||
};
|
||||
};
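// The a-side pairs occupy out_idxs_buf[0..a_len] / in_idxs_buf[0..a_len] and the b-side
// pairs the remainder up to `n`, so one pair of buffers serves both
// `addScalarizedShuffle` calls below.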
|
||||
|
||||
var inst_buf: [7]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, 33);
|
||||
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
|
||||
_ = main_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.undefValue(shuffle.result_ty)));
|
||||
|
||||
if (out_idxs_a.len == 0) {
|
||||
_ = main_block.stealCapacity(1);
|
||||
} else {
|
||||
try l.addScalarizedShuffle(
|
||||
&main_block,
|
||||
shuffle.operand_a,
|
||||
result_ptr,
|
||||
index_ptr,
|
||||
out_idxs_a,
|
||||
in_idxs_a,
|
||||
);
|
||||
}
|
||||
|
||||
if (out_idxs_b.len == 0) {
|
||||
_ = main_block.stealCapacity(1);
|
||||
} else {
|
||||
try l.addScalarizedShuffle(
|
||||
&main_block,
|
||||
shuffle.operand_b,
|
||||
result_ptr,
|
||||
index_ptr,
|
||||
out_idxs_b,
|
||||
in_idxs_b,
|
||||
);
|
||||
}
|
||||
|
||||
const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
|
||||
main_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = .fromType(shuffle.result_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
/// Adds code to `parent_block` which behaves like this loop:
///
/// for (out_idxs, in_idxs) |i, j| result_vec_ptr[i] = operand_vec[j];
///
/// The actual AIR adds exactly one instruction to `parent_block` itself and 14 instructions
/// overall, and is as follows:
///
/// %1 = block(void, {
/// %2 = store(index_ptr, @zero_usize)
/// %3 = loop({
/// %4 = load(index_ptr)
/// %5 = ptr_elem_val(out_idxs_ptr, %4)
/// %6 = ptr_elem_val(in_idxs_ptr, %4)
/// %7 = legalize_vec_elem_val(operand_vec, %6)
/// %8 = legalize_vec_store_elem(result_vec_ptr, %4, %7)
/// %9 = cmp_eq(%4, <usize, out_idxs.len-1>)
/// %10 = cond_br(%9, {
/// %11 = br(%1, @void_value)
/// }, {
/// %12 = add(%4, @one_usize)
/// %13 = store(index_ptr, %12)
/// %14 = repeat(%3)
/// })
/// })
/// })
///
/// The caller is responsible for reserving space in `l.air_instructions`.
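///
/// In source terms the loop behaves roughly like this (illustrative only):
///
///   var i: usize = 0;
///   while (true) : (i += 1) {
///       result_vec_ptr[out_idxs[i]] = operand_vec[in_idxs[i]];
///       if (i == out_idxs.len - 1) break;
///   }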
|
||||
fn addScalarizedShuffle(
|
||||
l: *Legalize,
|
||||
parent_block: *Block,
|
||||
operand_vec: Air.Inst.Ref,
|
||||
result_vec_ptr: Air.Inst.Ref,
|
||||
index_ptr: Air.Inst.Ref,
|
||||
out_idxs: []const InternPool.Index,
|
||||
in_idxs: []const InternPool.Index,
|
||||
) Error!void {
|
||||
const pt = l.pt;
|
||||
|
||||
assert(out_idxs.len == in_idxs.len);
|
||||
const n = out_idxs.len;
|
||||
|
||||
const idxs_ty = try pt.arrayType(.{ .len = n, .child = .usize_type });
|
||||
const idxs_ptr_ty = try pt.singleConstPtrType(idxs_ty);
|
||||
const manyptr_usize_ty = try pt.manyConstPtrType(.usize);
|
||||
|
||||
const out_idxs_ptr = try pt.intern(.{ .ptr = .{
|
||||
.ty = manyptr_usize_ty.toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = (try pt.aggregateValue(idxs_ty, out_idxs)).toIntern(),
|
||||
.orig_ty = idxs_ptr_ty.toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} });
|
||||
const in_idxs_ptr = try pt.intern(.{ .ptr = .{
|
||||
.ty = manyptr_usize_ty.toIntern(),
|
||||
.base_addr = .{ .uav = .{
|
||||
.val = (try pt.aggregateValue(idxs_ty, in_idxs)).toIntern(),
|
||||
.orig_ty = idxs_ptr_ty.toIntern(),
|
||||
} },
|
||||
.byte_offset = 0,
|
||||
} });
|
||||
|
||||
const main_block_inst = parent_block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .void_type,
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
|
||||
var inst_buf: [13]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const in_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(in_idxs_ptr), index_val).toRef();
|
||||
const out_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(out_idxs_ptr), index_val).toRef();
|
||||
|
||||
const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, operand_vec, in_idx_val).toRef();
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_vec_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = out_idx_val,
|
||||
.rhs = elem_val,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, n - 1))).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
condbr.then_block.addBr(l, main_block_inst, .void_value);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(main_block_inst)].ty_pl.payload = try l.addBlockBody(main_block.body());
|
||||
}
|
||||
fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
const res_ty = orig_ty_op.ty.toType();
|
||||
const int_bits: u16 = @intCast(res_ty.bitSize(zcu));
|
||||
const int_ty = try pt.intType(.unsigned, int_bits);
|
||||
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, int_bits));
|
||||
const res_elem_ty = res_ty.childType(zcu);
|
||||
const elem_bits: u16 = @intCast(res_elem_ty.bitSize(zcu));
|
||||
const elem_int_ty = try pt.intType(.unsigned, elem_bits);
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
|
||||
var inst_buf: [19]Air.Inst.Index = undefined;
|
||||
const dest_ty = ty_op.ty.toType();
|
||||
const dest_legal = switch (dest_ty.zigTypeTag(zcu)) {
|
||||
else => true,
|
||||
.array, .vector => legal: {
|
||||
if (dest_ty.arrayLen(zcu) == 1) break :legal true;
|
||||
const dest_elem_ty = dest_ty.childType(zcu);
|
||||
break :legal dest_elem_ty.bitSize(zcu) == 8 * dest_elem_ty.abiSize(zcu);
|
||||
},
|
||||
};
|
||||
|
||||
const operand_ty = l.typeOf(ty_op.operand);
|
||||
const operand_legal = switch (operand_ty.zigTypeTag(zcu)) {
|
||||
else => true,
|
||||
.array, .vector => legal: {
|
||||
if (operand_ty.arrayLen(zcu) == 1) break :legal true;
|
||||
const operand_elem_ty = operand_ty.childType(zcu);
|
||||
break :legal operand_elem_ty.bitSize(zcu) == 8 * operand_elem_ty.abiSize(zcu);
|
||||
},
|
||||
};
|
||||
|
||||
if (dest_legal and operand_legal) return null;
|
||||
|
||||
if (!operand_legal and !dest_legal and operand_ty.arrayLen(zcu) == dest_ty.arrayLen(zcu)) {
|
||||
// from_ty and to_ty are both arrays or vectors of types with the same bit size,
|
||||
// so we can do an elementwise bitcast.
|
||||
return try l.scalarizeBlockPayload(orig_inst, .ty_op);
|
||||
}
|
||||
|
||||
// Fallback path. Our strategy is to use an unsigned integer type as an intermediate
// "bag of bits" representation which can be manipulated by bitwise operations.

const num_bits: u16 = @intCast(dest_ty.bitSize(zcu));
assert(operand_ty.bitSize(zcu) == num_bits);
const uint_ty = try pt.intType(.unsigned, num_bits);
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
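// For example (hypothetical types, for illustration only): bitcasting a @Vector(4, u3)
// operand through this path would use `uint_ty` = u12 as the bag of bits and
// `shift_ty` = u4 (log2_int_ceil(12) = 4), packing and unpacking each 3-bit element with
// shifts, truncation, and per-element bitcasts.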
|
||||
|
||||
var inst_buf: [39]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const res_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
|
||||
});
|
||||
const int_ref = res_block.addBitCast(l, int_ty, orig_ty_op.operand);
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
// First, convert `operand_ty` to `uint_ty` (`uN`).

const uint_val: Air.Inst.Ref = uint_val: {
if (operand_legal) {
_ = main_block.stealCapacity(19);
break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
}
|
||||
|
||||
// %1 = block({
|
||||
// %2 = alloc(*usize)
|
||||
// %3 = alloc(*uN)
|
||||
// %4 = store(%2, <usize, operand_len>)
|
||||
// %5 = store(%3, <uN, 0>)
|
||||
// %6 = loop({
|
||||
// %7 = load(%2)
|
||||
// %8 = array_elem_val(orig_operand, %7)
|
||||
// %9 = bitcast(uE, %8)
|
||||
// %10 = intcast(uN, %9)
|
||||
// %11 = load(%3)
|
||||
// %12 = shl_exact(%11, <uS, E>)
|
||||
// %13 = bit_or(%12, %10)
|
||||
// %14 = cmp_eq(%4, @zero_usize)
|
||||
// %15 = cond_br(%14, {
|
||||
// %16 = br(%1, %13)
|
||||
// }, {
|
||||
// %17 = store(%3, %13)
|
||||
// %18 = sub(%7, @one_usize)
|
||||
// %19 = store(%2, %18)
|
||||
// %20 = repeat(%6)
|
||||
// })
|
||||
// })
|
||||
// })
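//
// Capacity note: %2 through %20 above is 19 instructions, which is why the legal-operand
// fast path steals 19 slots and why `uint_block` below is carved out of
// `main_block.stealCapacity(19)`.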
|
||||
|
||||
const elem_bits = operand_ty.childType(zcu).bitSize(zcu);
|
||||
const elem_bits_val = try pt.intValue(shift_ty, elem_bits);
|
||||
const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
|
||||
|
||||
const uint_block_inst = main_block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(uint_ty),
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
var uint_block: Block = .init(main_block.stealCapacity(19));
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .vector_store_elem,
|
||||
.data = .{ .vector_store_elem = .{
|
||||
.vector_ptr = res_alloc_inst.toRef(),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = loop.block.addBitCast(l, res_elem_ty, loop.block.add(l, .{
|
||||
.tag = .trunc,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(elem_int_ty.toIntern()),
|
||||
.operand = loop.block.add(l, .{
|
||||
.tag = .shr,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = int_ref,
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .mul,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .intcast,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(shift_ty.toIntern()),
|
||||
.operand = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = try pt.intRef(shift_ty, elem_bits),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
}).toRef()),
|
||||
}),
|
||||
} },
|
||||
});
|
||||
const index_ptr = uint_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = uint_block.addTy(l, .alloc, try pt.singleMutPtrType(uint_ty)).toRef();
|
||||
_ = uint_block.addBinOp(
|
||||
l,
|
||||
.store,
|
||||
index_ptr,
|
||||
.fromValue(try pt.intValue(.usize, operand_ty.arrayLen(zcu))),
|
||||
);
|
||||
_ = uint_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.intValue(uint_ty, 0)));
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_ty.vectorLen(zcu) - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
var loop: Loop = .init(l, &uint_block);
|
||||
loop.block = .init(uint_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const raw_elem = loop.block.addBinOp(
|
||||
l,
|
||||
if (operand_ty.zigTypeTag(zcu) == .vector) .legalize_vec_elem_val else .array_elem_val,
|
||||
ty_op.operand,
|
||||
index_val,
|
||||
).toRef();
|
||||
const elem_uint = loop.block.addBitCast(l, elem_uint_ty, raw_elem);
|
||||
const elem_extended = loop.block.addTyOp(l, .intcast, uint_ty, elem_uint).toRef();
|
||||
const old_result = loop.block.addTyOp(l, .load, uint_ty, result_ptr).toRef();
|
||||
const shifted_result = loop.block.addBinOp(l, .shl_exact, old_result, .fromValue(elem_bits_val)).toRef();
|
||||
const new_result = loop.block.addBinOp(l, .bit_or, shifted_result, elem_extended).toRef();
|
||||
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .zero_usize).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
condbr.then_block.addBr(l, uint_block_inst, new_result);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
_ = condbr.else_block.addBinOp(l, .store, result_ptr, new_result);
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .sub, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(uint_block_inst)].ty_pl.payload = try l.addBlockBody(uint_block.body());
|
||||
|
||||
break :uint_val uint_block_inst.toRef();
|
||||
};
|
||||
|
||||
// Now convert `uint_ty` (`uN`) to `dest_ty`.

if (dest_legal) {
_ = main_block.stealCapacity(17);
const result = main_block.addBitCast(l, dest_ty, uint_val);
main_block.addBr(l, orig_inst, result);
} else {
|
||||
// %1 = alloc(*usize)
|
||||
// %2 = alloc(*@Vector(N, Result))
|
||||
// %3 = store(%1, @zero_usize)
|
||||
// %4 = loop({
|
||||
// %5 = load(%1)
|
||||
// %6 = mul(%5, <usize, E>)
|
||||
// %7 = intcast(uS, %6)
|
||||
// %8 = shr(uint_val, %7)
|
||||
// %9 = trunc(uE, %8)
|
||||
// %10 = bitcast(Result, %9)
|
||||
// %11 = legalize_vec_store_elem(%2, %5, %10)
|
||||
// %12 = cmp_eq(%5, <usize, vec_len>)
|
||||
// %13 = cond_br(%12, {
|
||||
// %14 = load(%2)
|
||||
// %15 = br(%0, %14)
|
||||
// }, {
|
||||
// %16 = add(%5, @one_usize)
|
||||
// %17 = store(%1, %16)
|
||||
// %18 = repeat(%4)
|
||||
// })
|
||||
// })
|
||||
//
|
||||
// The result might be an array, in which case `legalize_vec_store_elem`
|
||||
// becomes `ptr_elem_ptr` followed by `store`.
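//
// This is the reverse of the packing loop above: each element is recovered by shifting
// the bag-of-bits value right by `index * E` bits, truncating to the element-sized
// unsigned integer, and bitcasting to the result element type; array results store
// through `ptr_elem_ptr` + `store`, vector results through `legalize_vec_store_elem`.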
|
||||
|
||||
const elem_ty = dest_ty.childType(zcu);
|
||||
const elem_bits = elem_ty.bitSize(zcu);
|
||||
const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
|
||||
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(dest_ty)).toRef();
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const bit_offset = loop.block.addBinOp(l, .mul, index_val, .fromValue(try pt.intValue(.usize, elem_bits))).toRef();
|
||||
const casted_bit_offset = loop.block.addTyOp(l, .intcast, shift_ty, bit_offset).toRef();
|
||||
const shifted_uint = loop.block.addBinOp(l, .shr, uint_val, casted_bit_offset).toRef();
|
||||
const elem_uint = loop.block.addTyOp(l, .trunc, elem_uint_ty, shifted_uint).toRef();
|
||||
const elem_val = loop.block.addBitCast(l, elem_ty, elem_uint);
|
||||
switch (dest_ty.zigTypeTag(zcu)) {
|
||||
.array => {
|
||||
const elem_ptr = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(try pt.singleMutPtrType(elem_ty)),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = result_ptr,
|
||||
.rhs = index_val,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
|
||||
},
|
||||
.vector => {
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = elem_val,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
_ = loop.block.stealCapacity(1);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, dest_ty.arrayLen(zcu) - 1))).toRef();
|
||||
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, dest_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
}
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
.ty = .fromType(dest_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
@@ -1698,169 +1465,145 @@ fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const orig = l.air_instructions.get(@intFromEnum(orig_inst));
|
||||
const res_ty = l.typeOfIndex(orig_inst);
|
||||
const wrapped_res_ty = res_ty.fieldType(0, zcu);
|
||||
const wrapped_res_scalar_ty = wrapped_res_ty.childType(zcu);
|
||||
const res_len = wrapped_res_ty.vectorLen(zcu);
|
||||
const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
|
||||
|
||||
const vec_tuple_ty = l.typeOfIndex(orig_inst);
|
||||
const vec_int_ty = vec_tuple_ty.fieldType(0, zcu);
|
||||
const vec_overflow_ty = vec_tuple_ty.fieldType(1, zcu);
|
||||
|
||||
assert(l.typeOf(orig_operands.lhs).toIntern() == vec_int_ty.toIntern());
|
||||
if (orig.tag != .shl_with_overflow) {
|
||||
assert(l.typeOf(orig_operands.rhs).toIntern() == vec_int_ty.toIntern());
|
||||
}
|
||||
|
||||
const scalar_int_ty = vec_int_ty.childType(zcu);
|
||||
const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty);
|
||||
|
||||
// %1 = block(struct { @Vector(N, Int), @Vector(N, u1) }, {
|
||||
// %2 = alloc(*usize)
|
||||
// %3 = alloc(*struct { @Vector(N, Int), @Vector(N, u1) })
|
||||
// %4 = struct_field_ptr_index_0(*@Vector(N, Int), %3)
|
||||
// %5 = struct_field_ptr_index_1(*@Vector(N, u1), %3)
|
||||
// %6 = store(%2, @zero_usize)
|
||||
// %7 = loop({
|
||||
// %8 = load(%2)
|
||||
// %9 = legalize_vec_elem_val(orig_lhs, %8)
|
||||
// %10 = legalize_vec_elem_val(orig_rhs, %8)
|
||||
// %11 = ???_with_overflow(struct { Int, u1 }, %9, %10)
|
||||
// %12 = struct_field_val(%11, 0)
|
||||
// %13 = struct_field_val(%11, 1)
|
||||
// %14 = legalize_vec_store_elem(%4, %8, %12)
|
||||
// %15 = legalize_vec_store_elem(%4, %8, %13)
|
||||
// %16 = cmp_eq(%8, <usize, N-1>)
|
||||
// %17 = cond_br(%16, {
|
||||
// %18 = load(%3)
|
||||
// %19 = br(%1, %18)
|
||||
// }, {
|
||||
// %20 = add(%8, @one_usize)
|
||||
// %21 = store(%2, %20)
|
||||
// %22 = repeat(%7)
|
||||
// })
|
||||
// })
|
||||
// })
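//
// Capacity note: the block body above is %2 through %22, i.e. 21 instructions, matching
// `inst_buf` below. Each scalar `..._with_overflow` yields a `struct { Int, u1 }`;
// field 0 is written into the wrapped-result vector through the `struct_field_ptr_index_0`
// pointer and field 1 into the overflow vector through the `struct_field_ptr_index_1`
// pointer.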
|
||||
|
||||
const elems_len = vec_int_ty.vectorLen(zcu);
|
||||
|
||||
var inst_buf: [21]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const res_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(res_ty) },
|
||||
});
|
||||
const ptr_wrapped_res_inst = res_block.add(l, .{
|
||||
.tag = .struct_field_ptr_index_0,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef((try pt.singleMutPtrType(wrapped_res_ty)).toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
const ptr_overflow_res_inst = res_block.add(l, .{
|
||||
.tag = .struct_field_ptr_index_1,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef((try pt.singleMutPtrType(res_ty.fieldType(1, zcu))).toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
const index_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = .ptr_usize },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = .zero_usize,
|
||||
} },
|
||||
});
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(vec_tuple_ty)).toRef();
|
||||
const result_int_ptr = main_block.addTyOp(
|
||||
l,
|
||||
.struct_field_ptr_index_0,
|
||||
try pt.singleMutPtrType(vec_int_ty),
|
||||
result_ptr,
|
||||
).toRef();
|
||||
const result_overflow_ptr = main_block.addTyOp(
|
||||
l,
|
||||
.struct_field_ptr_index_1,
|
||||
try pt.singleMutPtrType(vec_overflow_ty),
|
||||
result_ptr,
|
||||
).toRef();
|
||||
|
||||
var loop: Loop = .init(l, &res_block);
|
||||
loop.block = .init(res_block.stealRemainingCapacity());
|
||||
{
|
||||
const cur_index_inst = loop.block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .usize_type,
|
||||
.operand = index_alloc_inst.toRef(),
|
||||
} },
|
||||
});
|
||||
const extra = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
|
||||
const res_elem = loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(try zcu.intern_pool.getTupleType(zcu.gpa, pt.tid, .{
|
||||
.types = &.{ wrapped_res_scalar_ty.toIntern(), .u1_type },
|
||||
.values = &(.{.none} ** 2),
|
||||
})),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.lhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .array_elem_val,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = extra.rhs,
|
||||
.rhs = cur_index_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .vector_store_elem,
|
||||
.data = .{ .vector_store_elem = .{
|
||||
.vector_ptr = ptr_overflow_res_inst.toRef(),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .u1_type,
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = res_elem.toRef(),
|
||||
.field_index = 1,
|
||||
}),
|
||||
} },
|
||||
}).toRef(),
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .vector_store_elem,
|
||||
.data = .{ .vector_store_elem = .{
|
||||
.vector_ptr = ptr_wrapped_res_inst.toRef(),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(wrapped_res_scalar_ty.toIntern()),
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = res_elem.toRef(),
|
||||
.field_index = 0,
|
||||
}),
|
||||
} },
|
||||
}).toRef(),
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.rhs, index_val).toRef();
|
||||
const elem_result = loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(scalar_tuple_ty),
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
} },
|
||||
}).toRef();
|
||||
const int_elem = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(scalar_int_ty),
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 0,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
const overflow_elem = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .u1_type,
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 1,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_int_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = int_elem,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_overflow_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = overflow_elem,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, elems_len - 1))).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, vec_tuple_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
|
||||
var loop_cond_br: CondBr = .init(l, (try loop.block.addCmp(
|
||||
l,
|
||||
.lt,
|
||||
cur_index_inst.toRef(),
|
||||
try pt.intRef(.usize, res_len - 1),
|
||||
.{},
|
||||
)).toRef(), &loop.block, .{});
|
||||
loop_cond_br.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
{
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = index_alloc_inst.toRef(),
|
||||
.rhs = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .add,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = cur_index_inst.toRef(),
|
||||
.rhs = .one_usize,
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
_ = loop_cond_br.then_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
}
|
||||
loop_cond_br.else_block = .init(loop_cond_br.then_block.stealRemainingCapacity());
|
||||
_ = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = loop_cond_br.else_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.operand = res_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
try loop_cond_br.finish(l);
|
||||
}
|
||||
try loop.finish(l);
|
||||
}
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
.ty = .fromType(vec_tuple_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
|
||||
@@ -2047,7 +1790,7 @@ fn safeIntFromFloatBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimiz
|
||||
|
||||
// We emit 9 instructions in the worst case.
|
||||
var inst_buf: [9]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
|
||||
// This check is a bit annoying because of floating-point rounding and the fact that this
|
||||
@@ -2231,37 +1974,6 @@ fn safeArithmeticBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, overflow_
|
||||
} };
|
||||
}
|
||||
|
||||
fn expandBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const ip = &zcu.intern_pool;
|
||||
|
||||
const orig_ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
const res_ty = orig_ty_op.ty.toType();
|
||||
const res_ty_key = ip.indexToKey(res_ty.toIntern());
|
||||
const operand_ty = l.typeOf(orig_ty_op.operand);
|
||||
const operand_ty_key = ip.indexToKey(operand_ty.toIntern());
|
||||
_ = res_ty_key;
|
||||
_ = operand_ty_key;
|
||||
|
||||
var inst_buf: [1]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = try pt.undefRef(res_ty),
|
||||
} },
|
||||
});
|
||||
}
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(res_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
} };
|
||||
}
|
||||
fn packedLoadBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
@@ -2431,89 +2143,73 @@ fn packedStructFieldValBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Err
|
||||
const field_ty = orig_ty_pl.ty.toType();
|
||||
const agg_ty = l.typeOf(orig_extra.struct_operand);
|
||||
|
||||
const agg_bits: u16 = @intCast(agg_ty.bitSize(zcu));
|
||||
const bit_offset = zcu.structPackedFieldBitOffset(zcu.typeToStruct(agg_ty).?, orig_extra.field_index);
|
||||
|
||||
const agg_int_ty = try pt.intType(.unsigned, agg_bits);
|
||||
const field_int_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
|
||||
|
||||
const agg_shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, agg_bits));
|
||||
const bit_offset_ref: Air.Inst.Ref = .fromValue(try pt.intValue(agg_shift_ty, bit_offset));
|
||||
|
||||
var inst_buf: [5]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var res_block: Block = .init(&inst_buf);
|
||||
{
|
||||
const agg_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(agg_ty) },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = agg_alloc_inst.toRef(),
|
||||
.rhs = orig_extra.struct_operand,
|
||||
} },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = res_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(field_ty.toIntern()),
|
||||
.operand = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), orig_extra.field_index)).toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
}
|
||||
const agg_int = main_block.addBitCast(l, agg_int_ty, orig_extra.struct_operand);
|
||||
const shifted_agg_int = main_block.addBinOp(l, .shr, agg_int, bit_offset_ref).toRef();
|
||||
const field_int = main_block.addTyOp(l, .trunc, field_int_ty, shifted_agg_int).toRef();
|
||||
const field_val = main_block.addBitCast(l, field_ty, field_int);
|
||||
main_block.addBr(l, orig_inst, field_val);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(field_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
.ty = .fromType(field_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
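Illustrative sketch (not part of this diff): the bitcast / shr / trunc / bitcast sequence emitted above is the same extraction a user could write by hand for a packed struct; the Flags type and field names here are inventions for the example.

const std = @import("std");

const Flags = packed struct(u8) { a: u3, b: u4, c: u1 };

// Extract `b` the way the legalization lowers a packed struct_field_val:
// reinterpret the aggregate as its backing integer, shift the field down to
// bit 0, truncate to the field's width, and reinterpret as the field type
// (a no-op here since the field already is an integer).
fn fieldBByHand(val: Flags) u4 {
    const backing: u8 = @bitCast(val);
    const shifted = backing >> @bitOffsetOf(Flags, "b");
    const field_bits: u4 = @truncate(shifted);
    return field_bits;
}

test fieldBByHand {
    const v: Flags = .{ .a = 5, .b = 9, .c = 1 };
    try std.testing.expectEqual(v.b, fieldBByHand(v));
}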
|
||||
fn packedAggregateInitBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const gpa = zcu.gpa;
|
||||
|
||||
const orig_ty_pl = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_pl;
|
||||
const field_ty = orig_ty_pl.ty.toType();
|
||||
const agg_ty = orig_ty_pl.ty.toType();
|
||||
const agg_field_count = agg_ty.structFieldCount(zcu);
|
||||
|
||||
const ExpectedContents = [1 + 2 * 32 + 2]Air.Inst.Index;
|
||||
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), zcu.gpa);
|
||||
const gpa = stack.get();
|
||||
var sfba_state = std.heap.stackFallback(@sizeOf([4 * 32 + 2]Air.Inst.Index), gpa);
|
||||
const sfba = sfba_state.get();
|
||||
|
||||
const inst_buf = try gpa.alloc(Air.Inst.Index, 1 + 2 * agg_field_count + 2);
|
||||
defer gpa.free(inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
const inst_buf = try sfba.alloc(Air.Inst.Index, 4 * agg_field_count + 2);
|
||||
defer sfba.free(inst_buf);
|
||||
|
||||
var res_block: Block = .init(inst_buf);
|
||||
{
|
||||
const agg_alloc_inst = res_block.add(l, .{
|
||||
.tag = .alloc,
|
||||
.data = .{ .ty = try pt.singleMutPtrType(agg_ty) },
|
||||
});
|
||||
for (0..agg_field_count, orig_ty_pl.payload..) |field_index, extra_index| _ = res_block.add(l, .{
|
||||
.tag = .store,
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = (try res_block.addStructFieldPtr(l, agg_alloc_inst.toRef(), field_index)).toRef(),
|
||||
.rhs = @enumFromInt(l.air_extra.items[extra_index]),
|
||||
} },
|
||||
});
|
||||
_ = res_block.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{
|
||||
.block_inst = orig_inst,
|
||||
.operand = res_block.add(l, .{
|
||||
.tag = .load,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = Air.internedToRef(field_ty.toIntern()),
|
||||
.operand = agg_alloc_inst.toRef(),
|
||||
} },
|
||||
}).toRef(),
|
||||
} },
|
||||
});
|
||||
var main_block: Block = .init(inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
|
||||
const num_bits: u16 = @intCast(agg_ty.bitSize(zcu));
|
||||
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
|
||||
const uint_ty = try pt.intType(.unsigned, num_bits);
|
||||
var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0));
|
||||
|
||||
var field_idx = agg_field_count;
|
||||
while (field_idx > 0) {
|
||||
field_idx -= 1;
|
||||
const field_ty = agg_ty.fieldType(field_idx, zcu);
|
||||
const field_uint_ty = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu)));
|
||||
const field_bit_size_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, field_ty.bitSize(zcu)));
|
||||
const field_val: Air.Inst.Ref = @enumFromInt(l.air_extra.items[orig_ty_pl.payload + field_idx]);
|
||||
|
||||
const shifted = main_block.addBinOp(l, .shl_exact, cur_uint, field_bit_size_ref).toRef();
|
||||
const field_as_uint = main_block.addBitCast(l, field_uint_ty, field_val);
|
||||
const field_extended = main_block.addTyOp(l, .intcast, uint_ty, field_as_uint).toRef();
|
||||
cur_uint = main_block.addBinOp(l, .bit_or, shifted, field_extended).toRef();
|
||||
}
|
||||
|
||||
const result = main_block.addBitCast(l, agg_ty, cur_uint);
|
||||
main_block.addBr(l, orig_inst, result);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(field_ty.toIntern()),
|
||||
.payload = try l.addBlockBody(res_block.body()),
|
||||
.ty = .fromType(agg_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
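Illustrative sketch (not part of this diff): the shl_exact / bit_or loop above builds the backing integer of a packed aggregate starting from the last field, which a user could also write by hand; the Flags type and constants are inventions for the example.

const std = @import("std");

const Flags = packed struct(u8) { a: u3, b: u4, c: u1 };

// Hand-rolled version of the loop above: start from the last field, shift the
// accumulator left by each field's bit width, then OR in that field's bits, so
// the first field ends up in the least significant bits.
fn packByHand(a: u3, b: u4, c: u1) Flags {
    var acc: u8 = 0;
    acc = (acc << 1) | c;
    acc = (acc << 4) | b;
    acc = (acc << 3) | a;
    return @bitCast(acc);
}

test packByHand {
    const expected: Flags = .{ .a = 5, .b = 9, .c = 1 };
    try std.testing.expectEqual(expected, packByHand(5, 9, 1));
}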
|
||||
|
||||
@@ -2571,6 +2267,36 @@ const Block = struct {
|
||||
b.len += 1;
|
||||
return inst;
|
||||
}
|
||||
fn addBr(b: *Block, l: *Legalize, target: Air.Inst.Index, operand: Air.Inst.Ref) void {
|
||||
_ = b.add(l, .{
|
||||
.tag = .br,
|
||||
.data = .{ .br = .{ .block_inst = target, .operand = operand } },
|
||||
});
|
||||
}
|
||||
fn addTy(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type) Air.Inst.Index {
|
||||
return b.add(l, .{ .tag = tag, .data = .{ .ty = ty } });
|
||||
}
|
||||
fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index {
|
||||
return b.add(l, .{
|
||||
.tag = tag,
|
||||
.data = .{ .bin_op = .{ .lhs = lhs, .rhs = rhs } },
|
||||
});
|
||||
}
|
||||
fn addUnOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, operand: Air.Inst.Ref) Air.Inst.Index {
|
||||
return b.add(l, .{
|
||||
.tag = tag,
|
||||
.data = .{ .un_op = operand },
|
||||
});
|
||||
}
|
||||
fn addTyOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type, operand: Air.Inst.Ref) Air.Inst.Index {
|
||||
return b.add(l, .{
|
||||
.tag = tag,
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = .fromType(ty),
|
||||
.operand = operand,
|
||||
} },
|
||||
});
|
||||
}
|
||||
|
||||
/// Adds the code to call the panic handler `panic_id`. This is usually `.call` then `.unreach`,
|
||||
/// but if `Zcu.Feature.panic_fn` is unsupported, we lower to `.trap` instead.
|
||||
@@ -2625,14 +2351,27 @@ const Block = struct {
|
||||
} },
|
||||
});
|
||||
}
|
||||
return addCmpScalar(b, l, op, lhs, rhs, opts.optimized);
|
||||
}
|
||||
|
||||
/// Similar to `addCmp`, but for scalars only. Unlike `addCmp`, this function is
|
||||
/// infallible, because it doesn't need to add entries to `extra`.
|
||||
fn addCmpScalar(
|
||||
b: *Block,
|
||||
l: *Legalize,
|
||||
op: std.math.CompareOperator,
|
||||
lhs: Air.Inst.Ref,
|
||||
rhs: Air.Inst.Ref,
|
||||
optimized: bool,
|
||||
) Air.Inst.Index {
|
||||
return b.add(l, .{
|
||||
.tag = switch (op) {
|
||||
.lt => if (opts.optimized) .cmp_lt_optimized else .cmp_lt,
|
||||
.lte => if (opts.optimized) .cmp_lte_optimized else .cmp_lte,
|
||||
.eq => if (opts.optimized) .cmp_eq_optimized else .cmp_eq,
|
||||
.gte => if (opts.optimized) .cmp_gte_optimized else .cmp_gte,
|
||||
.gt => if (opts.optimized) .cmp_gt_optimized else .cmp_gt,
|
||||
.neq => if (opts.optimized) .cmp_neq_optimized else .cmp_neq,
|
||||
.lt => if (optimized) .cmp_lt_optimized else .cmp_lt,
|
||||
.lte => if (optimized) .cmp_lte_optimized else .cmp_lte,
|
||||
.eq => if (optimized) .cmp_eq_optimized else .cmp_eq,
|
||||
.gte => if (optimized) .cmp_gte_optimized else .cmp_gte,
|
||||
.gt => if (optimized) .cmp_gt_optimized else .cmp_gt,
|
||||
.neq => if (optimized) .cmp_neq_optimized else .cmp_neq,
|
||||
},
|
||||
.data = .{ .bin_op = .{
|
||||
.lhs = lhs,
|
||||
@@ -2641,93 +2380,6 @@ const Block = struct {
|
||||
});
|
||||
}
|
||||
|
||||
/// Adds a `struct_field_ptr*` instruction to `b`. This is a fairly thin wrapper around `add`
|
||||
/// that selects the optimized instruction encoding to use, although it does compute the
|
||||
/// proper field pointer type.
|
||||
fn addStructFieldPtr(
|
||||
b: *Block,
|
||||
l: *Legalize,
|
||||
struct_operand: Air.Inst.Ref,
|
||||
field_index: usize,
|
||||
) Error!Air.Inst.Index {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const agg_ptr_ty = l.typeOf(struct_operand);
|
||||
const agg_ptr_info = agg_ptr_ty.ptrInfo(zcu);
|
||||
const agg_ty: Type = .fromInterned(agg_ptr_info.child);
|
||||
const agg_ptr_align = switch (agg_ptr_info.flags.alignment) {
|
||||
.none => agg_ty.abiAlignment(zcu),
|
||||
else => |agg_ptr_align| agg_ptr_align,
|
||||
};
|
||||
const agg_layout = agg_ty.containerLayout(zcu);
|
||||
const field_ty = agg_ty.fieldType(field_index, zcu);
|
||||
var field_ptr_info: InternPool.Key.PtrType = .{
|
||||
.child = field_ty.toIntern(),
|
||||
.flags = .{
|
||||
.is_const = agg_ptr_info.flags.is_const,
|
||||
.is_volatile = agg_ptr_info.flags.is_volatile,
|
||||
.address_space = agg_ptr_info.flags.address_space,
|
||||
},
|
||||
};
|
||||
field_ptr_info.flags.alignment = field_ptr_align: switch (agg_layout) {
|
||||
.auto => agg_ty.fieldAlignment(field_index, zcu).min(agg_ptr_align),
|
||||
.@"extern" => switch (agg_ty.zigTypeTag(zcu)) {
|
||||
else => unreachable,
|
||||
.@"struct" => .fromLog2Units(@min(
|
||||
agg_ptr_align.toLog2Units(),
|
||||
@ctz(agg_ty.structFieldOffset(field_index, zcu)),
|
||||
)),
|
||||
.@"union" => agg_ptr_align,
|
||||
},
|
||||
.@"packed" => switch (agg_ty.zigTypeTag(zcu)) {
|
||||
else => unreachable,
|
||||
.@"struct" => {
|
||||
const packed_offset = agg_ty.packedStructFieldPtrInfo(agg_ptr_ty, @intCast(field_index), pt);
|
||||
field_ptr_info.packed_offset = packed_offset;
|
||||
break :field_ptr_align agg_ptr_align;
|
||||
},
|
||||
.@"union" => {
|
||||
field_ptr_info.packed_offset = .{
|
||||
.host_size = switch (agg_ptr_info.packed_offset.host_size) {
|
||||
0 => @intCast(agg_ty.abiSize(zcu)),
|
||||
else => |host_size| host_size,
|
||||
},
|
||||
.bit_offset = agg_ptr_info.packed_offset.bit_offset,
|
||||
};
|
||||
break :field_ptr_align agg_ptr_align;
|
||||
},
|
||||
},
|
||||
};
|
||||
const field_ptr_ty = try pt.ptrType(field_ptr_info);
|
||||
const field_ptr_ty_ref = Air.internedToRef(field_ptr_ty.toIntern());
|
||||
return switch (field_index) {
|
||||
inline 0...3 => |ct_field_index| b.add(l, .{
|
||||
.tag = switch (ct_field_index) {
|
||||
0 => .struct_field_ptr_index_0,
|
||||
1 => .struct_field_ptr_index_1,
|
||||
2 => .struct_field_ptr_index_2,
|
||||
3 => .struct_field_ptr_index_3,
|
||||
else => comptime unreachable,
|
||||
},
|
||||
.data = .{ .ty_op = .{
|
||||
.ty = field_ptr_ty_ref,
|
||||
.operand = struct_operand,
|
||||
} },
|
||||
}),
|
||||
else => b.add(l, .{
|
||||
.tag = .struct_field_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = field_ptr_ty_ref,
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = struct_operand,
|
||||
.field_index = @intCast(field_index),
|
||||
}),
|
||||
} },
|
||||
}),
|
||||
};
|
||||
}
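Illustrative sketch (not part of this diff): for the extern-struct case above, the field pointer alignment is limited by the parent pointer's alignment and by the trailing zeros of the field offset. The compiler works in log2 units; this sketch does the same computation in byte units with made-up numbers.

const std = @import("std");

test "extern struct field pointer alignment" {
    // A pointer to an extern struct aligned to 8 gives a pointer to the field at
    // byte offset 12 an alignment of only 4: min(parent alignment, 1 << @ctz(offset)).
    const offset: u64 = 12;
    const parent_align: u64 = 8;
    const shift: u6 = @intCast(@ctz(offset));
    const field_align = @min(parent_align, @as(u64, 1) << shift);
    try std.testing.expectEqual(@as(u64, 4), field_align);
}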
|
||||
|
||||
/// Adds a `bitcast` instruction to `b`. This is a thin wrapper that omits the instruction for
|
||||
/// no-op casts.
|
||||
fn addBitCast(
|
||||
@@ -2774,31 +2426,6 @@ const Block = struct {
|
||||
}
|
||||
};
|
||||
|
||||
const Result = struct {
|
||||
inst: Air.Inst.Index,
|
||||
block: Block,
|
||||
|
||||
/// The return value has `block` initialized to `undefined`; it is the caller's responsibility
|
||||
/// to initialize it.
|
||||
fn init(l: *Legalize, ty: Type, parent_block: *Block) Result {
|
||||
return .{
|
||||
.inst = parent_block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = Air.internedToRef(ty.toIntern()),
|
||||
.payload = undefined,
|
||||
} },
|
||||
}),
|
||||
.block = undefined,
|
||||
};
|
||||
}
|
||||
|
||||
fn finish(res: Result, l: *Legalize) Error!void {
|
||||
const data = &l.air_instructions.items(.data)[@intFromEnum(res.inst)];
|
||||
data.ty_pl.payload = try l.addBlockBody(res.block.body());
|
||||
}
|
||||
};
|
||||
|
||||
const Loop = struct {
|
||||
inst: Air.Inst.Index,
|
||||
block: Block,
|
||||
|
||||
@@ -458,17 +458,12 @@ fn analyzeInst(
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
.memmove,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
const o = inst_datas[@intFromEnum(inst)].bin_op;
|
||||
return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none });
|
||||
},
|
||||
|
||||
.vector_store_elem => {
|
||||
const o = inst_datas[@intFromEnum(inst)].vector_store_elem;
|
||||
const extra = a.air.extraData(Air.Bin, o.payload).data;
|
||||
return analyzeOperands(a, pass, data, inst, .{ o.vector_ptr, extra.lhs, extra.rhs });
|
||||
},
|
||||
|
||||
.arg,
|
||||
.alloc,
|
||||
.ret_ptr,
|
||||
@@ -775,6 +770,12 @@ fn analyzeInst(
|
||||
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
|
||||
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none });
|
||||
},
|
||||
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
|
||||
const bin = a.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -272,6 +272,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
.memmove,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
const bin_op = data[@intFromEnum(inst)].bin_op;
|
||||
try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none });
|
||||
@@ -322,11 +323,6 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
|
||||
const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
try self.verifyInstOperands(inst, .{ extra.lhs, extra.rhs, pl_op.operand });
|
||||
},
|
||||
.vector_store_elem => {
|
||||
const vector_store_elem = data[@intFromEnum(inst)].vector_store_elem;
|
||||
const extra = self.air.extraData(Air.Bin, vector_store_elem.payload).data;
|
||||
try self.verifyInstOperands(inst, .{ vector_store_elem.vector_ptr, extra.lhs, extra.rhs });
|
||||
},
|
||||
.cmpxchg_strong,
|
||||
.cmpxchg_weak,
|
||||
=> {
|
||||
@@ -582,6 +578,11 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
|
||||
|
||||
try self.verifyInst(inst);
|
||||
},
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = data[@intFromEnum(inst)].pl_op;
|
||||
const bin = self.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+14
-12
@@ -171,6 +171,7 @@ const Writer = struct {
|
||||
.memmove,
|
||||
.memset,
|
||||
.memset_safe,
|
||||
.legalize_vec_elem_val,
|
||||
=> try w.writeBinOp(s, inst),
|
||||
|
||||
.is_null,
|
||||
@@ -330,8 +331,8 @@ const Writer = struct {
|
||||
.shuffle_two => try w.writeShuffleTwo(s, inst),
|
||||
.reduce, .reduce_optimized => try w.writeReduce(s, inst),
|
||||
.cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
|
||||
.vector_store_elem => try w.writeVectorStoreElem(s, inst),
|
||||
.runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst),
|
||||
.legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst),
|
||||
|
||||
.work_item_id,
|
||||
.work_group_size,
|
||||
@@ -509,6 +510,18 @@ const Writer = struct {
|
||||
try w.writeOperand(s, inst, 2, pl_op.operand);
|
||||
}
|
||||
|
||||
fn writeLegalizeVecStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
|
||||
const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
const bin = w.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
|
||||
try w.writeOperand(s, inst, 0, pl_op.operand);
|
||||
try s.writeAll(", ");
|
||||
try w.writeOperand(s, inst, 1, bin.lhs);
|
||||
try s.writeAll(", ");
|
||||
try w.writeOperand(s, inst, 2, bin.rhs);
|
||||
try s.writeAll(", ");
|
||||
}
|
||||
|
||||
fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
|
||||
const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
|
||||
try w.writeType(s, unwrapped.result_ty);
|
||||
@@ -576,17 +589,6 @@ const Writer = struct {
|
||||
try w.writeOperand(s, inst, 1, extra.rhs);
|
||||
}
|
||||
|
||||
fn writeVectorStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
|
||||
const data = w.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem;
|
||||
const extra = w.air.extraData(Air.VectorCmp, data.payload).data;
|
||||
|
||||
try w.writeOperand(s, inst, 0, data.vector_ptr);
|
||||
try s.writeAll(", ");
|
||||
try w.writeOperand(s, inst, 1, extra.lhs);
|
||||
try s.writeAll(", ");
|
||||
try w.writeOperand(s, inst, 2, extra.rhs);
|
||||
}
|
||||
|
||||
fn writeRuntimeNavPtr(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
|
||||
const ip = &w.pt.zcu.intern_pool;
|
||||
const ty_nav = w.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav;
|
||||
|
||||
@@ -88,6 +88,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
|
||||
.atomic_store_monotonic,
|
||||
.atomic_store_release,
|
||||
.atomic_store_seq_cst,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
if (!checkRef(data.bin_op.lhs, zcu)) return false;
|
||||
if (!checkRef(data.bin_op.rhs, zcu)) return false;
|
||||
@@ -316,19 +317,13 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
|
||||
if (!checkRef(data.prefetch.ptr, zcu)) return false;
|
||||
},
|
||||
|
||||
.vector_store_elem => {
|
||||
const bin = air.extraData(Air.Bin, data.vector_store_elem.payload).data;
|
||||
if (!checkRef(data.vector_store_elem.vector_ptr, zcu)) return false;
|
||||
if (!checkRef(bin.lhs, zcu)) return false;
|
||||
if (!checkRef(bin.rhs, zcu)) return false;
|
||||
},
|
||||
|
||||
.runtime_nav_ptr => {
|
||||
if (!checkType(.fromInterned(data.ty_nav.ty), zcu)) return false;
|
||||
},
|
||||
|
||||
.select,
|
||||
.mul_add,
|
||||
.legalize_vec_store_elem,
|
||||
=> {
|
||||
const bin = air.extraData(Air.Bin, data.pl_op.payload).data;
|
||||
if (!checkRef(data.pl_op.operand, zcu)) return false;
|
||||
|
||||
+2
-5
@@ -2104,7 +2104,6 @@ pub const Key = union(enum) {
|
||||
|
||||
pub const VectorIndex = enum(u16) {
|
||||
none = std.math.maxInt(u16),
|
||||
runtime = std.math.maxInt(u16) - 1,
|
||||
_,
|
||||
};
|
||||
|
||||
@@ -3739,10 +3738,8 @@ pub const LoadedStructType = struct {
|
||||
return s.field_inits.get(ip)[i];
|
||||
}
|
||||
|
||||
/// Returns `none` in the case the struct is a tuple.
|
||||
pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) OptionalNullTerminatedString {
|
||||
if (s.field_names.len == 0) return .none;
|
||||
return s.field_names.get(ip)[i].toOptional();
|
||||
pub fn fieldName(s: LoadedStructType, ip: *const InternPool, i: usize) NullTerminatedString {
|
||||
return s.field_names.get(ip)[i];
|
||||
}
|
||||
|
||||
pub fn fieldIsComptime(s: LoadedStructType, ip: *const InternPool, i: usize) bool {
|
||||
|
||||
+32
-95
@@ -15919,24 +15919,30 @@ fn zirOverflowArithmetic(
|
||||
},
|
||||
.mul_with_overflow => {
|
||||
// If either of the arguments is zero, the result is zero and no overflow occurred.
|
||||
// If either of the arguments is one, the result is the other and no overflow occurred.
|
||||
// Otherwise, if either of the arguments is undefined, both results are undefined.
|
||||
const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1);
|
||||
if (maybe_lhs_val) |lhs_val| {
|
||||
if (!lhs_val.isUndef(zcu)) {
|
||||
if (try lhs_val.compareAllWithZeroSema(.eq, pt)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
|
||||
} else if (try sema.compareAll(lhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) {
|
||||
if (!lhs_val.isUndef(zcu) and try lhs_val.compareAllWithZeroSema(.eq, pt)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
|
||||
}
|
||||
}
|
||||
if (maybe_rhs_val) |rhs_val| {
|
||||
if (!rhs_val.isUndef(zcu) and try rhs_val.compareAllWithZeroSema(.eq, pt)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
|
||||
}
|
||||
}
|
||||
// If either of the arguments is one, the result is the other and no overflow occurred.
|
||||
const dest_scalar_ty = dest_ty.scalarType(zcu);
|
||||
const dest_scalar_int = dest_scalar_ty.intInfo(zcu);
|
||||
// We could still be working with i1, where '1' is not a legal value!
|
||||
if (!(dest_scalar_int.bits == 1 and dest_scalar_int.signedness == .signed)) {
|
||||
const scalar_one = try pt.intValue(dest_scalar_ty, 1);
|
||||
const vec_one = try sema.splat(dest_ty, scalar_one);
|
||||
if (maybe_lhs_val) |lhs_val| {
|
||||
if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (maybe_rhs_val) |rhs_val| {
|
||||
if (!rhs_val.isUndef(zcu)) {
|
||||
if (try rhs_val.compareAllWithZeroSema(.eq, pt)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
|
||||
} else if (try sema.compareAll(rhs_val, .eq, try sema.splat(dest_ty, scalar_one), dest_ty)) {
|
||||
if (maybe_rhs_val) |rhs_val| {
|
||||
if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
|
||||
}
|
||||
}
|
||||
@@ -15947,7 +15953,6 @@ fn zirOverflowArithmetic(
|
||||
if (lhs_val.isUndef(zcu) or rhs_val.isUndef(zcu)) {
|
||||
break :result .{ .overflow_bit = .undef, .wrapped = .undef };
|
||||
}
|
||||
|
||||
const result = try arith.mulWithOverflow(sema, dest_ty, lhs_val, rhs_val);
|
||||
break :result .{ .overflow_bit = result.overflow_bit, .wrapped = result.wrapped_result };
|
||||
}
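Illustrative sketch (not part of this diff): the identities Sema folds in the hunk above can be checked directly with @mulWithOverflow, and the i1 guard exists because maxInt(i1) is 0, so a constant one cannot even be expressed at that type.

const std = @import("std");

test "mul-with-overflow identities" {
    // x * 0 is 0 with no overflow, regardless of x.
    const by_zero = @mulWithOverflow(@as(u8, 200), @as(u8, 0));
    try std.testing.expectEqual(@as(u8, 0), by_zero[0]);
    try std.testing.expectEqual(@as(u1, 0), by_zero[1]);

    // x * 1 is x with no overflow, which is why the other operand can be forwarded.
    const by_one = @mulWithOverflow(@as(u8, 200), @as(u8, 1));
    try std.testing.expectEqual(@as(u8, 200), by_one[0]);
    try std.testing.expectEqual(@as(u1, 0), by_one[1]);

    // The bit-width guard is needed because an i1 holds only -1 and 0, so a
    // comparison against one is not expressible at that type.
    comptime std.debug.assert(std.math.maxInt(i1) == 0);
}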
|
||||
@@ -17751,10 +17756,7 @@ fn zirTypeInfo(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai
|
||||
try ty.resolveStructFieldInits(pt);
|
||||
|
||||
for (struct_field_vals, 0..) |*field_val, field_index| {
|
||||
const field_name = if (struct_type.fieldName(ip, field_index).unwrap()) |field_name|
|
||||
field_name
|
||||
else
|
||||
try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls);
|
||||
const field_name = struct_type.fieldName(ip, field_index);
|
||||
const field_name_len = field_name.length(ip);
|
||||
const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]);
|
||||
const field_init = struct_type.fieldInit(ip, field_index);
|
||||
@@ -28345,6 +28347,10 @@ fn elemPtrArray(
|
||||
break :o index;
|
||||
} else null;
|
||||
|
||||
if (offset == null and array_ty.zigTypeTag(zcu) == .vector) {
|
||||
return sema.fail(block, elem_index_src, "vector index not comptime known", .{});
|
||||
}
|
||||
|
||||
const elem_ptr_ty = try array_ptr_ty.elemPtrType(offset, pt);
|
||||
|
||||
if (maybe_undef_array_ptr_val) |array_ptr_val| {
|
||||
@@ -28362,10 +28368,6 @@ fn elemPtrArray(
|
||||
try sema.validateRuntimeValue(block, array_ptr_src, array_ptr);
|
||||
}
|
||||
|
||||
if (offset == null and array_ty.zigTypeTag(zcu) == .vector) {
|
||||
return sema.fail(block, elem_index_src, "vector index not comptime known", .{});
|
||||
}
|
||||
|
||||
// A runtime check is only needed if the bounds could not be checked at comptime.
|
||||
if (oob_safety and block.wantSafety() and offset == null) {
|
||||
const len_inst = try pt.intRef(.usize, array_len);
|
||||
@@ -30397,22 +30399,6 @@ fn storePtr2(
|
||||
|
||||
const is_ret = air_tag == .ret_ptr;
|
||||
|
||||
// Detect if we are storing an array operand to a bitcasted vector pointer.
|
||||
// If so, we instead reach through the bitcasted pointer to the vector pointer,
|
||||
// bitcast the array operand to a vector, and then lower this as a store of
|
||||
// a vector value to a vector pointer. This generally results in better code,
|
||||
// as well as working around an LLVM bug:
|
||||
// https://github.com/ziglang/zig/issues/11154
|
||||
if (sema.obtainBitCastedVectorPtr(ptr)) |vector_ptr| {
|
||||
const vector_ty = sema.typeOf(vector_ptr).childType(zcu);
|
||||
const vector = sema.coerceExtra(block, vector_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) {
|
||||
error.NotCoercible => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
try sema.storePtr2(block, src, vector_ptr, ptr_src, vector, operand_src, .store);
|
||||
return;
|
||||
}
|
||||
|
||||
const operand = sema.coerceExtra(block, elem_ty, uncasted_operand, operand_src, .{ .is_ret = is_ret }) catch |err| switch (err) {
|
||||
error.NotCoercible => unreachable,
|
||||
else => |e| return e,
|
||||
@@ -30445,29 +30431,6 @@ fn storePtr2(
|
||||
|
||||
try sema.requireRuntimeBlock(block, src, runtime_src);
|
||||
|
||||
if (ptr_ty.ptrInfo(zcu).flags.vector_index == .runtime) {
|
||||
const ptr_inst = ptr.toIndex().?;
|
||||
const air_tags = sema.air_instructions.items(.tag);
|
||||
if (air_tags[@intFromEnum(ptr_inst)] == .ptr_elem_ptr) {
|
||||
const ty_pl = sema.air_instructions.items(.data)[@intFromEnum(ptr_inst)].ty_pl;
|
||||
const bin_op = sema.getTmpAir().extraData(Air.Bin, ty_pl.payload).data;
|
||||
_ = try block.addInst(.{
|
||||
.tag = .vector_store_elem,
|
||||
.data = .{ .vector_store_elem = .{
|
||||
.vector_ptr = bin_op.lhs,
|
||||
.payload = try block.sema.addExtra(Air.Bin{
|
||||
.lhs = bin_op.rhs,
|
||||
.rhs = operand,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
return;
|
||||
}
|
||||
return sema.fail(block, ptr_src, "unable to determine vector element index of type '{f}'", .{
|
||||
ptr_ty.fmt(pt),
|
||||
});
|
||||
}
|
||||
|
||||
const store_inst = if (is_ret)
|
||||
try block.addBinOp(.store, ptr, operand)
|
||||
else
|
||||
@@ -30567,37 +30530,6 @@ fn markMaybeComptimeAllocRuntime(sema: *Sema, block: *Block, alloc_inst: Air.Ins
|
||||
}
|
||||
}
|
||||
|
||||
/// Traverse an arbitrary number of bitcasted pointers and return the underlying vector
|
||||
/// pointer, but only if the final element type matches the vector element type and the
|
||||
/// lengths match.
|
||||
fn obtainBitCastedVectorPtr(sema: *Sema, ptr: Air.Inst.Ref) ?Air.Inst.Ref {
|
||||
const pt = sema.pt;
|
||||
const zcu = pt.zcu;
|
||||
const array_ty = sema.typeOf(ptr).childType(zcu);
|
||||
if (array_ty.zigTypeTag(zcu) != .array) return null;
|
||||
var ptr_ref = ptr;
|
||||
var ptr_inst = ptr_ref.toIndex() orelse return null;
|
||||
const air_datas = sema.air_instructions.items(.data);
|
||||
const air_tags = sema.air_instructions.items(.tag);
|
||||
const vector_ty = while (air_tags[@intFromEnum(ptr_inst)] == .bitcast) {
|
||||
ptr_ref = air_datas[@intFromEnum(ptr_inst)].ty_op.operand;
|
||||
if (!sema.isKnownZigType(ptr_ref, .pointer)) return null;
|
||||
const child_ty = sema.typeOf(ptr_ref).childType(zcu);
|
||||
if (child_ty.zigTypeTag(zcu) == .vector) break child_ty;
|
||||
ptr_inst = ptr_ref.toIndex() orelse return null;
|
||||
} else return null;
|
||||
|
||||
// We have a pointer-to-array and a pointer-to-vector. If the elements and
|
||||
// lengths match, return the result.
|
||||
if (array_ty.childType(zcu).eql(vector_ty.childType(zcu), zcu) and
|
||||
array_ty.arrayLen(zcu) == vector_ty.vectorLen(zcu))
|
||||
{
|
||||
return ptr_ref;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/// Call when you have Value objects rather than Air instructions, and you want to
|
||||
/// assert the store must be done at comptime.
|
||||
fn storePtrVal(
|
||||
@@ -35577,8 +35509,13 @@ fn structFieldInits(
|
||||
const default_val = try sema.resolveConstValue(&block_scope, init_src, coerced, null);
|
||||
|
||||
if (default_val.canMutateComptimeVarState(zcu)) {
|
||||
const field_name = struct_type.fieldName(ip, field_i).unwrap().?;
|
||||
return sema.failWithContainsReferenceToComptimeVar(&block_scope, init_src, field_name, "field default value", default_val);
|
||||
return sema.failWithContainsReferenceToComptimeVar(
|
||||
&block_scope,
|
||||
init_src,
|
||||
struct_type.fieldName(ip, field_i),
|
||||
"field default value",
|
||||
default_val,
|
||||
);
|
||||
}
|
||||
struct_type.field_inits.get(ip)[field_i] = default_val.toIntern();
|
||||
}
|
||||
|
||||
@@ -24,7 +24,6 @@ pub fn loadComptimePtr(sema: *Sema, block: *Block, src: LazySrcLoc, ptr: Value)
|
||||
const child_bits = Type.fromInterned(ptr_info.child).bitSize(zcu);
|
||||
const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
|
||||
.none => 0,
|
||||
.runtime => return .runtime_load,
|
||||
else => |idx| switch (pt.zcu.getTarget().cpu.arch.endian()) {
|
||||
.little => child_bits * @intFromEnum(idx),
|
||||
.big => host_bits - child_bits * (@intFromEnum(idx) + 1), // element order reversed on big endian
|
||||
@@ -81,7 +80,6 @@ pub fn storeComptimePtr(
|
||||
};
|
||||
const bit_offset = ptr_info.packed_offset.bit_offset + switch (ptr_info.flags.vector_index) {
|
||||
.none => 0,
|
||||
.runtime => return .runtime_store,
|
||||
else => |idx| switch (zcu.getTarget().cpu.arch.endian()) {
|
||||
.little => Type.fromInterned(ptr_info.child).bitSize(zcu) * @intFromEnum(idx),
|
||||
.big => host_bits - Type.fromInterned(ptr_info.child).bitSize(zcu) * (@intFromEnum(idx) + 1), // element order reversed on big endian
|
||||
|
||||
+4
-6
@@ -198,9 +198,7 @@ pub fn print(ty: Type, writer: *std.Io.Writer, pt: Zcu.PerThread) std.Io.Writer.
|
||||
info.packed_offset.bit_offset, info.packed_offset.host_size,
|
||||
});
|
||||
}
|
||||
if (info.flags.vector_index == .runtime) {
|
||||
try writer.writeAll(":?");
|
||||
} else if (info.flags.vector_index != .none) {
|
||||
if (info.flags.vector_index != .none) {
|
||||
try writer.print(":{d}", .{@intFromEnum(info.flags.vector_index)});
|
||||
}
|
||||
try writer.writeAll(") ");
|
||||
@@ -3113,7 +3111,7 @@ pub fn enumTagFieldIndex(ty: Type, enum_tag: Value, zcu: *const Zcu) ?u32 {
|
||||
pub fn structFieldName(ty: Type, index: usize, zcu: *const Zcu) InternPool.OptionalNullTerminatedString {
|
||||
const ip = &zcu.intern_pool;
|
||||
return switch (ip.indexToKey(ty.toIntern())) {
|
||||
.struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index),
|
||||
.struct_type => ip.loadStructType(ty.toIntern()).fieldName(ip, index).toOptional(),
|
||||
.tuple_type => .none,
|
||||
else => unreachable,
|
||||
};
|
||||
@@ -3558,7 +3556,7 @@ pub fn packedStructFieldPtrInfo(
|
||||
} else .{
|
||||
switch (zcu.comp.getZigBackend()) {
|
||||
else => (running_bits + 7) / 8,
|
||||
.stage2_x86_64 => @intCast(struct_ty.abiSize(zcu)),
|
||||
.stage2_x86_64, .stage2_c => @intCast(struct_ty.abiSize(zcu)),
|
||||
},
|
||||
bit_offset,
|
||||
};
|
||||
@@ -3985,7 +3983,7 @@ pub fn elemPtrType(ptr_ty: Type, offset: ?usize, pt: Zcu.PerThread) !Type {
|
||||
break :blk .{
|
||||
.host_size = @intCast(parent_ty.arrayLen(zcu)),
|
||||
.alignment = parent_ty.abiAlignment(zcu),
|
||||
.vector_index = if (offset) |some| @enumFromInt(some) else .runtime,
|
||||
.vector_index = @enumFromInt(offset.?),
|
||||
};
|
||||
} else .{};
|
||||
|
||||
|
||||
+20
-149
@@ -574,166 +574,37 @@ pub fn writeToPackedMemory(
|
||||
}
|
||||
}
|
||||
|
||||
/// Load a Value from the contents of `buffer`.
|
||||
/// Load a Value from the contents of `buffer`, where `ty` is an unsigned integer type.
|
||||
///
|
||||
/// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
|
||||
/// the end of the value in memory.
|
||||
pub fn readFromMemory(
|
||||
pub fn readUintFromMemory(
|
||||
ty: Type,
|
||||
pt: Zcu.PerThread,
|
||||
buffer: []const u8,
|
||||
arena: Allocator,
|
||||
) error{
|
||||
IllDefinedMemoryLayout,
|
||||
Unimplemented,
|
||||
OutOfMemory,
|
||||
}!Value {
|
||||
) Allocator.Error!Value {
|
||||
const zcu = pt.zcu;
|
||||
const ip = &zcu.intern_pool;
|
||||
const target = zcu.getTarget();
|
||||
const endian = target.cpu.arch.endian();
|
||||
switch (ty.zigTypeTag(zcu)) {
|
||||
.void => return Value.void,
|
||||
.bool => {
|
||||
if (buffer[0] == 0) {
|
||||
return Value.false;
|
||||
} else {
|
||||
return Value.true;
|
||||
}
|
||||
},
|
||||
.int, .@"enum" => |ty_tag| {
|
||||
const int_ty = switch (ty_tag) {
|
||||
.int => ty,
|
||||
.@"enum" => ty.intTagType(zcu),
|
||||
else => unreachable,
|
||||
};
|
||||
const int_info = int_ty.intInfo(zcu);
|
||||
const bits = int_info.bits;
|
||||
const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8);
|
||||
if (bits == 0 or buffer.len == 0) return zcu.getCoerced(try zcu.intValue(int_ty, 0), ty);
|
||||
const endian = zcu.getTarget().cpu.arch.endian();
|
||||
|
||||
if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64
|
||||
.signed => {
|
||||
const val = std.mem.readVarInt(i64, buffer[0..byte_count], endian);
|
||||
const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits));
|
||||
return zcu.getCoerced(try zcu.intValue(int_ty, result), ty);
|
||||
},
|
||||
.unsigned => {
|
||||
const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian);
|
||||
const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits));
|
||||
return zcu.getCoerced(try zcu.intValue(int_ty, result), ty);
|
||||
},
|
||||
} else { // Slow path, we have to construct a big-int
|
||||
const Limb = std.math.big.Limb;
|
||||
const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb);
|
||||
const limbs_buffer = try arena.alloc(Limb, limb_count);
|
||||
assert(ty.isUnsignedInt(zcu));
|
||||
const bits = ty.intInfo(zcu).bits;
|
||||
const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8);
|
||||
|
||||
var bigint = BigIntMutable.init(limbs_buffer, 0);
|
||||
bigint.readTwosComplement(buffer[0..byte_count], bits, endian, int_info.signedness);
|
||||
return zcu.getCoerced(try zcu.intValue_big(int_ty, bigint.toConst()), ty);
|
||||
}
|
||||
},
|
||||
.float => return Value.fromInterned(try pt.intern(.{ .float = .{
|
||||
.ty = ty.toIntern(),
|
||||
.storage = switch (ty.floatBits(target)) {
|
||||
16 => .{ .f16 = @bitCast(std.mem.readInt(u16, buffer[0..2], endian)) },
|
||||
32 => .{ .f32 = @bitCast(std.mem.readInt(u32, buffer[0..4], endian)) },
|
||||
64 => .{ .f64 = @bitCast(std.mem.readInt(u64, buffer[0..8], endian)) },
|
||||
80 => .{ .f80 = @bitCast(std.mem.readInt(u80, buffer[0..10], endian)) },
|
||||
128 => .{ .f128 = @bitCast(std.mem.readInt(u128, buffer[0..16], endian)) },
|
||||
else => unreachable,
|
||||
},
|
||||
} })),
|
||||
.array => {
|
||||
const elem_ty = ty.childType(zcu);
|
||||
const elem_size = elem_ty.abiSize(zcu);
|
||||
const elems = try arena.alloc(InternPool.Index, @intCast(ty.arrayLen(zcu)));
|
||||
var offset: usize = 0;
|
||||
for (elems) |*elem| {
|
||||
elem.* = (try readFromMemory(elem_ty, zcu, buffer[offset..], arena)).toIntern();
|
||||
offset += @intCast(elem_size);
|
||||
}
|
||||
return pt.aggregateValue(ty, elems);
|
||||
},
|
||||
.vector => {
|
||||
// We use byte_count instead of abi_size here, so that any padding bytes
|
||||
// follow the data bytes, on both big- and little-endian systems.
|
||||
const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8;
|
||||
return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena);
|
||||
},
|
||||
.@"struct" => {
|
||||
const struct_type = zcu.typeToStruct(ty).?;
|
||||
switch (struct_type.layout) {
|
||||
.auto => unreachable, // Sema is supposed to have emitted a compile error already
|
||||
.@"extern" => {
|
||||
const field_types = struct_type.field_types;
|
||||
const field_vals = try arena.alloc(InternPool.Index, field_types.len);
|
||||
for (field_vals, 0..) |*field_val, i| {
|
||||
const field_ty = Type.fromInterned(field_types.get(ip)[i]);
|
||||
const off: usize = @intCast(ty.structFieldOffset(i, zcu));
|
||||
const sz: usize = @intCast(field_ty.abiSize(zcu));
|
||||
field_val.* = (try readFromMemory(field_ty, zcu, buffer[off..(off + sz)], arena)).toIntern();
|
||||
}
|
||||
return pt.aggregateValue(ty, field_vals);
|
||||
},
|
||||
.@"packed" => {
|
||||
const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8;
|
||||
return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena);
|
||||
},
|
||||
}
|
||||
},
|
||||
.error_set => {
|
||||
const bits = zcu.errorSetBits();
|
||||
const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8);
|
||||
const int = std.mem.readVarInt(u64, buffer[0..byte_count], endian);
|
||||
const index = (int << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits));
|
||||
const name = zcu.global_error_set.keys()[@intCast(index)];
|
||||
assert(buffer.len >= byte_count);
|
||||
|
||||
return Value.fromInterned(try pt.intern(.{ .err = .{
|
||||
.ty = ty.toIntern(),
|
||||
.name = name,
|
||||
} }));
|
||||
},
|
||||
.@"union" => switch (ty.containerLayout(zcu)) {
|
||||
.auto => return error.IllDefinedMemoryLayout,
|
||||
.@"extern" => {
|
||||
const union_size = ty.abiSize(zcu);
|
||||
const array_ty = try zcu.arrayType(.{ .len = union_size, .child = .u8_type });
|
||||
const val = (try readFromMemory(array_ty, zcu, buffer, arena)).toIntern();
|
||||
return Value.fromInterned(try pt.internUnion(.{
|
||||
.ty = ty.toIntern(),
|
||||
.tag = .none,
|
||||
.val = val,
|
||||
}));
|
||||
},
|
||||
.@"packed" => {
|
||||
const byte_count = (@as(usize, @intCast(ty.bitSize(zcu))) + 7) / 8;
|
||||
return readFromPackedMemory(ty, zcu, buffer[0..byte_count], 0, arena);
|
||||
},
|
||||
},
|
||||
.pointer => {
|
||||
assert(!ty.isSlice(zcu)); // No well defined layout.
|
||||
const int_val = try readFromMemory(Type.usize, zcu, buffer, arena);
|
||||
return Value.fromInterned(try pt.intern(.{ .ptr = .{
|
||||
.ty = ty.toIntern(),
|
||||
.base_addr = .int,
|
||||
.byte_offset = int_val.toUnsignedInt(zcu),
|
||||
} }));
|
||||
},
|
||||
.optional => {
|
||||
assert(ty.isPtrLikeOptional(zcu));
|
||||
const child_ty = ty.optionalChild(zcu);
|
||||
const child_val = try readFromMemory(child_ty, zcu, buffer, arena);
|
||||
return Value.fromInterned(try pt.intern(.{ .opt = .{
|
||||
.ty = ty.toIntern(),
|
||||
.val = switch (child_val.orderAgainstZero(pt)) {
|
||||
.lt => unreachable,
|
||||
.eq => .none,
|
||||
.gt => child_val.toIntern(),
|
||||
},
|
||||
} }));
|
||||
},
|
||||
else => return error.Unimplemented,
|
||||
if (bits <= 64) {
|
||||
const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian);
|
||||
const result = (val << @as(u6, @intCast(64 - bits))) >> @as(u6, @intCast(64 - bits));
|
||||
return pt.intValue(ty, result);
|
||||
} else {
|
||||
const Limb = std.math.big.Limb;
|
||||
const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb);
|
||||
const limbs_buffer = try arena.alloc(Limb, limb_count);
|
||||
|
||||
var bigint: BigIntMutable = .init(limbs_buffer, 0);
|
||||
bigint.readTwosComplement(buffer[0..byte_count], bits, endian, .unsigned);
|
||||
return pt.intValue_big(ty, bigint.toConst());
|
||||
}
|
||||
}
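Illustrative sketch (not part of this diff): the fast path above reads up to eight bytes with std.mem.readVarInt and then clears everything above the type's bit width by shifting up and back down. A standalone version of that trick, with an invented helper name and little-endian input assumed:

const std = @import("std");

// Read an unsigned integer of `bits` bits from little-endian bytes, dropping
// any padding bits above `bits` the same way the <= 64-bit fast path does.
fn readUintLe(comptime bits: u16, buffer: []const u8) u64 {
    std.debug.assert(bits > 0 and bits <= 64);
    const byte_count = (bits + 7) / 8;
    const raw = std.mem.readVarInt(u64, buffer[0..byte_count], .little);
    const excess: u6 = @intCast((64 - bits) % 64);
    return (raw << excess) >> excess; // clears any padding bits above `bits`
}

test readUintLe {
    // A u12 value stored in two little-endian bytes; the top 4 bits are padding.
    const bytes = [_]u8{ 0xab, 0xfc };
    try std.testing.expectEqual(@as(u64, 0xcab), readUintLe(12, &bytes));
}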
|
||||
|
||||
|
||||
+22
-4
@@ -3512,7 +3512,6 @@ pub fn ptrType(pt: Zcu.PerThread, info: InternPool.Key.PtrType) Allocator.Error!
|
||||
canon_info.packed_offset.host_size = 0;
|
||||
}
|
||||
},
|
||||
.runtime => {},
|
||||
_ => assert(@intFromEnum(info.flags.vector_index) < info.packed_offset.host_size),
|
||||
}
|
||||
|
||||
@@ -3663,21 +3662,40 @@ pub fn intRef(pt: Zcu.PerThread, ty: Type, x: anytype) Allocator.Error!Air.Inst.
|
||||
}
|
||||
|
||||
pub fn intValue_big(pt: Zcu.PerThread, ty: Type, x: BigIntConst) Allocator.Error!Value {
|
||||
return Value.fromInterned(try pt.intern(.{ .int = .{
|
||||
if (ty.toIntern() != .comptime_int_type) {
|
||||
const int_info = ty.intInfo(pt.zcu);
|
||||
assert(x.fitsInTwosComp(int_info.signedness, int_info.bits));
|
||||
}
|
||||
return .fromInterned(try pt.intern(.{ .int = .{
|
||||
.ty = ty.toIntern(),
|
||||
.storage = .{ .big_int = x },
|
||||
} }));
|
||||
}
|
||||
|
||||
pub fn intValue_u64(pt: Zcu.PerThread, ty: Type, x: u64) Allocator.Error!Value {
|
||||
return Value.fromInterned(try pt.intern(.{ .int = .{
|
||||
if (ty.toIntern() != .comptime_int_type and x != 0) {
|
||||
const int_info = ty.intInfo(pt.zcu);
|
||||
const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed);
|
||||
assert(unsigned_bits >= std.math.log2(x) + 1);
|
||||
}
|
||||
return .fromInterned(try pt.intern(.{ .int = .{
|
||||
.ty = ty.toIntern(),
|
||||
.storage = .{ .u64 = x },
|
||||
} }));
|
||||
}
|
||||
|
||||
pub fn intValue_i64(pt: Zcu.PerThread, ty: Type, x: i64) Allocator.Error!Value {
|
||||
return Value.fromInterned(try pt.intern(.{ .int = .{
|
||||
if (ty.toIntern() != .comptime_int_type and x != 0) {
|
||||
const int_info = ty.intInfo(pt.zcu);
|
||||
const unsigned_bits = int_info.bits - @intFromBool(int_info.signedness == .signed);
|
||||
if (x > 0) {
|
||||
assert(unsigned_bits >= std.math.log2(x) + 1);
|
||||
} else {
|
||||
assert(int_info.signedness == .signed);
|
||||
assert(unsigned_bits >= std.math.log2_int_ceil(u64, @abs(x)));
|
||||
}
|
||||
}
|
||||
return .fromInterned(try pt.intern(.{ .int = .{
|
||||
.ty = ty.toIntern(),
|
||||
.storage = .{ .i64 = x },
|
||||
} }));
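Illustrative sketch (not part of this diff): the new assertions encode "x fits in this integer type": for x > 0 the number of value bits (total bits minus the sign bit, if any) must be at least log2(x) + 1. A few spot checks of that rule:

const std = @import("std");

test "fits-in-bits rule" {
    // 255 needs 8 value bits, so it fits u8 but not i8.
    try std.testing.expect(8 >= std.math.log2(@as(u64, 255)) + 1);
    try std.testing.expect(!(7 >= std.math.log2(@as(u64, 255)) + 1));
    // 127 needs 7 value bits, so it fits both u8 and i8.
    try std.testing.expect(7 >= std.math.log2(@as(u64, 127)) + 1);
}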
|
||||
|
||||
@@ -134,6 +134,10 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
|
||||
var air_inst_index = air_body[air_body_index];
|
||||
const initial_def_order_len = isel.def_order.count();
|
||||
air_tag: switch (air_tags[@intFromEnum(air_inst_index)]) {
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg,
|
||||
.ret_addr,
|
||||
.frame_addr,
|
||||
@@ -826,18 +830,6 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
|
||||
|
||||
try isel.analyzeUse(un_op);
|
||||
|
||||
air_body_index += 1;
|
||||
air_inst_index = air_body[air_body_index];
|
||||
continue :air_tag air_tags[@intFromEnum(air_inst_index)];
|
||||
},
|
||||
.vector_store_elem => {
|
||||
const vector_store_elem = air_data[@intFromEnum(air_inst_index)].vector_store_elem;
|
||||
const bin_op = isel.air.extraData(Air.Bin, vector_store_elem.payload).data;
|
||||
|
||||
try isel.analyzeUse(vector_store_elem.vector_ptr);
|
||||
try isel.analyzeUse(bin_op.lhs);
|
||||
try isel.analyzeUse(bin_op.rhs);
|
||||
|
||||
air_body_index += 1;
|
||||
air_inst_index = air_body[air_body_index];
|
||||
continue :air_tag air_tags[@intFromEnum(air_inst_index)];
|
||||
@@ -962,6 +954,11 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
|
||||
};
|
||||
air_tag: switch (air.next().?) {
|
||||
else => |air_tag| return isel.fail("unimplemented {t}", .{air_tag}),
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg => {
|
||||
const arg_vi = isel.live_values.fetchRemove(air.inst_index).?.value;
|
||||
defer arg_vi.deref(isel);
|
||||
|
||||
+135
-450
@@ -37,6 +37,7 @@ pub fn legalizeFeatures(_: *const std.Target) ?*const Air.Legalize.Features {
|
||||
.expand_packed_load = true,
|
||||
.expand_packed_store = true,
|
||||
.expand_packed_struct_field_val = true,
|
||||
.expand_packed_aggregate_init = true,
|
||||
}),
|
||||
};
|
||||
}
|
||||
@@ -1392,114 +1393,21 @@ pub const DeclGen = struct {
|
||||
try w.writeByte('}');
|
||||
},
|
||||
.@"packed" => {
|
||||
const int_info = ty.intInfo(zcu);
|
||||
|
||||
const bits = Type.smallestUnsignedBits(int_info.bits - 1);
|
||||
const bit_offset_ty = try pt.intType(.unsigned, bits);
|
||||
|
||||
var bit_offset: u64 = 0;
|
||||
var eff_num_fields: usize = 0;
|
||||
|
||||
for (0..loaded_struct.field_types.len) |field_index| {
|
||||
const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
eff_num_fields += 1;
|
||||
}
|
||||
|
||||
if (eff_num_fields == 0) {
|
||||
try w.writeByte('(');
|
||||
try dg.renderUndefValue(w, ty, location);
|
||||
try w.writeByte(')');
|
||||
} else if (ty.bitSize(zcu) > 64) {
|
||||
// zig_or_u128(zig_or_u128(zig_shl_u128(a, a_off), zig_shl_u128(b, b_off)), zig_shl_u128(c, c_off))
|
||||
var num_or = eff_num_fields - 1;
|
||||
while (num_or > 0) : (num_or -= 1) {
|
||||
try w.writeAll("zig_or_");
|
||||
try dg.renderTypeForBuiltinFnName(w, ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
|
||||
var eff_index: usize = 0;
|
||||
var needs_closing_paren = false;
|
||||
for (0..loaded_struct.field_types.len) |field_index| {
|
||||
const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) {
|
||||
.bytes => |bytes| try pt.intern(.{ .int = .{
|
||||
.ty = field_ty.toIntern(),
|
||||
.storage = .{ .u64 = bytes.at(field_index, ip) },
|
||||
} }),
|
||||
.elems => |elems| elems[field_index],
|
||||
.repeated_elem => |elem| elem,
|
||||
};
|
||||
const cast_context = IntCastContext{ .value = .{ .value = Value.fromInterned(field_val) } };
|
||||
if (bit_offset != 0) {
|
||||
try w.writeAll("zig_shl_");
|
||||
try dg.renderTypeForBuiltinFnName(w, ty);
|
||||
try w.writeByte('(');
|
||||
try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument);
|
||||
try w.writeAll(", ");
|
||||
try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument);
|
||||
try w.writeByte(')');
|
||||
} else {
|
||||
try dg.renderIntCast(w, ty, cast_context, field_ty, .FunctionArgument);
|
||||
}
|
||||
|
||||
if (needs_closing_paren) try w.writeByte(')');
|
||||
if (eff_index != eff_num_fields - 1) try w.writeAll(", ");
|
||||
|
||||
bit_offset += field_ty.bitSize(zcu);
|
||||
needs_closing_paren = true;
|
||||
eff_index += 1;
|
||||
}
|
||||
} else {
|
||||
try w.writeByte('(');
|
||||
// a << a_off | b << b_off | c << c_off
|
||||
var empty = true;
|
||||
for (0..loaded_struct.field_types.len) |field_index| {
|
||||
const field_ty: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
if (!empty) try w.writeAll(" | ");
|
||||
try w.writeByte('(');
|
||||
try dg.renderCType(w, ctype);
|
||||
try w.writeByte(')');
|
||||
|
||||
const field_val = switch (ip.indexToKey(val.toIntern()).aggregate.storage) {
|
||||
.bytes => |bytes| try pt.intern(.{ .int = .{
|
||||
.ty = field_ty.toIntern(),
|
||||
.storage = .{ .u64 = bytes.at(field_index, ip) },
|
||||
} }),
|
||||
.elems => |elems| elems[field_index],
|
||||
.repeated_elem => |elem| elem,
|
||||
};
|
||||
|
||||
const field_int_info: std.builtin.Type.Int = if (field_ty.isAbiInt(zcu))
|
||||
field_ty.intInfo(zcu)
|
||||
else
|
||||
.{ .signedness = .unsigned, .bits = undefined };
|
||||
switch (field_int_info.signedness) {
|
||||
.signed => {
|
||||
try w.writeByte('(');
|
||||
try dg.renderValue(w, Value.fromInterned(field_val), .Other);
|
||||
try w.writeAll(" & ");
|
||||
const field_uint_ty = try pt.intType(.unsigned, field_int_info.bits);
|
||||
try dg.renderValue(w, try field_uint_ty.maxIntScalar(pt, field_uint_ty), .Other);
|
||||
try w.writeByte(')');
|
||||
},
|
||||
.unsigned => try dg.renderValue(w, Value.fromInterned(field_val), .Other),
|
||||
}
|
||||
if (bit_offset != 0) {
|
||||
try w.writeAll(" << ");
|
||||
try dg.renderValue(w, try pt.intValue(bit_offset_ty, bit_offset), .FunctionArgument);
|
||||
}
|
||||
|
||||
bit_offset += field_ty.bitSize(zcu);
|
||||
empty = false;
|
||||
}
|
||||
try w.writeByte(')');
|
||||
}
|
||||
// https://github.com/ziglang/zig/issues/24657 will eliminate most of the
// following logic, leaving only the recursive `renderValue` call. Once
// that proposal is implemented, a `packed struct` will literally be
// represented in the InternPool by its comptime-known backing integer.
var arena: std.heap.ArenaAllocator = .init(zcu.gpa);
defer arena.deinit();
const backing_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip));
const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu)));
val.writeToMemory(pt, buf) catch |err| switch (err) {
error.IllDefinedMemoryLayout => unreachable,
error.OutOfMemory => |e| return e,
error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed struct value", .{}),
};
const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator());
return dg.renderValue(w, backing_val, location);
},
}
},
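As an illustration of the backing-integer equivalence the comment above relies on: for a comptime-known packed struct, the value recovered via `writeToMemory`/`readUintFromMemory` is the same one a `@bitCast` to the backing integer produces (fields are laid out LSB-first). This is a standalone sketch, not compiler code; the `Flags` type is hypothetical.

const std = @import("std");

const Flags = packed struct(u8) { read: bool, write: bool, mode: u6 };

test "packed struct round-trips through its backing integer" {
    const f: Flags = .{ .read = true, .write = false, .mode = 5 };
    const backing: u8 = @bitCast(f);
    // `read` occupies bit 0, `write` bit 1, `mode` bits 2..7.
    try std.testing.expectEqual(@as(u8, 0b000101_0_1), backing);
    try std.testing.expectEqual(f, @as(Flags, @bitCast(backing)));
}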
@@ -1507,33 +1415,38 @@ pub const DeclGen = struct {
|
||||
},
|
||||
.un => |un| {
|
||||
const loaded_union = ip.loadUnionType(ty.toIntern());
|
||||
if (loaded_union.flagsUnordered(ip).layout == .@"packed") {
|
||||
// https://github.com/ziglang/zig/issues/24657 will eliminate most of the
|
||||
// following logic, leaving only the recursive `renderValue` call. Once
|
||||
// that proposal is implemented, a `packed union` will literally be
|
||||
// represented in the InternPool by its comptime-known backing integer.
|
||||
var arena: std.heap.ArenaAllocator = .init(zcu.gpa);
|
||||
defer arena.deinit();
|
||||
const backing_ty = try ty.unionBackingType(pt);
|
||||
const buf = try arena.allocator().alloc(u8, @intCast(ty.abiSize(zcu)));
|
||||
val.writeToMemory(pt, buf) catch |err| switch (err) {
|
||||
error.IllDefinedMemoryLayout => unreachable,
|
||||
error.OutOfMemory => |e| return e,
|
||||
error.ReinterpretDeclRef, error.Unimplemented => return dg.fail("TODO: C backend: lower packed union value", .{}),
|
||||
};
|
||||
const backing_val: Value = try .readUintFromMemory(backing_ty, pt, buf, arena.allocator());
|
||||
return dg.renderValue(w, backing_val, location);
|
||||
}
|
||||
if (un.tag == .none) {
|
||||
const backing_ty = try ty.unionBackingType(pt);
|
||||
switch (loaded_union.flagsUnordered(ip).layout) {
|
||||
.@"packed" => {
|
||||
if (!location.isInitializer()) {
|
||||
try w.writeByte('(');
|
||||
try dg.renderType(w, backing_ty);
|
||||
try w.writeByte(')');
|
||||
}
|
||||
try dg.renderValue(w, Value.fromInterned(un.val), location);
|
||||
},
|
||||
.@"extern" => {
|
||||
if (location == .StaticInitializer) {
|
||||
return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{});
|
||||
}
|
||||
|
||||
const ptr_ty = try pt.singleConstPtrType(ty);
|
||||
try w.writeAll("*((");
|
||||
try dg.renderType(w, ptr_ty);
|
||||
try w.writeAll(")(");
|
||||
try dg.renderType(w, backing_ty);
|
||||
try w.writeAll("){");
|
||||
try dg.renderValue(w, Value.fromInterned(un.val), location);
|
||||
try w.writeAll("})");
|
||||
},
|
||||
else => unreachable,
|
||||
assert(loaded_union.flagsUnordered(ip).layout == .@"extern");
|
||||
if (location == .StaticInitializer) {
|
||||
return dg.fail("TODO: C backend: implement extern union backing type rendering in static initializers", .{});
|
||||
}
|
||||
|
||||
const ptr_ty = try pt.singleConstPtrType(ty);
|
||||
try w.writeAll("*((");
|
||||
try dg.renderType(w, ptr_ty);
|
||||
try w.writeAll(")(");
|
||||
try dg.renderType(w, backing_ty);
|
||||
try w.writeAll("){");
|
||||
try dg.renderValue(w, Value.fromInterned(un.val), location);
|
||||
try w.writeAll("})");
|
||||
} else {
|
||||
if (!location.isInitializer()) {
|
||||
try w.writeByte('(');
|
||||
@@ -1544,21 +1457,6 @@ pub const DeclGen = struct {
|
||||
const field_index = zcu.unionTagFieldIndex(loaded_union, Value.fromInterned(un.tag)).?;
|
||||
const field_ty: Type = .fromInterned(loaded_union.field_types.get(ip)[field_index]);
|
||||
const field_name = loaded_union.loadTagType(ip).names.get(ip)[field_index];
|
||||
if (loaded_union.flagsUnordered(ip).layout == .@"packed") {
|
||||
if (field_ty.hasRuntimeBits(zcu)) {
|
||||
if (field_ty.isPtrAtRuntime(zcu)) {
|
||||
try w.writeByte('(');
|
||||
try dg.renderCType(w, ctype);
|
||||
try w.writeByte(')');
|
||||
} else if (field_ty.zigTypeTag(zcu) == .float) {
|
||||
try w.writeByte('(');
|
||||
try dg.renderCType(w, ctype);
|
||||
try w.writeByte(')');
|
||||
}
|
||||
try dg.renderValue(w, Value.fromInterned(un.val), location);
|
||||
} else try w.writeByte('0');
|
||||
return;
|
||||
}
|
||||
|
||||
const has_tag = loaded_union.hasTag(ip);
|
||||
if (has_tag) try w.writeByte('{');
|
||||
@@ -1745,9 +1643,11 @@ pub const DeclGen = struct {
|
||||
}
|
||||
return w.writeByte('}');
|
||||
},
|
||||
.@"packed" => return w.print("{f}", .{
|
||||
try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other),
|
||||
}),
|
||||
.@"packed" => return dg.renderUndefValue(
|
||||
w,
|
||||
.fromInterned(loaded_struct.backingIntTypeUnordered(ip)),
|
||||
location,
|
||||
),
|
||||
}
|
||||
},
|
||||
.tuple_type => |tuple_info| {
|
||||
@@ -1815,9 +1715,11 @@ pub const DeclGen = struct {
|
||||
}
|
||||
if (has_tag) try w.writeByte('}');
|
||||
},
|
||||
.@"packed" => return w.print("{f}", .{
|
||||
try dg.fmtIntLiteralHex(try pt.undefValue(ty), .Other),
|
||||
}),
|
||||
.@"packed" => return dg.renderUndefValue(
|
||||
w,
|
||||
try ty.unionBackingType(pt),
|
||||
location,
|
||||
),
|
||||
}
|
||||
},
|
||||
.error_union_type => |error_union_type| switch (ctype.info(ctype_pool)) {
|
||||
@@ -2445,10 +2347,7 @@ pub const DeclGen = struct {
|
||||
const ty = val.typeOf(zcu);
|
||||
return .{ .data = .{
|
||||
.dg = dg,
|
||||
.int_info = if (ty.zigTypeTag(zcu) == .@"union" and ty.containerLayout(zcu) == .@"packed")
|
||||
.{ .signedness = .unsigned, .bits = @intCast(ty.bitSize(zcu)) }
|
||||
else
|
||||
ty.intInfo(zcu),
|
||||
.int_info = ty.intInfo(zcu),
|
||||
.kind = kind,
|
||||
.ctype = try dg.ctypeFromType(ty, kind),
|
||||
.val = val,
|
||||
@@ -3426,6 +3325,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void {
|
||||
// zig fmt: off
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg => try airArg(f, inst),
|
||||
|
||||
.breakpoint => try airBreakpoint(f),
|
||||
@@ -3656,7 +3559,6 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void {
|
||||
|
||||
.is_named_enum_value => return f.fail("TODO: C backend: implement is_named_enum_value", .{}),
|
||||
.error_set_has_value => return f.fail("TODO: C backend: implement error_set_has_value", .{}),
|
||||
.vector_store_elem => return f.fail("TODO: C backend: implement vector_store_elem", .{}),
|
||||
|
||||
.runtime_nav_ptr => try airRuntimeNavPtr(f, inst),
|
||||
|
||||
@@ -3899,6 +3801,24 @@ fn airAlloc(f: *Function, inst: Air.Inst.Index) !CValue {
});
log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local });
try f.allocs.put(zcu.gpa, local.new_local, true);

switch (elem_ty.zigTypeTag(zcu)) {
.@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) {
.@"packed" => {
// For packed aggregates, we zero-initialize to try and work around a design flaw
// related to how `packed`, `undefined`, and RLS interact. See comment in `airStore`
// for details.
const w = &f.object.code.writer;
try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local});
try f.renderType(w, elem_ty);
try w.writeAll("));");
try f.object.newline();
},
.auto, .@"extern" => {},
},
else => {},
}

return .{ .local_ref = local.new_local };
}

@@ -3918,6 +3838,24 @@ fn airRetPtr(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
});
|
||||
log.debug("%{d}: allocated unfreeable t{d}", .{ inst, local.new_local });
|
||||
try f.allocs.put(zcu.gpa, local.new_local, true);
|
||||
|
||||
switch (elem_ty.zigTypeTag(zcu)) {
|
||||
.@"struct", .@"union" => switch (elem_ty.containerLayout(zcu)) {
|
||||
.@"packed" => {
|
||||
// For packed aggregates, we zero-initialize to try and work around a design flaw
|
||||
// related to how `packed`, `undefined`, and RLS interact. See comment in `airStore`
|
||||
// for details.
|
||||
const w = &f.object.code.writer;
|
||||
try w.print("memset(&t{d}, 0x00, sizeof(", .{local.new_local});
|
||||
try f.renderType(w, elem_ty);
|
||||
try w.writeAll("));");
|
||||
try f.object.newline();
|
||||
},
|
||||
.auto, .@"extern" => {},
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
|
||||
return .{ .local_ref = local.new_local };
|
||||
}
|
||||
|
||||
@@ -3956,6 +3894,10 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
const ptr_info = ptr_scalar_ty.ptrInfo(zcu);
const src_ty: Type = .fromInterned(ptr_info.child);

// `Air.Legalize.Feature.expand_packed_load` should ensure that the only
// bit-pointers we see here are vector element pointers.
assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none);

if (!src_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
try reap(f, inst, &.{ty_op.operand});
return .none;
@@ -3987,40 +3929,6 @@ fn airLoad(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
try w.writeAll(", sizeof(");
|
||||
try f.renderType(w, src_ty);
|
||||
try w.writeAll("))");
|
||||
} else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
|
||||
const host_bits: u16 = ptr_info.packed_offset.host_size * 8;
|
||||
const host_ty = try pt.intType(.unsigned, host_bits);
|
||||
|
||||
const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1));
|
||||
const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset);
|
||||
|
||||
const field_ty = try pt.intType(.unsigned, @as(u16, @intCast(src_ty.bitSize(zcu))));
|
||||
|
||||
try f.writeCValue(w, local, .Other);
|
||||
try v.elem(f, w);
|
||||
try w.writeAll(" = (");
|
||||
try f.renderType(w, src_ty);
|
||||
try w.writeAll(")zig_wrap_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, field_ty);
|
||||
try w.writeAll("((");
|
||||
try f.renderType(w, field_ty);
|
||||
try w.writeByte(')');
|
||||
const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64;
|
||||
if (cant_cast) {
|
||||
if (field_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{});
|
||||
try w.writeAll("zig_lo_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
try w.writeAll("zig_shr_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeByte('(');
|
||||
try f.writeCValueDeref(w, operand);
|
||||
try v.elem(f, w);
|
||||
try w.print(", {f})", .{try f.fmtIntLiteralDec(bit_offset_val)});
|
||||
if (cant_cast) try w.writeByte(')');
|
||||
try f.object.dg.renderBuiltinInfo(w, field_ty, .bits);
|
||||
try w.writeByte(')');
|
||||
} else {
|
||||
try f.writeCValue(w, local, .Other);
|
||||
try v.elem(f, w);
|
||||
@@ -4213,6 +4121,10 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
const ptr_scalar_ty = ptr_ty.scalarType(zcu);
const ptr_info = ptr_scalar_ty.ptrInfo(zcu);

// `Air.Legalize.Feature.expand_packed_store` should ensure that the only
// bit-pointers we see here are vector element pointers.
assert(ptr_info.packed_offset.host_size == 0 or ptr_info.flags.vector_index != .none);

const ptr_val = try f.resolveInst(bin_op.lhs);
const src_ty = f.typeOf(bin_op.rhs);

@@ -4222,9 +4134,24 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
if (val_is_undef) {
try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs });
if (safety and ptr_info.packed_offset.host_size == 0) {
// If the thing we're initializing is a packed struct/union, we set to 0 instead of
// 0xAA. This is a hack to work around a problem with partially-undefined packed
// aggregates. If we used 0xAA here, then a later initialization through RLS would
// not zero the high padding bits (for a packed type which is not 8/16/32/64/etc bits),
// so we would get a miscompilation. Using 0x00 here avoids this bug in some cases. It
// is *not* a correct fix; for instance it misses any case where packed structs are
// nested in other aggregates. A proper fix for this will involve changing the language,
// such as to remove RLS. This just prevents miscompilations in *some* common cases.
const byte_str: []const u8 = switch (src_ty.zigTypeTag(zcu)) {
else => "0xaa",
.@"struct", .@"union" => switch (src_ty.containerLayout(zcu)) {
.auto, .@"extern" => "0xaa",
.@"packed" => "0x00",
},
};
try w.writeAll("memset(");
try f.writeCValue(w, ptr_val, .FunctionArgument);
try w.writeAll(", 0xaa, sizeof(");
try w.print(", {s}, sizeof(", .{byte_str});
try f.renderType(w, .fromInterned(ptr_info.child));
try w.writeAll("));");
try f.object.newline();
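A rough, standalone illustration of the padding-bit problem described in the comment above: a packed payload whose bit size is not a whole power-of-two number of bits (here 12 bits stored in a 16-bit word). If the storage starts out as 0xAAAA and only the field bits are OR-ed in (roughly how the generated C initializes packed values), the 4 high padding bits keep garbage; starting from zero leaves them clear. The `orFieldsInto` helper is hypothetical and only mimics the shift-and-or pattern.

const std = @import("std");

fn orFieldsInto(storage: u16, a: u7, b: u5) u16 {
    // OR a 7-bit field at bit 0 and a 5-bit field at bit 7 into existing storage,
    // leaving bits 12..15 (padding) untouched.
    return storage | (@as(u16, a) << 0) | (@as(u16, b) << 7);
}

test orFieldsInto {
    const from_zero = orFieldsInto(0x0000, 0x7f, 0x1f);
    const from_aa = orFieldsInto(0xAAAA, 0x7f, 0x1f);
    try std.testing.expectEqual(@as(u16, 0x0FFF), from_zero);
    // Stray high bits survive when the storage was filled with 0xAA first.
    try std.testing.expect(from_aa & 0xF000 != 0);
}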
@@ -4277,66 +4204,6 @@ fn airStore(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue {
|
||||
try w.writeByte(';');
|
||||
try f.object.newline();
|
||||
try v.end(f, inst, w);
|
||||
} else if (ptr_info.packed_offset.host_size > 0 and ptr_info.flags.vector_index == .none) {
|
||||
const host_bits = ptr_info.packed_offset.host_size * 8;
|
||||
const host_ty = try pt.intType(.unsigned, host_bits);
|
||||
|
||||
const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(host_bits - 1));
|
||||
const bit_offset_val = try pt.intValue(bit_offset_ty, ptr_info.packed_offset.bit_offset);
|
||||
|
||||
const src_bits = src_ty.bitSize(zcu);
|
||||
|
||||
const ExpectedContents = [BigInt.Managed.default_capacity]BigIntLimb;
|
||||
var stack align(@alignOf(ExpectedContents)) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), f.object.dg.gpa);
|
||||
|
||||
var mask = try BigInt.Managed.initCapacity(stack.get(), BigInt.calcTwosCompLimbCount(host_bits));
|
||||
defer mask.deinit();
|
||||
|
||||
try mask.setTwosCompIntLimit(.max, .unsigned, @intCast(src_bits));
|
||||
try mask.shiftLeft(&mask, ptr_info.packed_offset.bit_offset);
|
||||
try mask.bitNotWrap(&mask, .unsigned, host_bits);
|
||||
|
||||
const mask_val = try pt.intValue_big(host_ty, mask.toConst());
|
||||
|
||||
const v = try Vectorize.start(f, inst, w, ptr_ty);
|
||||
const a = try Assignment.start(f, w, src_scalar_ctype);
|
||||
try f.writeCValueDeref(w, ptr_val);
|
||||
try v.elem(f, w);
|
||||
try a.assign(f, w);
|
||||
try w.writeAll("zig_or_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeAll("(zig_and_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeByte('(');
|
||||
try f.writeCValueDeref(w, ptr_val);
|
||||
try v.elem(f, w);
|
||||
try w.print(", {f}), zig_shl_", .{try f.fmtIntLiteralHex(mask_val)});
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeByte('(');
|
||||
const cant_cast = host_ty.isInt(zcu) and host_ty.bitSize(zcu) > 64;
|
||||
if (cant_cast) {
|
||||
if (src_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{});
|
||||
try w.writeAll("zig_make_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, host_ty);
|
||||
try w.writeAll("(0, ");
|
||||
} else {
|
||||
try w.writeByte('(');
|
||||
try f.renderType(w, host_ty);
|
||||
try w.writeByte(')');
|
||||
}
|
||||
|
||||
if (src_ty.isPtrAtRuntime(zcu)) {
|
||||
try w.writeByte('(');
|
||||
try f.renderType(w, .usize);
|
||||
try w.writeByte(')');
|
||||
}
|
||||
try f.writeCValue(w, src_val, .Other);
|
||||
try v.elem(f, w);
|
||||
if (cant_cast) try w.writeByte(')');
|
||||
try w.print(", {f}))", .{try f.fmtIntLiteralDec(bit_offset_val)});
|
||||
try a.end(f, w);
|
||||
try v.end(f, inst, w);
|
||||
} else {
|
||||
switch (ptr_val) {
|
||||
.local_ref => |ptr_local_index| switch (src_val) {
|
||||
@@ -6015,10 +5882,7 @@ fn fieldLocation(
|
||||
else if (!field_ptr_ty.childType(zcu).hasRuntimeBitsIgnoreComptime(zcu))
|
||||
.{ .byte_offset = loaded_struct.offsets.get(ip)[field_index] }
|
||||
else
|
||||
.{ .field = if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name|
|
||||
.{ .identifier = field_name.toSlice(ip) }
|
||||
else
|
||||
.{ .field = field_index } },
|
||||
.{ .field = .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) } },
|
||||
.@"packed" => if (field_ptr_ty.ptrInfo(zcu).packed_offset.host_size == 0)
|
||||
.{ .byte_offset = @divExact(zcu.structPackedFieldBitOffset(loaded_struct, field_index) +
|
||||
container_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset, 8) }
|
||||
@@ -6202,115 +6066,20 @@ fn airStructFieldVal(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
// Ensure complete type definition is visible before accessing fields.
|
||||
_ = try f.ctypeFromType(struct_ty, .complete);
|
||||
|
||||
assert(struct_ty.containerLayout(zcu) != .@"packed"); // `Air.Legalize.Feature.expand_packed_struct_field_val` handles this case
|
||||
const field_name: CValue = switch (ip.indexToKey(struct_ty.toIntern())) {
|
||||
.struct_type => field_name: {
|
||||
const loaded_struct = ip.loadStructType(struct_ty.toIntern());
|
||||
switch (loaded_struct.layout) {
|
||||
.auto, .@"extern" => break :field_name if (loaded_struct.fieldName(ip, extra.field_index).unwrap()) |field_name|
|
||||
.{ .identifier = field_name.toSlice(ip) }
|
||||
else
|
||||
.{ .field = extra.field_index },
|
||||
.@"packed" => {
|
||||
const int_info = struct_ty.intInfo(zcu);
|
||||
|
||||
const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1));
|
||||
|
||||
const bit_offset = zcu.structPackedFieldBitOffset(loaded_struct, extra.field_index);
|
||||
|
||||
const field_int_signedness = if (inst_ty.isAbiInt(zcu))
|
||||
inst_ty.intInfo(zcu).signedness
|
||||
else
|
||||
.unsigned;
|
||||
const field_int_ty = try pt.intType(field_int_signedness, @as(u16, @intCast(inst_ty.bitSize(zcu))));
|
||||
|
||||
const temp_local = try f.allocLocal(inst, field_int_ty);
|
||||
try f.writeCValue(w, temp_local, .Other);
|
||||
try w.writeAll(" = zig_wrap_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, field_int_ty);
|
||||
try w.writeAll("((");
|
||||
try f.renderType(w, field_int_ty);
|
||||
try w.writeByte(')');
|
||||
const cant_cast = int_info.bits > 64;
|
||||
if (cant_cast) {
|
||||
if (field_int_ty.bitSize(zcu) > 64) return f.fail("TODO: C backend: implement casting between types > 64 bits", .{});
|
||||
try w.writeAll("zig_lo_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
if (bit_offset > 0) {
|
||||
try w.writeAll("zig_shr_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, struct_ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
try f.writeCValue(w, struct_byval, .Other);
|
||||
if (bit_offset > 0) try w.print(", {f})", .{
|
||||
try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)),
|
||||
});
|
||||
if (cant_cast) try w.writeByte(')');
|
||||
try f.object.dg.renderBuiltinInfo(w, field_int_ty, .bits);
|
||||
try w.writeAll(");");
|
||||
try f.object.newline();
|
||||
if (inst_ty.eql(field_int_ty, zcu)) return temp_local;
|
||||
|
||||
const local = try f.allocLocal(inst, inst_ty);
|
||||
if (local.new_local != temp_local.new_local) {
|
||||
try w.writeAll("memcpy(");
|
||||
try f.writeCValue(w, .{ .local_ref = local.new_local }, .FunctionArgument);
|
||||
try w.writeAll(", ");
|
||||
try f.writeCValue(w, .{ .local_ref = temp_local.new_local }, .FunctionArgument);
|
||||
try w.writeAll(", sizeof(");
|
||||
try f.renderType(w, inst_ty);
|
||||
try w.writeAll("));");
|
||||
try f.object.newline();
|
||||
}
|
||||
try freeLocal(f, inst, temp_local.new_local, null);
|
||||
return local;
|
||||
},
|
||||
.struct_type => .{ .identifier = struct_ty.structFieldName(extra.field_index, zcu).unwrap().?.toSlice(ip) },
|
||||
.union_type => name: {
|
||||
const union_type = ip.loadUnionType(struct_ty.toIntern());
|
||||
const enum_tag_ty: Type = .fromInterned(union_type.enum_tag_ty);
|
||||
const field_name_str = enum_tag_ty.enumFieldName(extra.field_index, zcu).toSlice(ip);
|
||||
if (union_type.hasTag(ip)) {
|
||||
break :name .{ .payload_identifier = field_name_str };
|
||||
} else {
|
||||
break :name .{ .identifier = field_name_str };
|
||||
}
|
||||
},
|
||||
.tuple_type => .{ .field = extra.field_index },
|
||||
.union_type => field_name: {
|
||||
const loaded_union = ip.loadUnionType(struct_ty.toIntern());
|
||||
switch (loaded_union.flagsUnordered(ip).layout) {
|
||||
.auto, .@"extern" => {
|
||||
const name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index];
|
||||
break :field_name if (loaded_union.hasTag(ip))
|
||||
.{ .payload_identifier = name.toSlice(ip) }
|
||||
else
|
||||
.{ .identifier = name.toSlice(ip) };
|
||||
},
|
||||
.@"packed" => {
|
||||
const operand_lval = if (struct_byval == .constant) blk: {
|
||||
const operand_local = try f.allocLocal(inst, struct_ty);
|
||||
try f.writeCValue(w, operand_local, .Other);
|
||||
try w.writeAll(" = ");
|
||||
try f.writeCValue(w, struct_byval, .Other);
|
||||
try w.writeByte(';');
|
||||
try f.object.newline();
|
||||
break :blk operand_local;
|
||||
} else struct_byval;
|
||||
const local = try f.allocLocal(inst, inst_ty);
|
||||
if (switch (local) {
|
||||
.new_local, .local => |local_index| switch (operand_lval) {
|
||||
.new_local, .local => |operand_local_index| local_index != operand_local_index,
|
||||
else => true,
|
||||
},
|
||||
else => true,
|
||||
}) {
|
||||
try w.writeAll("memcpy(&");
|
||||
try f.writeCValue(w, local, .Other);
|
||||
try w.writeAll(", &");
|
||||
try f.writeCValue(w, operand_lval, .Other);
|
||||
try w.writeAll(", sizeof(");
|
||||
try f.renderType(w, inst_ty);
|
||||
try w.writeAll("));");
|
||||
try f.object.newline();
|
||||
}
|
||||
try f.freeCValue(inst, operand_lval);
|
||||
return local;
|
||||
},
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
@@ -7702,98 +7471,13 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
const a = try Assignment.start(f, w, try f.ctypeFromType(field_ty, .complete));
|
||||
try f.writeCValueMember(w, local, if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name|
|
||||
.{ .identifier = field_name.toSlice(ip) }
|
||||
else
|
||||
.{ .field = field_index });
|
||||
try f.writeCValueMember(w, local, .{ .identifier = loaded_struct.fieldName(ip, field_index).toSlice(ip) });
|
||||
try a.assign(f, w);
|
||||
try f.writeCValue(w, resolved_elements[field_index], .Other);
|
||||
try a.end(f, w);
|
||||
}
|
||||
},
|
||||
.@"packed" => {
|
||||
try f.writeCValue(w, local, .Other);
|
||||
try w.writeAll(" = ");
|
||||
|
||||
const backing_int_ty: Type = .fromInterned(loaded_struct.backingIntTypeUnordered(ip));
|
||||
const int_info = backing_int_ty.intInfo(zcu);
|
||||
|
||||
const bit_offset_ty = try pt.intType(.unsigned, Type.smallestUnsignedBits(int_info.bits - 1));
|
||||
|
||||
var bit_offset: u64 = 0;
|
||||
|
||||
var empty = true;
|
||||
for (0..elements.len) |field_index| {
|
||||
if (inst_ty.structFieldIsComptime(field_index, zcu)) continue;
|
||||
const field_ty = inst_ty.fieldType(field_index, zcu);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
if (!empty) {
|
||||
try w.writeAll("zig_or_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
empty = false;
|
||||
}
|
||||
empty = true;
|
||||
for (resolved_elements, 0..) |element, field_index| {
|
||||
if (inst_ty.structFieldIsComptime(field_index, zcu)) continue;
|
||||
const field_ty = inst_ty.fieldType(field_index, zcu);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
if (!empty) try w.writeAll(", ");
|
||||
// TODO: Skip this entire shift if val is 0?
|
||||
try w.writeAll("zig_shlw_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty);
|
||||
try w.writeByte('(');
|
||||
|
||||
if (field_ty.isAbiInt(zcu)) {
|
||||
try w.writeAll("zig_and_");
|
||||
try f.object.dg.renderTypeForBuiltinFnName(w, inst_ty);
|
||||
try w.writeByte('(');
|
||||
}
|
||||
|
||||
if (inst_ty.isAbiInt(zcu) and (field_ty.isAbiInt(zcu) or field_ty.isPtrAtRuntime(zcu))) {
|
||||
try f.renderIntCast(w, inst_ty, element, .{}, field_ty, .FunctionArgument);
|
||||
} else {
|
||||
try w.writeByte('(');
|
||||
try f.renderType(w, inst_ty);
|
||||
try w.writeByte(')');
|
||||
if (field_ty.isPtrAtRuntime(zcu)) {
|
||||
try w.writeByte('(');
|
||||
try f.renderType(w, switch (int_info.signedness) {
|
||||
.unsigned => .usize,
|
||||
.signed => .isize,
|
||||
});
|
||||
try w.writeByte(')');
|
||||
}
|
||||
try f.writeCValue(w, element, .Other);
|
||||
}
|
||||
|
||||
if (field_ty.isAbiInt(zcu)) {
|
||||
try w.writeAll(", ");
|
||||
const field_int_info = field_ty.intInfo(zcu);
|
||||
const field_mask = if (int_info.signedness == .signed and int_info.bits == field_int_info.bits)
|
||||
try pt.intValue(backing_int_ty, -1)
|
||||
else
|
||||
try (try pt.intType(.unsigned, field_int_info.bits)).maxIntScalar(pt, backing_int_ty);
|
||||
try f.object.dg.renderValue(w, field_mask, .FunctionArgument);
|
||||
try w.writeByte(')');
|
||||
}
|
||||
|
||||
try w.print(", {f}", .{
|
||||
try f.fmtIntLiteralDec(try pt.intValue(bit_offset_ty, bit_offset)),
|
||||
});
|
||||
try f.object.dg.renderBuiltinInfo(w, inst_ty, .bits);
|
||||
try w.writeByte(')');
|
||||
if (!empty) try w.writeByte(')');
|
||||
|
||||
bit_offset += field_ty.bitSize(zcu);
|
||||
empty = false;
|
||||
}
|
||||
try w.writeByte(';');
|
||||
try f.object.newline();
|
||||
},
|
||||
.@"packed" => unreachable, // `Air.Legalize.Feature.expand_packed_struct_init` handles this case
|
||||
}
|
||||
},
|
||||
.tuple_type => |tuple_info| for (0..tuple_info.types.len) |field_index| {
|
||||
@@ -7828,9 +7512,10 @@ fn airUnionInit(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
try reap(f, inst, &.{extra.init});
|
||||
|
||||
const w = &f.object.code.writer;
|
||||
const local = try f.allocLocal(inst, union_ty);
|
||||
if (loaded_union.flagsUnordered(ip).layout == .@"packed") return f.moveCValue(inst, union_ty, payload);
|
||||
|
||||
const local = try f.allocLocal(inst, union_ty);
|
||||
|
||||
const field: CValue = if (union_ty.unionTagTypeSafety(zcu)) |tag_ty| field: {
|
||||
const layout = union_ty.unionGetLayout(zcu);
|
||||
if (layout.tag_size != 0) {
|
||||
|
||||
@@ -2514,11 +2514,7 @@ pub const Pool = struct {
|
||||
kind.noParameter(),
|
||||
);
|
||||
if (field_ctype.index == .void) continue;
|
||||
const field_name = if (loaded_struct.fieldName(ip, field_index)
|
||||
.unwrap()) |field_name|
|
||||
try pool.string(allocator, field_name.toSlice(ip))
|
||||
else
|
||||
String.fromUnnamed(@intCast(field_index));
|
||||
const field_name = try pool.string(allocator, loaded_struct.fieldName(ip, field_index).toSlice(ip));
|
||||
const field_alignas = AlignAs.fromAlignment(.{
|
||||
.@"align" = loaded_struct.fieldAlign(ip, field_index),
|
||||
.abi = field_type.abiAlignment(zcu),
|
||||
|
||||
+13
-45
@@ -2409,8 +2409,7 @@ pub const Object = struct {
|
||||
const field_size = field_ty.abiSize(zcu);
|
||||
const field_align = ty.fieldAlignment(field_index, zcu);
|
||||
const field_offset = ty.structFieldOffset(field_index, zcu);
|
||||
const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse
|
||||
try ip.getOrPutStringFmt(gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls);
|
||||
const field_name = struct_type.fieldName(ip, field_index);
|
||||
fields.appendAssumeCapacity(try o.builder.debugMemberType(
|
||||
try o.builder.metadataString(field_name.toSlice(ip)),
|
||||
null, // File
|
||||
@@ -4885,6 +4884,11 @@ pub const FuncGen = struct {

const val: Builder.Value = switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off

// No "scalarize" legalizations are enabled, so these instructions never appear.
.legalize_vec_elem_val => unreachable,
.legalize_vec_store_elem => unreachable,

.add => try self.airAdd(inst, .normal),
.add_optimized => try self.airAdd(inst, .fast),
.add_wrap => try self.airAddWrap(inst),
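For readers unfamiliar with the term: a "scalarize" legalization rewrites an elementwise vector operation into per-element scalar operations before a backend sees it, which is why backends that lower vectors natively (like this one) can mark the corresponding instructions unreachable. A minimal sketch of the idea, not the compiler's rewrite:

const std = @import("std");

fn addScalarized(a: @Vector(4, i32), b: @Vector(4, i32)) @Vector(4, i32) {
    // Element-by-element loop standing in for the vector add.
    var out: @Vector(4, i32) = undefined;
    inline for (0..4) |i| out[i] = a[i] + b[i];
    return out;
}

test addScalarized {
    const a: @Vector(4, i32) = .{ 1, 2, 3, 4 };
    const b: @Vector(4, i32) = .{ 10, 20, 30, 40 };
    try std.testing.expectEqual(a + b, addScalarized(a, b));
}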
@@ -5091,8 +5095,6 @@ pub const FuncGen = struct {
|
||||
.wasm_memory_size => try self.airWasmMemorySize(inst),
|
||||
.wasm_memory_grow => try self.airWasmMemoryGrow(inst),
|
||||
|
||||
.vector_store_elem => try self.airVectorStoreElem(inst),
|
||||
|
||||
.runtime_nav_ptr => try self.airRuntimeNavPtr(inst),
|
||||
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
@@ -6871,16 +6873,14 @@ pub const FuncGen = struct {
|
||||
const array_llvm_ty = try o.lowerType(pt, array_ty);
|
||||
const elem_ty = array_ty.childType(zcu);
|
||||
if (isByRef(array_ty, zcu)) {
|
||||
const indices: [2]Builder.Value = .{
|
||||
try o.builder.intValue(try o.lowerType(pt, Type.usize), 0), rhs,
|
||||
};
|
||||
const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &.{
|
||||
try o.builder.intValue(try o.lowerType(pt, Type.usize), 0),
|
||||
rhs,
|
||||
}, "");
|
||||
if (isByRef(elem_ty, zcu)) {
|
||||
const elem_ptr = try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, "");
|
||||
const elem_alignment = elem_ty.abiAlignment(zcu).toLlvm();
|
||||
return self.loadByRef(elem_ptr, elem_ty, elem_alignment, .normal);
|
||||
} else {
|
||||
const elem_ptr =
|
||||
try self.wip.gep(.inbounds, array_llvm_ty, array_llvm_val, &indices, "");
|
||||
return self.loadTruncate(.normal, elem_ty, elem_ptr, .default);
|
||||
}
|
||||
}
|
||||
@@ -8138,33 +8138,6 @@ pub const FuncGen = struct {
|
||||
}, "");
|
||||
}
|
||||
|
||||
fn airVectorStoreElem(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
const pt = self.ng.pt;
const zcu = pt.zcu;
const data = self.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem;
const extra = self.air.extraData(Air.Bin, data.payload).data;

const vector_ptr = try self.resolveInst(data.vector_ptr);
const vector_ptr_ty = self.typeOf(data.vector_ptr);
const index = try self.resolveInst(extra.lhs);
const operand = try self.resolveInst(extra.rhs);

self.maybeMarkAllowZeroAccess(vector_ptr_ty.ptrInfo(zcu));

// TODO: Emitting a load here is a violation of volatile semantics. Not fixable in general.
// https://github.com/ziglang/zig/issues/18652#issuecomment-2452844908
const access_kind: Builder.MemoryAccessKind =
if (vector_ptr_ty.isVolatilePtr(zcu)) .@"volatile" else .normal;
const elem_llvm_ty = try o.lowerType(pt, vector_ptr_ty.childType(zcu));
const alignment = vector_ptr_ty.ptrAlignment(zcu).toLlvm();
const loaded = try self.wip.load(access_kind, elem_llvm_ty, vector_ptr, alignment, "");

const new_vector = try self.wip.insertElement(loaded, operand, index, "");
_ = try self.store(vector_ptr, vector_ptr_ty, new_vector, .none);
return .none;
}

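The removed function above lowered `vector_store_elem` as load / replace one lane / store back. The same semantics written as plain Zig, as a sketch only (the `vectorStoreElem` helper is illustrative, not part of the compiler):

const std = @import("std");

fn vectorStoreElem(ptr: *@Vector(4, u32), index: u2, value: u32) void {
    // Read-modify-write of the whole vector, mirroring load + insertelement + store.
    var v = ptr.*;
    v[index] = value;
    ptr.* = v;
}

test vectorStoreElem {
    var v: @Vector(4, u32) = .{ 1, 2, 3, 4 };
    vectorStoreElem(&v, 2, 99);
    try std.testing.expectEqual(@as(u32, 99), v[2]);
}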
fn airRuntimeNavPtr(fg: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
|
||||
const o = fg.ng.object;
|
||||
const pt = fg.ng.pt;
|
||||
@@ -8301,8 +8274,7 @@ pub const FuncGen = struct {
|
||||
const rhs = try self.resolveInst(bin_op.rhs);
|
||||
const inst_ty = self.typeOfIndex(inst);
|
||||
const scalar_ty = inst_ty.scalarType(zcu);
|
||||
|
||||
if (scalar_ty.isAnyFloat()) return self.todo("saturating float add", .{});
|
||||
assert(scalar_ty.zigTypeTag(zcu) == .int);
|
||||
return self.wip.callIntrinsic(
|
||||
.normal,
|
||||
.none,
|
||||
@@ -8342,8 +8314,7 @@ pub const FuncGen = struct {
|
||||
const rhs = try self.resolveInst(bin_op.rhs);
|
||||
const inst_ty = self.typeOfIndex(inst);
|
||||
const scalar_ty = inst_ty.scalarType(zcu);
|
||||
|
||||
if (scalar_ty.isAnyFloat()) return self.todo("saturating float sub", .{});
|
||||
assert(scalar_ty.zigTypeTag(zcu) == .int);
|
||||
return self.wip.callIntrinsic(
|
||||
.normal,
|
||||
.none,
|
||||
@@ -8383,8 +8354,7 @@ pub const FuncGen = struct {
|
||||
const rhs = try self.resolveInst(bin_op.rhs);
|
||||
const inst_ty = self.typeOfIndex(inst);
|
||||
const scalar_ty = inst_ty.scalarType(zcu);
|
||||
|
||||
if (scalar_ty.isAnyFloat()) return self.todo("saturating float mul", .{});
|
||||
assert(scalar_ty.zigTypeTag(zcu) == .int);
|
||||
return self.wip.callIntrinsic(
|
||||
.normal,
|
||||
.none,
|
||||
@@ -11452,7 +11422,6 @@ pub const FuncGen = struct {
|
||||
const access_kind: Builder.MemoryAccessKind =
|
||||
if (info.flags.is_volatile) .@"volatile" else .normal;
|
||||
|
||||
assert(info.flags.vector_index != .runtime);
|
||||
if (info.flags.vector_index != .none) {
|
||||
const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index);
|
||||
const vec_elem_ty = try o.lowerType(pt, elem_ty);
|
||||
@@ -11522,7 +11491,6 @@ pub const FuncGen = struct {
|
||||
const access_kind: Builder.MemoryAccessKind =
|
||||
if (info.flags.is_volatile) .@"volatile" else .normal;
|
||||
|
||||
assert(info.flags.vector_index != .runtime);
|
||||
if (info.flags.vector_index != .none) {
|
||||
const index_u32 = try o.builder.intValue(.i32, info.flags.vector_index);
|
||||
const vec_elem_ty = try o.lowerType(pt, elem_ty);
|
||||
|
||||
@@ -1391,6 +1391,11 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
|
||||
const tag = air_tags[@intFromEnum(inst)];
|
||||
switch (tag) {
|
||||
// zig fmt: off
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.add,
|
||||
.add_wrap,
|
||||
.sub,
|
||||
@@ -1633,7 +1638,6 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
|
||||
|
||||
.is_named_enum_value => return func.fail("TODO implement is_named_enum_value", .{}),
|
||||
.error_set_has_value => return func.fail("TODO implement error_set_has_value", .{}),
|
||||
.vector_store_elem => return func.fail("TODO implement vector_store_elem", .{}),
|
||||
|
||||
.c_va_arg => return func.fail("TODO implement c_va_arg", .{}),
|
||||
.c_va_copy => return func.fail("TODO implement c_va_copy", .{}),
|
||||
|
||||
@@ -479,6 +479,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
|
||||
self.reused_operands = @TypeOf(self.reused_operands).initEmpty();
|
||||
switch (air_tags[@intFromEnum(inst)]) {
|
||||
// zig fmt: off
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.ptr_add => try self.airPtrArithmetic(inst, .ptr_add),
|
||||
.ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub),
|
||||
|
||||
@@ -702,7 +707,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
|
||||
|
||||
.is_named_enum_value => @panic("TODO implement is_named_enum_value"),
|
||||
.error_set_has_value => @panic("TODO implement error_set_has_value"),
|
||||
.vector_store_elem => @panic("TODO implement vector_store_elem"),
|
||||
.runtime_nav_ptr => @panic("TODO implement runtime_nav_ptr"),
|
||||
|
||||
.c_va_arg => return self.fail("TODO implement c_va_arg", .{}),
|
||||
|
||||
@@ -1520,8 +1520,7 @@ fn resolveType(cg: *CodeGen, ty: Type, repr: Repr) Error!Id {
|
||||
const field_ty: Type = .fromInterned(struct_type.field_types.get(ip)[field_index]);
|
||||
if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue;
|
||||
|
||||
const field_name = struct_type.fieldName(ip, field_index).unwrap() orelse
|
||||
try ip.getOrPutStringFmt(zcu.gpa, pt.tid, "{d}", .{field_index}, .no_embedded_nulls);
|
||||
const field_name = struct_type.fieldName(ip, field_index);
|
||||
try member_types.append(try cg.resolveType(field_ty, .indirect));
|
||||
try member_names.append(field_name.toSlice(ip));
|
||||
try member_offsets.append(@intCast(ty.structFieldOffset(field_index, zcu)));
|
||||
@@ -2726,8 +2725,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) Error!void {
|
||||
.ptr_elem_val => try cg.airPtrElemVal(inst),
|
||||
.array_elem_val => try cg.airArrayElemVal(inst),
|
||||
|
||||
.vector_store_elem => return cg.airVectorStoreElem(inst),
|
||||
|
||||
.set_union_tag => return cg.airSetUnionTag(inst),
|
||||
.get_union_tag => try cg.airGetUnionTag(inst),
|
||||
.union_init => try cg.airUnionInit(inst),
|
||||
@@ -4446,29 +4443,6 @@ fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) !?Id {
|
||||
return try cg.load(elem_ty, elem_ptr_id, .{ .is_volatile = ptr_ty.isVolatilePtr(zcu) });
|
||||
}
|
||||
|
||||
fn airVectorStoreElem(cg: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
const zcu = cg.module.zcu;
|
||||
const data = cg.air.instructions.items(.data)[@intFromEnum(inst)].vector_store_elem;
|
||||
const extra = cg.air.extraData(Air.Bin, data.payload).data;
|
||||
|
||||
const vector_ptr_ty = cg.typeOf(data.vector_ptr);
|
||||
const vector_ty = vector_ptr_ty.childType(zcu);
|
||||
const scalar_ty = vector_ty.scalarType(zcu);
|
||||
|
||||
const scalar_ty_id = try cg.resolveType(scalar_ty, .indirect);
|
||||
const storage_class = cg.module.storageClass(vector_ptr_ty.ptrAddressSpace(zcu));
|
||||
const scalar_ptr_ty_id = try cg.module.ptrType(scalar_ty_id, storage_class);
|
||||
|
||||
const vector_ptr = try cg.resolve(data.vector_ptr);
|
||||
const index = try cg.resolve(extra.lhs);
|
||||
const operand = try cg.resolve(extra.rhs);
|
||||
|
||||
const elem_ptr_id = try cg.accessChainId(scalar_ptr_ty_id, vector_ptr, &.{index});
|
||||
try cg.store(scalar_ty, elem_ptr_id, operand, .{
|
||||
.is_volatile = vector_ptr_ty.isVolatilePtr(zcu),
|
||||
});
|
||||
}
|
||||
|
||||
fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
const zcu = cg.module.zcu;
|
||||
const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
|
||||
@@ -1786,6 +1786,10 @@ fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum
|
||||
fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
const air_tags = cg.air.instructions.items(.tag);
|
||||
return switch (air_tags[@intFromEnum(inst)]) {
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
|
||||
.add => cg.airBinOp(inst, .add),
|
||||
@@ -1978,7 +1982,6 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
.save_err_return_trace_index,
|
||||
.is_named_enum_value,
|
||||
.addrspace_cast,
|
||||
.vector_store_elem,
|
||||
.c_va_arg,
|
||||
.c_va_copy,
|
||||
.c_va_end,
|
||||
|
||||
+67
-974
@@ -854,12 +854,6 @@ const FrameAlloc = struct {
|
||||
}
|
||||
};
|
||||
|
||||
const StackAllocation = struct {
|
||||
inst: ?Air.Inst.Index,
|
||||
/// TODO do we need size? should be determined by inst.ty.abiSize(zcu)
|
||||
size: u32,
|
||||
};
|
||||
|
||||
const BlockData = struct {
|
||||
relocs: std.ArrayListUnmanaged(Mir.Inst.Index) = .empty,
|
||||
state: State,
|
||||
@@ -89326,7 +89320,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
error.SelectFailed => res[0] = try ops[0].load(val_ty, .{
|
||||
.disp = switch (cg.typeOf(ty_op.operand).ptrInfo(zcu).flags.vector_index) {
|
||||
.none => 0,
|
||||
.runtime => unreachable,
|
||||
else => |vector_index| @intCast(val_ty.abiSize(zcu) * @intFromEnum(vector_index)),
|
||||
},
|
||||
}, cg),
|
||||
@@ -89569,7 +89562,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
error.SelectFailed => try ops[0].store(&ops[1], .{
|
||||
.disp = switch (cg.typeOf(bin_op.lhs).ptrInfo(zcu).flags.vector_index) {
|
||||
.none => 0,
|
||||
.runtime => unreachable,
|
||||
else => |vector_index| @intCast(cg.typeOf(bin_op.rhs).abiSize(zcu) * @intFromEnum(vector_index)),
|
||||
},
|
||||
.safe = switch (air_tag) {
|
||||
@@ -103934,7 +103926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
try ops[0].toOffset(0, cg);
|
||||
try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg);
|
||||
},
|
||||
.array_elem_val => {
|
||||
.array_elem_val, .legalize_vec_elem_val => {
|
||||
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
|
||||
const array_ty = cg.typeOf(bin_op.lhs);
|
||||
const res_ty = array_ty.elemType2(zcu);
|
||||
@@ -171402,8 +171394,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
.aggregate_init => |air_tag| fallback: {
|
||||
const ty_pl = air_datas[@intFromEnum(inst)].ty_pl;
|
||||
const agg_ty = ty_pl.ty.toType();
|
||||
if ((agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) or
|
||||
(agg_ty.zigTypeTag(zcu) == .@"struct" and agg_ty.containerLayout(zcu) == .@"packed")) break :fallback try cg.airAggregateInit(inst);
|
||||
if (agg_ty.isVector(zcu) and agg_ty.childType(zcu).toIntern() == .bool_type) {
|
||||
break :fallback try cg.airAggregateInitBoolVec(inst);
|
||||
}
|
||||
var res = try cg.tempAllocMem(agg_ty);
|
||||
const reset_index = cg.next_temp_index;
|
||||
var bt = cg.liveness.iterateBigTomb(inst);
|
||||
@@ -171441,10 +171434,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
}
|
||||
}
|
||||
},
|
||||
.@"packed" => return cg.fail("failed to select {s} {f}", .{
|
||||
@tagName(air_tag),
|
||||
agg_ty.fmt(pt),
|
||||
}),
|
||||
.@"packed" => unreachable,
|
||||
}
|
||||
},
|
||||
.tuple_type => |tuple_type| {
|
||||
@@ -173054,10 +173044,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
try ert.die(cg);
|
||||
try res.finish(inst, &.{}, &.{}, cg);
|
||||
},
|
||||
.vector_store_elem => {
|
||||
const vector_store_elem = air_datas[@intFromEnum(inst)].vector_store_elem;
|
||||
const bin_op = cg.air.extraData(Air.Bin, vector_store_elem.payload).data;
|
||||
var ops = try cg.tempsFromOperands(inst, .{ vector_store_elem.vector_ptr, bin_op.lhs, bin_op.rhs });
|
||||
.runtime_nav_ptr => {
|
||||
const ty_nav = air_datas[@intFromEnum(inst)].ty_nav;
|
||||
const nav = ip.getNav(ty_nav.nav);
|
||||
const is_threadlocal = zcu.comp.config.any_non_single_threaded and nav.isThreadlocal(ip);
|
||||
if (is_threadlocal) if (cg.target.ofmt == .coff or cg.mod.pic) {
|
||||
try cg.spillRegisters(&.{ .rdi, .rax });
|
||||
} else {
|
||||
try cg.spillRegisters(&.{.rax});
|
||||
};
|
||||
var res = try cg.tempInit(.fromInterned(ty_nav.ty), .{ .lea_nav = ty_nav.nav });
|
||||
if (is_threadlocal) while (try res.toRegClass(true, .general_purpose, cg)) {};
|
||||
try res.finish(inst, &.{}, &.{}, cg);
|
||||
},
|
||||
.c_va_arg => try cg.airVaArg(inst),
|
||||
.c_va_copy => try cg.airVaCopy(inst),
|
||||
.c_va_end => try cg.airVaEnd(inst),
|
||||
.c_va_start => try cg.airVaStart(inst),
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = air_datas[@intFromEnum(inst)].pl_op;
|
||||
const bin = cg.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
// vector_ptr, index, elem_val
|
||||
var ops = try cg.tempsFromOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
cg.select(&.{}, &.{}, &ops, comptime &.{ .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
|
||||
.patterns = &.{
|
||||
@@ -173639,7 +173647,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
} },
|
||||
} }) catch |err| switch (err) {
|
||||
error.SelectFailed => {
|
||||
const elem_size = cg.typeOf(bin_op.rhs).abiSize(zcu);
|
||||
const elem_size = cg.typeOf(bin.rhs).abiSize(zcu);
|
||||
while (try ops[0].toRegClass(true, .general_purpose, cg) or
|
||||
try ops[1].toRegClass(true, .general_purpose, cg))
|
||||
{}
|
||||
@@ -173681,23 +173689,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
};
|
||||
for (ops) |op| try op.die(cg);
|
||||
},
|
||||
.runtime_nav_ptr => {
|
||||
const ty_nav = air_datas[@intFromEnum(inst)].ty_nav;
|
||||
const nav = ip.getNav(ty_nav.nav);
|
||||
const is_threadlocal = zcu.comp.config.any_non_single_threaded and nav.isThreadlocal(ip);
|
||||
if (is_threadlocal) if (cg.target.ofmt == .coff or cg.mod.pic) {
|
||||
try cg.spillRegisters(&.{ .rdi, .rax });
|
||||
} else {
|
||||
try cg.spillRegisters(&.{.rax});
|
||||
};
|
||||
var res = try cg.tempInit(.fromInterned(ty_nav.ty), .{ .lea_nav = ty_nav.nav });
|
||||
if (is_threadlocal) while (try res.toRegClass(true, .general_purpose, cg)) {};
|
||||
try res.finish(inst, &.{}, &.{}, cg);
|
||||
},
|
||||
.c_va_arg => try cg.airVaArg(inst),
|
||||
.c_va_copy => try cg.airVaCopy(inst),
|
||||
.c_va_end => try cg.airVaEnd(inst),
|
||||
.c_va_start => try cg.airVaStart(inst),
|
||||
.work_item_id, .work_group_size, .work_group_id => unreachable,
|
||||
}
|
||||
try cg.resetTemps(@enumFromInt(0));
|
||||
@@ -180646,944 +180637,57 @@ fn airSelect(self: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
|
||||
}
|
||||
|
||||
fn airShuffle(self: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
const pt = self.pt;
|
||||
const zcu = pt.zcu;
|
||||
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
|
||||
|
||||
const dst_ty = self.typeOfIndex(inst);
|
||||
const elem_ty = dst_ty.childType(zcu);
|
||||
const elem_abi_size: u16 = @intCast(elem_ty.abiSize(zcu));
|
||||
const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu));
|
||||
const lhs_ty = self.typeOf(extra.a);
|
||||
const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(zcu));
|
||||
const rhs_ty = self.typeOf(extra.b);
|
||||
const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(zcu));
|
||||
const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size);
|
||||
|
||||
const ExpectedContents = [32]?i32;
|
||||
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
|
||||
std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
|
||||
const allocator = stack.get();
|
||||
|
||||
const mask_elems = try allocator.alloc(?i32, extra.mask_len);
|
||||
defer allocator.free(mask_elems);
|
||||
for (mask_elems, 0..) |*mask_elem, elem_index| {
|
||||
const mask_elem_val =
|
||||
Value.fromInterned(extra.mask).elemValue(pt, elem_index) catch unreachable;
|
||||
mask_elem.* = if (mask_elem_val.isUndef(zcu))
|
||||
null
|
||||
else
|
||||
@intCast(mask_elem_val.toSignedInt(zcu));
|
||||
}
|
||||
|
||||
const has_avx = self.hasFeature(.avx);
|
||||
const result = @as(?MCValue, result: {
|
||||
for (mask_elems) |mask_elem| {
|
||||
if (mask_elem) |_| break;
|
||||
} else break :result try self.allocRegOrMem(inst, true);
|
||||
|
||||
for (mask_elems, 0..) |mask_elem, elem_index| {
|
||||
if (mask_elem orelse continue != elem_index) break;
|
||||
} else {
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv;
|
||||
const dst_mcv = try self.allocRegOrMem(inst, true);
|
||||
try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{});
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
for (mask_elems, 0..) |mask_elem, elem_index| {
|
||||
if (~(mask_elem orelse continue) != elem_index) break;
|
||||
} else {
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv;
|
||||
const dst_mcv = try self.allocRegOrMem(inst, true);
|
||||
try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{});
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: {
|
||||
if (elem_abi_size > 8) break :unpck;
|
||||
if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :unpck;
|
||||
|
||||
var sources: [2]?u1 = @splat(null);
|
||||
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index =
|
||||
std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck;
|
||||
const elem_byte = (elem_index >> 1) * elem_abi_size;
|
||||
if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) {
|
||||
.unpckl => 0b0000,
|
||||
.unpckh => 0b1000,
|
||||
else => unreachable,
|
||||
}) | (elem_byte << 1 & 0b10000)) break :unpck;
|
||||
|
||||
const source = @intFromBool(mask_elem < 0);
|
||||
if (sources[elem_index & 0b00001]) |prev_source| {
|
||||
if (source != prev_source) break :unpck;
|
||||
} else sources[elem_index & 0b00001] = source;
|
||||
}
|
||||
if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck;
|
||||
|
||||
const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
|
||||
const operand_tys = [2]Type{ lhs_ty, rhs_ty };
|
||||
const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
|
||||
const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, max_abi_size);
|
||||
|
||||
const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
|
||||
(dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) {
|
||||
4 => if (has_avx) .v_ps else ._ps,
|
||||
8 => if (has_avx) .v_pd else ._pd,
|
||||
else => unreachable,
|
||||
}, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) {
|
||||
.unpckl => switch (elem_abi_size) {
|
||||
1 => .unpcklbw,
|
||||
2 => .unpcklwd,
|
||||
4 => .unpckldq,
|
||||
8 => .unpcklqdq,
|
||||
else => unreachable,
|
||||
},
|
||||
.unpckh => switch (elem_abi_size) {
|
||||
1 => .unpckhbw,
|
||||
2 => .unpckhwd,
|
||||
4 => .unpckhdq,
|
||||
8 => .unpckhqdq,
|
||||
else => unreachable,
|
||||
},
|
||||
else => unreachable,
|
||||
} };
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemory(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemory(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
) else try self.asmRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
pshufd: {
|
||||
if (elem_abi_size != 4) break :pshufd;
|
||||
if (max_abi_size > self.vectorSize(.float)) break :pshufd;
|
||||
|
||||
var control: u8 = 0b00_00_00_00;
|
||||
var sources: [1]?u1 = @splat(null);
|
||||
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
|
||||
if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd;
|
||||
|
||||
const source = @intFromBool(mask_elem < 0);
|
||||
if (sources[0]) |prev_source| {
|
||||
if (source != prev_source) break :pshufd;
|
||||
} else sources[(elem_index & 0b010) >> 1] = source;
|
||||
|
||||
const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
|
||||
const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
|
||||
if (elem_index & 0b100 == 0)
|
||||
control |= select_mask
|
||||
else if (control & @as(u8, 0b11) << select_bit != select_mask) break :pshufd;
|
||||
}
|
||||
|
||||
const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
|
||||
const operand_tys = [2]Type{ lhs_ty, rhs_ty };
|
||||
const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]);
|
||||
|
||||
const dst_reg = if (src_mcv.isRegister() and
|
||||
self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv))
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
|
||||
const dst_alias = registerAlias(dst_reg, max_abi_size);
|
||||
|
||||
if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate(
|
||||
.{ if (has_avx) .vp_d else .p_d, .shuf },
|
||||
dst_alias,
|
||||
try src_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
.u(control),
|
||||
) else try self.asmRegisterRegisterImmediate(
|
||||
.{ if (has_avx) .vp_d else .p_d, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(if (src_mcv.isRegister())
|
||||
src_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size),
|
||||
.u(control),
|
||||
);
|
||||
break :result .{ .register = dst_reg };
|
||||
}
|
||||
|
||||
shufps: {
|
||||
if (elem_abi_size != 4) break :shufps;
|
||||
if (max_abi_size > self.vectorSize(.float)) break :shufps;
|
||||
|
||||
var control: u8 = 0b00_00_00_00;
|
||||
var sources: [2]?u1 = @splat(null);
|
||||
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
|
||||
if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps;
|
||||
|
||||
const source = @intFromBool(mask_elem < 0);
|
||||
if (sources[(elem_index & 0b010) >> 1]) |prev_source| {
|
||||
if (source != prev_source) break :shufps;
|
||||
} else sources[(elem_index & 0b010) >> 1] = source;
|
||||
|
||||
const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
|
||||
const select_mask = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
|
||||
if (elem_index & 0b100 == 0)
|
||||
control |= select_mask
|
||||
else if (control & @as(u8, 0b11) << select_bit != select_mask) break :shufps;
|
||||
}
|
||||
if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps;
|
||||
|
||||
const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
|
||||
const operand_tys = [2]Type{ lhs_ty, rhs_ty };
|
||||
const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
|
||||
const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, max_abi_size);
|
||||
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ .v_ps, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
.u(control),
|
||||
) else try self.asmRegisterRegisterRegisterImmediate(
|
||||
.{ .v_ps, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
.u(control),
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
|
||||
.{ ._ps, .shuf },
|
||||
dst_alias,
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
.u(control),
|
||||
) else try self.asmRegisterRegisterImmediate(
|
||||
.{ ._ps, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
.u(control),
|
||||
);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
shufpd: {
|
||||
if (elem_abi_size != 8) break :shufpd;
|
||||
if (max_abi_size > self.vectorSize(.float)) break :shufpd;
|
||||
|
||||
var control: u4 = 0b0_0_0_0;
|
||||
var sources: [2]?u1 = @splat(null);
|
||||
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
|
||||
if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd;
|
||||
|
||||
const source = @intFromBool(mask_elem < 0);
|
||||
if (sources[elem_index & 0b01]) |prev_source| {
|
||||
if (source != prev_source) break :shufpd;
|
||||
} else sources[elem_index & 0b01] = source;
|
||||
|
||||
control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index);
|
||||
}
|
||||
if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd;
|
||||
|
||||
const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b };
|
||||
const operand_tys: [2]Type = .{ lhs_ty, rhs_ty };
|
||||
const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
|
||||
const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, max_abi_size);
|
||||
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ .v_pd, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
.u(control),
|
||||
) else try self.asmRegisterRegisterRegisterImmediate(
|
||||
.{ .v_pd, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
.u(control),
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
|
||||
.{ ._pd, .shuf },
|
||||
dst_alias,
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(max_abi_size) }),
|
||||
.u(control),
|
||||
) else try self.asmRegisterRegisterImmediate(
|
||||
.{ ._pd, .shuf },
|
||||
dst_alias,
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
|
||||
.u(control),
|
||||
);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
blend: {
|
||||
if (elem_abi_size < 2) break :blend;
|
||||
if (dst_abi_size > self.vectorSize(.float)) break :blend;
|
||||
if (!self.hasFeature(.sse4_1)) break :blend;
|
||||
|
||||
var control: u8 = 0b0_0_0_0_0_0_0_0;
|
||||
for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index =
|
||||
std.math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend;
|
||||
if (mask_elem_index != elem_index) break :blend;
|
||||
|
||||
const select_mask = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index);
|
||||
if (elem_index & 0b1000 == 0)
|
||||
control |= select_mask
|
||||
else if (control & @as(u8, 0b1) << @truncate(elem_index) != select_mask) break :blend;
|
||||
}
|
||||
|
||||
if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: {
|
||||
const expanded_control = switch (elem_abi_size) {
|
||||
4 => control,
|
||||
8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00),
|
||||
else => break :vpblendd,
|
||||
};
|
||||
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
const lhs_reg = if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, lhs_mcv);
|
||||
const lhs_lock = self.register_manager.lockReg(lhs_reg);
|
||||
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
|
||||
if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ .vp_d, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(lhs_reg, dst_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
.u(expanded_control),
|
||||
) else try self.asmRegisterRegisterRegisterImmediate(
|
||||
.{ .vp_d, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(lhs_reg, dst_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
.u(expanded_control),
|
||||
);
|
||||
break :result .{ .register = dst_reg };
|
||||
}
|
||||
|
||||
if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: {
|
||||
const expanded_control = switch (elem_abi_size) {
|
||||
2 => control,
|
||||
4 => if (dst_abi_size <= 16 or
|
||||
@as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0)))
|
||||
@as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
|
||||
@as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00)
|
||||
else
|
||||
break :pblendw,
|
||||
8 => if (dst_abi_size <= 16 or
|
||||
@as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0)))
|
||||
@as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) |
|
||||
@as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000)
|
||||
else
|
||||
break :pblendw,
|
||||
16 => break :pblendw,
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, extra.a, 0, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
.{ .vp_w, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
dst_reg, dst_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
.u(expanded_control),
|
||||
) else try self.asmRegisterRegisterRegisterImmediate(
|
||||
.{ .vp_w, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
dst_reg, dst_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
.u(expanded_control),
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
|
||||
.{ .p_w, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
.u(expanded_control),
|
||||
) else try self.asmRegisterRegisterImmediate(
|
||||
.{ .p_w, .blend },
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
.u(expanded_control),
|
||||
);
|
||||
break :result .{ .register = dst_reg };
|
||||
}
|
||||
|
||||
const expanded_control = switch (elem_abi_size) {
|
||||
4, 8 => control,
|
||||
16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) |
|
||||
@as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00),
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, extra.a, 0, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate(
|
||||
switch (elem_abi_size) {
|
||||
4 => .{ .v_ps, .blend },
|
||||
8, 16 => .{ .v_pd, .blend },
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
dst_reg, dst_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
.u(expanded_control),
|
||||
) else try self.asmRegisterRegisterRegisterImmediate(
|
||||
switch (elem_abi_size) {
|
||||
4 => .{ .v_ps, .blend },
|
||||
8, 16 => .{ .v_pd, .blend },
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
dst_reg, dst_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
.u(expanded_control),
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryImmediate(
|
||||
switch (elem_abi_size) {
|
||||
4 => .{ ._ps, .blend },
|
||||
8, 16 => .{ ._pd, .blend },
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
.u(expanded_control),
|
||||
) else try self.asmRegisterRegisterImmediate(
|
||||
switch (elem_abi_size) {
|
||||
4 => .{ ._ps, .blend },
|
||||
8, 16 => .{ ._pd, .blend },
|
||||
else => unreachable,
|
||||
},
|
||||
registerAlias(dst_reg, dst_abi_size),
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
.u(expanded_control),
|
||||
);
|
||||
break :result .{ .register = dst_reg };
|
||||
}
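
A hedged sketch of how an @shuffle-style mask collapses to the one-bit-per-lane blend immediate used by this path; blendControl is a hypothetical helper, it assumes at most eight lanes, and it returns null whenever the mask is not a pure per-lane blend (the same condition the loop above checks).

fn blendControl(comptime n: usize, mask: [n]?i8) ?u8 {
    var control: u8 = 0;
    for (mask, 0..) |maybe_elem, i| {
        const elem = maybe_elem orelse continue; // undefined lanes may come from either side
        const index: usize = @intCast(if (elem < 0) ~elem else elem);
        if (index != i) return null; // element moves between lanes: not a blend
        if (elem < 0) control |= @as(u8, 1) << @intCast(i); // lane i comes from the second operand
    }
    return control;
}

test blendControl {
    // Keeping only lane 1 from the second vector sets only bit 1 of the immediate.
    try @import("std").testing.expectEqual(@as(?u8, 0b0010), blendControl(4, .{ 0, ~@as(i8, 1), 2, 3 }));
}
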
|
||||
|
||||
blendv: {
|
||||
if (dst_abi_size > self.vectorSize(if (elem_abi_size >= 4) .float else .int)) break :blendv;
|
||||
|
||||
const select_mask_elem_ty = try pt.intType(.unsigned, elem_abi_size * 8);
|
||||
const select_mask_ty = try pt.vectorType(.{
|
||||
.len = @intCast(mask_elems.len),
|
||||
.child = select_mask_elem_ty.toIntern(),
|
||||
});
|
||||
var select_mask_elems: [32]InternPool.Index = undefined;
|
||||
for (
|
||||
select_mask_elems[0..mask_elems.len],
|
||||
mask_elems,
|
||||
0..,
|
||||
) |*select_mask_elem, maybe_mask_elem, elem_index| {
|
||||
const mask_elem = maybe_mask_elem orelse continue;
|
||||
const mask_elem_index =
|
||||
std.math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv;
|
||||
if (mask_elem_index != elem_index) break :blendv;
|
||||
|
||||
select_mask_elem.* = (if (mask_elem < 0)
|
||||
try select_mask_elem_ty.maxIntScalar(pt, select_mask_elem_ty)
|
||||
else
|
||||
try select_mask_elem_ty.minIntScalar(pt, select_mask_elem_ty)).toIntern();
|
||||
}
|
||||
const select_mask_mcv = try self.lowerValue(
|
||||
try pt.aggregateValue(select_mask_ty, select_mask_elems[0..mask_elems.len]),
|
||||
);
|
||||
|
||||
if (self.hasFeature(.sse4_1)) {
|
||||
const mir_tag: Mir.Inst.FixedTag = .{
|
||||
if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
|
||||
(dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) {
|
||||
4 => if (has_avx) .v_ps else ._ps,
|
||||
8 => if (has_avx) .v_pd else ._pd,
|
||||
else => unreachable,
|
||||
} else if (has_avx) .vp_b else .p_b,
|
||||
.blendv,
|
||||
};
|
||||
|
||||
const select_mask_reg = if (!has_avx) reg: {
|
||||
try self.register_manager.getKnownReg(.xmm0, null);
|
||||
try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{});
|
||||
break :reg .xmm0;
|
||||
} else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
|
||||
const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size);
|
||||
const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg);
|
||||
defer self.register_manager.unlockReg(select_mask_lock);
|
||||
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
|
||||
const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, extra.a, 0, lhs_mcv))
|
||||
lhs_mcv
|
||||
else if (has_avx and lhs_mcv.isRegister())
|
||||
.{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, dst_abi_size);
|
||||
|
||||
if (has_avx) if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
if (lhs_mcv.isRegister())
|
||||
registerAlias(lhs_mcv.getReg().?, dst_abi_size)
|
||||
else
|
||||
dst_alias,
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
select_mask_alias,
|
||||
) else try self.asmRegisterRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
if (lhs_mcv.isRegister())
|
||||
registerAlias(lhs_mcv.getReg().?, dst_abi_size)
|
||||
else
|
||||
dst_alias,
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
select_mask_alias,
|
||||
) else if (rhs_mcv.isBase()) try self.asmRegisterMemoryRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
try rhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
select_mask_alias,
|
||||
) else try self.asmRegisterRegisterRegister(
|
||||
mir_tag,
|
||||
dst_alias,
|
||||
registerAlias(if (rhs_mcv.isRegister())
|
||||
rhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
|
||||
select_mask_alias,
|
||||
);
|
||||
break :result dst_mcv;
|
||||
}
|
||||
|
||||
const lhs_mcv = try self.resolveInst(extra.a);
|
||||
const rhs_mcv = try self.resolveInst(extra.b);
|
||||
|
||||
const dst_mcv: MCValue = if (rhs_mcv.isRegister() and
|
||||
self.reuseOperand(inst, extra.b, 1, rhs_mcv))
|
||||
rhs_mcv
|
||||
else
|
||||
try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv);
|
||||
const dst_reg = dst_mcv.getReg().?;
|
||||
const dst_alias = registerAlias(dst_reg, dst_abi_size);
|
||||
|
||||
const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
|
||||
const mask_alias = registerAlias(mask_reg, dst_abi_size);
|
||||
const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
|
||||
defer self.register_manager.unlockReg(mask_lock);
|
||||
|
||||
const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat())
|
||||
switch (elem_ty.floatBits(self.target)) {
|
||||
16, 80, 128 => .p_,
|
||||
32 => ._ps,
|
||||
64 => ._pd,
|
||||
else => unreachable,
|
||||
}
|
||||
else
|
||||
.p_;
|
||||
try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
|
||||
if (lhs_mcv.isBase()) try self.asmRegisterMemory(
|
||||
.{ mir_fixes, .andn },
|
||||
mask_alias,
|
||||
try lhs_mcv.mem(self, .{ .size = .fromSize(dst_abi_size) }),
|
||||
) else try self.asmRegisterRegister(
|
||||
.{ mir_fixes, .andn },
|
||||
mask_alias,
|
||||
if (lhs_mcv.isRegister())
|
||||
lhs_mcv.getReg().?
|
||||
else
|
||||
try self.copyToTmpRegister(dst_ty, lhs_mcv),
|
||||
);
|
||||
try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
|
||||
break :result dst_mcv;
|
||||
}
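
The and/andn/or sequence emitted above is the pre-SSE4.1 bitwise select; a scalar sketch of the identity it computes (bitwiseSelect is a hypothetical helper, and each lane of the mask is assumed to be all-ones for "take rhs" or all-zeros for "take lhs"):

fn bitwiseSelect(mask: u128, lhs: u128, rhs: u128) u128 {
    // dst starts as rhs: AND keeps the rhs lanes, ANDN extracts the lhs lanes, OR merges them.
    return (rhs & mask) | (lhs & ~mask);
}

test bitwiseSelect {
    try @import("std").testing.expectEqual(@as(u128, 0xBB), bitwiseSelect(0xFF, 0xAA, 0xBB));
}
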
|
||||
|
||||
pshufb: {
|
||||
if (max_abi_size > 16) break :pshufb;
|
||||
if (!self.hasFeature(.ssse3)) break :pshufb;
|
||||
|
||||
const temp_regs =
|
||||
try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse);
|
||||
const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs);
|
||||
defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
|
||||
|
||||
const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size);
|
||||
try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{});
|
||||
|
||||
const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size);
|
||||
try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{});
|
||||
|
||||
var lhs_mask_elems: [16]InternPool.Index = undefined;
|
||||
for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| {
|
||||
const elem_index = byte_index / elem_abi_size;
|
||||
lhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
|
||||
const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
|
||||
if (mask_elem < 0) break :elem 0b1_00_00000;
|
||||
const mask_elem_index: u31 = @intCast(mask_elem);
|
||||
const byte_off: u32 = @intCast(byte_index % elem_abi_size);
|
||||
break :elem mask_elem_index * elem_abi_size + byte_off;
|
||||
})).toIntern();
|
||||
}
|
||||
const lhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
|
||||
const lhs_mask_mcv = try self.lowerValue(
|
||||
try pt.aggregateValue(lhs_mask_ty, lhs_mask_elems[0..max_abi_size]),
|
||||
);
|
||||
const lhs_mask_mem: Memory = .{
|
||||
.base = .{ .reg = try self.copyToTmpRegister(.usize, lhs_mask_mcv.address()) },
|
||||
.mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
|
||||
};
|
||||
if (has_avx) try self.asmRegisterRegisterMemory(
|
||||
.{ .vp_b, .shuf },
|
||||
lhs_temp_alias,
|
||||
lhs_temp_alias,
|
||||
lhs_mask_mem,
|
||||
) else try self.asmRegisterMemory(
|
||||
.{ .p_b, .shuf },
|
||||
lhs_temp_alias,
|
||||
lhs_mask_mem,
|
||||
);
|
||||
|
||||
var rhs_mask_elems: [16]InternPool.Index = undefined;
|
||||
for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| {
|
||||
const elem_index = byte_index / elem_abi_size;
|
||||
rhs_mask_elem.* = (try pt.intValue(.u8, if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
|
||||
const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
|
||||
if (mask_elem >= 0) break :elem 0b1_00_00000;
|
||||
const mask_elem_index: u31 = @intCast(~mask_elem);
|
||||
const byte_off: u32 = @intCast(byte_index % elem_abi_size);
|
||||
break :elem mask_elem_index * elem_abi_size + byte_off;
|
||||
})).toIntern();
|
||||
}
|
||||
const rhs_mask_ty = try pt.vectorType(.{ .len = max_abi_size, .child = .u8_type });
|
||||
const rhs_mask_mcv = try self.lowerValue(
|
||||
try pt.aggregateValue(rhs_mask_ty, rhs_mask_elems[0..max_abi_size]),
|
||||
);
|
||||
const rhs_mask_mem: Memory = .{
|
||||
.base = .{ .reg = try self.copyToTmpRegister(.usize, rhs_mask_mcv.address()) },
|
||||
.mod = .{ .rm = .{ .size = .fromSize(@max(max_abi_size, 16)) } },
|
||||
};
|
||||
if (has_avx) try self.asmRegisterRegisterMemory(
|
||||
.{ .vp_b, .shuf },
|
||||
rhs_temp_alias,
|
||||
rhs_temp_alias,
|
||||
rhs_mask_mem,
|
||||
) else try self.asmRegisterMemory(
|
||||
.{ .p_b, .shuf },
|
||||
rhs_temp_alias,
|
||||
rhs_mask_mem,
|
||||
);
|
||||
|
||||
if (has_avx) try self.asmRegisterRegisterRegister(
|
||||
.{ switch (elem_ty.zigTypeTag(zcu)) {
|
||||
else => break :result null,
|
||||
.int => .vp_,
|
||||
.float => switch (elem_ty.floatBits(self.target)) {
|
||||
32 => .v_ps,
|
||||
64 => .v_pd,
|
||||
16, 80, 128 => break :result null,
|
||||
else => unreachable,
|
||||
},
|
||||
}, .@"or" },
|
||||
lhs_temp_alias,
|
||||
lhs_temp_alias,
|
||||
rhs_temp_alias,
|
||||
) else try self.asmRegisterRegister(
|
||||
.{ switch (elem_ty.zigTypeTag(zcu)) {
|
||||
else => break :result null,
|
||||
.int => .p_,
|
||||
.float => switch (elem_ty.floatBits(self.target)) {
|
||||
32 => ._ps,
|
||||
64 => ._pd,
|
||||
16, 80, 128 => break :result null,
|
||||
else => unreachable,
|
||||
},
|
||||
}, .@"or" },
|
||||
lhs_temp_alias,
|
||||
rhs_temp_alias,
|
||||
);
|
||||
break :result .{ .register = temp_regs[0] };
|
||||
}
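
A sketch of how the two byte-index tables above can be derived from the element-level mask; pshufbTable is a hypothetical helper, and 0x80 is the pshufb convention for "zero this destination byte".

fn pshufbTable(
    comptime len: usize,
    mask: []const ?i8,
    elem_size: usize,
    want_rhs: bool,
) [len]u8 {
    var table: [len]u8 = undefined;
    for (&table, 0..) |*byte, byte_index| {
        const elem_index = byte_index / elem_size;
        byte.* = zeroed: {
            if (elem_index >= mask.len) break :zeroed 0x80;
            const elem = mask[elem_index] orelse break :zeroed 0x80;
            if ((elem < 0) != want_rhs) break :zeroed 0x80; // element comes from the other operand
            const src_elem: usize = @intCast(if (elem < 0) ~elem else elem);
            const src_byte = src_elem * elem_size + byte_index % elem_size;
            break :zeroed @as(u8, @intCast(src_byte));
        };
    }
    return table;
}
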
|
||||
|
||||
break :result null;
|
||||
}) orelse return self.fail("TODO implement airShuffle from {f} and {f} to {f} with {f}", .{
|
||||
lhs_ty.fmt(pt),
|
||||
rhs_ty.fmt(pt),
|
||||
dst_ty.fmt(pt),
|
||||
Value.fromInterned(extra.mask).fmtValue(pt),
|
||||
});
|
||||
return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
|
||||
}
|
||||
|
||||
fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
fn airAggregateInitBoolVec(self: *CodeGen, inst: Air.Inst.Index) !void {
|
||||
const pt = self.pt;
|
||||
const zcu = pt.zcu;
|
||||
const result_ty = self.typeOfIndex(inst);
|
||||
const len: usize = @intCast(result_ty.arrayLen(zcu));
|
||||
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
const elements: []const Air.Inst.Ref = @ptrCast(self.air.extra.items[ty_pl.payload..][0..len]);
|
||||
const result: MCValue = result: {
|
||||
switch (result_ty.zigTypeTag(zcu)) {
|
||||
.@"struct" => {
|
||||
if (result_ty.containerLayout(zcu) == .@"packed") return self.fail(
|
||||
"TODO implement airAggregateInit for {f}",
|
||||
.{result_ty.fmt(pt)},
|
||||
);
|
||||
const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu));
|
||||
const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern());
|
||||
try self.genInlineMemset(
|
||||
.{ .lea_frame = .{ .index = frame_index } },
|
||||
.{ .immediate = 0 },
|
||||
.{ .immediate = result_ty.abiSize(zcu) },
|
||||
.{},
|
||||
);
|
||||
for (elements, 0..) |elem, elem_i_usize| {
|
||||
const elem_i: u32 = @intCast(elem_i_usize);
|
||||
if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue;
|
||||
|
||||
const elem_ty = result_ty.fieldType(elem_i, zcu);
|
||||
const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu));
|
||||
if (elem_bit_size > 64) {
|
||||
return self.fail(
|
||||
"TODO airAggregateInit implement packed structs with large fields",
|
||||
.{},
|
||||
);
|
||||
}
|
||||
const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu));
|
||||
const elem_abi_bits = elem_abi_size * 8;
|
||||
const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i);
|
||||
const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size);
|
||||
const elem_bit_off = elem_off % elem_abi_bits;
|
||||
const elem_mcv = try self.resolveInst(elem);
|
||||
const elem_lock = switch (elem_mcv) {
|
||||
.register => |reg| self.register_manager.lockReg(reg),
|
||||
.immediate => |imm| lock: {
|
||||
if (imm == 0) continue;
|
||||
break :lock null;
|
||||
},
|
||||
else => null,
|
||||
};
|
||||
defer if (elem_lock) |lock| self.register_manager.unlockReg(lock);
|
||||
assert(result_ty.zigTypeTag(zcu) == .vector);
|
||||
assert(result_ty.childType(zcu).toIntern() == .bool_type);
|
||||
|
||||
const elem_extra_bits = self.regExtraBits(elem_ty);
|
||||
{
|
||||
const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
|
||||
const temp_alias = registerAlias(temp_reg, elem_abi_size);
|
||||
const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
|
||||
defer self.register_manager.unlockReg(temp_lock);
|
||||
const result_size = result_ty.abiSize(zcu);
|
||||
if (result_size > 8) return self.fail("TODO airAggregateInitBoolVec over 8 bytes", .{});
|
||||
|
||||
if (elem_bit_off < elem_extra_bits) {
|
||||
try self.truncateRegister(elem_ty, temp_alias);
|
||||
}
|
||||
if (elem_bit_off > 0) try self.genShiftBinOpMir(
|
||||
.{ ._l, .sh },
|
||||
elem_ty,
|
||||
.{ .register = temp_alias },
|
||||
.u8,
|
||||
.{ .immediate = elem_bit_off },
|
||||
);
|
||||
try self.genBinOpMir(
|
||||
.{ ._, .@"or" },
|
||||
elem_ty,
|
||||
.{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
|
||||
.{ .register = temp_alias },
|
||||
);
|
||||
}
|
||||
if (elem_bit_off > elem_extra_bits) {
|
||||
const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv);
|
||||
const temp_alias = registerAlias(temp_reg, elem_abi_size);
|
||||
const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
|
||||
defer self.register_manager.unlockReg(temp_lock);
|
||||
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
|
||||
|
||||
if (elem_extra_bits > 0) {
|
||||
try self.truncateRegister(elem_ty, temp_alias);
|
||||
}
|
||||
try self.genShiftBinOpMir(
|
||||
.{ ._r, .sh },
|
||||
elem_ty,
|
||||
.{ .register = temp_reg },
|
||||
.u8,
|
||||
.{ .immediate = elem_abi_bits - elem_bit_off },
|
||||
);
|
||||
try self.genBinOpMir(
|
||||
.{ ._, .@"or" },
|
||||
elem_ty,
|
||||
.{ .load_frame = .{
|
||||
.index = frame_index,
|
||||
.off = elem_byte_off + @as(i32, @intCast(elem_abi_size)),
|
||||
} },
|
||||
.{ .register = temp_alias },
|
||||
);
|
||||
}
|
||||
}
|
||||
break :result .{ .load_frame = .{ .index = frame_index } };
|
||||
},
|
||||
.vector => {
|
||||
const elem_ty = result_ty.childType(zcu);
|
||||
if (elem_ty.toIntern() != .bool_type) return self.fail(
|
||||
"TODO implement airAggregateInit for {f}",
|
||||
.{result_ty.fmt(pt)},
|
||||
);
|
||||
const result_size: u32 = @intCast(result_ty.abiSize(zcu));
|
||||
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
|
||||
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
|
||||
defer self.register_manager.unlockReg(dst_lock);
|
||||
try self.asmRegisterRegister(
|
||||
.{ ._, .xor },
|
||||
registerAlias(dst_reg, @min(result_size, 4)),
|
||||
registerAlias(dst_reg, @min(result_size, 4)),
|
||||
);
|
||||
{
|
||||
const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
|
||||
defer self.register_manager.unlockReg(dst_lock);
|
||||
|
||||
for (elements, 0..) |elem, elem_i| {
|
||||
const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem });
|
||||
const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
|
||||
defer self.register_manager.unlockReg(elem_lock);
|
||||
try self.spillEflagsIfOccupied();
|
||||
try self.asmRegisterRegister(
|
||||
.{ ._, .xor },
|
||||
registerAlias(dst_reg, @min(result_size, 4)),
|
||||
registerAlias(dst_reg, @min(result_size, 4)),
|
||||
);
|
||||
|
||||
try self.asmRegisterImmediate(
|
||||
.{ ._, .@"and" },
|
||||
registerAlias(elem_reg, @min(result_size, 4)),
|
||||
.u(1),
|
||||
);
|
||||
if (elem_i > 0) try self.asmRegisterImmediate(
|
||||
.{ ._l, .sh },
|
||||
registerAlias(elem_reg, result_size),
|
||||
.u(@intCast(elem_i)),
|
||||
);
|
||||
try self.asmRegisterRegister(
|
||||
.{ ._, .@"or" },
|
||||
registerAlias(dst_reg, result_size),
|
||||
registerAlias(elem_reg, result_size),
|
||||
);
|
||||
}
|
||||
break :result .{ .register = dst_reg };
|
||||
},
|
||||
else => unreachable,
|
||||
for (elements, 0..) |elem, elem_i| {
|
||||
const elem_reg = try self.copyToTmpRegister(.bool, .{ .air_ref = elem });
|
||||
const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg);
|
||||
defer self.register_manager.unlockReg(elem_lock);
|
||||
|
||||
try self.asmRegisterImmediate(
|
||||
.{ ._, .@"and" },
|
||||
registerAlias(elem_reg, @min(result_size, 4)),
|
||||
.u(1),
|
||||
);
|
||||
if (elem_i > 0) try self.asmRegisterImmediate(
|
||||
.{ ._l, .sh },
|
||||
registerAlias(elem_reg, @intCast(result_size)),
|
||||
.u(@intCast(elem_i)),
|
||||
);
|
||||
try self.asmRegisterRegister(
|
||||
.{ ._, .@"or" },
|
||||
registerAlias(dst_reg, @intCast(result_size)),
|
||||
registerAlias(elem_reg, @intCast(result_size)),
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const result: MCValue = .{ .register = dst_reg };
|
||||
|
||||
if (elements.len <= Air.Liveness.bpi - 1) {
|
||||
var buf: [Air.Liveness.bpi - 1]Air.Inst.Ref = @splat(.none);
|
||||
@@ -182269,15 +181373,6 @@ fn fail(cg: *CodeGen, comptime format: []const u8, args: anytype) error{ OutOfMe
|
||||
};
|
||||
}
|
||||
|
||||
fn failMsg(cg: *CodeGen, msg: *Zcu.ErrorMsg) error{ OutOfMemory, CodegenFail } {
|
||||
@branchHint(.cold);
|
||||
const zcu = cg.pt.zcu;
|
||||
return switch (cg.owner) {
|
||||
.nav_index => |i| zcu.codegenFailMsg(i, msg),
|
||||
.lazy_sym => |s| zcu.codegenFailTypeMsg(s.ty, msg),
|
||||
};
|
||||
}
|
||||
|
||||
fn parseRegName(name: []const u8) ?Register {
|
||||
if (std.mem.startsWith(u8, name, "db")) return @enumFromInt(
|
||||
@intFromEnum(Register.dr0) + (std.fmt.parseInt(u4, name["db".len..], 0) catch return null),
|
||||
@@ -188819,7 +187914,6 @@ const Select = struct {
|
||||
const ptr_info = ty.ptrInfo(zcu);
|
||||
return switch (ptr_info.flags.vector_index) {
|
||||
.none => false,
|
||||
.runtime => unreachable,
|
||||
else => ptr_info.child == .bool_type,
|
||||
};
|
||||
},
|
||||
@@ -188827,7 +187921,6 @@ const Select = struct {
|
||||
const ptr_info = ty.ptrInfo(zcu);
|
||||
return switch (ptr_info.flags.vector_index) {
|
||||
.none => false,
|
||||
.runtime => unreachable,
|
||||
else => ptr_info.child == .bool_type and size.bitSize(cg.target) >= ptr_info.packed_offset.host_size,
|
||||
};
|
||||
},
|
||||
@@ -190814,7 +189907,7 @@ const Select = struct {
|
||||
.src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) *
|
||||
Select.Operand.Ref.src1.valueOf(s).immediate),
|
||||
.vector_index => switch (op.flags.base.ref.typeOf(s).ptrInfo(s.cg.pt.zcu).flags.vector_index) {
|
||||
.none, .runtime => unreachable,
|
||||
.none => unreachable,
|
||||
else => |vector_index| @intFromEnum(vector_index),
|
||||
},
|
||||
.src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate),
|
||||
|
||||
+6
-22
@@ -3158,11 +3158,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo
.struct_field
else
.struct_field);
if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
try wip_nav.strp(field_name);
}
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
try wip_nav.refType(field_type);
if (!is_comptime) {
try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -3187,7 +3183,7 @@ fn updateComptimeNavInner(dwarf: *Dwarf, pt: Zcu.PerThread, nav_index: InternPoo
var field_bit_offset: u16 = 0;
for (0..loaded_struct.field_types.len) |field_index| {
try wip_nav.abbrevCode(.packed_struct_field);
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip));
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
const field_type: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
try wip_nav.refType(field_type);
try diw.writeUleb128(field_bit_offset);
@@ -4269,11 +4265,7 @@ fn updateLazyValue(
.comptime_value_field_runtime_bits
else
continue);
if (loaded_struct_type.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
try wip_nav.strp(field_name);
}
try wip_nav.strp(loaded_struct_type.fieldName(ip, field_index).toSlice(ip));
const field_value: Value = .fromInterned(switch (aggregate.storage) {
.bytes => unreachable,
.elems => |elems| elems[field_index],
@@ -4467,11 +4459,7 @@ fn updateContainerTypeWriterError(
.struct_field
else
.struct_field);
if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
try wip_nav.strp(field_name);
}
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
try wip_nav.refType(field_type);
if (!is_comptime) {
try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -4573,11 +4561,7 @@ fn updateContainerTypeWriterError(
.struct_field
else
.struct_field);
if (loaded_struct.fieldName(ip, field_index).unwrap()) |field_name| try wip_nav.strp(field_name.toSlice(ip)) else {
var field_name_buf: [std.fmt.count("{d}", .{std.math.maxInt(u32)})]u8 = undefined;
const field_name = std.fmt.bufPrint(&field_name_buf, "{d}", .{field_index}) catch unreachable;
try wip_nav.strp(field_name);
}
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
try wip_nav.refType(field_type);
if (!is_comptime) {
try diw.writeUleb128(loaded_struct.offsets.get(ip)[field_index]);
@@ -4600,7 +4584,7 @@ fn updateContainerTypeWriterError(
var field_bit_offset: u16 = 0;
for (0..loaded_struct.field_types.len) |field_index| {
try wip_nav.abbrevCode(.packed_struct_field);
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).unwrap().?.toSlice(ip));
try wip_nav.strp(loaded_struct.fieldName(ip, field_index).toSlice(ip));
const field_type: Type = .fromInterned(loaded_struct.field_types.get(ip)[field_index]);
try wip_nav.refType(field_type);
try diw.writeUleb128(field_bit_offset);

+25
-34
@@ -40,6 +40,8 @@
#elif defined(__mips__)
#define zig_mips32
#define zig_mips
#elif defined(__or1k__)
#define zig_or1k
#elif defined(__powerpc64__)
#define zig_powerpc64
#define zig_powerpc
@@ -72,6 +74,9 @@
#elif defined (__x86_64__) || (defined(zig_msvc) && defined(_M_X64))
#define zig_x86_64
#define zig_x86
#elif defined(__I86__)
#define zig_x86_16
#define zig_x86
#endif

#if defined(zig_msvc) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
@@ -82,9 +87,7 @@
#define zig_big_endian 1
#endif

#if defined(_AIX)
#define zig_aix
#elif defined(__MACH__)
#if defined(__MACH__)
#define zig_darwin
#elif defined(__DragonFly__)
#define zig_dragonfly
@@ -114,20 +117,14 @@
#define zig_wasi
#elif defined(_WIN32)
#define zig_windows
#elif defined(__MVS__)
#define zig_zos
#endif

#if defined(zig_windows)
#define zig_coff
#elif defined(__ELF__)
#define zig_elf
#elif defined(zig_zos)
#define zig_goff
#elif defined(zig_darwin)
#define zig_macho
#elif defined(zig_aix)
#define zig_xcoff
#endif

#define zig_concat(lhs, rhs) lhs##rhs
@@ -390,12 +387,16 @@
#define zig_trap() __asm__ volatile(".word 0x0")
#elif defined(zig_mips)
#define zig_trap() __asm__ volatile(".word 0x3d")
#elif defined(zig_or1k)
#define zig_trap() __asm__ volatile("l.cust8")
#elif defined(zig_riscv)
#define zig_trap() __asm__ volatile("unimp")
#elif defined(zig_s390x)
#define zig_trap() __asm__ volatile("j 0x2")
#elif defined(zig_sparc)
#define zig_trap() __asm__ volatile("illtrap")
#elif defined(zig_x86_16)
#define zig_trap() __asm__ volatile("int $0x3")
#elif defined(zig_x86)
#define zig_trap() __asm__ volatile("ud2")
#else
@@ -422,6 +423,8 @@
#define zig_breakpoint() __asm__ volatile("break 0x0")
#elif defined(zig_mips)
#define zig_breakpoint() __asm__ volatile("break")
#elif defined(zig_or1k)
#define zig_breakpoint() __asm__ volatile("l.trap 0x0")
#elif defined(zig_powerpc)
#define zig_breakpoint() __asm__ volatile("trap")
#elif defined(zig_riscv)
@@ -804,15 +807,13 @@ static inline bool zig_addo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
#endif
}

zig_extern int32_t __addosi4(int32_t lhs, int32_t rhs, int *overflow);
static inline bool zig_addo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
#if zig_has_builtin(add_overflow) || defined(zig_gcc)
int32_t full_res;
bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
int32_t full_res = __addosi4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
int32_t full_res = (int32_t)((uint32_t)lhs + (uint32_t)rhs);
bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
#endif
*res = zig_wrap_i32(full_res, bits);
return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -830,15 +831,13 @@ static inline bool zig_addo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
#endif
}

zig_extern int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow);
static inline bool zig_addo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
#if zig_has_builtin(add_overflow) || defined(zig_gcc)
int64_t full_res;
bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
int64_t full_res = __addodi4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
int64_t full_res = (int64_t)((uint64_t)lhs + (uint64_t)rhs);
bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
#endif
*res = zig_wrap_i64(full_res, bits);
return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -912,15 +911,13 @@ static inline bool zig_subo_u32(uint32_t *res, uint32_t lhs, uint32_t rhs, uint8
#endif
}

zig_extern int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow);
static inline bool zig_subo_i32(int32_t *res, int32_t lhs, int32_t rhs, uint8_t bits) {
#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
int32_t full_res;
bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
int32_t full_res = __subosi4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
int32_t full_res = (int32_t)((uint32_t)lhs - (uint32_t)rhs);
bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
#endif
*res = zig_wrap_i32(full_res, bits);
return overflow || full_res < zig_minInt_i(32, bits) || full_res > zig_maxInt_i(32, bits);
@@ -938,15 +935,13 @@ static inline bool zig_subo_u64(uint64_t *res, uint64_t lhs, uint64_t rhs, uint8
#endif
}

zig_extern int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow);
static inline bool zig_subo_i64(int64_t *res, int64_t lhs, int64_t rhs, uint8_t bits) {
#if zig_has_builtin(sub_overflow) || defined(zig_gcc)
int64_t full_res;
bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
int64_t full_res = __subodi4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
int64_t full_res = (int64_t)((uint64_t)lhs - (uint64_t)rhs);
bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
#endif
*res = zig_wrap_i64(full_res, bits);
return overflow || full_res < zig_minInt_i(64, bits) || full_res > zig_maxInt_i(64, bits);
@@ -1750,15 +1745,13 @@ static inline bool zig_addo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
#endif
}

zig_extern zig_i128 __addoti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
static inline bool zig_addo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
#if zig_has_builtin(add_overflow)
zig_i128 full_res;
bool overflow = __builtin_add_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
zig_i128 full_res = __addoti4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
zig_i128 full_res = (zig_i128)((zig_u128)lhs + (zig_u128)rhs);
bool overflow = ((full_res ^ lhs) & (full_res ^ rhs)) < 0;
#endif
*res = zig_wrap_i128(full_res, bits);
return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
@@ -1776,15 +1769,13 @@ static inline bool zig_subo_u128(zig_u128 *res, zig_u128 lhs, zig_u128 rhs, uint
#endif
}

zig_extern zig_i128 __suboti4(zig_i128 lhs, zig_i128 rhs, int *overflow);
static inline bool zig_subo_i128(zig_i128 *res, zig_i128 lhs, zig_i128 rhs, uint8_t bits) {
#if zig_has_builtin(sub_overflow)
zig_i128 full_res;
bool overflow = __builtin_sub_overflow(lhs, rhs, &full_res);
#else
int overflow_int;
zig_i128 full_res = __suboti4(lhs, rhs, &overflow_int);
bool overflow = overflow_int != 0;
zig_i128 full_res = (zig_i128)((zig_u128)lhs - (zig_u128)rhs);
bool overflow = ((lhs ^ rhs) & (full_res ^ lhs)) < 0;
#endif
*res = zig_wrap_i128(full_res, bits);
return overflow || full_res < zig_minInt_i(128, bits) || full_res > zig_maxInt_i(128, bits);
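
The inlined replacements above rely on the classic sign-bit overflow test; a minimal Zig sketch of the same check for addition (addOverflows is a hypothetical helper; the subtraction variant tests ((a ^ b) & (diff ^ a)) < 0 instead):

fn addOverflows(a: i32, b: i32) bool {
    // Wrapping add, then: overflow happened iff a and b agree in sign and the sum disagrees.
    const sum = a +% b;
    return ((sum ^ a) & (sum ^ b)) < 0;
}

test addOverflows {
    const std = @import("std");
    try std.testing.expect(addOverflows(std.math.maxInt(i32), 1));
    try std.testing.expect(!addOverflows(-1, 1));
}
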
@@ -4213,7 +4204,7 @@ static inline void zig_loongarch_cpucfg(uint32_t word, uint32_t* result) {
#endif
}

#elif defined(zig_x86)
#elif defined(zig_x86) && !defined(zig_x86_16)

static inline void zig_x86_cpuid(uint32_t leaf_id, uint32_t subid, uint32_t* eax, uint32_t* ebx, uint32_t* ecx, uint32_t* edx) {
#if defined(zig_msvc)

@@ -218,10 +218,13 @@ test "union with specified enum tag" {
}

test "packed union generates correctly aligned type" {
// This test will be removed after the following accepted proposal is implemented:
// https://github.com/ziglang/zig/issues/24657
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;

const U = packed union {
f1: *const fn () error{TestUnexpectedResult}!void,
@@ -1544,7 +1547,7 @@ test "packed union field pointer has correct alignment" {

const host_size = switch (builtin.zig_backend) {
else => comptime std.math.divCeil(comptime_int, @bitSizeOf(S), 8) catch unreachable,
.stage2_x86_64 => @sizeOf(S),
.stage2_x86_64, .stage2_c => @sizeOf(S),
};
comptime assert(@TypeOf(ap) == *align(4:2:host_size) u20);
comptime assert(@TypeOf(bp) == *align(1:2:host_size) u20);
