mirror of
https://codeberg.org/ziglang/zig.git
synced 2026-04-27 19:09:47 +03:00
Air.Legalize: revert to loops for scalarizations
I had tried unrolling the loops to avoid requiring the `vector_store_elem` instruction, but it's arguably a problem to generate O(N) code for an operation on `@Vector(N, T)`. In addition, that lowering emitted a lot of `.aggregate_init` instructions, which is itself a quite difficult operation to codegen. This requires reintroducing runtime vector indexing internally. However, I've put it in a couple of instructions which are intended only for use by `Air.Legalize`, named `legalize_vec_elem_val` (like `array_elem_val`, but for indexing a vector with a runtime-known index) and `legalize_vec_store_elem` (like the old `vector_store_elem` instruction). These are explicitly documented as *not* being emitted by Sema, so need only be implemented by backends if they actually use an `Air.Legalize.Feature` which emits them (otherwise they can be marked as `unreachable`).
This commit is contained in:
+26
-3
@@ -660,8 +660,8 @@ pub const Inst = struct {
|
||||
/// Given a pointer to a slice, return a pointer to the pointer of the slice.
|
||||
/// Uses the `ty_op` field.
|
||||
ptr_slice_ptr_ptr,
|
||||
/// Given an (array value or vector value) and element index,
|
||||
/// return the element value at that index.
|
||||
/// Given an (array value or vector value) and element index, return the element value at
|
||||
/// that index. If the lhs is a vector value, the index is guaranteed to be comptime-known.
|
||||
/// Result type is the element type of the array operand.
|
||||
/// Uses the `bin_op` field.
|
||||
array_elem_val,
|
||||
@@ -915,6 +915,26 @@ pub const Inst = struct {
|
||||
/// Operand is unused and set to Ref.none
|
||||
work_group_id,
|
||||
|
||||
// The remaining instructions are not emitted by Sema. They are only emitted by `Legalize`,
|
||||
// depending on the enabled features. As such, backends can consider them `unreachable` if
|
||||
// they do not enable the relevant legalizations.
|
||||
|
||||
/// Given a pointer to a vector, a runtime-known index, and a scalar value, store the value
|
||||
/// into the vector at the given index. Zig does not support this operation, but `Legalize`
|
||||
/// may emit it when scalarizing vector operations.
|
||||
///
|
||||
/// Uses the `pl_op` field with payload `Bin`. `operand` is the vector pointer. `lhs` is the
|
||||
/// element index of type `usize`. `rhs` is the element value. Result is always void.
|
||||
legalize_vec_store_elem,
|
||||
/// Given a vector value and a runtime-known index, return the element value at that index.
|
||||
/// This instruction is similar to `array_elem_val`; the only difference is that the index
|
||||
/// here is runtime-known, which is usually not allowed for vectors. `Legalize` may emit
|
||||
/// this instruction when scalarizing vector operations.
|
||||
///
|
||||
/// Uses the `bin_op` field. `lhs` is the vector value. `rhs` is the element index. Result
|
||||
/// type is the vector element type.
|
||||
legalize_vec_elem_val,
|
||||
|
||||
pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
|
||||
switch (op) {
|
||||
.lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
|
||||
@@ -1681,6 +1701,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
|
||||
.prefetch,
|
||||
.set_err_return_trace,
|
||||
.c_va_end,
|
||||
.legalize_vec_store_elem,
|
||||
=> return .void,
|
||||
|
||||
.slice_len,
|
||||
@@ -1699,7 +1720,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool)
|
||||
return .fromInterned(ip.funcTypeReturnType(callee_ty.toIntern()));
|
||||
},
|
||||
|
||||
.slice_elem_val, .ptr_elem_val, .array_elem_val => {
|
||||
.slice_elem_val, .ptr_elem_val, .array_elem_val, .legalize_vec_elem_val => {
|
||||
const ptr_ty = air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip);
|
||||
return ptr_ty.childTypeIp(ip);
|
||||
},
|
||||
@@ -1857,6 +1878,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
|
||||
.intcast_safe,
|
||||
.int_from_float_safe,
|
||||
.int_from_float_optimized_safe,
|
||||
.legalize_vec_store_elem,
|
||||
=> true,
|
||||
|
||||
.add,
|
||||
@@ -2002,6 +2024,7 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
|
||||
.work_item_id,
|
||||
.work_group_size,
|
||||
.work_group_id,
|
||||
.legalize_vec_elem_val,
|
||||
=> false,
|
||||
|
||||
.is_non_null_ptr, .is_null_ptr, .is_non_err_ptr, .is_err_ptr => air.typeOf(data.un_op, ip).isVolatilePtrIp(ip),
|
||||
|
||||
+813
-283
@@ -320,28 +320,36 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.xor,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.add_safe => if (l.features.has(.expand_add_safe)) {
|
||||
assert(!l.features.has(.scalarize_add_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .add_with_overflow));
|
||||
} else if (l.features.has(.scalarize_add_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.sub_safe => if (l.features.has(.expand_sub_safe)) {
|
||||
assert(!l.features.has(.scalarize_sub_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .sub_with_overflow));
|
||||
} else if (l.features.has(.scalarize_sub_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.mul_safe => if (l.features.has(.expand_mul_safe)) {
|
||||
assert(!l.features.has(.scalarize_mul_safe)); // it doesn't make sense to do both
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeArithmeticBlockPayload(inst, .mul_with_overflow));
|
||||
} else if (l.features.has(.scalarize_mul_safe)) {
|
||||
const bin_op = l.air_instructions.items(.data)[@intFromEnum(inst)].bin_op;
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.typeOf(bin_op.lhs).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
},
|
||||
.ptr_add, .ptr_sub => {},
|
||||
inline .add_with_overflow,
|
||||
@@ -350,7 +358,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.shl_with_overflow,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
|
||||
if (ty_pl.ty.toType().fieldType(0, zcu).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeOverflowBlockPayload(inst));
|
||||
}
|
||||
},
|
||||
.alloc => {},
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
@@ -387,7 +397,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (l.features.has(comptime .scalarize(air_tag))) continue :inst try l.scalarize(inst, .bin_op);
|
||||
if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .bin_op));
|
||||
}
|
||||
}
|
||||
},
|
||||
inline .not,
|
||||
@@ -406,7 +418,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.float_from_int,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.bitcast => if (l.features.has(.scalarize_bitcast)) {
|
||||
if (try l.scalarizeBitcastBlockPayload(inst)) |payload| {
|
||||
@@ -418,21 +432,27 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntcastBlockPayload(inst));
|
||||
} else if (l.features.has(.scalarize_intcast_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.int_from_float_safe => if (l.features.has(.expand_int_from_float_safe)) {
|
||||
assert(!l.features.has(.scalarize_int_from_float_safe));
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, false));
|
||||
} else if (l.features.has(.scalarize_int_from_float_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.int_from_float_optimized_safe => if (l.features.has(.expand_int_from_float_optimized_safe)) {
|
||||
assert(!l.features.has(.scalarize_int_from_float_optimized_safe));
|
||||
continue :inst l.replaceInst(inst, .block, try l.safeIntFromFloatBlockPayload(inst, true));
|
||||
} else if (l.features.has(.scalarize_int_from_float_optimized_safe)) {
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_op;
|
||||
if (ty_op.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .ty_op);
|
||||
if (ty_op.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .ty_op));
|
||||
}
|
||||
},
|
||||
.block, .loop => {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
@@ -467,7 +487,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.neg_optimized,
|
||||
=> |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const un_op = l.air_instructions.items(.data)[@intFromEnum(inst)].un_op;
|
||||
if (l.typeOf(un_op).isVector(zcu)) continue :inst try l.scalarize(inst, .un_op);
|
||||
if (l.typeOf(un_op).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .un_op));
|
||||
}
|
||||
},
|
||||
.cmp_lt,
|
||||
.cmp_lt_optimized,
|
||||
@@ -484,7 +506,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
=> {},
|
||||
inline .cmp_vector, .cmp_vector_optimized => |air_tag| if (l.features.has(comptime .scalarize(air_tag))) {
|
||||
const ty_pl = l.air_instructions.items(.data)[@intFromEnum(inst)].ty_pl;
|
||||
if (ty_pl.ty.toType().isVector(zcu)) continue :inst try l.scalarize(inst, .cmp_vector);
|
||||
if (ty_pl.ty.toType().isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .cmp_vector));
|
||||
}
|
||||
},
|
||||
.cond_br => {
|
||||
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
@@ -614,9 +638,15 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
else => {},
|
||||
}
|
||||
},
|
||||
.shuffle_one => if (l.features.has(.scalarize_shuffle_one)) continue :inst try l.scalarize(inst, .shuffle_one),
|
||||
.shuffle_two => if (l.features.has(.scalarize_shuffle_two)) continue :inst try l.scalarize(inst, .shuffle_two),
|
||||
.select => if (l.features.has(.scalarize_select)) continue :inst try l.scalarize(inst, .select),
|
||||
.shuffle_one => if (l.features.has(.scalarize_shuffle_one)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleOneBlockPayload(inst));
|
||||
},
|
||||
.shuffle_two => if (l.features.has(.scalarize_shuffle_two)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeShuffleTwoBlockPayload(inst));
|
||||
},
|
||||
.select => if (l.features.has(.scalarize_select)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .select));
|
||||
},
|
||||
.memset,
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
@@ -657,7 +687,9 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.union_init, .prefetch => {},
|
||||
.mul_add => if (l.features.has(.scalarize_mul_add)) {
|
||||
const pl_op = l.air_instructions.items(.data)[@intFromEnum(inst)].pl_op;
|
||||
if (l.typeOf(pl_op.operand).isVector(zcu)) continue :inst try l.scalarize(inst, .pl_op_bin);
|
||||
if (l.typeOf(pl_op.operand).isVector(zcu)) {
|
||||
continue :inst l.replaceInst(inst, .block, try l.scalarizeBlockPayload(inst, .pl_op_bin));
|
||||
}
|
||||
},
|
||||
.field_parent_ptr,
|
||||
.wasm_memory_size,
|
||||
@@ -675,96 +707,123 @@ fn legalizeBody(l: *Legalize, body_start: usize, body_len: usize) Error!void {
|
||||
.work_item_id,
|
||||
.work_group_size,
|
||||
.work_group_id,
|
||||
.legalize_vec_elem_val,
|
||||
.legalize_vec_store_elem,
|
||||
=> {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, shuffle_one, shuffle_two, select };
|
||||
/// inline to propagate comptime-known `replaceInst` result.
|
||||
inline fn scalarize(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Tag {
|
||||
return l.replaceInst(orig_inst, .block, try l.scalarizeBlockPayload(orig_inst, form));
|
||||
}
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form: ScalarizeForm) Error!Air.Inst.Data {
|
||||
const ScalarizeForm = enum { un_op, ty_op, bin_op, pl_op_bin, cmp_vector, select };
|
||||
fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, form: ScalarizeForm) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const gpa = zcu.gpa;
|
||||
|
||||
const orig = l.air_instructions.get(@intFromEnum(orig_inst));
|
||||
const res_ty = l.typeOfIndex(orig_inst);
|
||||
const res_len = res_ty.vectorLen(zcu);
|
||||
const result_is_array = switch (res_ty.zigTypeTag(zcu)) {
|
||||
.vector => false,
|
||||
.array => true,
|
||||
else => unreachable,
|
||||
};
|
||||
const res_len = res_ty.arrayLen(zcu);
|
||||
const res_elem_ty = res_ty.childType(zcu);
|
||||
|
||||
const inst_per_elem = switch (form) {
|
||||
if (result_is_array) {
|
||||
// This is only allowed when legalizing an elementwise bitcast.
|
||||
assert(orig.tag == .bitcast);
|
||||
assert(form == .ty_op);
|
||||
}
|
||||
|
||||
// Our output will be a loop doing elementwise stores:
|
||||
//
|
||||
// %1 = block(@Vector(N, Scalar), {
|
||||
// %2 = alloc(*usize)
|
||||
// %3 = alloc(*@Vector(N, Scalar))
|
||||
// %4 = store(%2, @zero_usize)
|
||||
// %5 = loop({
|
||||
// %6 = load(%2)
|
||||
// %7 = <scalar result of operation at index %6>
|
||||
// %8 = legalize_vec_store_elem(%3, %6, %7)
|
||||
// %9 = cmp_eq(%6, <usize, N-1>)
|
||||
// %10 = cond_br(%9, {
|
||||
// %11 = load(%3)
|
||||
// %12 = br(%1, %11)
|
||||
// }, {
|
||||
// %13 = add(%6, @one_usize)
|
||||
// %14 = store(%2, %13)
|
||||
// %15 = repeat(%5)
|
||||
// })
|
||||
// })
|
||||
// })
|
||||
//
|
||||
// If scalarizing an elementwise bitcast, the result might be an array, in which case
|
||||
// `legalize_vec_store_elem` becomes two instructions (`ptr_elem_ptr` and `store`).
|
||||
// Therefore, there are 13 or 14 instructions in the block, plus however many are
|
||||
// needed to compute each result element for `form`.
|
||||
const inst_per_form: usize = switch (form) {
|
||||
.un_op, .ty_op => 2,
|
||||
.bin_op, .cmp_vector => 3,
|
||||
.pl_op_bin => 4,
|
||||
.shuffle_one, .shuffle_two => 1,
|
||||
.select => 7,
|
||||
};
|
||||
const max_inst_per_form = 7; // maximum value in the above switch
|
||||
var inst_buf: [14 + max_inst_per_form]Air.Inst.Index = undefined;
|
||||
|
||||
var sfba_state = std.heap.stackFallback(@sizeOf([inst_per_elem * 32 + 2]Air.Inst.Index) + @sizeOf([32]Air.Inst.Ref), gpa);
|
||||
const sfba = sfba_state.get();
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
// Plus 2 extra instructions for `aggregate_init` and `br`.
|
||||
const inst_buf = try sfba.alloc(Air.Inst.Index, inst_per_elem * res_len + 2);
|
||||
defer sfba.free(inst_buf);
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(res_ty)).toRef();
|
||||
|
||||
var main_block: Block = .init(inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
const elem_buf = try sfba.alloc(Air.Inst.Ref, res_len);
|
||||
defer sfba.free(elem_buf);
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
switch (form) {
|
||||
.un_op => {
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const elem_val: Air.Inst.Ref = switch (form) {
|
||||
.un_op => elem: {
|
||||
const orig_operand = orig.data.un_op;
|
||||
const un_op_tag = orig.tag;
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef();
|
||||
elem.* = main_block.addUnOp(l, un_op_tag, operand).toRef();
|
||||
}
|
||||
const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
|
||||
break :elem loop.block.addUnOp(l, orig.tag, operand).toRef();
|
||||
},
|
||||
.ty_op => {
|
||||
.ty_op => elem: {
|
||||
const orig_operand = orig.data.ty_op.operand;
|
||||
const orig_ty: Type = .fromInterned(orig.data.ty_op.ty.toInterned().?);
|
||||
const scalar_ty = orig_ty.childType(zcu);
|
||||
const ty_op_tag = orig.tag;
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef();
|
||||
elem.* = main_block.addTyOp(l, ty_op_tag, scalar_ty, operand).toRef();
|
||||
}
|
||||
const operand_is_array = switch (l.typeOf(orig_operand).zigTypeTag(zcu)) {
|
||||
.vector => false,
|
||||
.array => true,
|
||||
else => unreachable,
|
||||
};
|
||||
const operand = loop.block.addBinOp(
|
||||
l,
|
||||
if (operand_is_array) .array_elem_val else .legalize_vec_elem_val,
|
||||
orig_operand,
|
||||
index_val,
|
||||
).toRef();
|
||||
break :elem loop.block.addTyOp(l, orig.tag, res_elem_ty, operand).toRef();
|
||||
},
|
||||
.bin_op => {
|
||||
const orig_operands = orig.data.bin_op;
|
||||
const bin_op_tag = orig.tag;
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef();
|
||||
const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef();
|
||||
elem.* = main_block.addBinOp(l, bin_op_tag, lhs, rhs).toRef();
|
||||
}
|
||||
.bin_op => elem: {
|
||||
const orig_bin = orig.data.bin_op;
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
break :elem loop.block.addBinOp(l, orig.tag, lhs, rhs).toRef();
|
||||
},
|
||||
.pl_op_bin => {
|
||||
.pl_op_bin => elem: {
|
||||
const orig_operand = orig.data.pl_op.operand;
|
||||
const orig_payload = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
const pl_op_tag = orig.tag;
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const operand = main_block.addBinOp(l, .array_elem_val, orig_operand, elem_idx_ref).toRef();
|
||||
const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef();
|
||||
const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef();
|
||||
elem.* = main_block.add(l, .{
|
||||
.tag = pl_op_tag,
|
||||
.data = .{ .pl_op = .{
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
.operand = operand,
|
||||
} },
|
||||
}).toRef();
|
||||
}
|
||||
const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
const operand = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operand, index_val).toRef();
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
break :elem loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = operand,
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
} },
|
||||
}).toRef();
|
||||
},
|
||||
.cmp_vector => {
|
||||
.cmp_vector => elem: {
|
||||
const orig_payload = l.extraData(Air.VectorCmp, orig.data.ty_pl.payload).data;
|
||||
const cmp_op = orig_payload.compareOperator();
|
||||
const optimized = switch (orig.tag) {
|
||||
@@ -772,116 +831,393 @@ fn scalarizeBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, comptime form:
|
||||
.cmp_vector_optimized => true,
|
||||
else => unreachable,
|
||||
};
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const lhs = main_block.addBinOp(l, .array_elem_val, orig_payload.lhs, elem_idx_ref).toRef();
|
||||
const rhs = main_block.addBinOp(l, .array_elem_val, orig_payload.rhs, elem_idx_ref).toRef();
|
||||
elem.* = main_block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef();
|
||||
}
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_payload.rhs, index_val).toRef();
|
||||
break :elem loop.block.addCmpScalar(l, cmp_op, lhs, rhs, optimized).toRef();
|
||||
},
|
||||
.shuffle_one => {
|
||||
const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);
|
||||
for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) {
|
||||
.value => |val| .fromIntern(val),
|
||||
.elem => |src_idx| elem: {
|
||||
const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx));
|
||||
break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand, src_idx_ref).toRef();
|
||||
},
|
||||
};
|
||||
},
|
||||
.shuffle_two => {
|
||||
const shuffle = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst);
|
||||
const scalar_ty = res_ty.childType(zcu);
|
||||
for (elem_buf, shuffle.mask) |*elem, mask| elem.* = switch (mask.unwrap()) {
|
||||
.undef => .fromValue(try pt.undefValue(scalar_ty)),
|
||||
.a_elem => |src_idx| elem: {
|
||||
const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx));
|
||||
break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_a, src_idx_ref).toRef();
|
||||
},
|
||||
.b_elem => |src_idx| elem: {
|
||||
const src_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, src_idx));
|
||||
break :elem main_block.addBinOp(l, .array_elem_val, shuffle.operand_b, src_idx_ref).toRef();
|
||||
},
|
||||
};
|
||||
},
|
||||
.select => {
|
||||
.select => elem: {
|
||||
const orig_cond = orig.data.pl_op.operand;
|
||||
const orig_bin = l.extraData(Air.Bin, orig.data.pl_op.payload).data;
|
||||
const res_scalar_ty = res_ty.childType(zcu);
|
||||
for (elem_buf, 0..) |*elem, elem_idx| {
|
||||
// Payload to be populated later; we need the index early for `br`s.
|
||||
const elem_block_inst = main_block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(res_scalar_ty),
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
var elem_block: Block = .init(main_block.stealCapacity(2));
|
||||
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const cond = elem_block.addBinOp(l, .array_elem_val, orig_cond, elem_idx_ref).toRef();
|
||||
var condbr: CondBr = .init(l, cond, &elem_block, .{});
|
||||
const elem_block_inst = loop.block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(res_elem_ty),
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
var elem_block: Block = .init(loop.block.stealCapacity(2));
|
||||
const cond = elem_block.addBinOp(l, .legalize_vec_elem_val, orig_cond, index_val).toRef();
|
||||
|
||||
condbr.then_block = .init(main_block.stealCapacity(2));
|
||||
const lhs = condbr.then_block.addBinOp(l, .array_elem_val, orig_bin.lhs, elem_idx_ref).toRef();
|
||||
condbr.then_block.addBr(l, elem_block_inst, lhs);
|
||||
var condbr: CondBr = .init(l, cond, &elem_block, .{});
|
||||
|
||||
condbr.else_block = .init(main_block.stealCapacity(2));
|
||||
const rhs = condbr.else_block.addBinOp(l, .array_elem_val, orig_bin.rhs, elem_idx_ref).toRef();
|
||||
condbr.else_block.addBr(l, elem_block_inst, rhs);
|
||||
condbr.then_block = .init(loop.block.stealCapacity(2));
|
||||
const lhs = condbr.then_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.lhs, index_val).toRef();
|
||||
condbr.then_block.addBr(l, elem_block_inst, lhs);
|
||||
|
||||
try condbr.finish(l);
|
||||
condbr.else_block = .init(loop.block.stealCapacity(2));
|
||||
const rhs = condbr.else_block.addBinOp(l, .legalize_vec_elem_val, orig_bin.rhs, index_val).toRef();
|
||||
condbr.else_block.addBr(l, elem_block_inst, rhs);
|
||||
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body());
|
||||
try condbr.finish(l);
|
||||
|
||||
elem.* = elem_block_inst.toRef();
|
||||
}
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(elem_block_inst)].ty_pl.payload = try l.addBlockBody(elem_block.body());
|
||||
|
||||
break :elem elem_block_inst.toRef();
|
||||
},
|
||||
};
|
||||
_ = loop.block.stealCapacity(max_inst_per_form - inst_per_form);
|
||||
if (result_is_array) {
|
||||
const elem_ptr = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(try pt.singleMutPtrType(res_elem_ty)),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = result_ptr,
|
||||
.rhs = index_val,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
|
||||
} else {
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = elem_val,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.stealCapacity(1);
|
||||
}
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, res_len - 1))).toRef();
|
||||
|
||||
const result = main_block.add(l, .{
|
||||
.tag = .aggregate_init,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(res_ty),
|
||||
.payload = payload: {
|
||||
const idx = l.air_extra.items.len;
|
||||
try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf));
|
||||
break :payload @intCast(idx);
|
||||
},
|
||||
} },
|
||||
}).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, res_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
main_block.addBr(l, orig_inst, result);
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
// Some `form` values may intentionally not use the full instruction buffer.
|
||||
switch (form) {
|
||||
.un_op,
|
||||
.ty_op,
|
||||
.bin_op,
|
||||
.pl_op_bin,
|
||||
.cmp_vector,
|
||||
.select,
|
||||
=> {},
|
||||
.shuffle_one,
|
||||
.shuffle_two,
|
||||
=> _ = main_block.stealRemainingCapacity(),
|
||||
}
|
||||
try condbr.finish(l);
|
||||
|
||||
try loop.finish(l);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = .fromType(res_ty),
|
||||
.payload = try l.addBlockBody(main_block.body()),
|
||||
} };
|
||||
}
|
||||
fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data {
|
||||
/// Builds the payload of the block which replaces a `shuffle_one` instruction with a
/// scalarized equivalent.
///
/// Result elements whose mask entry is a comptime-known value are written up front as one
/// constant vector store (the remaining positions of that constant are undefined). The
/// elements selected from the operand are then copied in one at a time by the loop emitted
/// by `addScalarizedShuffle`.
///
/// Returns the `ty_pl` data for a block whose result type is the shuffle's result type.
fn scalarizeShuffleOneBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
    const pt = l.pt;
    const zcu = pt.zcu;
    const gpa = zcu.gpa;

    const shuffle = l.getTmpAir().unwrapShuffleOne(zcu, orig_inst);

    // We're going to emit something like this:
    //
    //   var x: @Vector(N, T) = all_comptime_known_elems;
    //   for (out_idxs, in_idxs) |i, j| x[i] = operand[j];
    //
    // So we must first compute `out_idxs` and `in_idxs`.

    var sfba_state = std.heap.stackFallback(512, gpa);
    const sfba = sfba_state.get();

    const out_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
    defer sfba.free(out_idxs_buf);

    const in_idxs_buf = try sfba.alloc(InternPool.Index, shuffle.mask.len);
    defer sfba.free(in_idxs_buf);

    // `n` counts the mask entries which select an operand element; only the first `n` slots
    // of each buffer end up populated.
    var n: usize = 0;
    for (shuffle.mask, 0..) |mask, out_idx| switch (mask.unwrap()) {
        .value => {},
        .elem => |in_idx| {
            out_idxs_buf[n] = (try pt.intValue(.usize, out_idx)).toIntern();
            in_idxs_buf[n] = (try pt.intValue(.usize, in_idx)).toIntern();
            n += 1;
        },
    };

    // The initial vector: comptime-known elements keep their value, while every position
    // which will be filled from the operand starts out undefined.
    const init_val: Value = init: {
        const undef_val = try pt.undefValue(shuffle.result_ty.childType(zcu));
        const elems = try sfba.alloc(InternPool.Index, shuffle.mask.len);
        defer sfba.free(elems);
        for (shuffle.mask, elems) |mask, *elem| elem.* = switch (mask.unwrap()) {
            .value => |ip_index| ip_index,
            .elem => undef_val.toIntern(),
        };
        break :init try pt.aggregateValue(shuffle.result_ty, elems);
    };

    // %1 = block(@Vector(N, T), {
    //   %2 = alloc(*@Vector(N, T))
    //   %3 = alloc(*usize)
    //   %4 = store(%2, <init_val>)
    //   %5 = [addScalarizedShuffle]
    //   %6 = load(%2)
    //   %7 = br(%1, %6)
    // })

    // 6 instructions directly in this block, plus the 13 which `addScalarizedShuffle` nests
    // inside its own block, gives 19 in total.
    var inst_buf: [6]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);
    try l.air_instructions.ensureUnusedCapacity(gpa, 19);

    const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(shuffle.result_ty)).toRef();
    const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();

    _ = main_block.addBinOp(l, .store, result_ptr, .fromValue(init_val));

    if (n == 0) {
        // No element comes from the operand, so there is no loop to emit (and
        // `addScalarizedShuffle` cannot handle empty index lists). Give up the slot reserved
        // for it, exactly as `scalarizeShuffleTwoBlockPayload` does for an unused operand.
        _ = main_block.stealCapacity(1);
    } else {
        try l.addScalarizedShuffle(
            &main_block,
            shuffle.operand,
            result_ptr,
            index_ptr,
            out_idxs_buf[0..n],
            in_idxs_buf[0..n],
        );
    }

    const result_val = main_block.addTyOp(l, .load, shuffle.result_ty, result_ptr).toRef();
    main_block.addBr(l, orig_inst, result_val);

    return .{ .ty_pl = .{
        .ty = .fromType(shuffle.result_ty),
        .payload = try l.addBlockBody(main_block.body()),
    } };
}
|
||||
/// Builds the payload of the block which replaces a `shuffle_two` instruction with a
/// scalarized equivalent.
///
/// The result vector starts out undefined. Elements taken from `operand_a` are copied in by
/// one loop and elements taken from `operand_b` by a second one; positions whose mask entry
/// is `.undef` are never written.
///
/// Returns the `ty_pl` data for a block whose result type is the shuffle's result type.
fn scalarizeShuffleTwoBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
    const pt = l.pt;
    const zcu = pt.zcu;
    const gpa = zcu.gpa;

    const unwrapped = l.getTmpAir().unwrapShuffleTwo(zcu, orig_inst);
    const result_ty = unwrapped.result_ty;

    // Conceptually, the emitted code is:
    //
    //   var x: @Vector(N, T) = undefined;
    //   for (out_idxs_a, in_idxs_a) |i, j| x[i] = operand_a[j];
    //   for (out_idxs_b, in_idxs_b) |i, j| x[i] = operand_b[j];
    //
    // which corresponds to this AIR:
    //
    // %1 = block(@Vector(N, T), {
    //   %2 = alloc(*@Vector(N, T))
    //   %3 = alloc(*usize)
    //   %4 = store(%2, <@Vector(N, T), undefined>)
    //   %5 = [addScalarizedShuffle]
    //   %6 = [addScalarizedShuffle]
    //   %7 = load(%2)
    //   %8 = br(%1, %7)
    // })

    var fallback_state = std.heap.stackFallback(512, gpa);
    const scratch = fallback_state.get();

    const out_buf = try scratch.alloc(InternPool.Index, unwrapped.mask.len);
    defer scratch.free(out_buf);

    const in_buf = try scratch.alloc(InternPool.Index, unwrapped.mask.len);
    defer scratch.free(in_buf);

    // The mask has to be consumed in full before any instruction is added, because modifying
    // AIR invalidates it. Both buffers hold the `operand_a` entries first, followed directly
    // by the `operand_b` entries.
    var filled: usize = 0;
    for (unwrapped.mask, 0..) |entry, dest_idx| switch (entry.unwrap()) {
        .a_elem => |src_idx| {
            out_buf[filled] = (try pt.intValue(.usize, dest_idx)).toIntern();
            in_buf[filled] = (try pt.intValue(.usize, src_idx)).toIntern();
            filled += 1;
        },
        .undef, .b_elem => {},
    };
    const a_count = filled;
    for (unwrapped.mask, 0..) |entry, dest_idx| switch (entry.unwrap()) {
        .b_elem => |src_idx| {
            out_buf[filled] = (try pt.intValue(.usize, dest_idx)).toIntern();
            in_buf[filled] = (try pt.intValue(.usize, src_idx)).toIntern();
            filled += 1;
        },
        .undef, .a_elem => {},
    };

    const a_out_idxs = out_buf[0..a_count];
    const a_in_idxs = in_buf[0..a_count];
    const b_out_idxs = out_buf[a_count..filled];
    const b_in_idxs = in_buf[a_count..filled];

    // 7 instructions live directly in the outer block; each `addScalarizedShuffle` call nests
    // a further 13 inside its own block, for 33 in total.
    var insts: [7]Air.Inst.Index = undefined;
    var outer: Block = .init(&insts);
    try l.air_instructions.ensureUnusedCapacity(gpa, 33);

    const vec_ptr = outer.addTy(l, .alloc, try pt.singleMutPtrType(result_ty)).toRef();
    const counter_ptr = outer.addTy(l, .alloc, .ptr_usize).toRef();

    const undef_vec = try pt.undefValue(result_ty);
    _ = outer.addBinOp(l, .store, vec_ptr, .fromValue(undef_vec));

    // An operand which contributes no elements gets no loop; the slot reserved for that loop
    // is handed back instead.
    if (a_out_idxs.len != 0) {
        try l.addScalarizedShuffle(
            &outer,
            unwrapped.operand_a,
            vec_ptr,
            counter_ptr,
            a_out_idxs,
            a_in_idxs,
        );
    } else {
        _ = outer.stealCapacity(1);
    }

    if (b_out_idxs.len != 0) {
        try l.addScalarizedShuffle(
            &outer,
            unwrapped.operand_b,
            vec_ptr,
            counter_ptr,
            b_out_idxs,
            b_in_idxs,
        );
    } else {
        _ = outer.stealCapacity(1);
    }

    const loaded = outer.addTyOp(l, .load, result_ty, vec_ptr).toRef();
    outer.addBr(l, orig_inst, loaded);

    const body_payload = try l.addBlockBody(outer.body());
    return .{ .ty_pl = .{
        .ty = .fromType(result_ty),
        .payload = body_payload,
    } };
}
|
||||
/// Adds code to `parent_block` which behaves like this loop:
///
///   for (out_idxs, in_idxs) |i, j| result_vec_ptr[i] = operand_vec[j];
///
/// The actual AIR adds exactly one instruction to `parent_block` itself and 14 instructions
/// overall, and is as follows:
///
///   %1 = block(void, {
///     %2 = store(index_ptr, @zero_usize)
///     %3 = loop({
///       %4 = load(index_ptr)
///       %5 = ptr_elem_val(in_idxs_ptr, %4)
///       %6 = ptr_elem_val(out_idxs_ptr, %4)
///       %7 = legalize_vec_elem_val(operand_vec, %5)
///       %8 = legalize_vec_store_elem(result_vec_ptr, %6, %7)
///       %9 = cmp_eq(%4, <usize, out_idxs.len-1>)
///       %10 = cond_br(%9, {
///         %11 = br(%1, @void_value)
///       }, {
///         %12 = add(%4, @one_usize)
///         %13 = store(index_ptr, %12)
///         %14 = repeat(%3)
///       })
///     })
///   })
///
/// `index_ptr` is used as the `usize` loop counter; its previous contents are overwritten.
/// `out_idxs` and `in_idxs` are the elements of the two `usize` index arrays and must have
/// the same length. That length must be nonzero, because the loop body always runs at least
/// once and the exit condition compares against `out_idxs.len - 1`.
///
/// The caller is responsible for reserving space in `l.air_instructions`.
fn addScalarizedShuffle(
    l: *Legalize,
    parent_block: *Block,
    operand_vec: Air.Inst.Ref,
    result_vec_ptr: Air.Inst.Ref,
    index_ptr: Air.Inst.Ref,
    out_idxs: []const InternPool.Index,
    in_idxs: []const InternPool.Index,
) Error!void {
    const pt = l.pt;

    assert(out_idxs.len == in_idxs.len);
    const n = out_idxs.len;
    // An empty index list cannot be lowered to this loop; callers must skip the call instead.
    assert(n != 0);

    const idxs_ty = try pt.arrayType(.{ .len = n, .child = .usize_type });
    const idxs_ptr_ty = try pt.singleConstPtrType(idxs_ty);
    const manyptr_usize_ty = try pt.manyConstPtrType(.usize);

    // Both index lists become constant arrays, each referenced through a many-item pointer
    // so that the loop can index them with `ptr_elem_val`.
    const out_idxs_ptr = try pt.intern(.{ .ptr = .{
        .ty = manyptr_usize_ty.toIntern(),
        .base_addr = .{ .uav = .{
            .val = (try pt.aggregateValue(idxs_ty, out_idxs)).toIntern(),
            .orig_ty = idxs_ptr_ty.toIntern(),
        } },
        .byte_offset = 0,
    } });
    const in_idxs_ptr = try pt.intern(.{ .ptr = .{
        .ty = manyptr_usize_ty.toIntern(),
        .base_addr = .{ .uav = .{
            .val = (try pt.aggregateValue(idxs_ty, in_idxs)).toIntern(),
            .orig_ty = idxs_ptr_ty.toIntern(),
        } },
        .byte_offset = 0,
    } });

    // The block's payload is filled in at the very end, once its body is complete.
    const main_block_inst = parent_block.add(l, .{
        .tag = .block,
        .data = .{ .ty_pl = .{
            .ty = .void_type,
            .payload = undefined,
        } },
    });

    // One shared buffer backs the block, the loop body, and both `cond_br` branches: each
    // nested body takes over whatever capacity its parent has left.
    var inst_buf: [13]Air.Inst.Index = undefined;
    var main_block: Block = .init(&inst_buf);

    _ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);

    var loop: Loop = .init(l, &main_block);
    loop.block = .init(main_block.stealRemainingCapacity());

    const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
    const in_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(in_idxs_ptr), index_val).toRef();
    const out_idx_val = loop.block.addBinOp(l, .ptr_elem_val, .fromIntern(out_idxs_ptr), index_val).toRef();

    const elem_val = loop.block.addBinOp(l, .legalize_vec_elem_val, operand_vec, in_idx_val).toRef();
    _ = loop.block.add(l, .{
        .tag = .legalize_vec_store_elem,
        .data = .{ .pl_op = .{
            .operand = result_vec_ptr,
            .payload = try l.addExtra(Air.Bin, .{
                .lhs = out_idx_val,
                .rhs = elem_val,
            }),
        } },
    });

    // Leave the block after handling the final index pair; otherwise bump the counter and go
    // around again.
    const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, n - 1))).toRef();
    var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
    condbr.then_block = .init(loop.block.stealRemainingCapacity());
    condbr.then_block.addBr(l, main_block_inst, .void_value);

    condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
    const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
    _ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
    _ = condbr.else_block.add(l, .{
        .tag = .repeat,
        .data = .{ .repeat = .{ .loop_inst = loop.inst } },
    });

    try condbr.finish(l);
    try loop.finish(l);

    const inst_data = l.air_instructions.items(.data);
    inst_data[@intFromEnum(main_block_inst)].ty_pl.payload = try l.addBlockBody(main_block.body());
}
|
||||
fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
|
||||
const ty_op = l.air_instructions.items(.data)[@intFromEnum(orig_inst)].ty_op;
|
||||
|
||||
const dest_ty = ty_op.ty.toType();
|
||||
@@ -920,72 +1256,204 @@ fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?
|
||||
const uint_ty = try pt.intType(.unsigned, num_bits);
|
||||
const shift_ty = try pt.intType(.unsigned, std.math.log2_int_ceil(u16, num_bits));
|
||||
|
||||
const inst_buf = try sfba.alloc(Air.Inst.Index, len: {
|
||||
const operand_to_uint_len: u64 = if (operand_legal) 1 else (operand_ty.arrayLen(zcu) * 5);
|
||||
const uint_to_dest_len: u64 = if (dest_legal) 1 else (dest_ty.arrayLen(zcu) * 3 + 1);
|
||||
break :len @intCast(operand_to_uint_len + uint_to_dest_len + 1);
|
||||
});
|
||||
defer sfba.free(inst_buf);
|
||||
var main_block: Block = .init(inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
var inst_buf: [39]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
// First, convert `operand_ty` to `uint_ty` (`uN`).
|
||||
|
||||
const uint_val: Air.Inst.Ref = uint_val: {
|
||||
if (operand_legal) break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
|
||||
|
||||
const bits_per_elem: u16 = @intCast(operand_ty.childType(zcu).bitSize(zcu));
|
||||
const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem));
|
||||
const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem);
|
||||
|
||||
var cur_uint: Air.Inst.Ref = .fromValue(try pt.intValue(uint_ty, 0));
|
||||
var elem_idx = operand_ty.arrayLen(zcu);
|
||||
while (elem_idx > 0) {
|
||||
elem_idx -= 1;
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const orig_elem = main_block.addBinOp(l, .array_elem_val, ty_op.operand, elem_idx_ref).toRef();
|
||||
const elem_as_uint = main_block.addBitCast(l, elem_uint_ty, orig_elem);
|
||||
const elem_extended = main_block.addTyOp(l, .intcast, uint_ty, elem_as_uint).toRef();
|
||||
cur_uint = main_block.addBinOp(l, .shl_exact, cur_uint, bits_per_elem_ref).toRef();
|
||||
cur_uint = main_block.addBinOp(l, .bit_or, cur_uint, elem_extended).toRef();
|
||||
if (operand_legal) {
|
||||
_ = main_block.stealCapacity(19);
|
||||
break :uint_val main_block.addBitCast(l, uint_ty, ty_op.operand);
|
||||
}
|
||||
break :uint_val cur_uint;
|
||||
|
||||
// %1 = block({
|
||||
// %2 = alloc(*usize)
|
||||
// %3 = alloc(*uN)
|
||||
// %4 = store(%2, <usize, operand_len>)
|
||||
// %5 = store(%3, <uN, 0>)
|
||||
// %6 = loop({
|
||||
// %7 = load(%2)
|
||||
// %8 = array_elem_val(orig_operand, %7)
|
||||
// %9 = bitcast(uE, %8)
|
||||
// %10 = intcast(uN, %9)
|
||||
// %11 = load(%3)
|
||||
// %12 = shl_exact(%11, <uS, E>)
|
||||
// %13 = bit_or(%12, %10)
|
||||
// %14 = cmp_eq(%7, @zero_usize)
|
||||
// %15 = cond_br(%14, {
|
||||
// %16 = br(%1, %13)
|
||||
// }, {
|
||||
// %17 = store(%3, %13)
|
||||
// %18 = sub(%7, @one_usize)
|
||||
// %19 = store(%2, %18)
|
||||
// %20 = repeat(%6)
|
||||
// })
|
||||
// })
|
||||
// })
|
||||
|
||||
const elem_bits = operand_ty.childType(zcu).bitSize(zcu);
|
||||
const elem_bits_val = try pt.intValue(shift_ty, elem_bits);
|
||||
const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
|
||||
|
||||
const uint_block_inst = main_block.add(l, .{
|
||||
.tag = .block,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(uint_ty),
|
||||
.payload = undefined,
|
||||
} },
|
||||
});
|
||||
var uint_block: Block = .init(main_block.stealCapacity(19));
|
||||
|
||||
const index_ptr = uint_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = uint_block.addTy(l, .alloc, try pt.singleMutPtrType(uint_ty)).toRef();
|
||||
_ = uint_block.addBinOp(
|
||||
l,
|
||||
.store,
|
||||
index_ptr,
|
||||
.fromValue(try pt.intValue(.usize, operand_ty.arrayLen(zcu))),
|
||||
);
|
||||
_ = uint_block.addBinOp(l, .store, result_ptr, .fromValue(try pt.intValue(uint_ty, 0)));
|
||||
|
||||
var loop: Loop = .init(l, &uint_block);
|
||||
loop.block = .init(uint_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const raw_elem = loop.block.addBinOp(
|
||||
l,
|
||||
if (operand_ty.zigTypeTag(zcu) == .vector) .legalize_vec_elem_val else .array_elem_val,
|
||||
ty_op.operand,
|
||||
index_val,
|
||||
).toRef();
|
||||
const elem_uint = loop.block.addBitCast(l, elem_uint_ty, raw_elem);
|
||||
const elem_extended = loop.block.addTyOp(l, .intcast, uint_ty, elem_uint).toRef();
|
||||
const old_result = loop.block.addTyOp(l, .load, uint_ty, result_ptr).toRef();
|
||||
const shifted_result = loop.block.addBinOp(l, .shl_exact, old_result, .fromValue(elem_bits_val)).toRef();
|
||||
const new_result = loop.block.addBinOp(l, .bit_or, shifted_result, elem_extended).toRef();
|
||||
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .zero_usize).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
condbr.then_block.addBr(l, uint_block_inst, new_result);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
_ = condbr.else_block.addBinOp(l, .store, result_ptr, new_result);
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .sub, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
|
||||
const inst_data = l.air_instructions.items(.data);
|
||||
inst_data[@intFromEnum(uint_block_inst)].ty_pl.payload = try l.addBlockBody(uint_block.body());
|
||||
|
||||
break :uint_val uint_block_inst.toRef();
|
||||
};
|
||||
|
||||
// Now convert `uint_ty` (`uN`) to `dest_ty`.
|
||||
|
||||
const result: Air.Inst.Ref = result: {
|
||||
if (dest_legal) break :result main_block.addBitCast(l, dest_ty, uint_val);
|
||||
if (dest_legal) {
|
||||
_ = main_block.stealCapacity(17);
|
||||
const result = main_block.addBitCast(l, dest_ty, uint_val);
|
||||
main_block.addBr(l, orig_inst, result);
|
||||
} else {
|
||||
// %1 = alloc(*usize)
|
||||
// %2 = alloc(*@Vector(N, Result))
|
||||
// %3 = store(%1, @zero_usize)
|
||||
// %4 = loop({
|
||||
// %5 = load(%1)
|
||||
// %6 = mul(%5, <usize, E>)
|
||||
// %7 = intcast(uS, %6)
|
||||
// %8 = shr(uint_val, %7)
|
||||
// %9 = trunc(uE, %8)
|
||||
// %10 = bitcast(Result, %9)
|
||||
// %11 = legalize_vec_store_elem(%2, %5, %10)
|
||||
// %12 = cmp_eq(%5, <usize, vec_len-1>)
|
||||
// %13 = cond_br(%12, {
|
||||
// %14 = load(%2)
|
||||
// %15 = br(%0, %14)
|
||||
// }, {
|
||||
// %16 = add(%5, @one_usize)
|
||||
// %17 = store(%1, %16)
|
||||
// %18 = repeat(%4)
|
||||
// })
|
||||
// })
|
||||
//
|
||||
// The result might be an array, in which case `legalize_vec_store_elem`
|
||||
// becomes `ptr_elem_ptr` followed by `store`.
|
||||
|
||||
const elem_ty = dest_ty.childType(zcu);
|
||||
const bits_per_elem: u16 = @intCast(elem_ty.bitSize(zcu));
|
||||
const bits_per_elem_ref: Air.Inst.Ref = .fromValue(try pt.intValue(shift_ty, bits_per_elem));
|
||||
const elem_uint_ty = try pt.intType(.unsigned, bits_per_elem);
|
||||
const elem_bits = elem_ty.bitSize(zcu);
|
||||
const elem_uint_ty = try pt.intType(.unsigned, @intCast(elem_bits));
|
||||
|
||||
const elem_buf = try sfba.alloc(Air.Inst.Ref, dest_ty.arrayLen(zcu));
|
||||
defer sfba.free(elem_buf);
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(dest_ty)).toRef();
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
var cur_uint = uint_val;
|
||||
for (elem_buf) |*elem| {
|
||||
const elem_as_uint = main_block.addTyOp(l, .trunc, elem_uint_ty, cur_uint).toRef();
|
||||
elem.* = main_block.addBitCast(l, elem_ty, elem_as_uint);
|
||||
cur_uint = main_block.addBinOp(l, .shr, cur_uint, bits_per_elem_ref).toRef();
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const bit_offset = loop.block.addBinOp(l, .mul, index_val, .fromValue(try pt.intValue(.usize, elem_bits))).toRef();
|
||||
const casted_bit_offset = loop.block.addTyOp(l, .intcast, shift_ty, bit_offset).toRef();
|
||||
const shifted_uint = loop.block.addBinOp(l, .shr, index_val, casted_bit_offset).toRef();
|
||||
const elem_uint = loop.block.addTyOp(l, .trunc, elem_uint_ty, shifted_uint).toRef();
|
||||
const elem_val = loop.block.addBitCast(l, elem_ty, elem_uint);
|
||||
switch (dest_ty.zigTypeTag(zcu)) {
|
||||
.array => {
|
||||
const elem_ptr = loop.block.add(l, .{
|
||||
.tag = .ptr_elem_ptr,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(try pt.singleMutPtrType(elem_ty)),
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = result_ptr,
|
||||
.rhs = index_val,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.addBinOp(l, .store, elem_ptr, elem_val);
|
||||
},
|
||||
.vector => {
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = elem_val,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.stealCapacity(1);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
break :result main_block.add(l, .{
|
||||
.tag = .aggregate_init,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(dest_ty),
|
||||
.payload = payload: {
|
||||
const idx = l.air_extra.items.len;
|
||||
try l.air_extra.appendSlice(gpa, @ptrCast(elem_buf));
|
||||
break :payload @intCast(idx);
|
||||
},
|
||||
} },
|
||||
}).toRef();
|
||||
};
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, dest_ty.arrayLen(zcu) - 1))).toRef();
|
||||
|
||||
main_block.addBr(l, orig_inst, result);
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, dest_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
}
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = .fromType(dest_ty),
|
||||
@@ -995,10 +1463,6 @@ fn scalarizeBitcastBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!?
|
||||
fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!Air.Inst.Data {
|
||||
const pt = l.pt;
|
||||
const zcu = pt.zcu;
|
||||
const gpa = zcu.gpa;
|
||||
|
||||
var sfba_state = std.heap.stackFallback(512, gpa);
|
||||
const sfba = sfba_state.get();
|
||||
|
||||
const orig = l.air_instructions.get(@intFromEnum(orig_inst));
|
||||
const orig_operands = l.extraData(Air.Bin, orig.data.ty_pl.payload).data;
|
||||
@@ -1015,89 +1479,127 @@ fn scalarizeOverflowBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index) Error!
|
||||
const scalar_int_ty = vec_int_ty.childType(zcu);
|
||||
const scalar_tuple_ty = try pt.overflowArithmeticTupleType(scalar_int_ty);
|
||||
|
||||
// %1 = block(struct { @Vector(N, Int), @Vector(N, u1) }, {
|
||||
// %2 = alloc(*usize)
|
||||
// %3 = alloc(*struct { @Vector(N, Int), @Vector(N, u1) })
|
||||
// %4 = struct_field_ptr_index_0(*@Vector(N, Int), %3)
|
||||
// %5 = struct_field_ptr_index_1(*@Vector(N, u1), %3)
|
||||
// %6 = store(%2, @zero_usize)
|
||||
// %7 = loop({
|
||||
// %8 = load(%2)
|
||||
// %9 = legalize_vec_elem_val(orig_lhs, %8)
|
||||
// %10 = legalize_vec_elem_val(orig_rhs, %8)
|
||||
// %11 = ???_with_overflow(struct { Int, u1 }, %9, %10)
|
||||
// %12 = struct_field_val(%11, 0)
|
||||
// %13 = struct_field_val(%11, 1)
|
||||
// %14 = legalize_vec_store_elem(%4, %8, %12)
|
||||
// %15 = legalize_vec_store_elem(%5, %8, %13)
|
||||
// %16 = cmp_eq(%8, <usize, N-1>)
|
||||
// %17 = cond_br(%16, {
|
||||
// %18 = load(%3)
|
||||
// %19 = br(%1, %18)
|
||||
// }, {
|
||||
// %20 = add(%8, @one_usize)
|
||||
// %21 = store(%2, %20)
|
||||
// %22 = repeat(%7)
|
||||
// })
|
||||
// })
|
||||
// })
|
||||
|
||||
const elems_len = vec_int_ty.vectorLen(zcu);
|
||||
|
||||
const inst_buf = try sfba.alloc(Air.Inst.Index, 5 * elems_len + 4);
|
||||
defer sfba.free(inst_buf);
|
||||
var inst_buf: [21]Air.Inst.Index = undefined;
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
|
||||
var main_block: Block = .init(inst_buf);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
const index_ptr = main_block.addTy(l, .alloc, .ptr_usize).toRef();
|
||||
const result_ptr = main_block.addTy(l, .alloc, try pt.singleMutPtrType(vec_tuple_ty)).toRef();
|
||||
const result_int_ptr = main_block.addTyOp(
|
||||
l,
|
||||
.struct_field_ptr_index_0,
|
||||
try pt.singleMutPtrType(vec_int_ty),
|
||||
result_ptr,
|
||||
).toRef();
|
||||
const result_overflow_ptr = main_block.addTyOp(
|
||||
l,
|
||||
.struct_field_ptr_index_1,
|
||||
try pt.singleMutPtrType(vec_overflow_ty),
|
||||
result_ptr,
|
||||
).toRef();
|
||||
|
||||
const int_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len);
|
||||
defer sfba.free(int_elem_buf);
|
||||
const overflow_elem_buf = try sfba.alloc(Air.Inst.Ref, elems_len);
|
||||
defer sfba.free(overflow_elem_buf);
|
||||
_ = main_block.addBinOp(l, .store, index_ptr, .zero_usize);
|
||||
|
||||
for (int_elem_buf, overflow_elem_buf, 0..) |*int_elem, *overflow_elem, elem_idx| {
|
||||
const elem_idx_ref: Air.Inst.Ref = .fromValue(try pt.intValue(.usize, elem_idx));
|
||||
const lhs = main_block.addBinOp(l, .array_elem_val, orig_operands.lhs, elem_idx_ref).toRef();
|
||||
const rhs = main_block.addBinOp(l, .array_elem_val, orig_operands.rhs, elem_idx_ref).toRef();
|
||||
const elem_result = main_block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(scalar_tuple_ty),
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
} },
|
||||
}).toRef();
|
||||
int_elem.* = main_block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(scalar_int_ty),
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 0,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
overflow_elem.* = main_block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .bool_type,
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 1,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
}
|
||||
var loop: Loop = .init(l, &main_block);
|
||||
loop.block = .init(main_block.stealRemainingCapacity());
|
||||
|
||||
const int_vec = main_block.add(l, .{
|
||||
.tag = .aggregate_init,
|
||||
const index_val = loop.block.addTyOp(l, .load, .usize, index_ptr).toRef();
|
||||
const lhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.lhs, index_val).toRef();
|
||||
const rhs = loop.block.addBinOp(l, .legalize_vec_elem_val, orig_operands.rhs, index_val).toRef();
|
||||
const elem_result = loop.block.add(l, .{
|
||||
.tag = orig.tag,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(vec_int_ty),
|
||||
.payload = payload: {
|
||||
const idx = l.air_extra.items.len;
|
||||
try l.air_extra.appendSlice(gpa, @ptrCast(int_elem_buf));
|
||||
break :payload @intCast(idx);
|
||||
},
|
||||
.ty = .fromType(scalar_tuple_ty),
|
||||
.payload = try l.addExtra(Air.Bin, .{ .lhs = lhs, .rhs = rhs }),
|
||||
} },
|
||||
}).toRef();
|
||||
const overflow_vec = main_block.add(l, .{
|
||||
.tag = .aggregate_init,
|
||||
const int_elem = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(vec_overflow_ty),
|
||||
.payload = payload: {
|
||||
const idx = l.air_extra.items.len;
|
||||
try l.air_extra.appendSlice(gpa, @ptrCast(overflow_elem_buf));
|
||||
break :payload @intCast(idx);
|
||||
},
|
||||
.ty = .fromType(scalar_int_ty),
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 0,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
|
||||
const tuple_elems: [2]Air.Inst.Ref = .{ int_vec, overflow_vec };
|
||||
const result = main_block.add(l, .{
|
||||
.tag = .aggregate_init,
|
||||
const overflow_elem = loop.block.add(l, .{
|
||||
.tag = .struct_field_val,
|
||||
.data = .{ .ty_pl = .{
|
||||
.ty = .fromType(vec_tuple_ty),
|
||||
.payload = payload: {
|
||||
const idx = l.air_extra.items.len;
|
||||
try l.air_extra.appendSlice(gpa, @ptrCast(&tuple_elems));
|
||||
break :payload @intCast(idx);
|
||||
},
|
||||
.ty = .u1_type,
|
||||
.payload = try l.addExtra(Air.StructField, .{
|
||||
.struct_operand = elem_result,
|
||||
.field_index = 1,
|
||||
}),
|
||||
} },
|
||||
}).toRef();
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_int_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = int_elem,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
_ = loop.block.add(l, .{
|
||||
.tag = .legalize_vec_store_elem,
|
||||
.data = .{ .pl_op = .{
|
||||
.operand = result_overflow_ptr,
|
||||
.payload = try l.addExtra(Air.Bin, .{
|
||||
.lhs = index_val,
|
||||
.rhs = overflow_elem,
|
||||
}),
|
||||
} },
|
||||
});
|
||||
|
||||
main_block.addBr(l, orig_inst, result);
|
||||
const is_end_val = loop.block.addBinOp(l, .cmp_eq, index_val, .fromValue(try pt.intValue(.usize, elems_len - 1))).toRef();
|
||||
var condbr: CondBr = .init(l, is_end_val, &loop.block, .{});
|
||||
|
||||
condbr.then_block = .init(loop.block.stealRemainingCapacity());
|
||||
const result_val = condbr.then_block.addTyOp(l, .load, vec_tuple_ty, result_ptr).toRef();
|
||||
condbr.then_block.addBr(l, orig_inst, result_val);
|
||||
|
||||
condbr.else_block = .init(condbr.then_block.stealRemainingCapacity());
|
||||
const new_index_val = condbr.else_block.addBinOp(l, .add, index_val, .one_usize).toRef();
|
||||
_ = condbr.else_block.addBinOp(l, .store, index_ptr, new_index_val);
|
||||
_ = condbr.else_block.add(l, .{
|
||||
.tag = .repeat,
|
||||
.data = .{ .repeat = .{ .loop_inst = loop.inst } },
|
||||
});
|
||||
|
||||
try condbr.finish(l);
|
||||
try loop.finish(l);
|
||||
|
||||
return .{ .ty_pl = .{
|
||||
.ty = .fromType(vec_tuple_ty),
|
||||
@@ -1288,7 +1790,7 @@ fn safeIntFromFloatBlockPayload(l: *Legalize, orig_inst: Air.Inst.Index, optimiz
|
||||
|
||||
// We emit 9 instructions in the worst case.
|
||||
var inst_buf: [9]Air.Inst.Index = undefined;
|
||||
try l.air_instructions.ensureUnusedCapacity(zcu.gpa, inst_buf.len);
|
||||
try l.air_instructions.ensureUnusedCapacity(gpa, inst_buf.len);
|
||||
var main_block: Block = .init(&inst_buf);
|
||||
|
||||
// This check is a bit annoying because of floating-point rounding and the fact that this
|
||||
@@ -1771,6 +2273,9 @@ const Block = struct {
|
||||
.data = .{ .br = .{ .block_inst = target, .operand = operand } },
|
||||
});
|
||||
}
|
||||
/// Appends an instruction with the given `tag` whose data is the `ty` variant holding `ty`,
/// and returns the index of the new instruction.
fn addTy(b: *Block, l: *Legalize, tag: Air.Inst.Tag, ty: Type) Air.Inst.Index {
    return b.add(l, .{
        .tag = tag,
        .data = .{ .ty = ty },
    });
}
|
||||
fn addBinOp(b: *Block, l: *Legalize, tag: Air.Inst.Tag, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) Air.Inst.Index {
|
||||
return b.add(l, .{
|
||||
.tag = tag,
|
||||
@@ -1921,6 +2426,31 @@ const Block = struct {
|
||||
}
|
||||
};
|
||||
|
||||
/// Helper for emitting a `loop` instruction whose body is built after the instruction itself
/// has been added to its parent block.
const Loop = struct {
    /// The `loop` instruction.
    inst: Air.Inst.Index,
    /// The loop body. Left `undefined` by `init`.
    block: Block,

    /// Appends a `loop` instruction to `parent_block`. The instruction's payload is left
    /// undefined until `finish` is called, and the `block` field of the returned value is
    /// `undefined`; it is the caller's responsibility to initialize it.
    fn init(l: *Legalize, parent_block: *Block) Loop {
        const loop_inst = parent_block.add(l, .{
            .tag = .loop,
            .data = .{ .ty_pl = .{
                .ty = .noreturn_type,
                .payload = undefined,
            } },
        });
        return .{ .inst = loop_inst, .block = undefined };
    }

    /// Writes out the body of `loop.block` and stores it as the payload of the `loop`
    /// instruction.
    fn finish(loop: Loop, l: *Legalize) Error!void {
        const body_payload = try l.addBlockBody(loop.block.body());
        l.air_instructions.items(.data)[@intFromEnum(loop.inst)].ty_pl.payload = body_payload;
    }
};
|
||||
|
||||
const CondBr = struct {
|
||||
inst: Air.Inst.Index,
|
||||
hints: Air.CondBr.BranchHints,
|
||||
|
||||
@@ -458,6 +458,7 @@ fn analyzeInst(
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
.memmove,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
const o = inst_datas[@intFromEnum(inst)].bin_op;
|
||||
return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none });
|
||||
@@ -769,6 +770,12 @@ fn analyzeInst(
|
||||
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
|
||||
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, .none, .none });
|
||||
},
|
||||
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = inst_datas[@intFromEnum(inst)].pl_op;
|
||||
const bin = a.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
return analyzeOperands(a, pass, data, inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -272,6 +272,7 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
|
||||
.memset_safe,
|
||||
.memcpy,
|
||||
.memmove,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
const bin_op = data[@intFromEnum(inst)].bin_op;
|
||||
try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none });
|
||||
@@ -577,6 +578,11 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
|
||||
|
||||
try self.verifyInst(inst);
|
||||
},
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = data[@intFromEnum(inst)].pl_op;
|
||||
const bin = self.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
try self.verifyInstOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,6 +171,7 @@ const Writer = struct {
|
||||
.memmove,
|
||||
.memset,
|
||||
.memset_safe,
|
||||
.legalize_vec_elem_val,
|
||||
=> try w.writeBinOp(s, inst),
|
||||
|
||||
.is_null,
|
||||
@@ -331,6 +332,7 @@ const Writer = struct {
|
||||
.reduce, .reduce_optimized => try w.writeReduce(s, inst),
|
||||
.cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst),
|
||||
.runtime_nav_ptr => try w.writeRuntimeNavPtr(s, inst),
|
||||
.legalize_vec_store_elem => try w.writeLegalizeVecStoreElem(s, inst),
|
||||
|
||||
.work_item_id,
|
||||
.work_group_size,
|
||||
@@ -508,6 +510,18 @@ const Writer = struct {
|
||||
try w.writeOperand(s, inst, 2, pl_op.operand);
|
||||
}
|
||||
|
||||
/// Prints the three operands of a `legalize_vec_store_elem` instruction to `s`
/// as a comma-separated list.
///
/// The instruction stores its first operand directly in `pl_op.operand`; the
/// remaining two are the `lhs` and `rhs` of the `Air.Bin` found in `extra` at
/// `pl_op.payload`. They are printed in that order, using operand indices 0, 1
/// and 2 respectively.
///
/// Propagates any error returned by `writeOperand` or by the underlying writer.
fn writeLegalizeVecStoreElem(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
    const pl_op = w.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const bin = w.air.extraData(Air.Bin, pl_op.payload).data;

    try w.writeOperand(s, inst, 0, pl_op.operand);
    try s.writeAll(", ");
    try w.writeOperand(s, inst, 1, bin.lhs);
    try s.writeAll(", ");
    // Last operand: no separator is written after it, so the printed operand
    // list does not end with a dangling ", ".
    try w.writeOperand(s, inst, 2, bin.rhs);
}
|
||||
|
||||
fn writeShuffleOne(w: *Writer, s: *std.Io.Writer, inst: Air.Inst.Index) Error!void {
|
||||
const unwrapped = w.air.unwrapShuffleOne(w.pt.zcu, inst);
|
||||
try w.writeType(s, unwrapped.result_ty);
|
||||
|
||||
@@ -88,6 +88,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
|
||||
.atomic_store_monotonic,
|
||||
.atomic_store_release,
|
||||
.atomic_store_seq_cst,
|
||||
.legalize_vec_elem_val,
|
||||
=> {
|
||||
if (!checkRef(data.bin_op.lhs, zcu)) return false;
|
||||
if (!checkRef(data.bin_op.rhs, zcu)) return false;
|
||||
@@ -322,6 +323,7 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool {
|
||||
|
||||
.select,
|
||||
.mul_add,
|
||||
.legalize_vec_store_elem,
|
||||
=> {
|
||||
const bin = air.extraData(Air.Bin, data.pl_op.payload).data;
|
||||
if (!checkRef(data.pl_op.operand, zcu)) return false;
|
||||
|
||||
+14
-9
@@ -15930,16 +15930,21 @@ fn zirOverflowArithmetic(
|
||||
}
|
||||
}
|
||||
// If either of the arguments is one, the result is the other and no overflow occurred.
|
||||
const scalar_one = try pt.intValue(dest_ty.scalarType(zcu), 1);
|
||||
const vec_one = try sema.splat(dest_ty, scalar_one);
|
||||
if (maybe_lhs_val) |lhs_val| {
|
||||
if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
|
||||
const dest_scalar_ty = dest_ty.scalarType(zcu);
|
||||
const dest_scalar_int = dest_scalar_ty.intInfo(zcu);
|
||||
// We could still be working with i1, where '1' is not a legal value!
|
||||
if (!(dest_scalar_int.bits == 1 and dest_scalar_int.signedness == .signed)) {
|
||||
const scalar_one = try pt.intValue(dest_scalar_ty, 1);
|
||||
const vec_one = try sema.splat(dest_ty, scalar_one);
|
||||
if (maybe_lhs_val) |lhs_val| {
|
||||
if (!lhs_val.isUndef(zcu) and try sema.compareAll(lhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = rhs };
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maybe_rhs_val) |rhs_val| {
|
||||
if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
|
||||
if (maybe_rhs_val) |rhs_val| {
|
||||
if (!rhs_val.isUndef(zcu) and try sema.compareAll(rhs_val, .eq, vec_one, dest_ty)) {
|
||||
break :result .{ .overflow_bit = try sema.splat(overflow_ty, .zero_u1), .inst = lhs };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -134,6 +134,10 @@ pub fn analyze(isel: *Select, air_body: []const Air.Inst.Index) !void {
|
||||
var air_inst_index = air_body[air_body_index];
|
||||
const initial_def_order_len = isel.def_order.count();
|
||||
air_tag: switch (air_tags[@intFromEnum(air_inst_index)]) {
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg,
|
||||
.ret_addr,
|
||||
.frame_addr,
|
||||
@@ -950,6 +954,11 @@ pub fn body(isel: *Select, air_body: []const Air.Inst.Index) error{ OutOfMemory,
|
||||
};
|
||||
air_tag: switch (air.next().?) {
|
||||
else => |air_tag| return isel.fail("unimplemented {t}", .{air_tag}),
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg => {
|
||||
const arg_vi = isel.live_values.fetchRemove(air.inst_index).?.value;
|
||||
defer arg_vi.deref(isel);
|
||||
|
||||
@@ -3325,6 +3325,10 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) Error!void {
|
||||
// zig fmt: off
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.arg => try airArg(f, inst),
|
||||
|
||||
.breakpoint => try airBreakpoint(f),
|
||||
|
||||
@@ -4886,6 +4886,11 @@ pub const FuncGen = struct {
|
||||
|
||||
const val: Builder.Value = switch (air_tags[@intFromEnum(inst)]) {
|
||||
// zig fmt: off
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.add => try self.airAdd(inst, .normal),
|
||||
.add_optimized => try self.airAdd(inst, .fast),
|
||||
.add_wrap => try self.airAddWrap(inst),
|
||||
|
||||
@@ -1391,6 +1391,11 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void {
|
||||
const tag = air_tags[@intFromEnum(inst)];
|
||||
switch (tag) {
|
||||
// zig fmt: off
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.add,
|
||||
.add_wrap,
|
||||
.sub,
|
||||
|
||||
@@ -479,6 +479,11 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
|
||||
self.reused_operands = @TypeOf(self.reused_operands).initEmpty();
|
||||
switch (air_tags[@intFromEnum(inst)]) {
|
||||
// zig fmt: off
|
||||
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.ptr_add => try self.airPtrArithmetic(inst, .ptr_add),
|
||||
.ptr_sub => try self.airPtrArithmetic(inst, .ptr_sub),
|
||||
|
||||
|
||||
@@ -1786,6 +1786,10 @@ fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum
|
||||
fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
|
||||
const air_tags = cg.air.instructions.items(.tag);
|
||||
return switch (air_tags[@intFromEnum(inst)]) {
|
||||
// No "scalarize" legalizations are enabled, so these instructions never appear.
|
||||
.legalize_vec_elem_val => unreachable,
|
||||
.legalize_vec_store_elem => unreachable,
|
||||
|
||||
.inferred_alloc, .inferred_alloc_comptime => unreachable,
|
||||
|
||||
.add => cg.airBinOp(inst, .add),
|
||||
|
||||
@@ -103926,7 +103926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
try ops[0].toOffset(0, cg);
|
||||
try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg);
|
||||
},
|
||||
.array_elem_val => {
|
||||
.array_elem_val, .legalize_vec_elem_val => {
|
||||
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
|
||||
const array_ty = cg.typeOf(bin_op.lhs);
|
||||
const res_ty = array_ty.elemType2(zcu);
|
||||
@@ -173061,6 +173061,634 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
|
||||
.c_va_copy => try cg.airVaCopy(inst),
|
||||
.c_va_end => try cg.airVaEnd(inst),
|
||||
.c_va_start => try cg.airVaStart(inst),
|
||||
.legalize_vec_store_elem => {
|
||||
const pl_op = air_datas[@intFromEnum(inst)].pl_op;
|
||||
const bin = cg.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
// vector_ptr, index, elem_val
|
||||
var ops = try cg.tempsFromOperands(inst, .{ pl_op.operand, bin.lhs, bin.rhs });
|
||||
cg.select(&.{}, &.{}, &ops, comptime &.{ .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .cmov, null, null, null },
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .byte }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u8, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .movzx, .tmp0d, .lea(.src0b), ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._nz, .j, .@"0f", ._, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
|
||||
.{ .@"0:", ._s, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ .@"1:", ._, .mov, .lea(.src0b), .tmp0b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .cmov, null, null, null },
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u16, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u16, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .movzx, .tmp0d, .lea(.src0w), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0w), .tmp0w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .word }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
|
||||
.{ ._, ._r, .bt, .lea(.src0w), .src1w, ._, ._ },
|
||||
.{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
|
||||
.{ .@"1:", ._s, .bt, .lea(.src0w), .src1w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 0 } } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .{ .imm = 1 } } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._s, .bt, .lea(.src0d), .src1d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .cmov, null, null, null },
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .dword }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .tmp0d, .lea(.src0d), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .tmp0d, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp1d, .src1d, ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._z, .cmov, .tmp0d, .tmp1d, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0d), .tmp0d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .@"64bit", .cmov, null, null },
|
||||
.src_constraints = .{ .{ .ptr_bool_vec = .qword }, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .tmp0q, .lea(.src0q), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1q, .tmp0q, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp1q, .src1q, ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp0q, .src1q, ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._z, .cmov, .tmp0q, .tmp1q, ._, ._ },
|
||||
.{ ._, ._, .mov, .lea(.src0q), .tmp0q, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .cmov, null, null, null },
|
||||
.src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
|
||||
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .tmp0d, .src1d, ._, ._ },
|
||||
.{ ._, ._r, .sh, .tmp0d, .ui(5), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .leasi(.src0d, .@"4", .tmp0), ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp2d, .tmp1d, ._, ._ },
|
||||
.{ ._, ._r, .bt, .tmp2d, .src1d, ._, ._ },
|
||||
.{ ._, ._s, .bt, .tmp1d, .src1d, ._, ._ },
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._z, .cmov, .tmp1d, .tmp2d, ._, ._ },
|
||||
.{ ._, ._, .mov, .leasi(.src0d, .@"4", .tmp0), .tmp1d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .ptr_any_bool_vec, .any, .bool },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.clobbers = .{ .eflags = true },
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .@"test", .src2b, .si(1), ._, ._ },
|
||||
.{ ._, ._nz, .j, .@"1f", ._, ._, ._ },
|
||||
.{ ._, ._r, .bt, .lea(.src0d), .src1d, ._, ._ },
|
||||
.{ ._, ._mp, .j, .@"0f", ._, ._, ._ },
|
||||
.{ .@"1:", ._s, .bt, .lea(.src0d), .src1d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .byte } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .imm8 } },
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leaa(.src0b, .add_src0_elem_size_mul_src1), .src2b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .byte } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .imm8 } },
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leai(.src0b, .src1), .src2b, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .imm16 } },
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .imm16 } },
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .src2w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .vp_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse4_1, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .p_w, .extr, .leaa(.src0w, .add_src0_elem_size_mul_src1), .src2x, .ui(0), ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse2, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ },
|
||||
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp0w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .f32, .kind = .mem },
|
||||
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ },
|
||||
.{ ._, ._, .mov, .leaa(.src0w, .add_src0_elem_size_mul_src1), .tmp1w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .vp_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse4_1, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .p_w, .extr, .leasi(.src0w, .@"2", .src1), .src2x, .ui(0), ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse2, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .p_w, .extr, .tmp0d, .src2x, .ui(0), ._ },
|
||||
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp0w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .word } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.extra_temps = .{
|
||||
.{ .type = .f32, .kind = .mem },
|
||||
.{ .type = .f16, .kind = .{ .rc = .general_purpose } },
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
.unused,
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ss, .mov, .mem(.tmp1d), .src2x, ._, ._ },
|
||||
.{ ._, ._, .mov, .tmp1d, .mem(.tmp1d), ._, ._ },
|
||||
.{ ._, ._, .mov, .leasi(.src0w, .@"2", .src1), .tmp1w, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .imm32 } },
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.src_constraints = .{ .any, .any, .{ .int = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .imm32 } },
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leasi(.src0d, .@"4", .src1), .src2d, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .v_ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ss, .mov, .leaa(.src0d, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .v_ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .dword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ss, .mov, .leasi(.src0d, .@"4", .src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .@"64bit", null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .int = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .simm32 } },
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2q, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .@"64bit", null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .int = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .simm32 } },
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_gpr } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._, .mov, .leasi(.src0q, .@"8", .src1), .src2q, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .v_sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse2, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._sd, .mov, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .simm32, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ps, .movl, .leaa(.src0q, .add_src0_elem_size_mul_src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .avx, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, .v_sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse2, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._sd, .mov, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
|
||||
} },
|
||||
}, .{
|
||||
.required_features = .{ .sse, null, null, null },
|
||||
.src_constraints = .{ .any, .any, .{ .float = .qword } },
|
||||
.patterns = &.{
|
||||
.{ .src = .{ .to_gpr, .to_gpr, .to_sse } },
|
||||
},
|
||||
.each = .{ .once = &.{
|
||||
.{ ._, ._ps, .movl, .leasi(.src0q, .@"8", .src1), .src2x, ._, ._ },
|
||||
} },
|
||||
} }) catch |err| switch (err) {
|
||||
error.SelectFailed => {
|
||||
const elem_size = cg.typeOf(bin.rhs).abiSize(zcu);
|
||||
while (try ops[0].toRegClass(true, .general_purpose, cg) or
|
||||
try ops[1].toRegClass(true, .general_purpose, cg))
|
||||
{}
|
||||
const base_reg = ops[0].tracking(cg).short.register.to64();
|
||||
const rhs_reg = ops[1].tracking(cg).short.register.to64();
|
||||
if (!std.math.isPowerOfTwo(elem_size)) {
|
||||
try cg.spillEflagsIfOccupied();
|
||||
try cg.asmRegisterRegisterImmediate(
|
||||
.{ .i_, .mul },
|
||||
rhs_reg,
|
||||
rhs_reg,
|
||||
.u(elem_size),
|
||||
);
|
||||
try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
|
||||
.base = .{ .reg = base_reg },
|
||||
.mod = .{ .rm = .{ .index = rhs_reg } },
|
||||
});
|
||||
} else if (elem_size > 8) {
|
||||
try cg.spillEflagsIfOccupied();
|
||||
try cg.asmRegisterImmediate(
|
||||
.{ ._l, .sh },
|
||||
rhs_reg,
|
||||
.u(std.math.log2_int(u64, elem_size)),
|
||||
);
|
||||
try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
|
||||
.base = .{ .reg = base_reg },
|
||||
.mod = .{ .rm = .{ .index = rhs_reg } },
|
||||
});
|
||||
} else try cg.asmRegisterMemory(.{ ._, .lea }, base_reg, .{
|
||||
.base = .{ .reg = base_reg },
|
||||
.mod = .{ .rm = .{
|
||||
.index = rhs_reg,
|
||||
.scale = .fromFactor(@intCast(elem_size)),
|
||||
} },
|
||||
});
|
||||
try ops[0].store(&ops[2], .{}, cg);
|
||||
},
|
||||
else => |e| return e,
|
||||
};
|
||||
for (ops) |op| try op.die(cg);
|
||||
},
|
||||
.work_item_id, .work_group_size, .work_group_id => unreachable,
|
||||
}
|
||||
try cg.resetTemps(@enumFromInt(0));
|
||||
|
||||
Reference in New Issue
Block a user