x86_64: rewrite scalar shifts

This commit is contained in:
Jacob Young
2025-03-12 22:09:46 -04:00
parent aff2be01c9
commit 2361468e23
5 changed files with 1071 additions and 64 deletions
+1000 -39
View File
@@ -2418,7 +2418,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
}
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
@setEvalBranchQuota(13_800);
@setEvalBranchQuota(13_900);
const pt = cg.pt;
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
@@ -2454,9 +2454,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
try cg.inst_tracking.ensureUnusedCapacity(cg.gpa, 1);
switch (air_tags[@intFromEnum(inst)]) {
// zig fmt: off
.shr, .shr_exact => try cg.airShlShrBinOp(inst),
.shl, .shl_exact => try cg.airShlShrBinOp(inst),
.add_sat => try cg.airAddSat(inst),
.sub_sat => try cg.airSubSat(inst),
.mul_sat => try cg.airMulSat(inst),
@@ -28416,6 +28413,947 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void {
};
try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
},
// Lowering for scalar `shr` / `shr_exact`; both tags share one candidate table.
// The legacy `airShlShrBinOp` path is still used when `use_old` is set or when the lhs type is a
// vector. Otherwise both operands become temps and `cg.select` is handed the comptime table below;
// if it reports `error.SelectFailed`, a diagnostic naming the tag, operand types and operand
// tracking is emitted through `cg.fail`.
// NOTE(review): mnemonics below are spelled as a fix plus a stem (e.g. `._r` + `.sa`); the x86
// names given in comments (`sar`, `shr`, `shrd`, ...) are the presumed spellings — confirm against
// the encoder tables.
.shr, .shr_exact => |air_tag| if (use_old) try cg.airShlShrBinOp(inst) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
if (cg.typeOf(bin_op.lhs).isVector(zcu)) break :fallback try cg.airShlShrBinOp(inst);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
// Signed byte-sized lhs: shift src0 in place (dst0 refers to src0) with the count either an
// imm8 or in cl. `._r` + `.sa` is presumably `sar`.
.src_constraints = .{ .{ .signed_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0b, .src1b, ._, ._ },
} },
}, .{
// Unsigned byte-sized lhs: same shape with `.sh` instead of `.sa` (presumably `shr`).
.src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0b, .src1b, ._, ._ },
} },
}, .{
// Signed word-sized lhs; the shift amount must be exactly a 4-bit unsigned integer.
.src_constraints = .{ .{ .signed_int = .word }, .{ .exact_unsigned_int = 4 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0w, .src1b, ._, ._ },
} },
}, .{
// Unsigned word-sized lhs, 4-bit shift amount.
.src_constraints = .{ .{ .unsigned_int = .word }, .{ .exact_unsigned_int = 4 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0w, .src1b, ._, ._ },
} },
}, .{
// Signed dword-sized lhs, 5-bit shift amount, immediate counts only. The cl-based forms for
// this size are listed after the BMI2 entries (presumably so that entries are tried in
// table order and BMI2 wins for non-immediate counts — confirm against `select`).
.src_constraints = .{ .{ .signed_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0d, .src1b, ._, ._ },
} },
}, .{
// Unsigned dword-sized lhs, 5-bit shift amount, immediate counts only.
.src_constraints = .{ .{ .unsigned_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0d, .src1b, ._, ._ },
} },
}, .{
// BMI2, signed dword: three-operand form (`._rx` + `.sa`, presumably `sarx`). The source may
// stay in memory or a gpr, the count is in any gpr, and the result goes to a general purpose
// register. No eflags clobber is declared for this entry.
.required_features = .{ .bmi2, null, null, null },
.src_constraints = .{ .{ .signed_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._rx, .sa, .dst0d, .src0d, .src1d, ._ },
} },
}, .{
// BMI2, unsigned dword: as above with `.sh` (presumably `shrx`).
.required_features = .{ .bmi2, null, null, null },
.src_constraints = .{ .{ .unsigned_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._rx, .sh, .dst0d, .src0d, .src1d, ._ },
} },
}, .{
// Signed dword without BMI2: in-place shift with the count in cl.
.src_constraints = .{ .{ .signed_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0d, .src1b, ._, ._ },
} },
}, .{
// Unsigned dword without BMI2: in-place shift with the count in cl.
.src_constraints = .{ .{ .unsigned_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0d, .src1b, ._, ._ },
} },
}, .{
// The qword entries mirror the dword group (immediate, BMI2, then cl), additionally require
// the `64bit` feature and take a 6-bit shift amount. Signed qword, immediate count:
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .signed_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Unsigned qword, immediate count.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0q, .src1b, ._, ._ },
} },
}, .{
// BMI2, signed qword: three-operand form with the count in any gpr.
.required_features = .{ .@"64bit", .bmi2, null, null },
.src_constraints = .{ .{ .signed_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._rx, .sa, .dst0q, .src0q, .src1q, ._ },
} },
}, .{
// BMI2, unsigned qword.
.required_features = .{ .@"64bit", .bmi2, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._rx, .sh, .dst0q, .src0q, .src1q, ._ },
} },
}, .{
// Signed qword without BMI2: in-place shift with the count in cl.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .signed_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sa, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Unsigned qword without BMI2: in-place shift with the count in cl.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._r, .sh, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Multi-limb signed lhs (`remainder_signed_int` of qword; presumably integers wider than one
// qword — confirm), byte-sized count in cl, `slow_incdec` targets. src0 is read from memory
// and the result is written to a separate memory destination, one 64-bit limb at a time.
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
// tmp0 = count >> 6: the number of whole 64-bit limbs shifted out.
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
// tmp1 = tmp0 * 8 minus the size of src0 (presumably, per `.sub_src0_size`): a negative
// byte offset that the first loop steps up towards zero.
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
// tmp0 = ~tmp0, i.e. -(limbs shifted out) - 1; used as the up-counting index of the fill loop.
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
// tmp2 = store base for the first loop, derived from dst0, tmp0 * 8 and the operand size.
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
// Prime tmp3 with the lowest source limb that survives, then enter the loop at its increment.
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
// First loop: load the next higher limb into tmp4, shift its low bits into tmp3 by cl
// (`._rd` + `.sh`, presumably `shrd`), store the finished limb, then carry tmp4 forward.
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
// Step one limb; loop while the add does not carry (the offset has not yet wrapped to zero).
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
// Highest source limb: plain arithmetic shift by cl.
.{ ._, ._r, .sa, .tmp3q, .src1b, ._, ._ },
// Fill loop: store tmp3, then replace it with its sign (arithmetic shift by 63) so every
// remaining high limb receives the sign fill; tmp0 counts up until the add carries.
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Multi-limb signed lhs, byte-sized count, without `slow_incdec`: same sequence as the
// previous entry except that the fill loop counts with `._c` + `.in` (presumably `inc`) and
// exits on the zero flag instead of the carry flag.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
.{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Multi-limb unsigned lhs, byte-sized count, `slow_incdec`: same structure as the signed
// entries, but the top limb uses a logical shift and the fill value is zero (xor) rather
// than the sign.
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Multi-limb unsigned lhs, byte-sized count, without `slow_incdec`: zero fill, with the
// fill loop counted by `._c` + `.in` and exited on the zero flag.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// The remaining four entries repeat the multi-limb sequences for a word-sized count held in
// cx: the full word (`src1w`) is zero-extended to compute the limb count, while the
// per-limb shifts keep using the low byte (`src1b`).
// Signed, `slow_incdec`:
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Signed, word-sized count, without `slow_incdec`.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_signed_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._r, .sa, .tmp3q, .ui(63), ._, ._ },
.{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Unsigned, word-sized count, `slow_incdec`.
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .add, .tmp0p, .si(1), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Unsigned, word-sized count, without `slow_incdec`.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_unsigned_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .isize, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .lea, .tmp1p, .leasia(.none, .@"8", .tmp0, .sub_src0_size), ._, ._ },
.{ ._, ._, .not, .tmp0p, ._, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsia(.dst0, .@"8", .tmp0, .add_size), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memia(.src0q, .tmp1, .add_size), ._, ._ },
.{ ._, ._rd, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leai(.tmp2q, .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .add, .tmp1p, .si(8), ._, ._ },
.{ ._, ._nc, .j, .@"0b", ._, ._, ._ },
.{ ._, ._r, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsia(.dst0q, .@"8", .tmp0, .add_size), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._c, .in, .tmp0p, ._, ._, ._ },
.{ ._, ._nz, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
// No table entry matched: report the tag, both operand types and how each operand is tracked.
error.SelectFailed => return cg.fail("failed to select {s} {} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
cg.typeOf(bin_op.rhs).fmt(pt),
ops[0].tracking(cg),
ops[1].tracking(cg),
}),
else => |e| return e,
};
// Hand the selected result back for `inst`, passing both operand refs and their temps.
try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
},
// Lowering for scalar `shl` / `shl_exact`; both tags share one candidate table.
// The legacy `airShlShrBinOp` path is still used when `use_old` is set or when the lhs type is a
// vector. Otherwise both operands become temps and `cg.select` is handed the comptime table below.
// After selection, `shl` additionally runs `wrapInt` on the result while `shl_exact` does not.
// NOTE(review): mnemonics below are spelled as a fix plus a stem (e.g. `._l` + `.sh`); the x86
// names given in comments (`sal`, `shl`, `shld`, ...) are the presumed spellings — confirm against
// the encoder tables.
.shl, .shl_exact => |air_tag| if (use_old) try cg.airShlShrBinOp(inst) else fallback: {
const bin_op = air_datas[@intFromEnum(inst)].bin_op;
if (cg.typeOf(bin_op.lhs).isVector(zcu)) break :fallback try cg.airShlShrBinOp(inst);
var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs });
var res: [1]Temp = undefined;
cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{
// Signed byte-sized lhs: shift src0 in place (dst0 refers to src0) with the count either an
// imm8 or in cl. `._l` + `.sa` is presumably `sal`.
.src_constraints = .{ .{ .signed_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0b, .src1b, ._, ._ },
} },
}, .{
// Unsigned byte-sized lhs: same shape with `.sh` (presumably `shl`).
.src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0b, .src1b, ._, ._ },
} },
}, .{
// Signed word-sized lhs; the shift amount must be exactly a 4-bit unsigned integer.
.src_constraints = .{ .{ .signed_int = .word }, .{ .exact_unsigned_int = 4 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0w, .src1b, ._, ._ },
} },
}, .{
// Unsigned word-sized lhs, 4-bit shift amount.
.src_constraints = .{ .{ .unsigned_int = .word }, .{ .exact_unsigned_int = 4 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0w, .src1b, ._, ._ },
} },
}, .{
// Signed dword-sized lhs, 5-bit shift amount, immediate counts only; the cl-based forms
// follow the BMI2 entry below.
.src_constraints = .{ .{ .signed_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0d, .src1b, ._, ._ },
} },
}, .{
// Unsigned dword-sized lhs, 5-bit shift amount, immediate counts only.
.src_constraints = .{ .{ .unsigned_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0d, .src1b, ._, ._ },
} },
}, .{
// BMI2, dword of either signedness (`.int`): a single three-operand form (`._lx` + `.sh`,
// presumably `shlx`) with the count in any gpr and the result in a general purpose
// register. No eflags clobber is declared for this entry.
.required_features = .{ .bmi2, null, null, null },
.src_constraints = .{ .{ .int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._lx, .sh, .dst0d, .src0d, .src1d, ._ },
} },
}, .{
// Signed dword without BMI2: in-place shift with the count in cl.
.src_constraints = .{ .{ .signed_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0d, .src1b, ._, ._ },
} },
}, .{
// Unsigned dword without BMI2: in-place shift with the count in cl.
.src_constraints = .{ .{ .unsigned_int = .dword }, .{ .exact_unsigned_int = 5 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0d, .src1b, ._, ._ },
} },
}, .{
// The qword entries mirror the dword group (immediate, BMI2, then cl), additionally require
// the `64bit` feature and take a 6-bit shift amount. Signed qword, immediate count:
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .signed_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Unsigned qword, immediate count.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .imm8, .none } },
.{ .src = .{ .to_mut_gpr, .imm8, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0q, .src1b, ._, ._ },
} },
}, .{
// BMI2, qword of either signedness: three-operand form with the count in any gpr.
.required_features = .{ .@"64bit", .bmi2, null, null },
.src_constraints = .{ .{ .int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mem, .to_gpr, .none } },
.{ .src = .{ .to_gpr, .to_gpr, .none } },
},
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .general_purpose } }, .unused },
.each = .{ .once = &.{
.{ ._, ._lx, .sh, .dst0q, .src0q, .src1q, ._ },
} },
}, .{
// Signed qword without BMI2: in-place shift with the count in cl.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .signed_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sa, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Unsigned qword without BMI2: in-place shift with the count in cl.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .{ .exact_unsigned_int = 6 }, .any },
.patterns = &.{
.{ .src = .{ .mut_mem, .{ .to_reg = .cl }, .none } },
.{ .src = .{ .to_mut_gpr, .{ .to_reg = .cl }, .none } },
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._l, .sh, .dst0q, .src1b, ._, ._ },
} },
}, .{
// Multi-limb lhs of either signedness (`remainder_int` of qword; presumably integers wider
// than one qword — confirm), byte-sized count in cl, `slow_incdec` targets. src0 is read
// from memory and the result is written to a separate memory destination, walking the
// 64-bit limbs from the highest index downwards.
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
// tmp0 = count >> 6 (whole limbs shifted in at the bottom); tmp1 = (size of src0 / 8 - 1,
// presumably, per `.add_size_div_8`) - tmp0: the index of the highest source limb that
// still contributes to the result.
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .sub, .tmp1d, .tmp0d, ._, ._ },
// tmp2 = store base for the first loop, derived from dst0, tmp0 * 8 and a displacement of 8.
.{ ._, ._, .lea, .tmp2p, .memsid(.dst0, .@"8", .tmp0, 8), ._, ._ },
// Prime tmp3 with that source limb, then enter the loop at its decrement.
.{ ._, ._, .mov, .tmp3q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
// First loop: load the next lower limb into tmp4, shift its high bits into tmp3 by cl
// (`._ld` + `.sh`, presumably `shld`), store the finished limb, then carry tmp4 forward.
.{ .@"0:", ._, .mov, .tmp4q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._ld, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leasi(.tmp2q, .@"8", .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
// Step down one limb; loop while the subtraction does not borrow.
.{ .@"1:", ._, .sub, .tmp1d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
// Lowest source limb: plain shift by cl.
.{ ._, ._l, .sh, .tmp3q, .src1b, ._, ._ },
// Fill loop: store tmp3 at limb index tmp0, then zero it so every remaining lower limb is
// written as zero; tmp0 counts down until the subtraction borrows.
.{ .@"0:", ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Multi-limb lhs, byte-sized count, without `slow_incdec`: same sequence, but both loop
// counters use `._c` + `.de` (presumably `dec`) and loop while the result is non-negative
// (`._ns` + `.j`).
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .byte },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cl }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1b, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .sub, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsid(.dst0, .@"8", .tmp0, 8), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._ld, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leasi(.tmp2q, .@"8", .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._c, .de, .tmp1d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
.{ ._, ._l, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// The last two entries repeat the multi-limb sequences for a word-sized count held in cx:
// the full word (`src1w`) is zero-extended to compute the limb count, while the per-limb
// shifts keep using the low byte (`src1b`). `slow_incdec` variant:
.required_features = .{ .@"64bit", .slow_incdec, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .sub, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsid(.dst0, .@"8", .tmp0, 8), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._ld, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leasi(.tmp2q, .@"8", .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._, .sub, .tmp1d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
.{ ._, ._l, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._, .sub, .tmp0d, .si(1), ._, ._ },
.{ ._, ._ae, .j, .@"0b", ._, ._, ._ },
} },
}, .{
// Word-sized count, without `slow_incdec`.
.required_features = .{ .@"64bit", null, null, null },
.src_constraints = .{
.{ .remainder_int = .{ .of = .qword, .is = .qword } },
.{ .unsigned_int = .word },
.any,
},
.patterns = &.{
.{ .src = .{ .to_mem, .{ .to_reg = .cx }, .none } },
},
.extra_temps = .{
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .u32, .kind = .{ .rc = .general_purpose } },
.{ .type = .usize, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .mem, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
.{ ._, ._, .movzx, .tmp0d, .src1w, ._, ._ },
.{ ._, ._, .mov, .tmp1d, .sia(-1, .src0, .add_size_div_8), ._, ._ },
.{ ._, ._r, .sh, .tmp0d, .ui(6), ._, ._ },
.{ ._, ._, .sub, .tmp1d, .tmp0d, ._, ._ },
.{ ._, ._, .lea, .tmp2p, .memsid(.dst0, .@"8", .tmp0, 8), ._, ._ },
.{ ._, ._, .mov, .tmp3q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._mp, .j, .@"1f", ._, ._, ._ },
.{ .@"0:", ._, .mov, .tmp4q, .memsi(.src0q, .@"8", .tmp1), ._, ._ },
.{ ._, ._ld, .sh, .tmp3q, .tmp4q, .src1b, ._ },
.{ ._, ._, .mov, .leasi(.tmp2q, .@"8", .tmp1), .tmp3q, ._, ._ },
.{ ._, ._, .mov, .tmp3q, .tmp4q, ._, ._ },
.{ .@"1:", ._c, .de, .tmp1d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
.{ ._, ._l, .sh, .tmp3q, .src1b, ._, ._ },
.{ .@"0:", ._, .mov, .memsi(.dst0q, .@"8", .tmp0), .tmp3q, ._, ._ },
.{ ._, ._, .xor, .tmp3d, .tmp3d, ._, ._ },
.{ ._, ._c, .de, .tmp0d, ._, ._, ._ },
.{ ._, ._ns, .j, .@"0b", ._, ._, ._ },
} },
} }) catch |err| switch (err) {
// No table entry matched: report the tag, both operand types and how each operand is tracked.
error.SelectFailed => return cg.fail("failed to select {s} {} {} {} {}", .{
@tagName(air_tag),
cg.typeOf(bin_op.lhs).fmt(pt),
cg.typeOf(bin_op.rhs).fmt(pt),
ops[0].tracking(cg),
ops[1].tracking(cg),
}),
else => |e| return e,
};
// Only these two tags reach this prong, hence the `unreachable` default.
switch (air_tag) {
else => unreachable,
// `shl` wraps the shifted result via `wrapInt` (presumably to discard bits shifted beyond
// the integer's bit width — confirm); a selection failure there gets its own diagnostic.
.shl => res[0].wrapInt(cg) catch |err| switch (err) {
error.SelectFailed => return cg.fail("failed to select wrap {} {} {} {}", .{
cg.typeOf(bin_op.lhs).fmt(pt),
cg.typeOf(bin_op.rhs).fmt(pt),
ops[0].tracking(cg),
ops[1].tracking(cg),
}),
else => |e| return e,
},
// `shl_exact` performs no wrap step.
.shl_exact => {},
}
// Hand the selected result back for `inst`, passing both operand refs and their temps.
try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg);
},
.not => |air_tag| if (use_old) try cg.airUnOp(inst, air_tag) else {
const ty_op = air_datas[@intFromEnum(inst)].ty_op;
var ops = try cg.tempsFromOperands(inst, .{ty_op.operand});
@@ -85342,7 +86280,7 @@ fn regClassForType(self: *CodeGen, ty: Type) Register.Class {
fn regSetForRegClass(rc: Register.Class) RegisterManager.RegisterBitSet {
return switch (rc) {
.general_purpose => abi.RegisterClass.gp,
.segment, .ip, .cr, .dr => unreachable,
.gphi, .segment, .ip, .cr, .dr => unreachable,
.x87 => abi.RegisterClass.x87,
.mmx => @panic("TODO"),
.sse => abi.RegisterClass.sse,
@@ -97763,7 +98701,7 @@ fn moveStrategy(cg: *CodeGen, ty: Type, class: Register.Class, aligned: bool) !M
const pt = cg.pt;
const zcu = pt.zcu;
switch (class) {
.general_purpose, .segment => return .{ .load_store = .{ ._, .mov } },
.general_purpose, .gphi, .segment => return .{ .load_store = .{ ._, .mov } },
.x87 => return .load_store_x87,
.mmx => {},
.sse => switch (ty.zigTypeTag(zcu)) {
@@ -98239,7 +99177,7 @@ fn genSetReg(
.reserved_frame,
=> unreachable,
.undef => if (opts.safety) switch (dst_reg.class()) {
.general_purpose => switch (abi_size) {
.general_purpose, .gphi => switch (abi_size) {
1 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to8(), .u(0xaa)),
2 => try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to16(), .u(0xaaaa)),
3...4 => try self.asmRegisterImmediate(
@@ -98296,8 +99234,8 @@ fn genSetReg(
}
},
.register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) {
.general_purpose => switch (src_reg.class()) {
.general_purpose => try self.asmRegisterRegister(
.general_purpose, .gphi => switch (src_reg.class()) {
.general_purpose, .gphi => try self.asmRegisterRegister(
.{ ._, .mov },
dst_alias,
registerAlias(src_reg, abi_size),
@@ -98341,13 +99279,13 @@ fn genSetReg(
.{ ._, .mov },
dst_reg,
switch (src_reg.class()) {
.general_purpose, .segment => registerAlias(src_reg, abi_size),
.general_purpose, .gphi, .segment => registerAlias(src_reg, abi_size),
.x87, .mmx, .ip, .cr, .dr => unreachable,
.sse => try self.copyToTmpRegister(ty, src_mcv),
},
),
.x87 => switch (src_reg.class()) {
.general_purpose, .segment => unreachable,
.general_purpose, .gphi, .segment => unreachable,
.x87 => switch (src_reg) {
.st0 => try self.asmRegister(.{ .f_, .st }, dst_reg),
.st1, .st2, .st3, .st4, .st5, .st6 => switch (dst_reg) {
@@ -98376,7 +99314,7 @@ fn genSetReg(
},
.mmx => unreachable,
.sse => switch (src_reg.class()) {
.general_purpose => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
.general_purpose, .gphi => if (self.hasFeature(.sse2)) try self.asmRegisterRegister(
switch (abi_size) {
1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov },
5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov },
@@ -98602,7 +99540,7 @@ fn genSetReg(
} },
}),
.load_symbol => |sym_off| switch (dst_reg.class()) {
.general_purpose => {
.general_purpose, .gphi => {
assert(sym_off.off == 0);
try self.asmRegisterMemory(.{ ._, .mov }, dst_alias, .{
.base = .{ .reloc = sym_off.sym_index },
@@ -98617,7 +99555,7 @@ fn genSetReg(
.x87, .sse => {},
},
.load_direct => |sym_index| switch (dst_reg.class()) {
.general_purpose => {
.general_purpose, .gphi => {
_ = try self.addInst(.{
.tag = .mov,
.ops = .direct_reloc,
@@ -98781,7 +99719,7 @@ fn genSetMem(
};
const src_alias = registerAlias(src_reg, abi_size);
const src_size: u32 = @intCast(switch (src_alias.class()) {
.general_purpose, .segment, .x87, .ip, .cr, .dr => @divExact(src_alias.bitSize(), 8),
.general_purpose, .gphi, .segment, .x87, .ip, .cr, .dr => @divExact(src_alias.bitSize(), 8),
.mmx, .sse => abi_size,
});
const src_align: InternPool.Alignment = .fromNonzeroByteUnits(
@@ -103089,7 +104027,7 @@ fn resolveCallingConventionValues(
const ret_gpr = abi.getCAbiIntReturnRegs(cc);
const ret_size: u31 = @intCast(ret_ty.abiSize(zcu));
if (abi.zigcc.return_in_regs) switch (self.regClassForType(ret_ty)) {
.general_purpose => if (ret_size <= @as(u4, switch (self.target.cpu.arch) {
.general_purpose, .gphi => if (ret_size <= @as(u4, switch (self.target.cpu.arch) {
else => unreachable,
.x86 => 4,
.x86_64 => 8,
@@ -103119,7 +104057,7 @@ fn resolveCallingConventionValues(
}
const param_size: u31 = @intCast(param_ty.abiSize(zcu));
if (abi.zigcc.params_in_regs) switch (self.regClassForType(param_ty)) {
.general_purpose => if (param_gpr.len >= 1 and param_size <= @as(u4, switch (self.target.cpu.arch) {
.general_purpose, .gphi => if (param_gpr.len >= 1 and param_size <= @as(u4, switch (self.target.cpu.arch) {
else => unreachable,
.x86 => 4,
.x86_64 => 8,
@@ -103192,10 +104130,9 @@ fn parseRegName(name: []const u8) ?Register {
/// Returns register wide enough to hold at least `size_bytes`.
fn registerAlias(reg: Register, size_bytes: u32) Register {
if (size_bytes == 0) unreachable; // should be comptime-known
return switch (reg.class()) {
.general_purpose => if (size_bytes == 0)
unreachable // should be comptime-known
else if (size_bytes <= 1)
.general_purpose => if (size_bytes <= 1)
reg.to8()
else if (size_bytes <= 2)
reg.to16()
@@ -103205,6 +104142,16 @@ fn registerAlias(reg: Register, size_bytes: u32) Register {
reg.to64()
else
unreachable,
.gphi => if (size_bytes <= 1)
reg
else if (size_bytes <= 2)
reg.to16()
else if (size_bytes <= 4)
reg.to32()
else if (size_bytes <= 8)
reg.to64()
else
unreachable,
.segment => if (size_bytes <= 2)
reg
else
@@ -104566,6 +105513,7 @@ const Temp = struct {
.required_features = .{ .@"64bit", .bmi2, null, null },
.src_constraints = .{ .{ .unsigned_int = .qword }, .any, .any },
.patterns = &.{
.{ .src = .{ .mem, .none, .none } },
.{ .src = .{ .to_gpr, .none, .none } },
},
.extra_temps = .{
@@ -104594,6 +105542,19 @@ const Temp = struct {
.{ .src = .{ .mut_mem, .none, .none } },
.{ .src = .{ .to_mut_gpr, .none, .none } },
},
.extra_temps = .{
.{ .type = .u64, .kind = .{ .rc = .general_purpose } },
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
.unused,
},
.dst_temps = .{ .{ .ref = .src0 }, .unused },
.clobbers = .{ .eflags = true },
.each = .{ .once = &.{
@@ -108625,7 +109586,7 @@ const Temp = struct {
) InnerError!void {
const tomb_bits = cg.liveness.getTombBits(inst);
for (0.., op_refs, op_temps) |op_index, op_ref, op_temp| {
if (op_temp.index != temp.index) try op_temp.die(cg);
if (op_temp.index != temp.index and op_temp.tracking(cg).short != .dead) try op_temp.die(cg);
if (tomb_bits & @as(Liveness.Bpi, 1) << @intCast(op_index) == 0) continue;
if (cg.reused_operands.isSet(op_index)) continue;
try cg.processDeath(op_ref.toIndexAllowNone() orelse continue);
@@ -110197,7 +111158,10 @@ const Select = struct {
}
/// Returns the short tracked `MCValue` of the temp that `ref` names.
///
/// `.none` does not name a temp, so it maps directly to the `.none` value
/// instead of being used as an index into `s.temps`.
fn valueOf(ref: Ref, s: *const Select) MCValue {
    return switch (ref) {
        .none => .none,
        else => s.temps[@intFromEnum(ref)].tracking(s.cg).short,
    };
}
};
@@ -110742,42 +111706,39 @@ const Select = struct {
.lea => .{ .mem = .{
.base = switch (op.flags.base.ref.valueOf(s)) {
else => unreachable,
.none => .none,
.register => |base_reg| .{ .reg = registerAlias(base_reg, @divExact(s.cg.target.ptrBitWidth(), 8)) },
.register_offset => |base_reg_off| .{ .reg = registerAlias(base_reg_off.reg, @divExact(s.cg.target.ptrBitWidth(), 8)) },
.lea_symbol => |base_sym_off| .{ .reloc = base_sym_off.sym_index },
},
.mod = .{ .rm = .{
.size = op.flags.base.size,
.index = switch (op.flags.index.ref) {
else => |index_ref| switch (index_ref.valueOf(s)) {
else => unreachable,
.register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)),
.register_offset => |index_reg_off| registerAlias(index_reg_off.reg, @divExact(s.cg.target.ptrBitWidth(), 8)),
},
.index = switch (op.flags.index.ref.valueOf(s)) {
else => unreachable,
.none => .none,
.register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)),
.register_offset => |index_reg_off| registerAlias(index_reg_off.reg, @divExact(s.cg.target.ptrBitWidth(), 8)),
},
.scale = op.flags.index.scale,
.disp = op.adjustedImm(i32, s) + switch (op.flags.base.ref.valueOf(s)) {
else => unreachable,
.register => 0,
.none, .register => 0,
.register_offset => |base_reg_off| base_reg_off.off,
.lea_symbol => |base_sym_off| base_sym_off.off,
} + switch (op.flags.index.ref.valueOf(s)) {
else => unreachable,
.none, .register => 0,
.register_offset => |base_reg_off| base_reg_off.off,
.lea_symbol => |base_sym_off| base_sym_off.off,
} + switch (op.flags.index.ref) {
else => |index_ref| switch (index_ref.valueOf(s)) {
else => unreachable,
.register => 0,
.register_offset => |base_reg_off| base_reg_off.off,
.lea_symbol => |base_sym_off| base_sym_off.off,
},
.none => 0,
},
} },
} },
.mem => .{ .mem = try op.flags.base.ref.valueOf(s).mem(s.cg, .{
.size = op.flags.base.size,
.index = switch (op.flags.index.ref) {
else => |index_ref| registerAlias(index_ref.valueOf(s).register, @divExact(s.cg.target.ptrBitWidth(), 8)),
.index = switch (op.flags.index.ref.valueOf(s)) {
else => unreachable,
.none => .none,
.register => |index_reg| registerAlias(index_reg, @divExact(s.cg.target.ptrBitWidth(), 8)),
},
.scale = op.flags.index.scale,
.disp = op.adjustedImm(i32, s),
+1
View File
@@ -592,6 +592,7 @@ pub const Op = enum {
else => unreachable,
},
},
.gphi => .r8,
.segment => .sreg,
.x87 => switch (reg) {
.st0 => .st0,
+3 -3
View File
@@ -384,6 +384,7 @@ pub const Register = enum(u8) {
pub const Class = enum {
general_purpose,
gphi,
segment,
x87,
mmx,
@@ -400,7 +401,7 @@ pub const Register = enum(u8) {
@intFromEnum(Register.eax) ... @intFromEnum(Register.r15d) => .general_purpose,
@intFromEnum(Register.ax) ... @intFromEnum(Register.r15w) => .general_purpose,
@intFromEnum(Register.al) ... @intFromEnum(Register.r15b) => .general_purpose,
@intFromEnum(Register.ah) ... @intFromEnum(Register.bh) => .general_purpose,
@intFromEnum(Register.ah) ... @intFromEnum(Register.bh) => .gphi,
@intFromEnum(Register.ymm0) ... @intFromEnum(Register.ymm15) => .sse,
@intFromEnum(Register.xmm0) ... @intFromEnum(Register.xmm15) => .sse,
@@ -525,7 +526,6 @@ pub const Register = enum(u8) {
}
fn gpBase(reg: Register) u7 {
assert(reg.class() == .general_purpose);
return switch (@intFromEnum(reg)) {
// zig fmt: off
@intFromEnum(Register.rax) ... @intFromEnum(Register.r15) => @intFromEnum(Register.rax),
@@ -577,7 +577,7 @@ pub const Register = enum(u8) {
/// DWARF register encoding
pub fn dwarfNum(reg: Register) u6 {
return switch (reg.class()) {
.general_purpose => if (reg.isExtended())
.general_purpose, .gphi => if (reg.isExtended())
reg.enc()
else
@as(u3, @truncate(@as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3)),
+66 -22
View File
@@ -1,4 +1,5 @@
const AddOneBit = math.AddOneBit;
const cast = math.cast;
const checkExpected = math.checkExpected;
const Compare = math.Compare;
const DoubleBits = math.DoubleBits;
@@ -6,6 +7,7 @@ const fmax = math.fmax;
const fmin = math.fmin;
const Gpr = math.Gpr;
const inf = math.inf;
const Log2Int = math.Log2Int;
const math = @import("math.zig");
const nan = math.nan;
const Scalar = math.Scalar;
@@ -5582,6 +5584,28 @@ test mod {
try test_mod.testFloatVectors();
}
/// Returns the larger of `lhs` and `rhs` as computed by the `@max` builtin.
inline fn max(comptime Type: type, lhs: Type, rhs: Type) Type {
    const larger: Type = @max(lhs, rhs);
    return larger;
}
// Runs `max` through every suite of the `binary` harness used here:
// ints, int vectors, floats, and float vectors.
test max {
    const suite = binary(max, .{});
    try suite.testInts();
    try suite.testIntVectors();
    try suite.testFloats();
    try suite.testFloatVectors();
}
/// Returns the smaller of `lhs` and `rhs` as computed by the `@min` builtin.
inline fn min(comptime Type: type, lhs: Type, rhs: Type) Type {
    const smaller: Type = @min(lhs, rhs);
    return smaller;
}
// Runs `min` through every suite of the `binary` harness used here:
// ints, int vectors, floats, and float vectors.
test min {
    const suite = binary(min, .{});
    try suite.testInts();
    try suite.testIntVectors();
    try suite.testFloats();
    try suite.testFloatVectors();
}
/// Compares `lhs` and `rhs` with `==`; the return type is whatever that
/// comparison yields for `Type`.
inline fn equal(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs == rhs) {
    const is_equal = lhs == rhs;
    return is_equal;
}
@@ -5654,6 +5678,48 @@ test bitOr {
try test_bit_or.testIntVectors();
}
/// Shifts `lhs` right by an amount derived from `rhs`.
///
/// `rhs` is reinterpreted as an unsigned integer of the same bit width and
/// truncated to `Log2Int(Type)`. When the bit width of `Type` itself fits in
/// `Log2Int(Type)`, the amount is further reduced modulo the bit width.
inline fn shr(comptime Type: type, lhs: Type, rhs: Type) Type {
    const Unsigned = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(Type) } });
    const Amount = Log2Int(Type);
    const rhs_bits: Unsigned = @bitCast(rhs);
    const raw_amount: Amount = @truncate(rhs_bits);
    const amount: Amount = if (comptime cast(Amount, @bitSizeOf(Type))) |bit_count|
        raw_amount % bit_count
    else
        raw_amount;
    return lhs >> amount;
}
// Runs `shr` through the integer suite of the `binary` harness.
test shr {
    const suite = binary(shr, .{});
    try suite.testInts();
}
/// Applies `@shrExact` to `lhs` with an amount derived from `rhs`.
///
/// The amount is computed as in a plain shift: `rhs` is reinterpreted as
/// unsigned, truncated to `Log2Int(Type)`, and reduced modulo the bit width
/// when that width fits in `Log2Int(Type)`. Before the exact shift, the low
/// `amount` bits of `lhs` are cleared by shifting right and then left again.
inline fn shrExact(comptime Type: type, lhs: Type, rhs: Type) Type {
    const Unsigned = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(Type) } });
    const Amount = Log2Int(Type);
    const rhs_bits: Unsigned = @bitCast(rhs);
    const raw_amount: Amount = @truncate(rhs_bits);
    const amount = if (comptime cast(Amount, @bitSizeOf(Type))) |bit_count|
        raw_amount % bit_count
    else
        raw_amount;
    const low_bits_cleared = lhs >> amount << amount;
    return @shrExact(low_bits_cleared, amount);
}
// Runs `shrExact` through the integer suite of the `binary` harness.
test shrExact {
    const suite = binary(shrExact, .{});
    try suite.testInts();
}
/// Shifts `lhs` left by an amount derived from `rhs`.
///
/// `rhs` is reinterpreted as an unsigned integer of the same bit width and
/// truncated to `Log2Int(Type)`. When the bit width of `Type` itself fits in
/// `Log2Int(Type)`, the amount is further reduced modulo the bit width.
inline fn shl(comptime Type: type, lhs: Type, rhs: Type) Type {
    const Unsigned = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(Type) } });
    const Amount = Log2Int(Type);
    const rhs_bits: Unsigned = @bitCast(rhs);
    const raw_amount: Amount = @truncate(rhs_bits);
    const amount: Amount = if (comptime cast(Amount, @bitSizeOf(Type))) |bit_count|
        raw_amount % bit_count
    else
        raw_amount;
    return lhs << amount;
}
// Runs `shl` through the integer suite of the `binary` harness.
test shl {
    const suite = binary(shl, .{});
    try suite.testInts();
}
/// Applies `@shlExact` to `lhs` with an amount derived from `rhs`.
///
/// The amount is computed as in a plain shift: `rhs` is reinterpreted as
/// unsigned, truncated to `Log2Int(Type)`, and reduced modulo the bit width
/// when that width fits in `Log2Int(Type)`. Before the exact shift, `lhs` is
/// shifted left and then right again by `amount`, so the value handed to
/// `@shlExact` is the result of that round trip rather than `lhs` itself.
inline fn shlExact(comptime Type: type, lhs: Type, rhs: Type) Type {
    const Unsigned = @Type(.{ .int = .{ .signedness = .unsigned, .bits = @bitSizeOf(Type) } });
    const Amount = Log2Int(Type);
    const rhs_bits: Unsigned = @bitCast(rhs);
    const raw_amount: Amount = @truncate(rhs_bits);
    const amount = if (comptime cast(Amount, @bitSizeOf(Type))) |bit_count|
        raw_amount % bit_count
    else
        raw_amount;
    const round_tripped = lhs << amount >> amount;
    return @shlExact(round_tripped, amount);
}
// Runs `shlExact` through the integer suite of the `binary` harness.
test shlExact {
    const suite = binary(shlExact, .{});
    try suite.testInts();
}
/// Combines `lhs` and `rhs` with the `^` operator; the return type is
/// whatever that expression yields for `Type`.
inline fn bitXor(comptime Type: type, lhs: Type, rhs: Type) @TypeOf(lhs ^ rhs) {
    const xored = lhs ^ rhs;
    return xored;
}
@@ -5663,28 +5729,6 @@ test bitXor {
try test_bit_xor.testIntVectors();
}
/// Returns the smaller of `lhs` and `rhs` as computed by the `@min` builtin.
inline fn min(comptime Type: type, lhs: Type, rhs: Type) Type {
    const smaller: Type = @min(lhs, rhs);
    return smaller;
}
// Runs `min` through every suite of the `binary` harness used here:
// ints, int vectors, floats, and float vectors.
test min {
    const suite = binary(min, .{});
    try suite.testInts();
    try suite.testIntVectors();
    try suite.testFloats();
    try suite.testFloatVectors();
}
/// Returns the larger of `lhs` and `rhs` as computed by the `@max` builtin.
inline fn max(comptime Type: type, lhs: Type, rhs: Type) Type {
    const larger: Type = @max(lhs, rhs);
    return larger;
}
// Runs `max` through every suite of the `binary` harness used here:
// ints, int vectors, floats, and float vectors.
test max {
    const suite = binary(max, .{});
    try suite.testInts();
    try suite.testIntVectors();
    try suite.testFloats();
    try suite.testFloatVectors();
}
inline fn optionalsEqual(comptime Type: type, lhs: Type, rhs: Type) bool {
if (@inComptime()) return lhs == rhs; // workaround https://github.com/ziglang/zig/issues/22636
return @as(?Type, lhs) == rhs;
+1
View File
@@ -2,6 +2,7 @@ const builtin = @import("builtin");
const math = std.math;
const std = @import("std");
pub const cast = math.cast;
pub const fmax = math.floatMax;
pub const fmin = math.floatMin;
pub const imax = math.maxInt;