diff --git a/lib/compiler_rt/limb64.zig b/lib/compiler_rt/limb64.zig index 4c0126d4b3..a5456b548c 100644 --- a/lib/compiler_rt/limb64.zig +++ b/lib/compiler_rt/limb64.zig @@ -839,3 +839,141 @@ test __byteswap_limb64 { try test__byteswap_limb64(i128, 1 << 56, 1 << 64); try test__byteswap_limb64(i248, minInt(i248), 128); } + +comptime { + symbol(&__mulo_limb64, "__mulo_limb64"); +} + +inline fn add3(x: *[3]u64, start: usize, v0: u64) void { + var i = start; + var v = v0; + while (i < 3) : (i += 1) { + const s = @addWithOverflow(x[i], v); + x[i] = s[0]; + if (s[1] == 0) break; + v = 1; + } +} + +fn mulwide(a: u64, b: u64) [2]u64 { + const muldXi = @import("mulXi3.zig").muldXi; + return @bitCast(muldXi(u64, a, b)); +} + +fn __mulo_limb64(out_ptr: [*]u64, a_ptr: [*]const u64, b_ptr: [*]const u64, is_signed: bool, bits: u16) callconv(.c) bool { + const limb_cnt = limbCount(bits); + + const out = out_ptr[0..limb_cnt]; + const a = a_ptr[0..limb_cnt]; + const b = b_ptr[0..limb_cnt]; + + @memset(out, 0); + + const all_ones = ~@as(u64, 0); + const a_neg = is_signed and ((limbGet(a, limb_cnt - 1) >> 63) != 0); + const b_neg = is_signed and ((limbGet(b, limb_cnt - 1) >> 63) != 0); + + var carry: [3]u64 = @splat(0); + var hi_zero = true; + var hi_ones = true; + var hi_borrow: u1 = 0; + var raw_last: u64 = 0; + + var k: usize = 0; + while (k < 2 * limb_cnt) : (k += 1) { + var acc = carry; + + var i: usize = if (k < limb_cnt) 0 else k - (limb_cnt - 1); + while (i < limb_cnt and i <= k) : (i += 1) { + const j = k - i; + if (j >= limb_cnt) continue; + + const p = mulwide(limbGet(a, i), limbGet(b, j)); + add3(&acc, 0, p[0]); + add3(&acc, 1, p[1]); + } + + var limb = acc[0]; + if (k < limb_cnt) { + limbSet(out, k, limb); + if (k == limb_cnt - 1) raw_last = limb; + } else { + if (is_signed) { + const h = k - limb_cnt; + + const s0 = @subWithOverflow(limb, if (a_neg) limbGet(b, h) else 0); + const s1 = @subWithOverflow(s0[0], if (b_neg) limbGet(a, h) else 0); + const s2 = @subWithOverflow(s1[0], hi_borrow); + + limb = s2[0]; + hi_borrow = @intFromBool(s0[1] != 0 or s1[1] != 0 or s2[1] != 0); + } + + hi_zero = hi_zero and limb == 0; + hi_ones = hi_ones and limb == all_ones; + } + + carry = .{ acc[1], acc[2], 0 }; + } + + const last = if (bits % 64 == 0) raw_last else limbWrap(raw_last, is_signed, bits); + if (bits % 64 != 0) { + limbSet(out, limb_cnt - 1, last); + } + + if (!is_signed) { + return !hi_zero or raw_last != last; + } + + const sign_extend: u64 = if ((last >> 63) == 1) all_ones else 0; + return (raw_last != last) or if (sign_extend == 0) !hi_zero else !hi_ones; +} + +fn test__mulo_limb64(comptime T: type, a: T, b: T, expected: struct { T, bool }) !void { + const int_info = @typeInfo(T).int; + const is_signed = int_info.signedness == .signed; + + var a_limbs = asLimbs(a); + var b_limbs = asLimbs(b); + var out: Limbs(T) = undefined; + const overflow = __mulo_limb64(&out, &a_limbs, &b_limbs, is_signed, int_info.bits); + + const expected_limbs = asLimbs(expected[0]); + try testing.expectEqual(expected_limbs, out); + try testing.expectEqual(expected[1], overflow); +} + +test __mulo_limb64 { + try test__mulo_limb64(u64, 3, 5, .{ 15, false }); + try test__mulo_limb64(u64, maxInt(u64), 2, .{ maxInt(u64) - 1, true }); + try test__mulo_limb64(u65, 1 << 32, 1 << 32, .{ 1 << 64, false }); + try test__mulo_limb64(u65, 1 << 64, 2, .{ 0, true }); + try test__mulo_limb64(u128, 1 << 80, 1 << 40, .{ 1 << 120, false }); + try test__mulo_limb64(u128, 1 << 100, 1 << 40, .{ 0, true }); + try test__mulo_limb64(u255, 7, 9, .{ 63, false }); + try test__mulo_limb64(u255, maxInt(u255), 2, .{ maxInt(u255) - 1, true }); + + try test__mulo_limb64(i64, -3, 2, .{ -6, false }); + try test__mulo_limb64(i64, maxInt(i64), 2, .{ -2, true }); + try test__mulo_limb64(i65, 1 << 63, 2, .{ minInt(i65), true }); + try test__mulo_limb64(i65, -1 << 32, 1 << 16, .{ -1 << 48, false }); + try test__mulo_limb64(i128, 1 << 100, 1 << 27, .{ minInt(i128), true }); + try test__mulo_limb64(i128, -1 << 80, 1 << 40, .{ -1 << 120, false }); + try test__mulo_limb64(i255, -3, 2, .{ -6, false }); + try test__mulo_limb64(i255, maxInt(i255), 2, .{ -2, true }); + + try test__mulo_limb64(u200, 0, maxInt(u200), .{ 0, false }); + try test__mulo_limb64(u200, 1, maxInt(u200), .{ maxInt(u200), false }); + try test__mulo_limb64(u200, 1 << 100, 1 << 99, .{ 1 << 199, false }); + try test__mulo_limb64(u200, 1 << 100, 1 << 100, .{ 0, true }); + try test__mulo_limb64(u200, maxInt(u200), maxInt(u200), .{ 1, true }); + + try test__mulo_limb64(i200, 0, -1, .{ 0, false }); + try test__mulo_limb64(i200, -1, -1, .{ 1, false }); + try test__mulo_limb64(i200, -1, minInt(i200), .{ minInt(i200), true }); + try test__mulo_limb64(i200, maxInt(i200), 2, .{ -2, true }); + try test__mulo_limb64(i200, 1 << 100, 1 << 98, .{ 1 << 198, false }); + try test__mulo_limb64(i200, 1 << 100, 1 << 99, .{ minInt(i200), true }); + try test__mulo_limb64(i200, maxInt(i200), maxInt(i200), .{ 1, true }); + try test__mulo_limb64(i200, minInt(i200), minInt(i200), .{ 0, true }); +} diff --git a/lib/compiler_rt/mulXi3.zig b/lib/compiler_rt/mulXi3.zig index 41df7283ea..9fce9754f3 100644 --- a/lib/compiler_rt/mulXi3.zig +++ b/lib/compiler_rt/mulXi3.zig @@ -63,7 +63,7 @@ fn DoubleInt(comptime T: type) type { }; } -fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) { +pub fn muldXi(comptime T: type, a: T, b: T) DoubleInt(T) { const DT = DoubleInt(T); const word_t = compiler_rt.HalveInt(DT, false); const bits_in_word_2 = @sizeOf(T) * 8 / 2; diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig index 19fb654ae8..a2750fa02c 100644 --- a/src/codegen/wasm/CodeGen.zig +++ b/src/codegen/wasm/CodeGen.zig @@ -2480,7 +2480,19 @@ fn intMul(cg: *CodeGen, ty: IntType, lhs: WValue, rhs: WValue) InnerError!WValue return .stack; }, 65...128 => return cg.callIntrinsic(.__multi3, &.{ .i128_type, .i128_type }, Type.i128, &.{ lhs, rhs }), - else => return cg.fail("TODO: Support intMul for integer bitsize: {d}", .{ty.bits}), + else => { + const result = try cg.allocInt(ty); + + try cg.lowerToStack(result); + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + try cg.addImm32(@intFromBool(ty.is_signed)); + try cg.addImm32(ty.bits); + try cg.addCallIntrinsic(.__mulo_limb64); + try cg.addTag(.drop); + + return result; + }, } } @@ -3680,68 +3692,6 @@ fn intMulOverflow(cg: *CodeGen, int_ty: IntType, lhs: WValue, rhs: WValue) Inner _ = try cg.intCmp(new_ty, .neq, res_upcast, bin_op); try cg.addLocal(.local_set, overflow_bit.local.value); break :blk res_tmp; - } else if (int_ty.bits == 128 and !int_ty.is_signed) blk: { - var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64); - defer lhs_lsb.free(cg); - var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64); - defer lhs_msb.free(cg); - var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64); - defer rhs_lsb.free(cg); - var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64); - defer rhs_msb.free(cg); - - const zero: WValue = .{ .imm64 = 0 }; - - const cross_1 = try cg.callIntrinsic( - .__multi3, - &[_]InternPool.Index{.i64_type} ** 4, - Type.i128, - &.{ lhs_msb, zero, rhs_lsb, zero }, - ); - const cross_2 = try cg.callIntrinsic( - .__multi3, - &[_]InternPool.Index{.i64_type} ** 4, - Type.i128, - &.{ rhs_msb, zero, lhs_lsb, zero }, - ); - const mul_lsb = try cg.callIntrinsic( - .__multi3, - &[_]InternPool.Index{.i64_type} ** 4, - Type.i128, - &.{ rhs_lsb, zero, lhs_lsb, zero }, - ); - - const rhs_msb_not_zero = try cg.intCmp(.u64, .neq, rhs_msb, zero); - const lhs_msb_not_zero = try cg.intCmp(.u64, .neq, lhs_msb, zero); - const both_msb_not_zero = try cg.intAnd(.u32, rhs_msb_not_zero, lhs_msb_not_zero); - - const cross_1_msb = try cg.load(cross_1, .u64, 8); - const cross_1_msb_not_zero = try cg.intCmp(.u64, .neq, cross_1_msb, zero); - const cond_1 = try cg.intOr(.u32, both_msb_not_zero, cross_1_msb_not_zero); - - const cross_2_msb = try cg.load(cross_2, Type.u64, 8); - const cross_2_msb_not_zero = try cg.intCmp(.u64, .neq, cross_2_msb, zero); - const cond_2 = try cg.intOr(.u32, cond_1, cross_2_msb_not_zero); - - const cross_1_lsb = try cg.load(cross_1, Type.u64, 0); - const cross_2_lsb = try cg.load(cross_2, Type.u64, 0); - const cross_add = try cg.intAdd(.u64, cross_1_lsb, cross_2_lsb); - - var mul_lsb_msb = try (try cg.load(mul_lsb, Type.u64, 8)).toLocal(cg, Type.u64); - defer mul_lsb_msb.free(cg); - var all_add = try (try cg.intAdd(.u64, cross_add, mul_lsb_msb)).toLocal(cg, Type.u64); - defer all_add.free(cg); - const add_overflow = try cg.intCmp(.u64, .lt, all_add, mul_lsb_msb); - - _ = try cg.intOr(.u32, cond_2, add_overflow); - try cg.addLocal(.local_set, overflow_bit.local.value); - - const tmp_result = try cg.allocStack(Type.u128); - try cg.emitWValue(tmp_result); - const mul_lsb_lsb = try cg.load(mul_lsb, Type.u64, 0); - try cg.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset()); - try cg.store(tmp_result, all_add, Type.u64, 8); - break :blk tmp_result; } else if (int_ty.bits == 128 and int_ty.is_signed) blk: { const overflow_ret = try cg.allocStack(Type.i32); const res = try cg.callIntrinsic( @@ -3753,7 +3703,18 @@ fn intMulOverflow(cg: *CodeGen, int_ty: IntType, lhs: WValue, rhs: WValue) Inner _ = try cg.load(overflow_ret, Type.i32, 0); try cg.addLocal(.local_set, overflow_bit.local.value); break :blk res; - } else return cg.fail("TODO: intMulOverflow for bitsize {d}", .{int_ty.bits}); + } else { + const result = try cg.allocInt(int_ty); + + try cg.lowerToStack(result); + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + try cg.addImm32(@intFromBool(int_ty.is_signed)); + try cg.addImm32(int_ty.bits); + try cg.addCallIntrinsic(.__mulo_limb64); + + return .{ .result = result, .ov = .stack }; + }; return .{ .result = result_val, .ov = .{ .local = overflow_bit.local } }; } diff --git a/src/codegen/wasm/Mir.zig b/src/codegen/wasm/Mir.zig index d7297af253..74b6c33020 100644 --- a/src/codegen/wasm/Mir.zig +++ b/src/codegen/wasm/Mir.zig @@ -1018,4 +1018,5 @@ pub const Intrinsic = enum(u32) { __popcount_limb64, __bitreverse_limb64, __byteswap_limb64, + __mulo_limb64, }; diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 085a72f390..20a1c4ed3e 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -1100,10 +1100,37 @@ test "@mulWithOverflow bitsize 128 bits" { try testMulWithOverflow(i128, -1 << 63, -1 << 64, -1 << 127, 1); } +test "@mulWithOverflow > 128 bits" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + + try testMulWithOverflow(u140, 0, maxInt(u140), 0, 0); + try testMulWithOverflow(u140, 1, maxInt(u140), maxInt(u140), 0); + try testMulWithOverflow(u140, 1 << 70, 1 << 69, 1 << 139, 0); + try testMulWithOverflow(u140, 1 << 70, 1 << 70, 0, 1); + + try testMulWithOverflow(u200, 1 << 100, 1 << 99, 1 << 199, 0); + try testMulWithOverflow(u200, 1 << 100, 1 << 100, 0, 1); + try testMulWithOverflow(u200, maxInt(u200), maxInt(u200), 1, 1); + try testMulWithOverflow(u200, maxInt(u200) - 1, 2, maxInt(u200) - 3, 1); + + try testMulWithOverflow(i140, 0, -1, 0, 0); + try testMulWithOverflow(i140, -1, -1, 1, 0); + try testMulWithOverflow(i140, 1 << 69, 1 << 69, 1 << 138, 0); + try testMulWithOverflow(i140, 1 << 69, 1 << 70, minInt(i140), 1); + try testMulWithOverflow(i140, -1 << 70, 1 << 20, -1 << 90, 0); + try testMulWithOverflow(i140, minInt(i140), -1, minInt(i140), 1); + + try testMulWithOverflow(i200, 1 << 100, 1 << 98, 1 << 198, 0); + try testMulWithOverflow(i200, 1 << 100, 1 << 99, minInt(i200), 1); + try testMulWithOverflow(i200, -1 << 120, 1 << 30, -1 << 150, 0); + try testMulWithOverflow(i200, minInt(i200), minInt(i200), 0, 1); + try testMulWithOverflow(i200, maxInt(i200), 2, -2, 1); + try testMulWithOverflow(i200, maxInt(i200), maxInt(i200), 1, 1); +} + test "@mulWithOverflow bitsize 256 bits" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest;