diff --git a/lib/compiler_rt/divmodei4.zig b/lib/compiler_rt/divmodei4.zig index f30eded317..55c7ec5792 100644 --- a/lib/compiler_rt/divmodei4.zig +++ b/lib/compiler_rt/divmodei4.zig @@ -10,45 +10,39 @@ const symbol = @import("../compiler_rt.zig").symbol; comptime { symbol(&__divei4, "__divei4"); symbol(&__modei4, "__modei4"); + symbol(&__divei5, "__divei5"); + symbol(&__modei5, "__modei5"); } -inline fn limb(x: []u32, i: usize) *u32 { - return if (endian == .little) &x[i] else &x[x.len - 1 - i]; +inline fn limb(i: usize, len: usize) usize { + return if (endian == .little) i else len - 1 - i; } -inline fn neg(x: []u32) void { +inline fn neg(out: []u32, in: []const u32) void { var ov: u1 = 1; - for (0..x.len) |limb_index| { - const l = limb(x, limb_index); - l.*, ov = @addWithOverflow(~l.*, ov); + for (0..in.len) |limb_index| { + const new, ov = @addWithOverflow(~in[limb(limb_index, in.len)], ov); + out[limb(limb_index, out.len)] = new; } } -const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; - -fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void { - const u_sign: i32 = @bitCast(u[u.len - 1]); - const v_sign: i32 = @bitCast(v[v.len - 1]); - var ua: [max_limbs]u32 = undefined; - const us = ua[0..u.len]; - @memcpy(us, u); - var va: [max_limbs]u32 = undefined; - const vs = va[0..v.len]; - @memcpy(vs, v); - if (u_sign < 0) neg(us); - if (v_sign < 0) neg(vs); - try @call(.always_inline, udivmod, .{ q, r, us, vs }); - if (q) |x| if (u_sign ^ v_sign < 0) neg(x); - if (r) |x| if (u_sign < 0) neg(x); +fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32, tu: []u32, tv: []u32) !void { + const u_sign: i32 = @bitCast(u[limb(u.len - 1, u.len)]); + const v_sign: i32 = @bitCast(v[limb(v.len - 1, v.len)]); + if (u_sign < 0) neg(tu, u); + if (v_sign < 0) neg(tv, v); + try @call(.always_inline, udivmod, .{ q, r, if (u_sign < 0) tu else u, if (v_sign < 0) tv else v }); + if (q) |x| if (u_sign ^ v_sign < 0) neg(x, x); + if (r) |x| if (u_sign < 0) neg(x, x); } -pub fn __divei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void { +pub fn __divei4(q_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void { @setRuntimeSafety(compiler_rt.test_safety); const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); - const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); - const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); - @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable; + const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size])); + const v: []u32 = @ptrCast(@alignCast(v_p[0..byte_size])); + @call(.always_inline, divmod, .{ q, null, u, v, u, v }) catch unreachable; } pub fn __modei4(r_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void { @@ -57,5 +51,27 @@ pub fn __modei4(r_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) vo const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size])); const v: []u32 = @ptrCast(@alignCast(v_p[0..byte_size])); - @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable; + @call(.always_inline, divmod, .{ null, r, u, v, u, v }) catch unreachable; +} + +pub fn __divei5(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, t_p: [*]u8, bits: usize) callconv(.c) void { + @setRuntimeSafety(compiler_rt.test_safety); + const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); + const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); + const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); + const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); + const tu: []u32 = @ptrCast(@alignCast(t_p[0..byte_size])); + const tv: []u32 = @ptrCast(@alignCast(t_p[byte_size..][0..byte_size])); + @call(.always_inline, divmod, .{ q, null, u, v, tu, tv }) catch unreachable; +} + +pub fn __modei5(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, t_p: [*]u8, bits: usize) callconv(.c) void { + @setRuntimeSafety(compiler_rt.test_safety); + const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); + const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); + const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); + const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); + const tu: []u32 = @ptrCast(@alignCast(t_p[0..byte_size])); + const tv: []u32 = @ptrCast(@alignCast(t_p[byte_size..][0..byte_size])); + @call(.always_inline, divmod, .{ null, r, u, v, tu, tv }) catch unreachable; } diff --git a/lib/compiler_rt/udivmodei4.zig b/lib/compiler_rt/udivmodei4.zig index f037639c72..5aa8421bdb 100644 --- a/lib/compiler_rt/udivmodei4.zig +++ b/lib/compiler_rt/udivmodei4.zig @@ -13,6 +13,8 @@ const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max s comptime { symbol(&__udivei4, "__udivei4"); symbol(&__umodei4, "__umodei4"); + symbol(&__udivei5, "__udivei5"); + symbol(&__umodei5, "__umodei5"); } /// Get the value of a limb. @@ -132,6 +134,32 @@ pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) ca @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable; } +pub fn __udivei5(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, t_p: [*]u8, bits: usize) callconv(.c) void { + @setRuntimeSafety(compiler_rt.test_safety); + const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); + const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); + const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); + const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); + const tu: []u32 = @ptrCast(@alignCast(t_p[0..byte_size])); + _ = tu; + const tv: []u32 = @ptrCast(@alignCast(t_p[byte_size..][0..byte_size])); + _ = tv; + @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable; +} + +pub fn __umodei5(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, t_p: [*]u8, bits: usize) callconv(.c) void { + @setRuntimeSafety(compiler_rt.test_safety); + const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); + const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); + const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); + const v: []const u32 = @ptrCast(@alignCast(v_p[0..byte_size])); + const tu: []u32 = @ptrCast(@alignCast(t_p[0..byte_size])); + _ = tu; + const tv: []u32 = @ptrCast(@alignCast(t_p[byte_size..][0..byte_size])); + _ = tv; + @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable; +} + test "__udivei4/__umodei4" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig index 82b4c21e0b..12e8826a92 100644 --- a/src/codegen/wasm/CodeGen.zig +++ b/src/codegen/wasm/CodeGen.zig @@ -2530,11 +2530,24 @@ fn intDiv(cg: *CodeGen, ty: IntType, lhs: WValue, rhs: WValue) InnerError!WValue }, else => { const result = try cg.allocInt(ty); + const bits = cg.intBackingBits(ty.bits); + var tmp = try cg.allocInt(.{ .is_signed = false, .bits = bits * 2 }); if (ty.is_signed) { - _ = try cg.callIntrinsic(.__divei4, &.{ .usize_type, .usize_type, .usize_type, .usize_type }, .void, &.{ result, lhs, rhs, .{ .imm32 = ty.bits } }); + _ = try cg.callIntrinsic( + .__divei5, + &.{ .usize_type, .usize_type, .usize_type, .usize_type, .usize_type }, + .void, + &.{ result, lhs, rhs, tmp, .{ .imm32 = ty.bits } }, + ); } else { - _ = try cg.callIntrinsic(.__udivei4, &.{ .usize_type, .usize_type, .usize_type, .usize_type }, .void, &.{ result, lhs, rhs, .{ .imm32 = ty.bits } }); + _ = try cg.callIntrinsic( + .__udivei5, + &.{ .usize_type, .usize_type, .usize_type, .usize_type, .usize_type }, + .void, + &.{ result, lhs, rhs, tmp, .{ .imm32 = ty.bits } }, + ); } + tmp.free(cg); return result; }, } @@ -2631,11 +2644,24 @@ fn intRem(cg: *CodeGen, ty: IntType, lhs: WValue, rhs: WValue) InnerError!WValue }, else => { const result = try cg.allocInt(ty); + const bits = cg.intBackingBits(ty.bits); + var tmp = try cg.allocInt(.{ .is_signed = false, .bits = bits * 2 }); if (ty.is_signed) { - _ = try cg.callIntrinsic(.__modei4, &.{ .usize_type, .usize_type, .usize_type, .usize_type }, .void, &.{ result, lhs, rhs, .{ .imm32 = ty.bits } }); + _ = try cg.callIntrinsic( + .__modei5, + &.{ .usize_type, .usize_type, .usize_type, .usize_type, .usize_type }, + .void, + &.{ result, lhs, rhs, tmp, .{ .imm32 = ty.bits } }, + ); } else { - _ = try cg.callIntrinsic(.__umodei4, &.{ .usize_type, .usize_type, .usize_type, .usize_type }, .void, &.{ result, lhs, rhs, .{ .imm32 = ty.bits } }); + _ = try cg.callIntrinsic( + .__umodei5, + &.{ .usize_type, .usize_type, .usize_type, .usize_type, .usize_type }, + .void, + &.{ result, lhs, rhs, tmp, .{ .imm32 = ty.bits } }, + ); } + tmp.free(cg); return result; }, } diff --git a/src/codegen/wasm/Mir.zig b/src/codegen/wasm/Mir.zig index d2d48e9bee..11bf63bdfd 100644 --- a/src/codegen/wasm/Mir.zig +++ b/src/codegen/wasm/Mir.zig @@ -825,7 +825,7 @@ pub const Intrinsic = enum(u32) { __ceilx, __cosh, __cosx, - __divei4, + __divei5, __divhf3, __divtf3, __divti3, @@ -951,7 +951,7 @@ pub const Intrinsic = enum(u32) { __lshrti3, __lttf2, __ltxf2, - __modei4, + __modei5, __modti3, __mulhf3, __mulodi4, @@ -982,9 +982,9 @@ pub const Intrinsic = enum(u32) { __truncxfdf2, __truncxfhf2, __truncxfsf2, - __udivei4, + __udivei5, __udivti3, - __umodei4, + __umodei5, __umodti3, ceilq, cos,