libzigc/math: Implement more precise tanl in compiler_rt

The logic was more or less ported from `musl`, with small adjustments
where it was convenient. The 'internal' `__tanl` function was implemented
in the `trig.zig` module along with other 'internal' trigonometric functions.

Now, the `tanl` implementation is precise enough to pass all the relevant `libc-test` suite tests:

```
$ ./build/stage3/bin/zig build -p stage4 -Denable-llvm -Dno-lib

$ stage4/bin/zig build test-libc -Dlibc-test-path=<LIBC-TEST-PATH> -Dtest-filter=tanl -fqemu -fwasmtime --summary line
Build Summary: 553/553 steps succeeded
```

The unit tests were also extended to include cases for `f80` and `f128`, and they're passing.
This commit is contained in:
mihael
2026-03-24 01:11:56 +01:00
parent c76644caf3
commit ffd6f6cc6e
2 changed files with 224 additions and 34 deletions
+97 -34
View File
@@ -3,6 +3,7 @@
//!
//! https://git.musl-libc.org/cgit/musl/tree/src/math/tanf.c
//! https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c
//! https://git.musl-libc.org/cgit/musl/tree/src/math/tanl.c
//! https://golang.org/src/math/tan.go
const std = @import("std");
@@ -10,10 +11,13 @@ const builtin = @import("builtin");
const math = std.math;
const mem = std.mem;
const expect = std.testing.expect;
const expectApproxEqAbs = std.testing.expectApproxEqAbs;
const kernel = @import("trig.zig");
const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
const rem_pio2l = @import("rem_pio2l.zig").rem_pio2l;
const utils = @import("math_utils.zig");
const arch = builtin.cpu.arch;
const compiler_rt = @import("../compiler_rt.zig");
@@ -116,14 +120,38 @@ pub fn tan(x: f64) callconv(.c) f64 {
return kernel.__tan(y[0], y[1], n & 1 != 0);
}
fn tanlGeneric(comptime T: type, x: T) T {
if (!(T == f80 or T == f128)) {
@compileError("`tanlGeneric` implemented only for `f80` and `f128`, got: " ++ T);
}
const se = utils.ldSignExponent(x) & 0x7fff;
if (se == 0x7fff) {
return x - x;
}
const pi_4 = 0.78539816339744830962;
if (@abs(x) < pi_4) {
if (se < 0x3fff - math.floatMantissaBits(T) / 2) {
if (compiler_rt.want_float_exceptions) {
mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
}
return x;
}
return kernel.__tanl(T, x, 0.0, 0);
}
var y: [2]T = undefined;
const n = rem_pio2l(T, x, &y);
return kernel.__tanl(T, y[0], y[1], n & 1);
}
pub fn __tanx(x: f80) callconv(.c) f80 {
// TODO: more efficient implementation
return @floatCast(tanq(x));
return tanlGeneric(f80, x);
}
pub fn tanq(x: f128) callconv(.c) f128 {
// TODO: more correct implementation
return tan(@floatCast(x));
return tanlGeneric(f128, x);
}
pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
@@ -137,45 +165,80 @@ pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
}
}
test "tan" {
try expect(tan(@as(f32, 0.0)) == tanf(0.0));
try expect(tan(@as(f64, 0.0)) == tan(0.0));
}
test "tan32" {
fn testTanNormal(comptime T: type) !void {
const f = switch (T) {
f32 => tanf,
f64 => tan,
else => @compileError("unimplemented"),
};
const epsilon = 0.00001;
try expect(math.approxEqAbs(f32, tanf(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f32, tanf(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f32, tanf(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f32, tanf(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f32, tanf(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f32, tanf(89.123), 2.285852, epsilon));
try expectApproxEqAbs(@as(T, 0.0), f(0.0), epsilon);
try expectApproxEqAbs(@as(T, 0.202710), f(0.2), epsilon);
try expectApproxEqAbs(@as(T, 1.240422), f(0.8923), epsilon);
try expectApproxEqAbs(@as(T, 14.101420), f(1.5), epsilon);
try expectApproxEqAbs(@as(T, -0.254397), f(37.45), epsilon);
try expectApproxEqAbs(@as(T, 2.285837), f(89.123), epsilon);
}
test "tan64" {
const epsilon = 0.000001;
fn testTanSpecial(comptime T: type) !void {
const f = switch (T) {
f32 => tanf,
f64 => tan,
f80 => __tanx,
f128 => tanq,
else => @compileError("unimplemented"),
};
try expect(math.approxEqAbs(f64, tan(0.0), 0.0, epsilon));
try expect(math.approxEqAbs(f64, tan(0.2), 0.202710, epsilon));
try expect(math.approxEqAbs(f64, tan(0.8923), 1.240422, epsilon));
try expect(math.approxEqAbs(f64, tan(1.5), 14.101420, epsilon));
try expect(math.approxEqAbs(f64, tan(37.45), -0.254397, epsilon));
try expect(math.approxEqAbs(f64, tan(89.123), 2.2858376, epsilon));
try expect(math.isPositiveZero(f(0.0)));
try expect(math.isNegativeZero(f(-0.0)));
try expect(math.isNan(f(math.inf(f32))));
try expect(math.isNan(f(-math.inf(f32))));
try expect(math.isNan(f(math.nan(f32))));
}
test "tan32.normal" {
try testTanNormal(f32);
}
test "tan64.normal" {
try testTanNormal(f64);
}
test "tan80.normal" {
const epsilon = math.floatEps(f80);
try expectApproxEqAbs(@as(f80, 0.0), __tanx(0.0), epsilon);
try expectApproxEqAbs(@as(f80, 0.2027100355086724833213582716475345), __tanx(0.2), epsilon);
try expectApproxEqAbs(@as(f80, 1.2404217445497097995561220131857544), __tanx(0.8923), epsilon);
try expectApproxEqAbs(@as(f80, 14.10141994717171938764), __tanx(1.5), epsilon);
try expectApproxEqAbs(@as(f80, -0.25439607116885656232), __tanx(37.45), epsilon);
try expectApproxEqAbs(@as(f80, 2.2858376251355320963), __tanx(89.123), epsilon);
}
test "tan128.normal" {
const epsilon = math.floatEps(f128);
try expectApproxEqAbs(@as(f128, 0.0), tanq(0.0), epsilon);
try expectApproxEqAbs(@as(f128, 0.2027100355086724833213582716475345), tanq(0.2), epsilon);
try expectApproxEqAbs(@as(f128, 1.2404217445497097995561220131857544), tanq(0.8923), epsilon);
try expectApproxEqAbs(@as(f128, 14.101419947171719387646083651987755), tanq(1.5), epsilon);
try expectApproxEqAbs(@as(f128, -0.2543960711688565630469573224504774), tanq(37.45), epsilon);
try expectApproxEqAbs(@as(f128, 2.2858376251355321074066028114094292), tanq(89.123), epsilon);
}
test "tan32.special" {
try expect(tanf(0.0) == 0.0);
try expect(tanf(-0.0) == -0.0);
try expect(math.isNan(tanf(math.inf(f32))));
try expect(math.isNan(tanf(-math.inf(f32))));
try expect(math.isNan(tanf(math.nan(f32))));
try testTanSpecial(f32);
}
test "tan64.special" {
try expect(tan(0.0) == 0.0);
try expect(tan(-0.0) == -0.0);
try expect(math.isNan(tan(math.inf(f64))));
try expect(math.isNan(tan(-math.inf(f64))));
try expect(math.isNan(tan(math.nan(f64))));
try testTanSpecial(f64);
}
test "tan80.special" {
try testTanSpecial(f80);
}
test "tan128.special" {
try testTanSpecial(f128);
}
+127
View File
@@ -7,6 +7,7 @@
// https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tand.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
// https://git.musl-libc.org/cgit/musl/tree/src/math/__tanl.c
/// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
/// Input x is assumed to be bounded by ~pi/4 in magnitude.
@@ -271,3 +272,129 @@ pub fn __tandf(x: f64, odd: bool) f32 {
const r0 = (x + s * u) + (s * w) * (t + w * r);
return @floatCast(if (odd) -1.0 / r0 else r0);
}
pub fn __tanl(comptime T: type, x_: T, y_: T, odd: i32) T {
var x = x_;
var y = y_;
const impl = switch (T) {
f80 => struct {
const pio4: T = 0.785398163397448309628;
const pio4lo: T = -1.25413940316708300586e-20;
const T3: T = 0.333333333333333333180;
const T5: T = 0.133333333333333372290;
const T7: T = 0.0539682539682504975744;
const T9: f64 = 0.021869488536312216;
const T11: f64 = 0.0088632355256619590;
const T13: f64 = 0.0035921281113786528;
const T15: f64 = 0.0014558334756312418;
const T17: f64 = 0.00059003538700862256;
const T19: f64 = 0.00023907843576635544;
const T21: f64 = 0.000097154625656538905;
const T23: f64 = 0.000038440165747303162;
const T25: f64 = 0.000018082171885432524;
const T27: f64 = 0.0000024196006108814377;
const T29: f64 = 0.0000078293456938132840;
const T31: f64 = -0.0000032609076735050182;
const T33: f64 = 0.0000023261313142559411;
inline fn rpoly(w: T) T {
return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
w * (T25 + w * (T29 + w * T33))))));
}
inline fn vpoly(w: T) T {
return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
w * (T27 + w * T31)))));
}
},
f128 => struct {
const pio4: T = 0x1.921fb54442d18469898cc51701b8p-1;
const pio4lo: T = 0x1.cd129024e088a67cc74020bbea60p-116;
const T3: T = 0x1.5555555555555555555555555553p-2;
const T5: T = 0x1.1111111111111111111111111eb5p-3;
const T7: T = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5;
const T9: T = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6;
const T11: T = 0x1.226e355e6c23c8f5b4f5762322eep-7;
const T13: T = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9;
const T15: T = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10;
const T17: T = 0x1.355824803674477dfcf726649efep-11;
const T19: T = 0x1.f57d7734d1656e0aceb716f614c2p-13;
const T21: T = 0x1.967e18afcb180ed942dfdc518d6cp-14;
const T23: T = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15;
const T25: T = 0x1.0b132d39f055c81be49eff7afd50p-16;
const T27: T = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18;
const T29: T = 0x1.5ef2daf21d1113df38d0fbc00267p-19;
const T31: T = 0x1.1c77d6eac0234988cdaa04c96626p-20;
const T33: T = 0x1.cd2a5a292b180e0bdd701057dfe3p-22;
const T35: T = 0x1.75c7357d0298c01a31d0a6f7d518p-23;
const T37: T = 0x1.2f3190f4718a9a520f98f50081fcp-24;
const T39: f64 = 0.000000028443389121318352;
const T41: f64 = 0.000000011981013102001973;
const T43: f64 = 0.0000000038303578044958070;
const T45: f64 = 0.0000000034664378216909893;
const T47: f64 = -0.0000000015090641701997785;
const T49: f64 = 0.0000000029449552300483952;
const T51: f64 = -0.0000000022006995706097711;
const T53: f64 = 0.0000000015468200913196612;
const T55: f64 = -0.00000000061311613386849674;
const T57: f64 = 1.4912469681508012e-10;
inline fn rpoly(w: T) T {
return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
w * (T25 + w * (T29 + w * (T33 + w * (T37 + w * (T41 +
w * (T45 + w * (T49 + w * (T53 + w * T57))))))))))));
}
inline fn vpoly(w: T) T {
return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
w * (T27 + w * (T31 + w * (T35 + w * (T39 + w * (T43 +
w * (T47 + w * (T51 + w * T55)))))))))));
}
},
else => @compileError("__tanl supports only f80 and f128, got: " ++ @typeName(T)),
};
const big = @abs(x) >= 0.67434;
var sign: i8 = 0;
if (big) {
if (x < 0) {
sign = -1;
x = -x;
y = -y;
}
x = (impl.pio4 - x) + (impl.pio4lo - y);
y = 0.0;
}
var z = x * x;
var w = z * z;
var r = impl.rpoly(w);
var v = z * impl.vpoly(w);
var s = z * x;
r = y + z * (s * (r + v) + y) + impl.T3 * s;
w = x + r;
if (big) {
s = @as(T, @floatFromInt(1 - 2 * odd));
v = s - 2.0 * (x + (r - w * w / (w + s)));
return if (sign == -1) -v else v;
}
if (odd == 0) {
return w;
}
// if allow error up to 2 ulp, simply return
// -1.0 / (x+r) here
//
// compute -1.0 / (x+r) accurately
z = w + 0x1p32 - 0x1p32;
v = r - (z - x);
const a = -1.0 / w;
const t = a + 0x1p32 - 0x1p32;
s = 1.0 + t * z;
return t + a * (s + t * v);
}