From ffd6f6cc6e05e84a4e029b02ff13a3e84cbf1aa4 Mon Sep 17 00:00:00 2001
From: mihael <hi@mihaelm.com>
Date: Tue, 24 Mar 2026 01:11:56 +0100
Subject: [PATCH] `libzigc/math`: Implement more precise `tanl` in
 `compiler_rt`

The logic was more or less ported from `musl`, with small adjustments
where it was convenient. The 'internal' `__tanl` function was implemented
in the `trig.zig` module along with other 'internal' trigonometric functions.

Now, the `tanl` implementation is precise enough to pass all the relevant `libc-test` suite tests:

```
$ ./build/stage3/bin/zig build -p stage4 -Denable-llvm -Dno-lib

$ stage4/bin/zig build test-libc -Dlibc-test-path=<LIBC-TEST-PATH> -Dtest-filter=tanl -fqemu -fwasmtime --summary line
Build Summary: 553/553 steps succeeded
```

The unit tests were also extended to include cases for `f80` and `f128`, and they're passing.
---
 lib/compiler_rt/tan.zig  | 131 +++++++++++++++++++++++++++++----------
 lib/compiler_rt/trig.zig | 127 +++++++++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+), 34 deletions(-)
diff --git a/lib/compiler_rt/tan.zig b/lib/compiler_rt/tan.zig
index 822ffd4dec..6534ca3c27 100644
--- a/lib/compiler_rt/tan.zig
+++ b/lib/compiler_rt/tan.zig
@@ -3,6 +3,7 @@
 //!
 //! https://git.musl-libc.org/cgit/musl/tree/src/math/tanf.c
 //! https://git.musl-libc.org/cgit/musl/tree/src/math/tan.c
+//! https://git.musl-libc.org/cgit/musl/tree/src/math/tanl.c
 //! https://golang.org/src/math/tan.go
 
 const std = @import("std");
@@ -10,10 +11,13 @@ const builtin = @import("builtin");
 const math = std.math;
 const mem = std.mem;
 const expect = std.testing.expect;
+const expectApproxEqAbs = std.testing.expectApproxEqAbs;
 
 const kernel = @import("trig.zig");
 const rem_pio2 = @import("rem_pio2.zig").rem_pio2;
 const rem_pio2f = @import("rem_pio2f.zig").rem_pio2f;
+const rem_pio2l = @import("rem_pio2l.zig").rem_pio2l;
+const utils = @import("math_utils.zig");
 
 const arch = builtin.cpu.arch;
 const compiler_rt = @import("../compiler_rt.zig");
@@ -116,14 +120,38 @@ pub fn tan(x: f64) callconv(.c) f64 {
     return kernel.__tan(y[0], y[1], n & 1 != 0);
 }
 
+fn tanlGeneric(comptime T: type, x: T) T {
+    if (!(T == f80 or T == f128)) {
+        @compileError("`tanlGeneric` implemented only for `f80` and `f128`, got: " ++ T);
+    }
+
+    const se = utils.ldSignExponent(x) & 0x7fff;
+    if (se == 0x7fff) {
+        return x - x;
+    }
+
+    const pi_4 = 0.78539816339744830962;
+    if (@abs(x) < pi_4) {
+        if (se < 0x3fff - math.floatMantissaBits(T) / 2) {
+            if (compiler_rt.want_float_exceptions) {
+                mem.doNotOptimizeAway(if (se == 0) x * 0x1p-120 else x + 0x1p120);
+            }
+            return x;
+        }
+        return kernel.__tanl(T, x, 0.0, 0);
+    }
+
+    var y: [2]T = undefined;
+    const n = rem_pio2l(T, x, &y);
+    return kernel.__tanl(T, y[0], y[1], n & 1);
+}
+
 pub fn __tanx(x: f80) callconv(.c) f80 {
-    // TODO: more efficient implementation
-    return @floatCast(tanq(x));
+    return tanlGeneric(f80, x);
 }
 
 pub fn tanq(x: f128) callconv(.c) f128 {
-    // TODO: more correct implementation
-    return tan(@floatCast(x));
+    return tanlGeneric(f128, x);
 }
 
 pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
@@ -137,45 +165,80 @@ pub fn tanl(x: c_longdouble) callconv(.c) c_longdouble {
     }
 }
 
-test "tan" {
-    try expect(tan(@as(f32, 0.0)) == tanf(0.0));
-    try expect(tan(@as(f64, 0.0)) == tan(0.0));
-}
-
-test "tan32" {
+fn testTanNormal(comptime T: type) !void {
+    const f = switch (T) {
+        f32 => tanf,
+        f64 => tan,
+        else => @compileError("unimplemented"),
+    };
     const epsilon = 0.00001;
 
-    try expect(math.approxEqAbs(f32, tanf(0.0), 0.0, epsilon));
-    try expect(math.approxEqAbs(f32, tanf(0.2), 0.202710, epsilon));
-    try expect(math.approxEqAbs(f32, tanf(0.8923), 1.240422, epsilon));
-    try expect(math.approxEqAbs(f32, tanf(1.5), 14.101420, epsilon));
-    try expect(math.approxEqAbs(f32, tanf(37.45), -0.254397, epsilon));
-    try expect(math.approxEqAbs(f32, tanf(89.123), 2.285852, epsilon));
+    try expectApproxEqAbs(@as(T, 0.0), f(0.0), epsilon);
+    try expectApproxEqAbs(@as(T, 0.202710), f(0.2), epsilon);
+    try expectApproxEqAbs(@as(T, 1.240422), f(0.8923), epsilon);
+    try expectApproxEqAbs(@as(T, 14.101420), f(1.5), epsilon);
+    try expectApproxEqAbs(@as(T, -0.254397), f(37.45), epsilon);
+    try expectApproxEqAbs(@as(T, 2.285837), f(89.123), epsilon);
 }
 
-test "tan64" {
-    const epsilon = 0.000001;
+fn testTanSpecial(comptime T: type) !void {
+    const f = switch (T) {
+        f32 => tanf,
+        f64 => tan,
+        f80 => __tanx,
+        f128 => tanq,
+        else => @compileError("unimplemented"),
+    };
 
-    try expect(math.approxEqAbs(f64, tan(0.0), 0.0, epsilon));
-    try expect(math.approxEqAbs(f64, tan(0.2), 0.202710, epsilon));
-    try expect(math.approxEqAbs(f64, tan(0.8923), 1.240422, epsilon));
-    try expect(math.approxEqAbs(f64, tan(1.5), 14.101420, epsilon));
-    try expect(math.approxEqAbs(f64, tan(37.45), -0.254397, epsilon));
-    try expect(math.approxEqAbs(f64, tan(89.123), 2.2858376, epsilon));
+    try expect(math.isPositiveZero(f(0.0)));
+    try expect(math.isNegativeZero(f(-0.0)));
+    try expect(math.isNan(f(math.inf(f32))));
+    try expect(math.isNan(f(-math.inf(f32))));
+    try expect(math.isNan(f(math.nan(f32))));
+}
+
+test "tan32.normal" {
+    try testTanNormal(f32);
+}
+
+test "tan64.normal" {
+    try testTanNormal(f64);
+}
+
+test "tan80.normal" {
+    const epsilon = math.floatEps(f80);
+
+    try expectApproxEqAbs(@as(f80, 0.0), __tanx(0.0), epsilon);
+    try expectApproxEqAbs(@as(f80, 0.2027100355086724833213582716475345), __tanx(0.2), epsilon);
+    try expectApproxEqAbs(@as(f80, 1.2404217445497097995561220131857544), __tanx(0.8923), epsilon);
+    try expectApproxEqAbs(@as(f80, 14.10141994717171938764), __tanx(1.5), epsilon);
+    try expectApproxEqAbs(@as(f80, -0.25439607116885656232), __tanx(37.45), epsilon);
+    try expectApproxEqAbs(@as(f80, 2.2858376251355320963), __tanx(89.123), epsilon);
+}
+
+test "tan128.normal" {
+    const epsilon = math.floatEps(f128);
+
+    try expectApproxEqAbs(@as(f128, 0.0), tanq(0.0), epsilon);
+    try expectApproxEqAbs(@as(f128, 0.2027100355086724833213582716475345), tanq(0.2), epsilon);
+    try expectApproxEqAbs(@as(f128, 1.2404217445497097995561220131857544), tanq(0.8923), epsilon);
+    try expectApproxEqAbs(@as(f128, 14.101419947171719387646083651987755), tanq(1.5), epsilon);
+    try expectApproxEqAbs(@as(f128, -0.2543960711688565630469573224504774), tanq(37.45), epsilon);
+    try expectApproxEqAbs(@as(f128, 2.2858376251355321074066028114094292), tanq(89.123), epsilon);
 }
 
 test "tan32.special" {
-    try expect(tanf(0.0) == 0.0);
-    try expect(tanf(-0.0) == -0.0);
-    try expect(math.isNan(tanf(math.inf(f32))));
-    try expect(math.isNan(tanf(-math.inf(f32))));
-    try expect(math.isNan(tanf(math.nan(f32))));
+    try testTanSpecial(f32);
 }
 
 test "tan64.special" {
-    try expect(tan(0.0) == 0.0);
-    try expect(tan(-0.0) == -0.0);
-    try expect(math.isNan(tan(math.inf(f64))));
-    try expect(math.isNan(tan(-math.inf(f64))));
-    try expect(math.isNan(tan(math.nan(f64))));
+    try testTanSpecial(f64);
+}
+
+test "tan80.special" {
+    try testTanSpecial(f80);
+}
+
+test "tan128.special" {
+    try testTanSpecial(f128);
 }
diff --git a/lib/compiler_rt/trig.zig b/lib/compiler_rt/trig.zig
index 616dcf53f2..4d1690e4fe 100644
--- a/lib/compiler_rt/trig.zig
+++ b/lib/compiler_rt/trig.zig
@@ -7,6 +7,7 @@
 // https://git.musl-libc.org/cgit/musl/tree/src/math/__sindf.c
 // https://git.musl-libc.org/cgit/musl/tree/src/math/__tand.c
 // https://git.musl-libc.org/cgit/musl/tree/src/math/__tandf.c
+// https://git.musl-libc.org/cgit/musl/tree/src/math/__tanl.c
 
 /// kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
 /// Input x is assumed to be bounded by ~pi/4 in magnitude.
@@ -271,3 +272,129 @@ pub fn __tandf(x: f64, odd: bool) f32 {
     const r0 = (x + s * u) + (s * w) * (t + w * r);
     return @floatCast(if (odd) -1.0 / r0 else r0);
 }
+
+pub fn __tanl(comptime T: type, x_: T, y_: T, odd: i32) T {
+    var x = x_;
+    var y = y_;
+    const impl = switch (T) {
+        f80 => struct {
+            const pio4: T = 0.785398163397448309628;
+            const pio4lo: T = -1.25413940316708300586e-20;
+
+            const T3: T = 0.333333333333333333180;
+            const T5: T = 0.133333333333333372290;
+            const T7: T = 0.0539682539682504975744;
+            const T9: f64 = 0.021869488536312216;
+            const T11: f64 = 0.0088632355256619590;
+            const T13: f64 = 0.0035921281113786528;
+            const T15: f64 = 0.0014558334756312418;
+            const T17: f64 = 0.00059003538700862256;
+            const T19: f64 = 0.00023907843576635544;
+            const T21: f64 = 0.000097154625656538905;
+            const T23: f64 = 0.000038440165747303162;
+            const T25: f64 = 0.000018082171885432524;
+            const T27: f64 = 0.0000024196006108814377;
+            const T29: f64 = 0.0000078293456938132840;
+            const T31: f64 = -0.0000032609076735050182;
+            const T33: f64 = 0.0000023261313142559411;
+
+            inline fn rpoly(w: T) T {
+                return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+                    w * (T25 + w * (T29 + w * T33))))));
+            }
+
+            inline fn vpoly(w: T) T {
+                return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+                    w * (T27 + w * T31)))));
+            }
+        },
+        f128 => struct {
+            const pio4: T = 0x1.921fb54442d18469898cc51701b8p-1;
+            const pio4lo: T = 0x1.cd129024e088a67cc74020bbea60p-116;
+
+            const T3: T = 0x1.5555555555555555555555555553p-2;
+            const T5: T = 0x1.1111111111111111111111111eb5p-3;
+            const T7: T = 0x1.ba1ba1ba1ba1ba1ba1ba1b694cd6p-5;
+            const T9: T = 0x1.664f4882c10f9f32d6bbe09d8bcdp-6;
+            const T11: T = 0x1.226e355e6c23c8f5b4f5762322eep-7;
+            const T13: T = 0x1.d6d3d0e157ddfb5fed8e84e27b37p-9;
+            const T15: T = 0x1.7da36452b75e2b5fce9ee7c2c92ep-10;
+            const T17: T = 0x1.355824803674477dfcf726649efep-11;
+            const T19: T = 0x1.f57d7734d1656e0aceb716f614c2p-13;
+            const T21: T = 0x1.967e18afcb180ed942dfdc518d6cp-14;
+            const T23: T = 0x1.497d8eea21e95bc7e2aa79b9f2cdp-15;
+            const T25: T = 0x1.0b132d39f055c81be49eff7afd50p-16;
+            const T27: T = 0x1.b0f72d33eff7bfa2fbc1059d90b6p-18;
+            const T29: T = 0x1.5ef2daf21d1113df38d0fbc00267p-19;
+            const T31: T = 0x1.1c77d6eac0234988cdaa04c96626p-20;
+            const T33: T = 0x1.cd2a5a292b180e0bdd701057dfe3p-22;
+            const T35: T = 0x1.75c7357d0298c01a31d0a6f7d518p-23;
+            const T37: T = 0x1.2f3190f4718a9a520f98f50081fcp-24;
+            const T39: f64 = 0.000000028443389121318352;
+            const T41: f64 = 0.000000011981013102001973;
+            const T43: f64 = 0.0000000038303578044958070;
+            const T45: f64 = 0.0000000034664378216909893;
+            const T47: f64 = -0.0000000015090641701997785;
+            const T49: f64 = 0.0000000029449552300483952;
+            const T51: f64 = -0.0000000022006995706097711;
+            const T53: f64 = 0.0000000015468200913196612;
+            const T55: f64 = -0.00000000061311613386849674;
+            const T57: f64 = 1.4912469681508012e-10;
+
+            inline fn rpoly(w: T) T {
+                return T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
+                    w * (T25 + w * (T29 + w * (T33 + w * (T37 + w * (T41 +
+                        w * (T45 + w * (T49 + w * (T53 + w * T57))))))))))));
+            }
+
+            inline fn vpoly(w: T) T {
+                return T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
+                    w * (T27 + w * (T31 + w * (T35 + w * (T39 + w * (T43 +
+                        w * (T47 + w * (T51 + w * T55)))))))))));
+            }
+        },
+        else => @compileError("__tanl supports only f80 and f128, got: " ++ @typeName(T)),
+    };
+
+    const big = @abs(x) >= 0.67434;
+    var sign: i8 = 0;
+
+    if (big) {
+        if (x < 0) {
+            sign = -1;
+            x = -x;
+            y = -y;
+        }
+        x = (impl.pio4 - x) + (impl.pio4lo - y);
+        y = 0.0;
+    }
+
+    var z = x * x;
+    var w = z * z;
+    var r = impl.rpoly(w);
+    var v = z * impl.vpoly(w);
+    var s = z * x;
+    r = y + z * (s * (r + v) + y) + impl.T3 * s;
+    w = x + r;
+
+    if (big) {
+        s = @as(T, @floatFromInt(1 - 2 * odd));
+        v = s - 2.0 * (x + (r - w * w / (w + s)));
+        return if (sign == -1) -v else v;
+    }
+
+    if (odd == 0) {
+        return w;
+    }
+
+    // if allow error up to 2 ulp, simply return
+    // -1.0 / (x+r) here
+    //
+    // compute -1.0 / (x+r) accurately
+    z = w + 0x1p32 - 0x1p32;
+    v = r - (z - x);
+    const a = -1.0 / w;
+    const t = a + 0x1p32 - 0x1p32;
+    s = 1.0 + t * z;
+    return t + a * (s + t * v);
+}