math: Add expf() implementation from musl

Signed-off-by: Asahi Lina <lina@asahilina.net>
2025-02-18 22:58:32 +00:00 · 2022-11-30 17:28:25 +09:00 · 2022-11-30 17:28:25 +09:00 · 057151cb21
commit 057151cb21
parent 87fa247d9b
8 changed files with 470 additions and 7 deletions
--- a/28
+++ b/28
@ -33,14 +33,16 @@ CLANG_FORMAT := clang-format
 EXTRA_CFLAGS ?= -Wstack-usage=1024
 endif

-CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \
+BASE_CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \
 	-Werror=implicit-function-declaration -Werror=implicit-int \
 	-Wsign-compare -Wunused-parameter -Wno-multichar \
 	-ffreestanding -fpic -ffunction-sections -fdata-sections \
 	-nostdinc -isystem $(shell $(CC) -print-file-name=include) -isystem sysinc \
-	-fno-stack-protector -mgeneral-regs-only -mstrict-align -march=armv8.2-a \
+	-fno-stack-protector -mstrict-align -march=armv8.2-a \
 	$(EXTRA_CFLAGS)

+CFLAGS := $(BASE_CFLAGS) -mgeneral-regs-only
+
 CFG :=
 ifeq ($(RELEASE),1)
 CFG += RELEASE
@ -127,7 +129,13 @@ OBJECTS := \
 	wdt.o \
 	$(MINILZLIB_OBJECTS) $(TINF_OBJECTS) $(DLMALLOC_OBJECTS) $(LIBFDT_OBJECTS) $(RUST_LIBS)

+FP_OBJECTS := \
+	math/expf.o \
+	math/exp2f_data.o \
+
 BUILD_OBJS := $(patsubst %,build/%,$(OBJECTS))
+BUILD_FP_OBJS := $(patsubst %,build/%,$(FP_OBJECTS))
+BUILD_ALL_OBJS := $(BUILD_OBJS) $(BUILD_FP_OBJS)
 NAME := m1n1
 TARGET := m1n1.macho
 TARGET_RAW := m1n1.bin
@ -139,7 +147,7 @@ all: update_tag update_cfg build/$(TARGET) build/$(TARGET_RAW)
 clean:
 	rm -rf build/*
 format:
-	$(CLANG_FORMAT) -i src/*.c src/*.h sysinc/*.h
+	$(CLANG_FORMAT) -i src/*.c src/math/*.c src/*.h src/math/*.h sysinc/*.h
 format-check:
 	$(CLANG_FORMAT) --dry-run --Werror src/*.c src/*.h sysinc/*.h
 rustfmt:
@ -160,6 +168,12 @@ build/%.o: src/%.S
 	@mkdir -p "$(dir $@)"
 	@$(AS) -c $(CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $<

+$(BUILD_FP_OBJS): build/%.o: src/%.c
+	@echo "  CC[FP]  $@"
+	@mkdir -p $(DEPDIR)
+	@mkdir -p "$(dir $@)"
+	@$(CC) -c $(BASE_CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $<
+
 build/%.o: src/%.c
 	@echo "  CC    $@"
 	@mkdir -p $(DEPDIR)
@ -170,13 +184,13 @@ build/%.o: src/%.c
 invoke_cc:
 	@$(CC) -c $(CFLAGS) -Isrc -o $(OBJFILE) $(CFILE)

-build/$(NAME).elf: $(BUILD_OBJS) m1n1.ld
+build/$(NAME).elf: $(BUILD_ALL_OBJS) m1n1.ld
 	@echo "  LD    $@"
-	@$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_OBJS)
+	@$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS)

-build/$(NAME)-raw.elf: $(BUILD_OBJS) m1n1-raw.ld
+build/$(NAME)-raw.elf: $(BUILD_ALL_OBJS) m1n1-raw.ld
 	@echo "  LDRAW $@"
-	@$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_OBJS)
+	@$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS)

 build/$(NAME).macho: build/$(NAME).elf
 	@echo "  MACHO $@"
--- a/README.md
+++ b/README.md
@ -132,4 +132,7 @@ licensed under the [OFL-1.1](3rdparty_licenses/LICENSE.OFL-1.1) license and copy
 m1n1 embeds portions of the [dwc3 usb linux driver](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/usb/dwc3/core.h?id=7bc5a6ba369217e0137833f5955cf0b0f08b0712), which was [BSD-or-GPLv2 dual-licensed](3rdparty_licenses/LICENSE.BSD-3.dwc3) and copyright
 * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com

+m1n1 embeds portions of [musl-libc](https://musl.libc.org/)'s floating point library, which are MIT licensed and copyright
+* Copyright (c) 2017-2018, Arm Limited.
+
 m1n1 embeds some rust crates. Licenses can be found in the vendor directory for every crate.
--- a/src/math/exp2f_data.c
+++ b/src/math/exp2f_data.c
@ -0,0 +1,42 @@
+/*
+ * Shared data between expf, exp2f and powf.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "exp2f_data.h"
+
+#define N (1 << EXP2F_TABLE_BITS)
+
+const struct exp2f_data __exp2f_data = {
+    /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+       used for computing 2^(k/N) for an int |k| < 150 N as
+       double(tab[k%N] + (k << 52-BITS)) */
+    .tab =
+        {
+            0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+            0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+            0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+            0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+            0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+            0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+            0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+            0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+        },
+    .shift_scaled = 0x1.8p+52 / N,
+    .poly =
+        {
+            0x1.c6af84b912394p-5,
+            0x1.ebfce50fac4f3p-3,
+            0x1.62e42ff0c52d6p-1,
+        },
+    .shift = 0x1.8p+52,
+    .invln2_scaled = 0x1.71547652b82fep+0 * N,
+    .poly_scaled =
+        {
+            0x1.c6af84b912394p-5 / N / N / N,
+            0x1.ebfce50fac4f3p-3 / N / N,
+            0x1.62e42ff0c52d6p-1 / N,
+        },
+};
--- a/src/math/exp2f_data.h
+++ b/src/math/exp2f_data.h
@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef _EXP2F_DATA_H
+#define _EXP2F_DATA_H
+
+#include <stdint.h>
+
+/* Shared between expf, exp2f and powf.  */
+#define EXP2F_TABLE_BITS 5
+#define EXP2F_POLY_ORDER 3
+extern const struct exp2f_data {
+    uint64_t tab[1 << EXP2F_TABLE_BITS];
+    double shift_scaled;
+    double poly[EXP2F_POLY_ORDER];
+    double shift;
+    double invln2_scaled;
+    double poly_scaled[EXP2F_POLY_ORDER];
+} __exp2f_data;
+
+#endif
--- a/src/math/expf.c
+++ b/src/math/expf.c
@ -0,0 +1,83 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2018, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "exp2f_data.h"
+#include "libm.h"
+
+#define double_t double
+
+/*
+EXP2F_TABLE_BITS = 5
+EXP2F_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N       (1 << EXP2F_TABLE_BITS)
+#define InvLn2N __exp2f_data.invln2_scaled
+#define T       __exp2f_data.tab
+#define C       __exp2f_data.poly_scaled
+
+static inline uint32_t top12(float x)
+{
+    return asuint(x) >> 20;
+}
+
+float expf(float x)
+{
+    uint32_t abstop;
+    uint64_t ki, t;
+    double_t kd, xd, z, r, r2, y, s;
+
+    xd = (double_t)x;
+    abstop = top12(x) & 0x7ff;
+    if (predict_false(abstop >= top12(88.0f))) {
+        /* |x| >= 88 or x is nan.  */
+        if (asuint(x) == asuint(-INFINITY))
+            return 0.0f;
+        if (abstop >= top12(INFINITY))
+            return x + x;
+        if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+            return __math_oflowf(0);
+        if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+            return __math_uflowf(0);
+    }
+
+    /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
+    z = InvLn2N * xd;
+
+    /* Round and convert z to int, the result is in [-150*N, 128*N] and
+       ideally ties-to-even rule is used, otherwise the magnitude of r
+       can be bigger which gives larger approximation error.  */
+#if TOINT_INTRINSICS
+    kd = roundtoint(z);
+    ki = converttoint(z);
+#else
+#define SHIFT __exp2f_data.shift
+    kd = eval_as_double(z + SHIFT);
+    ki = asuint64(kd);
+    kd -= SHIFT;
+#endif
+    r = z - kd;
+
+    /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+    t = T[ki % N];
+    t += ki << (52 - EXP2F_TABLE_BITS);
+    s = asdouble(t);
+    z = C[0] * r + C[1];
+    r2 = r * r;
+    y = C[2] * r + 1;
+    y = z * r2 + y;
+    y = y * s;
+    return eval_as_float(y);
+}
--- a/src/math/libm.h
+++ b/src/math/libm.h
@ -0,0 +1,271 @@
+#ifndef _LIBM_H
+#define _LIBM_H
+
+#include <endian.h>
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+    long double f;
+    struct {
+        uint64_t m;
+        uint16_t se;
+    } i;
+};
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+/* This is the m68k variant of 80-bit long double, and this definition only works
+ * on archs where the alignment requirement of uint64_t is <= 4. */
+union ldshape {
+    long double f;
+    struct {
+        uint16_t se;
+        uint16_t pad;
+        uint64_t m;
+    } i;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+    long double f;
+    struct {
+        uint64_t lo;
+        uint32_t mid;
+        uint16_t top;
+        uint16_t se;
+    } i;
+    struct {
+        uint64_t lo;
+        uint64_t hi;
+    } i2;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+union ldshape {
+    long double f;
+    struct {
+        uint16_t se;
+        uint16_t top;
+        uint32_t mid;
+        uint64_t lo;
+    } i;
+    struct {
+        uint64_t hi;
+        uint64_t lo;
+    } i2;
+};
+#else
+#error Unsupported long double representation
+#endif
+
+/* Support non-nearest rounding mode.  */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs.  */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x)  0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representible in an
+   int32_t then the semantics is unspecified.  */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized.  */
+#ifdef __GNUC__
+#define predict_true(x)  __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x)  (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+   precision handling a type cast or assignment is enough (with
+   -ffloat-store an assignment is required, in old compilers argument
+   passing and return statement may not drop excess precision).  */
+
+static inline float eval_as_float(float x)
+{
+    float y = x;
+    return y;
+}
+
+static inline double eval_as_double(double x)
+{
+    double y = x;
+    return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+   as if it had a side-effect (e.g. observable io) and returned
+   an arbitrary value.  */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+    volatile float y = x;
+    return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+    volatile double y = x;
+    return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+    volatile long double y = x;
+    return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+   otherwise unused.  To prevent the constant folding of the input
+   expression, an additional fp_barrier may be needed or a compilation
+   mode that does so (e.g. -frounding-math in gcc). Then it can be
+   used to evaluate an expression for its fenv side-effects only.   */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+    volatile float y;
+    y = x;
+    (void)y;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+    volatile double y;
+    y = x;
+    (void)y;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+    volatile long double y;
+    y = x;
+    (void)y;
+}
+#endif
+
+#define FORCE_EVAL(x)                                                                              \
+    do {                                                                                           \
+        if (sizeof(x) == sizeof(float)) {                                                          \
+            fp_force_evalf(x);                                                                     \
+        } else if (sizeof(x) == sizeof(double)) {                                                  \
+            fp_force_eval(x);                                                                      \
+        } else {                                                                                   \
+            fp_force_evall(x);                                                                     \
+        }                                                                                          \
+    } while (0)
+
+#define asuint(f)                                                                                  \
+    ((union {                                                                                      \
+        float _f;                                                                                  \
+        uint32_t _i;                                                                               \
+    }){f})                                                                                         \
+        ._i
+#define asfloat(i)                                                                                 \
+    ((union {                                                                                      \
+        uint32_t _i;                                                                               \
+        float _f;                                                                                  \
+    }){i})                                                                                         \
+        ._f
+#define asuint64(f)                                                                                \
+    ((union {                                                                                      \
+        double _f;                                                                                 \
+        uint64_t _i;                                                                               \
+    }){f})                                                                                         \
+        ._i
+#define asdouble(i)                                                                                \
+    ((union {                                                                                      \
+        uint64_t _i;                                                                               \
+        double _f;                                                                                 \
+    }){i})                                                                                         \
+        ._f
+
+#define EXTRACT_WORDS(hi, lo, d)                                                                   \
+    do {                                                                                           \
+        uint64_t __u = asuint64(d);                                                                \
+        (hi) = __u >> 32;                                                                          \
+        (lo) = (uint32_t)__u;                                                                      \
+    } while (0)
+
+#define GET_HIGH_WORD(hi, d)                                                                       \
+    do {                                                                                           \
+        (hi) = asuint64(d) >> 32;                                                                  \
+    } while (0)
+
+#define GET_LOW_WORD(lo, d)                                                                        \
+    do {                                                                                           \
+        (lo) = (uint32_t)asuint64(d);                                                              \
+    } while (0)
+
+#define INSERT_WORDS(d, hi, lo)                                                                    \
+    do {                                                                                           \
+        (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo));                                   \
+    } while (0)
+
+#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
+
+#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo)
+
+#define GET_FLOAT_WORD(w, d)                                                                       \
+    do {                                                                                           \
+        (w) = asuint(d);                                                                           \
+    } while (0)
+
+#define SET_FLOAT_WORD(d, w)                                                                       \
+    do {                                                                                           \
+        (d) = asfloat(w);                                                                          \
+    } while (0)
+
+/* error handling functions */
+
+static inline float __math_xflowf(uint32_t sign, float y)
+{
+    return eval_as_float(fp_barrierf(sign ? -y : y) * y);
+}
+
+static inline float __math_oflowf(uint32_t sign)
+{
+    return __math_xflowf(sign, 0x1p97f);
+}
+
+float __math_uflowf(uint32_t sign)
+{
+    return __math_xflowf(sign, 0x1p-95f);
+}
+
+#endif
--- a/sysinc/endian.h
+++ b/sysinc/endian.h
@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef ENDIAN_H
+#define ENDIAN_H
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN    4321
+#define __PDP_ENDIAN    3412
+
+#define __BYTE_ORDER __LITTLE_ENDIAN
+
+#endif
--- a/sysinc/math.h
+++ b/sysinc/math.h
@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef MATH_H
+#define MATH_H
+
+#if 100 * __GNUC__ + __GNUC_MINOR__ >= 303
+#define NAN      __builtin_nanf("")
+#define INFINITY __builtin_inff()
+#else
+#define NAN      (0.0f / 0.0f)
+#define INFINITY 1e5000f
+#endif
+
+float expf(float);
+
+#endif