math: Add expf() implementation from musl

Signed-off-by: Asahi Lina <lina@asahilina.net>
This commit is contained in:
Asahi Lina 2022-11-30 17:28:25 +09:00 committed by Hector Martin
parent 87fa247d9b
commit 057151cb21
8 changed files with 470 additions and 7 deletions

View file

@ -33,14 +33,16 @@ CLANG_FORMAT := clang-format
EXTRA_CFLAGS ?= -Wstack-usage=1024
endif
CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \
BASE_CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \
-Werror=implicit-function-declaration -Werror=implicit-int \
-Wsign-compare -Wunused-parameter -Wno-multichar \
-ffreestanding -fpic -ffunction-sections -fdata-sections \
-nostdinc -isystem $(shell $(CC) -print-file-name=include) -isystem sysinc \
-fno-stack-protector -mgeneral-regs-only -mstrict-align -march=armv8.2-a \
-fno-stack-protector -mstrict-align -march=armv8.2-a \
$(EXTRA_CFLAGS)
CFLAGS := $(BASE_CFLAGS) -mgeneral-regs-only
CFG :=
ifeq ($(RELEASE),1)
CFG += RELEASE
@ -127,7 +129,13 @@ OBJECTS := \
wdt.o \
$(MINILZLIB_OBJECTS) $(TINF_OBJECTS) $(DLMALLOC_OBJECTS) $(LIBFDT_OBJECTS) $(RUST_LIBS)
FP_OBJECTS := \
math/expf.o \
math/exp2f_data.o \
BUILD_OBJS := $(patsubst %,build/%,$(OBJECTS))
BUILD_FP_OBJS := $(patsubst %,build/%,$(FP_OBJECTS))
BUILD_ALL_OBJS := $(BUILD_OBJS) $(BUILD_FP_OBJS)
NAME := m1n1
TARGET := m1n1.macho
TARGET_RAW := m1n1.bin
@ -139,7 +147,7 @@ all: update_tag update_cfg build/$(TARGET) build/$(TARGET_RAW)
clean:
rm -rf build/*
format:
$(CLANG_FORMAT) -i src/*.c src/*.h sysinc/*.h
$(CLANG_FORMAT) -i src/*.c src/math/*.c src/*.h src/math/*.h sysinc/*.h
format-check:
$(CLANG_FORMAT) --dry-run --Werror src/*.c src/*.h sysinc/*.h
rustfmt:
@ -160,6 +168,12 @@ build/%.o: src/%.S
@mkdir -p "$(dir $@)"
@$(AS) -c $(CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $<
$(BUILD_FP_OBJS): build/%.o: src/%.c
@echo " CC[FP] $@"
@mkdir -p $(DEPDIR)
@mkdir -p "$(dir $@)"
@$(CC) -c $(BASE_CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $<
build/%.o: src/%.c
@echo " CC $@"
@mkdir -p $(DEPDIR)
@ -170,13 +184,13 @@ build/%.o: src/%.c
invoke_cc:
@$(CC) -c $(CFLAGS) -Isrc -o $(OBJFILE) $(CFILE)
build/$(NAME).elf: $(BUILD_OBJS) m1n1.ld
build/$(NAME).elf: $(BUILD_ALL_OBJS) m1n1.ld
@echo " LD $@"
@$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_OBJS)
@$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS)
build/$(NAME)-raw.elf: $(BUILD_OBJS) m1n1-raw.ld
build/$(NAME)-raw.elf: $(BUILD_ALL_OBJS) m1n1-raw.ld
@echo " LDRAW $@"
@$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_OBJS)
@$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS)
build/$(NAME).macho: build/$(NAME).elf
@echo " MACHO $@"

View file

@ -132,4 +132,7 @@ licensed under the [OFL-1.1](3rdparty_licenses/LICENSE.OFL-1.1) license and copy
m1n1 embeds portions of the [dwc3 usb linux driver](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/usb/dwc3/core.h?id=7bc5a6ba369217e0137833f5955cf0b0f08b0712), which was [BSD-or-GPLv2 dual-licensed](3rdparty_licenses/LICENSE.BSD-3.dwc3) and copyright
* Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com
m1n1 embeds portions of [musl-libc](https://musl.libc.org/)'s floating point library, which are MIT licensed and copyright
* Copyright (c) 2017-2018, Arm Limited.
m1n1 embeds some rust crates. Licenses can be found in the vendor directory for every crate.

42
src/math/exp2f_data.c Normal file
View file

@ -0,0 +1,42 @@
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp2f_data.h"
#define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab =
{
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
},
.shift_scaled = 0x1.8p+52 / N,
.poly =
{
0x1.c6af84b912394p-5,
0x1.ebfce50fac4f3p-3,
0x1.62e42ff0c52d6p-1,
},
.shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled =
{
0x1.c6af84b912394p-5 / N / N / N,
0x1.ebfce50fac4f3p-3 / N / N,
0x1.62e42ff0c52d6p-1 / N,
},
};

22
src/math/exp2f_data.h Normal file
View file

@ -0,0 +1,22 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP2F_DATA_H
#define _EXP2F_DATA_H
#include <stdint.h>
/* Shared between expf, exp2f and powf. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern const struct exp2f_data {
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
#endif

83
src/math/expf.c Normal file
View file

@ -0,0 +1,83 @@
/*
* Single-precision e^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "exp2f_data.h"
#include "libm.h"
#define double_t double
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
Wrong count: 170635 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define InvLn2N __exp2f_data.invln2_scaled
#define T __exp2f_data.tab
#define C __exp2f_data.poly_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float expf(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(88.0f))) {
/* |x| >= 88 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf(0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf(0);
}
/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;
/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally ties-to-even rule is used, otherwise the magnitude of r
can be bigger which gives larger approximation error. */
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#else
#define SHIFT __exp2f_data.shift
kd = eval_as_double(z + SHIFT);
ki = asuint64(kd);
kd -= SHIFT;
#endif
r = z - kd;
/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}

271
src/math/libm.h Normal file
View file

@ -0,0 +1,271 @@
#ifndef _LIBM_H
#define _LIBM_H
#include <endian.h>
#include <float.h>
#include <math.h>
#include <stdint.h>
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t m;
uint16_t se;
} i;
};
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
/* This is the m68k variant of 80-bit long double, and this definition only works
* on archs where the alignment requirement of uint64_t is <= 4. */
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t pad;
uint64_t m;
} i;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t lo;
uint32_t mid;
uint16_t top;
uint16_t se;
} i;
struct {
uint64_t lo;
uint64_t hi;
} i2;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t top;
uint32_t mid;
uint64_t lo;
} i;
struct {
uint64_t hi;
uint64_t lo;
} i2;
};
#else
#error Unsupported long double representation
#endif
/* Support non-nearest rounding mode. */
#define WANT_ROUNDING 1
/* Support signaling NaNs. */
#define WANT_SNAN 0
#if WANT_SNAN
#error SNaN is unsupported
#else
#define issignalingf_inline(x) 0
#define issignaling_inline(x) 0
#endif
#ifndef TOINT_INTRINSICS
#define TOINT_INTRINSICS 0
#endif
#if TOINT_INTRINSICS
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static double_t roundtoint(double_t);
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static int32_t converttoint(double_t);
#endif
/* Helps static branch prediction so hot path can be better optimized. */
#ifdef __GNUC__
#define predict_true(x) __builtin_expect(!!(x), 1)
#define predict_false(x) __builtin_expect(x, 0)
#else
#define predict_true(x) (x)
#define predict_false(x) (x)
#endif
/* Evaluate an expression as the specified type. With standard excess
precision handling a type cast or assignment is enough (with
-ffloat-store an assignment is required, in old compilers argument
passing and return statement may not drop excess precision). */
static inline float eval_as_float(float x)
{
float y = x;
return y;
}
static inline double eval_as_double(double x)
{
double y = x;
return y;
}
/* fp_barrier returns its input, but limits code transformations
as if it had a side-effect (e.g. observable io) and returned
an arbitrary value. */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x)
{
volatile float y = x;
return y;
}
#endif
#ifndef fp_barrier
#define fp_barrier fp_barrier
static inline double fp_barrier(double x)
{
volatile double y = x;
return y;
}
#endif
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
static inline long double fp_barrierl(long double x)
{
volatile long double y = x;
return y;
}
#endif
/* fp_force_eval ensures that the input value is computed when that's
otherwise unused. To prevent the constant folding of the input
expression, an additional fp_barrier may be needed or a compilation
mode that does so (e.g. -frounding-math in gcc). Then it can be
used to evaluate an expression for its fenv side-effects only. */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x)
{
volatile float y;
y = x;
(void)y;
}
#endif
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x)
{
volatile double y;
y = x;
(void)y;
}
#endif
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
static inline void fp_force_evall(long double x)
{
volatile long double y;
y = x;
(void)y;
}
#endif
#define FORCE_EVAL(x) \
do { \
if (sizeof(x) == sizeof(float)) { \
fp_force_evalf(x); \
} else if (sizeof(x) == sizeof(double)) { \
fp_force_eval(x); \
} else { \
fp_force_evall(x); \
} \
} while (0)
#define asuint(f) \
((union { \
float _f; \
uint32_t _i; \
}){f}) \
._i
#define asfloat(i) \
((union { \
uint32_t _i; \
float _f; \
}){i}) \
._f
#define asuint64(f) \
((union { \
double _f; \
uint64_t _i; \
}){f}) \
._i
#define asdouble(i) \
((union { \
uint64_t _i; \
double _f; \
}){i}) \
._f
#define EXTRACT_WORDS(hi, lo, d) \
do { \
uint64_t __u = asuint64(d); \
(hi) = __u >> 32; \
(lo) = (uint32_t)__u; \
} while (0)
#define GET_HIGH_WORD(hi, d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo, d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define INSERT_WORDS(d, hi, lo) \
do { \
(d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \
} while (0)
#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo)
#define GET_FLOAT_WORD(w, d) \
do { \
(w) = asuint(d); \
} while (0)
#define SET_FLOAT_WORD(d, w) \
do { \
(d) = asfloat(w); \
} while (0)
/* error handling functions */
static inline float __math_xflowf(uint32_t sign, float y)
{
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
}
static inline float __math_oflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p97f);
}
float __math_uflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p-95f);
}
#endif

12
sysinc/endian.h Normal file
View file

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: MIT */
#ifndef ENDIAN_H
#define ENDIAN_H
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#define __PDP_ENDIAN 3412
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif

16
sysinc/math.h Normal file
View file

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: MIT */
#ifndef MATH_H
#define MATH_H
#if 100 * __GNUC__ + __GNUC_MINOR__ >= 303
#define NAN __builtin_nanf("")
#define INFINITY __builtin_inff()
#else
#define NAN (0.0f / 0.0f)
#define INFINITY 1e5000f
#endif
float expf(float);
#endif