diff --git a/Makefile b/Makefile index fcfad325..5325eaaa 100644 --- a/Makefile +++ b/Makefile @@ -33,14 +33,16 @@ CLANG_FORMAT := clang-format EXTRA_CFLAGS ?= -Wstack-usage=1024 endif -CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \ +BASE_CFLAGS := -O2 -Wall -g -Wundef -Werror=strict-prototypes -fno-common -fno-PIE \ -Werror=implicit-function-declaration -Werror=implicit-int \ -Wsign-compare -Wunused-parameter -Wno-multichar \ -ffreestanding -fpic -ffunction-sections -fdata-sections \ -nostdinc -isystem $(shell $(CC) -print-file-name=include) -isystem sysinc \ - -fno-stack-protector -mgeneral-regs-only -mstrict-align -march=armv8.2-a \ + -fno-stack-protector -mstrict-align -march=armv8.2-a \ $(EXTRA_CFLAGS) +CFLAGS := $(BASE_CFLAGS) -mgeneral-regs-only + CFG := ifeq ($(RELEASE),1) CFG += RELEASE @@ -127,7 +129,13 @@ OBJECTS := \ wdt.o \ $(MINILZLIB_OBJECTS) $(TINF_OBJECTS) $(DLMALLOC_OBJECTS) $(LIBFDT_OBJECTS) $(RUST_LIBS) +FP_OBJECTS := \ + math/expf.o \ + math/exp2f_data.o \ + BUILD_OBJS := $(patsubst %,build/%,$(OBJECTS)) +BUILD_FP_OBJS := $(patsubst %,build/%,$(FP_OBJECTS)) +BUILD_ALL_OBJS := $(BUILD_OBJS) $(BUILD_FP_OBJS) NAME := m1n1 TARGET := m1n1.macho TARGET_RAW := m1n1.bin @@ -139,7 +147,7 @@ all: update_tag update_cfg build/$(TARGET) build/$(TARGET_RAW) clean: rm -rf build/* format: - $(CLANG_FORMAT) -i src/*.c src/*.h sysinc/*.h + $(CLANG_FORMAT) -i src/*.c src/math/*.c src/*.h src/math/*.h sysinc/*.h format-check: $(CLANG_FORMAT) --dry-run --Werror src/*.c src/*.h sysinc/*.h rustfmt: @@ -160,6 +168,12 @@ build/%.o: src/%.S @mkdir -p "$(dir $@)" @$(AS) -c $(CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $< +$(BUILD_FP_OBJS): build/%.o: src/%.c + @echo " CC[FP] $@" + @mkdir -p $(DEPDIR) + @mkdir -p "$(dir $@)" + @$(CC) -c $(BASE_CFLAGS) -MMD -MF $(DEPDIR)/$(*F).d -MQ "$@" -MP -o $@ $< + build/%.o: src/%.c @echo " CC $@" @mkdir -p $(DEPDIR) @@ -170,13 +184,13 @@ build/%.o: src/%.c invoke_cc: @$(CC) -c $(CFLAGS) -Isrc -o $(OBJFILE) $(CFILE) -build/$(NAME).elf: $(BUILD_OBJS) m1n1.ld +build/$(NAME).elf: $(BUILD_ALL_OBJS) m1n1.ld @echo " LD $@" - @$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_OBJS) + @$(LD) -T m1n1.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS) -build/$(NAME)-raw.elf: $(BUILD_OBJS) m1n1-raw.ld +build/$(NAME)-raw.elf: $(BUILD_ALL_OBJS) m1n1-raw.ld @echo " LDRAW $@" - @$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_OBJS) + @$(LD) -T m1n1-raw.ld $(LDFLAGS) -o $@ $(BUILD_ALL_OBJS) build/$(NAME).macho: build/$(NAME).elf @echo " MACHO $@" diff --git a/README.md b/README.md index 3f8c04e7..30299ac7 100644 --- a/README.md +++ b/README.md @@ -132,4 +132,7 @@ licensed under the [OFL-1.1](3rdparty_licenses/LICENSE.OFL-1.1) license and copy m1n1 embeds portions of the [dwc3 usb linux driver](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/usb/dwc3/core.h?id=7bc5a6ba369217e0137833f5955cf0b0f08b0712), which was [BSD-or-GPLv2 dual-licensed](3rdparty_licenses/LICENSE.BSD-3.dwc3) and copyright * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com +m1n1 embeds portions of [musl-libc](https://musl.libc.org/)'s floating point library, which are MIT licensed and copyright +* Copyright (c) 2017-2018, Arm Limited. + m1n1 embeds some rust crates. Licenses can be found in the vendor directory for every crate. diff --git a/src/math/exp2f_data.c b/src/math/exp2f_data.c new file mode 100644 index 00000000..38c9333f --- /dev/null +++ b/src/math/exp2f_data.c @@ -0,0 +1,42 @@ +/* + * Shared data between expf, exp2f and powf. + * + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "exp2f_data.h" + +#define N (1 << EXP2F_TABLE_BITS) + +const struct exp2f_data __exp2f_data = { + /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) + used for computing 2^(k/N) for an int |k| < 150 N as + double(tab[k%N] + (k << 52-BITS)) */ + .tab = + { + 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, + 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, + 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, + 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, + 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, + 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, + }, + .shift_scaled = 0x1.8p+52 / N, + .poly = + { + 0x1.c6af84b912394p-5, + 0x1.ebfce50fac4f3p-3, + 0x1.62e42ff0c52d6p-1, + }, + .shift = 0x1.8p+52, + .invln2_scaled = 0x1.71547652b82fep+0 * N, + .poly_scaled = + { + 0x1.c6af84b912394p-5 / N / N / N, + 0x1.ebfce50fac4f3p-3 / N / N, + 0x1.62e42ff0c52d6p-1 / N, + }, +}; diff --git a/src/math/exp2f_data.h b/src/math/exp2f_data.h new file mode 100644 index 00000000..a88c6ce1 --- /dev/null +++ b/src/math/exp2f_data.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ +#ifndef _EXP2F_DATA_H +#define _EXP2F_DATA_H + +#include + +/* Shared between expf, exp2f and powf. */ +#define EXP2F_TABLE_BITS 5 +#define EXP2F_POLY_ORDER 3 +extern const struct exp2f_data { + uint64_t tab[1 << EXP2F_TABLE_BITS]; + double shift_scaled; + double poly[EXP2F_POLY_ORDER]; + double shift; + double invln2_scaled; + double poly_scaled[EXP2F_POLY_ORDER]; +} __exp2f_data; + +#endif diff --git a/src/math/expf.c b/src/math/expf.c new file mode 100644 index 00000000..c9f1b3f1 --- /dev/null +++ b/src/math/expf.c @@ -0,0 +1,83 @@ +/* + * Single-precision e^x function. + * + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include +#include + +#include "exp2f_data.h" +#include "libm.h" + +#define double_t double + +/* +EXP2F_TABLE_BITS = 5 +EXP2F_POLY_ORDER = 3 + +ULP error: 0.502 (nearest rounding.) +Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) +Wrong count: 170635 (all nearest rounding wrong results with fma.) +Non-nearest ULP error: 1 (rounded ULP error) +*/ + +#define N (1 << EXP2F_TABLE_BITS) +#define InvLn2N __exp2f_data.invln2_scaled +#define T __exp2f_data.tab +#define C __exp2f_data.poly_scaled + +static inline uint32_t top12(float x) +{ + return asuint(x) >> 20; +} + +float expf(float x) +{ + uint32_t abstop; + uint64_t ki, t; + double_t kd, xd, z, r, r2, y, s; + + xd = (double_t)x; + abstop = top12(x) & 0x7ff; + if (predict_false(abstop >= top12(88.0f))) { + /* |x| >= 88 or x is nan. */ + if (asuint(x) == asuint(-INFINITY)) + return 0.0f; + if (abstop >= top12(INFINITY)) + return x + x; + if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ + return __math_oflowf(0); + if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ + return __math_uflowf(0); + } + + /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ + z = InvLn2N * xd; + + /* Round and convert z to int, the result is in [-150*N, 128*N] and + ideally ties-to-even rule is used, otherwise the magnitude of r + can be bigger which gives larger approximation error. */ +#if TOINT_INTRINSICS + kd = roundtoint(z); + ki = converttoint(z); +#else +#define SHIFT __exp2f_data.shift + kd = eval_as_double(z + SHIFT); + ki = asuint64(kd); + kd -= SHIFT; +#endif + r = z - kd; + + /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + t += ki << (52 - EXP2F_TABLE_BITS); + s = asdouble(t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float(y); +} diff --git a/src/math/libm.h b/src/math/libm.h new file mode 100644 index 00000000..1616c741 --- /dev/null +++ b/src/math/libm.h @@ -0,0 +1,271 @@ +#ifndef _LIBM_H +#define _LIBM_H + +#include +#include +#include +#include + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t m; + uint16_t se; + } i; +}; +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +/* This is the m68k variant of 80-bit long double, and this definition only works + * on archs where the alignment requirement of uint64_t is <= 4. */ +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t pad; + uint64_t m; + } i; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t lo; + uint32_t mid; + uint16_t top; + uint16_t se; + } i; + struct { + uint64_t lo; + uint64_t hi; + } i2; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t top; + uint32_t mid; + uint64_t lo; + } i; + struct { + uint64_t hi; + uint64_t lo; + } i2; +}; +#else +#error Unsupported long double representation +#endif + +/* Support non-nearest rounding mode. */ +#define WANT_ROUNDING 1 +/* Support signaling NaNs. */ +#define WANT_SNAN 0 + +#if WANT_SNAN +#error SNaN is unsupported +#else +#define issignalingf_inline(x) 0 +#define issignaling_inline(x) 0 +#endif + +#ifndef TOINT_INTRINSICS +#define TOINT_INTRINSICS 0 +#endif + +#if TOINT_INTRINSICS +/* Round x to nearest int in all rounding modes, ties have to be rounded + consistently with converttoint so the results match. If the result + would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ +static double_t roundtoint(double_t); + +/* Convert x to nearest int in all rounding modes, ties have to be rounded + consistently with roundtoint. If the result is not representible in an + int32_t then the semantics is unspecified. */ +static int32_t converttoint(double_t); +#endif + +/* Helps static branch prediction so hot path can be better optimized. */ +#ifdef __GNUC__ +#define predict_true(x) __builtin_expect(!!(x), 1) +#define predict_false(x) __builtin_expect(x, 0) +#else +#define predict_true(x) (x) +#define predict_false(x) (x) +#endif + +/* Evaluate an expression as the specified type. With standard excess + precision handling a type cast or assignment is enough (with + -ffloat-store an assignment is required, in old compilers argument + passing and return statement may not drop excess precision). */ + +static inline float eval_as_float(float x) +{ + float y = x; + return y; +} + +static inline double eval_as_double(double x) +{ + double y = x; + return y; +} + +/* fp_barrier returns its input, but limits code transformations + as if it had a side-effect (e.g. observable io) and returned + an arbitrary value. */ + +#ifndef fp_barrierf +#define fp_barrierf fp_barrierf +static inline float fp_barrierf(float x) +{ + volatile float y = x; + return y; +} +#endif + +#ifndef fp_barrier +#define fp_barrier fp_barrier +static inline double fp_barrier(double x) +{ + volatile double y = x; + return y; +} +#endif + +#ifndef fp_barrierl +#define fp_barrierl fp_barrierl +static inline long double fp_barrierl(long double x) +{ + volatile long double y = x; + return y; +} +#endif + +/* fp_force_eval ensures that the input value is computed when that's + otherwise unused. To prevent the constant folding of the input + expression, an additional fp_barrier may be needed or a compilation + mode that does so (e.g. -frounding-math in gcc). Then it can be + used to evaluate an expression for its fenv side-effects only. */ + +#ifndef fp_force_evalf +#define fp_force_evalf fp_force_evalf +static inline void fp_force_evalf(float x) +{ + volatile float y; + y = x; + (void)y; +} +#endif + +#ifndef fp_force_eval +#define fp_force_eval fp_force_eval +static inline void fp_force_eval(double x) +{ + volatile double y; + y = x; + (void)y; +} +#endif + +#ifndef fp_force_evall +#define fp_force_evall fp_force_evall +static inline void fp_force_evall(long double x) +{ + volatile long double y; + y = x; + (void)y; +} +#endif + +#define FORCE_EVAL(x) \ + do { \ + if (sizeof(x) == sizeof(float)) { \ + fp_force_evalf(x); \ + } else if (sizeof(x) == sizeof(double)) { \ + fp_force_eval(x); \ + } else { \ + fp_force_evall(x); \ + } \ + } while (0) + +#define asuint(f) \ + ((union { \ + float _f; \ + uint32_t _i; \ + }){f}) \ + ._i +#define asfloat(i) \ + ((union { \ + uint32_t _i; \ + float _f; \ + }){i}) \ + ._f +#define asuint64(f) \ + ((union { \ + double _f; \ + uint64_t _i; \ + }){f}) \ + ._i +#define asdouble(i) \ + ((union { \ + uint64_t _i; \ + double _f; \ + }){i}) \ + ._f + +#define EXTRACT_WORDS(hi, lo, d) \ + do { \ + uint64_t __u = asuint64(d); \ + (hi) = __u >> 32; \ + (lo) = (uint32_t)__u; \ + } while (0) + +#define GET_HIGH_WORD(hi, d) \ + do { \ + (hi) = asuint64(d) >> 32; \ + } while (0) + +#define GET_LOW_WORD(lo, d) \ + do { \ + (lo) = (uint32_t)asuint64(d); \ + } while (0) + +#define INSERT_WORDS(d, hi, lo) \ + do { \ + (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \ + } while (0) + +#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) + +#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo) + +#define GET_FLOAT_WORD(w, d) \ + do { \ + (w) = asuint(d); \ + } while (0) + +#define SET_FLOAT_WORD(d, w) \ + do { \ + (d) = asfloat(w); \ + } while (0) + +/* error handling functions */ + +static inline float __math_xflowf(uint32_t sign, float y) +{ + return eval_as_float(fp_barrierf(sign ? -y : y) * y); +} + +static inline float __math_oflowf(uint32_t sign) +{ + return __math_xflowf(sign, 0x1p97f); +} + +float __math_uflowf(uint32_t sign) +{ + return __math_xflowf(sign, 0x1p-95f); +} + +#endif diff --git a/sysinc/endian.h b/sysinc/endian.h new file mode 100644 index 00000000..6115c0b6 --- /dev/null +++ b/sysinc/endian.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef ENDIAN_H +#define ENDIAN_H + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __PDP_ENDIAN 3412 + +#define __BYTE_ORDER __LITTLE_ENDIAN + +#endif diff --git a/sysinc/math.h b/sysinc/math.h new file mode 100644 index 00000000..38094469 --- /dev/null +++ b/sysinc/math.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef MATH_H +#define MATH_H + +#if 100 * __GNUC__ + __GNUC_MINOR__ >= 303 +#define NAN __builtin_nanf("") +#define INFINITY __builtin_inff() +#else +#define NAN (0.0f / 0.0f) +#define INFINITY 1e5000f +#endif + +float expf(float); + +#endif