u-boot/arch/arm/lib/bitops.S
Sean Anderson 726a802fda arm: Use builtins for ffs/fls
Since ARMv5, the clz instruction allows for efficient implementation of
ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
only available in ARM mode. LTO makes it difficult to force specific
functions to be in ARM mode, as it is effectively a form of very
aggressive inlining. To work around this, fls/ffs are implemented in
assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
Overall, this saves around 75 bytes per call.

This code is synced with v5.15 of the Linux kernel.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Tom Rini <trini@konsulko.com>
2023-08-17 16:39:20 -04:00

45 lines
724 B
ArmAsm

/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2023 Sean Anderson <sean.anderson@seco.com>
*
* ARM bitops to call when using THUMB1, which doesn't have the clz instruction.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.pushsection .text.__fls
/*
 * __fls - bit index of the most significant set bit
 *
 * In:    r0 = word to scan
 * Out:   r0 = 31 - clz(word)
 * Clobb: r3
 *
 * For a zero word clz yields 32, so r0 comes back as -1 (0xffffffff).
 */
ENTRY(__fls)
	clz	r3, r0			@ r3 = number of leading zero bits
	rsb	r0, r3, #31		@ r0 = 31 - r3
	ret	lr
ENDPROC(__fls)
.popsection
.pushsection .text.__ffs
/*
 * __ffs - bit index of the least significant set bit
 *
 * In:    r0 = word to scan
 * Out:   r0 = 31 - clz(lowest set bit of word)
 * Clobb: r3
 *
 * The lowest set bit is isolated first so that clz, which counts from the
 * top, reports its position. For a zero word nothing is isolated, clz
 * yields 32 and r0 comes back as -1 (0xffffffff).
 */
ENTRY(__ffs)
	sub	r3, r0, #1		@ r3 = word - 1
	bic	r0, r0, r3		@ r0 = word & ~(word - 1), same as word & -word
	clz	r0, r0			@ r0 = leading zeros above the isolated bit
	rsb	r0, r0, #31		@ r0 = 31 - r0
	ret	lr
ENDPROC(__ffs)
.popsection
.pushsection .text.fls
/*
 * fls - 1-based position of the most significant set bit, 0 if none
 *
 * In:    r0 = value to scan
 * Out:   r0 = 32 - clz(value)
 * Clobb: none besides r0 (condition flags are left untouched)
 *
 * No explicit zero check is needed: CLZ is architecturally defined to
 * return 32 for a zero operand, so 32 - 32 already produces the required 0.
 * This is the same property ffs in this file depends on.
 */
ENTRY(fls)
	clz	r0, r0			@ r0 = number of leading zero bits (32 if value == 0)
	rsb	r0, r0, #32		@ r0 = 32 - r0
	ret	lr
ENDPROC(fls)
.popsection
.pushsection .text.ffs
/*
 * ffs - 1-based position of the least significant set bit, 0 if none
 *
 * In:    r0 = value to scan
 * Out:   r0 = 32 - clz(lowest set bit of value)
 * Clobb: r3
 *
 * For a zero value the isolation step leaves 0, clz yields 32 and the
 * result is 32 - 32 = 0.
 */
ENTRY(ffs)
	sub	r3, r0, #1		@ r3 = value - 1
	bic	r0, r0, r3		@ r0 = value & ~(value - 1), same as value & -value
	clz	r0, r0			@ r0 = leading zeros above the isolated bit
	rsb	r0, r0, #32		@ r0 = 32 - r0
	ret	lr
ENDPROC(ffs)
.popsection