mirror of
https://github.com/AsahiLinux/u-boot
synced 2024-12-14 23:33:00 +00:00
5ffcf7c590
The assembly for __gnu_thumb1_case_si was taken from upstream gcc and adapted
as width suffix was removed for the add instruction [1].
Signed-off-by: Francis Laniel <francis.laniel@amarulasolutions.com>
Tested-by: Tony Dinh <mibodhi@gmail.com>
[1] 4f181f9c7e/libgcc/config/arm/lib1funcs.S (L2156)
Acked-by: Pali Rohár <pali@kernel.org>
Acked-by: Tony Dinh <mibodhi@gmail.com>
439 lines
9 KiB
ArmAsm
439 lines
9 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0+ */
|
|
/*
|
|
* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
|
|
*
|
|
* Author: Nicolas Pitre <nico@fluxnic.net>
|
|
* - contributed to gcc-3.4 on Sep 30, 2003
|
|
* - adapted for the Linux kernel on Oct 2, 2003
|
|
*/
|
|
/*
|
|
* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/*
|
|
* U-Boot compatibility bit: define an empty UNWIND() macro, since we
|
|
* do not support stack unwinding. This keeps all of the functions
|
|
* available without diverging from the Linux code.
|
|
*/
|
|
#ifdef __UBOOT__
|
|
/* Every UNWIND(...) annotation below expands to nothing in this build. */
#define UNWIND(x...)
|
|
#endif
|
|
|
|
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core. The quotient is built up
 * in "result" while shifted copies of "divisor" are subtracted from
 * "dividend".
 *
 * All four arguments are registers and all of them are written:
 * "dividend" is reduced in place, "divisor" is shifted, and "curbit"
 * holds the quotient bit that matches the current divisor position.
 *
 * The callers in this file only expand this macro after they have
 * filtered out a zero divisor, a power-of-two divisor and a dividend
 * that is not strictly greater than the divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
@ Use the difference of the leading-zero counts as the shift that
@ lines the divisor up with the dividend.
clz \curbit, \divisor
|
|
clz \result, \dividend
|
|
sub \result, \curbit, \result
|
|
mov \curbit, #1
|
|
mov \divisor, \divisor, lsl \result
|
|
mov \curbit, \curbit, lsl \result
|
|
mov \result, #0
|
|
|
|
#else
|
|
|
|
@ Initially shift the divisor left 3 bits if possible,
|
|
@ set curbit accordingly. This allows for curbit to be located
|
|
@ at the left end of each 4-bit nibble in the division loop
|
|
@ to save one loop in most cases.
|
|
tst \divisor, #0xe0000000
|
|
moveq \divisor, \divisor, lsl #3
|
|
moveq \curbit, #8
|
|
movne \curbit, #1
|
|
|
|
@ Unless the divisor is very big, shift it up in multiples of
|
|
@ four bits, since this is the amount of unwinding in the main
|
|
@ division loop. Continue shifting until the divisor is
|
|
@ larger than the dividend.
|
|
1: cmp \divisor, #0x10000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #4
|
|
movlo \curbit, \curbit, lsl #4
|
|
blo 1b
|
|
|
|
@ For very big divisors, we must shift it a bit at a time, or
|
|
@ we will be in danger of overflowing.
|
|
1: cmp \divisor, #0x80000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #1
|
|
movlo \curbit, \curbit, lsl #1
|
|
blo 1b
|
|
|
|
mov \result, #0
|
|
|
|
#endif
|
|
|
|
@ Division loop: each pass tries the divisor at four consecutive
@ bit positions and sets the matching quotient bit on success.
|
|
1: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
orrhs \result, \result, \curbit
|
|
cmp \dividend, \divisor, lsr #1
|
|
subhs \dividend, \dividend, \divisor, lsr #1
|
|
orrhs \result, \result, \curbit, lsr #1
|
|
cmp \dividend, \divisor, lsr #2
|
|
subhs \dividend, \dividend, \divisor, lsr #2
|
|
orrhs \result, \result, \curbit, lsr #2
|
|
cmp \dividend, \divisor, lsr #3
|
|
subhs \dividend, \dividend, \divisor, lsr #3
|
|
orrhs \result, \result, \curbit, lsr #3
|
|
cmp \dividend, #0 @ Early termination?
|
|
movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
|
|
movne \divisor, \divisor, lsr #4
|
|
bne 1b
|
|
|
|
.endm
|
|
|
|
|
|
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Store in "order" the bit position of the set bit of "divisor". Both
 * users in this file expand it only after establishing that the divisor
 * is a power of two, so "order" is then log2(divisor).
 *
 * When __LINUX_ARM_ARCH__ >= 5 this is 31 - clz(divisor) and "divisor"
 * is left untouched. The fallback path narrows the value down in steps
 * of 16, 8 and 4 bits and shifts "divisor" right as it goes, so
 * "divisor" is clobbered there.
 */
.macro ARM_DIV2_ORDER divisor, order
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
clz \order, \divisor
|
|
rsb \order, \order, #31
|
|
|
|
#else
|
|
|
|
@ Is the set bit in the upper half-word?
cmp \divisor, #(1 << 16)
|
|
movhs \divisor, \divisor, lsr #16
|
|
movhs \order, #16
|
|
movlo \order, #0
|
|
|
|
cmp \divisor, #(1 << 8)
|
|
movhs \divisor, \divisor, lsr #8
|
|
addhs \order, \order, #8
|
|
|
|
cmp \divisor, #(1 << 4)
|
|
movhs \divisor, \divisor, lsr #4
|
|
addhs \order, \order, #4
|
|
|
|
@ The divisor is now below 16: above 4 adds 3, otherwise the values
@ 1, 2 and 4 add divisor >> 1, i.e. 0, 1 and 2.
cmp \divisor, #(1 << 2)
|
|
addhi \order, \order, #3
|
|
addls \order, \order, \divisor, lsr #1
|
|
|
|
#endif
|
|
|
|
.endm
|
|
|
|
|
|
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core: subtracts shifted copies of "divisor" from
 * "dividend" and leaves the result in "dividend".
 *
 * "divisor" and "order" are clobbered. "spare" is only written on the
 * __LINUX_ARM_ARCH__ >= 5 path.
 *
 * "order" is the number of bit positions the divisor was shifted left;
 * it determines how many compare/subtract steps are performed below.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
@ order = clz(divisor) - clz(dividend), then shift the divisor up by it.
clz \order, \divisor
|
|
clz \spare, \dividend
|
|
sub \order, \order, \spare
|
|
mov \divisor, \divisor, lsl \order
|
|
|
|
#else
|
|
|
|
mov \order, #0
|
|
|
|
@ Unless the divisor is very big, shift it up in multiples of
|
|
@ four bits, since this is the amount of unwinding in the main
|
|
@ division loop. Continue shifting until the divisor is
|
|
@ larger than the dividend.
|
|
1: cmp \divisor, #0x10000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #4
|
|
addlo \order, \order, #4
|
|
blo 1b
|
|
|
|
@ For very big divisors, we must shift it a bit at a time, or
|
|
@ we will be in danger of overflowing.
|
|
1: cmp \divisor, #0x80000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #1
|
|
addlo \order, \order, #1
|
|
blo 1b
|
|
|
|
#endif
|
|
|
|
@ Perform all needed subtractions to keep only the remainder.
|
|
@ Do comparisons in batch of 4 first.
|
|
subs \order, \order, #3 @ yes, 3 is intended here
|
|
blt 2f
|
|
|
|
1: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
cmp \dividend, \divisor, lsr #1
|
|
subhs \dividend, \dividend, \divisor, lsr #1
|
|
cmp \dividend, \divisor, lsr #2
|
|
subhs \dividend, \dividend, \divisor, lsr #2
|
|
cmp \dividend, \divisor, lsr #3
|
|
subhs \dividend, \dividend, \divisor, lsr #3
|
|
cmp \dividend, #1
|
|
mov \divisor, \divisor, lsr #4
|
|
subsge \order, \order, #4
|
|
bge 1b
|
|
|
|
@ Done if the low two bits of order are clear or nothing is left
@ of the dividend.
tst \order, #3
|
|
teqne \dividend, #0
|
|
beq 5f
|
|
|
|
@ Either 1, 2 or 3 comparison/subtractions are left.
|
|
@ order is compared against -2 to choose the entry point below.
2: cmn \order, #2
|
|
blt 4f
|
|
beq 3f
|
|
cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
mov \divisor, \divisor, lsr #1
|
|
3: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
mov \divisor, \divisor, lsr #1
|
|
4: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
5:
|
|
.endm
|
|
|
|
|
|
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 * Uses r2 and r3 as scratch, may modify r1, and changes the flags.
 *
 * A zero divisor branches to Ldiv0. ret/reteq are presumably the return
 * macros provided by <asm/assembler.h> - confirm there.
 */
.pushsection .text.__udivsi3, "ax"
|
|
ENTRY(__udivsi3)
|
|
ENTRY(__aeabi_uidiv)
|
|
UNWIND(.fnstart)
|
|
|
|
subs r2, r1, #1 @ r2 = divisor - 1
|
|
reteq lr @ divisor == 1: r0 already holds the quotient
|
|
bcc Ldiv0 @ borrow: divisor was 0
|
|
cmp r0, r1
|
|
bls 11f @ dividend <= divisor
|
|
tst r1, r2 @ divisor & (divisor - 1)
|
|
beq 12f @ divisor is a power of 2
|
|
|
|
@ General case: the quotient is produced in r2.
ARM_DIV_BODY r0, r1, r2, r3
|
|
|
|
mov r0, r2
|
|
ret lr
|
|
|
|
@ Flags still come from the cmp r0, r1 above.
11: moveq r0, #1 @ dividend == divisor
|
|
movne r0, #0 @ dividend < divisor
|
|
ret lr
|
|
|
|
@ Power-of-two divisor: shift the dividend right by its order.
12: ARM_DIV2_ORDER r1, r2
|
|
|
|
mov r0, r0, lsr r2
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__udivsi3)
|
|
ENDPROC(__aeabi_uidiv)
|
|
.popsection
|
|
|
|
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder
 * Uses r2 and r3 as scratch, may modify r1, and changes the flags.
 *
 * A zero divisor branches to Ldiv0. The special cases are folded into
 * one predicated sequence, so the instruction order below matters.
 */
.pushsection .text.__umodsi3, "ax"
|
|
ENTRY(__umodsi3)
|
|
UNWIND(.fnstart)
|
|
|
|
subs r2, r1, #1 @ compare divisor with 1
|
|
bcc Ldiv0
|
|
cmpne r0, r1 @ compare dividend with divisor
|
|
moveq r0, #0 @ divisor == 1 or dividend == divisor
|
|
tsthi r1, r2 @ see if divisor is power of 2
|
|
andeq r0, r0, r2 @ power of 2: mask with divisor - 1
|
|
retls lr @ done unless the general case is needed
|
|
|
|
ARM_MOD_BODY r0, r1, r2, r3
|
|
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__umodsi3)
|
|
.popsection
|
|
|
|
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient
 * Uses r2, r3 and ip as scratch, modifies r1, and changes the flags.
 *
 * The work is done on absolute values; ip = r0 ^ r1 keeps the sign of
 * the result in its top bit. A zero divisor branches to Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
|
|
ENTRY(__divsi3)
|
|
ENTRY(__aeabi_idiv)
|
|
UNWIND(.fnstart)
|
|
|
|
cmp r1, #0
|
|
eor ip, r0, r1 @ save the sign of the result.
|
|
beq Ldiv0
|
|
rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
subs r2, r1, #1 @ division by 1 or -1 ?
|
|
beq 10f
|
|
movs r3, r0
|
|
rsbmi r3, r0, #0 @ positive dividend value
|
|
cmp r3, r1
|
|
bls 11f
|
|
tst r1, r2 @ divisor is power of 2 ?
|
|
beq 12f
|
|
|
|
@ General case: unsigned quotient of r3 / r1 is produced in r0.
ARM_DIV_BODY r3, r1, r0, r2
|
|
|
|
cmp ip, #0
|
|
rsbmi r0, r0, #0 @ negate when the operand signs differ
|
|
ret lr
|
|
|
|
@ Divisor is 1 or -1. ip ^ r0 equals the original divisor, so the
@ teq below sets N exactly when the divisor was negative.
10: teq ip, r0 @ same sign ?
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
@ Flags come from cmp r3, r1: lower gives 0, equal gives +1 or -1.
11: movlo r0, #0
|
|
moveq r0, ip, asr #31 @ 0 or -1 from the result sign
|
|
orreq r0, r0, #1 @ becomes 1 or -1
|
|
ret lr
|
|
|
|
@ Power-of-two divisor: shift the absolute dividend, then fix the sign.
12: ARM_DIV2_ORDER r1, r2
|
|
|
|
cmp ip, #0
|
|
mov r0, r3, lsr r2
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__divsi3)
|
|
ENDPROC(__aeabi_idiv)
|
|
.popsection
|
|
|
|
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = remainder, negated at the end when the dividend was negative
 * Uses r2, r3 and ip as scratch, modifies r1, and changes the flags.
 *
 * The work is done on absolute values; ip keeps the original dividend
 * for the final sign fix-up. A zero divisor branches to Ldiv0.
 */
.pushsection .text.__modsi3, "ax"
|
|
ENTRY(__modsi3)
|
|
UNWIND(.fnstart)
|
|
|
|
cmp r1, #0
|
|
beq Ldiv0
|
|
rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
movs ip, r0 @ preserve sign of dividend
|
|
rsbmi r0, r0, #0 @ if negative make positive
|
|
subs r2, r1, #1 @ compare divisor with 1
|
|
cmpne r0, r1 @ compare dividend with divisor
|
|
moveq r0, #0 @ divisor == 1 or equal operands
|
|
tsthi r1, r2 @ see if divisor is power of 2
|
|
andeq r0, r0, r2 @ power of 2: mask with divisor - 1
|
|
bls 10f @ skip the general case when already done
|
|
|
|
ARM_MOD_BODY r0, r1, r2, r3
|
|
|
|
10: cmp ip, #0
|
|
rsbmi r0, r0, #0 @ negative dividend: negate the remainder
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__modsi3)
|
|
.popsection
|
|
|
|
/*
 * __aeabi_uidivmod: unsigned 32-bit division returning both results.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient as returned by __aeabi_uidiv, r1 = remainder
 * Writes r2 and r3 in addition to whatever __aeabi_uidiv writes.
 *
 * The remainder is computed as dividend - quotient * divisor from the
 * operands saved on the stack across the call.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
|
|
ENTRY(__aeabi_uidivmod)
|
|
UNWIND(.fnstart)
|
|
UNWIND(.save {r0, r1, ip, lr} )
|
|
|
|
stmfd sp!, {r0, r1, ip, lr} @ keep dividend and divisor
|
|
bl __aeabi_uidiv
|
|
ldmfd sp!, {r1, r2, ip, lr} @ r1 = dividend, r2 = divisor
|
|
mul r3, r0, r2 @ r3 = quotient * divisor
|
|
sub r1, r1, r3 @ r1 = remainder
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__aeabi_uidivmod)
|
|
.popsection
|
|
|
|
/*
 * __aeabi_idivmod: signed 32-bit division returning both results.
 *
 * In:  r0 = dividend, r1 = divisor
 * Out: r0 = quotient as returned by __aeabi_idiv, r1 = remainder
 * Writes r2 and r3 in addition to whatever __aeabi_idiv writes.
 *
 * The remainder is computed as dividend - quotient * divisor from the
 * operands saved on the stack across the call.
 *
 * The routine is placed in a section named after itself so that it can
 * be kept or discarded by the linker independently of every other
 * routine in this file.
 */
	.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}	@ keep dividend and divisor
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
	.popsection
|
|
|
|
/*
 * Ldiv0: common division-by-zero path.
 *
 * Reached by plain branches from the division and modulo routines in
 * this file, so lr still holds the return address of their caller.
 * Saves lr, calls __div0 (not defined in the visible source), then
 * returns to that caller with r0 = 0.
 */
.pushsection .text.Ldiv0, "ax"
|
|
Ldiv0:
|
|
UNWIND(.fnstart)
|
|
UNWIND(.pad #4)
|
|
UNWIND(.save {lr})
|
|
|
|
str lr, [sp, #-8]! @ sp -= 8, lr stored at the new sp
|
|
bl __div0
|
|
mov r0, #0 @ About as wrong as it could be.
|
|
ldr pc, [sp], #8 @ return: pc = saved lr, sp += 8
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(Ldiv0)
|
|
.popsection
|
|
|
|
/* Thumb-1 specialities */
|
|
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
|
|
/*
 * __gnu_thumb1_case_sqi: table dispatch helper, signed byte entries.
 *
 * In: r0 = table index, lr = return address
 * Loads the signed byte at (lr with bit 0 cleared) + r0, doubles it,
 * adds it to lr and returns through the adjusted lr.
 * r0 and r1 are preserved; the flags are changed.
 *
 * NOTE(review): presumably the compiler places the offset table right
 * after the call to this helper - confirm against the GCC sources.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
|
|
ENTRY(__gnu_thumb1_case_sqi)
|
|
push {r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r1, r1, #1 @ r1 = lr with bit 0 cleared
|
|
ldrsb r1, [r1, r0] @ sign-extended table entry
|
|
lsls r1, r1, #1 @ r1 = entry * 2
|
|
add lr, lr, r1
|
|
pop {r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_sqi)
|
|
.popsection
|
|
|
|
/*
 * __gnu_thumb1_case_uqi: table dispatch helper, unsigned byte entries.
 *
 * In: r0 = table index, lr = return address
 * Loads the unsigned byte at (lr with bit 0 cleared) + r0, doubles it,
 * adds it to lr and returns through the adjusted lr.
 * r0 and r1 are preserved; the flags are changed.
 *
 * NOTE(review): presumably the compiler places the offset table right
 * after the call to this helper - confirm against the GCC sources.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
|
|
ENTRY(__gnu_thumb1_case_uqi)
|
|
push {r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r1, r1, #1 @ r1 = lr with bit 0 cleared
|
|
ldrb r1, [r1, r0] @ zero-extended table entry
|
|
lsls r1, r1, #1 @ r1 = entry * 2
|
|
add lr, lr, r1
|
|
pop {r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_uqi)
|
|
.popsection
|
|
|
|
/*
 * __gnu_thumb1_case_shi: table dispatch helper, signed halfword entries.
 *
 * In: r0 = table index, lr = return address
 * Loads the signed halfword at (lr with bit 0 cleared) + 2 * r0, doubles
 * it, adds it to lr and returns through the adjusted lr.
 * r0 and r1 are saved and restored; the flags are changed.
 *
 * NOTE(review): presumably the compiler places the offset table right
 * after the call to this helper - confirm against the GCC sources.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
|
|
ENTRY(__gnu_thumb1_case_shi)
|
|
push {r0, r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r0, r0, #1 @ r0 = index * 2 (byte offset)
|
|
lsls r1, r1, #1 @ r1 = lr with bit 0 cleared
|
|
ldrsh r1, [r1, r0] @ sign-extended table entry
|
|
lsls r1, r1, #1 @ r1 = entry * 2
|
|
add lr, lr, r1
|
|
pop {r0, r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_shi)
|
|
.popsection
|
|
|
|
/*
 * __gnu_thumb1_case_uhi: table dispatch helper, unsigned halfword entries.
 *
 * In: r0 = table index, lr = return address
 * Loads the unsigned halfword at (lr with bit 0 cleared) + 2 * r0,
 * doubles it, adds it to lr and returns through the adjusted lr.
 * r0 and r1 are saved and restored; the flags are changed.
 *
 * NOTE(review): presumably the compiler places the offset table right
 * after the call to this helper - confirm against the GCC sources.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
|
|
ENTRY(__gnu_thumb1_case_uhi)
|
|
push {r0, r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r0, r0, #1 @ r0 = index * 2 (byte offset)
|
|
lsls r1, r1, #1 @ r1 = lr with bit 0 cleared
|
|
ldrh r1, [r1, r0] @ zero-extended table entry
|
|
lsls r1, r1, #1 @ r1 = entry * 2
|
|
add lr, lr, r1
|
|
pop {r0, r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_uhi)
|
|
.popsection
|
|
|
|
/* Taken and adapted from: https://github.com/gcc-mirror/gcc/blob/4f181f9c7ee3efc509d185fdfda33be9018f1611/libgcc/config/arm/lib1funcs.S#L2156 */
|
|
/*
 * __gnu_thumb1_case_si: table dispatch helper, 32-bit word entries.
 *
 * In: r0 = table index, lr = return address
 * The table base is lr + 2 rounded down to a multiple of 4. The word at
 * base + 4 * r0 is added to that base and the sum is used as the address
 * to continue at. r0 and r1 are saved and restored; lr and the flags
 * are changed.
 *
 * NOTE(review): presumably the compiler places the word table right
 * after the call to this helper - confirm against the GCC sources.
 */
.pushsection .text.__gnu_thumb1_case_si, "ax"
|
|
ENTRY(__gnu_thumb1_case_si)
|
|
push {r0, r1}
|
|
mov r1, lr
|
|
adds r1, r1, #2 /* Align to word. */
|
|
lsrs r1, r1, #2
|
|
lsls r0, r0, #2 /* r0 = index * 4 (byte offset) */
|
|
lsls r1, r1, #2 /* r1 = word-aligned table base */
|
|
ldr r0, [r1, r0] /* r0 = table entry */
|
|
adds r0, r0, r1 /* entry is relative to the table base */
|
|
mov lr, r0
|
|
pop {r0, r1}
|
|
mov pc, lr /* We know we were called from thumb code. */
|
|
ENDPROC(__gnu_thumb1_case_si)
|
|
.popsection
|
|
#endif
|