mirror of
https://github.com/AsahiLinux/u-boot
synced 2024-11-11 23:47:24 +00:00
583f1b2f10
There is currently a problem that U-Boot can not work on ARMv4 because assembly imlementations of memcpy() and some other functions use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr" should be used instead). A working preprocessor-based solution to this problem is found in arch/arm/lib/relocate.S. Move it to the "ret" macro in arch/arm/include/asm/assembler.h and change all "bx lr" code to "ret lr" in functions that may run on ARMv4. Linux source code deals with this problem in the same manner. v1 -> v2: Comment update. Pointed out by Andre Przywara. Signed-off-by: Sergei Antonov <saproj@gmail.com> CC: Samuel Holland <samuel@sholland.org> CC: Ye Li <ye.li@nxp.com> CC: Simon Glass <sjg@chromium.org> CC: Andre Przywara <andre.przywara@arm.com> CC: Marek Vasut <marex@denx.de> CC: Sean Anderson <sean.anderson@seco.com> CC: Tom Rini <trini@konsulko.com>
427 lines
8.6 KiB
ArmAsm
427 lines
8.6 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0+ */
|
|
/*
|
|
* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
|
|
*
|
|
* Author: Nicolas Pitre <nico@fluxnic.net>
|
|
* - contributed to gcc-3.4 on Sep 30, 2003
|
|
* - adapted for the Linux kernel on Oct 2, 2003
|
|
*/
|
|
/*
|
|
* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
/*
|
|
* U-Boot compatibility bit, define empty UNWIND() macro as, since we
|
|
* do not support stack unwinding and define CONFIG_AEABI to make all
|
|
* of the functions available without diverging from Linux code.
|
|
*/
|
|
#ifdef __UBOOT__
|
|
#define UNWIND(x...)
|
|
#define CONFIG_AEABI
|
|
#endif
|
|
|
|
.macro ARM_DIV_BODY dividend, divisor, result, curbit
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
clz \curbit, \divisor
|
|
clz \result, \dividend
|
|
sub \result, \curbit, \result
|
|
mov \curbit, #1
|
|
mov \divisor, \divisor, lsl \result
|
|
mov \curbit, \curbit, lsl \result
|
|
mov \result, #0
|
|
|
|
#else
|
|
|
|
@ Initially shift the divisor left 3 bits if possible,
|
|
@ set curbit accordingly. This allows for curbit to be located
|
|
@ at the left end of each 4 bit nibbles in the division loop
|
|
@ to save one loop in most cases.
|
|
tst \divisor, #0xe0000000
|
|
moveq \divisor, \divisor, lsl #3
|
|
moveq \curbit, #8
|
|
movne \curbit, #1
|
|
|
|
@ Unless the divisor is very big, shift it up in multiples of
|
|
@ four bits, since this is the amount of unwinding in the main
|
|
@ division loop. Continue shifting until the divisor is
|
|
@ larger than the dividend.
|
|
1: cmp \divisor, #0x10000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #4
|
|
movlo \curbit, \curbit, lsl #4
|
|
blo 1b
|
|
|
|
@ For very big divisors, we must shift it a bit at a time, or
|
|
@ we will be in danger of overflowing.
|
|
1: cmp \divisor, #0x80000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #1
|
|
movlo \curbit, \curbit, lsl #1
|
|
blo 1b
|
|
|
|
mov \result, #0
|
|
|
|
#endif
|
|
|
|
@ Division loop
|
|
1: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
orrhs \result, \result, \curbit
|
|
cmp \dividend, \divisor, lsr #1
|
|
subhs \dividend, \dividend, \divisor, lsr #1
|
|
orrhs \result, \result, \curbit, lsr #1
|
|
cmp \dividend, \divisor, lsr #2
|
|
subhs \dividend, \dividend, \divisor, lsr #2
|
|
orrhs \result, \result, \curbit, lsr #2
|
|
cmp \dividend, \divisor, lsr #3
|
|
subhs \dividend, \dividend, \divisor, lsr #3
|
|
orrhs \result, \result, \curbit, lsr #3
|
|
cmp \dividend, #0 @ Early termination?
|
|
movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
|
|
movne \divisor, \divisor, lsr #4
|
|
bne 1b
|
|
|
|
.endm
|
|
|
|
|
|
.macro ARM_DIV2_ORDER divisor, order
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
clz \order, \divisor
|
|
rsb \order, \order, #31
|
|
|
|
#else
|
|
|
|
cmp \divisor, #(1 << 16)
|
|
movhs \divisor, \divisor, lsr #16
|
|
movhs \order, #16
|
|
movlo \order, #0
|
|
|
|
cmp \divisor, #(1 << 8)
|
|
movhs \divisor, \divisor, lsr #8
|
|
addhs \order, \order, #8
|
|
|
|
cmp \divisor, #(1 << 4)
|
|
movhs \divisor, \divisor, lsr #4
|
|
addhs \order, \order, #4
|
|
|
|
cmp \divisor, #(1 << 2)
|
|
addhi \order, \order, #3
|
|
addls \order, \order, \divisor, lsr #1
|
|
|
|
#endif
|
|
|
|
.endm
|
|
|
|
|
|
.macro ARM_MOD_BODY dividend, divisor, order, spare
|
|
|
|
#if __LINUX_ARM_ARCH__ >= 5
|
|
|
|
clz \order, \divisor
|
|
clz \spare, \dividend
|
|
sub \order, \order, \spare
|
|
mov \divisor, \divisor, lsl \order
|
|
|
|
#else
|
|
|
|
mov \order, #0
|
|
|
|
@ Unless the divisor is very big, shift it up in multiples of
|
|
@ four bits, since this is the amount of unwinding in the main
|
|
@ division loop. Continue shifting until the divisor is
|
|
@ larger than the dividend.
|
|
1: cmp \divisor, #0x10000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #4
|
|
addlo \order, \order, #4
|
|
blo 1b
|
|
|
|
@ For very big divisors, we must shift it a bit at a time, or
|
|
@ we will be in danger of overflowing.
|
|
1: cmp \divisor, #0x80000000
|
|
cmplo \divisor, \dividend
|
|
movlo \divisor, \divisor, lsl #1
|
|
addlo \order, \order, #1
|
|
blo 1b
|
|
|
|
#endif
|
|
|
|
@ Perform all needed subtractions to keep only the reminder.
|
|
@ Do comparisons in batch of 4 first.
|
|
subs \order, \order, #3 @ yes, 3 is intended here
|
|
blt 2f
|
|
|
|
1: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
cmp \dividend, \divisor, lsr #1
|
|
subhs \dividend, \dividend, \divisor, lsr #1
|
|
cmp \dividend, \divisor, lsr #2
|
|
subhs \dividend, \dividend, \divisor, lsr #2
|
|
cmp \dividend, \divisor, lsr #3
|
|
subhs \dividend, \dividend, \divisor, lsr #3
|
|
cmp \dividend, #1
|
|
mov \divisor, \divisor, lsr #4
|
|
subsge \order, \order, #4
|
|
bge 1b
|
|
|
|
tst \order, #3
|
|
teqne \dividend, #0
|
|
beq 5f
|
|
|
|
@ Either 1, 2 or 3 comparison/subtractions are left.
|
|
2: cmn \order, #2
|
|
blt 4f
|
|
beq 3f
|
|
cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
mov \divisor, \divisor, lsr #1
|
|
3: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
mov \divisor, \divisor, lsr #1
|
|
4: cmp \dividend, \divisor
|
|
subhs \dividend, \dividend, \divisor
|
|
5:
|
|
.endm
|
|
|
|
|
|
.pushsection .text.__udivsi3, "ax"
|
|
ENTRY(__udivsi3)
|
|
ENTRY(__aeabi_uidiv)
|
|
UNWIND(.fnstart)
|
|
|
|
subs r2, r1, #1
|
|
reteq lr
|
|
bcc Ldiv0
|
|
cmp r0, r1
|
|
bls 11f
|
|
tst r1, r2
|
|
beq 12f
|
|
|
|
ARM_DIV_BODY r0, r1, r2, r3
|
|
|
|
mov r0, r2
|
|
ret lr
|
|
|
|
11: moveq r0, #1
|
|
movne r0, #0
|
|
ret lr
|
|
|
|
12: ARM_DIV2_ORDER r1, r2
|
|
|
|
mov r0, r0, lsr r2
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__udivsi3)
|
|
ENDPROC(__aeabi_uidiv)
|
|
.popsection
|
|
|
|
.pushsection .text.__umodsi3, "ax"
|
|
ENTRY(__umodsi3)
|
|
UNWIND(.fnstart)
|
|
|
|
subs r2, r1, #1 @ compare divisor with 1
|
|
bcc Ldiv0
|
|
cmpne r0, r1 @ compare dividend with divisor
|
|
moveq r0, #0
|
|
tsthi r1, r2 @ see if divisor is power of 2
|
|
andeq r0, r0, r2
|
|
retls lr
|
|
|
|
ARM_MOD_BODY r0, r1, r2, r3
|
|
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__umodsi3)
|
|
.popsection
|
|
|
|
.pushsection .text.__divsi3, "ax"
|
|
ENTRY(__divsi3)
|
|
ENTRY(__aeabi_idiv)
|
|
UNWIND(.fnstart)
|
|
|
|
cmp r1, #0
|
|
eor ip, r0, r1 @ save the sign of the result.
|
|
beq Ldiv0
|
|
rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
subs r2, r1, #1 @ division by 1 or -1 ?
|
|
beq 10f
|
|
movs r3, r0
|
|
rsbmi r3, r0, #0 @ positive dividend value
|
|
cmp r3, r1
|
|
bls 11f
|
|
tst r1, r2 @ divisor is power of 2 ?
|
|
beq 12f
|
|
|
|
ARM_DIV_BODY r3, r1, r0, r2
|
|
|
|
cmp ip, #0
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
10: teq ip, r0 @ same sign ?
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
11: movlo r0, #0
|
|
moveq r0, ip, asr #31
|
|
orreq r0, r0, #1
|
|
ret lr
|
|
|
|
12: ARM_DIV2_ORDER r1, r2
|
|
|
|
cmp ip, #0
|
|
mov r0, r3, lsr r2
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__divsi3)
|
|
ENDPROC(__aeabi_idiv)
|
|
.popsection
|
|
|
|
.pushsection .text.__modsi3, "ax"
|
|
ENTRY(__modsi3)
|
|
UNWIND(.fnstart)
|
|
|
|
cmp r1, #0
|
|
beq Ldiv0
|
|
rsbmi r1, r1, #0 @ loops below use unsigned.
|
|
movs ip, r0 @ preserve sign of dividend
|
|
rsbmi r0, r0, #0 @ if negative make positive
|
|
subs r2, r1, #1 @ compare divisor with 1
|
|
cmpne r0, r1 @ compare dividend with divisor
|
|
moveq r0, #0
|
|
tsthi r1, r2 @ see if divisor is power of 2
|
|
andeq r0, r0, r2
|
|
bls 10f
|
|
|
|
ARM_MOD_BODY r0, r1, r2, r3
|
|
|
|
10: cmp ip, #0
|
|
rsbmi r0, r0, #0
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__modsi3)
|
|
.popsection
|
|
|
|
#ifdef CONFIG_AEABI
|
|
|
|
.pushsection .text.__aeabi_uidivmod, "ax"
|
|
ENTRY(__aeabi_uidivmod)
|
|
UNWIND(.fnstart)
|
|
UNWIND(.save {r0, r1, ip, lr} )
|
|
|
|
stmfd sp!, {r0, r1, ip, lr}
|
|
bl __aeabi_uidiv
|
|
ldmfd sp!, {r1, r2, ip, lr}
|
|
mul r3, r0, r2
|
|
sub r1, r1, r3
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__aeabi_uidivmod)
|
|
.popsection
|
|
|
|
.pushsection .text.__aeabi_uidivmod, "ax"
|
|
ENTRY(__aeabi_idivmod)
|
|
UNWIND(.fnstart)
|
|
UNWIND(.save {r0, r1, ip, lr} )
|
|
|
|
stmfd sp!, {r0, r1, ip, lr}
|
|
bl __aeabi_idiv
|
|
ldmfd sp!, {r1, r2, ip, lr}
|
|
mul r3, r0, r2
|
|
sub r1, r1, r3
|
|
ret lr
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(__aeabi_idivmod)
|
|
.popsection
|
|
|
|
#endif
|
|
|
|
.pushsection .text.Ldiv0, "ax"
|
|
Ldiv0:
|
|
UNWIND(.fnstart)
|
|
UNWIND(.pad #4)
|
|
UNWIND(.save {lr})
|
|
|
|
str lr, [sp, #-8]!
|
|
bl __div0
|
|
mov r0, #0 @ About as wrong as it could be.
|
|
ldr pc, [sp], #8
|
|
|
|
UNWIND(.fnend)
|
|
ENDPROC(Ldiv0)
|
|
.popsection
|
|
|
|
/* Thumb-1 specialities */
|
|
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
|
|
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
|
|
ENTRY(__gnu_thumb1_case_sqi)
|
|
push {r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r1, r1, #1
|
|
ldrsb r1, [r1, r0]
|
|
lsls r1, r1, #1
|
|
add lr, lr, r1
|
|
pop {r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_sqi)
|
|
.popsection
|
|
|
|
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
|
|
ENTRY(__gnu_thumb1_case_uqi)
|
|
push {r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r1, r1, #1
|
|
ldrb r1, [r1, r0]
|
|
lsls r1, r1, #1
|
|
add lr, lr, r1
|
|
pop {r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_uqi)
|
|
.popsection
|
|
|
|
.pushsection .text.__gnu_thumb1_case_shi, "ax"
|
|
ENTRY(__gnu_thumb1_case_shi)
|
|
push {r0, r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r0, r0, #1
|
|
lsls r1, r1, #1
|
|
ldrsh r1, [r1, r0]
|
|
lsls r1, r1, #1
|
|
add lr, lr, r1
|
|
pop {r0, r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_shi)
|
|
.popsection
|
|
|
|
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
|
|
ENTRY(__gnu_thumb1_case_uhi)
|
|
push {r0, r1}
|
|
mov r1, lr
|
|
lsrs r1, r1, #1
|
|
lsls r0, r0, #1
|
|
lsls r1, r1, #1
|
|
ldrh r1, [r1, r0]
|
|
lsls r1, r1, #1
|
|
add lr, lr, r1
|
|
pop {r0, r1}
|
|
ret lr
|
|
ENDPROC(__gnu_thumb1_case_uhi)
|
|
.popsection
|
|
#endif
|