Merge https://source.denx.de/u-boot/custodians/u-boot-riscv

2024-11-11 07:34:31 +00:00 · 2021-05-18 11:09:41 -04:00 · 2021-05-18 11:09:41 -04:00 · 52993fcb76
commit 52993fcb76
parent feddbdb55f 89419279f4
10 changed files with 275 additions and 145 deletions
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -158,7 +158,14 @@ config DMA_ADDR_T_64BIT
 config SIFIVE_CLINT
 	bool
-	depends on RISCV_MMODE || SPL_RISCV_MMODE
+	depends on RISCV_MMODE
 	help
 	  The SiFive CLINT block holds memory-mapped control and status registers
 	  associated with software and timer interrupts.
 config SPL_SIFIVE_CLINT
 	bool
 	depends on SPL_RISCV_MMODE
 	help
 	  The SiFive CLINT block holds memory-mapped control and status registers
 	  associated with software and timer interrupts.
@@ -271,6 +278,8 @@ config STACK_SIZE_SHIFT
 config OF_BOARD_FIXUP
 	default y if OF_SEPARATE && RISCV_SMODE
 menu "Use assembly optimized implementation of memory routines"
 config USE_ARCH_MEMCPY
 	bool "Use an assembly optimized implementation of memcpy"
 	default y
@@ -350,3 +359,5 @@ config TPL_USE_ARCH_MEMSET
 	  but may increase the binary size.
 endmenu
 endmenu
--- a/arch/riscv/cpu/fu540/Kconfig
+++ b/arch/riscv/cpu/fu540/Kconfig
@@ -11,7 +11,7 @@ config SIFIVE_FU540
 	imply CPU
 	imply CPU_RISCV
 	imply RISCV_TIMER if (RISCV_SMODE || SPL_RISCV_SMODE)
-	imply SIFIVE_CLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+	imply SPL_SIFIVE_CLINT
 	imply CMD_CPU
 	imply SPL_CPU
 	imply SPL_OPENSBI
--- a/arch/riscv/cpu/generic/Kconfig
+++ b/arch/riscv/cpu/generic/Kconfig
@@ -8,7 +8,8 @@ config GENERIC_RISCV
 	imply CPU
 	imply CPU_RISCV
 	imply RISCV_TIMER if (RISCV_SMODE || SPL_RISCV_SMODE)
-	imply SIFIVE_CLINT if (RISCV_MMODE || SPL_RISCV_MMODE)
+	imply SIFIVE_CLINT if RISCV_MMODE
 	imply SPL_SIFIVE_CLINT if SPL_RISCV_MMODE
 	imply CMD_CPU
 	imply SPL_CPU
 	imply SPL_OPENSBI
--- a/arch/riscv/include/asm/global_data.h
+++ b/arch/riscv/include/asm/global_data.h
@@ -18,7 +18,7 @@
 struct arch_global_data {
 	long boot_hart;		/* boot hart id */
 	phys_addr_t firmware_fdt_addr;
-#ifdef CONFIG_SIFIVE_CLINT
+#if CONFIG_IS_ENABLED(SIFIVE_CLINT)
 	void __iomem *clint;	/* clint base address */
 #endif
 #ifdef CONFIG_ANDES_PLIC
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_CMD_BOOTI) += bootm.o image.o
 obj-$(CONFIG_CMD_GO) += boot.o
 obj-y	+= cache.o
 ifeq ($(CONFIG_$(SPL_)RISCV_MMODE),y)
-obj-$(CONFIG_SIFIVE_CLINT) += sifive_clint.o
+obj-$(CONFIG_$(SPL_)SIFIVE_CLINT) += sifive_clint.o
 obj-$(CONFIG_ANDES_PLIC) += andes_plic.o
 else
 obj-$(CONFIG_SBI) += sbi.o
--- a/arch/riscv/lib/fdt_fixup.c
+++ b/arch/riscv/lib/fdt_fixup.c
@@ -151,14 +151,17 @@ int arch_fixup_fdt(void *blob)
 	}
 	chosen_offset = fdt_path_offset(blob, "/chosen");
 	if (chosen_offset < 0) {
-		err = fdt_add_subnode(blob, 0, "chosen");
+		chosen_offset = fdt_add_subnode(blob, 0, "chosen");
-		if (err < 0) {
+		if (chosen_offset < 0) {
 			log_err("chosen node cannot be added\n");
-			return err;
+			return chosen_offset;
 		}
 	}
 	/* Overwrite the boot-hartid as U-Boot is the last stage BL */
-	fdt_setprop_u32(blob, chosen_offset, "boot-hartid", gd->arch.boot_hart);
+	err = fdt_setprop_u32(blob, chosen_offset, "boot-hartid",
 			      gd->arch.boot_hart);
 	if (err < 0)
 		return log_msg_ret("could not set boot-hartid", err);
 #endif
 	/* Copy the reserved-memory node to the DT used by OS */
--- a/arch/riscv/lib/memcpy.S
+++ b/arch/riscv/lib/memcpy.S
@@ -9,100 +9,151 @@
 /* void *memcpy(void *, const void *, size_t) */
 ENTRY(__memcpy)
 WEAK(memcpy)
-	move t6, a0  /* Preserve return value */
+	/* Save for return value */
 	mv	t6, a0
 	/* Defer to byte-oriented copy for small sizes */
 	sltiu a3, a2, 128
 	bnez a3, 4f
 	/* Use word-oriented copy only if low-order bits match */
 	andi a3, t6, SZREG-1
 	andi a4, a1, SZREG-1
 	bne a3, a4, 4f
 	beqz a3, 2f  /* Skip if already aligned */
 	/*
-	 * Round to nearest double word-aligned address
+	 * Register allocation for code below:
-	 * greater than or equal to start address
+	 * a0 - start of uncopied dst
 	 * a1 - start of uncopied src
 	 * t0 - end of uncopied dst
 	 */
-	andi a3, a1, ~(SZREG-1)
+	add	t0, a0, a2
-	addi a3, a3, SZREG
+
-	/* Handle initial misalignment */
+	/*
-	sub a4, a3, a1
+	 * Use bytewise copy if too small.
 	 *
 	 * This threshold must be at least 2*SZREG to ensure at least one
 	 * wordwise copy is performed. It is chosen to be 16 because it will
 	 * save at least 7 iterations of bytewise copy, which pays off the
 	 * fixed overhead.
 	 */
 	li	a3, 16
 	bltu	a2, a3, .Lbyte_copy_tail
 	/*
 	 * Bytewise copy first to align a0 to word boundary.
 	 */
 	addi	a2, a0, SZREG-1
 	andi	a2, a2, ~(SZREG-1)
 	beq	a0, a2, 2f
 1:
 	lb	a5, 0(a1)
 	addi	a1, a1, 1
-	sb a5, 0(t6)
+	sb	a5, 0(a0)
-	addi t6, t6, 1
+	addi	a0, a0, 1
-	bltu a1, a3, 1b
+	bne	a0, a2, 1b
 	sub a2, a2, a4  /* Update count */
 2:
-	andi a4, a2, ~((16*SZREG)-1)
+
-	beqz a4, 4f
+	/*
-	add a3, a1, a4
+	 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
-3:
+	 * aligned word-wise copy. Otherwise we need to perform misaligned
-	REG_L a4,       0(a1)
+	 * word-wise copy.
-	REG_L a5,   SZREG(a1)
+	 */
-	REG_L a6, 2*SZREG(a1)
+	andi	a3, a1, SZREG-1
-	REG_L a7, 3*SZREG(a1)
+	bnez	a3, .Lmisaligned_word_copy
-	REG_L t0, 4*SZREG(a1)
+
-	REG_L t1, 5*SZREG(a1)
+	/* Unrolled wordwise copy */
-	REG_L t2, 6*SZREG(a1)
+	addi	t0, t0, -(16*SZREG-1)
-	REG_L t3, 7*SZREG(a1)
+	bgeu	a0, t0, 2f
-	REG_L t4, 8*SZREG(a1)
+1:
-	REG_L t5, 9*SZREG(a1)
+	REG_L	a2,        0(a1)
-	REG_S a4,       0(t6)
+	REG_L	a3,    SZREG(a1)
-	REG_S a5,   SZREG(t6)
+	REG_L	a4,  2*SZREG(a1)
-	REG_S a6, 2*SZREG(t6)
+	REG_L	a5,  3*SZREG(a1)
-	REG_S a7, 3*SZREG(t6)
+	REG_L	a6,  4*SZREG(a1)
-	REG_S t0, 4*SZREG(t6)
+	REG_L	a7,  5*SZREG(a1)
-	REG_S t1, 5*SZREG(t6)
+	REG_L	t1,  6*SZREG(a1)
-	REG_S t2, 6*SZREG(t6)
+	REG_L	t2,  7*SZREG(a1)
-	REG_S t3, 7*SZREG(t6)
+	REG_L	t3,  8*SZREG(a1)
-	REG_S t4, 8*SZREG(t6)
+	REG_L	t4,  9*SZREG(a1)
-	REG_S t5, 9*SZREG(t6)
+	REG_L	t5, 10*SZREG(a1)
-	REG_L a4, 10*SZREG(a1)
+	REG_S	a2,        0(a0)
-	REG_L a5, 11*SZREG(a1)
+	REG_S	a3,    SZREG(a0)
-	REG_L a6, 12*SZREG(a1)
+	REG_S	a4,  2*SZREG(a0)
-	REG_L a7, 13*SZREG(a1)
+	REG_S	a5,  3*SZREG(a0)
-	REG_L t0, 14*SZREG(a1)
+	REG_S	a6,  4*SZREG(a0)
-	REG_L t1, 15*SZREG(a1)
+	REG_S	a7,  5*SZREG(a0)
 	REG_S	t1,  6*SZREG(a0)
 	REG_S	t2,  7*SZREG(a0)
 	REG_S	t3,  8*SZREG(a0)
 	REG_S	t4,  9*SZREG(a0)
 	REG_S	t5, 10*SZREG(a0)
 	REG_L	a2, 11*SZREG(a1)
 	REG_L	a3, 12*SZREG(a1)
 	REG_L	a4, 13*SZREG(a1)
 	REG_L	a5, 14*SZREG(a1)
 	REG_L	a6, 15*SZREG(a1)
 	addi	a1, a1, 16*SZREG
-	REG_S a4, 10*SZREG(t6)
+	REG_S	a2, 11*SZREG(a0)
-	REG_S a5, 11*SZREG(t6)
+	REG_S	a3, 12*SZREG(a0)
-	REG_S a6, 12*SZREG(t6)
+	REG_S	a4, 13*SZREG(a0)
-	REG_S a7, 13*SZREG(t6)
+	REG_S	a5, 14*SZREG(a0)
-	REG_S t0, 14*SZREG(t6)
+	REG_S	a6, 15*SZREG(a0)
-	REG_S t1, 15*SZREG(t6)
+	addi	a0, a0, 16*SZREG
-	addi t6, t6, 16*SZREG
+	bltu	a0, t0, 1b
-	bltu a1, a3, 3b
+2:
-	andi a2, a2, (16*SZREG)-1  /* Update count */
+	/* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
 	addi	t0, t0, 15*SZREG
-4:
+	/* Wordwise copy */
-	/* Handle trailing misalignment */
+	bgeu	a0, t0, 2f
-	beqz a2, 6f
+1:
-	add a3, a1, a2
+	REG_L	a5, 0(a1)
 	addi	a1, a1, SZREG
 	REG_S	a5, 0(a0)
 	addi	a0, a0, SZREG
 	bltu	a0, t0, 1b
 2:
 	addi	t0, t0, SZREG-1
-	/* Use word-oriented copy if co-aligned to word boundary */
+.Lbyte_copy_tail:
-	or a5, a1, t6
+	/*
-	or a5, a5, a3
+	 * Bytewise copy anything left.
-	andi a5, a5, 3
+	 */
-	bnez a5, 5f
+	beq	a0, t0, 2f
-7:
+1:
-	lw a4, 0(a1)
+	lb	a5, 0(a1)
 	addi a1, a1, 4
 	sw a4, 0(t6)
 	addi t6, t6, 4
 	bltu a1, a3, 7b
 	ret
 5:
 	lb a4, 0(a1)
 	addi	a1, a1, 1
-	sb a4, 0(t6)
+	sb	a5, 0(a0)
-	addi t6, t6, 1
+	addi	a0, a0, 1
-	bltu a1, a3, 5b
+	bne	a0, t0, 1b
-6:
+2:
 	mv	a0, t6
 	ret
 .Lmisaligned_word_copy:
 	/*
 	 * Misaligned word-wise copy.
 	 * For misaligned copy we still perform word-wise copy, but we need to
 	 * use the value fetched from the previous iteration and do some shifts.
 	 * This is safe because we wouldn't access more words than necessary.
 	 */
 	/* Calculate shifts */
 	slli	t3, a3, 3
 	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
 	/* Load the initial value and align a1 */
 	andi	a1, a1, ~(SZREG-1)
 	REG_L	a5, 0(a1)
 	addi	t0, t0, -(SZREG-1)
 	/* At least one iteration will be executed here, no check */
 1:
 	srl	a4, a5, t3
 	REG_L	a5, SZREG(a1)
 	addi	a1, a1, SZREG
 	sll	a2, a5, t4
 	or	a2, a2, a4
 	REG_S	a2, 0(a0)
 	addi	a0, a0, SZREG
 	bltu	a0, t0, 1b
 	/* Update pointers to correct value */
 	addi	t0, t0, SZREG-1
 	add	a1, a1, a3
 	j	.Lbyte_copy_tail
 END(__memcpy)
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -5,60 +5,124 @@
 ENTRY(__memmove)
 WEAK(memmove)
-        move    t0, a0
+	/*
-        move    t1, a1
+	 * Here we determine if forward copy is possible. Forward copy is
 	 * preferred to backward copy as it is more cache friendly.
 	 *
 	 * If a0 >= a1, t0 gives their distance, if t0 >= a2 then we can
 	 *   copy forward.
 	 * If a0 < a1, we can always copy forward. This will make t0 negative,
 	 *   so a *unsigned* comparison will always have t0 >= a2.
 	 *
 	 * For forward copy we just delegate the task to memcpy.
 	 */
 	sub	t0, a0, a1
 	bltu	t0, a2, 1f
 	tail	__memcpy
 1:
-        beq     a0, a1, exit_memcpy
+	/*
-        beqz    a2, exit_memcpy
+	 * Register allocation for code below:
-        srli    t2, a2, 0x2
+	 * a0 - end of uncopied dst
-
+	 * a1 - end of uncopied src
-        slt     t3, a0, a1
+	 * t0 - start of uncopied dst
-        beqz    t3, do_reverse
+	 */
-
+	mv	t0, a0
        andi    a2, a2, 0x3
        li      t4, 1
        beqz    t2, byte_copy
 word_copy:
        lw      t3, 0(a1)
        addi    t2, t2, -1
        addi    a1, a1, 4
        sw      t3, 0(a0)
        addi    a0, a0, 4
        bnez    t2, word_copy
        beqz    a2, exit_memcpy
        j       byte_copy
 do_reverse:
 	add	a0, a0, a2
 	add	a1, a1, a2
        andi    a2, a2, 0x3
        li      t4, -1
        beqz    t2, reverse_byte_copy
-reverse_word_copy:
+	/*
-        addi    a1, a1, -4
+	 * Use bytewise copy if too small.
-        addi    t2, t2, -1
+	 *
-        lw      t3, 0(a1)
+	 * This threshold must be at least 2*SZREG to ensure at least one
-        addi    a0, a0, -4
+	 * wordwise copy is performed. It is chosen to be 16 because it will
-        sw      t3, 0(a0)
+	 * save at least 7 iterations of bytewise copy, which pays off the
-        bnez    t2, reverse_word_copy
+	 * fixed overhead.
-        beqz    a2, exit_memcpy
+	 */
 	li	a3, 16
 	bltu	a2, a3, .Lbyte_copy_tail
-reverse_byte_copy:
+	/*
-        addi    a0, a0, -1
+	 * Bytewise copy first to align t0 to word boundary.
 	 */
 	andi	a2, a0, ~(SZREG-1)
 	beq	a0, a2, 2f
 1:
 	addi	a1, a1, -1
 	lb	a5, 0(a1)
 	addi	a0, a0, -1
 	sb	a5, 0(a0)
 	bne	a0, a2, 1b
 2:
-byte_copy:
+	/*
-        lb      t3, 0(a1)
+	 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
-        addi    a2, a2, -1
+	 * aligned word-wise copy. Otherwise we need to perform misaligned
-        sb      t3, 0(a0)
+	 * word-wise copy.
-        add     a1, a1, t4
+	 */
-        add     a0, a0, t4
+	andi	a3, a1, SZREG-1
-        bnez    a2, byte_copy
+	bnez	a3, .Lmisaligned_word_copy
-exit_memcpy:
+	/* Wordwise copy */
-        move a0, t0
+	addi	t0, t0, SZREG-1
-        move a1, t1
+	bleu	a0, t0, 2f
 1:
 	addi	a1, a1, -SZREG
 	REG_L	a5, 0(a1)
 	addi	a0, a0, -SZREG
 	REG_S	a5, 0(a0)
 	bgtu	a0, t0, 1b
 2:
 	addi	t0, t0, -(SZREG-1)
 .Lbyte_copy_tail:
 	/*
 	 * Bytewise copy anything left.
 	 */
 	beq	a0, t0, 2f
 1:
 	addi	a1, a1, -1
 	lb	a5, 0(a1)
 	addi	a0, a0, -1
 	sb	a5, 0(a0)
 	bne	a0, t0, 1b
 2:
 	mv	a0, t0
 	ret
 .Lmisaligned_word_copy:
 	/*
 	 * Misaligned word-wise copy.
 	 * For misaligned copy we still perform word-wise copy, but we need to
 	 * use the value fetched from the previous iteration and do some shifts.
 	 * This is safe because we wouldn't access more words than necessary.
 	 */
 	/* Calculate shifts */
 	slli	t3, a3, 3
 	sub	t4, x0, t3 /* negate is okay as shift will only look at LSBs */
 	/* Load the initial value and align a1 */
 	andi	a1, a1, ~(SZREG-1)
 	REG_L	a5, 0(a1)
 	addi	t0, t0, SZREG-1
 	/* At least one iteration will be executed here, no check */
 1:
 	sll	a4, a5, t4
 	addi	a1, a1, -SZREG
 	REG_L	a5, 0(a1)
 	srl	a2, a5, t3
 	or	a2, a2, a4
 	addi	a0, a0, -SZREG
 	REG_S	a2, 0(a0)
 	bgtu	a0, t0, 1b
 	/* Update pointers to correct value */
 	addi	t0, t0, -(SZREG-1)
 	add	a1, a1, a3
 	j	.Lbyte_copy_tail
 END(__memmove)
--- a/doc/board/AndesTech/ax25-ae350.rst
+++ b/doc/board/AndesTech/ax25-ae350.rst
@@ -343,7 +343,7 @@ cloned and build for AE350 as below:
        cd opensbi
        make PLATFORM=andes/ae350
-Copy OpenSBI FW_DYNAMIC image (build\platform\andes\ae350\firmware\fw_dynamic.bin)
+Copy OpenSBI FW_DYNAMIC image (build/platform/andes/ae350/firmware/fw_dynamic.bin)
 into U-Boot root directory
--- a/drivers/timer/Makefile
+++ b/drivers/timer/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_RENESAS_OSTM_TIMER) += ostm_timer.o
 obj-$(CONFIG_RISCV_TIMER) += riscv_timer.o
 obj-$(CONFIG_ROCKCHIP_TIMER) += rockchip_timer.o
 obj-$(CONFIG_SANDBOX_TIMER)	+= sandbox_timer.o
-obj-$(CONFIG_SIFIVE_CLINT) += sifive_clint_timer.o
+obj-$(CONFIG_$(SPL_)SIFIVE_CLINT) += sifive_clint_timer.o
 obj-$(CONFIG_STI_TIMER)		+= sti-timer.o
 obj-$(CONFIG_STM32_TIMER)	+= stm32_timer.o
 obj-$(CONFIG_X86_TSC_TIMER)	+= tsc_timer.o