diff mbox series

[v2,2/5] mips: octeon: use mips_mach_early_init() to copy to L2 cache

Message ID 20200630103320.1290545-3-sr@denx.de
State Accepted
Commit e9609dc38ba225ddb58fcef41a0beb8a3b09a888
Headers show
Series mips: Improve initial Octeon MIPS64 support | expand

Commit Message

Stefan Roese June 30, 2020, 10:33 a.m. UTC
This patch adds code to the Octeon platform that copies the U-Boot image
from its bootrom location to a different location (TEXT_BASE). It is used
in this case to copy the complete U-Boot image into L2 cache, which
greatly improves the bootup time - especially in regard to the
very long and complex DDR4 init code.

The Kconfig symbol CONFIG_MIPS_MACH_EARLY_INIT is enabled with this
patch for Octeon.

Signed-off-by: Stefan Roese <sr at denx.de>

---

Changes in v2:
- Change mips_mach_early_init() as suggested by Daniel to make it
  easier to understand and smaller
- Drop CONFIG_BOARD_SIZE_LIMIT

 arch/mips/Kconfig                     |  1 +
 arch/mips/mach-octeon/lowlevel_init.S | 50 +++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
diff mbox series

Patch

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 327fd4848a..bcf6f26457 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -114,6 +114,7 @@  config ARCH_OCTEON
 	select DM
 	select DM_SERIAL
 	select MIPS_L2_CACHE
+	select MIPS_MACH_EARLY_INIT
 	select MIPS_TUNE_OCTEON3
 	select ROM_EXCEPTION_VECTORS
 	select SUPPORTS_BIG_ENDIAN
diff --git a/arch/mips/mach-octeon/lowlevel_init.S b/arch/mips/mach-octeon/lowlevel_init.S
index d9aab38cde..fa87cb4e34 100644
--- a/arch/mips/mach-octeon/lowlevel_init.S
+++ b/arch/mips/mach-octeon/lowlevel_init.S
@@ -17,3 +17,53 @@  LEAF(lowlevel_init)
 	jr	ra
 	 nop
 	END(lowlevel_init)
+
+LEAF(mips_mach_early_init)
+	/* Copy the running image to its link address (_start), then return into the copy */
+	move    s0, ra		/* Save caller's ra; the bal below overwrites ra */
+
+	bal	__dummy		/* Sets ra to the run-time address of __dummy */
+	 nop
+
+__dummy:
+	/* Get the actual address that we are running at */
+	PTR_LA	a7, __dummy	/* a7 = link-time address of __dummy */
+	dsubu	t3, ra, a7	/* t3 now has reloc offset (run-time minus link-time) */
+
+	PTR_LA	t1, _start	/* t1 = copy destination: link address of _start */
+	daddu	t0, t1, t3	/* t0 now has actual address of _start (copy source) */
+
+	/* Calculate end address of copy loop (t2 is compared against destination t1) */
+	PTR_LA	t2, _end
+	daddiu	t2, t2, 0x4000	/* Increase size to include appended DTB; assumes DTB <= 16 KiB - TODO confirm */
+	daddiu	t2, t2, 127
+	ins	t2, zero, 0, 7	/* Round up to cache line for memcpy: clears the low 7 bits (128-byte multiple) */
+
+	/* Copy ourself to the L2 cache from flash, 32 bytes at a time */
+1:
+	ld	a0, 0(t0)
+	ld	a1, 8(t0)
+	ld	a2, 16(t0)
+	ld	a3, 24(t0)
+	sd	a0, 0(t1)
+	sd	a1, 8(t1)
+	sd	a2, 16(t1)
+	sd	a3, 24(t1)
+	addiu	t0, 32		/* NOTE(review): 32-bit add on a pointer, other address math here uses d* ops - confirm */
+	addiu	t1, 32
+	bne	t1, t2, 1b	/* Exits only on exact match; NOTE(review): relies on _start being 32-byte aligned - confirm */
+	 nop
+
+	sync			/* Memory barrier after the copy stores, before jumping into the copy */
+
+	/*
+	 * Return to start.S now running from TEXT_BASE, which points
+	 * to DRAM address space, which effectively is L2 cache now.
+	 * This speeds up the init process extremely, especially the
+	 * DDR init code.
+	 */
+	dsubu	s0, s0, t3	/* Fixup return address with reloc offset */
+	jr.hb	s0		/* Jump back with hazard barrier */
+	 nop
+
+	END(mips_mach_early_init)