memory: Add support for 4K translation

Add support for 4K page sizes, supported by A7-A8X.

Co-developed-by: fenfenS <104295404+fenfenS@users.noreply.github.com>
Signed-off-by: fenfenS <104295404+fenfenS@users.noreply.github.com>
Signed-off-by: Nick Chan <towinchenmi@gmail.com>
This commit is contained in:
Nick Chan 2024-08-25 15:46:27 +08:00 committed by Hector Martin
parent f4ca3a19fd
commit 32ca84c769
4 changed files with 57 additions and 21 deletions

View file

@ -309,6 +309,7 @@
#define TCR_IPS_16TB 0b100UL
#define TCR_TG1 GENMASK(31, 30)
#define TCR_TG1_16K 0b01UL
#define TCR_TG1_4K 0b10UL
#define TCR_SH1 GENMASK(29, 28)
#define TCR_SH1_IS 0b11UL
#define TCR_ORGN1 GENMASK(27, 26)
@ -321,6 +322,7 @@
#define TCR_T1SZ_48BIT 16UL
#define TCR_TG0 GENMASK(15, 14)
#define TCR_TG0_16K 0b10UL
#define TCR_TG0_4K 0b0UL
#define TCR_SH0 GENMASK(13, 12)
#define TCR_SH0_IS 0b11UL
#define TCR_ORGN0 GENMASK(11, 10)

View file

@ -15,7 +15,7 @@
#define ABORT panic("dlmalloc: internal error\n")
#define NO_MALLINFO 1
#define NO_MALLOC_STATS 1
// Allocator page size is queried at run time through get_page_size() rather than fixed at 16384.
#define malloc_getpagesize get_page_size()
#define LACKS_FCNTL_H 1
#define LACKS_SYS_MMAN_H 1
#define LACKS_SYS_PARAM_H 1

View file

@ -13,7 +13,7 @@
#include "utils.h"
#include "xnuboot.h"
// Page size in bytes; expands to a get_page_size() call, so it is a run-time value, not a constant.
#define PAGE_SIZE get_page_size()
#define CACHE_LINE_SIZE 64
#define CACHE_RANGE_OP(func, op) \
@ -50,20 +50,25 @@ static inline void write_sctlr(u64 val)
sysop("isb");
}
/*
 * Translation-table geometry. Every value below depends on is_16k(), so these macros
 * expand to run-time expressions and must not be used where a constant expression is
 * required (array sizes, static initializers, #if).
 */
#define VADDR_L3_INDEX_BITS (is_16k() ? 11 : 9)
#define VADDR_L2_INDEX_BITS (is_16k() ? 11 : 9)
// We treat two concatenated L1 page tables as one on 16K
// And all the L1 page tables together on 4K...
#define VADDR_L1_INDEX_BITS (is_16k() ? 12 : 18)
// Index width of one real (non-concatenated) L1 table
#define VADDR_L1_INDEX_BITS_ACTUAL (is_16k() ? 11 : 9)
#define VADDR_L0_INDEX_BITS (is_16k() ? 1 : 9)

// Bit position at which each level's index field starts
#define VADDR_L3_OFFSET_BITS (is_16k() ? 14 : 12)
#define VADDR_L2_OFFSET_BITS (is_16k() ? 25 : 21)
#define VADDR_L1_OFFSET_BITS (is_16k() ? 36 : 30)

#define VADDR_L1_ALIGN_MASK GENMASK(VADDR_L1_OFFSET_BITS - 1, VADDR_L2_OFFSET_BITS)
#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS)

#define ENTRIES_PER_L0_TABLE BIT(VADDR_L0_INDEX_BITS)
#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS)
#define ENTRIES_PER_L1_TABLE_ACTUAL BIT(VADDR_L1_INDEX_BITS_ACTUAL)
#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
@ -91,6 +96,22 @@ static inline void write_sctlr(u64 val)
* point to the two halves in the single L0 table and then create L2/L3 tables on demand.
*/
/*
* On 4K devices, the following virtual address space results:
*
* [L0 index] [L1 index] [L2 index] [L3 index] [page offset]
* 9 bits 9 bits 9 bits 9 bits 12 bits
*
* To simplify things we treat the L1 page table as a concatenated table,
* which results in the following layout:
*
* [L1 index] [L2 index] [L3 index] [page offset]
* 18 bits 9 bits 9 bits 12 bits
*
* We initialize one giant L1 table which covers the entire virtual memory space,
* point to the parts in a single L0 table and then create L2/L3 tables on demand.
*/
/*
* SPRR mappings interpret these bits as a 4-bit index as follows
* [AP1][AP0][PXN][UXN]
@ -301,14 +322,16 @@ static u64 mmu_make_table_pte(u64 *addr)
/*
 * Allocate and zero the L0 table and the single concatenated L1 table, then make every
 * L0 entry a table descriptor pointing at its slice of the L1 table.
 *
 * Each L0 entry i covers ENTRIES_PER_L1_TABLE_ACTUAL consecutive L1 entries starting at
 * index i * ENTRIES_PER_L1_TABLE_ACTUAL. With the 16K values this yields two entries
 * pointing at the two halves of the L1 table; with the 4K values it yields 512 entries.
 *
 * Each table is allocated exactly once, so no earlier allocation is overwritten and lost.
 */
static void mmu_init_pagetables(void)
{
    // The ENTRIES_PER_* macros call is_16k() on every expansion; evaluate them once.
    const size_t l0_entries = ENTRIES_PER_L0_TABLE;
    const size_t l1_entries = ENTRIES_PER_L1_TABLE;
    const size_t l1_entries_actual = ENTRIES_PER_L1_TABLE_ACTUAL;

    mmu_pt_L0 = memalign(PAGE_SIZE, sizeof(u64) * l0_entries);
    mmu_pt_L1 = memalign(PAGE_SIZE, sizeof(u64) * l1_entries);

    memset64(mmu_pt_L0, 0, sizeof(u64) * l0_entries);
    memset64(mmu_pt_L1, 0, sizeof(u64) * l1_entries);

    for (size_t i = 0; i < l0_entries; i++) {
        mmu_pt_L0[i] = mmu_make_table_pte(&mmu_pt_L1[i * l1_entries_actual]);
    }
}
void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms)
@ -399,7 +422,7 @@ static void mmu_add_default_mappings(void)
{
ram_base = ALIGN_DOWN(cur_boot_args.phys_base, BIT(32));
uint64_t ram_size = cur_boot_args.mem_size + cur_boot_args.phys_base - ram_base;
ram_size = ALIGN_DOWN(ram_size, 0x4000);
ram_size = ALIGN_DOWN(ram_size, PAGE_SIZE);
printf("MMU: RAM base: 0x%lx\n", ram_base);
printf("MMU: Top of normal RAM: 0x%lx\n", ram_base + ram_size);
@ -466,12 +489,13 @@ static void mmu_configure(void)
(MAIR_ATTR_DEVICE_nGnRnE << MAIR_SHIFT_DEVICE_nGnRnE) |
(MAIR_ATTR_DEVICE_nGnRE << MAIR_SHIFT_DEVICE_nGnRE) |
(MAIR_ATTR_NORMAL_NC << MAIR_SHIFT_NORMAL_NC));
msr(TCR_EL1, FIELD_PREP(TCR_IPS, TCR_IPS_4TB) | FIELD_PREP(TCR_TG1, TCR_TG1_16K) |
msr(TCR_EL1, FIELD_PREP(TCR_IPS, TCR_IPS_4TB) |
FIELD_PREP(TCR_TG1, is_16k() ? TCR_TG1_16K : TCR_TG1_4K) |
FIELD_PREP(TCR_SH1, TCR_SH1_IS) | FIELD_PREP(TCR_ORGN1, TCR_ORGN1_WBWA) |
FIELD_PREP(TCR_IRGN1, TCR_IRGN1_WBWA) | FIELD_PREP(TCR_T1SZ, TCR_T1SZ_48BIT) |
FIELD_PREP(TCR_TG0, TCR_TG0_16K) | FIELD_PREP(TCR_SH0, TCR_SH0_IS) |
FIELD_PREP(TCR_ORGN0, TCR_ORGN0_WBWA) | FIELD_PREP(TCR_IRGN0, TCR_IRGN0_WBWA) |
FIELD_PREP(TCR_T0SZ, TCR_T0SZ_48BIT));
FIELD_PREP(TCR_TG0, is_16k() ? TCR_TG0_16K : TCR_TG0_4K) |
FIELD_PREP(TCR_SH0, TCR_SH0_IS) | FIELD_PREP(TCR_ORGN0, TCR_ORGN0_WBWA) |
FIELD_PREP(TCR_IRGN0, TCR_IRGN0_WBWA) | FIELD_PREP(TCR_T0SZ, TCR_T0SZ_48BIT));
msr(TTBR0_EL1, (uintptr_t)mmu_pt_L0);
msr(TTBR1_EL1, (uintptr_t)mmu_pt_L0);

View file

@ -352,6 +352,11 @@ static inline int has_el3(void)
return !!(mrs(ID_AA64PFR0_EL1) & 0xf000);
}
/*
 * Report whether this CPU advertises the 16K translation granule.
 *
 * Reads ID_AA64MMFR0_EL1 and extracts the 4-bit field at bits [23:20] (named TGran16 in
 * the Arm architecture reference). Returns true only when that field equals 0x1; every
 * other encoding returns false.
 */
static inline bool is_16k(void)
{
    const unsigned int tgran16 = (mrs(ID_AA64MMFR0_EL1) >> 20) & 0xf;

    return tgran16 == 0x1;
}
extern int boot_cpu_idx;
extern u64 boot_cpu_mpidr;
static inline int is_boot_cpu(void)
@ -359,6 +364,11 @@ static inline int is_boot_cpu(void)
return boot_cpu_idx == -1 || boot_cpu_mpidr == mrs(MPIDR_EL1);
}
/*
 * Return the page size in bytes: 16384 when is_16k() reports true, 4096 otherwise.
 */
static inline size_t get_page_size(void)
{
    if (is_16k()) {
        return 16384;
    }

    return 4096;
}
extern char _base[];
extern char _rodata_end[];
extern char _end[];