memory: Support L3 mappings, map m1n1 code as RX.

This replaces the old pagetable code with an adapted version of what
hv_vm.c does, which can handle block and page mappings more
intelligently.

Then, map the m1n1 code section as RX. This allows us to work in modes
where W^X is enforced.

Signed-off-by: Hector Martin <marcan@marcan.st>
This commit is contained in:
Hector Martin 2021-05-13 17:37:09 +09:00
parent 3e1ea2d503
commit 8af8dadee1
2 changed files with 235 additions and 144 deletions

View file

@ -71,8 +71,8 @@ void m1n1_main(void)
printf("Running in EL%lu\n\n", mrs(CurrentEL) >> 2);
mmu_init();
heapblock_init();
mmu_init();
#ifdef USE_FB
fb_init();

View file

@ -1,8 +1,10 @@
/* SPDX-License-Identifier: MIT */
#include "memory.h"
#include "assert.h"
#include "cpu_regs.h"
#include "fb.h"
#include "malloc.h"
#include "string.h"
#include "utils.h"
@ -41,14 +43,18 @@ static inline void write_sctlr(u64 val)
/*
* https://armv8-ref.codingbelief.com/en/chapter_d4/d43_2_armv8_translation_table_level_3_descriptor_formats.html
* PTE_TYPE_BLOCK indicates that the page table entry (PTE) points to a physical memory block
* PTE_TYPE_TABLE indicates that the PTE points to another PTE
* PTE_TYPE:PTE_BLOCK indicates that the page table entry (PTE) points to a physical memory block
* PTE_TYPE:PTE_TABLE indicates that the PTE points to another PTE
* PTE_TYPE:PTE_PAGE indicates that the PTE points to a single page
* PTE_FLAG_ACCESS is required to allow access to the memory region
* PTE_MAIR_IDX sets the MAIR index to be used for this PTE
*/
#define PTE_TYPE_BLOCK 0b01
#define PTE_TYPE_TABLE 0b11
#define PTE_FLAG_ACCESS BIT(10)
/* Descriptor is valid (bit 0). */
#define PTE_VALID BIT(0)
/* Descriptor type (bit 1); compare against PTE_BLOCK / PTE_TABLE / PTE_PAGE via FIELD_GET. */
#define PTE_TYPE BIT(1)
#define PTE_BLOCK 0
/* Table (L1/L2) and page (L3) descriptors share the same type encoding. */
#define PTE_TABLE 1
#define PTE_PAGE 1
/* Access flag; required to allow access to the memory region. */
#define PTE_ACCESS BIT(10)
/* MAIR attribute index: a 3-bit value placed at bits [4:2]. The argument is
 * parenthesized so that expressions such as `a | b` are masked as a whole. */
#define PTE_MAIR_IDX(i) (((i) & 7) << 2)
#define PTE_PXN BIT(53)
#define PTE_UXN BIT(54)
@ -65,6 +71,92 @@ static inline void write_sctlr(u64 val)
#define PERM_RX PTE_AP_RO | PTE_UXN
#define PERM_RWX 0
/* Number of virtual-address bits used to index each table level. */
#define VADDR_L3_INDEX_BITS 11
#define VADDR_L2_INDEX_BITS 11
// We treat two concatenated L1 page tables as one
#define VADDR_L1_INDEX_BITS 12
/* Lowest virtual-address bit of each level's index, i.e. log2 of the span
 * covered by a single entry at that level. */
#define VADDR_L3_OFFSET_BITS 14
#define VADDR_L2_OFFSET_BITS 25
#define VADDR_L1_OFFSET_BITS 36
/* Address bits that must be zero for an address to be aligned to one entry
 * of the given level (above the offset of the next finer level). */
#define VADDR_L1_ALIGN_MASK GENMASK(VADDR_L1_OFFSET_BITS - 1, VADDR_L2_OFFSET_BITS)
#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
/* Target-address field of a descriptor; it has no bits below the page offset. */
#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS)
#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS)
#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
/* True (1) when the descriptor has its valid bit set. A descriptor with the
 * valid bit set is necessarily non-zero, so no separate zero test is needed. */
#define IS_PTE(pte) (((pte) & PTE_VALID) != 0)
/* Descriptor classification helpers: valid bit set and type field matching. */
#define L1_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_TABLE)
#define L1_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_BLOCK)
#define L2_IS_TABLE(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_TABLE)
#define L2_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_BLOCK)
/* Despite the name, this matches an L3 *page* descriptor (type == PTE_PAGE). */
#define L3_IS_BLOCK(pte) (IS_PTE(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_PAGE)
/*
* We use 16KB pages which results in the following virtual address space:
*
* [L0 index] [L1 index] [L2 index] [L3 index] [page offset]
* 1 bit 11 bits 11 bits 11 bits 14 bits
*
* To simplify things we treat the L1 page table as a concatenated table,
* which results in the following layout:
*
* [L1 index] [L2 index] [L3 index] [page offset]
* 12 bits 11 bits 11 bits 14 bits
*
* We initialize one double-size L1 table which covers the entire virtual memory space,
* point to its two halves from the single L0 table and then create L2/L3 tables on demand.
*/
/*
* SPRR mappings interpret these bits as a 4-bit index as follows
* [AP1][AP0][PXN][UXN]
*/
#define SPRR_INDEX(perm) \
(((PTE_AP_RO & (perm)) ? 0b1000 : 0) | ((PTE_AP_EL0 & (perm)) ? 0b0100 : 0) | \
((PTE_UXN & (perm)) ? 0b0010 : 0) | ((PTE_PXN & (perm)) ? 0b0001 : 0))
/*
 * Values for one 4-bit slot of an SPRR permission word: SPRR_PERM() shifts
 * one of these into the slot selected by SPRR_INDEX().
 *
 * The enumerators carry no explicit values, so they are numbered 0..15 in
 * declaration order; reordering or inserting entries changes the encoding.
 *
 * NOTE(review): the names look like they encode the access granted to two
 * privilege domains ("EL" and "GL", with r/w/x letters and 0 for none) --
 * confirm against SPRR documentation before relying on that reading.
 */
enum SPRR_val_t {
EL0_GL0,
ELrx_GL0,
ELr_GL0,
ELrw_GL0,
EL0_GLrx,
ELrx_GLrx,
ELr_GLrx,
EL0_GLrx_ALT,
EL0_GLr,
ELx_GLr,
ELr_GLr,
ELrw_GLr,
EL0_GLrw,
ELrx_GLrw,
ELr_GLrw,
ELrw_GLrw,
};
/*
 * With SPRR enabled, RWX mappings get downgraded to RW.
 */
/* Place the 4-bit SPRR value `val` into the slot selected by the PTE
 * permission bits `ap`. Both arguments are fully parenthesized. */
#define SPRR_PERM(ap, val) (((u64)(val)) << (4 * SPRR_INDEX(ap)))
/* Default permission words. Each expansion is wrapped in parentheses so it
 * behaves as a single value wherever it is used in an expression. */
#define SPRR_DEFAULT_PERM_EL1 \
    (SPRR_PERM(PERM_RO_EL0, ELrw_GLrw) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \
     SPRR_PERM(PERM_RX_EL0, ELrw_GLrw) | SPRR_PERM(PERM_RWX_EL0, ELrw_GLrw) | \
     SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \
     SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw))
#define SPRR_DEFAULT_PERM_EL0 \
    (SPRR_PERM(PERM_RO_EL0, ELr_GLr) | SPRR_PERM(PERM_RW_EL0, ELrw_GLrw) | \
     SPRR_PERM(PERM_RX_EL0, ELrx_GLrx) | SPRR_PERM(PERM_RWX_EL0, ELrw_GLrw) | \
     SPRR_PERM(PERM_RO, ELr_GLr) | SPRR_PERM(PERM_RW, ELrw_GLrw) | \
     SPRR_PERM(PERM_RX, ELrx_GLrx) | SPRR_PERM(PERM_RWX, ELrw_GLrw))
/*
* aarch64 allows to configure attribute sets for up to eight different memory
* types. we need normal memory and two types of device memory (nGnRnE and
@ -93,155 +185,146 @@ static inline void write_sctlr(u64 val)
#define MAIR_ATTR_DEVICE_nGnRnE 0x00UL
#define MAIR_ATTR_DEVICE_nGnRE 0x04UL
/*
* We want use 16KB pages which would usually result in the following
* virtual address space:
*
* [L0 index] [L1 index] [L2 index] [L3 index] [page offset]
* 1 bit 11 bits 11 bits 11 bits 14 bits
*
* To simplify things we only allow 32MB mappings directly from
* the L2 tables such that in m1n1 all virtual addresses will look like this
* instead (Block maps from L0 or L1 are not possible with 16KB pages):
*
* [L0 index] [L1 index] [L2 index] [page offset]
* 1 bit 11 bits 11 bits 25 bits
*
* We initalize two L1 tables which cover the entire virtual memory space,
* point to them in the singe L0 table and then create L2 tables on demand.
*/
#define VADDR_PAGE_OFFSET_BITS 25
#define VADDR_L2_INDEX_BITS 11
#define VADDR_L1_INDEX_BITS 11
#define VADDR_L0_INDEX_BITS 1
static u64 mmu_pt_L0[2] ALIGNED(PAGE_SIZE);
static u64 mmu_pt_L1[ENTRIES_PER_L1_TABLE] ALIGNED(PAGE_SIZE);
#define MAX_L2_TABLES 10
#define ENTRIES_PER_TABLE 2048
#define L2_PAGE_SIZE 0x2000000
static u64 pagetable_L0[2] ALIGNED(PAGE_SIZE);
static u64 pagetable_L1[2][ENTRIES_PER_TABLE] ALIGNED(PAGE_SIZE);
static u64 pagetable_L2[MAX_L2_TABLES][ENTRIES_PER_TABLE] ALIGNED(PAGE_SIZE);
static u32 pagetable_L2_next = 0;
static u64 mmu_make_block_pte(uintptr_t addr, u8 attribute_index, u64 perms)
/*
 * Return the L2 table that covers virtual address `from`.
 *
 * If the matching L1 slot already holds a table descriptor, the table it
 * points to is returned. Otherwise a new page-aligned L2 table is allocated,
 * zeroed, installed in the L1 slot as a valid table descriptor, and returned.
 *
 * Asserts that the L1 index is within the concatenated L1 table and that the
 * slot does not already hold a valid non-table descriptor. The result of
 * memalign() is not checked here.
 */
static u64 *mmu_pt_get_l2(u64 from)
{
u64 pte = PTE_TYPE_BLOCK;
pte |= addr;
pte |= PTE_FLAG_ACCESS;
pte |= PTE_MAIR_IDX(attribute_index);
pte |= perms;
// Index into the (double-size) L1 table
u64 l1idx = from >> VADDR_L1_OFFSET_BITS;
assert(l1idx < ENTRIES_PER_L1_TABLE);
u64 l1d = mmu_pt_L1[l1idx];
return pte;
// Fast path: the L2 table already exists
if (L1_IS_TABLE(l1d))
return (u64 *)(l1d & PTE_TARGET_MASK);
u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64));
// A valid entry that is not a table cannot be replaced by one
assert(!IS_PTE(l1d));
memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64));
// Publish the new table in the L1 slot
l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
mmu_pt_L1[l1idx] = l1d;
return l2;
}
/*
 * Install L2 block descriptors for the range [from, from + size).
 *
 * `to` is a descriptor template: the target address with the attribute bits
 * already OR'ed in. `from`, `size` and the address part of `to` must be
 * aligned to the span of one L2 entry (asserted). If a slot currently holds
 * an L3 table, that table is freed before the slot is overwritten.
 */
static void mmu_pt_map_l2(u64 from, u64 to, u64 size)
{
    const u64 block_size = BIT(VADDR_L2_OFFSET_BITS);

    assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    assert((to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);

    u64 desc = to | FIELD_PREP(PTE_TYPE, PTE_BLOCK);
    u64 vaddr = from;
    u64 remaining = size;

    while (remaining) {
        // The L2 table is looked up per block, so ranges may span L1 entries
        u64 *table = mmu_pt_get_l2(vaddr);
        u64 *slot = &table[(vaddr >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS)];
        u64 previous = *slot;

        // Replacing a finer-grained mapping: release its L3 table
        if (L2_IS_TABLE(previous))
            free((void *)(previous & PTE_TARGET_MASK));
        *slot = desc;

        vaddr += block_size;
        desc += block_size;
        remaining -= block_size;
    }
}
/*
 * Return the L3 table that covers virtual address `from`, creating it if
 * the L2 slot does not already point to one.
 *
 * A newly created table is either zero-filled (the L2 slot was invalid) or,
 * when the slot held a valid block descriptor, filled with page descriptors
 * derived from it: same bits, type switched to page, target advancing by one
 * page per entry. The L2 slot is then rewritten as a table descriptor.
 * The result of memalign() is not checked here.
 */
static u64 *mmu_pt_get_l3(u64 from)
{
    u64 l2idx = (from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
    u64 *parent = mmu_pt_get_l2(from);

    assert(l2idx < ENTRIES_PER_L2_TABLE);

    u64 entry = parent[l2idx];
    if (L2_IS_TABLE(entry))
        return (u64 *)(entry & PTE_TARGET_MASK);

    u64 *table = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));

    if (!IS_PTE(entry)) {
        // Nothing was mapped here: start from an empty table
        memset64(table, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
    } else {
        // Split the existing block mapping into per-page descriptors
        u64 page = (entry & ~PTE_TYPE) | FIELD_PREP(PTE_TYPE, PTE_PAGE);

        for (u64 i = 0; i < ENTRIES_PER_L3_TABLE; i++) {
            table[i] = page;
            page += BIT(VADDR_L3_OFFSET_BITS);
        }
    }

    parent[l2idx] = ((u64)table) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
    return table;
}
/*
 * Install L3 page descriptors for the range [from, from + size).
 *
 * `to` is a descriptor template: the target address with the attribute bits
 * already OR'ed in. `from` and `size` must be page-aligned (asserted).
 * L3 tables are obtained, and created when missing, through mmu_pt_get_l3().
 */
static void mmu_pt_map_l3(u64 from, u64 to, u64 size)
{
    const u64 page_size = BIT(VADDR_L3_OFFSET_BITS);

    assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    // PTE_TARGET_MASK starts at the page offset, so this check is satisfied
    // by construction; the low bits of `to` carry attributes, not address.
    assert((to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);

    u64 desc = to | FIELD_PREP(PTE_TYPE, PTE_PAGE);
    u64 vaddr = from;
    u64 remaining = size;

    while (remaining) {
        u64 *table = mmu_pt_get_l3(vaddr);

        table[(vaddr >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS)] = desc;

        vaddr += page_size;
        desc += page_size;
        remaining -= page_size;
    }
}
/*
 * Map `size` bytes of virtual address space starting at `from`.
 *
 * `to` is a descriptor template: the target address with the attribute and
 * permission bits already OR'ed in. The range is covered with L3 pages up to
 * the next L2 block boundary, then with L2 blocks for as many whole blocks
 * as fit (only if the target is block-aligned at that point), then with L3
 * pages for whatever remains.
 *
 * Returns 0 on success, or -1 if `from` or `size` is not page-aligned.
 */
int mmu_map(u64 from, u64 to, u64 size)
{
    const u64 page_mask = MASK(VADDR_L3_OFFSET_BITS);
    const u64 block_mask = MASK(VADDR_L2_OFFSET_BITS);
    u64 chunk;

    if ((from & page_mask) || (size & page_mask))
        return -1;

    // L3 mappings to boundary: round `from` up to the next L2 block boundary
    // with explicit mask arithmetic, which holds for any value of `from`.
    chunk = min(size, ((from + block_mask) & ~block_mask) - from);
    if (chunk) {
        mmu_pt_map_l3(from, to, chunk);
        from += chunk;
        to += chunk;
        size -= chunk;
    }

    // L2 mappings: the largest whole number of blocks that still fits,
    // usable only when the target is block-aligned as well.
    chunk = size & ~block_mask;
    if (chunk && (to & VADDR_L2_ALIGN_MASK) == 0) {
        mmu_pt_map_l2(from, to, chunk);
        from += chunk;
        to += chunk;
        size -= chunk;
    }

    // L3 mappings to end
    if (size)
        mmu_pt_map_l3(from, to, size);

    return 0;
}
/*
 * Build a table descriptor that points at the page table `addr`.
 * The result has the valid bit, the table type and the access flag set,
 * OR'ed with the table's address.
 */
static u64 mmu_make_table_pte(u64 *addr)
{
u64 pte = PTE_TYPE_TABLE;
u64 pte = FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
pte |= (uintptr_t)addr;
pte |= PTE_FLAG_ACCESS;
pte |= PTE_ACCESS;
return pte;
}
/*
 * Reset the statically allocated top-level tables: zero the L0 and L1
 * tables, then point the two L0 entries at the lower and upper halves of
 * the double-size L1 table.
 */
static void mmu_init_pagetables(void)
{
pagetable_L2_next = 0;
memset64(mmu_pt_L0, 0, sizeof mmu_pt_L0);
memset64(mmu_pt_L1, 0, sizeof mmu_pt_L1);
memset64(pagetable_L0, 0, sizeof pagetable_L0);
memset64(pagetable_L1, 0, sizeof pagetable_L1);
memset64(pagetable_L2, 0, sizeof pagetable_L2);
pagetable_L0[0] = mmu_make_table_pte(&pagetable_L1[0][0]);
pagetable_L0[1] = mmu_make_table_pte(&pagetable_L1[1][0]);
// First L0 entry -> first half of the L1 table
mmu_pt_L0[0] = mmu_make_table_pte(&mmu_pt_L1[0]);
// Second L0 entry -> second half of the L1 table
mmu_pt_L0[1] = mmu_make_table_pte(&mmu_pt_L1[ENTRIES_PER_L1_TABLE >> 1]);
}
static u8 mmu_extract_L0_index(uintptr_t addr)
static void mmu_add_mapping(u64 from, u64 to, size_t size, u8 attribute_index, u64 perms)
{
addr >>= VADDR_PAGE_OFFSET_BITS;
addr >>= VADDR_L2_INDEX_BITS;
addr >>= VADDR_L1_INDEX_BITS;
addr &= (1 << VADDR_L0_INDEX_BITS) - 1;
return (u8)addr;
}
static u64 mmu_extract_L1_index(uintptr_t addr)
{
addr >>= VADDR_PAGE_OFFSET_BITS;
addr >>= VADDR_L2_INDEX_BITS;
addr &= (1 << VADDR_L1_INDEX_BITS) - 1;
return (u64)addr;
}
static u64 mmu_extract_L2_index(uintptr_t addr)
{
addr >>= VADDR_PAGE_OFFSET_BITS;
addr &= (1 << VADDR_L2_INDEX_BITS) - 1;
return (u64)addr;
}
static uintptr_t mmu_extract_addr(u64 pte)
{
/*
* https://armv8-ref.codingbelief.com/en/chapter_d4/d43_1_vmsav8-64_translation_table_descriptor_formats.html
* need to extract bits [47:14]
*/
pte &= ((1ULL << 48) - 1);
pte &= ~((1ULL << 14) - 1);
return (uintptr_t)pte;
}
static u64 *mmu_get_L1_table(uintptr_t addr)
{
return pagetable_L1[mmu_extract_L0_index(addr)];
}
static u64 *mmu_get_L2_table(uintptr_t addr)
{
u64 *tbl_l1 = mmu_get_L1_table(addr);
u64 l1_idx = mmu_extract_L1_index(addr);
u64 desc_l1 = tbl_l1[l1_idx];
if (desc_l1 == 0) {
if (pagetable_L2_next == MAX_L2_TABLES)
panic("MMU: not enough space to create an additional L2 table to "
"map %lx",
addr);
desc_l1 = mmu_make_table_pte((u64 *)&pagetable_L2[pagetable_L2_next++]);
tbl_l1[l1_idx] = desc_l1;
}
return (u64 *)mmu_extract_addr(desc_l1);
}
static void mmu_add_single_mapping(uintptr_t from, uintptr_t to, u8 attribute_index, u64 perms)
{
u64 *tbl_l2 = mmu_get_L2_table(from);
u64 l2_idx = mmu_extract_L2_index(from);
if (tbl_l2[l2_idx])
panic("MMU: mapping for %lx already exists", from);
tbl_l2[l2_idx] = mmu_make_block_pte(to, attribute_index, perms);
}
static void mmu_add_mapping(uintptr_t from, uintptr_t to, size_t size, u8 attribute_index,
u64 perms)
{
if (from % L2_PAGE_SIZE)
panic("mmu_add_mapping: from address not aligned: %lx", from);
if (to % L2_PAGE_SIZE)
panic("mmu_add_mapping: to address not aligned: %lx", to);
if (size % L2_PAGE_SIZE)
panic("mmu_add_mapping: size not aligned: %lx", size);
while (size > 0) {
mmu_add_single_mapping(from, to, attribute_index, perms);
from += L2_PAGE_SIZE;
to += L2_PAGE_SIZE;
size -= L2_PAGE_SIZE;
}
if (mmu_map(from, to | PTE_MAIR_IDX(attribute_index) | PTE_ACCESS | PTE_VALID | perms, size) <
0)
panic("Failed to add MMU mapping 0x%lx -> 0x%lx (0x%lx)\n", from, to, size);
}
static void mmu_add_default_mappings(void)
@ -263,6 +346,12 @@ static void mmu_add_default_mappings(void)
*/
mmu_add_mapping(0x0800000000, 0x0800000000, 0x0400000000, MAIR_IDX_NORMAL, PERM_RWX);
/*
* Remap m1n1 executable code as RX.
*/
mmu_add_mapping((u64)_base, (u64)_base, (u64)_rodata_end - (u64)_base, MAIR_IDX_NORMAL,
PERM_RX);
/*
* Create mapping for 16GB RAM from 0x88_0000_0000 to 0x8c_0000_0000,
* read/writable/exec by EL0 (but not executable by EL1)
@ -304,8 +393,8 @@ static void mmu_configure(void)
FIELD_PREP(TCR_ORGN0, TCR_ORGN0_WBWA) | FIELD_PREP(TCR_IRGN0, TCR_IRGN0_WBWA) |
FIELD_PREP(TCR_T0SZ, TCR_T0SZ_48BIT));
msr(TTBR0_EL1, (uintptr_t)pagetable_L0);
msr(TTBR1_EL1, (uintptr_t)pagetable_L0);
msr(TTBR0_EL1, (uintptr_t)mmu_pt_L0);
msr(TTBR1_EL1, (uintptr_t)mmu_pt_L0);
// Armv8-A Address Translation, 100940_0101_en, page 28
sysop("dsb ishst");
@ -330,11 +419,13 @@ void mmu_init(void)
// Enable EL0 memory access by EL1
msr(PAN, 0);
u64 sctlr_old = read_sctlr();
u64 sctlr_new = sctlr_old | SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN;
// RES1 bits
u64 sctlr = SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_TSCXT | SCTLR_ITD;
// Configure translation
sctlr |= SCTLR_I | SCTLR_C | SCTLR_M | SCTLR_SPAN;
printf("MMU: SCTLR_EL1: %lx -> %lx\n", sctlr_old, sctlr_new);
write_sctlr(sctlr_new);
printf("MMU: SCTLR_EL1: %lx -> %lx\n", mrs(SCTLR_EL1), sctlr);
write_sctlr(sctlr);
printf("MMU: running with MMU and caches enabled!\n");
}