m1n1/src/hv_vm.c

333 lines
9.3 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: MIT */
#include "hv.h"
#include "assert.h"
#include "cpu_regs.h"
#include "malloc.h"
#include "string.h"
#include "types.h"
#include "utils.h"
#define PAGE_SIZE 0x4000
#define CACHE_LINE_SIZE 64
#define PTE_ACCESS BIT(10)
#define PTE_SH_NS (0b11L << 8)
#define PTE_S2AP_RW (0b11L << 6)
#define PTE_MEMATTR_UNCHANGED (0b1111L << 2)
#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED)
#define PTE_VALID BIT(0)
#define PTE_TYPE BIT(1)
#define PTE_BLOCK 0
#define PTE_TABLE 1
#define PTE_PAGE 1
#define VADDR_L4_INDEX_BITS 12
#define VADDR_L3_INDEX_BITS 11
#define VADDR_L2_INDEX_BITS 11
#define VADDR_L4_OFFSET_BITS 2
#define VADDR_L3_OFFSET_BITS 14
#define VADDR_L2_OFFSET_BITS 25
#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
#define PTE_TARGET_MASK GENMASK(49, 14)
#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS)
#define SPTE_TYPE BIT(50)
#define SPTE_MAP 0
#define SPTE_HOOK 1
#define IS_HW(pte) (pte && pte & PTE_VALID)
#define IS_SW(pte) (pte && !(pte & PTE_VALID))
#define L2_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte))
#define L3_IS_TABLE(pte) (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte))
/*
* We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest
* PA size, which results in the following virtual address space:
*
* [L2 index] [L3 index] [page offset]
* 11 bits 11 bits 14 bits
*
* 32MB L2 mappings look like this:
* [L2 index] [page offset]
* 11 bits 25 bits
*
* We implement sub-page granularity mappings for software MMIO hooks, which behave
* as an additional page table level used only by software. This works like this:
*
* [L2 index] [L3 index] [L4 index] [Word offset]
* 11 bits 11 bits 12 bits 2 bits
*
* Thus, L4 sub-page tables are twice the size.
*
* We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but
* otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means
* block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is
* pushed to L4.
*/
static u64 hv_L2[ENTRIES_PER_L2_TABLE] ALIGNED(PAGE_SIZE);
;
void hv_pt_init(void)
{
memset(hv_L2, 0, sizeof(hv_L2));
msr(VTCR_EL2, FIELD_PREP(VTCR_PS, 1) | // 64GB PA size
FIELD_PREP(VTCR_TG0, 2) | // 16KB page size
FIELD_PREP(VTCR_SH0, 3) | // PTWs Inner Sharable
FIELD_PREP(VTCR_ORGN0, 1) | // PTWs Cacheable
FIELD_PREP(VTCR_IRGN0, 1) | // PTWs Cacheable
FIELD_PREP(VTCR_SL0, 1) | // Start at level 2
FIELD_PREP(VTCR_T0SZ, 28)); // 64GB translation region
msr(VTTBR_EL2, hv_L2);
}
static void hv_pt_free_l3(u64 *l3)
{
if (!l3)
return;
for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++)
if (IS_SW(l3[idx]) && FIELD_GET(PTE_TYPE, l3[idx]) == PTE_TABLE)
free((void *)(l3[idx] & PTE_TARGET_MASK));
free(l3);
}
static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr)
{
assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
assert((IS_SW(to) || to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);
to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) {
u64 idx = from >> VADDR_L2_OFFSET_BITS;
if (L2_IS_TABLE(hv_L2[idx]))
hv_pt_free_l3((u64 *)(hv_L2[idx] & PTE_TARGET_MASK));
hv_L2[idx] = to;
from += BIT(VADDR_L2_OFFSET_BITS);
to += incr * BIT(VADDR_L2_OFFSET_BITS);
}
}
static u64 *hv_pt_get_l3(u64 from)
{
u64 l2idx = from >> VADDR_L2_OFFSET_BITS;
u64 l2d = hv_L2[l2idx];
if (L2_IS_TABLE(l2d))
return (u64 *)(l2d & PTE_TARGET_MASK);
u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));
if (l2d) {
u64 incr = 0;
u64 l3d = l2d;
if (IS_HW(l2d)) {
l3d &= ~PTE_TYPE;
l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
incr = BIT(VADDR_L3_OFFSET_BITS);
}
for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += incr)
l3[idx] = l3d;
} else {
memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
}
l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
hv_L2[l2idx] = l2d;
return l3;
}
static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr)
{
assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
assert((IS_SW(to) || to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);
if (IS_HW(to))
to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
else
to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) {
u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
u64 *l3 = hv_pt_get_l3(from);
if (L3_IS_TABLE(l3[idx]))
free((void *)(l3[idx] & PTE_TARGET_MASK));
l3[idx] = to;
from += BIT(VADDR_L3_OFFSET_BITS);
to += incr * BIT(VADDR_L3_OFFSET_BITS);
}
}
static u64 *hv_pt_get_l4(u64 from)
{
u64 *l3 = hv_pt_get_l3(from);
u64 l3idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
u64 l3d = l3[l3idx];
if (L3_IS_TABLE(l3d)) {
return (u64 *)(l3d & PTE_TARGET_MASK);
}
if (IS_HW(l3d)) {
assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE);
l3d &= PTE_TARGET_MASK;
l3d |= FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);
}
u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64));
if (l3d) {
u64 incr = 0;
u64 l4d = l3d;
l4d &= ~PTE_TYPE;
l4d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
if (FIELD_GET(SPTE_TYPE, l4d) == SPTE_MAP)
incr = BIT(VADDR_L4_OFFSET_BITS);
for (u64 idx = 0; idx < ENTRIES_PER_L4_TABLE; idx++, l4d += incr)
l4[idx] = l4d;
} else {
memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64));
}
l3d = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE);
l3[l3idx] = l3d;
return l4;
}
static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr)
{
assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0);
assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0);
assert(!IS_HW(to));
if (IS_SW(to))
to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
for (; size; size -= BIT(VADDR_L4_OFFSET_BITS)) {
u64 idx = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);
u64 *l4 = hv_pt_get_l4(from);
l4[idx] = to;
from += BIT(VADDR_L4_OFFSET_BITS);
to += incr * BIT(VADDR_L4_OFFSET_BITS);
}
}
int hv_map(u64 from, u64 to, u64 size, u64 incr)
{
u64 chunk;
bool hw = IS_HW(to);
if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS))
return -1;
if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) {
printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to);
return -1;
}
// L4 mappings to boundary
chunk = min(size, ALIGN_UP(from, MASK(VADDR_L3_OFFSET_BITS)) - from);
if (chunk) {
assert(!hw);
hv_pt_map_l4(from, to, chunk, incr);
from += chunk;
to += incr * chunk;
size -= chunk;
}
// L3 mappings to boundary
chunk = ALIGN_DOWN(min(size, ALIGN_UP(from, MASK(VADDR_L2_OFFSET_BITS)) - from),
MASK(VADDR_L3_OFFSET_BITS));
if (chunk) {
hv_pt_map_l3(from, to, chunk, incr);
from += chunk;
to += incr * chunk;
size -= chunk;
}
// L2 mappings
chunk = ALIGN_DOWN(size, MASK(VADDR_L3_OFFSET_BITS));
if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) {
hv_pt_map_l2(from, to, chunk, incr);
from += chunk;
to += incr * chunk;
size -= chunk;
}
// L3 mappings to end
chunk = ALIGN_DOWN(size, MASK(VADDR_L3_OFFSET_BITS));
if (chunk) {
hv_pt_map_l3(from, to, chunk, incr);
from += chunk;
to += incr * chunk;
size -= chunk;
}
// L4 mappings to end
if (size) {
assert(!hw);
hv_pt_map_l4(from, to, size, incr);
}
return 0;
}
int hv_unmap(u64 from, u64 size)
{
return hv_map(from, 0, size, 0);
}
int hv_map_hw(u64 from, u64 to, u64 size)
{
return hv_map(from, to | PTE_ATTRIBUTES | PTE_VALID, size, 1);
}
int hv_map_sw(u64 from, u64 to, u64 size)
{
return hv_map(from, to | FIELD_PREP(SPTE_TYPE, SPTE_MAP), size, 1);
}
int hv_map_hook(u64 from, void *hook, u64 size)
{
return hv_map(from, ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK), size, 0);
}
u64 hv_translate(u64 addr)
{
u64 el = FIELD_GET(SPSR_M, mrs(SPSR_EL2)) >> 2;
u64 save = mrs(PAR_EL1);
if (el == 0)
asm("at s12e0r, %0" : : "r"(addr));
else
asm("at s12e1r, %0" : : "r"(addr));
u64 par = mrs(PAR_EL1);
msr(PAR_EL1, save);
if (par & PAR_F)
return 0; // fault
else
return (par & PAR_PA) | (addr & 0xfff);
}