m1n1/src/hv_vm.c
Hector Martin 481f662271 hv_vm: Fix CTRR hugepage issue
This is the hypervisor counterpart to da9ceddeac.

Signed-off-by: Hector Martin <marcan@marcan.st>
2023-04-09 19:46:13 +09:00

1284 lines
39 KiB
C

/* SPDX-License-Identifier: MIT */
// #define DEBUG
#include "hv.h"
#include "assert.h"
#include "cpu_regs.h"
#include "exception.h"
#include "iodev.h"
#include "malloc.h"
#include "smp.h"
#include "string.h"
#include "types.h"
#include "uartproxy.h"
#include "utils.h"
extern uint64_t ram_base;
// Size of one stage 2 page / page table allocation (16KB).
#define PAGE_SIZE 0x4000
// Size (and log2 size) of the block zeroed when emulating DC ZVA.
#define CACHE_LINE_SIZE 64
#define CACHE_LINE_LOG2 6
// Lower attribute bits; PTE_ATTRIBUTES is OR'd into every mapping made by hv_map_hw().
#define PTE_ACCESS BIT(10)
#define PTE_SH_NS (0b11L << 8)
#define PTE_S2AP_RW (0b11L << 6)
#define PTE_MEMATTR_UNCHANGED (0b1111L << 2)
#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED)
// Bits cleared from hardware descriptors when hv_pt_walk() reports a result.
#define PTE_LOWER_ATTRIBUTES GENMASK(13, 2)
// PTE_VALID set: hardware descriptor. PTE_VALID clear (and entry non-zero): software descriptor.
#define PTE_VALID BIT(0)
// PTE_TYPE: 0 is a block; 1 is a table (above L3) or a page (at L3 / software L4).
#define PTE_TYPE BIT(1)
#define PTE_BLOCK 0
#define PTE_TABLE 1
#define PTE_PAGE 1
// Number of index bits consumed at each table level (L4 is the software-only level).
#define VADDR_L4_INDEX_BITS 12
#define VADDR_L3_INDEX_BITS 11
#define VADDR_L2_INDEX_BITS 11
#define VADDR_L1_INDEX_BITS 8
// Number of address bits below each level's index, i.e. log2 of the size one entry covers.
#define VADDR_L4_OFFSET_BITS 2
#define VADDR_L3_OFFSET_BITS 14
#define VADDR_L2_OFFSET_BITS 25
#define VADDR_L1_OFFSET_BITS 36
// Address bits selecting the L3 page within an L2 block / the L4 word within an L3 page.
#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
#define VADDR_L3_ALIGN_MASK GENMASK(VADDR_L3_OFFSET_BITS - 1, VADDR_L4_OFFSET_BITS)
// Target address field of a descriptor (page granularity, and word granularity for L4).
#define PTE_TARGET_MASK GENMASK(49, VADDR_L3_OFFSET_BITS)
#define PTE_TARGET_MASK_L4 GENMASK(49, VADDR_L4_OFFSET_BITS)
#define ENTRIES_PER_L1_TABLE BIT(VADDR_L1_INDEX_BITS)
#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS)
// Software descriptor flags: emit an mmiotrace event on reads / writes;
// UNBUF additionally flushes the uartproxy iodev after each event.
#define SPTE_TRACE_READ BIT(63)
#define SPTE_TRACE_WRITE BIT(62)
#define SPTE_TRACE_UNBUF BIT(61)
// Software descriptor type field and its values:
//   SPTE_MAP            access is forwarded to the target physical address
//   SPTE_HOOK           target is an hv_hook_t callback
//   SPTE_PROXY_HOOK_R   reads go to the proxy, writes pass through to the IPA
//   SPTE_PROXY_HOOK_W   writes go to the proxy, reads pass through to the IPA
//   SPTE_PROXY_HOOK_RW  both reads and writes go to the proxy
#define SPTE_TYPE GENMASK(52, 50)
#define SPTE_MAP 0
#define SPTE_HOOK 1
#define SPTE_PROXY_HOOK_R 2
#define SPTE_PROXY_HOOK_W 3
#define SPTE_PROXY_HOOK_RW 4
/*
 * Descriptor classification helpers.
 *
 * Every use of the macro argument is parenthesized so that compound
 * expressions (e.g. `a | b`) are classified as a single value instead of being
 * torn apart by operator precedence.
 */
// Non-zero descriptor with PTE_VALID set: walked by hardware.
#define IS_HW(pte) ((pte) && ((pte) & PTE_VALID))
// Non-zero descriptor with PTE_VALID clear: interpreted by software only.
#define IS_SW(pte) ((pte) && !((pte) & PTE_VALID))

#define L1_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_TABLE)

#define L2_IS_TABLE(pte)     ((pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_TABLE)
#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte))
#define L2_IS_HW_BLOCK(pte)  (IS_HW(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_BLOCK)
#define L2_IS_SW_BLOCK(pte)                                                                        \
    (IS_SW(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_BLOCK &&                                      \
     FIELD_GET(SPTE_TYPE, (pte)) == SPTE_MAP)

// At L3 a "table" is a software descriptor pointing at a software-only L4 table.
#define L3_IS_TABLE(pte)     (IS_SW(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_TABLE)
#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte))
#define L3_IS_HW_BLOCK(pte)  (IS_HW(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_PAGE)
#define L3_IS_SW_BLOCK(pte)                                                                        \
    (IS_SW(pte) && FIELD_GET(PTE_TYPE, (pte)) == PTE_BLOCK &&                                      \
     FIELD_GET(SPTE_TYPE, (pte)) == SPTE_MAP)

// Width in bits of the guest (stage 2 input) address space; set by hv_pt_init().
uint64_t vaddr_bits;
/*
* We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest
* PA size, which results in the following virtual address space:
*
* [L2 index] [L3 index] [page offset]
* 11 bits 11 bits 14 bits
*
* 32MB L2 mappings look like this:
* [L2 index] [page offset]
* 11 bits 25 bits
*
* We implement sub-page granularity mappings for software MMIO hooks, which behave
* as an additional page table level used only by software. This works like this:
*
* [L2 index] [L3 index] [L4 index] [Word offset]
* 11 bits 11 bits 12 bits 2 bits
*
* Thus, L4 sub-page tables are twice the size.
*
* We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but
* otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means
* block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is
* pushed to L4.
*
* On SoCs with more than 36-bit PA sizes there is an additional L1 translation level,
* but no blocks or software mappings are allowed there. This level can have up to 8 bits
* at this time.
*/
// Top-level stage 2 table: used as the L1 table when vaddr_bits > 36, otherwise as the L2 table.
static u64 *hv_Ltop;

/*
 * Initialize stage 2 translation.
 *
 * Derives vaddr_bits from the CPU's reported PA range (capped at 44 bits),
 * allocates and clears the top-level table, then programs VTCR_EL2 and
 * VTTBR_EL2 to use it.
 */
void hv_pt_init(void)
{
    const uint64_t pa_bits[] = {32, 36, 40, 42, 44, 48, 52};
    uint64_t pa_range = FIELD_GET(ID_AA64MMFR0_PARange, mrs(ID_AA64MMFR0_EL1));

    // Guard the lookup: an encoding not listed above would index past the end of pa_bits[].
    assert(pa_range < sizeof(pa_bits) / sizeof(pa_bits[0]));

    vaddr_bits = min(44, pa_bits[pa_range]);

    printf("HV: Initializing for %ld-bit PA range\n", vaddr_bits);

    hv_Ltop = memalign(PAGE_SIZE, sizeof(u64) * ENTRIES_PER_L2_TABLE);
    memset(hv_Ltop, 0, sizeof(u64) * ENTRIES_PER_L2_TABLE);

    // More than 36 bits requires the extra L1 level, hence a different start level.
    u64 sl0 = vaddr_bits > 36 ? 2 : 1;

    msr(VTCR_EL2, FIELD_PREP(VTCR_PS, pa_range) |              // Full PA size
                      FIELD_PREP(VTCR_TG0, 2) |                // 16KB page size
                      FIELD_PREP(VTCR_SH0, 3) |                // PTWs Inner Sharable
                      FIELD_PREP(VTCR_ORGN0, 1) |              // PTWs Cacheable
                      FIELD_PREP(VTCR_IRGN0, 1) |              // PTWs Cacheable
                      FIELD_PREP(VTCR_SL0, sl0) |              // Start level
                      FIELD_PREP(VTCR_T0SZ, 64 - vaddr_bits)); // Translation region == PA

    msr(VTTBR_EL2, hv_Ltop);
}
/*
 * Return the L2 table covering guest address `from`, allocating it on demand.
 *
 * With a guest address space of 36 bits or less there is no L1 level and the
 * top-level table is the L2 table. Otherwise the L1 entry is looked up and,
 * if it does not already point at a table, a zeroed L2 table is allocated and
 * installed.
 */
static u64 *hv_pt_get_l2(u64 from)
{
    u64 l1idx = from >> VADDR_L1_OFFSET_BITS;

    if (vaddr_bits <= 36) {
        assert(l1idx == 0);
        return hv_Ltop;
    }

    // Same bound hv_pt_walk() enforces; without it an out-of-range address
    // would read and write beyond the L1 entries.
    assert(l1idx < ENTRIES_PER_L1_TABLE);

    u64 l1d = hv_Ltop[l1idx];

    if (L1_IS_TABLE(l1d))
        return (u64 *)(l1d & PTE_TARGET_MASK);

    u64 *l2 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L2_TABLE * sizeof(u64));
    memset64(l2, 0, ENTRIES_PER_L2_TABLE * sizeof(u64));

    l1d = ((u64)l2) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
    hv_Ltop[l1idx] = l1d;
    return l2;
}
/*
 * Release an L3 table together with every software L4 table it references.
 * Passing a null pointer is a no-op.
 */
static void hv_pt_free_l3(u64 *l3)
{
    if (!l3)
        return;

    for (u64 slot = 0; slot < ENTRIES_PER_L3_TABLE; slot++) {
        u64 entry = l3[slot];

        // Only software "table" descriptors own a separately allocated L4 table.
        if (!L3_IS_TABLE(entry))
            continue;

        free((void *)(entry & PTE_TARGET_MASK));
    }

    free(l3);
}
/*
 * Install L2 block descriptors for [from, from + size).
 *
 * `to` is the descriptor for the first block; each following block gets
 * `to` advanced by `incr` block sizes. Any L3 table previously hanging off a
 * slot is freed before the slot is overwritten.
 */
static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);

    const u64 block_size = BIT(VADDR_L2_OFFSET_BITS);

    to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);

    while (size) {
        u64 *table = hv_pt_get_l2(from);
        u64 *slot = &table[(from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS)];
        u64 previous = *slot;

        if (L2_IS_TABLE(previous))
            hv_pt_free_l3((u64 *)(previous & PTE_TARGET_MASK));

        *slot = to;

        from += block_size;
        to += incr * block_size;
        size -= block_size;
    }
}
/*
 * Return the L3 table covering guest address `from`, creating it on demand.
 *
 * If the L2 slot currently holds a block, the new L3 table is pre-filled so
 * that it describes the same mapping at page granularity: hardware blocks
 * become hardware pages, SPTE_MAP blocks advance page by page, and any other
 * software descriptor is replicated unchanged. An empty slot yields a zeroed
 * table.
 */
static u64 *hv_pt_get_l3(u64 from)
{
    u64 *l2_table = hv_pt_get_l2(from);
    u64 *slot = &l2_table[(from >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS)];
    u64 parent = *slot;

    if (L2_IS_TABLE(parent))
        return (u64 *)(parent & PTE_TARGET_MASK);

    u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));

    if (!parent) {
        memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
    } else {
        u64 entry = parent;
        u64 step = 0;

        if (IS_HW(parent)) {
            // Hardware block -> hardware pages
            entry = (entry & ~PTE_TYPE) | FIELD_PREP(PTE_TYPE, PTE_PAGE);
            step = BIT(VADDR_L3_OFFSET_BITS);
        } else if (IS_SW(parent) && FIELD_GET(SPTE_TYPE, entry) == SPTE_MAP) {
            step = BIT(VADDR_L3_OFFSET_BITS);
        }

        for (u64 i = 0; i < ENTRIES_PER_L3_TABLE; i++) {
            l3[i] = entry;
            entry += step;
        }
    }

    *slot = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
    return l3;
}
/*
 * Install L3 page descriptors for [from, from + size).
 *
 * Hardware descriptors are tagged as pages, software descriptors as blocks.
 * A software L4 table previously referenced by a slot is freed before the
 * slot is overwritten. `to` advances by `incr` pages per slot.
 */
static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);

    const u64 page_size = BIT(VADDR_L3_OFFSET_BITS);

    to |= IS_HW(to) ? FIELD_PREP(PTE_TYPE, PTE_PAGE) : FIELD_PREP(PTE_TYPE, PTE_BLOCK);

    while (size) {
        u64 *table = hv_pt_get_l3(from);
        u64 *slot = &table[(from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS)];
        u64 previous = *slot;

        if (L3_IS_TABLE(previous))
            free((void *)(previous & PTE_TARGET_MASK));

        *slot = to;

        from += page_size;
        to += incr * page_size;
        size -= page_size;
    }
}
/*
 * Return the software-only L4 table covering guest address `from`, creating
 * it on demand.
 *
 * An existing hardware page in the L3 slot is first converted into the
 * equivalent SPTE_MAP software descriptor. A non-empty slot is then expanded
 * into per-word entries (advancing word by word for SPTE_MAP, replicated
 * otherwise); an empty slot yields a zeroed table. The L3 slot is replaced by
 * a software (not PTE_VALID) table descriptor.
 */
static u64 *hv_pt_get_l4(u64 from)
{
    u64 *l3_table = hv_pt_get_l3(from);
    u64 *slot = &l3_table[(from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS)];
    u64 parent = *slot;

    if (L3_IS_TABLE(parent))
        return (u64 *)(parent & PTE_TARGET_MASK);

    if (IS_HW(parent)) {
        assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE);
        parent = (parent & PTE_TARGET_MASK) | FIELD_PREP(PTE_TYPE, PTE_BLOCK) |
                 FIELD_PREP(SPTE_TYPE, SPTE_MAP);
    }

    u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64));

    if (!parent) {
        memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64));
    } else {
        u64 entry = (parent & ~PTE_TYPE) | FIELD_PREP(PTE_TYPE, PTE_PAGE);
        u64 step = (FIELD_GET(SPTE_TYPE, entry) == SPTE_MAP) ? BIT(VADDR_L4_OFFSET_BITS) : 0;

        for (u64 i = 0; i < ENTRIES_PER_L4_TABLE; i++) {
            l4[i] = entry;
            entry += step;
        }
    }

    *slot = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE);
    return l4;
}
/*
 * Install software L4 (word granularity) descriptors for [from, from + size).
 *
 * Only software descriptors (or 0, meaning unmapped) are accepted. `to`
 * advances by `incr` words per slot.
 */
static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0);

    assert(!IS_HW(to));

    const u64 word_size = BIT(VADDR_L4_OFFSET_BITS);

    // A zero descriptor stays zero so the slot reads back as unmapped.
    if (IS_SW(to))
        to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);

    while (size) {
        u64 *table = hv_pt_get_l4(from);
        u64 slot = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);

        table[slot] = to;

        from += word_size;
        to += incr * word_size;
        size -= word_size;
    }
}
/*
 * Map the guest range [from, from + size) to descriptor `to`.
 *
 * The range is split into up to five runs so that the largest possible
 * granularity is used for each part: L4 words up to the next page boundary,
 * L3 pages up to the next L2 block boundary, L2 blocks, then trailing L3 pages
 * and L4 words. `to` advances by `incr` bytes per mapped byte (0 replicates
 * the same descriptor, 1 is a linear mapping).
 *
 * Hardware mappings (PTE_VALID set in `to`) must be page aligned.
 *
 * Returns 0 on success, -1 if `from`/`size` are not sufficiently aligned.
 */
int hv_map(u64 from, u64 to, u64 size, u64 incr)
{
    u64 chunk;
    bool hw = IS_HW(to);

    if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS))
        return -1;

    if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) {
        printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to);
        return -1;
    }

    // L4 mappings to boundary
    chunk = min(size, ALIGN_UP(from, BIT(VADDR_L3_OFFSET_BITS)) - from);
    if (chunk) {
        assert(!hw);
        hv_pt_map_l4(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L3 mappings to boundary
    // ALIGN_UP takes the alignment itself (as in the L4 step above), not an offset mask.
    u64 boundary = ALIGN_UP(from, BIT(VADDR_L2_OFFSET_BITS));
    // CPU CTRR doesn't like L2 mappings crossing CTRR boundaries!
    // Map everything below the m1n1 base as L3
    if (boundary >= ram_base && boundary < (u64)_base)
        boundary = ALIGN_UP((u64)_base, BIT(VADDR_L2_OFFSET_BITS));

    chunk = ALIGN_DOWN(min(size, boundary - from), BIT(VADDR_L3_OFFSET_BITS));
    if (chunk) {
        hv_pt_map_l3(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L2 mappings
    // Hardware blocks additionally require the target to be L2 aligned;
    // otherwise the range falls through to the L3 step below.
    chunk = ALIGN_DOWN(size, BIT(VADDR_L2_OFFSET_BITS));
    if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) {
        hv_pt_map_l2(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L3 mappings to end
    chunk = ALIGN_DOWN(size, BIT(VADDR_L3_OFFSET_BITS));
    if (chunk) {
        hv_pt_map_l3(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L4 mappings to end
    if (size) {
        assert(!hw);
        hv_pt_map_l4(from, to, size, incr);
    }

    return 0;
}
/* Remove any mapping for [from, from + size) by installing empty descriptors. */
int hv_unmap(u64 from, u64 size)
{
    const u64 empty_descriptor = 0;
    const u64 no_increment = 0;

    return hv_map(from, empty_descriptor, size, no_increment);
}
/* Linearly map [from, from + size) to physical address `to` as a hardware (valid) mapping. */
int hv_map_hw(u64 from, u64 to, u64 size)
{
    u64 descriptor = to | PTE_ATTRIBUTES | PTE_VALID;

    return hv_map(from, descriptor, size, 1);
}
/* Linearly map [from, from + size) to `to` as a software-emulated SPTE_MAP mapping. */
int hv_map_sw(u64 from, u64 to, u64 size)
{
    u64 descriptor = to | FIELD_PREP(SPTE_TYPE, SPTE_MAP);

    return hv_map(from, descriptor, size, 1);
}
/* Route every access in [from, from + size) to the callback `hook` (same descriptor for all slots). */
int hv_map_hook(u64 from, hv_hook_t *hook, u64 size)
{
    u64 descriptor = ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK);

    return hv_map(from, descriptor, size, 0);
}
/*
 * Translate a guest virtual address using the CPU's address translation
 * instructions.
 *
 * addr:    guest virtual address
 * s1:      true for stage 1 only, false for combined stage 1+2
 * w:       true to translate as a write, false as a read
 * par_out: optional; receives the raw PAR_EL1 result
 *
 * The guest exception level (EL0 vs. EL1) is taken from the saved SPSR.
 * PAR_EL1 is saved and restored around the operation.
 *
 * Returns the translated address, `addr` itself if the guest MMU is off, or 0
 * on a translation fault.
 */
u64 hv_translate(u64 addr, bool s1, bool w, u64 *par_out)
{
    if (!(mrs(SCTLR_EL12) & SCTLR_M)) {
        // No translation was performed; still give the caller a defined PAR
        // value instead of leaving its variable uninitialized.
        if (par_out)
            *par_out = 0;
        return addr; // MMU off
    }

    u64 el = FIELD_GET(SPSR_M, hv_get_spsr()) >> 2;
    u64 save = mrs(PAR_EL1);

    if (w) {
        if (s1) {
            if (el == 0)
                asm("at s1e0w, %0" : : "r"(addr));
            else
                asm("at s1e1w, %0" : : "r"(addr));
        } else {
            if (el == 0)
                asm("at s12e0w, %0" : : "r"(addr));
            else
                asm("at s12e1w, %0" : : "r"(addr));
        }
    } else {
        if (s1) {
            if (el == 0)
                asm("at s1e0r, %0" : : "r"(addr));
            else
                asm("at s1e1r, %0" : : "r"(addr));
        } else {
            if (el == 0)
                asm("at s12e0r, %0" : : "r"(addr));
            else
                asm("at s12e1r, %0" : : "r"(addr));
        }
    }

    u64 par = mrs(PAR_EL1);
    if (par_out)
        *par_out = par;
    msr(PAR_EL1, save);

    if (par & PAR_F) {
        dprintf("hv_translate(0x%lx, %d, %d): fault 0x%lx\n", addr, s1, w, par);
        return 0; // fault
    } else {
        // Combine the translated frame with the low 12 bits of the input address.
        return (par & PAR_PA) | (addr & 0xfff);
    }
}
/*
 * Walk the hypervisor's page tables in software for guest address `addr`.
 *
 * Returns the descriptor that applies to `addr`, or 0 if nothing is mapped.
 * For SPTE_MAP software blocks the offset within the block is added to the
 * descriptor; for hardware blocks/pages the lower attribute bits are cleared
 * and replaced by the offset bits, so the result carries the word-granular
 * target of `addr`.
 */
u64 hv_pt_walk(u64 addr)
{
    dprintf("hv_pt_walk(0x%lx)\n", addr);

    u64 idx = addr >> VADDR_L1_OFFSET_BITS;
    u64 *l2;
    if (vaddr_bits > 36) {
        assert(idx < ENTRIES_PER_L1_TABLE);

        u64 l1d = hv_Ltop[idx];

        dprintf("  l1d = 0x%lx\n", l1d);

        if (!L1_IS_TABLE(l1d)) {
            dprintf("  result: 0x%lx\n", l1d);
            return l1d;
        }
        l2 = (u64 *)(l1d & PTE_TARGET_MASK);
    } else {
        assert(idx == 0);
        l2 = hv_Ltop;
    }

    idx = (addr >> VADDR_L2_OFFSET_BITS) & MASK(VADDR_L2_INDEX_BITS);
    u64 l2d = l2[idx];
    dprintf("  l2d = 0x%lx\n", l2d);

    if (!L2_IS_TABLE(l2d)) {
        if (L2_IS_SW_BLOCK(l2d))
            l2d += addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK);
        if (L2_IS_HW_BLOCK(l2d)) {
            l2d &= ~PTE_LOWER_ATTRIBUTES;
            l2d |= addr & (VADDR_L2_ALIGN_MASK | VADDR_L3_ALIGN_MASK);
        }

        dprintf("  result: 0x%lx\n", l2d);
        return l2d;
    }

    idx = (addr >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
    u64 l3d = ((u64 *)(l2d & PTE_TARGET_MASK))[idx];
    dprintf("  l3d = 0x%lx\n", l3d);

    if (!L3_IS_TABLE(l3d)) {
        if (L3_IS_SW_BLOCK(l3d))
            l3d += addr & VADDR_L3_ALIGN_MASK;
        if (L3_IS_HW_BLOCK(l3d)) {
            l3d &= ~PTE_LOWER_ATTRIBUTES;
            l3d |= addr & VADDR_L3_ALIGN_MASK;
        }
        dprintf("  result: 0x%lx\n", l3d);
        return l3d;
    }

    idx = (addr >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);
    dprintf("  l4 idx = 0x%lx\n", idx);
    u64 l4d = ((u64 *)(l3d & PTE_TARGET_MASK))[idx];
    dprintf("  l4d = 0x%lx\n", l4d);
    return l4d;
}
// Helpers for emulate_load()/emulate_store(); they expand to early returns and
// rely on locals named `Rn` and `val` in the enclosing function.
// CHECK_RN: refuse (return false) when the base register number is 31.
#define CHECK_RN \
if (Rn == 31) \
return false
// DECODE_OK: when no value buffer was supplied the call is decode-only, so
// report success before any register state is modified.
#define DECODE_OK \
if (!val) \
return true
// Sign-extend the low `b` bits of `n` (performed in 32-bit arithmetic).
#define EXT(n, b) (((s32)(((u32)(n)) << (32 - (b)))) >> (32 - (b)))
// One 128-bit SIMD register, viewable as 64/32/16/8-bit lanes.
union simd_reg {
u64 d[2];
u32 s[4];
u16 h[8];
u8 b[16];
};
/*
 * Decode a load instruction and, optionally, commit its result.
 *
 * ctx:   guest register context (ctx->regs is updated on commit)
 * insn:  the instruction word
 * val:   NULL for a decode-only pass; otherwise the data that was read, which
 *        is written to the destination register(s) / SIMD state
 * width: out, log2 of the access size in bytes
 * vaddr: in/out, overwritten with the computed access address for the forms
 *        that compute one; left untouched otherwise
 *
 * Returns false if the instruction is not recognized (or uses register 31 as
 * a writeback base), true otherwise. In a decode-only pass no guest state is
 * modified.
 */
static bool emulate_load(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr)
{
    u64 Rt = insn & 0x1f;
    u64 Rn = (insn >> 5) & 0x1f;
    // imm12 is the *unsigned* scaled offset; sign-extending it would turn
    // offsets with the top bit set into negative displacements.
    u64 imm12 = (insn >> 10) & 0xfff;
    u64 imm9 = EXT((insn >> 12) & 0x1ff, 9);
    u64 imm7 = EXT((insn >> 15) & 0x7f, 7);
    u64 *regs = ctx->regs;

    union simd_reg simd[32];

    *width = insn >> 30;

    if (val)
        dprintf("emulate_load(%p, 0x%08x, 0x%08lx, %ld\n", regs, insn, *val, *width);

    if ((insn & 0x3fe00400) == 0x38400400) {
        // LDRx (immediate) Pre/Post-index
        CHECK_RN;
        DECODE_OK;
        regs[Rn] += imm9;
        regs[Rt] = *val;
    } else if ((insn & 0x3fc00000) == 0x39400000) {
        // LDRx (immediate) Unsigned offset
        DECODE_OK;
        regs[Rt] = *val;
    } else if ((insn & 0x3fa00400) == 0x38800400) {
        // LDRSx (immediate) Pre/Post-index
        CHECK_RN;
        DECODE_OK;
        regs[Rn] += imm9;
        regs[Rt] = (s64)EXT(*val, 8 << *width);
        // Bit 22 set selects the 32-bit destination form: drop the upper half
        if (insn & (1 << 22))
            regs[Rt] &= 0xffffffff;
    } else if ((insn & 0x3fa00000) == 0x39800000) {
        // LDRSx (immediate) Unsigned offset
        DECODE_OK;
        regs[Rt] = (s64)EXT(*val, 8 << *width);
        if (insn & (1 << 22))
            regs[Rt] &= 0xffffffff;
    } else if ((insn & 0x3fe04c00) == 0x38604800) {
        // LDRx (register)
        DECODE_OK;
        regs[Rt] = *val;
    } else if ((insn & 0x3fa04c00) == 0x38a04800) {
        // LDRSx (register)
        DECODE_OK;
        regs[Rt] = (s64)EXT(*val, 8 << *width);
        if (insn & (1 << 22))
            regs[Rt] &= 0xffffffff;
    } else if ((insn & 0x3fe00c00) == 0x38400000) {
        // LDURx (unscaled)
        DECODE_OK;
        regs[Rt] = *val;
    } else if ((insn & 0x3fa00c00) == 0x38a00000) {
        // LDURSx (unscaled)
        DECODE_OK;
        regs[Rt] = (s64)EXT(*val, (8 << *width));
        if (insn & (1 << 22))
            regs[Rt] &= 0xffffffff;
    } else if ((insn & 0xffc00000) == 0x29400000) {
        // LDP (Signed offset, 32-bit)
        *width = 3;
        *vaddr = regs[Rn] + (imm7 * 4);
        DECODE_OK;
        u64 Rt2 = (insn >> 10) & 0x1f;
        regs[Rt] = val[0] & 0xffffffff;
        regs[Rt2] = val[0] >> 32;
    } else if ((insn & 0xffc00000) == 0xa9400000) {
        // LDP (Signed offset, 64-bit)
        *width = 4;
        *vaddr = regs[Rn] + (imm7 * 8);
        DECODE_OK;
        u64 Rt2 = (insn >> 10) & 0x1f;
        regs[Rt] = val[0];
        regs[Rt2] = val[1];
    } else if ((insn & 0xfec00000) == 0xa8c00000) {
        // LDP (pre/post-increment, 64-bit)
        *width = 4;
        // Bit 24 set: pre-index, the offset applies to the access itself
        *vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0);
        DECODE_OK;
        regs[Rn] += imm7 * 8;
        u64 Rt2 = (insn >> 10) & 0x1f;
        regs[Rt] = val[0];
        regs[Rt2] = val[1];
    } else if ((insn & 0xfec00000) == 0xac400000) {
        // LD[N]P (SIMD&FP, 128-bit) Signed offset
        *width = 5;
        *vaddr = regs[Rn] + (imm7 * 16);
        DECODE_OK;
        u64 Rt2 = (insn >> 10) & 0x1f;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = val[1];
        simd[Rt2].d[0] = val[2];
        simd[Rt2].d[1] = val[3];
        put_simd_state(simd);
    } else if ((insn & 0x3fc00000) == 0x3d400000) {
        // LDR (immediate, SIMD&FP) Unsigned offset
        *vaddr = regs[Rn] + (imm12 << *width);
        DECODE_OK;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = 0;
        put_simd_state(simd);
    } else if ((insn & 0xffc00000) == 0x3dc00000) {
        // LDR (immediate, SIMD&FP) Unsigned offset, 128-bit
        *width = 4;
        *vaddr = regs[Rn] + (imm12 << *width);
        DECODE_OK;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = val[1];
        put_simd_state(simd);
    } else if ((insn & 0xffe00c00) == 0x3cc00000) {
        // LDURx (unscaled, SIMD&FP, 128-bit)
        *width = 4;
        // Unscaled form: the signed 9-bit offset is in bytes, not in units of the access size
        *vaddr = regs[Rn] + imm9;
        DECODE_OK;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = val[1];
        put_simd_state(simd);
    } else if ((insn & 0x3fe00400) == 0x3c400400) {
        // LDR (immediate, SIMD&FP) Pre/Post-index
        CHECK_RN;
        DECODE_OK;
        regs[Rn] += imm9;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = 0;
        put_simd_state(simd);
    } else if ((insn & 0xffe00400) == 0x3cc00400) {
        // LDR (immediate, SIMD&FP) Pre/Post-index, 128-bit
        *width = 4;
        CHECK_RN;
        DECODE_OK;
        regs[Rn] += imm9;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = val[1];
        put_simd_state(simd);
    } else if ((insn & 0x3fe04c00) == 0x3c604800) {
        // LDR (register, SIMD&FP)
        DECODE_OK;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = 0;
        put_simd_state(simd);
    } else if ((insn & 0xffe04c00) == 0x3ce04800) {
        // LDR (register, SIMD&FP), 128-bit
        *width = 4;
        DECODE_OK;
        get_simd_state(simd);
        simd[Rt].d[0] = val[0];
        simd[Rt].d[1] = val[1];
        put_simd_state(simd);
    } else if ((insn & 0xbffffc00) == 0x0d408400) {
        // LD1 (single structure) No offset, 64-bit
        *width = 3;
        DECODE_OK;
        u64 index = (insn >> 30) & 1;
        get_simd_state(simd);
        simd[Rt].d[index] = val[0];
        put_simd_state(simd);
    } else if ((insn & 0x3ffffc00) == 0x08dffc00) {
        // LDAR*
        DECODE_OK;
        regs[Rt] = *val;
    } else {
        return false;
    }
    return true;
}
/*
 * Decode a store instruction and fetch the data it would write.
 *
 * ctx:   guest register context; writeback forms update the base register
 * insn:  the instruction word
 * val:   out, receives the data to be stored (one or more 64-bit words)
 * width: out, log2 of the access size in bytes
 * vaddr: in/out, overwritten with the computed access address for the forms
 *        that compute one; left untouched otherwise
 *
 * Returns false if the instruction is not recognized (or uses register 31 as
 * a writeback base), true otherwise.
 */
static bool emulate_store(struct exc_info *ctx, u32 insn, u64 *val, u64 *width, u64 *vaddr)
{
u64 Rt = insn & 0x1f;
u64 Rn = (insn >> 5) & 0x1f;
u64 imm9 = EXT((insn >> 12) & 0x1ff, 9);
u64 imm7 = EXT((insn >> 15) & 0x7f, 7);
u64 *regs = ctx->regs;
union simd_reg simd[32];
*width = insn >> 30;
dprintf("emulate_store(%p, 0x%08x, ..., %ld) = ", regs, insn, *width);
// Register number 31 as a store source reads as zero
regs[31] = 0;
// Mask selecting the low 1 << width bytes of a general-purpose register
u64 mask = 0xffffffffffffffffUL;
if (*width < 3)
mask = (1UL << (8 << *width)) - 1;
if ((insn & 0x3fe00400) == 0x38000400) {
// STRx (immediate) Pre/Post-index
CHECK_RN;
regs[Rn] += imm9;
*val = regs[Rt] & mask;
} else if ((insn & 0x3fc00000) == 0x39000000) {
// STRx (immediate) Unsigned offset
*val = regs[Rt] & mask;
} else if ((insn & 0x3fe04c00) == 0x38204800) {
// STRx (register)
*val = regs[Rt] & mask;
} else if ((insn & 0xfec00000) == 0x28000000) {
// ST[N]P (Signed offset, 32-bit)
*vaddr = regs[Rn] + (imm7 * 4);
u64 Rt2 = (insn >> 10) & 0x1f;
val[0] = (regs[Rt] & 0xffffffff) | (regs[Rt2] << 32);
*width = 3;
} else if ((insn & 0xfec00000) == 0xa8000000) {
// ST[N]P (Signed offset, 64-bit)
*vaddr = regs[Rn] + (imm7 * 8);
u64 Rt2 = (insn >> 10) & 0x1f;
val[0] = regs[Rt];
val[1] = regs[Rt2];
*width = 4;
} else if ((insn & 0xfec00000) == 0xa8800000) {
// ST[N]P (immediate, 64-bit, pre/post-index)
CHECK_RN;
// Bit 24 set: pre-index, the offset applies to the access itself
*vaddr = regs[Rn] + ((insn & BIT(24)) ? (imm7 * 8) : 0);
regs[Rn] += (imm7 * 8);
u64 Rt2 = (insn >> 10) & 0x1f;
val[0] = regs[Rt];
val[1] = regs[Rt2];
*width = 4;
} else if ((insn & 0x3fc00000) == 0x3d000000) {
// STR (immediate, SIMD&FP) Unsigned offset, 8..64-bit
get_simd_state(simd);
*val = simd[Rt].d[0];
} else if ((insn & 0x3fe04c00) == 0x3c204800) {
// STR (register, SIMD&FP) 8..64-bit
get_simd_state(simd);
*val = simd[Rt].d[0];
} else if ((insn & 0xffe04c00) == 0x3ca04800) {
// STR (register, SIMD&FP) 128-bit
get_simd_state(simd);
val[0] = simd[Rt].d[0];
val[1] = simd[Rt].d[1];
*width = 4;
} else if ((insn & 0xffc00000) == 0x3d800000) {
// STR (immediate, SIMD&FP) Unsigned offset, 128-bit
get_simd_state(simd);
val[0] = simd[Rt].d[0];
val[1] = simd[Rt].d[1];
*width = 4;
} else if ((insn & 0xffe00000) == 0xbc000000) {
// STUR (immediate, SIMD&FP) 32-bit
get_simd_state(simd);
val[0] = simd[Rt].s[0];
*width = 2;
} else if ((insn & 0xffe00000) == 0xfc000000) {
// STUR (immediate, SIMD&FP) 64-bit
get_simd_state(simd);
val[0] = simd[Rt].d[0];
*width = 3;
} else if ((insn & 0xffe00000) == 0x3c800000) {
// STUR (immediate, SIMD&FP) 128-bit
get_simd_state(simd);
val[0] = simd[Rt].d[0];
val[1] = simd[Rt].d[1];
*width = 4;
} else if ((insn & 0xffc00000) == 0x2d000000) {
// STP (SIMD&FP, 32-bit) Signed offset: two 32-bit lanes packed into one 64-bit word
*vaddr = regs[Rn] + (imm7 * 4);
u64 Rt2 = (insn >> 10) & 0x1f;
get_simd_state(simd);
val[0] = simd[Rt].s[0] | (((u64)simd[Rt2].s[0]) << 32);
*width = 3;
} else if ((insn & 0xffc00000) == 0xad000000) {
// STP (SIMD&FP, 128-bit) Signed offset
*vaddr = regs[Rn] + (imm7 * 16);
u64 Rt2 = (insn >> 10) & 0x1f;
get_simd_state(simd);
val[0] = simd[Rt].d[0];
val[1] = simd[Rt].d[1];
val[2] = simd[Rt2].d[0];
val[3] = simd[Rt2].d[1];
*width = 5;
} else if ((insn & 0x3fe00c00) == 0x38000000) {
// STURx (unscaled)
*val = regs[Rt] & mask;
} else if ((insn & 0xffffffe0) == 0xd50b7420) {
// DC ZVA: emulated as a store of CACHE_LINE_SIZE zero bytes at the address in Rt
*vaddr = regs[Rt];
memset(val, 0, CACHE_LINE_SIZE);
*width = CACHE_LINE_LOG2;
} else if ((insn & 0x3ffffc00) == 0x089ffc00) {
// STLR*
*val = regs[Rt] & mask;
} else {
return false;
}
dprintf("0x%lx\n", *width);
return true;
}
/*
 * Send mmiotrace event(s) describing one emulated access over the uartproxy.
 *
 * Accesses wider than 8 bytes (width > 3) are reported as a sequence of
 * 8-byte events flagged MMIO_EVT_MULTI, one per 64-bit word of `data`, with
 * the address advancing by 8 each time. When `sync` is set the uartproxy
 * iodev is flushed after every event. The watchdog is suspended while each
 * event is being sent.
 */
static void emit_mmiotrace(u64 pc, u64 addr, u64 *data, u64 width, u64 flags, bool sync)
{
    struct hv_evt_mmiotrace evt = {
        .flags = flags | FIELD_PREP(MMIO_EVT_CPU, smp_id()),
        .pc = pc,
        .addr = addr,
    };

    if (width <= 3)
        evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, width);
    else
        evt.flags |= FIELD_PREP(MMIO_EVT_WIDTH, 3) | MMIO_EVT_MULTI;

    const int total_bytes = 1 << width;
    int reported = 0;
    u64 word = 0;

    // At least one event is always emitted, even for sub-word accesses.
    do {
        evt.data = data[word++];

        hv_wdt_suspend();
        uartproxy_send_event(EVT_MMIOTRACE, &evt, sizeof(evt));
        if (sync)
            iodev_flush(uartproxy_iodev);
        hv_wdt_resume();

        evt.addr += 8;
        reported += 8;
    } while (reported < total_bytes);
}
/*
 * Perform a guarded write of 1 << width bytes from `val` to physical address
 * `addr`.
 *
 * Widths 0..3 write a single value; widths 4..6 are written as consecutive
 * 64-bit words. Exceptions raised by the access are skipped (GUARD_SKIP) and
 * counted; if any occurred, ctx->esr / ctx->far are updated with the real
 * cause and false is returned. Unsupported widths also return false.
 */
bool hv_pa_write(struct exc_info *ctx, u64 addr, u64 *val, int width)
{
    sysop("dsb sy");
    exc_count = 0;
    exc_guard = GUARD_SKIP;
    switch (width) {
        case 0:
            write8(addr, val[0]);
            break;
        case 1:
            write16(addr, val[0]);
            break;
        case 2:
            write32(addr, val[0]);
            break;
        case 3:
            write64(addr, val[0]);
            break;
        case 4:
        case 5:
        case 6:
            for (u64 i = 0; i < (1UL << (width - 3)); i++)
                write64(addr + 8 * i, val[i]);
            break;
        default:
            // `width` is an int, so it must be printed with %d
            dprintf("HV: unsupported write width %d\n", width);
            exc_guard = GUARD_OFF;
            return false;
    }
    // Make sure we catch SErrors here
    sysop("dsb sy");
    sysop("isb");

    exc_guard = GUARD_OFF;
    if (exc_count) {
        printf("HV: Exception during write to 0x%lx (width: %d)\n", addr, width);
        // Update exception info with "real" cause
        ctx->esr = hv_get_esr();
        ctx->far = hv_get_far();
        return false;
    }
    return true;
}
/*
 * Perform a guarded read of 1 << width bytes from physical address `addr`
 * into `val`.
 *
 * Widths 0..3 read a single value; widths 4 and 5 are read as consecutive
 * 64-bit words. Exceptions raised by the access are skipped (GUARD_SKIP) and
 * counted; if any occurred, ctx->esr / ctx->far are updated with the real
 * cause and false is returned. Unsupported widths also return false.
 */
bool hv_pa_read(struct exc_info *ctx, u64 addr, u64 *val, int width)
{
    sysop("dsb sy");
    exc_count = 0;
    exc_guard = GUARD_SKIP;
    switch (width) {
        case 0:
            val[0] = read8(addr);
            break;
        case 1:
            val[0] = read16(addr);
            break;
        case 2:
            val[0] = read32(addr);
            break;
        case 3:
            val[0] = read64(addr);
            break;
        case 4:
            val[0] = read64(addr);
            val[1] = read64(addr + 8);
            break;
        case 5:
            val[0] = read64(addr);
            val[1] = read64(addr + 8);
            val[2] = read64(addr + 16);
            val[3] = read64(addr + 24);
            break;
        default:
            // `width` is an int, so it must be printed with %d
            dprintf("HV: unsupported read width %d\n", width);
            exc_guard = GUARD_OFF;
            return false;
    }
    sysop("dsb sy");

    exc_guard = GUARD_OFF;
    if (exc_count) {
        dprintf("HV: Exception during read from 0x%lx (width: %d)\n", addr, width);
        // Update exception info with "real" cause
        ctx->esr = hv_get_esr();
        ctx->far = hv_get_far();
        return false;
    }
    return true;
}
/* Dispatch a guarded physical access to hv_pa_write() or hv_pa_read() depending on `write`. */
bool hv_pa_rw(struct exc_info *ctx, u64 addr, u64 *val, bool write, int width)
{
    return write ? hv_pa_write(ctx, addr, val, width) : hv_pa_read(ctx, addr, val, width);
}
/*
 * Emulate one naturally aligned access of 1 << width bytes that does not
 * cross a 16KB page.
 *
 * ctx:      guest exception context (passed to hooks / the proxy)
 * pte:      descriptor returned by the page table walk for this access
 * vaddr:    guest virtual address (supplies the sub-word offset)
 * ipa:      guest intermediate physical address
 * val:      data buffer; source for writes, destination for reads
 * is_write: access direction
 * width:    log2 of the access size in bytes
 * elr:      guest PC, reported in trace events
 * par:      PAR_EL1 value from the stage 1 translation (attribute source)
 *
 * Depending on the descriptor type the access is forwarded to physical
 * memory, handed to an in-hypervisor hook, or sent to the proxy. Trace events
 * are emitted before a write and after a read when the descriptor requests
 * them. Returns false if the access could not be emulated.
 */
static bool hv_emulate_rw_aligned(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u64 *val,
                                  bool is_write, u64 width, u64 elr, u64 par)
{
    assert(pte);
    assert(((ipa & 0x3fff) + (1 << width)) <= 0x4000);

    u64 target = pte & PTE_TARGET_MASK_L4;
    u64 paddr = target | (vaddr & MASK(VADDR_L4_OFFSET_BITS));
    u64 flags = FIELD_PREP(MMIO_EVT_ATTR, FIELD_GET(PAR_ATTR, par)) |
                FIELD_PREP(MMIO_EVT_SH, FIELD_GET(PAR_SH, par));

    // For split ops, treat hardware mapped pages as SPTE_MAP
    if (IS_HW(pte))
        pte = target | FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);

    if (is_write) {
        // Write
        hv_wdt_breadcrumb('3');

        if (pte & SPTE_TRACE_WRITE)
            emit_mmiotrace(elr, ipa, val, width, flags | MMIO_EVT_WRITE, pte & SPTE_TRACE_UNBUF);

        hv_wdt_breadcrumb('4');

        switch (FIELD_GET(SPTE_TYPE, pte)) {
            case SPTE_PROXY_HOOK_R:
                // Only reads are hooked: writes pass straight through to the IPA
                paddr = ipa;
                // fallthrough
            case SPTE_MAP:
                hv_wdt_breadcrumb('5');
                dprintf("HV: SPTE_MAP[W] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr,
                        1 << width, val[0]);
                if (!hv_pa_write(ctx, paddr, val, width))
                    return false;
                break;
            case SPTE_HOOK: {
                hv_wdt_breadcrumb('6');
                hv_hook_t *hook = (hv_hook_t *)target;
                if (!hook(ctx, ipa, val, true, width))
                    return false;
                dprintf("HV: SPTE_HOOK[W] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, vaddr,
                        ipa, 1 << width, hook, val[0]);
                break;
            }
            case SPTE_PROXY_HOOK_RW:
            case SPTE_PROXY_HOOK_W: {
                hv_wdt_breadcrumb('7');
                struct hv_vm_proxy_hook_data hook = {
                    .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | MMIO_EVT_WRITE | flags,
                    .id = FIELD_GET(PTE_TARGET_MASK_L4, pte),
                    .addr = ipa,
                    .data = {0},
                };
                memcpy(hook.data, val, 1 << width);
                hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook);
                break;
            }
            default:
                printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa);
                return false;
        }
    } else {
        hv_wdt_breadcrumb('3');
        switch (FIELD_GET(SPTE_TYPE, pte)) {
            case SPTE_PROXY_HOOK_W:
                // Only writes are hooked: reads pass straight through to the IPA
                paddr = ipa;
                // fallthrough
            case SPTE_MAP:
                hv_wdt_breadcrumb('4');
                if (!hv_pa_read(ctx, paddr, val, width))
                    return false;
                dprintf("HV: SPTE_MAP[R] @0x%lx 0x%lx -> 0x%lx (w=%d): 0x%lx\n", elr, ipa, paddr,
                        1 << width, val[0]);
                break;
            case SPTE_HOOK: {
                hv_wdt_breadcrumb('5');
                hv_hook_t *hook = (hv_hook_t *)target;
                if (!hook(ctx, ipa, val, false, width))
                    return false;
                dprintf("HV: SPTE_HOOK[R] @0x%lx 0x%lx -> 0x%lx (w=%d) @%p: 0x%lx\n", elr, vaddr,
                        ipa, 1 << width, hook, val[0]);
                break;
            }
            case SPTE_PROXY_HOOK_RW:
            case SPTE_PROXY_HOOK_R: {
                hv_wdt_breadcrumb('6');
                struct hv_vm_proxy_hook_data hook = {
                    .flags = FIELD_PREP(MMIO_EVT_WIDTH, width) | flags,
                    .id = FIELD_GET(PTE_TARGET_MASK_L4, pte),
                    .addr = ipa,
                };
                hv_exc_proxy(ctx, START_HV, HV_HOOK_VM, &hook);
                memcpy(val, hook.data, 1 << width);
                break;
            }
            default:
                printf("HV: invalid SPTE 0x%016lx for IPA 0x%lx\n", pte, ipa);
                return false;
        }

        hv_wdt_breadcrumb('7');
        if (pte & SPTE_TRACE_READ)
            emit_mmiotrace(elr, ipa, val, width, flags, pte & SPTE_TRACE_UNBUF);
    }

    hv_wdt_breadcrumb('*');

    return true;
}
/*
 * Emulate an access of `bytes` bytes that lies within a single page by
 * splitting it into naturally aligned power-of-two chunks (largest first that
 * fits the current IPA alignment and remaining length, up to 64 bytes).
 *
 * For hardware and SPTE_MAP descriptors the target address inside `pte` is
 * advanced along with the access; for hooks the descriptor is passed through
 * unchanged. Returns false as soon as one chunk fails, warning if earlier
 * chunks had already been committed.
 */
static bool hv_emulate_rw(struct exc_info *ctx, u64 pte, u64 vaddr, u64 ipa, u8 *val, bool is_write,
                          u64 bytes, u64 elr, u64 par)
{
    u64 aval[HV_MAX_RW_WORDS];

    const bool advance = IS_HW(pte) || (IS_SW(pte) && FIELD_GET(SPTE_TYPE, pte) == SPTE_MAP);
    u64 paddr = (pte & PTE_TARGET_MASK_L4) | (vaddr & MASK(VADDR_L4_OFFSET_BITS));

    bool first = true;
    u64 done = 0;

    for (u64 remaining = bytes; remaining > 0;) {
        memset(aval, 0, sizeof(aval));

        // Pick the widest access that is both aligned at `ipa` and no longer
        // than what is left; falls back to single bytes.
        u64 width = 6;
        while (width > 0 &&
               !(remaining >= (1UL << width) && (ipa & ((1UL << width) - 1)) == 0))
            width--;

        u64 chunk = 1 << width;

        if (is_write)
            memcpy(aval, val + done, chunk);

        if (advance)
            pte = (paddr & PTE_TARGET_MASK_L4) | (pte & ~PTE_TARGET_MASK_L4);

        if (!hv_emulate_rw_aligned(ctx, pte, vaddr, ipa, aval, is_write, width, elr, par)) {
            if (!first)
                printf("HV: WARNING: Failed to emulate split op but part of it did commit!\n");
            return false;
        }

        if (!is_write)
            memcpy(val + done, aval, chunk);

        remaining -= chunk;
        done += chunk;
        ipa += chunk;
        vaddr += chunk;
        if (advance)
            paddr += chunk;

        first = false;
    }

    return true;
}
/*
 * Handle a guest data abort by emulating the faulting load or store.
 *
 * The faulting address is translated through stage 1, looked up in the
 * hypervisor page tables, and the faulting instruction is fetched and
 * decoded. The access is then emulated according to the software descriptor,
 * including the case where it straddles a 16KB page boundary.
 *
 * Returns true if the abort was handled, false if it could not be emulated.
 */
bool hv_handle_dabort(struct exc_info *ctx)
{
hv_wdt_breadcrumb('0');
u64 esr = hv_get_esr();
bool is_write = esr & ESR_ISS_DABORT_WnR;
u64 far = hv_get_far();
u64 par;
// Stage 1 only: guest VA -> IPA
u64 ipa = hv_translate(far, true, is_write, &par);
dprintf("hv_handle_abort(): stage 1 0x%0lx -> 0x%lx\n", far, ipa);
if (!ipa) {
printf("HV: stage 1 translation failed at VA 0x%0lx\n", far);
return false;
}
if (ipa >= BIT(vaddr_bits)) {
printf("hv_handle_abort(): IPA out of bounds: 0x%0lx -> 0x%lx\n", far, ipa);
return false;
}
u64 pte = hv_pt_walk(ipa);
if (!pte) {
printf("HV: Unmapped IPA 0x%lx\n", ipa);
return false;
}
if (IS_HW(pte)) {
printf("HV: Data abort on mapped page (0x%lx -> 0x%lx)\n", far, pte);
// Try again, this is usually a race
ctx->elr -= 4;
return true;
}
hv_wdt_breadcrumb('1');
assert(IS_SW(pte));
// Fetch the faulting instruction through a combined stage 1+2 read translation of the PC
u64 elr = ctx->elr;
u64 elr_pa = hv_translate(elr, false, false, NULL);
if (!elr_pa) {
printf("HV: Failed to fetch instruction for data abort at 0x%lx\n", elr);
return false;
}
u32 insn = read32(elr_pa);
u64 width;
hv_wdt_breadcrumb('2');
// The decoders overwrite vaddr only for instruction forms where they compute the address
u64 vaddr = far;
u8 val[HV_MAX_RW_SIZE] ALIGNED(HV_MAX_RW_SIZE);
memset(val, 0, sizeof(val));
if (is_write) {
hv_wdt_breadcrumb('W');
if (!emulate_store(ctx, insn, (u64 *)val, &width, &vaddr)) {
printf("HV: store not emulated: 0x%08x at 0x%lx\n", insn, ipa);
return false;
}
} else {
hv_wdt_breadcrumb('R');
// Decode-only pass (no value yet): yields the access width and address
if (!emulate_load(ctx, insn, NULL, &width, &vaddr)) {
printf("HV: load not emulated: 0x%08x at 0x%lx\n", insn, ipa);
return false;
}
}
/*
Check for HW page-straddling conditions.
An access that crosses a page boundary is split at the boundary and each
part is translated and emulated separately.
*/
u64 bytes = 1 << width;
u64 vaddrp0 = vaddr & ~MASK(VADDR_L3_OFFSET_BITS);
u64 vaddrp1 = (vaddr + bytes - 1) & ~MASK(VADDR_L3_OFFSET_BITS);
if (vaddrp0 == vaddrp1) {
// Easy case, no page straddle
if (far != vaddr) {
printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddr);
return false;
}
if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, bytes, elr, par))
return false;
} else {
// Oops, we're straddling a page boundary
// Treat it as two separate loads or stores
assert(bytes > 1);
hv_wdt_breadcrumb('s');
// Number of bytes that fall in the lower page
u64 off = vaddrp1 - vaddr;
// The fault address identifies which half faulted; vaddr2 is the start of the other half
u64 vaddr2;
const char *other;
if (far == vaddr) {
other = "upper";
vaddr2 = vaddrp1;
} else {
if (far != vaddrp1) {
printf("HV: faulted at 0x%lx, but expecting 0x%lx\n", far, vaddrp1);
return false;
}
other = "lower";
vaddr2 = vaddr;
}
u64 par2;
u64 ipa2 = hv_translate(vaddr2, true, esr & ESR_ISS_DABORT_WnR, &par2);
if (!ipa2) {
printf("HV: %s half stage 1 translation failed at VA 0x%0lx\n", other, vaddr2);
return false;
}
if (ipa2 >= BIT(vaddr_bits)) {
printf("hv_handle_abort(): %s half IPA out of bounds: 0x%0lx -> 0x%lx\n", other, vaddr2,
ipa2);
return false;
}
u64 pte2 = hv_pt_walk(ipa2);
if (!pte2) {
printf("HV: Unmapped %s half IPA 0x%lx\n", other, ipa2);
return false;
}
hv_wdt_breadcrumb('S');
printf("HV: Emulating %s straddling page boundary as two ops @ 0x%lx (%ld bytes)\n",
is_write ? "write" : "read", vaddr, bytes);
// Always emulate the lower half first, then the upper half
bool upper_ret;
if (far == vaddr) {
if (!hv_emulate_rw(ctx, pte, vaddr, ipa, val, is_write, off, elr, par))
return false;
upper_ret =
hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val + off, is_write, bytes - off, elr, par2);
} else {
if (!hv_emulate_rw(ctx, pte2, vaddr2, ipa2, val, is_write, off, elr, par2))
return false;
upper_ret =
hv_emulate_rw(ctx, pte, vaddrp1, ipa, val + off, is_write, bytes - off, elr, par);
}
if (!upper_ret) {
printf("HV: WARNING: Failed to emulate upper half but lower half did commit!\n");
return false;
}
}
if (vaddrp0 != vaddrp1) {
printf("HV: Straddled r/w data:\n");
hexdump(val, bytes);
}
hv_wdt_breadcrumb('8');
// Second pass for loads: commit the value that was read to the guest registers
if (!is_write && !emulate_load(ctx, insn, (u64 *)val, &width, &vaddr))
return false;
hv_wdt_breadcrumb('9');
return true;
}