From 1ae60ad715978e92de890fe2457759e5d3b7bac5 Mon Sep 17 00:00:00 2001
From: Hector Martin
Date: Sat, 1 May 2021 19:05:21 +0900
Subject: [PATCH] hv: Beginnings of a hypervisor

Signed-off-by: Hector Martin
---
 Makefile                 |   1 +
 proxyclient/proxy.py     |   8 +
 proxyclient/run_guest.py |  23 ++
 src/arm_cpu_regs.h       | 103 +++++++++++
 src/cpu_regs.h           |   1 +
 src/hv.c                 | 368 +++++++++++++++++++++++++++++++++++++++
 src/hv.h                 |  16 ++
 src/proxy.c              |   8 +
 src/proxy.h              |   3 +
 9 files changed, 531 insertions(+)
 create mode 100755 proxyclient/run_guest.py
 create mode 100644 src/arm_cpu_regs.h
 create mode 100644 src/hv.c
 create mode 100644 src/hv.h

diff --git a/Makefile b/Makefile
index 77391cb2..2866226f 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,7 @@ OBJECTS := \
 	exception.o exception_asm.o \
 	fb.o font.o font_retina.o \
 	heapblock.o \
+	hv.o \
 	iodev.o \
 	kboot.o \
 	main.o \
diff --git a/proxyclient/proxy.py b/proxyclient/proxy.py
index 3285bc15..f2980a68 100755
--- a/proxyclient/proxy.py
+++ b/proxyclient/proxy.py
@@ -418,6 +418,9 @@ class M1N1Proxy:
     P_DART_MAP = 0xb02
     P_DART_UNMAP = 0xb03
 
+    P_HV_INIT = 0xc00
+    P_HV_MAP = 0xc01
+
     def __init__(self, iface, debug=False):
         self.debug = debug
         self.iface = iface
@@ -733,6 +736,11 @@ class M1N1Proxy:
     def dart_unmap(self, dart, iova, len):
         return self.request(self.P_DART_UNMAP, dart, iova, len)
 
+    def hv_init(self):
+        return self.request(self.P_HV_INIT)
+    def hv_map(self, from_, to, size, incr):
+        return self.request(self.P_HV_MAP, from_, to, size, incr)
+
 if __name__ == "__main__":
     import serial
     uartdev = os.environ.get("M1N1DEVICE", "/dev/ttyUSB0")
diff --git a/proxyclient/run_guest.py b/proxyclient/run_guest.py
new file mode 100755
index 00000000..bf1f1c02
--- /dev/null
+++ b/proxyclient/run_guest.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+import argparse, pathlib
+
+parser = argparse.ArgumentParser(description='Run a Mach-O payload under the hypervisor')
+parser.add_argument('payload', type=pathlib.Path)
+args = parser.parse_args()
+
+from proxy import *
+from proxyutils import *
+from utils import *
+from hv import HV
+
+iface = UartInterface()
+p = M1N1Proxy(iface, debug=False)
+bootstrap_port(iface, p)
+u = ProxyUtils(p, heap_size = 128 * 1024 * 1024)
+
+hv = HV(iface, p, u)
+
+hv.init()
+hv.load_macho(args.payload.read_bytes())
+hv.start()
diff --git a/src/arm_cpu_regs.h b/src/arm_cpu_regs.h
new file mode 100644
index 00000000..95140537
--- /dev/null
+++ b/src/arm_cpu_regs.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "utils.h"
+
+#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
+// HCR_EL2.E2H == 1
+#define CNTHCTL_EVNTIS   BIT(17)
+#define CNTHCTL_EL1NVVCT BIT(16)
+#define CNTHCTL_EL1NVPCT BIT(15)
+#define CNTHCTL_EL1TVCT  BIT(14)
+#define CNTHCTL_EL1TVT   BIT(13)
+#define CNTHCTL_ECV      BIT(12)
+#define CNTHCTL_EL1PTEN  BIT(11)
+#define CNTHCTL_EL1PCTEN BIT(10)
+#define CNTHCTL_EL0PTEN  BIT(9)
+#define CNTHCTL_EL0VTEN  BIT(8)
+#define CNTHCTL_EVNTI    GENMASK(7, 4)
+#define CNTHCTL_EVNTDIR  BIT(3)
+#define CNTHCTL_EVNTEN   BIT(2)
+#define CNTHCTL_EL0VCTEN BIT(1)
+#define CNTHCTL_EL0PCTEN BIT(0)
+
+#define SYS_HCR_EL2  sys_reg(3, 4, 1, 1, 0)
+#define HCR_TWEDEL   GENMASK(63, 60)
+#define HCR_TWEDEn   BIT(59)
+#define HCR_TID5     BIT(58)
+#define HCR_DCT      BIT(57)
+#define HCR_ATA      BIT(56)
+#define HCR_TTLBOS   BIT(55)
+#define HCR_TTLBIS   BIT(54)
+#define HCR_EnSCXT   BIT(53)
+#define HCR_TOCU     BIT(52)
+#define HCR_AMVOFFEN BIT(51)
+#define HCR_TICAB    BIT(50)
+#define HCR_TID4     BIT(49)
+#define HCR_FIEN     BIT(47)
+#define HCR_FWB      BIT(46)
+#define HCR_NV2      BIT(45)
+#define HCR_AT       BIT(44)
+#define HCR_NV1      BIT(43)
+#define HCR_NV       BIT(42)
+#define HCR_API      BIT(41)
+#define HCR_APK      BIT(40)
+#define HCR_MIOCNCE  BIT(38)
+#define HCR_TEA      BIT(37)
+#define HCR_TERR     BIT(36)
+#define HCR_TLOR     BIT(35)
+#define HCR_E2H      BIT(34)
+#define HCR_ID       BIT(33)
+#define HCR_CD       BIT(32)
+#define HCR_RW       BIT(31)
+#define HCR_TRVM     BIT(30)
+#define HCR_HCD      BIT(29)
+#define HCR_TDZ      BIT(28)
+#define HCR_TGE      BIT(27)
+#define HCR_TVM      BIT(26)
+#define HCR_TTLB     BIT(25)
+#define HCR_TPU      BIT(24)
+#define HCR_TPCP     BIT(23)
+#define HCR_TPC      BIT(23)
+#define HCR_TSW      BIT(22)
+#define HCR_TACR     BIT(21)
+#define HCR_TIDCP    BIT(20)
+#define HCR_TSC      BIT(19)
+#define HCR_TID3     BIT(18)
+#define HCR_TID2     BIT(17)
+#define HCR_TID1     BIT(16)
+#define HCR_TID0     BIT(15)
+#define HCR_TWE      BIT(14)
+#define HCR_TWI      BIT(13)
+#define HCR_DC       BIT(12)
+#define HCR_BSU      GENMASK(11, 10)
+#define HCR_FB       BIT(9)
+#define HCR_VSE      BIT(8)
+#define HCR_VI       BIT(7)
+#define HCR_VF       BIT(6)
+#define HCR_AMO      BIT(5)
+#define HCR_IMO      BIT(4)
+#define HCR_FMO      BIT(3)
+#define HCR_PTW      BIT(2)
+#define HCR_SWIO     BIT(1)
+#define HCR_VM       BIT(0)
+
+#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
+// Profile(A)
+#define VTCR_SL2     BIT(33)
+#define VTCR_DS      BIT(32)
+#define VTCR_NSA     BIT(30)
+#define VTCR_NSW     BIT(29)
+#define VTCR_HWU62   BIT(28)
+#define VTCR_HWU61   BIT(27)
+#define VTCR_HWU60   BIT(26)
+#define VTCR_HWU59   BIT(25)
+#define VTCR_HD      BIT(22)
+#define VTCR_HA      BIT(21)
+#define VTCR_VS      BIT(19)
+#define VTCR_PS      GENMASK(18, 16)
+#define VTCR_TG0     GENMASK(15, 14)
+#define VTCR_SH0     GENMASK(13, 12)
+#define VTCR_ORGN0   GENMASK(11, 10)
+#define VTCR_IRGN0   GENMASK(9, 8)
+#define VTCR_SL0     GENMASK(7, 6)
+#define VTCR_T0SZ    GENMASK(5, 0)
diff --git a/src/cpu_regs.h b/src/cpu_regs.h
index a038d817..ee42a9e4 100644
--- a/src/cpu_regs.h
+++ b/src/cpu_regs.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: MIT */
 
+#include "arm_cpu_regs.h"
 #include "utils.h"
 
 /* HID registers */
diff --git a/src/hv.c b/src/hv.c
new file mode 100644
index 00000000..95dc7f9e
--- /dev/null
+++ b/src/hv.c
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: MIT */
+
+#include "hv.h"
+#include "assert.h"
+#include "cpu_regs.h"
+#include "malloc.h"
+#include "string.h"
+#include "types.h"
+#include "utils.h"
+
+#define PAGE_SIZE       0x4000
+#define CACHE_LINE_SIZE 64
+
+#define PTE_ACCESS            BIT(10)
+#define PTE_SH_NS             (0b11L << 8)
+#define PTE_S2AP_RW           (0b11L << 6)
+#define PTE_MEMATTR_UNCHANGED (0b1111L << 2)
+
+#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED)
+
+#define PTE_VALID BIT(0)
+#define PTE_TYPE  BIT(1)
+#define PTE_BLOCK 0
+#define PTE_TABLE 1
+#define PTE_PAGE  1
+
+#define VADDR_L4_INDEX_BITS 12
+#define VADDR_L3_INDEX_BITS 11
+#define VADDR_L2_INDEX_BITS 11
+
+#define VADDR_L4_OFFSET_BITS 2
+#define VADDR_L3_OFFSET_BITS 14
+#define VADDR_L2_OFFSET_BITS 25
+
+#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
+#define PTE_TARGET_MASK     GENMASK(49, 14)
+
+#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
+#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
+#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS)
+
+#define SPTE_TYPE BIT(48)
+#define SPTE_MAP  0
+#define SPTE_HOOK 1
+
+#define IS_HW(pte) ((pte) && ((pte) & PTE_VALID))
+#define IS_SW(pte) ((pte) && !((pte) & PTE_VALID))
+
+#define L2_IS_TABLE(pte)     ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte))
+#define L3_IS_TABLE(pte)     (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
+#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte))
+
+/*
+ * We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest
+ * PA size, which results in the following virtual address space:
+ *
+ * [L2 index]  [L3 index] [page offset]
+ *  11 bits     11 bits    14 bits
+ *
+ * 32MB L2 mappings look like this:
+ * [L2 index]  [page offset]
+ *  11 bits     25 bits
+ *
+ * We implement sub-page granularity mappings for software MMIO hooks, which behave
+ * as an additional page table level used only by software. This works like this:
+ *
+ * [L2 index]  [L3 index] [L4 index]  [Word offset]
+ *  11 bits     11 bits    12 bits     2 bits
+ *
+ * Thus, L4 sub-page tables are twice the size.
+ *
+ * We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but
+ * otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means
+ * block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is
+ * pushed to L4.
+ */
+
+/* Root (L2) stage 2 translation table; hv_pt_init() points VTTBR_EL2 at it. */
+static u64 hv_L2[ENTRIES_PER_L2_TABLE] ALIGNED(PAGE_SIZE);
+
+/*
+ * Clear the root table and program VTCR_EL2/VTTBR_EL2 for 16KB-granule
+ * stage 2 translation starting at level 2 over a 36-bit guest PA space.
+ */
+static void hv_pt_init(void)
+{
+    memset(hv_L2, 0, sizeof(hv_L2));
+
+    msr(VTCR_EL2, FIELD_PREP(VTCR_PS, 1) |          // 64GB PA size
+                      FIELD_PREP(VTCR_TG0, 2) |     // 16KB page size
+                      FIELD_PREP(VTCR_SH0, 3) |     // PTWs Inner Shareable
+                      FIELD_PREP(VTCR_ORGN0, 1) |   // PTWs Cacheable
+                      FIELD_PREP(VTCR_IRGN0, 1) |   // PTWs Cacheable
+                      FIELD_PREP(VTCR_SL0, 1) |     // Start at level 2
+                      FIELD_PREP(VTCR_T0SZ, 28));   // 64GB translation region
+
+    msr(VTTBR_EL2, hv_L2);
+}
+
+/* Free an L3 table along with any software L4 sub-page tables it references. */
+static void hv_pt_free_l3(u64 *l3)
+{
+    if (!l3)
+        return;
+
+    for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++)
+        if (IS_SW(l3[idx]) && FIELD_GET(PTE_TYPE, l3[idx]) == PTE_TABLE)
+            free((void *)(l3[idx] & PTE_TARGET_MASK));
+    free(l3);
+}
+
+/*
+ * Install 32MB block descriptors for [from, from + size). `to` is the first
+ * descriptor and advances by `incr` bytes per byte mapped (0 repeats the same
+ * descriptor). Any L3 table previously hanging off a slot is freed.
+ */
+static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr)
+{
+    assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+    assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);
+
+    to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
+
+    for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) {
+        u64 idx = from >> VADDR_L2_OFFSET_BITS;
+
+        if (L2_IS_TABLE(hv_L2[idx]))
+            hv_pt_free_l3((u64 *)(hv_L2[idx] & PTE_TARGET_MASK));
+
+        hv_L2[idx] = to;
+        from += BIT(VADDR_L2_OFFSET_BITS);
+        to += incr * BIT(VADDR_L2_OFFSET_BITS);
+    }
+}
+
+/*
+ * Return the L3 table covering `from`, creating it if needed. An L2 block is
+ * split into per-page entries: HW and SPTE_MAP targets advance, hooks repeat.
+ */
+static u64 *hv_pt_get_l3(u64 from)
+{
+    u64 l2idx = from >> VADDR_L2_OFFSET_BITS;
+    u64 l2d = hv_L2[l2idx];
+
+    if (L2_IS_TABLE(l2d))
+        return (u64 *)(l2d & PTE_TARGET_MASK);
+
+    u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));
+    if (l2d) {
+        u64 incr = 0;
+        u64 l3d = l2d;
+        if (IS_HW(l2d)) {
+            l3d &= ~PTE_TYPE;
+            l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+            incr = BIT(VADDR_L3_OFFSET_BITS);
+        } else if (FIELD_GET(SPTE_TYPE, l3d) == SPTE_MAP) {
+            incr = BIT(VADDR_L3_OFFSET_BITS);
+        }
+        for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += incr)
+            l3[idx] = l3d;
+    } else {
+        memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
+    }
+
+    l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
+    hv_L2[l2idx] = l2d;
+    return l3;
+}
+
+/*
+ * Install 16KB L3 entries for [from, from + size), allocating or splitting L3
+ * tables as needed and freeing any L4 sub-page table being replaced.
+ */
+static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr)
+{
+    assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+    assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);
+
+    if (IS_HW(to))
+        to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+    else
+        to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);
+
+    for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) {
+        u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+        u64 *l3 = hv_pt_get_l3(from);
+
+        if (L3_IS_TABLE(l3[idx]))
+            free((void *)(l3[idx] & PTE_TARGET_MASK));
+
+        l3[idx] = to;
+        from += BIT(VADDR_L3_OFFSET_BITS);
+        to += incr * BIT(VADDR_L3_OFFSET_BITS);
+    }
+}
+
+/*
+ * Return the software L4 sub-page table covering `from`, creating it if
+ * necessary. A hardware page mapping is demoted to a software SPTE_MAP mapping
+ * first, since L4 tables are linked from an invalid (software-only) L3 entry.
+ */
+static u64 *hv_pt_get_l4(u64 from)
+{
+    u64 *l3 = hv_pt_get_l3(from);
+    u64 l3idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
+    u64 l3d = l3[l3idx];
+
+    if (L3_IS_TABLE(l3d)) {
+        return (u64 *)(l3d & PTE_TARGET_MASK);
+    }
+
+    if (IS_HW(l3d)) {
+        assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE);
+        l3d &= PTE_TARGET_MASK;
+        l3d |= FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);
+    }
+
+    u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64));
+    if (l3d) {
+        u64 incr = 0;
+        u64 l4d = l3d;
+        l4d &= ~PTE_TYPE;
+        l4d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
+        if (FIELD_GET(SPTE_TYPE, l4d) == SPTE_MAP)
+            incr = BIT(VADDR_L4_OFFSET_BITS);
+        for (u64 idx = 0; idx < ENTRIES_PER_L4_TABLE; idx++, l4d += incr)
+            l4[idx] = l4d;
+    } else {
+        memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64));
+    }
+
+    l3d = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE);
+    l3[l3idx] = l3d;
+    return l4;
+}
+
+/* Install software-only word-granularity entries for [from, from + size). */
+static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr)
+{
+    assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0);
+    assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0);
+
+    assert(IS_SW(to));
+
+    for (; size; size -= BIT(VADDR_L4_OFFSET_BITS)) {
+        u64 idx = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);
+        u64 *l4 = hv_pt_get_l4(from);
+
+        l4[idx] = to;
+        from += BIT(VADDR_L4_OFFSET_BITS);
+        to += incr * BIT(VADDR_L4_OFFSET_BITS);
+    }
+}
+
+/*
+ * Map guest range [from, from + size) to descriptor `to`, picking the largest
+ * granule each sub-range allows (L4 words, L3 pages, L2 blocks). Hardware
+ * mappings must be page aligned. Returns 0 on success, -1 on bad alignment.
+ */
+int hv_map(u64 from, u64 to, u64 size, u64 incr)
+{
+    u64 chunk;
+    bool hw = IS_HW(to);
+
+    if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS))
+        return -1;
+
+    if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) {
+        printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to);
+        return -1;
+    }
+
+    // L4 mappings to boundary
+    chunk = min(size, ALIGN_UP(from, BIT(VADDR_L3_OFFSET_BITS)) - from);
+    if (chunk) {
+        assert(!hw);
+        hv_pt_map_l4(from, to, chunk, incr);
+        from += chunk;
+        to += incr * chunk;
+        size -= chunk;
+    }
+
+    // L3 mappings to boundary
+    chunk = ALIGN_DOWN(min(size, ALIGN_UP(from, BIT(VADDR_L2_OFFSET_BITS)) - from),
+                       BIT(VADDR_L3_OFFSET_BITS));
+    if (chunk) {
+        hv_pt_map_l3(from, to, chunk, incr);
+        from += chunk;
+        to += incr * chunk;
+        size -= chunk;
+    }
+
+    // L2 mappings (whole 32MB blocks only; hv_pt_map_l2 asserts this)
+    chunk = ALIGN_DOWN(size, BIT(VADDR_L2_OFFSET_BITS));
+    if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) {
+        hv_pt_map_l2(from, to, chunk, incr);
+        from += chunk;
+        to += incr * chunk;
+        size -= chunk;
+    }
+
+    // L3 mappings to end
+    chunk = ALIGN_DOWN(size, BIT(VADDR_L3_OFFSET_BITS));
+    if (chunk) {
+        hv_pt_map_l3(from, to, chunk, incr);
+        from += chunk;
+        to += incr * chunk;
+        size -= chunk;
+    }
+
+    // L4 mappings to end
+    if (size) {
+        assert(!hw);
+        hv_pt_map_l4(from, to, size, incr);
+    }
+
+    return 0;
+}
+
+/* Remove all mappings in [from, from + size) by writing empty descriptors. */
+int hv_unmap(u64 from, u64 size)
+{
+    return hv_map(from, 0, size, 0);
+}
+
+/* Map [from, from + size) to physical address `to` using hardware stage 2 translation. */
+int hv_map_hw(u64 from, u64 to, u64 size)
+{
+    return hv_map(from, to | PTE_ATTRIBUTES | PTE_VALID, size, 1);
+}
+
+/* Map [from, from + size) to `to` with a software (PTE_VALID clear) SPTE_MAP descriptor. */
+int hv_map_sw(u64 from, u64 to, u64 size)
+{
+    return hv_map(from, to | FIELD_PREP(SPTE_TYPE, SPTE_MAP), size, 1);
+}
+
+/* Attach `hook` to [from, from + size); every entry holds the same SPTE_HOOK descriptor. */
+int hv_map_hook(u64 from, void *hook, u64 size)
+{
+    return hv_map(from, ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK), size, 0);
+}
+
+/* Configure EL2 for guest execution: timer access, stage 2 tables, HCR_EL2, TLB flush. */
+void hv_init(void)
+{
+    // Enable physical timer for EL1
+    msr(CNTHCTL_EL2, CNTHCTL_EL1PCTEN);
+
+    hv_pt_init();
+
+    // Configure hypervisor defaults
+    msr(HCR_EL2, HCR_API |     // Allow PAuth instructions
+                     HCR_APK | // Allow PAuth key registers
+                     HCR_TEA | // Trap external aborts
+                     HCR_E2H | // VHE mode (forced)
+                     HCR_RW |  // AArch64 guest
+                     HCR_AMO | // Trap SError exceptions
+                     HCR_VM);  // Enable stage 2 translation
+
+    sysop("dsb ishst");
+    sysop("tlbi alle1is");
+    sysop("dsb ish");
+    sysop("isb");
+}
diff --git a/src/hv.h b/src/hv.h
new file mode 100644
index 00000000..64a43652
--- /dev/null
+++ b/src/hv.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef HV_H
+#define HV_H
+
+#include "types.h"
+
+void hv_init(void);
+int hv_map(u64 from, u64 to, u64 size, u64 incr);
+int hv_unmap(u64 from, u64 size);
+int hv_map_hw(u64 from, u64 to, u64 size);
+int hv_map_sw(u64 from, u64 to, u64 size);
+int hv_map_hook(u64 from, void *hook, u64 size);
+void hv_start(void *entry, u64 regs[4]);
+
+#endif
diff --git a/src/proxy.c b/src/proxy.c
index 4f8c841b..1206bf53 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -4,6 +4,7 @@
 #include "dart.h"
 #include "exception.h"
 #include "heapblock.h"
+#include "hv.h"
 #include "iodev.h"
 #include "kboot.h"
 #include "malloc.h"
@@ -374,6 +375,13 @@ int proxy_process(ProxyRequest *request, ProxyReply *reply)
             dart_unmap((dart_dev_t *)request->args[0], request->args[1], request->args[2]);
             break;
 
+        case P_HV_INIT:
+            hv_init();
+            break;
+        case P_HV_MAP:
+            hv_map(request->args[0], request->args[1], request->args[2], request->args[3]);
+            break;
+
         default:
             reply->status = S_BADCMD;
             break;
diff --git a/src/proxy.h b/src/proxy.h
index 2a2e632d..68edfdb3 100644
--- a/src/proxy.h
+++ b/src/proxy.h
@@ -103,6 +103,9 @@ typedef enum {
     P_DART_MAP,
     P_DART_UNMAP,
 
+    P_HV_INIT = 0xc00,
+    P_HV_MAP = 0xc01,
+
 } ProxyOp;
 
 #define S_OK 0