hv: Beginnings of a hypervisor

Signed-off-by: Hector Martin <marcan@marcan.st>
This commit is contained in:
Hector Martin 2021-05-01 19:05:21 +09:00
parent 1ff5b82aab
commit 1ae60ad715
9 changed files with 498 additions and 0 deletions

View file

@ -43,6 +43,7 @@ OBJECTS := \
exception.o exception_asm.o \
fb.o font.o font_retina.o \
heapblock.o \
hv.o \
iodev.o \
kboot.o \
main.o \

View file

@ -418,6 +418,9 @@ class M1N1Proxy:
P_DART_MAP = 0xb02
P_DART_UNMAP = 0xb03
P_HV_INIT = 0xc00
P_HV_MAP = 0xc01
def __init__(self, iface, debug=False):
self.debug = debug
self.iface = iface
@ -733,6 +736,11 @@ class M1N1Proxy:
def dart_unmap(self, dart, iova, len):
return self.request(self.P_DART_UNMAP, dart, iova, len)
def hv_init(self):
return self.request(self.P_HV_INIT)
def hv_map(self, from_, to, size, incr):
return self.request(self.P_HV_MAP, from_, to, size, incr)
if __name__ == "__main__":
import serial
uartdev = os.environ.get("M1N1DEVICE", "/dev/ttyUSB0")

23
proxyclient/run_guest.py Executable file
View file

@ -0,0 +1,23 @@
#!/usr/bin/env python3
import argparse, pathlib

# Parse the command line first, so `--help` and bad arguments are reported
# before the heavier proxy modules are imported.
arg_parser = argparse.ArgumentParser(description='Run a Mach-O payload under the hypervisor')
arg_parser.add_argument('payload', type=pathlib.Path)
opts = arg_parser.parse_args()

from proxy import *
from proxyutils import *
from utils import *
from hv import HV

# Bring up the serial link to m1n1 and the proxy helpers.
iface = UartInterface()
m1n1_proxy = M1N1Proxy(iface, debug=False)
bootstrap_port(iface, m1n1_proxy)
proxy_utils = ProxyUtils(m1n1_proxy, heap_size=128 * 1024 * 1024)

# Initialize the hypervisor, load the guest Mach-O, and hand over control.
hv = HV(iface, m1n1_proxy, proxy_utils)
hv.init()
hv.load_macho(opts.payload.read_bytes())
hv.start()

106
src/arm_cpu_regs.h Normal file
View file

@ -0,0 +1,106 @@
/* SPDX-License-Identifier: MIT */

#include "utils.h"

/* CNTHCTL_EL2: Counter-timer Hypervisor Control register */
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
// Field layout below applies when HCR_EL2.E2H == 1
#define CNTHCTL_EVNTIS BIT(17)
#define CNTHCTL_EL1NVVCT BIT(16)
#define CNTHCTL_EL1NVPCT BIT(15)
#define CNTHCTL_EL1TVCT BIT(14)
#define CNTHCTL_EL1TVT BIT(13)
#define CNTHCTL_ECV BIT(12)
#define CNTHCTL_EL1PTEN BIT(11)
#define CNTHCTL_EL1PCTEN BIT(10)
#define CNTHCTL_EL0PTEN BIT(9)
#define CNTHCTL_EL0VTEN BIT(8)
#define CNTHCTL_EVNTI GENMASK(7, 4)
#define CNTHCTL_EVNTDIR BIT(3)
#define CNTHCTL_EVNTEN BIT(2)
#define CNTHCTL_EL0VCTEN BIT(1)
#define CNTHCTL_EL0PCTEN BIT(0)

/* HCR_EL2: Hypervisor Configuration register */
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define HCR_TWEDEL GENMASK(63, 60)
#define HCR_TWEDEn BIT(59)
#define HCR_TID5 BIT(58)
#define HCR_DCT BIT(57)
#define HCR_ATA BIT(56)
#define HCR_TTLBOS BIT(55)
#define HCR_TTLBIS BIT(54)
#define HCR_EnSCXT BIT(53)
#define HCR_TOCU BIT(52)
#define HCR_AMVOFFEN BIT(51)
#define HCR_TICAB BIT(50)
#define HCR_TID4 BIT(49)
#define HCR_FIEN BIT(47)
#define HCR_FWB BIT(46)
#define HCR_NV2 BIT(45)
#define HCR_AT BIT(44)
#define HCR_NV1 BIT(43)
#define HCR_NV BIT(42)
#define HCR_API BIT(41)
#define HCR_APK BIT(40)
#define HCR_MIOCNCE BIT(38)
#define HCR_TEA BIT(37)
#define HCR_TERR BIT(36)
#define HCR_TLOR BIT(35)
#define HCR_E2H BIT(34)
#define HCR_ID BIT(33)
#define HCR_CD BIT(32)
#define HCR_RW BIT(31)
#define HCR_TRVM BIT(30)
#define HCR_HCD BIT(29)
#define HCR_TDZ BIT(28)
#define HCR_TGE BIT(27)
#define HCR_TVM BIT(26)
#define HCR_TTLB BIT(25)
#define HCR_TPU BIT(24)
// TPCP / TPC name the same bit (the architectural name varies by revision).
#define HCR_TPCP BIT(23)
#define HCR_TPC BIT(23)
#define HCR_TSW BIT(22)
#define HCR_TACR BIT(21)
#define HCR_TIDCP BIT(20)
#define HCR_TSC BIT(19)
#define HCR_TID3 BIT(18)
#define HCR_TID2 BIT(17)
#define HCR_TID1 BIT(16)
#define HCR_TID0 BIT(15)
#define HCR_TWE BIT(14)
#define HCR_TWI BIT(13)
#define HCR_DC BIT(12)
#define HCR_BSU GENMASK(11, 10)
#define HCR_FB BIT(9)
#define HCR_VSE BIT(8)
#define HCR_VI BIT(7)
#define HCR_VF BIT(6)
#define HCR_AMO BIT(5)
#define HCR_IMO BIT(4)
#define HCR_FMO BIT(3)
#define HCR_PTW BIT(2)
#define HCR_SWIO BIT(1)
#define HCR_VM BIT(0)

/* VTCR_EL2: Virtualization Translation Control register */
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
// Profile(A)
#define VTCR_SL2 BIT(33)
#define VTCR_DS BIT(32)
#define VTCR_NSA BIT(30)
#define VTCR_NSW BIT(29)
#define VTCR_HWU62 BIT(28)
#define VTCR_HWU61 BIT(27)
#define VTCR_HWU60 BIT(26)
#define VTCR_HWU59 BIT(25)
#define VTCR_HD BIT(22)
#define VTCR_HA BIT(21)
#define VTCR_VS BIT(19)
#define VTCR_PS GENMASK(18, 16)
#define VTCR_TG0 GENMASK(15, 14)
#define VTCR_SH0 GENMASK(13, 12)
#define VTCR_ORGN0 GENMASK(11, 10)
#define VTCR_IRGN0 GENMASK(9, 8)
#define VTCR_SL0 GENMASK(7, 6)
#define VTCR_T0SZ GENMASK(5, 0)

View file

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: MIT */
#include "arm_cpu_regs.h"
#include "utils.h"
/* HID registers */

332
src/hv.c Normal file
View file

@ -0,0 +1,332 @@
/* SPDX-License-Identifier: MIT */

#include "hv.h"
#include "assert.h"
#include "cpu_regs.h"
#include "malloc.h"
#include "string.h"
#include "types.h"
#include "utils.h"

#define PAGE_SIZE 0x4000
#define CACHE_LINE_SIZE 64

/* Hardware stage 2 PTE attribute fields */
#define PTE_ACCESS BIT(10)
#define PTE_SH_NS (0b11L << 8)
#define PTE_S2AP_RW (0b11L << 6)
#define PTE_MEMATTR_UNCHANGED (0b1111L << 2)

#define PTE_ATTRIBUTES (PTE_ACCESS | PTE_SH_NS | PTE_S2AP_RW | PTE_MEMATTR_UNCHANGED)

#define PTE_VALID BIT(0)
#define PTE_TYPE BIT(1)
#define PTE_BLOCK 0
#define PTE_TABLE 1
#define PTE_PAGE 1

/* Index/offset widths for the 16KB-granule stage 2 layout described below */
#define VADDR_L4_INDEX_BITS 12
#define VADDR_L3_INDEX_BITS 11
#define VADDR_L2_INDEX_BITS 11

#define VADDR_L4_OFFSET_BITS 2
#define VADDR_L3_OFFSET_BITS 14
#define VADDR_L2_OFFSET_BITS 25

#define VADDR_L2_ALIGN_MASK GENMASK(VADDR_L2_OFFSET_BITS - 1, VADDR_L3_OFFSET_BITS)
#define PTE_TARGET_MASK GENMASK(49, 14)

#define ENTRIES_PER_L2_TABLE BIT(VADDR_L2_INDEX_BITS)
#define ENTRIES_PER_L3_TABLE BIT(VADDR_L3_INDEX_BITS)
#define ENTRIES_PER_L4_TABLE BIT(VADDR_L4_INDEX_BITS)

/* Software-only descriptor fields (only meaningful when PTE_VALID == 0) */
#define SPTE_TYPE BIT(48)
#define SPTE_MAP 0
#define SPTE_HOOK 1

#define IS_HW(pte) (pte && pte & PTE_VALID)
#define IS_SW(pte) (pte && !(pte & PTE_VALID))

#define L2_IS_TABLE(pte) ((pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
#define L2_IS_NOT_TABLE(pte) ((pte) && !L2_IS_TABLE(pte))
#define L3_IS_TABLE(pte) (IS_SW(pte) && FIELD_GET(PTE_TYPE, pte) == PTE_TABLE)
#define L3_IS_NOT_TABLE(pte) ((pte) && !L3_IS_TABLE(pte))

/*
 * We use 16KB page tables for stage 2 translation, and a 64GB (36-bit) guest
 * PA size, which results in the following virtual address space:
 *
 * [L2 index]  [L3 index] [page offset]
 *  11 bits     11 bits    14 bits
 *
 * 32MB L2 mappings look like this:
 * [L2 index]  [page offset]
 *  11 bits     25 bits
 *
 * We implement sub-page granularity mappings for software MMIO hooks, which behave
 * as an additional page table level used only by software. This works like this:
 *
 * [L2 index]  [L3 index]  [L4 index]  [Word offset]
 *  11 bits     11 bits     12 bits     2 bits
 *
 * Thus, L4 sub-page tables are twice the size.
 *
 * We use invalid mappings (PTE_VALID == 0) to represent mmiotrace descriptors, but
 * otherwise the page table format is the same. The PTE_TYPE bit is weird, as 0 means
 * block but 1 means both table (at L<3) and page (at L3). For mmiotrace, this is
 * pushed to L4.
 */

// Root (L2) stage 2 translation table. Removed the stray ';' that followed
// this declaration: a lone semicolon at file scope is not valid ISO C.
static u64 hv_L2[ENTRIES_PER_L2_TABLE] ALIGNED(PAGE_SIZE);
static void hv_pt_init(void)
{
memset(hv_L2, 0, sizeof(hv_L2));
msr(VTCR_EL2, FIELD_PREP(VTCR_PS, 1) | // 64GB PA size
FIELD_PREP(VTCR_TG0, 2) | // 16KB page size
FIELD_PREP(VTCR_SH0, 3) | // PTWs Inner Sharable
FIELD_PREP(VTCR_ORGN0, 1) | // PTWs Cacheable
FIELD_PREP(VTCR_IRGN0, 1) | // PTWs Cacheable
FIELD_PREP(VTCR_SL0, 1) | // Start at level 2
FIELD_PREP(VTCR_T0SZ, 28)); // 64GB translation region
msr(VTTBR_EL2, hv_L2);
}
// Free an L3 table, releasing any software L4 sub-tables it points to first.
// Safe to call with NULL.
static void hv_pt_free_l3(u64 *l3)
{
    if (!l3)
        return;

    for (u64 i = 0; i < ENTRIES_PER_L3_TABLE; i++) {
        // SW table descriptors point at heap-allocated L4 sub-tables.
        if (L3_IS_TABLE(l3[i]))
            free((void *)(l3[i] & PTE_TARGET_MASK));
    }

    free(l3);
}
/*
 * Install 32MB L2 block mappings covering [from, from + size).
 * `to` is a pre-encoded descriptor (HW PTE or SW descriptor); its target is
 * advanced by `incr` * 32MB per entry (incr = 0 replicates / clears).
 */
static void hv_pt_map_l2(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    // Only HW descriptors need an L2-aligned target; SW descriptors carry
    // arbitrary payloads. Fixed operator precedence here: the original
    // `(IS_SW(to) || to & ...) == 0` evaluated the whole disjunction against
    // zero, so the assert fired for every SW mapping.
    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L2_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L2_OFFSET_BITS)) == 0);

    to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);

    for (; size; size -= BIT(VADDR_L2_OFFSET_BITS)) {
        u64 idx = from >> VADDR_L2_OFFSET_BITS;
        // Overwriting a table entry: reclaim the old L3 (and L4) tables.
        if (L2_IS_TABLE(hv_L2[idx]))
            hv_pt_free_l3((u64 *)(hv_L2[idx] & PTE_TARGET_MASK));
        hv_L2[idx] = to;
        from += BIT(VADDR_L2_OFFSET_BITS);
        to += incr * BIT(VADDR_L2_OFFSET_BITS);
    }
}
/*
 * Return the L3 table covering `from`, creating it if necessary.
 * If the L2 slot currently holds a block mapping, the block is split: the new
 * L3 table is filled with equivalent page-granularity entries so existing
 * translations are preserved.
 *
 * NOTE(review): the memalign() result is not checked for NULL — presumably
 * allocation failure is considered fatal in this environment; confirm.
 */
static u64 *hv_pt_get_l3(u64 from)
{
    u64 l2idx = from >> VADDR_L2_OFFSET_BITS;
    u64 l2d = hv_L2[l2idx];
    // Fast path: an L3 table already exists for this 32MB region.
    if (L2_IS_TABLE(l2d))
        return (u64 *)(l2d & PTE_TARGET_MASK);
    u64 *l3 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L3_TABLE * sizeof(u64));
    if (l2d) {
        // Split the existing L2 block into per-entry L3 descriptors.
        u64 incr = 0;
        u64 l3d = l2d;
        if (IS_HW(l2d)) {
            // HW block -> HW pages: fix the type bit and step the target
            // address by one page per entry. SW descriptors are replicated
            // as-is (incr stays 0).
            l3d &= ~PTE_TYPE;
            l3d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
            incr = BIT(VADDR_L3_OFFSET_BITS);
        }
        for (u64 idx = 0; idx < ENTRIES_PER_L3_TABLE; idx++, l3d += incr)
            l3[idx] = l3d;
    } else {
        // Nothing was mapped here: start with an empty table.
        memset64(l3, 0, ENTRIES_PER_L3_TABLE * sizeof(u64));
    }
    // Point the L2 slot at the new table (a real, valid HW table descriptor).
    l2d = ((u64)l3) | FIELD_PREP(PTE_TYPE, PTE_TABLE) | PTE_VALID;
    hv_L2[l2idx] = l2d;
    return l3;
}
/*
 * Install 16KB L3 mappings covering [from, from + size).
 * `to` is a pre-encoded descriptor; HW descriptors get the PAGE type, SW
 * descriptors the BLOCK type (per the encoding described at the top of this
 * file). The target advances by `incr` * 16KB per entry.
 */
static void hv_pt_map_l3(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    // Only HW descriptors need a page-aligned target. Fixed operator
    // precedence: the original `(IS_SW(to) || to & ...) == 0` compared the
    // whole disjunction against zero, making the assert fail for every SW
    // mapping.
    assert(IS_SW(to) || (to & PTE_TARGET_MASK & MASK(VADDR_L3_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L3_OFFSET_BITS)) == 0);

    if (IS_HW(to))
        to |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
    else
        to |= FIELD_PREP(PTE_TYPE, PTE_BLOCK);

    for (; size; size -= BIT(VADDR_L3_OFFSET_BITS)) {
        u64 idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
        // Re-fetched per iteration: the range may cross an L2 boundary.
        u64 *l3 = hv_pt_get_l3(from);
        // Overwriting a SW table entry: reclaim its L4 sub-table.
        if (L3_IS_TABLE(l3[idx]))
            free((void *)(l3[idx] & PTE_TARGET_MASK));
        l3[idx] = to;
        from += BIT(VADDR_L3_OFFSET_BITS);
        to += incr * BIT(VADDR_L3_OFFSET_BITS);
    }
}
/*
 * Return the software L4 sub-table covering `from`, creating it if necessary.
 * An existing L3 mapping (HW page or SW descriptor) is split into word-granular
 * L4 entries so existing translations/hooks are preserved. The resulting L3
 * entry is a SW table descriptor (PTE_VALID not set), since L4 tables exist
 * only for software.
 *
 * NOTE(review): the memalign() result is not checked for NULL — presumably
 * allocation failure is considered fatal in this environment; confirm.
 */
static u64 *hv_pt_get_l4(u64 from)
{
    u64 *l3 = hv_pt_get_l3(from);
    u64 l3idx = (from >> VADDR_L3_OFFSET_BITS) & MASK(VADDR_L3_INDEX_BITS);
    u64 l3d = l3[l3idx];
    // Fast path: a SW L4 table already exists for this page.
    if (L3_IS_TABLE(l3d)) {
        return (u64 *)(l3d & PTE_TARGET_MASK);
    }
    if (IS_HW(l3d)) {
        // Convert a HW page into an equivalent SW MAP descriptor so it can
        // be expanded into L4 entries below.
        assert(FIELD_GET(PTE_TYPE, l3d) == PTE_PAGE);
        l3d &= PTE_TARGET_MASK;
        l3d |= FIELD_PREP(PTE_TYPE, PTE_BLOCK) | FIELD_PREP(SPTE_TYPE, SPTE_MAP);
    }
    u64 *l4 = (u64 *)memalign(PAGE_SIZE, ENTRIES_PER_L4_TABLE * sizeof(u64));
    if (l3d) {
        // Split the existing mapping into per-word L4 descriptors. MAP
        // descriptors step their target per word; HOOK descriptors are
        // replicated unchanged (incr stays 0).
        u64 incr = 0;
        u64 l4d = l3d;
        l4d &= ~PTE_TYPE;
        l4d |= FIELD_PREP(PTE_TYPE, PTE_PAGE);
        if (FIELD_GET(SPTE_TYPE, l4d) == SPTE_MAP)
            incr = BIT(VADDR_L4_OFFSET_BITS);
        for (u64 idx = 0; idx < ENTRIES_PER_L4_TABLE; idx++, l4d += incr)
            l4[idx] = l4d;
    } else {
        // Nothing was mapped here: start with an empty table.
        memset64(l4, 0, ENTRIES_PER_L4_TABLE * sizeof(u64));
    }
    // SW table descriptor: deliberately no PTE_VALID (L4 is software-only).
    l3d = ((u64)l4) | FIELD_PREP(PTE_TYPE, PTE_TABLE);
    l3[l3idx] = l3d;
    return l4;
}
// Install word-granular (4-byte) software L4 descriptors over
// [from, from + size). Only SW descriptors are valid at this level; the
// target advances by `incr` * 4 per entry.
static void hv_pt_map_l4(u64 from, u64 to, u64 size, u64 incr)
{
    assert((from & MASK(VADDR_L4_OFFSET_BITS)) == 0);
    assert((size & MASK(VADDR_L4_OFFSET_BITS)) == 0);
    assert(IS_SW(to));

    while (size) {
        // Re-fetched per iteration: the range may cross a page boundary.
        u64 *l4 = hv_pt_get_l4(from);
        u64 idx = (from >> VADDR_L4_OFFSET_BITS) & MASK(VADDR_L4_INDEX_BITS);

        l4[idx] = to;

        from += BIT(VADDR_L4_OFFSET_BITS);
        to += incr * BIT(VADDR_L4_OFFSET_BITS);
        size -= BIT(VADDR_L4_OFFSET_BITS);
    }
}
/*
 * Map [from, from + size) with descriptor `to`, choosing the largest mapping
 * granularity for each sub-range: L4 words up to a page boundary, L3 pages up
 * to a 32MB boundary, 32MB L2 blocks, then L3/L4 for the tail.
 * `incr` scales how the descriptor target advances (1 = linear, 0 = replicate).
 * Returns 0 on success, -1 on misaligned arguments.
 */
int hv_map(u64 from, u64 to, u64 size, u64 incr)
{
    u64 chunk;
    bool hw = IS_HW(to);

    // Everything is at least word (4-byte) granular.
    if (from & MASK(VADDR_L4_OFFSET_BITS) || size & MASK(VADDR_L4_OFFSET_BITS))
        return -1;

    // HW mappings bottom out at 16KB pages; sub-page (L4) granularity exists
    // only for SW descriptors.
    if (hw && (from & MASK(VADDR_L3_OFFSET_BITS) || size & MASK(VADDR_L3_OFFSET_BITS))) {
        printf("HV: cannot use L4 pages with HW mappings (0x%lx -> 0x%lx)\n", from, to);
        return -1;
    }

    // L4 mappings to boundary
    chunk = min(size, ALIGN_UP(from, MASK(VADDR_L3_OFFSET_BITS)) - from);
    if (chunk) {
        assert(!hw);
        hv_pt_map_l4(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L3 mappings to boundary
    chunk = ALIGN_DOWN(min(size, ALIGN_UP(from, MASK(VADDR_L2_OFFSET_BITS)) - from),
                       MASK(VADDR_L3_OFFSET_BITS));
    if (chunk) {
        hv_pt_map_l3(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L2 mappings. Fixed: the chunk must be computed at L2 (32MB)
    // granularity — the original used the L3 (16KB) mask, producing chunks
    // that violate hv_pt_map_l2's size-alignment assert.
    chunk = ALIGN_DOWN(size, MASK(VADDR_L2_OFFSET_BITS));
    if (chunk && (!hw || (to & VADDR_L2_ALIGN_MASK) == 0)) {
        hv_pt_map_l2(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L3 mappings to end
    chunk = ALIGN_DOWN(size, MASK(VADDR_L3_OFFSET_BITS));
    if (chunk) {
        hv_pt_map_l3(from, to, chunk, incr);
        from += chunk;
        to += incr * chunk;
        size -= chunk;
    }

    // L4 mappings to end
    if (size) {
        assert(!hw);
        hv_pt_map_l4(from, to, size, incr);
    }

    return 0;
}
// Remove all mappings over [from, from + size) by installing all-zero
// (empty) descriptors.
int hv_unmap(u64 from, u64 size)
{
    return hv_map(from, 0, size, 0);
}

// Map guest range [from, from + size) directly to physical range starting
// at `to` via valid stage 2 PTEs with the standard RW attributes.
int hv_map_hw(u64 from, u64 to, u64 size)
{
    return hv_map(from, to | PTE_ATTRIBUTES | PTE_VALID, size, 1);
}

// Map guest range [from, from + size) to a software MAP descriptor targeting
// `to` (invalid PTE; faults are resolved by the hypervisor).
int hv_map_sw(u64 from, u64 to, u64 size)
{
    return hv_map(from, to | FIELD_PREP(SPTE_TYPE, SPTE_MAP), size, 1);
}

// Attach software hook `hook` to every word in [from, from + size)
// (incr = 0 replicates the same hook descriptor across the range).
int hv_map_hook(u64 from, void *hook, u64 size)
{
    return hv_map(from, ((u64)hook) | FIELD_PREP(SPTE_TYPE, SPTE_HOOK), size, 0);
}
// One-time hypervisor bring-up: guest timer access, stage 2 page tables,
// and the EL2 configuration register, followed by a full TLB invalidation
// so stale EL1 translations cannot survive into the guest.
void hv_init(void)
{
    // Enable physical timer for EL1
    msr(CNTHCTL_EL2, CNTHCTL_EL1PCTEN);

    hv_pt_init();

    // Configure hypervisor defaults
    msr(HCR_EL2, HCR_API | // Allow PAuth instructions
                 HCR_APK | // Allow PAuth key registers
                 HCR_TEA | // Trap external aborts
                 HCR_E2H | // VHE mode (forced)
                 HCR_RW |  // AArch64 guest
                 HCR_AMO | // Trap SError exceptions
                 HCR_VM);  // Enable stage 2 translation

    // Ordering matters: complete the table writes, invalidate all EL1 TLB
    // entries Inner Shareable, wait for completion, then synchronize context.
    sysop("dsb ishst");
    sysop("tlbi alle1is");
    sysop("dsb ish");
    sysop("isb");
}

16
src/hv.h Normal file
View file

@ -0,0 +1,16 @@
/* SPDX-License-Identifier: MIT */

#ifndef HV_H
#define HV_H

#include "types.h"

// One-time hypervisor initialization (timers, stage 2 tables, HCR_EL2).
void hv_init(void);
// Generic stage 2 mapping primitive; `to` is a pre-encoded descriptor and
// `incr` scales how its target advances per unit. Returns 0 or -1.
int hv_map(u64 from, u64 to, u64 size, u64 incr);
// Remove mappings over [from, from + size).
int hv_unmap(u64 from, u64 size);
// Direct hardware mapping of guest PA range to physical `to`.
int hv_map_hw(u64 from, u64 to, u64 size);
// Software (hypervisor-resolved) mapping of guest PA range to `to`.
int hv_map_sw(u64 from, u64 to, u64 size);
// Attach software MMIO hook `hook` to the guest PA range.
int hv_map_hook(u64 from, void *hook, u64 size);
// Enter the guest at `entry` with initial register values regs[0..3].
// NOTE(review): declared here but not defined in hv.c — presumably
// implemented elsewhere (assembly); confirm before use.
void hv_start(void *entry, u64 regs[4]);

#endif

View file

@ -4,6 +4,7 @@
#include "dart.h"
#include "exception.h"
#include "heapblock.h"
#include "hv.h"
#include "iodev.h"
#include "kboot.h"
#include "malloc.h"
@ -374,6 +375,13 @@ int proxy_process(ProxyRequest *request, ProxyReply *reply)
dart_unmap((dart_dev_t *)request->args[0], request->args[1], request->args[2]);
break;
case P_HV_INIT:
hv_init();
break;
case P_HV_MAP:
hv_map(request->args[0], request->args[1], request->args[2], request->args[3]);
break;
default:
reply->status = S_BADCMD;
break;

View file

@ -103,6 +103,9 @@ typedef enum {
P_DART_MAP,
P_DART_UNMAP,
P_HV_INIT = 0xc00,
P_HV_MAP = 0xc01,
} ProxyOp;
#define S_OK 0