m1n1.hw.uat: Move handoff code to m1n1.fw.agx.handoff

Signed-off-by: Asahi Lina <lina@asahilina.net>
This commit is contained in:
Asahi Lina 2022-08-17 14:06:00 +09:00
parent ec3fcfc562
commit 2b6996db63
2 changed files with 144 additions and 58 deletions

View file

@ -0,0 +1,120 @@
# SPDX-License-Identifier: MIT
from ...utils import *
from contextlib import contextmanager
# Magic value exchanged through the handoff area: GFXHandoff.initialize()
# writes it to MAGIC_AP and then polls MAGIC_FW until it reads back the same
# value.
PPL_MAGIC = 0x4b1d000000000002
class GFXHandoffStruct(RegMap):
    # Register layout of the GFX handoff area used by GFXHandoff below.

    # Magic words: the AP side writes PPL_MAGIC to MAGIC_AP and polls
    # MAGIC_FW for the same value during initialization.
    MAGIC_AP = 0x0, Register64
    MAGIC_FW = 0x8, Register64

    # Lock flags and turn word used by GFXHandoff.lock() (Dekker's algorithm).
    LOCK_AP = 0x10, Register8
    LOCK_FW = 0x11, Register8
    TURN = 0x14, Register32

    # Unknown; set to 0xffffffff during initialization.
    UNK = 0x18, Register32

    # Flush slots: 0x41 entries spaced 0x18 bytes apart, each made of a state
    # word followed by an address and a size.
    # The address and size fields are 8 bytes apart and the unmap path stores
    # a value with bits 48..63 set (0xdead000000000000 | base) into
    # FLUSH_ADDR, so both are declared as 64-bit registers. With that width
    # the last slot ends at 0x638, exactly where UNK2 begins.
    # NOTE(review): FLUSH_STATE is kept at 32 bits as originally declared;
    # confirm its real width against the firmware.
    FLUSH_STATE = irange(0x20, 0x41, 0x18), Register32
    FLUSH_ADDR = irange(0x28, 0x41, 0x18), Register64
    FLUSH_SIZE = irange(0x30, 0x41, 0x18), Register64

    # Unknown trailing fields; UNK3 is zeroed during initialization.
    UNK2 = 0x638, Register8
    UNK3 = 0x640, Register64
class GFXHandoff:
    """AP-side accessor for the GFX handoff area.

    Wraps a GFXHandoffStruct register map placed at the ``gfx_handoff_base``
    property of the ``/arm-io/sgx`` ADT node and provides the lock, the
    initialization handshake, and the cache flush / unmap slot helpers.
    """

    def __init__(self, u):
        """Locate the handoff area through the ADT and map its registers.

        Args:
            u: proxy utilities object; it must expose ``adt`` and is also
                passed to GFXHandoffStruct as the register access backend.
        """
        self.u = u
        self.sgx_dev = self.u.adt["/arm-io/sgx"]
        self.base = self.sgx_dev.gfx_handoff_base
        self.reg = GFXHandoffStruct(u, self.base)
        self.is_locked = False
        self.initialized = False

    @contextmanager
    def lock(self):
        """Dekker's algorithm lock.

        Context manager that holds the AP side of the handoff lock for the
        duration of the ``with`` body. The lock is not reentrant: entering it
        while already held trips the assertion. It is always released, even
        if the body raises.
        """
        assert not self.is_locked

        # Note: This *absolutely* needs barriers everywhere.
        # Those are implicit in proxyclient for every operation.

        # Announce intent to enter, then defer to the firmware whenever it
        # also wants the lock and TURN is non-zero.
        self.reg.LOCK_AP.val = 1
        while self.reg.LOCK_FW.val != 0:
            # Compare the register *value*, not the accessor object.
            if self.reg.TURN.val != 0:
                self.reg.LOCK_AP.val = 0
                while self.reg.TURN.val != 0:
                    pass
                self.reg.LOCK_AP.val = 1

        self.is_locked = True
        try:
            yield
        finally:
            # Hand the turn over before dropping our flag.
            self.reg.TURN.val = 1
            self.reg.LOCK_AP.val = 0
            self.is_locked = False

    def initialize(self):
        """Perform the one-time handshake with the firmware.

        Writes the AP magic, waits (under the lock) until the firmware has
        published the same magic in MAGIC_FW, then clears every flush slot.
        Subsequent calls return immediately.
        """
        if self.initialized:
            return

        print("[Handoff] Initializing...")

        self.reg.MAGIC_AP.val = PPL_MAGIC
        self.reg.UNK.val = 0xffffffff
        self.reg.UNK3.val = 0

        with self.lock():
            print("[Handoff] Waiting for FW PPL init...")
            while self.reg.MAGIC_FW.val != PPL_MAGIC:
                pass

        # Reset all 0x41 flush slots to the idle state.
        for i in range(0x41):
            self.reg.FLUSH_STATE[i].val = 0
            self.reg.FLUSH_ADDR[i].val = 0
            self.reg.FLUSH_SIZE[i].val = 0

        self.initialized = True
        print("[Handoff] Initialized!")

    # The order here is:
    # - Remap memory as shared
    # - TLBI
    # - prepare_cacheflush()
    # - issue FWCtl request
    # - wait for completion (ring or wait_cacheflush?)
    # - Unmap memory
    # - TLBI
    # - complete_cacheflush()
    def prepare_cacheflush(self, base, size, context=0x40):
        """Fill flush slot ``context`` with a range and mark it pending (1).

        Args:
            base: start address stored in FLUSH_ADDR.
            size: length stored in FLUSH_SIZE.
            context: flush slot index; defaults to 0x40.

        Raises:
            AssertionError: if the slot is not idle (state 0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0

        self.reg.FLUSH_ADDR[context].val = base
        self.reg.FLUSH_SIZE[context].val = size
        # Write the state last so the slot is complete when it turns pending.
        self.reg.FLUSH_STATE[context].val = 1

    def wait_cacheflush(self, context=0x40):
        """Busy-wait until flush slot ``context`` leaves the pending state (1)."""
        while self.reg.FLUSH_STATE[context].val == 1:
            pass

    def complete_cacheflush(self, context=0x40):
        """Return flush slot ``context`` to idle after the flush finished.

        Raises:
            AssertionError: if the slot is not in state 2.
        """
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0

    # probably not necessary?
    # order is:
    # - Remap memory as shared
    # - (no TLBI?)
    # - prepare_unmap()
    # - unmap
    # - TLBI
    # - complete_unmap()
    def prepare_unmap(self, base, size, context):
        """Mark a range in flush slot ``context`` as about to be unmapped.

        The low 48 bits of ``base`` are tagged with 0xdead in the top 16 bits
        before being stored, and the slot is put directly into state 2.

        Args:
            base: start address of the range; only the low 48 bits are kept.
            size: length stored in FLUSH_SIZE.
            context: flush slot index.

        Raises:
            AssertionError: if the slot is not idle (state 0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0

        self.reg.FLUSH_ADDR[context].val = 0xdead000000000000 | (base & 0xffffffffffff)
        self.reg.FLUSH_SIZE[context].val = size
        self.reg.FLUSH_STATE[context].val = 2

    def complete_unmap(self, context):
        """Return flush slot ``context`` to idle after an unmap.

        Raises:
            AssertionError: if the slot is not in state 2.
        """
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0

View file

@ -9,6 +9,7 @@
import struct
from ..fw.agx.handoff import GFXHandoff
from ..utils import *
from ..malloc import Heap
from enum import IntEnum
@ -20,6 +21,11 @@ class MemoryAttr(IntEnum):
Normal = 0 # Only accessed by the gfx-asc coprocessor
Device = 1
Shared = 2 # Probably Outer-shareable. Shared with either the main cpu or AGX hardware
UNK3 = 3
UNK4 = 4
UNK5 = 5
UNK6 = 6
UNK7 = 7
class TTBR(Register64):
@ -206,6 +212,8 @@ class UatStream(Reloadable):
class UAT(Reloadable):
NUM_CONTEXTS = 64
PAGE_BITS = 14
PAGE_SIZE = 1 << PAGE_BITS
@ -242,17 +250,13 @@ class UAT(Reloadable):
self.gpu_region = self.sgx_dev.gpu_region_base
self.ttbr0_base = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
self.ttbr1_base = self.sgx_dev.gfx_shared_region_base
self.handoff = GFXHandoff(self.u)
self.VA_MASK = 0
for (off, size, _) in self.LEVELS:
self.VA_MASK |= (size - 1) << off
self.VA_MASK |= self.PAGE_SIZE - 1
def early_init(self):
# Unknown init (needed?)
self.sgx_base = self.sgx_dev.get_reg(0)[0]
self.p.read32(self.sgx_base + 0xd14000)
self.p.write32(self.sgx_base + 0xd14000, 0x70001)
def set_l0(self, ctx, off, base, asid=0):
ttbr = TTBR(BADDR = base >> 1, ASID = asid, VALID=(base != 0))
@ -315,7 +319,7 @@ class UAT(Reloadable):
if iova & (self.PAGE_SIZE - 1):
raise Exception(f"Unaligned IOVA {iova:#x}")
self.init_handoff()
self.init()
map_flags = {'OS': 1, 'AttrIndex': MemoryAttr.Normal, 'VALID': 1, 'TYPE': 1, 'AP': 1, 'AF': 1, 'UXN': 1}
map_flags.update(flags)
@ -366,7 +370,6 @@ class UAT(Reloadable):
table[idx] = pte.value
self.dirty.add(offset)
def iotranslate(self, ctx, start, size):
if size == 0:
return []
@ -476,49 +479,21 @@ class UAT(Reloadable):
def foreach_table(self, ctx, table_fn):
self.recurse_level(0, 0, self.gpu_region + ctx * 16, table_fn=table_fn)
def init_handoff(self):
def init(self):
if self.initialized:
return
print("[UAT] Initializing...")
self.handoff.initialize()
MAGIC = 0x4b1d000000000002
with self.handoff.lock():
print(f"[UAT] TTBR0[0] = {self.ttbr0_base:#x}")
print(f"[UAT] TTBR1[0] = {self.ttbr1_base:#x}")
self.set_l0(0, 0, self.ttbr0_base)
self.set_l0(0, 1, self.ttbr1_base)
self.flush_dirty()
self.invalidate_cache()
self.p.write64(self.handoff + 0, MAGIC)
self.p.write32(self.handoff + 0x18, 0xffffffff)
self.p.write64(self.handoff + 0x640, 0)
self.p.write8(self.handoff + 0x10, 1)
assert self.p.read8(self.handoff + 0x11) == 0
print("[UAT] Waiting for handoff...")
while self.p.read64(self.handoff + 0x8) != MAGIC:
pass
self.p.write32(self.handoff + 0x14, 1)
self.p.write8(self.handoff + 0x10, 0)
for i in range(0x20, 0x640, 0x18):
self.p.write32(self.handoff + i, 0)
self.p.write64(self.handoff + i + 0x28, 0)
self.p.write64(self.handoff + i + 0x30, 0)
self.p.write8(self.handoff + 0x10, 1)
assert self.p.read8(self.handoff + 0x11) == 0
# read TTBRs here
self.p.write32(self.handoff + 0x14, 1)
self.p.write8(self.handoff + 0x10, 0)
self.p.write8(self.handoff + 0x10, 1)
assert self.p.read8(self.handoff + 0x11) == 0
print(f"[UAT] TTBR0[0] = {self.ttbr0_base:#x}")
print(f"[UAT] TTBR1[0] = {self.ttbr1_base:#x}")
self.set_l0(0, 0, self.ttbr0_base)
self.set_l0(0, 1, self.ttbr1_base)
self.flush_dirty()
self.invalidate_cache()
self.p.write32(self.handoff + 0x14, 1)
self.p.write8(self.handoff + 0x10, 0)
print("[UAT] Init complete")
self.initialized = True
@ -526,20 +501,11 @@ class UAT(Reloadable):
def bind_context(self, ctx, ttbr0_base):
assert ctx != 0
self.p.write8(self.handoff + 0x10, 1)
assert self.p.read8(self.handoff + 0x11) == 0
# read TTBRs here
self.p.write32(self.handoff + 0x14, 1)
self.p.write8(self.handoff + 0x10, 0)
self.p.write8(self.handoff + 0x10, 1)
assert self.p.read8(self.handoff + 0x11) == 0
self.set_l0(ctx, 0, ttbr0_base, ctx)
self.set_l0(ctx, 1, self.ttbr1_base, ctx)
self.flush_dirty()
self.invalidate_cache()
self.p.write32(self.handoff + 0x14, 1)
self.p.write8(self.handoff + 0x10, 0)
with self.handoff.lock():
self.set_l0(ctx, 0, ttbr0_base, ctx)
self.set_l0(ctx, 1, self.ttbr1_base, ctx)
self.flush_dirty()
self.invalidate_cache()
def dump(self, ctx, log=print):
def print_fn(start, end, i, pte, level, sparse):