mirror of
https://github.com/AsahiLinux/m1n1
synced 2024-11-11 02:04:11 +00:00
m1n1.hw.uat: Move handoff code to m1n1.fw.agx.handoff
Signed-off-by: Asahi Lina <lina@asahilina.net>
This commit is contained in:
parent
ec3fcfc562
commit
2b6996db63
2 changed files with 144 additions and 58 deletions
120
proxyclient/m1n1/fw/agx/handoff.py
Normal file
120
proxyclient/m1n1/fw/agx/handoff.py
Normal file
|
@ -0,0 +1,120 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
from ...utils import *
|
||||
from contextlib import contextmanager
|
||||
|
||||
# Magic value written to MAGIC_AP during handoff initialization; the
# initialization code then waits until MAGIC_FW reads back the same value.
PPL_MAGIC = 0x4b1d000000000002
|
||||
|
||||
class GFXHandoffStruct(RegMap):
    # Register-map view of the GFX handoff region used by GFXHandoff.
    # Each entry is (byte offset, register type).

    # Magic words: MAGIC_AP is written by this side, MAGIC_FW is polled.
    MAGIC_AP        = 0x0, Register64
    MAGIC_FW        = 0x8, Register64

    # Lock flags and turn word used by GFXHandoff.lock().
    LOCK_AP         = 0x10, Register8
    LOCK_FW         = 0x11, Register8
    TURN            = 0x14, Register32

    UNK             = 0x18, Register32

    # Per-context flush slots, 0x18 bytes apart, indexed 0..0x40 by the
    # users of this map. FLUSH_ADDR and FLUSH_SIZE are 64-bit: prepare_unmap()
    # stores a value with the top 16 bits set (0xdead...) into FLUSH_ADDR,
    # which cannot be represented in a 32-bit register, and the 0x18 stride
    # leaves 8 bytes for each of the three fields.
    # NOTE(review): FLUSH_STATE is kept at 32 bits; confirm against firmware.
    FLUSH_STATE     = irange(0x20, 0x41, 0x18), Register32
    FLUSH_ADDR      = irange(0x28, 0x41, 0x18), Register64
    FLUSH_SIZE      = irange(0x30, 0x41, 0x18), Register64

    # Purpose unknown.
    UNK2            = 0x638, Register8
    UNK3            = 0x640, Register64
|
||||
|
||||
class GFXHandoff:
    """AP-side helper for the GFX handoff region shared with the firmware.

    Wraps the region in a GFXHandoffStruct register map and provides the
    mutual-exclusion lock, one-time initialization and the per-context
    flush/unmap slot handshakes.
    """

    def __init__(self, u):
        """Locate the handoff region through the ADT and map its registers.

        u: proxy utilities object; must provide ``adt`` and be accepted by
           GFXHandoffStruct as its backend.
        """
        self.u = u
        self.sgx_dev = self.u.adt["/arm-io/sgx"]
        self.base = self.sgx_dev.gfx_handoff_base
        self.reg = GFXHandoffStruct(u, self.base)
        self.is_locked = False
        self.initialized = False

    @contextmanager
    def lock(self):
        """Dekker's algorithm lock.

        Context manager: acquires the handoff lock on entry and always
        releases it on exit (handing the turn to the other side), even if
        the body raises. Not reentrant.
        """
        assert not self.is_locked

        # Note: This *absolutely* needs barriers everywhere.
        # Those are implicit in proxyclient for every operation.

        # Always go through .val so that we compare/assign register
        # *contents*, never the accessor objects themselves.
        self.reg.LOCK_AP.val = 1
        while self.reg.LOCK_FW.val != 0:
            if self.reg.TURN.val != 0:
                # Not our turn: back off until the other side yields.
                self.reg.LOCK_AP.val = 0
                while self.reg.TURN.val != 0:
                    pass
                self.reg.LOCK_AP.val = 1

        self.is_locked = True
        try:
            yield
        finally:
            # Give the turn away first, then drop our flag.
            self.reg.TURN.val = 1
            self.reg.LOCK_AP.val = 0
            self.is_locked = False

    def initialize(self):
        """Perform the one-time handoff initialization (no-op if already done).

        Publishes the AP magic, waits (under the lock) for the firmware
        magic to appear, then clears every flush slot.
        """
        if self.initialized:
            return

        print("[Handoff] Initializing...")

        self.reg.MAGIC_AP.val = PPL_MAGIC
        self.reg.UNK.val = 0xffffffff
        self.reg.UNK3.val = 0

        with self.lock():
            print("[Handoff] Waiting for FW PPL init...")
            while self.reg.MAGIC_FW.val != PPL_MAGIC:
                pass

        for i in range(0x41):
            self.reg.FLUSH_STATE[i].val = 0
            self.reg.FLUSH_ADDR[i].val = 0
            self.reg.FLUSH_SIZE[i].val = 0

        self.initialized = True
        print("[Handoff] Initialized!")

    # The order here is:
    # - Remap memory as shared
    # - TLBI
    # - prepare_cacheflush()
    # - issue FWCtl request
    # - wait for completion (ring or wait_cacheflush?)
    # - Unmap memory
    # - TLBI
    # - complete_cacheflush()
    def prepare_cacheflush(self, base, size, context=0x40):
        """Fill the flush slot for `context` with (base, size) and set state 1.

        The slot must currently be idle (state 0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0

        self.reg.FLUSH_ADDR[context].val = base
        self.reg.FLUSH_SIZE[context].val = size
        # State is written last so the address/size are in place first.
        self.reg.FLUSH_STATE[context].val = 1

    def wait_cacheflush(self, context=0x40):
        """Busy-wait until the flush slot for `context` leaves state 1."""
        while self.reg.FLUSH_STATE[context].val == 1:
            pass

    def complete_cacheflush(self, context=0x40):
        """Return the flush slot for `context` from state 2 to idle (0)."""
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0

    # probably not necessary?
    # order is:
    # - Remap memory as shared
    # - (no TLBI?)
    # - prepare_unmap()
    # - unmap
    # - TLBI
    # - complete_unmap()
    def prepare_unmap(self, base, size, context):
        """Fill the flush slot for `context` for an unmap and set state 2.

        The address is stored as the low 48 bits of `base` tagged with
        0xdead in the top 16 bits. The slot must currently be idle (0).
        """
        assert self.reg.FLUSH_STATE[context].val == 0
        self.reg.FLUSH_ADDR[context].val = 0xdead000000000000 | (base & 0xffffffffffff)
        self.reg.FLUSH_SIZE[context].val = size
        self.reg.FLUSH_STATE[context].val = 2

    def complete_unmap(self, context):
        """Return the flush slot for `context` from state 2 to idle (0)."""
        assert self.reg.FLUSH_STATE[context].val == 2
        self.reg.FLUSH_STATE[context].val = 0
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
|
||||
import struct
|
||||
from ..fw.agx.handoff import GFXHandoff
|
||||
from ..utils import *
|
||||
from ..malloc import Heap
|
||||
from enum import IntEnum
|
||||
|
@ -20,6 +21,11 @@ class MemoryAttr(IntEnum):
|
|||
Normal = 0 # Only accessed by the gfx-asc coprocessor
|
||||
Device = 1
|
||||
Shared = 2 # Probably Outer-shareable. Shared with either the main cpu or AGX hardware
|
||||
UNK3 = 3
|
||||
UNK4 = 4
|
||||
UNK5 = 5
|
||||
UNK6 = 6
|
||||
UNK7 = 7
|
||||
|
||||
|
||||
class TTBR(Register64):
|
||||
|
@ -206,6 +212,8 @@ class UatStream(Reloadable):
|
|||
|
||||
|
||||
class UAT(Reloadable):
|
||||
NUM_CONTEXTS = 64
|
||||
|
||||
PAGE_BITS = 14
|
||||
PAGE_SIZE = 1 << PAGE_BITS
|
||||
|
||||
|
@ -242,17 +250,13 @@ class UAT(Reloadable):
|
|||
self.gpu_region = self.sgx_dev.gpu_region_base
|
||||
self.ttbr0_base = self.u.memalign(self.PAGE_SIZE, self.PAGE_SIZE)
|
||||
self.ttbr1_base = self.sgx_dev.gfx_shared_region_base
|
||||
self.handoff = GFXHandoff(self.u)
|
||||
|
||||
self.VA_MASK = 0
|
||||
for (off, size, _) in self.LEVELS:
|
||||
self.VA_MASK |= (size - 1) << off
|
||||
self.VA_MASK |= self.PAGE_SIZE - 1
|
||||
|
||||
def early_init(self):
|
||||
# Unknown init (needed?)
|
||||
self.sgx_base = self.sgx_dev.get_reg(0)[0]
|
||||
self.p.read32(self.sgx_base + 0xd14000)
|
||||
self.p.write32(self.sgx_base + 0xd14000, 0x70001)
|
||||
|
||||
def set_l0(self, ctx, off, base, asid=0):
|
||||
ttbr = TTBR(BADDR = base >> 1, ASID = asid, VALID=(base != 0))
|
||||
|
@ -315,7 +319,7 @@ class UAT(Reloadable):
|
|||
if iova & (self.PAGE_SIZE - 1):
|
||||
raise Exception(f"Unaligned IOVA {iova:#x}")
|
||||
|
||||
self.init_handoff()
|
||||
self.init()
|
||||
|
||||
map_flags = {'OS': 1, 'AttrIndex': MemoryAttr.Normal, 'VALID': 1, 'TYPE': 1, 'AP': 1, 'AF': 1, 'UXN': 1}
|
||||
map_flags.update(flags)
|
||||
|
@ -366,7 +370,6 @@ class UAT(Reloadable):
|
|||
table[idx] = pte.value
|
||||
self.dirty.add(offset)
|
||||
|
||||
|
||||
def iotranslate(self, ctx, start, size):
|
||||
if size == 0:
|
||||
return []
|
||||
|
@ -476,49 +479,21 @@ class UAT(Reloadable):
|
|||
def foreach_table(self, ctx, table_fn):
|
||||
self.recurse_level(0, 0, self.gpu_region + ctx * 16, table_fn=table_fn)
|
||||
|
||||
def init_handoff(self):
|
||||
def init(self):
|
||||
if self.initialized:
|
||||
return
|
||||
|
||||
print("[UAT] Initializing...")
|
||||
self.handoff.initialize()
|
||||
|
||||
MAGIC = 0x4b1d000000000002
|
||||
with self.handoff.lock():
|
||||
print(f"[UAT] TTBR0[0] = {self.ttbr0_base:#x}")
|
||||
print(f"[UAT] TTBR1[0] = {self.ttbr1_base:#x}")
|
||||
self.set_l0(0, 0, self.ttbr0_base)
|
||||
self.set_l0(0, 1, self.ttbr1_base)
|
||||
self.flush_dirty()
|
||||
self.invalidate_cache()
|
||||
|
||||
self.p.write64(self.handoff + 0, MAGIC)
|
||||
self.p.write32(self.handoff + 0x18, 0xffffffff)
|
||||
self.p.write64(self.handoff + 0x640, 0)
|
||||
self.p.write8(self.handoff + 0x10, 1)
|
||||
assert self.p.read8(self.handoff + 0x11) == 0
|
||||
print("[UAT] Waiting for handoff...")
|
||||
while self.p.read64(self.handoff + 0x8) != MAGIC:
|
||||
pass
|
||||
self.p.write32(self.handoff + 0x14, 1)
|
||||
self.p.write8(self.handoff + 0x10, 0)
|
||||
|
||||
for i in range(0x20, 0x640, 0x18):
|
||||
self.p.write32(self.handoff + i, 0)
|
||||
self.p.write64(self.handoff + i + 0x28, 0)
|
||||
self.p.write64(self.handoff + i + 0x30, 0)
|
||||
|
||||
self.p.write8(self.handoff + 0x10, 1)
|
||||
assert self.p.read8(self.handoff + 0x11) == 0
|
||||
|
||||
# read TTBRs here
|
||||
|
||||
self.p.write32(self.handoff + 0x14, 1)
|
||||
self.p.write8(self.handoff + 0x10, 0)
|
||||
self.p.write8(self.handoff + 0x10, 1)
|
||||
assert self.p.read8(self.handoff + 0x11) == 0
|
||||
|
||||
print(f"[UAT] TTBR0[0] = {self.ttbr0_base:#x}")
|
||||
print(f"[UAT] TTBR1[0] = {self.ttbr1_base:#x}")
|
||||
self.set_l0(0, 0, self.ttbr0_base)
|
||||
self.set_l0(0, 1, self.ttbr1_base)
|
||||
self.flush_dirty()
|
||||
self.invalidate_cache()
|
||||
|
||||
self.p.write32(self.handoff + 0x14, 1)
|
||||
self.p.write8(self.handoff + 0x10, 0)
|
||||
print("[UAT] Init complete")
|
||||
|
||||
self.initialized = True
|
||||
|
@ -526,20 +501,11 @@ class UAT(Reloadable):
|
|||
def bind_context(self, ctx, ttbr0_base):
|
||||
assert ctx != 0
|
||||
|
||||
self.p.write8(self.handoff + 0x10, 1)
|
||||
assert self.p.read8(self.handoff + 0x11) == 0
|
||||
# read TTBRs here
|
||||
self.p.write32(self.handoff + 0x14, 1)
|
||||
self.p.write8(self.handoff + 0x10, 0)
|
||||
|
||||
self.p.write8(self.handoff + 0x10, 1)
|
||||
assert self.p.read8(self.handoff + 0x11) == 0
|
||||
self.set_l0(ctx, 0, ttbr0_base, ctx)
|
||||
self.set_l0(ctx, 1, self.ttbr1_base, ctx)
|
||||
self.flush_dirty()
|
||||
self.invalidate_cache()
|
||||
self.p.write32(self.handoff + 0x14, 1)
|
||||
self.p.write8(self.handoff + 0x10, 0)
|
||||
with self.handoff.lock():
|
||||
self.set_l0(ctx, 0, ttbr0_base, ctx)
|
||||
self.set_l0(ctx, 1, self.ttbr1_base, ctx)
|
||||
self.flush_dirty()
|
||||
self.invalidate_cache()
|
||||
|
||||
def dump(self, ctx, log=print):
|
||||
def print_fn(start, end, i, pte, level, sparse):
|
||||
|
|
Loading…
Reference in a new issue