mirror of
https://github.com/AsahiLinux/m1n1
synced 2024-11-10 01:34:12 +00:00
m1n1.trace.agx: Perfctr stuff
Signed-off-by: Asahi Lina <lina@asahilina.net>
This commit is contained in:
parent
c602bfd72e
commit
52e1855c2a
5 changed files with 174 additions and 18 deletions
|
@ -101,6 +101,20 @@ class DC_09(ConstructClass):
|
|||
Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
|
||||
)
|
||||
|
||||
class DC_ConfigurePerfCounters(ConstructClass):
    """Device control message with msg_type 0xb.

    The tracer reacts to this message by dumping the performance counter
    configuration. The payload layout is not decoded yet and is kept as an
    opaque hexdump.
    """

    subcon = Struct(
        # Message discriminator; parsing fails for any other value.
        "msg_type" / Const(0xb, Int32ul),
        # 0x2c opaque payload bytes, all zeroes when building without a value.
        "data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))),
        # Extra 0x10 bytes of zero padding, gated on the version expression.
        Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
    )
|
||||
|
||||
class DC_DisablePerfCounters(ConstructClass):
    """Device control message with msg_type 0xc.

    The tracer clears its "perf enabled" state when it sees this message.
    The payload layout is not decoded yet and is kept as an opaque hexdump.
    """

    subcon = Struct(
        # Message discriminator; parsing fails for any other value.
        "msg_type" / Const(0xc, Int32ul),
        # 0x2c opaque payload bytes, all zeroes when building without a value.
        "data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))),
        # Extra 0x10 bytes of zero padding, gated on the version expression.
        Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
    )
|
||||
|
||||
class DC_GrowTVBAck(ConstructClass):
|
||||
subcon = Struct (
|
||||
"msg_type" / Const(0xd, Int32ul),
|
||||
|
@ -153,6 +167,8 @@ DeviceControlMsg = FixedSized(DeviceControlSize, Select(
|
|||
DC_UpdateIdleTS,
|
||||
DC_1e,
|
||||
DC_Write32,
|
||||
DC_ConfigurePerfCounters,
|
||||
DC_DisablePerfCounters,
|
||||
DC_GrowTVBAck,
|
||||
UnknownMsg,
|
||||
))
|
||||
|
|
|
@ -46,6 +46,24 @@ class WorkCommandInitBM(ConstructClass):
|
|||
"stamp_value" / Hex(Int32ul), # 0x100
|
||||
)
|
||||
|
||||
class WorkCommandComputeUnk10(ConstructClass):
    """
    Occasionally sent before WorkCommandCP on the SubmitCP queue.

    Identified by a leading magic of 0xa; the single payload word is not
    understood yet.
    """

    subcon = Struct(
        # Command discriminator.
        "magic" / Const(0xa, Hex(Int32ul)),
        # Unknown 32-bit value.
        "unk" / Hex(Int32ul),
    )
|
||||
|
||||
class WorkCommandComputeUnk11(ConstructClass):
    """
    Occasionally sent before WorkCommandCP on the SubmitCP queue.

    Identified by a leading magic of 0xb; the single payload word is not
    understood yet.
    """

    subcon = Struct(
        # Command discriminator.
        "magic" / Const(0xb, Hex(Int32ul)),
        # Unknown 32-bit value.
        "unk" / Hex(Int32ul),
    )
|
||||
|
||||
class Flag(ConstructValueClass):
|
||||
subcon = Hex(Int32ul)
|
||||
|
||||
|
@ -428,6 +446,8 @@ class CmdBufWork(ConstructClass):
|
|||
3: WorkCommandCP,
|
||||
4: WorkCommandBarrier,
|
||||
6: WorkCommandInitBM,
|
||||
10: WorkCommandComputeUnk10,
|
||||
11: WorkCommandComputeUnk11,
|
||||
})
|
||||
)
|
||||
|
||||
|
@ -522,7 +542,7 @@ class CommandQueueInfo(ConstructClass):
|
|||
"busy" / Hex(Int32ul), # 1 = gpu busy
|
||||
"pad1" / ZPadding(0x1c),
|
||||
"unk_80" / Hex(Int32ul),
|
||||
"blocked_on_barrier" / Hex(Int32ul),
|
||||
"has_commands" / Hex(Int32ul),
|
||||
"unk_88" / Int32ul,
|
||||
"unk_8c" / Int32ul,
|
||||
"unk_90" / Int32ul,
|
||||
|
@ -548,8 +568,8 @@ class CommandQueueInfo(ConstructClass):
|
|||
self.unk_54 = -1
|
||||
self.unk_58 = 0x0
|
||||
self.busy = 0x0
|
||||
self.blocked_on_barrier = 0x0
|
||||
self.unk_80 = 0
|
||||
self.has_commands = 0
|
||||
self.unk_88 = 0
|
||||
self.unk_8c = 0
|
||||
self.unk_90 = 0
|
||||
|
|
|
@ -1725,6 +1725,19 @@ class RCPowerZone(ConstructClass):
|
|||
self.target_off = self.target - off
|
||||
self.tc = tc
|
||||
|
||||
class PerfCounterDesc(ConstructClass):
    """One performance counter descriptor (0x20 bytes in total).

    Field meanings are inferred from their names and from how the tracer
    prints them; treat them as tentative.
    """

    subcon = Struct(
        # 64-bit word; the tracer takes its low 7 bits as the partition.
        "regs" / Int64ul,
        # Two 32-bit masks (disable first, then enable).
        "dis_mask" / Int32ul,
        "en_mask" / Int32ul,
        # Source selection mask.
        "source_mask" / Int32ul,
        "base_reg" / Int32ul,
        # Three single-byte fields followed by one byte of padding.
        "unk_type" / Int8ul,
        "count" / Int8ul,
        "index" / Int8ul,
        "pad0" / Int8ul,
        # Trailing 32-bit padding word.
        "pad1" / Int32ul,
    )
|
||||
|
||||
class InitData_RegionC(ConstructClass):
|
||||
subcon = Struct(
|
||||
|
@ -1748,13 +1761,29 @@ class InitData_RegionC(ConstructClass):
|
|||
"unk_62" / Int32ul,
|
||||
Ver("V >= V13_0B4", "unk_66_0" / HexDump(Bytes(0xc))),
|
||||
"unk_66" / Int32ul,
|
||||
"unk_6a" / HexDump(Bytes(0x16)),
|
||||
"unk_80" / HexDump(Bytes(0xf80)),
|
||||
"unk_1000" / HexDump(Bytes(0x7000)),
|
||||
"unk_8000" / HexDump(Bytes(0x900)),
|
||||
Ver("G >= G14X", "unk_8900_pad" / Default(HexDump(Bytes(0x484c)), bytes(0x484c))),
|
||||
Ver("V >= V13_0B4 && V < V13_2", "unk_8900_0" / Int32ul),
|
||||
Ver("V >= V13_3", "unk_8900_pad2" / Default(HexDump(Bytes(0x54)), bytes(0x54))),
|
||||
"unk_6a" / HexDump(Bytes(0x12)),
|
||||
"perfctrs" / Array(512, PerfCounterDesc),
|
||||
"perfctr_count" / Int32ul,
|
||||
"unk_4080" / HexDump(Bytes(0x3f80)),
|
||||
Ver("G >= G14X", "unk_8000_pad" / Default(HexDump(Bytes(0x484c)), bytes(0x484c))),
|
||||
Ver("V >= V13_0B4 && V < V13_2", "unk_8000_0" / Int32ul),
|
||||
Ver("V >= V13_3", "unk_8000_pad2" / Default(HexDump(Bytes(0x54)), bytes(0x54))),
|
||||
"unk_8000" / HexDump(Bytes(0x878)),
|
||||
"unk_8878" / Int32ul,
|
||||
"unk_887c" / Int32ul,
|
||||
"unk_8880" / HexDump(Bytes(0x10)),
|
||||
"unk_8890" / Int32ul,
|
||||
"unkptr_8894" / Int64ul,
|
||||
"size_889c" / Int32ul,
|
||||
"unkptr_88a0" / Int64ul,
|
||||
"perf_source_list" / Int64ul,
|
||||
"perf_source_count" / Int32ul,
|
||||
"unkptr_88b4" / Int64ul,
|
||||
"unk_88bc" / Int32ul,
|
||||
"unk_88c0" / Int16ul,
|
||||
"unk_88c2" / Int8ul,
|
||||
"unkpad_88c3" / Int8ul,
|
||||
"unk_88c4" / HexDump(Bytes(0x3c)),
|
||||
"unk_8900" / Int32ul,
|
||||
"unk_atomic" / Int32ul,
|
||||
"max_power" / Int32ul,
|
||||
|
|
|
@ -914,7 +914,7 @@ class TimestampCmd(ConstructClass):
|
|||
"unk_24" / Int64ul,
|
||||
Ver("V >= V13_0B4", "unk_ts_addr" / Int64ul),
|
||||
"uuid" / Int32ul,
|
||||
"unk_30_padding" / Int32ul,
|
||||
"unk_30" / Int32ul,
|
||||
)
|
||||
|
||||
class WaitForInterruptCmd(ConstructClass):
|
||||
|
@ -1114,6 +1114,20 @@ class DoorbellCmd(ConstructClass):
|
|||
self.pad = 0
|
||||
self.arg = flags << 10
|
||||
|
||||
class ProfileCaptureCmd(ConstructClass):
    """Microsequence command whose opcode byte is 0x1f.

    The three ``arg`` bytes are not understood yet.
    """

    subcon = Struct(
        # Opcode byte; parsing fails for any other value.
        "magic" / Const(0x1f, Int8ul),
        "arg0" / Int8ul,
        "arg1" / Int8ul,
        "arg2" / Int8ul,
        "encoder_id" / Int32ul,
        "uuid" / Int32ul,
        "workitem_ptr" / Int64ul,
    )

    def __init__(self, flags):
        """Create an empty command.

        ``flags`` is part of the constructor signature but is currently
        ignored; no field is initialised from it.
        """
        super().__init__()
|
||||
|
||||
class MicroSequence(ConstructValueClass):
|
||||
subcon = RepeatUntil(lambda obj, lst, ctx: lst[-1].op & 0x3f in (0x18, 0x2b, 0x2c),
|
||||
Struct(
|
||||
|
@ -1147,6 +1161,7 @@ class MicroSequence(ConstructValueClass):
|
|||
0x17: Add16Cmd,
|
||||
0x18: EndCmd,
|
||||
0x19: TimestampCmd,
|
||||
0x1f: ProfileCaptureCmd,
|
||||
#0x1a: KTraceCmd,
|
||||
0x22: StartTACmd,
|
||||
0x23: FinalizeTACmd,
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import textwrap, os.path, json, datetime, ctypes
|
||||
import textwrap, os.path, json, datetime, ctypes, plistlib
|
||||
from .asc import *
|
||||
from ..hw.uat import UAT, MemoryAttr, PTE, Page_PTE, TTBR
|
||||
from ..hw.agx import *
|
||||
|
||||
from ..fw.agx.initdata import InitData
|
||||
from ..fw.agx.initdata import InitData, InitData_RegionC
|
||||
from ..fw.agx.channels import *
|
||||
from ..fw.agx.cmdqueue import *
|
||||
from ..fw.agx.microsequence import *
|
||||
|
@ -223,13 +223,24 @@ class CommandQueueTracer(Reloadable):
|
|||
|
||||
if info_addr not in tracer.state.queues:
|
||||
self.state = CommandQueueState()
|
||||
self.state.last_uuid = None
|
||||
self.state.rptr = None
|
||||
self.state.active = True
|
||||
tracer.state.queues[info_addr] = self.state
|
||||
else:
|
||||
self.state = tracer.state.queues[info_addr]
|
||||
|
||||
self.tracer.uat.invalidate_cache()
|
||||
self.update_info()
|
||||
|
||||
if new_queue:
|
||||
if getattr(self.state, "last_uuid", None) is None:
|
||||
self.state.last_uuid = None
|
||||
|
||||
if self.info.uuid == self.state.last_uuid:
|
||||
return
|
||||
|
||||
self.state.last_uuid = self.info.uuid
|
||||
self.state.rptr = 0
|
||||
|
||||
if tracer.cmd_dump_dir:
|
||||
|
@ -246,8 +257,6 @@ class CommandQueueTracer(Reloadable):
|
|||
self.dumpfile.flush()
|
||||
tracer.state.queue_seq += 1
|
||||
|
||||
self.tracer.uat.invalidate_cache()
|
||||
self.update_info()
|
||||
|
||||
def update_info(self):
|
||||
self.info = CommandQueueInfo.parse_stream(self.tracer.get_stream(0, self.info_addr))
|
||||
|
@ -260,7 +269,6 @@ class CommandQueueTracer(Reloadable):
|
|||
return self.info.pointers.rb_size
|
||||
|
||||
def json_default(self, val):
|
||||
print(repr(val))
|
||||
return None
|
||||
|
||||
def get_workitems(self, workmsg):
|
||||
|
@ -423,6 +431,16 @@ class AGXTracer(ASCTracer):
|
|||
if libagxdecode:
|
||||
self.init_agxdecode(libagxdecode)
|
||||
|
||||
self.counters = {}
|
||||
counters = os.getenv("COUNTERS", None)
|
||||
if counters:
|
||||
pl = plistlib.load(open(counters,"rb"))
|
||||
for ctr in pl["DeviceCounters"]:
|
||||
spec = pl[ctr]
|
||||
spec["Name"] = ctr
|
||||
self.counters[(spec['Partition'], spec['Select'])] = spec
|
||||
self.log(f"Loaded {len(self.counters)} performance counters")
|
||||
|
||||
def init_agxdecode(self, path):
|
||||
# Hack to make sure we reload the lib when it changes
|
||||
# tpath = os.getenv("XDG_RUNTIME_DIR", "/tmp") + "/" + str(time.time()) + ".so"
|
||||
|
@ -526,6 +544,7 @@ class AGXTracer(ASCTracer):
|
|||
self.uat.foreach_table(ctx, trace_pt)
|
||||
|
||||
def clear_gpuvm_tracers(self, ctx=None):
|
||||
self.uat.invalidate_cache()
|
||||
if ctx is None:
|
||||
for i in range(UAT.NUM_CONTEXTS):
|
||||
self.clear_gpuvm_tracers(i)
|
||||
|
@ -575,6 +594,7 @@ class AGXTracer(ASCTracer):
|
|||
self.add_uatmap_tracers(ctx)
|
||||
self.add_gpuvm_tracers(ctx)
|
||||
else:
|
||||
self.uat.invalidate_cache()
|
||||
is_kernel = iova >= 0xf8000000000
|
||||
iova += off << (level * 11 + 14)
|
||||
if level == 0:
|
||||
|
@ -735,6 +755,8 @@ class AGXTracer(ASCTracer):
|
|||
self.mon.add(va, size, name, readfn= lambda a, s: self.uat.ioread(ctx, a, s))
|
||||
|
||||
def handle_ringmsg(self, msg):
|
||||
self.uat.invalidate_cache()
|
||||
self.mon.poll()
|
||||
if msg.__class__.__name__ == "FlagMsg":
|
||||
self.log(f"== Event flag notification ==")
|
||||
self.handle_event(msg)
|
||||
|
@ -785,8 +807,51 @@ class AGXTracer(ASCTracer):
|
|||
if addr:
|
||||
info = BufferManagerInfo.parse_stream(self.get_stream(0, addr))
|
||||
self.log(f"BM info: {info}")
|
||||
elif msg.__class__.__name__ == "DC_ConfigurePerfCounters":
|
||||
self.configure_perf_counters()
|
||||
self.state.perf_enabled = True
|
||||
elif msg.__class__.__name__ == "DC_DisablePerfCounters":
|
||||
self.state.perf_enabled = False
|
||||
return True
|
||||
|
||||
def configure_perf_counters(self):
    """Re-read InitData region C and log every configured performance counter.

    The region is parsed afresh from the stream at the previously recorded
    ``regionC._addr`` and stored back on ``self.state.initdata.regionC``.
    One line is logged per descriptor, for the first ``perfctr_count``
    entries of ``perfctrs``. When ``self.counters`` holds a spec for the
    descriptor's ``(partition, select)`` pair, the spec's name and details
    are appended to that line. Finally ``dump_perf_counters()`` is invoked.
    """
    self.log("=== Performance counter config ==")
    rc = self.state.initdata.regionC = InitData_RegionC.parse_stream(
        self.get_stream(0, self.state.initdata.regionC._addr))

    # Slicing (instead of indexing by range) keeps a bogus perfctr_count
    # from running past the end of the descriptor array.
    for i, cfg in enumerate(rc.perfctrs[:rc.perfctr_count]):
        partition = cfg.regs & 0x7f
        if partition == 1:
            # Partition 1 selects via the combined 64-bit enable/disable mask.
            select = (cfg.en_mask << 32) | cfg.dis_mask
        else:
            select = cfg.source_mask

        # self.counters is keyed by (Partition, Select), so look up with the
        # select value computed above rather than the raw source mask.
        # NOTE(review): the previous code computed `select` but keyed on
        # cfg.source_mask; confirm against a real counter plist.
        spec = self.counters.get((partition, select), None)

        tail = ""
        if spec:
            tail = f" {spec['Name']} [+{spec['Increment']} {spec['Partition']}/{spec['Select']:x}/{spec.get('SourceMask', 0)}]: {spec['type']}:{spec.get('Description', None)}"
        self.log(f"#{i:3d} R={cfg.regs:016x} M={cfg.dis_mask:8x}/{cfg.en_mask:8x} S={cfg.source_mask:#10x} B={cfg.base_reg:#x} T={cfg.unk_type} C={cfg.count} idx={cfg.index}{tail}")

    self.dump_perf_counters()
|
||||
|
||||
def dump_perf_counters(self):
    """Hexdump the performance counter buffers referenced by InitData region C.

    Returns immediately unless ``self.state.perf_enabled`` is set. Otherwise
    logs ``unk_8890`` and then, for each non-null buffer pointer in the
    cached region C, logs its address and a hexdump read through
    ``self.uat.ioread`` on context 0.
    """
    if not self.state.perf_enabled:
        return

    rc = self.state.initdata.regionC
    self.log(f"unk_8890 = {rc.unk_8890:#x}")
    self.log("=== Performance counter buffers ==")

    # Both of these buffers are dumped using the same size field.
    if rc.unkptr_8894 != 0:
        self.log(f"unkptr_8894 @ {rc.unkptr_8894:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_8894, rc.size_889c), print_fn=self.log)
    if rc.unkptr_88a0 != 0:
        self.log(f"unkptr_88a0 @ {rc.unkptr_88a0:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_88a0, rc.size_889c), print_fn=self.log)

    # The pointer at offset 0x88a8 is exposed by InitData_RegionC as
    # `perf_source_list`; there is no `unkptr_88a8` attribute to read.
    source_list = rc.perf_source_list
    if source_list != 0:
        self.log(f"unkptr_88a8 @ {source_list:#x}")
        # The list is dumped up to the start of the unkptr_88b4 buffer.
        chexdump(self.uat.ioread(0, source_list, rc.unkptr_88b4 - source_list), print_fn=self.log)
        # NOTE(review): assumed to belong to this branch since the size above
        # depends on unkptr_88b4 — confirm against the upstream indentation.
        self.log(f"unkptr_88b4 @ {rc.unkptr_88b4:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_88b4, 0x1000), print_fn=self.log)
|
||||
|
||||
def handle_event(self, msg):
|
||||
if self.last_ta and self.redump:
|
||||
self.log("Redumping TA...")
|
||||
|
@ -797,6 +862,9 @@ class AGXTracer(ASCTracer):
|
|||
self.queue_ta.update_info()
|
||||
self.log(f"Queue info: {self.queue_ta.info}")
|
||||
self.last_ta = None
|
||||
self.dump_perf_counters()
|
||||
if self.state.perf_enabled:
|
||||
self.hv.run_shell()
|
||||
if self.last_3d and self.redump:
|
||||
self.log("Redumping 3D...")
|
||||
stream = self.get_stream(0, self.last_3d._addr)
|
||||
|
@ -806,6 +874,9 @@ class AGXTracer(ASCTracer):
|
|||
self.queue_3d.update_info()
|
||||
self.log(f"Queue info: {self.queue_3d.info}")
|
||||
self.last_3d = None
|
||||
self.dump_perf_counters()
|
||||
if self.state.perf_enabled:
|
||||
self.hv.run_shell()
|
||||
if self.last_cp and self.redump:
|
||||
self.log("Redumping CP...")
|
||||
stream = self.get_stream(0, self.last_cp._addr)
|
||||
|
@ -815,6 +886,9 @@ class AGXTracer(ASCTracer):
|
|||
self.queue_cp.update_info()
|
||||
self.log(f"Queue info: {self.queue_cp.info}")
|
||||
self.last_cp = None
|
||||
self.dump_perf_counters()
|
||||
if self.state.perf_enabled:
|
||||
self.hv.run_shell()
|
||||
|
||||
def dump_buffer_manager(self, buffer_mgr, kread, read):
|
||||
return
|
||||
|
@ -1299,6 +1373,7 @@ class AGXTracer(ASCTracer):
|
|||
self.state.channels = {}
|
||||
self.state.queues = {}
|
||||
self.state.queue_seq = 0
|
||||
self.state.perf_enabled = False
|
||||
|
||||
def init_channels(self):
|
||||
if self.channels:
|
||||
|
@ -1335,6 +1410,7 @@ class AGXTracer(ASCTracer):
|
|||
self.untrace_uatrange(0, self.state.initdata.regionC_addr, 0x11d40)
|
||||
|
||||
def resume(self):
|
||||
self.uat.invalidate_cache()
|
||||
self.add_gpuvm_tracers()
|
||||
self.add_uatmap_tracers()
|
||||
self.add_ttbr_tracers()
|
||||
|
@ -1365,9 +1441,9 @@ class AGXTracer(ASCTracer):
|
|||
#self.mon_addva(0, initdata.regionB.unkptr_180, 0x140, "unkptr_180")
|
||||
self.mon_addva(0, initdata.regionB.unkptr_190, 0x80, "unkptr_190")
|
||||
self.mon_addva(0, initdata.regionB.unkptr_198, 0xc0, "unkptr_198")
|
||||
self.mon_addva(0, initdata.regionB.buffer_mgr_ctl_addr, 0x4000, "Buffer manager ctl")
|
||||
self.mon_addva(0, initdata.unkptr_20.unkptr_0, 0x40, "unkptr_20.unkptr_0")
|
||||
self.mon_addva(0, initdata.unkptr_20.unkptr_8, 0x40, "unkptr_20.unkptr_8")
|
||||
# self.mon_addva(0, initdata.regionB.buffer_mgr_ctl_addr, 0x4000, "Buffer manager ctl")
|
||||
#self.mon_addva(0, initdata.unkptr_20.unkptr_0, 0x40, "unkptr_20.unkptr_0")
|
||||
#self.mon_addva(0, initdata.unkptr_20.unkptr_8, 0x40, "unkptr_20.unkptr_8")
|
||||
|
||||
def clear_gpuvm_range(self, ctx, iova, length):
|
||||
while length > 0:
|
||||
|
|
Loading…
Reference in a new issue