m1n1.trace.agx: Perfctr stuff

Signed-off-by: Asahi Lina <lina@asahilina.net>
This commit is contained in:
Asahi Lina 2024-01-17 17:33:11 +09:00
parent c602bfd72e
commit 52e1855c2a
5 changed files with 174 additions and 18 deletions

View file

@ -101,6 +101,20 @@ class DC_09(ConstructClass):
Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
)
# Device control message with msg_type 0xb. The AGX tracer reacts to it by
# re-reading the performance counter configuration and marking perf counters
# as enabled.
class DC_ConfigurePerfCounters(ConstructClass):
subcon = Struct (
# Message discriminator: must be exactly 0xb for this class to match.
"msg_type" / Const(0xb, Int32ul),
# 0x2c-byte payload, not decoded further; defaults to all zeroes.
"data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))),
# Version-gated 0x10 bytes of padding (condition: G == G14 && V >= V13_2).
Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
)
# Device control message with msg_type 0xc. The AGX tracer reacts to it by
# marking perf counters as disabled.
class DC_DisablePerfCounters(ConstructClass):
subcon = Struct (
# Message discriminator: must be exactly 0xc for this class to match.
"msg_type" / Const(0xc, Int32ul),
# 0x2c-byte payload, not decoded further; defaults to all zeroes.
"data" / HexDump(Default(Bytes(0x2c), bytes(0x2c))),
# Version-gated 0x10 bytes of padding (condition: G == G14 && V >= V13_2).
Ver("G == G14 && V >= V13_2", ZPadding(0x10)),
)
class DC_GrowTVBAck(ConstructClass):
subcon = Struct (
"msg_type" / Const(0xd, Int32ul),
@ -153,6 +167,8 @@ DeviceControlMsg = FixedSized(DeviceControlSize, Select(
DC_UpdateIdleTS,
DC_1e,
DC_Write32,
DC_ConfigurePerfCounters,
DC_DisablePerfCounters,
DC_GrowTVBAck,
UnknownMsg,
))

View file

@ -46,6 +46,24 @@ class WorkCommandInitBM(ConstructClass):
"stamp_value" / Hex(Int32ul), # 0x100
)
class WorkCommandComputeUnk10(ConstructClass):
"""
Occasionally sent before WorkCommandCP on the SubmitCP queue.

Purpose unknown; the payload is a single 32-bit word.
"""
subcon = Struct(
# Command type tag: must be exactly 0xa (10) for this class to match.
"magic" / Const(0xa, Hex(Int32ul)),
# Undecoded 32-bit value, shown in hex.
"unk" / Hex(Int32ul),
)
class WorkCommandComputeUnk11(ConstructClass):
"""
Occasionally sent before WorkCommandCP on the SubmitCP queue.

Purpose unknown; the payload is a single 32-bit word.
"""
subcon = Struct(
# Command type tag: must be exactly 0xb (11) for this class to match.
"magic" / Const(0xb, Hex(Int32ul)),
# Undecoded 32-bit value, shown in hex.
"unk" / Hex(Int32ul),
)
class Flag(ConstructValueClass):
subcon = Hex(Int32ul)
@ -428,6 +446,8 @@ class CmdBufWork(ConstructClass):
3: WorkCommandCP,
4: WorkCommandBarrier,
6: WorkCommandInitBM,
10: WorkCommandComputeUnk10,
11: WorkCommandComputeUnk11,
})
)
@ -522,7 +542,7 @@ class CommandQueueInfo(ConstructClass):
"busy" / Hex(Int32ul), # 1 = gpu busy
"pad1" / ZPadding(0x1c),
"unk_80" / Hex(Int32ul),
"blocked_on_barrier" / Hex(Int32ul),
"has_commands" / Hex(Int32ul),
"unk_88" / Int32ul,
"unk_8c" / Int32ul,
"unk_90" / Int32ul,
@ -548,8 +568,8 @@ class CommandQueueInfo(ConstructClass):
self.unk_54 = -1
self.unk_58 = 0x0
self.busy = 0x0
self.blocked_on_barrier = 0x0
self.unk_80 = 0
self.has_commands = 0
self.unk_88 = 0
self.unk_8c = 0
self.unk_90 = 0

View file

@ -1725,6 +1725,19 @@ class RCPowerZone(ConstructClass):
self.target_off = self.target - off
self.tc = tc
# One performance counter descriptor. The fields below add up to 32 (0x20)
# bytes: 8 + 4*4 + 4*1 + 4.
class PerfCounterDesc(ConstructClass):
subcon = Struct(
# 64-bit value; the tracer takes its low 7 bits (regs & 0x7f) as the
# counter partition.
"regs" / Int64ul,
# The tracer combines en_mask (high 32 bits) and dis_mask (low 32 bits)
# into a select value for partition 1.
"dis_mask" / Int32ul,
"en_mask" / Int32ul,
"source_mask" / Int32ul,
"base_reg" / Int32ul,
# Meaning not established yet.
"unk_type" / Int8ul,
"count" / Int8ul,
"index" / Int8ul,
# NOTE(review): named as padding but parsed as plain integers, so
# non-zero values are accepted -- confirm these are really unused.
"pad0" / Int8ul,
"pad1" / Int32ul,
)
class InitData_RegionC(ConstructClass):
subcon = Struct(
@ -1748,13 +1761,29 @@ class InitData_RegionC(ConstructClass):
"unk_62" / Int32ul,
Ver("V >= V13_0B4", "unk_66_0" / HexDump(Bytes(0xc))),
"unk_66" / Int32ul,
"unk_6a" / HexDump(Bytes(0x16)),
"unk_80" / HexDump(Bytes(0xf80)),
"unk_1000" / HexDump(Bytes(0x7000)),
"unk_8000" / HexDump(Bytes(0x900)),
Ver("G >= G14X", "unk_8900_pad" / Default(HexDump(Bytes(0x484c)), bytes(0x484c))),
Ver("V >= V13_0B4 && V < V13_2", "unk_8900_0" / Int32ul),
Ver("V >= V13_3", "unk_8900_pad2" / Default(HexDump(Bytes(0x54)), bytes(0x54))),
"unk_6a" / HexDump(Bytes(0x12)),
"perfctrs" / Array(512, PerfCounterDesc),
"perfctr_count" / Int32ul,
"unk_4080" / HexDump(Bytes(0x3f80)),
Ver("G >= G14X", "unk_8000_pad" / Default(HexDump(Bytes(0x484c)), bytes(0x484c))),
Ver("V >= V13_0B4 && V < V13_2", "unk_8000_0" / Int32ul),
Ver("V >= V13_3", "unk_8000_pad2" / Default(HexDump(Bytes(0x54)), bytes(0x54))),
"unk_8000" / HexDump(Bytes(0x878)),
"unk_8878" / Int32ul,
"unk_887c" / Int32ul,
"unk_8880" / HexDump(Bytes(0x10)),
"unk_8890" / Int32ul,
"unkptr_8894" / Int64ul,
"size_889c" / Int32ul,
"unkptr_88a0" / Int64ul,
"perf_source_list" / Int64ul,
"perf_source_count" / Int32ul,
"unkptr_88b4" / Int64ul,
"unk_88bc" / Int32ul,
"unk_88c0" / Int16ul,
"unk_88c2" / Int8ul,
"unkpad_88c3" / Int8ul,
"unk_88c4" / HexDump(Bytes(0x3c)),
"unk_8900" / Int32ul,
"unk_atomic" / Int32ul,
"max_power" / Int32ul,

View file

@ -914,7 +914,7 @@ class TimestampCmd(ConstructClass):
"unk_24" / Int64ul,
Ver("V >= V13_0B4", "unk_ts_addr" / Int64ul),
"uuid" / Int32ul,
"unk_30_padding" / Int32ul,
"unk_30" / Int32ul,
)
class WaitForInterruptCmd(ConstructClass):
@ -1114,6 +1114,20 @@ class DoorbellCmd(ConstructClass):
self.pad = 0
self.arg = flags << 10
# Microsequence command with opcode byte 0x1f. The fields below add up to
# 20 bytes: 4*1 + 4 + 4 + 8.
class ProfileCaptureCmd(ConstructClass):
subcon = Struct(
# Opcode byte: must be exactly 0x1f for this class to match.
"magic" / Const(0x1f, Int8ul),
# Three single-byte arguments whose meaning is not established yet.
"arg0" / Int8ul,
"arg1" / Int8ul,
"arg2" / Int8ul,
"encoder_id" / Int32ul,
"uuid" / Int32ul,
# 64-bit value; presumably the address of the related work item -- TODO confirm.
"workitem_ptr" / Int64ul,
)
# NOTE(review): `flags` is accepted but never used, and no field is
# initialised here -- confirm whether construction support is still to come.
def __init__(self, flags):
super().__init__()
class MicroSequence(ConstructValueClass):
subcon = RepeatUntil(lambda obj, lst, ctx: lst[-1].op & 0x3f in (0x18, 0x2b, 0x2c),
Struct(
@ -1147,6 +1161,7 @@ class MicroSequence(ConstructValueClass):
0x17: Add16Cmd,
0x18: EndCmd,
0x19: TimestampCmd,
0x1f: ProfileCaptureCmd,
#0x1a: KTraceCmd,
0x22: StartTACmd,
0x23: FinalizeTACmd,

View file

@ -1,11 +1,11 @@
# SPDX-License-Identifier: MIT
import textwrap, os.path, json, datetime, ctypes
import textwrap, os.path, json, datetime, ctypes, plistlib
from .asc import *
from ..hw.uat import UAT, MemoryAttr, PTE, Page_PTE, TTBR
from ..hw.agx import *
from ..fw.agx.initdata import InitData
from ..fw.agx.initdata import InitData, InitData_RegionC
from ..fw.agx.channels import *
from ..fw.agx.cmdqueue import *
from ..fw.agx.microsequence import *
@ -223,13 +223,24 @@ class CommandQueueTracer(Reloadable):
if info_addr not in tracer.state.queues:
self.state = CommandQueueState()
self.state.last_uuid = None
self.state.rptr = None
self.state.active = True
tracer.state.queues[info_addr] = self.state
else:
self.state = tracer.state.queues[info_addr]
self.tracer.uat.invalidate_cache()
self.update_info()
if new_queue:
if getattr(self.state, "last_uuid", None) is None:
self.state.last_uuid = None
if self.info.uuid == self.state.last_uuid:
return
self.state.last_uuid = self.info.uuid
self.state.rptr = 0
if tracer.cmd_dump_dir:
@ -246,8 +257,6 @@ class CommandQueueTracer(Reloadable):
self.dumpfile.flush()
tracer.state.queue_seq += 1
self.tracer.uat.invalidate_cache()
self.update_info()
def update_info(self):
self.info = CommandQueueInfo.parse_stream(self.tracer.get_stream(0, self.info_addr))
@ -260,7 +269,6 @@ class CommandQueueTracer(Reloadable):
return self.info.pointers.rb_size
def json_default(self, val):
print(repr(val))
return None
def get_workitems(self, workmsg):
@ -423,6 +431,16 @@ class AGXTracer(ASCTracer):
if libagxdecode:
self.init_agxdecode(libagxdecode)
self.counters = {}
counters = os.getenv("COUNTERS", None)
if counters:
# Use a context manager so the plist file is closed as soon as it is parsed,
# rather than leaking the handle until garbage collection.
with open(counters, "rb") as plist_file:
    pl = plistlib.load(plist_file)
for ctr in pl["DeviceCounters"]:
spec = pl[ctr]
spec["Name"] = ctr
self.counters[(spec['Partition'], spec['Select'])] = spec
self.log(f"Loaded {len(self.counters)} performance counters")
def init_agxdecode(self, path):
# Hack to make sure we reload the lib when it changes
# tpath = os.getenv("XDG_RUNTIME_DIR", "/tmp") + "/" + str(time.time()) + ".so"
@ -526,6 +544,7 @@ class AGXTracer(ASCTracer):
self.uat.foreach_table(ctx, trace_pt)
def clear_gpuvm_tracers(self, ctx=None):
self.uat.invalidate_cache()
if ctx is None:
for i in range(UAT.NUM_CONTEXTS):
self.clear_gpuvm_tracers(i)
@ -575,6 +594,7 @@ class AGXTracer(ASCTracer):
self.add_uatmap_tracers(ctx)
self.add_gpuvm_tracers(ctx)
else:
self.uat.invalidate_cache()
is_kernel = iova >= 0xf8000000000
iova += off << (level * 11 + 14)
if level == 0:
@ -735,6 +755,8 @@ class AGXTracer(ASCTracer):
self.mon.add(va, size, name, readfn= lambda a, s: self.uat.ioread(ctx, a, s))
def handle_ringmsg(self, msg):
self.uat.invalidate_cache()
self.mon.poll()
if msg.__class__.__name__ == "FlagMsg":
self.log(f"== Event flag notification ==")
self.handle_event(msg)
@ -785,8 +807,51 @@ class AGXTracer(ASCTracer):
if addr:
info = BufferManagerInfo.parse_stream(self.get_stream(0, addr))
self.log(f"BM info: {info}")
elif msg.__class__.__name__ == "DC_ConfigurePerfCounters":
self.configure_perf_counters()
self.state.perf_enabled = True
elif msg.__class__.__name__ == "DC_DisablePerfCounters":
self.state.perf_enabled = False
return True
def configure_perf_counters(self):
    """Re-read InitData region C and log every configured performance counter.

    Region C is parsed again from the address recorded on the previously
    parsed copy, and the fresh copy replaces ``self.state.initdata.regionC``.
    One line is logged for each of the first ``perfctr_count`` entries of
    ``perfctrs``.  If ``self.counters`` holds a matching entry, its name and
    description are appended to that line.  Finishes by calling
    ``dump_perf_counters()``.
    """
    self.log("=== Performance counter config ==")
    rc = self.state.initdata.regionC = InitData_RegionC.parse_stream(
        self.get_stream(0, self.state.initdata.regionC._addr))
    for i in range(rc.perfctr_count):
        cfg = rc.perfctrs[i]
        tail = ""
        # The low 7 bits of `regs` are used as the counter partition.
        partition = cfg.regs & 0x7f
        if partition == 1:
            # Partition 1: the select value is the two mask words combined.
            select = (cfg.en_mask << 32) | cfg.dis_mask
        else:
            select = cfg.source_mask
        # self.counters is keyed by (Partition, Select).  Look up the select
        # value computed above; the previous code computed it but then
        # always passed cfg.source_mask, leaving `select` unused.
        spec = self.counters.get((partition, select))
        if spec:
            tail = f" {spec['Name']} [+{spec['Increment']} {spec['Partition']}/{spec['Select']:x}/{spec.get('SourceMask', 0)}]: {spec['type']}:{spec.get('Description', None)}"
        self.log(f"#{i:3d} R={cfg.regs:016x} M={cfg.dis_mask:8x}/{cfg.en_mask:8x} S={cfg.source_mask:#10x} B={cfg.base_reg:#x} T={cfg.unk_type} C={cfg.count} idx={cfg.index}{tail}")
    self.dump_perf_counters()
def dump_perf_counters(self):
    """Hex-dump the performance counter buffers referenced from region C.

    Does nothing unless ``self.state.perf_enabled`` is set.  Uses the copy
    of region C already stored in ``self.state.initdata.regionC``; it is
    not re-read here.  Every buffer is dumped only when its pointer is
    non-zero.
    """
    if not self.state.perf_enabled:
        return

    rc = self.state.initdata.regionC
    self.log(f"unk_8890 = {rc.unk_8890:#x}")
    self.log("=== Performance counter buffers ==")
    # The first two buffers share the size stored in size_889c.
    if rc.unkptr_8894 != 0:
        self.log(f"unkptr_8894 @ {rc.unkptr_8894:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_8894, rc.size_889c), print_fn=self.log)
    if rc.unkptr_88a0 != 0:
        self.log(f"unkptr_88a0 @ {rc.unkptr_88a0:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_88a0, rc.size_889c), print_fn=self.log)
    # The 64-bit field at offset 0x88a8 is declared as `perf_source_list` in
    # InitData_RegionC; there is no `unkptr_88a8` attribute to read.
    if rc.perf_source_list != 0:
        self.log(f"unkptr_88a8 @ {rc.perf_source_list:#x}")
        # Length is taken as the distance to the next buffer pointer.
        chexdump(self.uat.ioread(0, rc.perf_source_list, rc.unkptr_88b4 - rc.perf_source_list), print_fn=self.log)
    # Guarded like the other buffers so a null pointer is never read.
    if rc.unkptr_88b4 != 0:
        self.log(f"unkptr_88b4 @ {rc.unkptr_88b4:#x}")
        chexdump(self.uat.ioread(0, rc.unkptr_88b4, 0x1000), print_fn=self.log)
def handle_event(self, msg):
if self.last_ta and self.redump:
self.log("Redumping TA...")
@ -797,6 +862,9 @@ class AGXTracer(ASCTracer):
self.queue_ta.update_info()
self.log(f"Queue info: {self.queue_ta.info}")
self.last_ta = None
self.dump_perf_counters()
if self.state.perf_enabled:
self.hv.run_shell()
if self.last_3d and self.redump:
self.log("Redumping 3D...")
stream = self.get_stream(0, self.last_3d._addr)
@ -806,6 +874,9 @@ class AGXTracer(ASCTracer):
self.queue_3d.update_info()
self.log(f"Queue info: {self.queue_3d.info}")
self.last_3d = None
self.dump_perf_counters()
if self.state.perf_enabled:
self.hv.run_shell()
if self.last_cp and self.redump:
self.log("Redumping CP...")
stream = self.get_stream(0, self.last_cp._addr)
@ -815,6 +886,9 @@ class AGXTracer(ASCTracer):
self.queue_cp.update_info()
self.log(f"Queue info: {self.queue_cp.info}")
self.last_cp = None
self.dump_perf_counters()
if self.state.perf_enabled:
self.hv.run_shell()
def dump_buffer_manager(self, buffer_mgr, kread, read):
return
@ -1299,6 +1373,7 @@ class AGXTracer(ASCTracer):
self.state.channels = {}
self.state.queues = {}
self.state.queue_seq = 0
self.state.perf_enabled = False
def init_channels(self):
if self.channels:
@ -1335,6 +1410,7 @@ class AGXTracer(ASCTracer):
self.untrace_uatrange(0, self.state.initdata.regionC_addr, 0x11d40)
def resume(self):
self.uat.invalidate_cache()
self.add_gpuvm_tracers()
self.add_uatmap_tracers()
self.add_ttbr_tracers()
@ -1365,9 +1441,9 @@ class AGXTracer(ASCTracer):
#self.mon_addva(0, initdata.regionB.unkptr_180, 0x140, "unkptr_180")
self.mon_addva(0, initdata.regionB.unkptr_190, 0x80, "unkptr_190")
self.mon_addva(0, initdata.regionB.unkptr_198, 0xc0, "unkptr_198")
self.mon_addva(0, initdata.regionB.buffer_mgr_ctl_addr, 0x4000, "Buffer manager ctl")
self.mon_addva(0, initdata.unkptr_20.unkptr_0, 0x40, "unkptr_20.unkptr_0")
self.mon_addva(0, initdata.unkptr_20.unkptr_8, 0x40, "unkptr_20.unkptr_8")
# self.mon_addva(0, initdata.regionB.buffer_mgr_ctl_addr, 0x4000, "Buffer manager ctl")
#self.mon_addva(0, initdata.unkptr_20.unkptr_0, 0x40, "unkptr_20.unkptr_0")
#self.mon_addva(0, initdata.unkptr_20.unkptr_8, 0x40, "unkptr_20.unkptr_8")
def clear_gpuvm_range(self, ctx, iova, length):
while length > 0: