mirror of
https://github.com/AsahiLinux/m1n1
synced 2025-01-01 15:58:44 +00:00
337 lines
11 KiB
Python
337 lines
11 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# SPDX-License-Identifier: MIT
|
||
|
|
||
|
import sys, pathlib, time
|
||
|
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
|
||
|
|
||
|
import atexit, sys
|
||
|
|
||
|
from m1n1.agx import AGX
|
||
|
from m1n1.agx.render import *
|
||
|
|
||
|
from m1n1.setup import *
|
||
|
from m1n1 import asm
|
||
|
|
||
|
from m1n1.gpiola import GPIOLogicAnalyzer
|
||
|
|
||
|
# CPU index passed to p.mmu_init_secondary() below and to the
# GPIOLogicAnalyzer constructor (cpu=...) further down in this script.
analyzer_cpu = 1

# Enable the PMGR clocks for the gfx-asc and sgx ADT nodes, in this order.
for adt_node in ("/arm-io/gfx-asc", "/arm-io/sgx"):
    p.pmgr_adt_clocks_enable(adt_node)

# Start the secondary cores, then initialise the MMU on the analyzer core.
p.smp_start_secondaries()
p.mmu_init_secondary(analyzer_cpu)

# NOTE(review): the unit is presumably seconds -- confirm against the
# proxy interface before changing this value.
iface.dev.timeout = 42
|
||
|
|
||
|
## heater code
# Uploads a small counting loop and launches it on CPUs 2..7 via
# p.smp_call(). Change the condition to False to skip this step.
if True:
    code = u.malloc(0x1000)

    # bench(x0): decrement x0 until it reaches zero, then return in x0 the
    # number of CNTPCT_EL0 ticks that elapsed while counting.
    util = asm.ARMAsm("""
    bench:
        mrs x1, CNTPCT_EL0
    1:
        sub x0, x0, #1
        cbnz x0, 1b

        mrs x2, CNTPCT_EL0
        sub x0, x2, x1
        ret
    """, code)

    # Copy the assembled bytes to the target, then run cache maintenance
    # over exactly that range so the cores fetch the new instructions.
    blob = util.data
    iface.writemem(code, blob)
    p.dc_cvau(code, len(blob))
    p.ic_ivau(code, len(blob))

    # Iteration count handed to bench() as its x0 argument.
    LOOPS = 80000000000
    for idx in range(2, 8):
        print(f"bench {idx}")
        p.smp_call(idx, util.bench, LOOPS)
|
||
|
|
||
|
agx = AGX(u)

# Register monitor shared with the AGX object through agx.mon.
mon = RegMonitor(u, ascii=True, bufsize=0x8000000)
agx.mon = mon

sgx = agx.sgx_dev
# Memory regions that can be added to the monitor (all disabled):
#mon.add(sgx.gpu_region_base, sgx.gpu_region_size, "contexts")
#mon.add(sgx.gfx_shared_region_base, sgx.gfx_shared_region_size, "gfx-shared")
#mon.add(sgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff")

#mon.add(agx.initdasgx.gfx_handoff_base, sgx.gfx_handoff_size, "gfx-handoff")

# Register the reboot handler before the GPU is started, so that it runs
# at interpreter exit even if a later step raises.
atexit.register(p.reboot)
agx.start()

print(
    "==========================================",
    "## After init",
    "==========================================",
    sep="\n",
)
mon.poll()
agx.poll_objects()
|
||
|
|
||
|
# Two GPU contexts, bound to slots 63 and 62 respectively.
ctx = GPUContext(agx)
ctx.bind(63)
ctx0 = GPUContext(agx)
ctx0.bind(62)

# One frame per context, both built from the path given as the first
# command-line argument.
f = GPUFrame(ctx, sys.argv[1], track=False)
f2 = GPUFrame(ctx0, sys.argv[1], track=False)

RENDERERS = 4
FRAMES = 8

renderers = []

# Clone of the first frame's command buffer, submitted in selected slots
# later on. The modification that would make it differ is disabled.
fault_cmdbuf = f.cmdbuf.clone()
#fault_cmdbuf.depth_buffer = 0xdeadb000

for i in range(RENDERERS):
    # Renderer 0 runs in ctx0; every other renderer shares ctx.
    if i == 0:
        c = ctx0
    else:
        c = ctx
    r = GPURenderer(c, 8, bm_slot=0x10 + i, queue=1)
    renderers.append(r)

    # NOTE(review): the scraped source lost its indentation; this loop is
    # assumed to belong to the renderer loop so that every renderer's
    # queues get priority 2 -- confirm against the upstream file.
    for q in (r.wq_3d, r.wq_ta):
        q.info.set_prio(2)
        q.info.push()
|
||
|
|
||
|
#for r in renderers[2:4]:
|
||
|
#for q in (r.wq_3d, r.wq_ta):
|
||
|
#q.info.set_prio(3)
|
||
|
#q.info.push()
|
||
|
|
||
|
#for r in renderers[4:6]:
|
||
|
#for q in (r.wq_3d, r.wq_ta):
|
||
|
#q.info.set_prio(0)
|
||
|
#q.info.push()
|
||
|
|
||
|
#for r in renderers[6:8]:
|
||
|
#for q in (r.wq_3d, r.wq_ta):
|
||
|
#q.info.set_prio(1)
|
||
|
#q.info.push()
|
||
|
|
||
|
print(
    "==========================================",
    "## Submitting",
    "==========================================",
    sep="\n",
)

# Queue FRAMES command buffers on every renderer. The (renderer, frame)
# pairs listed in the membership test get fault_cmdbuf; renderer 0 otherwise
# uses the ctx0 frame (f2) and the remaining renderers use f.
# The loop variable names are kept as-is because later code in this script
# reads them after the loops have finished.
for i, r in enumerate(renderers):
    for j in range(FRAMES):
        if (i, j) in ((1, 0), (2, 1), (3, 1)):
            cmdbuf = fault_cmdbuf
        elif i == 0:
            cmdbuf = f2.cmdbuf
        else:
            cmdbuf = f.cmdbuf
        r.submit(cmdbuf)

print(
    "==========================================",
    "## Submitted",
    "==========================================",
    sep="\n",
)
|
||
|
|
||
|
def t(addr):
    """Return the physical address that *addr* translates to.

    Calls ``agx.uat.iotranslate(0, addr, 4)`` and uses the first field of
    the first entry it returns.

    NOTE(review): the ``0`` is presumably the UAT context index and the
    ``4`` the access size in bytes -- confirm against the UAT helper.

    Raises:
        Exception: if the translated address is ``None``.
    """
    mappings = agx.uat.iotranslate(0, addr, 4)
    paddr = mappings[0][0]
    if paddr is not None:
        return paddr
    raise Exception(f"Failed to iotranslate {addr:#x}")
|
||
|
|
||
|
# Signal name -> translated address. The dict is later handed to the
# GPIOLogicAnalyzer as regs=..., so insertion order is kept deliberately.
regs = {
    "ta_cmds": t(agx.initdata.regionB.stats_ta.addrof("total_cmds")),
    "ta_ts": t(agx.initdata.regionB.stats_ta.stats.addrof("unk_timestamp")),
}

# Five consecutive 8-byte "pending_stamps" slots in regionC, each sampled
# as two 32-bit words named info (offset 0) and val (offset 4).
pend_base = agx.initdata.regionC.addrof("pending_stamps")
for i in range(5):
    slot = pend_base + i*8
    regs.update({
        f"st{i}_info": t(slot),
        f"st{i}_val": t(slot + 4),
    })

# Current command queue of each of the four TA stats queues.
for i in range(4):
    ta_queue = agx.initdata.regionB.stats_ta.stats.queues[i]
    regs[f"ta{i}_cq"] = t(ta_queue.addrof("cur_cmdqueue"))
|
||
|
|
||
|
# Firmware status words, eight raw regionC words and the 3D statistics.
# Entries are added in the order listed; the commented-out lines are
# optional signals that can be re-enabled in place.
regs.update({
    #"pwr_status": t(agx.initdata.regionB.hwdata_a.addrof("pwr_status")),
    #"pstate": t(agx.initdata.regionB.hwdata_a.addrof("cur_pstate")),
    #"temp_c": t(agx.initdata.regionB.hwdata_a.addrof("temp_c")),
    #"pwr_mw": t(agx.initdata.regionB.hwdata_a.addrof("avg_power_mw")),
    #"pwr_ts": t(agx.initdata.regionB.hwdata_a.addrof("update_ts")),

    #"unk_10": t(agx.initdata.regionB.hwdata_a.addrof("unk_10")),
    #"unk_14": t(agx.initdata.regionB.hwdata_a.addrof("unk_14")),
    #"actual_pstate": t(agx.initdata.regionB.hwdata_a.addrof("actual_pstate")),
    #"tgt_pstate": t(agx.initdata.regionB.hwdata_a.addrof("tgt_pstate")),
    #"unk_40": t(agx.initdata.regionB.hwdata_a.addrof("unk_40")),
    #"unk_44": t(agx.initdata.regionB.hwdata_a.addrof("unk_44")),
    #"unk_48": t(agx.initdata.regionB.hwdata_a.addrof("unk_48")),
    #"freq_mhz": t(agx.initdata.regionB.hwdata_a.addrof("freq_mhz")),

    #"unk_748.0": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")),
    #"unk_748.1": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+4),
    #"unk_748.2": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+8),
    #"unk_748.3": t(agx.initdata.regionB.hwdata_a.addrof("unk_748")+12),
    #"use_percent": t(agx.initdata.regionB.hwdata_a.addrof("use_percent")),
    #"unk_83c": t(agx.initdata.regionB.hwdata_a.addrof("unk_83c")),
    #"freq_with_off": t(agx.initdata.regionB.hwdata_a.addrof("freq_with_off")),
    #"unk_ba0": t(agx.initdata.regionB.hwdata_a.addrof("unk_ba0")),
    #"unk_bb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_bb0")),
    #"unk_c44": t(agx.initdata.regionB.hwdata_a.addrof("unk_c44")),
    #"unk_c58": t(agx.initdata.regionB.hwdata_a.addrof("unk_c58")),

    #"unk_3ca0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca0")),
    #"unk_3ca8": t(agx.initdata.regionB.hwdata_a.addrof("unk_3ca8")),
    #"unk_3cb0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cb0")),
    #"ts_last_idle": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_idle")),
    #"ts_last_poweron": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweron")),
    #"ts_last_poweroff": t(agx.initdata.regionB.hwdata_a.addrof("ts_last_poweroff")),
    #"unk_3cd0": t(agx.initdata.regionB.hwdata_a.addrof("unk_3cd0")),

    # fw_status fields; each signal is named after the field it samples.
    **{
        field: t(agx.initdata.fw_status.addrof(field))
        for field in (
            "halt_count",
            "halted",
            "resume",
            "unk_40",
            "unk_ctr",
            "unk_60",
            "unk_70",
        )
    },
    # Eight 32-bit words at regionC offsets 0x118c0..0x118dc, named
    # c_<offset in hex>.
    **{
        f"c_{offset:x}": t(agx.initdata.regionC._addr + offset)
        for offset in range(0x118c0, 0x118e0, 4)
    },
    "3d_cmds": t(agx.initdata.regionB.stats_3d.addrof("total_cmds")),
    #"3d_tvb_oflws_1": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_1")),
    #"3d_tvb_oflws_2": t(agx.initdata.regionB.stats_3d.stats.addrof("tvb_overflows_2")),
    "3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")),
    "3d_ts": t(agx.initdata.regionB.stats_3d.stats.addrof("unk_timestamp")),
    #"3d_cur_stamp_id": t(agx.initdata.regionB.stats_3d.stats.addrof("cur_stamp_id")),
})

# Current command queue of each of the four 3D stats queues.
for i in range(4):
    queue_3d = agx.initdata.regionB.stats_3d.stats.queues[i]
    regs[f"3d{i}_cq"] = t(queue_3d.addrof("cur_cmdqueue"))
|
||
|
|
||
|
|
||
|
# Per-renderer signals: work-queue done pointers and busy flags, the four
# renderer-wide stamps, and one timestamp pair per submitted frame.
for i, r in enumerate(renderers):
    regs.update({
        f"r{i}_3d_done": t(r.wq_3d.info.pointers.addrof("gpu_doneptr")),
        #f"r{i}_3d_rptr": t(r.wq_3d.info.pointers.addrof("gpu_rptr")),
        f"r{i}_3d_busy": t(r.wq_3d.info.addrof("busy")),
        #f"r{i}_3d_blk": t(r.wq_3d.info.addrof("blocked_on_barrier")),
        #f"r{i}_3d_2c": t(r.wq_3d.info.addrof("unk_2c")),
        #f"r{i}_3d_54": t(r.wq_3d.info.addrof("unk_54")),

        f"r{i}_ta_done": t(r.wq_ta.info.pointers.addrof("gpu_doneptr")),
        #f"r{i}_ta_rptr": t(r.wq_ta.info.pointers.addrof("gpu_rptr")),
        f"r{i}_ta_busy": t(r.wq_ta.info.addrof("busy")),
        #f"r{i}_ta_blk": t(r.wq_ta.info.addrof("blocked_on_barrier")),
        #f"r{i}_ta_2c": t(r.wq_ta.info.addrof("unk_2c")),
        #f"r{i}_ta_54": t(r.wq_ta.info.addrof("unk_54")),

        # The stamps are attributes of the renderer, not of a frame, so
        # their names carry only the renderer index. They used to include
        # an `_f{j}` infix, but no frame loop is active here: `j` was a
        # leftover from an earlier loop and always rendered as the last
        # frame index, which mislabelled the signal.
        f"r{i}_ta_stamp1": t(r.stamp_ta1._addr),
        f"r{i}_ta_stamp2": t(r.stamp_ta2._addr),
        f"r{i}_3d_stamp1": t(r.stamp_3d1._addr),
        f"r{i}_3d_stamp2": t(r.stamp_3d2._addr),
    })

    # Per-frame timestamps; here `j` really is the frame index.
    for j in range(FRAMES):
        work = r.work[j]
        regs.update({
            f"r{i}_f{j}_3d_ts": t(work.wc_3d.ts1._addr),
            f"r{i}_f{j}_ta_ts": t(work.wc_ta.ts1._addr),
        })
|
||
|
|
||
|
# Divider passed to the logic analyzer, and the number of ticks to record.
# NOTE(review): 24000000 is presumably the 24 MHz timer frequency, which
# would make this a 25-second capture -- confirm.
div = 4
ticks = (24000000 // div) * 25

la = GPIOLogicAnalyzer(u, regs=regs, cpu=analyzer_cpu, div=div)


print(
    "==========================================",
    "## Poll prior to job start",
    "==========================================",
    sep="\n",
)

#mon.poll()
#agx.poll_objects()


print(
    "==========================================",
    "## After start",
    "==========================================",
    sep="\n",
)
#agx.poll_objects()

#mon.poll()
print(
    "==========================================",
    "## Waiting",
    "==========================================",
    sep="\n",
)

# List each renderer's TA and 3D work-queue info address together with
# the event id of its first queued work item.
print("Queues:")
for i, r in enumerate(renderers):
    print(f" Renderer {i}")
    first_work = r.work[0]
    print(f" TA: {r.wq_ta.info._addr:#x} (stamp {first_work.ev_ta.id})")
    #print(r.wq_ta.info)
    print(f" 3D: {r.wq_3d.info._addr:#x} (stamp {first_work.ev_3d.id})")
    #print(r.wq_3d.info)

print(
    "==========================================",
    "## Run",
    "==========================================",
    sep="\n",
)

# Start capturing before any renderer is kicked off.
la.start(ticks, bufsize=0x8000000)
|
||
|
|
||
|
# Kick every renderer, then service the firmware until each renderer's 3D
# event has fired. The capture is always completed and displayed, even if
# the run raises.
try:
    active = renderers[:RENDERERS]

    for r in active:
        r.run()

    for r in active:
        # NOTE(review): the scraped source lost its indentation; all five
        # statements below are assumed to form the body of the wait loop.
        while not r.ev_3d.fired:
            agx.asc.work()
            agx.poll_channels()
            print("==========================================")
            agx.poll_objects()
            mon.poll()

        r.wait()

    #agx.poll_objects()

    #print("==========================================")
    #print("## Stop ASC")
    #print("==========================================")

    #agx.asc.stop()

    ##time.sleep(0.1)

    ##agx.poll_objects()

    #print("==========================================")
    #print("## Start ASC")
    #print("==========================================")

    #agx.asc.start()

    ##agx.poll_objects()

    #print("==========================================")
    #print("## Run 2")
    #print("==========================================")

    #for r in renderers[RENDERERS//2:]:
        #r.run()

    #for r in renderers[RENDERERS//2:]:
        #while not r.ev_3d.fired:
            #agx.asc.work()
            #agx.poll_channels()
            #print("==========================================")

        #r.wait()

        #agx.poll_objects()

    #mon.poll()

finally:
    #agx.poll_objects()
    #mon.poll()

    la.complete()
    la.show()

# NOTE(review): assumed to sit outside the finally clause, so the pause
# only happens after a successful run -- confirm against the upstream file.
time.sleep(2)
|
||
|
|