m1n1/avd: Add initial AVD driver prototype/tracer

Not much to see here, most of the juice is over at:

    https://github.com/eiln/avd.git

The kernel driver (m1n1.fw.avd) only really pipes the instruction stream
into the respective hardware FIFOs and then hushes the interrupt lines.
Most of the work (bitstream syntax parsing and instruction generation)
is done in the avid repo above.

I'm hoping to keep this userland-kernel separation in the very imminent
actual driver.

experiments/avd.py: Decode on the command line. Read file for usage.
experiments/avd_e.py: Decode via emulated instruction stream.
experiments/avd_f.py: Decode via Cortex-M3 firmware (for debugging).
hv/trace_avd.py: Tracer. Read file for usage.
m1n1/fw/avd/__init__.py: Driver base class (power, tunables, etc).
m1n1/fw/avd/decoder.py: Codec-specific decode logic + mini media player.

Signed-off-by: Eileen Yoon <eyn@gmx.com>
This commit is contained in:
Eileen Yoon 2023-10-20 05:29:34 +09:00 committed by Hector Martin
parent 90eef7223e
commit 16a6b41c98
6 changed files with 1013 additions and 0 deletions

86
proxyclient/experiments/avd.py Executable file
View file

@ -0,0 +1,86 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
import sys, pathlib, argparse
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
sys.path.append("/home/eileen/asahi/avd") # git clone https://github.com/eiln/avd.git
# Decode with our own generated instruction stream
from m1n1.setup import *
from m1n1.utils import *
from m1n1.fw.avd import *
from tools.common import ffprobe
if __name__ == "__main__":
    # Decode a bitstream on real AVD hardware using the instruction stream
    # generated by avid, optionally dumping raw NV12 data and/or PNG frames.
    parser = argparse.ArgumentParser()
    #
    # Usage:
    #     ffmpeg -i input.mp4 -c:v copy (or reencode libx264) input.264
    #     python3 experiments/avd.py -i input.264 -a
    #
    # - Supports .264, .265, and .ivf formats.
    # - Regarding the codec, it's whatever codec features are supported.
    #   Check avid for details.
    # - Also ensure to change the sys.path.append above to the avid repo
    #   as it does not install system-wide.
    #
    parser.add_argument('-i', '--input', type=str, required=True, help="path to input bitstream")
    parser.add_argument('-n', '--num', type=int, default=1, help="frame count")
    parser.add_argument('-a', '--all', action='store_true', help="run all frames")
    parser.add_argument('-x', '--stfu', action='store_true')
    parser.add_argument('-p', '--poll', action='store_true', help="poll iommu space")
    parser.add_argument('--save-raw', type=str, default="", help="file name to save raw yuv")
    parser.add_argument('--save-images', type=str, default="", help="dirname to save images")
    args = parser.parse_args()

    # Select the avid bitstream parser and the matching hardware-side decoder
    # in a single dispatch so the two can never disagree.
    mode = ffprobe(args.input)
    if mode == "h264":
        from avid.h264.decoder import AVDH264Decoder
        dec = AVDH264Decoder()
        hw_decoder_cls = AVDH264Dec
    elif mode == "h265":
        from avid.h265.decoder import AVDH265Decoder
        dec = AVDH265Decoder()
        hw_decoder_cls = AVDH265Dec
    elif mode == "vp09":
        from avid.vp9.decoder import AVDVP9Decoder
        dec = AVDVP9Decoder()
        hw_decoder_cls = AVDVP9Dec
    else:
        raise RuntimeError("unsupported codec")
    if args.stfu:
        dec.stfu = True
        dec.hal.stfu = True
    nal_stop = 0 if args.all else 1
    units = dec.setup(args.input, nal_stop=nal_stop, num=args.num)

    avd = AVDDevice(u)
    avd.decoder = hw_decoder_cls(avd)
    avd.decoder.winname = args.input
    if args.stfu:
        avd.stfu = True
    avd.boot()

    # bytearray: in-place append instead of re-copying an immutable bytes
    # object for every decoded frame.
    rawvideo = bytearray()
    num = len(units) if args.all else min(args.num, len(units))
    for i, unit in enumerate(units[:num]):
        print(unit)
        inst = dec.decode(unit)
        if i == 0:
            # The IOVA space is mapped only after the first unit was parsed,
            # using the allocator top reported by avid at that point.
            avd.ioalloc_at(0x0, dec.allocator_top(), stream=0, val=0)
            if args.poll:
                avd.iomon.add(0x0, dec.allocator_top())
        frame = avd.decoder.decode(dec.ctx, unit, inst)
        if frame is not None:
            if args.save_raw:
                rawvideo += frame.y_data + frame.uv_data
            if args.save_images:
                image_dir = f"data/out/{args.save_images}"
                os.makedirs(image_dir, exist_ok=True)
                path = os.path.join(image_dir, "out%03d.png" % (avd.decoder.count))
                cv2.imwrite(path, frame.img)

    if args.save_raw:
        path = os.path.join(f"data/out/{args.save_raw}")
        # data/out only used to be created on the --save-images path.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "wb") as f:
            f.write(rawvideo)

View file

@ -0,0 +1,79 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
import sys, pathlib, argparse, os
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
sys.path.append("/home/eileen/asahi/avd") # git clone https://github.com/eiln/avd.git
# Decode via firmware-emulated AVD instruction stream
from m1n1.setup import *
from m1n1.utils import *
from m1n1.fw.avd import *
from avd_emu import AVDEmulator
from tools.common import ffprobe
if __name__ == "__main__":
    # Decode on real AVD hardware, but feed it the instruction stream produced
    # by the emulated firmware (AVDEmulator) from traced frame_params files
    # instead of the one generated by avid.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=str, required=True, help="input bitstream")
    parser.add_argument('-d', '--dir', type=str, required=True, help="path to trace dir")
    parser.add_argument('-f', '--firmware', type=str, default="data/fw.bin", help="path to fw")
    parser.add_argument('-n', '--num', type=int, default=1, help="count")
    parser.add_argument('-a', '--all', action='store_true', help="run all")
    parser.add_argument('-x', '--stfu', action='store_true')
    parser.add_argument('-p', '--poll', action='store_true', help="poll iommu space")
    parser.add_argument('--save-raw', action='store_true', help="save raw yuv")
    args = parser.parse_args()
    mode = ffprobe(args.input)

    emu = AVDEmulator(args.firmware, stfu=True)
    emu.start()

    # Traced frame_params blobs, ordered by file name.
    paths = sorted(os.path.join(args.dir, name) for name in os.listdir(args.dir) if "frame" in name)
    if not paths:
        # An explicit error survives `python -O`, unlike an assert.
        raise RuntimeError(f"no frame traces found in {args.dir}")
    num = len(paths) if args.all else args.num
    num = min(len(paths), num)

    # Select the avid bitstream parser and the matching hardware-side decoder.
    if mode == "h264":
        from avid.h264.decoder import AVDH264Decoder
        dec = AVDH264Decoder()
        hw_decoder_cls = AVDH264Dec
    elif mode == "h265":
        from avid.h265.decoder import AVDH265Decoder
        dec = AVDH265Decoder()
        hw_decoder_cls = AVDH265Dec
    elif mode == "vp09":
        from avid.vp9.decoder import AVDVP9Decoder
        dec = AVDVP9Decoder()
        hw_decoder_cls = AVDVP9Dec
    else:
        raise RuntimeError("unsupported codec")
    if args.stfu:
        dec.stfu = True
        dec.hal.stfu = True
    units = dec.setup(args.input)

    avd = AVDDevice(u)
    avd.decoder = hw_decoder_cls(avd)
    avd.decoder.winname = args.input
    if args.stfu:
        avd.stfu = True
    avd.boot()

    avd.ioalloc_at(0x0, 0xf000000, stream=0)
    if args.poll:
        avd.iomon.add(0x0, 0xf000000)

    for i, unit in enumerate(units[:num]):
        print(unit)
        # The instructions avid generates are not used here; the call is kept
        # because dec.ctx is consumed below.
        # NOTE(review): presumably decode() is what updates dec.ctx - confirm.
        dec.decode(unit)
        path = paths[i]
        print(path)
        inst = emu.avd_cm3_cmd_decode(path)
        avd.decoder.decode(dec.ctx, unit, inst)
        if args.save_raw:
            y_data = avd.ioread(dec.ctx.y_addr, dec.ctx.luma_size, stream=0)
            uv_data = avd.ioread(dec.ctx.uv_addr, dec.ctx.chroma_size, stream=0)
            os.makedirs("data/raw-emu", exist_ok=True)
            with open("data/raw-emu/%03d.bin" % (i), "wb") as f:
                f.write(y_data + uv_data)

View file

@ -0,0 +1,63 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
import sys, pathlib, argparse
sys.path.append(str(pathlib.Path(__file__).resolve().parents[1]))
sys.path.append("/home/eileen/asahi/avd") # git clone https://github.com/eiln/avd.git
# Decode via AVD Cortex-M3 firmware
from m1n1.setup import *
from m1n1.utils import *
from m1n1.fw.avd import *
import cv2
from avd_emu import AVDEmulator
from avid.h264.decoder import AVDH264Decoder
from avid.vp9.decoder import AVDVP9Decoder
from avid.utils import *
from tools.common import *
if __name__ == "__main__":
    # Decode VP9 by booting the real Cortex-M3 firmware on the AVD and handing
    # it the traced frame_params through the mailbox (for debugging).
    parser = argparse.ArgumentParser()
    parser.add_argument('-f','--firmware', type=str, default="data/fw.bin", help="path to CM3 firmware")
    parser.add_argument('-i','--input', type=str, required=True, help="path to input bitstream")
    parser.add_argument('-d','--dir', type=str, required=True, help="frame_params trace dir")
    parser.add_argument('-p', '--poll', action='store_true', help="poll iommu space")
    args = parser.parse_args()

    # Traced frame_params blobs, ordered by file name.
    paths = os.listdir(os.path.join(args.dir))
    paths = sorted([os.path.join(args.dir, path) for path in paths if "frame" in path])
    assert(len(paths))

    avd = AVDDevice(u)
    avd.decoder = AVDVP9Dec(avd)
    avd.boot()
    avd.mcpu_decode_init(args.firmware)
    avd.poll()

    avd.ioalloc_at(0x0, 0xff0000, stream=0)
    if (args.poll):
        avd.iomon.add(0x0, 0xff0000)
    avd.ioalloc_at(0x0, 0xb84000, stream=1)
    avd.iomon.poll()

    emu = AVDEmulator(args.firmware, stfu=True)
    emu.start()
    dec = AVDVP9Decoder()
    dec.stfu = True
    dec.hal.stfu = True
    units = dec.setup(args.input, num=4, do_probs=True)
    for n, unit in enumerate(units):
        # The generated instructions are unused (the firmware produces its
        # own); the call is kept because dec.ctx is consumed below.
        # NOTE(review): presumably decode() is what updates dec.ctx - confirm.
        dec.decode(unit)
        avd.decoder.set_payload(dec.ctx, unit)
        avd.iowrite(dec.ctx.probs_addr, unit.get_probs(), stream=0)
        avd.iomon.poll()
        cmd = emu.set_params(paths[n])
        xxde(cmd)
        avd.iowrite(0x0, emu.dart1_space, stream=1)
        avd.iomon.poll()
        avd.avd_wbuf(emu.get_cmd_addr(n), cmd)
        # Writing the command address to the mailbox register kicks the firmware.
        avd.avd_w32(0x1098054, emu.get_cmd_addr(n))
        avd.poll()
        avd.iomon.poll()
        # get_nv12_disp_frame() takes (ctx, sl) and returns an AVDFrame, so
        # pass the unit along and show the frame's .img attribute.
        frame = avd.decoder.get_nv12_disp_frame(dec.ctx, unit)
        cv2.imshow(avd.decoder.winname, frame.img); cv2.waitKey(1)

224
proxyclient/hv/trace_avd.py Normal file
View file

@ -0,0 +1,224 @@
# SPDX-License-Identifier: MIT
from m1n1.trace import Tracer
from m1n1.trace.dart import DARTTracer
from m1n1.utils import *
from m1n1.proxyutils import RegMonitor
# Configure hypervisor time stealing before any tracer is installed.
# NOTE(review): the meaning of the (0, 1) arguments is not visible from this
# file - confirm against the proxy's hv_set_time_stealing definition.
hv.p.hv_set_time_stealing(0, 1)
# Usage
#
# 2023/12/25: Only tested on J293AP (AVD revision "Viola"/V3).
# Should work on all baseline M1s.
#
# 1. Start tracer under the hypervisor
#
# 2. Send over the bitstream(s) to the target machine
# Supported formats: .264, .265, .ivf (unless you want to add a demuxer to avid/codecs)
#
# 3. The tracer is purposely not activated at boot. As of 13.5, it takes ~2 HEVC runs
# to get to the login screen - it's probably decoding the login screen. By "not activated",
# I mean the tracer variable "outdir" is left empty, so that the tracer does not save the
# traced data and merely logs the IPC transactions.
#
# [cpu0] [AVDTracer@/arm-io/avd] sent fw command at 0x108eb30
# 00000000 00000801 00030007 0050c000 000002a8 00000003 01091a70 01091a78 00000001
# 00000020 0050caa4 01091af0 01091bc0 01091c50 0050c2a4 0050c210 0050c28c 00000000
# 00000040 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
# [cpu0] [0xfffffe00149d9984] MMIO: W.4 0x269098054 (avd[0], offset 0x1098054) = 0x108eb30
# [cpu7] [0xfffffe00149d99b0] MMIO: R.4 0x269098048 (avd[0], offset 0x1098048) = 0x9
#
# 4. To save the trace contents, break into the hypervisor console and set
#
# >>> tracer.outdir = "matrix_1080X512"
#
# The output data dir will become 'data/[inferred codec name]/$outdir'.
# The data dir will be created if it does not exist. The directory structure is
# meant to look like this:
#
# proxyclient/
# data/
# h264/*
# h265/*
# vp9/*
#
# 5. After the directory name is configured, trigger avd from the target machine:
#
# ffmpeg -hwaccel videotoolbox -i matrix_1080X512.ivf
#
# Or you can access VT directly yourself with some Obj-C (but why would you want to do that..)
# Though I may need to do that to test some sekrit undocumented features.
# The input bitstream should be the one matching the directory name just set.
#
# 6. If all goes well (i.e. the bitstream is decoding on AVD), the tracer will save:
#
# >> ~/m1n1/proxyclient $ ls data/*
# data/vp9/matrix_1080X512:
# frame.2023-12-17T21:17:47.519048.00004000.bin probs.2023-12-17T21:17:47.519048.00004000.00004000.bin
# frame.2023-12-17T21:17:47.578537.000bc000.bin probs.2023-12-17T21:17:47.578537.000bc000.0000c000.bin
# frame.2023-12-17T21:17:47.633768.00174000.bin probs.2023-12-17T21:17:47.633768.00174000.00014000.bin
# frame.2023-12-17T21:17:47.688067.0022c000.bin probs.2023-12-17T21:17:47.688067.0022c000.0001c000.bin
#
# The "frame" is the macOS source frame_params struct, and this directory is the
# one intended to be supplied to all the avid tools (e.g. emulator, differs).
# For VP9 (and presumably AV1) the tracer will additionally save the "probs" blob.
# You can also bypass FairPlay encryption and save the coded bitstream, but that's
# an exercise left for the reader.
#
# 7. Copy the data directory over to `avd/data/*` & have fun :D
#
# python3 avd_emu.py -i frame.2023-12-17T21:17:47.519048.00004000.bin # emulate single fp
# python3 avd_emu.py -d vp9/matrix_1080X512 -a # emulate all using trace dir name
# python3 tools/test.py -m vp9 -d vp9/matrix_1080X512 -e -a # test against emulated output
#
# Optionally save the firmware to run on the emulator via
# >>> tracer.save_firmware("data/fw.bin")
import datetime
import os
import struct
import time
class AVDTracer(Tracer):
    """Hypervisor tracer for the AVD firmware mailbox.

    Hooks the two mailbox registers at offsets 0x1098054 and 0x1098064, dumps
    every firmware command that passes through them and, when `outdir` is
    set, saves the frame_params blob (and the VP9 probability buffer) of
    each decode command under ``data/<codec>/<outdir>/``.
    """
    DEFAULT_MODE = TraceMode.SYNC

    def __init__(self, hv, dev_path, dart_tracer, verbose=False):
        """Bind to the ADT node `dev_path` and reuse the DART of `dart_tracer`."""
        super().__init__(hv, verbose=verbose, ident=type(self).__name__ + "@" + dev_path)
        self.dev = hv.adt[dev_path]
        self.dart_tracer = dart_tracer
        self.base = self.dev.get_reg(0)[0] # 0x268000000
        self.p = hv.p
        self.u = hv.u
        self.dart = dart_tracer.dart

        mon = RegMonitor(hv.u)
        AVD_REGS = [
            #(0x1000000, 0x4000, "unk0"),
            #(0x1010000, 0x4000, "dart"),
            #(0x1002000, 0x1000, "unk2"),
            (0x1070000, 0x4000, "piodma"),
            (0x1088000, 0x4000, "sram"),
            (0x108c000, 0xc000, "cmd"),
            #(0x1098000, 0x4000, "mbox"),
            #(0x10a3000, 0x1000, "unka"),
            (0x1100000, 0xc000, "config"),
            (0x110c000, 0x4000, "dma"),
            #(0x1400000, 0x4000, "wrap"),
        ]
        #for (offset, size, name) in AVD_REGS: mon.add(self.base + offset, size, name=name)
        self.mon = mon

        def make_readmem(stream):
            # RegMonitor read callback that reads through the DART on the
            # given stream; a failed translation is printed and reported as
            # None rather than aborting the trace.
            def readmem_iova(addr, size, readfn=None):
                try:
                    return dart_tracer.dart.ioread(stream, addr, size)
                except Exception as e:
                    print(e)
                    return None
            return readmem_iova

        iomon = RegMonitor(hv.u, ascii=True)
        iomon1 = RegMonitor(hv.u, ascii=True)
        iomon.readmem = make_readmem(0)
        iomon1.readmem = make_readmem(1)
        self.iomon = iomon
        self.iomon1 = iomon1

        self.state_active = False
        self.outdir = ""
        # Initialised here so a decode command seen before any "command
        # start" no longer raises AttributeError.
        self.access_idx = 0

    def avd_r32(self, off): return self.p.read32(self.base + off)
    def avd_w32(self, off, x): return self.p.write32(self.base + off, x)
    def avd_r64(self, off): return self.p.read64(self.base + off)
    def avd_w64(self, off, x): return self.p.write64(self.base + off, x)

    def start(self):
        """Install the MMIO trace ranges and the two mailbox hooks."""
        self.hv.trace_range(irange(self.dev.get_reg(0)[0], self.dev.get_reg(0)[1]), mode=TraceMode.SYNC)
        # Tracing is switched off for the 0x18000 bytes starting at 0x1080000.
        self.hv.trace_range(irange(self.base + 0x1080000, 0x18000), False)
        # NOTE(review): add_tracer presumably takes (read hook, write hook)
        # in that order, matching the r_/w_ prefixes - confirm.
        self.hv.add_tracer(irange(self.base + 0x1098054, 4), "avd-mbox-54", TraceMode.SYNC, self.evt_rw_hook, self.w_AVD_MBOX_0054)
        self.hv.add_tracer(irange(self.base + 0x1098064, 4), "avd-mbox-64", TraceMode.SYNC, self.r_AVD_MBOX_0064, self.evt_rw_hook)

    def poll(self):
        """Poll the register monitor and both IOVA monitors."""
        self.mon.poll()
        self.iomon.poll()
        self.iomon1.poll()

    def evt_rw_hook(self, x):
        """Generic mailbox hook: just poll the monitors."""
        self.poll()

    def w_AVD_MBOX_0054(self, x):
        """Handle a command pointer written to mailbox register 0x1098054.

        `x.data` is the device-relative address of the command. The low
        nibble of its first word is the opcode: 0 = start, 1 = decode (its
        frame_params are optionally saved), 2 = end.
        """
        if not ((x.data >= 0x1080000) and (x.data <= 0x10a0000)):
            return
        self.log("Sent fw command at 0x%x" % (x.data))
        self.poll()
        cmd = self.read_regs(self.base + x.data, 0x60)
        chexdump32(cmd)

        opcode = struct.unpack("<I", cmd[:4])[0] & 0xf
        if opcode == 0:
            self.log("Command start")
            self.state_active = True
            self.access_idx = 0

        elif opcode == 1:
            frame_params_iova = self.p.read32(self.base + x.data + 0x8)
            if self.outdir and (frame_params_iova != 0x0):
                t = datetime.datetime.now().isoformat()
                frame_params = self.dart.ioread(1, frame_params_iova, 0xb0000)
                word = self.p.read32(self.base + x.data)
                # The codec lives in bits 0x400/0x800 of the first word.
                # The old first test, (word & 0x000) == 0x000, was always
                # true, so everything was filed as h265.
                name = {0x000: "h265", 0x400: "h264", 0x800: "vp9"}.get(word & 0xc00, "unk")
                outdir = os.path.join("data", name, self.outdir)
                os.makedirs(outdir, exist_ok=True)
                with open(os.path.join(outdir, f'frame.{t}.{frame_params_iova:08x}.bin'), "wb") as f:
                    f.write(frame_params)
                if (word & 0x800) == 0x800: # save probs for vp9
                    iova = [0x4000, 0xc000, 0x14000, 0x1c000][self.access_idx % 4]
                    probs = self.dart.ioread(0, iova, 0x4000)
                    with open(os.path.join(outdir, f'probs.{t}.{frame_params_iova:08x}.{iova:08x}.bin'), "wb") as f:
                        f.write(probs)
            # NOTE(review): counted once per decode command so the probs
            # slot index keeps advancing while saving is off; the original
            # nesting of this line was lost with the indentation - confirm.
            self.access_idx += 1

        elif opcode == 2:
            self.log("Command end")
            self.state_active = False
            self.access_idx = 0

    def r_AVD_MBOX_0064(self, x):
        """Handle a command pointer read from mailbox register 0x1098064."""
        if ((x.data >= 0x1080000) and (x.data <= 0x10a0000)):
            self.log("Received fw command at 0x%x" % (x.data))
            cmd = self.read_regs(self.base + x.data, 0x60)
            chexdump32(cmd)
            self.poll()

    def read_regs(self, addr, size):
        """Return `size` bytes of device memory at `addr`.

        The data is copied with 32-bit accesses into a scratch buffer and
        read back from there over the proxy interface.
        """
        scratch = self.u.malloc(size)
        try:
            # self.p rather than the console-global `p`, so the class also
            # works when imported outside the hv script namespace.
            self.p.memcpy32(scratch, addr, size)
            return self.p.iface.readmem(scratch, size)
        finally:
            # Return the scratch buffer; it used to leak on every command.
            self.u.heap.free(scratch)

    def read_iova(self, start, end, stream=0):
        """Read IOVA range [start, end) in 0x4000-byte pages on `stream`.

        Pages that cannot be read are returned as zeros.
        """
        pages = []
        for i in range((end - start) // 0x4000):
            try:
                d = self.dart_tracer.dart.ioread(stream, start + (i * 0x4000), 0x4000)
            except Exception:
                d = b'\0' * 0x4000
            pages.append(d)
        return b''.join(pages)

    def save_firmware(self, path="fw.bin"):
        """Dump the 0x10000 bytes at device offset 0x1080000 to `path`."""
        firmware = self.read_regs(self.base + 0x1080000, 0x10000)
        with open(path, "wb") as f:
            f.write(firmware)
# Script entry: enable the clocks of the AVD and its DART, then start the
# DART tracer followed by the AVD tracer.
p.pmgr_adt_clocks_enable('/arm-io/dart-avd')
p.pmgr_adt_clocks_enable('/arm-io/avd')
dart_tracer = DARTTracer(hv, "/arm-io/dart-avd", verbose=0)
dart_tracer.start()
dart = dart_tracer.dart
# `tracer` is the object the usage notes refer to (tracer.outdir,
# tracer.save_firmware) from the hypervisor console.
tracer = AVDTracer(hv, '/arm-io/avd', dart_tracer, verbose=3)
tracer.start()

View file

@ -0,0 +1,311 @@
# SPDX-License-Identifier: MIT
from ...hw.dart import DART
from ...proxyutils import RegMonitor
from ...utils import *
from .decoder import *
import contextlib
import struct
class AVDDevice:
    """Bring-up and register/IOVA access helper for the AVD block.

    Holds the proxy, the device base address taken from the ADT, an
    initialised DART and register/IOVA monitors. The codec-specific work is
    delegated to `self.decoder`.
    """

    # (offset, value) pairs written verbatim, in this order, by
    # avd_dma_tunables_stage0().
    _DMA_TUNABLES = (
        (0x110c044, 0x40),
        (0x110c084, 0x400040),
        (0x110c244, 0x800034),
        (0x110c284, 0x18),
        (0x110c2c4, 0xb40020),
        (0x110c3c4, 0xd40030),
        (0x110c404, 0x180014),
        (0x110c444, 0x104001c),
        (0x110c484, 0x2c0014),
        (0x110c4c4, 0x1200014),
        (0x110c504, 0x400018),
        (0x110c544, 0x1340024),
        (0x110c584, 0x580014),
        (0x110c5c4, 0x1580014),
        (0x110c1c4, 0x6c0048),
        (0x110c204, 0xb40048),
        (0x110c384, 0xfc0038),
        (0x110c604, 0x1340030),
        (0x110c644, 0x16c00b0),
        (0x110c684, 0x21c00b0),
        (0x110c844, 0x164001c),
        (0x110c884, 0x2cc0028),
        (0x110c744, 0x1800018),
        (0x110c784, 0x2f40020),
        (0x110c7c4, 0x1980018),
        (0x110c804, 0x314001c),
        (0x110c8c4, 0x1b00024),
        (0x110c904, 0x3300040),
        (0x110c944, 0x1d4001c),
        (0x110c984, 0x370002c),
        (0x110c9c4, 0x1f00030),
        (0x110ca04, 0x39c003c),
        (0x110ca44, 0x2200014),
        (0x110ca84, 0x3d80014),
        (0x110cb04, 0x2340014),
        (0x110cb44, 0x3ec0014),
        (0x110cac4, 0x2480080),
        (0x110cc8c, 0x2c80014),
        (0x110cccc, 0x2dc0014),
        (0x110cc88, 0x2f00060),
        (0x110ccc8, 0x3500054),
        (0x110cb84, 0x3a4001c),
        (0x110cbc4, 0x4000040),
        (0x110cc04, 0x3c00040),
        (0x110cc44, 0x44000c0),
    )

    def __init__(self, u, dev_path="/arm-io/avd", dart_path="/arm-io/dart-avd"):
        """Enable clocks, initialise the DART and set up the monitors.

        u: proxy utilities object (provides .proxy, .iface, .adt, .heap).
        dev_path / dart_path: ADT paths of the AVD and of its DART.
        """
        self.u = u
        self.p = u.proxy
        self.iface = u.iface

        self.PAGE_SIZE = 0x4000
        self.base = u.adt[dev_path].get_reg(0)[0] # 0x268000000
        self.node = u.adt[dev_path]

        self.p.pmgr_adt_clocks_enable(dev_path)
        self.p.pmgr_adt_clocks_enable(dart_path)
        dart = DART.from_adt(u, dart_path)
        dart.initialize()
        self.dart = dart

        mon = RegMonitor(u)
        AVD_REGS = [
            #(0x1000000, 0x4000, "unk0"),
            #(0x1010000, 0x4000, "dart"),
            #(0x1002000, 0x1000, "unk2"),
            #(0x1070000, 0x4000, "piodma"),
            #(0x108c000, 0xc000, "cmd"),
            #(0x1098000, 0x4000, "mbox"),
            #(0x10a3000, 0x1000, "unka"),
            (0x1100000, 0xc000, "dec"),
            (0x110c000, 0x4000, "dma"),
            #(0x1400000, 0x4000, "wrap"),
        ]
        for (offset, size, name) in AVD_REGS:
            mon.add(self.base + offset, size, name=name)
        self.mon = mon

        def make_readmem(stream):
            # RegMonitor read callback going through the DART on `stream`;
            # read errors are printed and reported as None.
            def readmem_iova(addr, size, readfn=None):
                try:
                    return self.dart.ioread(stream, addr, size)
                except Exception as e:
                    print(e)
                    return None
            return readmem_iova

        iomon = RegMonitor(u, ascii=True)
        iomon1 = RegMonitor(u, ascii=True)
        iomon.readmem = make_readmem(0)
        iomon1.readmem = make_readmem(1)
        self.iomon = iomon
        self.iomon1 = iomon1
        self.stfu = False
        self.decoder = AVDDec(self)

    def log(self, x): print(f"[AVD] {x}")
    def poll(self): self.mon.poll()

    def avd_r32(self, off): return self.p.read32(self.base + off)
    def avd_w32(self, off, x):
        """Write 32-bit `x` at device offset `off`, logged unless stfu is set."""
        if (not self.stfu):
            self.log("w32(0x%x, 0x%x)" % (off, x))
        return self.p.write32(self.base + off, x)
    def avd_r64(self, off): return self.p.read64(self.base + off)
    def avd_w64(self, off, x): return self.p.write64(self.base + off, x)

    def avd_wbuf(self, off, buf):
        """Write `buf` at device offset `off` as little-endian 32-bit words.

        Trailing bytes that do not fill a whole word are not written.
        """
        for n in range(len(buf) // 4):
            word = struct.unpack_from("<I", buf, n * 4)[0]
            self.p.write32(self.base + off + (n * 4), word)

    def boot(self):
        """Reset-time setup: clear the CODE/SRAM windows and load tunables."""
        self.avd_w32(0x1000000, 0xfff)
        # These were hard-coded as 0x2690100xx, i.e. base 0x268000000 plus
        # the offsets below; use self.base so other base addresses work too.
        # Passing the current value as the clear mask makes each mask32 an
        # effective plain write of `val`.
        for off, val in ((0x1010060, 0x80016100), (0x1010068, 0xf0f0f), (0x101006c, 0x80808)):
            addr = self.base + off
            self.p.mask32(addr, self.p.read32(addr), val)
        self.p.memset32(self.base + 0x1080000, 0, 0xc000) # CODE
        self.p.memset32(self.base + 0x108c000, 0, 0xc000) # SRAM
        # Silence the per-register w32 logging of the bulk init sequences.
        with contextlib.redirect_stdout(None):
            self.wrap_ctrl_device_init()
            self.avd_dma_tunables_stage0()
        self.poll()

    def avd_mcpu_start(self):
        """Run the mailbox register sequence that starts the firmware CPU."""
        avd_r32 = self.avd_r32; avd_w32 = self.avd_w32
        avd_w32(0x1098008, 0xe)
        avd_w32(0x1098010, 0x0)
        avd_w32(0x1098048, 0x0)
        avd_w32(0x1098010, 0x0)
        avd_w32(0x1098048, 0x0)
        avd_w32(0x1098050, 0x1)
        avd_w32(0x1098068, 0x1)
        avd_w32(0x109805c, 0x1)
        avd_w32(0x1098074, 0x1)
        avd_w32(0x1098010, 0x2) # Enable mailbox interrupts
        avd_w32(0x1098048, 0x8) # Enable mailbox interrupts
        avd_w32(0x1098008, 0x1)
        assert((avd_r32(0x1098090) == 0x1))
        self.avd_w32(0x1400014, 0x0)

    def mcpu_boot(self, fw):
        """Load firmware and start the MCPU.

        fw: path to a firmware file, or the firmware bytes themselves. Only
        the first 0xc000 bytes are written (at device offset 0x1080000).
        """
        if (isinstance(fw, str)):
            with open(fw, "rb") as f:
                fw = f.read()
        self.avd_wbuf(0x1080000, fw[:0xc000])
        self.avd_mcpu_start()

    def mcpu_decode_init(self, fw):
        """Boot the firmware, replay a captured memory dump and kick it."""
        self.mcpu_boot(fw=fw)
        # Each line is "<absolute address>: <8 words>"; the addresses are
        # converted to device offsets by subtracting self.base below.
        dump = """
        26908ee80: 00000000 00000000 00000000 00000000 04020002 00020002 04020002 04020002
        26908eea0: 04020002 00070007 00070007 00070007 00070007 00070007 04020002 00020002
        26908eec0: 04020002 04020002 04020002 00070007 00070007 00070007 00070007 00070007
        26908eee0: 04020002 02020202 04020002 04020002 04020202 00070007 00070007 00070007
        26908ef00: 00070007 00070007 00000000 00000000 00000000 00000000 00000000 00000000
        """
        for line in dump.strip().splitlines():
            offset = int(line.split()[0].replace(":", ""), 16)
            vals = line.split()[1:]
            for n, arg in enumerate(vals[:8]):
                self.avd_w32(offset + (n*4) - self.base, int(arg, 16))
        self.avd_w32(0x1098054, 0x108eb30)

    def wrap_ctrl_device_init(self):
        """Initial wrapper/PIODMA setup and interrupt clearing."""
        avd_w32 = self.avd_w32
        avd_w32(0x1400014, 0x1)
        avd_w32(0x1400018, 0x1)
        avd_w32(0x1070000, 0x0) # PIODMA cfg
        avd_w32(0x1104064, 0x3)
        avd_w32(0x110cc90, 0xffffffff) # IRQ clear
        avd_w32(0x110cc94, 0xffffffff) # IRQ clear
        avd_w32(0x110ccd0, 0xffffffff) # IRQ clear
        avd_w32(0x110ccd4, 0xffffffff) # IRQ clear
        avd_w32(0x110cac8, 0xffffffff) # IRQ clear
        # NOTE(review): 0x26907000 looks like (0x268000000 + 0x1070000) >> 4,
        # i.e. derived from the device base - confirm before parameterising.
        avd_w32(0x1070024, 0x26907000)
        avd_w32(0x1400014, 0x0) # idle thing

    def avd_dma_tunables_stage0(self):
        """Program the DMA tunables (same write sequence as the flat list)."""
        avd_w32 = self.avd_w32; avd_r32 = self.avd_r32
        avd_w32(0x1070024, 0x26907000)
        avd_w32(0x1400000, 0x3)
        avd_w32(0x1104000, 0x0)
        avd_w32(0x110405c, 0x0)
        avd_w32(0x1104110, 0x0)
        avd_w32(0x11040f4, 0x1555)

        # 0x1100000, 0x1101000, ..., 0x110b000
        for off in range(0x1100000, 0x110c000, 0x1000):
            avd_w32(off, 0xc0000000)

        avd_w32(0x110c010, 0x1)
        avd_w32(0x110c018, 0x1)

        # 0x110c040, 0x110c080, ..., 0x110ccc0: set the top two bits
        for off in range(0x110c040, 0x110cd00, 0x40):
            avd_w32(off, avd_r32(off) | 0xc0000000)
        avd_w32(0x110cd00, avd_r32(0x110cd00) | 0xc0000003)

        for off, val in self._DMA_TUNABLES:
            avd_w32(off, val)

        avd_w32(0x110405c, avd_r32(0x110405c) | 0x500000)
        avd_w32(0x109807c, 0x1)
        avd_w32(0x1098080, 0xffffffff)

    def ioread(self, iova, size, stream=0):
        """Read `size` bytes at `iova` (masked to 40 bits) through the DART."""
        data = self.dart.ioread(stream, iova & 0xFFFFFFFFFF, size)
        return data

    def iowrite(self, iova, data, stream=0):
        """Write `data` at `iova` (masked to 40 bits) through the DART."""
        self.dart.iowrite(stream, iova & 0xFFFFFFFFFF, data)

    def iomap_at(self, iova, phys, size, stream):
        """Map physical range `phys`/`size` at `iova` on `stream`."""
        self.dart.iomap_at(stream, iova & 0xFFFFFFFFFF, phys, size)

    def ioalloc_at(self, iova, size, stream=0, val=0):
        """Allocate page-aligned heap memory, fill it with `val` and map it at `iova`."""
        phys = self.u.heap.memalign(self.PAGE_SIZE, size)
        self.p.memset32(phys, val, size)
        self.dart.iomap_at(stream, iova & 0xFFFFFFFFFF, phys, size)

    def iowrite32(self, iova, val, stream=0):
        """Write one little-endian 32-bit word at `iova`."""
        data = struct.pack("<I", val)
        self.dart.iowrite(stream, iova & 0xFFFFFFFFFF, data)

    def ioread32(self, iova, stream=0):
        """Read one little-endian 32-bit word at `iova`."""
        data = self.dart.ioread(stream, iova & 0xFFFFFFFFFF, 0x4)
        return struct.unpack("<I", data)[0]

View file

@ -0,0 +1,250 @@
# SPDX-License-Identifier: MIT
import cv2
import numpy as np
import time
import os
# Upper bound on status-register polls in the decoders' get_disp_frame()
# loops; reaching it raises RuntimeError.
MAX_TRIES = 100
def round_up(x, y):
    """Return `x` rounded up to the nearest multiple of `y`.

    Works for any positive integer `y`. The previous bitmask form was only
    correct when `y` was a power of two; results for powers of two are
    unchanged.
    """
    # Ceiling division via negated floor division, then scale back up.
    return -(-x // y) * y
def round_down(x, y):
    """Return `x` rounded down to the nearest multiple of `y`."""
    _, remainder = divmod(x, y)
    return x - remainder
class AVDFrame:
    """One decoded frame together with the slice/unit it came from."""

    def __init__(self, img, sl, y_data, uv_data):
        # img: displayable image; sl: the unit object this frame was decoded
        # from; y_data / uv_data: raw luma and chroma plane bytes.
        self.img, self.sl = img, sl
        self.y_data, self.uv_data = y_data, uv_data
class AVDDec:
    """Codec-independent decode flow plus a minimal frame display.

    Subclasses supply set_insn() (which FIFO register receives the
    instruction words), get_disp_frame() (the kick/handshake sequence) and
    optionally set_payload() / select_disp_frame().
    """

    # sl.mode -> (register zeroed during DMA setup, bits OR-ed into 0x110405c)
    _MODE_SETUP = {
        "h265": (0x1104040, 0x7),
        "h264": (0x1104048, 0x1c00),
        "vp09": (0x110404c, 0x38000),
    }

    def __init__(self, avd):
        self.avd = avd
        self.frames = []       # decoded frames not yet displayed
        self.last_poc = -1     # POC of the most recently displayed frame
        self.winname = "img"   # OpenCV window title
        self.count = 0         # number of frames displayed so far

    def log(self, x):
        return self.avd.log(x)

    def setup_dma(self, ctx, sl):
        """Program the instruction FIFO selected by ctx and the codec bits.

        Raises ValueError for an unknown sl.mode before any register is
        touched (previously this surfaced as UnboundLocalError after the
        FIFO had already been programmed).
        """
        avd_w32 = self.avd.avd_w32; avd_r32 = self.avd.avd_r32
        #self.avd.avd_dma_tunables_stage0()
        assert((ctx.inst_fifo_idx >= 0) and (ctx.inst_fifo_idx < ctx.inst_fifo_count))
        try:
            mode_reg, mode_bits = self._MODE_SETUP[sl.mode]
        except KeyError:
            raise ValueError("unsupported mode %r" % (sl.mode,)) from None

        fifo_off = ctx.inst_fifo_idx * 0x4
        avd_w32(0x1104068 + fifo_off, ctx.inst_fifo_iova >> 8)
        avd_w32(0x1104084 + fifo_off, 0x100000)
        avd_w32(0x11040a0 + fifo_off, 0x0)
        avd_w32(0x11040bc + fifo_off, 0x0)
        avd_w32(mode_reg, 0x0)
        avd_w32(0x110405c, avd_r32(0x110405c) | mode_bits)
        self.avd.poll()

    def get_nv12_disp_frame(self, ctx, sl, r=2):
        """Read back the NV12 output and convert it to a BGR AVDFrame.

        The planes are read with width/height rounded up to 16; the image is
        cropped to ctx.orig_width x ctx.orig_height. `r` is unused.
        """
        w = round_up(ctx.orig_width, 16)
        h = round_up(ctx.orig_height, 16)
        y_data = self.avd.ioread(ctx.y_addr & 0xffffff00, w * h, stream=0)
        y = np.frombuffer(y_data, dtype=np.uint8).reshape((h, w))
        uv_data = self.avd.ioread(ctx.uv_addr & 0xffffff00, w * (h // 2), stream=0)
        uv = np.frombuffer(uv_data, dtype=np.uint8).reshape((h // 2, w))
        # De-interleave U (even columns) and V (odd columns), upscale to luma size.
        u2 = cv2.resize(uv[:,::2], (w, h), interpolation=cv2.INTER_AREA)
        v2 = cv2.resize(uv[:,1::2], (w, h), interpolation=cv2.INTER_AREA)
        yuv = np.stack((y, u2, v2), axis=-1)
        img = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)[:ctx.orig_height, :ctx.orig_width, :]
        return AVDFrame(img, sl, y_data, uv_data)

    def get_nv12_disp_frame2(self, ctx, sl):
        """Like get_nv12_disp_frame, but driven by ctx.fmt.

        Plane size comes from fmt.in_width/in_height, the chroma height from
        fmt.chroma (<= 1: half height, 2: full height) and the crop from
        fmt.x0/x1/y0/y1. Raises ValueError for any other fmt.chroma
        (previously UnboundLocalError).
        """
        fmt = ctx.fmt
        w = fmt.in_width
        h = fmt.in_height
        if (fmt.chroma <= 1):
            ch = h // 2
        elif (fmt.chroma == 2):
            ch = h
        else:
            raise ValueError("unsupported chroma format %r" % (fmt.chroma,))
        y_data = self.avd.ioread(ctx.y_addr & 0xffffff00, w * h, stream=0)
        y = np.frombuffer(y_data, dtype=np.uint8).reshape((h, w))
        uv_data = self.avd.ioread(ctx.uv_addr & 0xffffff00, w * ch, stream=0)
        uv = np.frombuffer(uv_data, dtype=np.uint8).reshape((ch, w))
        u2 = cv2.resize(uv[:,::2], (w, h), interpolation=cv2.INTER_AREA)
        v2 = cv2.resize(uv[:,1::2], (w, h), interpolation=cv2.INTER_AREA)
        yuv = np.stack((y, u2, v2), axis=-1)
        img = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)[fmt.y0:fmt.y1, fmt.x0:fmt.x1,:]
        return AVDFrame(img, sl, y_data, uv_data)

    def set_insn(self, x):
        """Push one instruction word; must be provided by the codec subclass."""
        raise ValueError("set_insn() must be implemented by a codec subclass")

    def set_payload(self, ctx, sl):
        """Copy the coded payload of `sl` to ctx.slice_data_addr."""
        self.avd.iowrite(ctx.slice_data_addr, sl.get_payload(), stream=0)
        self.avd.iomon.poll()

    def get_disp_frame(self, ctx, sl):
        """Kick the decode and wait for it; provided by the codec subclass."""
        raise ValueError("get_disp_frame() must be implemented by a codec subclass")

    def display(self, frame):
        """Show `frame`, record its POC and drop it from the pending list."""
        cv2.imshow(self.winname, frame.img); cv2.waitKey(1)
        if frame.sl.mode in ("h264", "h265"):
            self.last_poc = frame.sl.pic.poc
        else:
            self.last_poc = 0
        # AVDFrame defines no __eq__, so the original != was an identity test.
        self.frames = [f for f in self.frames if f is not frame]
        self.count += 1

    def select_disp_frame(self, ctx, sl):
        """Pick the next frame to display; the default is decode order."""
        return self.frames[0]

    def decode(self, ctx, sl, inst_stream):
        """Decode one unit and display whichever frame is due.

        Returns the displayed AVDFrame, or None if `inst_stream` is empty or
        no frame is ready for display yet.
        """
        if not inst_stream: return
        self.set_payload(ctx, sl)
        self.setup_dma(ctx, sl)
        for x in inst_stream:
            v = x if isinstance(x, int) else x.val
            self.set_insn(v)
        self.get_disp_frame(ctx, sl)
        status = self.avd.avd_r32(0x1104060)
        assert status == 0x2842108, "unexpected status 0x%x after decode" % (status)
        if (hasattr(ctx, "fmt")):
            frame = self.get_nv12_disp_frame2(ctx, sl)
        else:
            frame = self.get_nv12_disp_frame(ctx, sl)
        self.frames.append(frame)
        frame = self.select_disp_frame(ctx, sl)
        if frame is not None:
            self.display(frame)
        return frame
class AVDH265Dec(AVDDec):
    """HEVC flavour of the decode flow."""

    def __init__(self, avd):
        super().__init__(avd)

    def set_insn(self, x):
        """Push one instruction word into the register at 0x1104004."""
        self.avd.avd_w32(0x1104004, x)

    def set_payload(self, ctx, sl):
        """Upload the payload of `sl` and of every segment in sl.slices."""
        write = self.avd.iowrite
        write(sl.payload_addr, sl.get_payload(), stream=0)
        for seg in sl.slices:
            write(seg.payload_addr, seg.get_payload(), stream=0)
        self.avd.iomon.poll()

    def _wait_status(self, mask, want):
        # Poll the status register up to MAX_TRIES times until the masked
        # value matches; every miss is logged and the last miss raises.
        for attempt in range(MAX_TRIES):
            status = self.avd.avd_r32(0x1104060)
            if (status & mask) == want:
                return
            self.log("[H265] status: 0x%x" % (status))
            if attempt >= MAX_TRIES - 1:
                raise RuntimeError("error")

    def get_disp_frame(self, ctx, sl):
        """Kick the decode (once per ctx.pos) and run the status handshake."""
        kick = 0x2b000000 | ctx.inst_fifo_idx * 0x10 | 7
        # The first kick carries the extra 0x100 bit; the remaining
        # ctx.pos - 1 kicks do not.
        self.avd.avd_w32(0x1104014, kick | 0x100)
        for _ in range(ctx.pos - 1):
            self.avd.avd_w32(0x1104014, kick)
        self.avd.poll(); self.avd.iomon.poll()

        self._wait_status(0xc00000, 0xc00000)  # 0x2c4210c -> 0x2c4210c
        self.avd.avd_w32(0x1104060, 0x4)
        self._wait_status(0x3000, 0x2000)      # 0x2c4210c -> 0x2c42108
        self.avd.avd_w32(0x1104060, 0x400000)  # 0x2c42108 -> 0x2842108
        self.avd.poll(); self.avd.iomon.poll()

    def select_disp_frame(self, ctx, sl):
        """Return the next frame in POC order once enough frames are queued.

        Nothing is returned until vps_max_num_reorder_pics + 1 frames are
        pending. Then the frame with POC last_poc + 1 wins if exactly one
        exists, otherwise the pending frame with the lowest POC.
        """
        dpb_size = ctx.vps_list[0].vps_max_num_reorder_pics + 1
        if len(self.frames) < dpb_size:
            return None
        successors = [f for f in self.frames if f.sl.pic.poc == self.last_poc + 1]
        if len(successors) == 1:
            return successors[0]
        by_poc = sorted(self.frames, key=lambda f: (f.sl.pic.poc))
        return by_poc[0]
class AVDH264Dec(AVDDec):
    """H.264 flavour of the decode flow."""

    def __init__(self, avd):
        super().__init__(avd)

    def set_insn(self, x):
        """Push one instruction word into the register at 0x110400c."""
        self.avd.avd_w32(0x110400c, x)

    def set_payload(self, ctx, sl):
        """Copy the coded payload of `sl` to ctx.slice_data_addr."""
        self.avd.iowrite(ctx.slice_data_addr, sl.get_payload(), stream=0)
        self.avd.iomon.poll()

    def _wait_status(self, mask, want):
        # Poll the status register up to MAX_TRIES times until the masked
        # value matches; every miss is logged and the last miss raises.
        for attempt in range(MAX_TRIES):
            status = self.avd.avd_r32(0x1104060)
            if (status & mask) == want:
                return
            self.log("[H264] status: 0x%x" % (status))
            if attempt >= MAX_TRIES - 1:
                raise RuntimeError("error")

    def get_disp_frame(self, ctx, sl):
        """Kick the decode and run the status handshake."""
        avd_w32 = self.avd.avd_w32
        avd_w32(0x1104014, 0x2b000100 | (ctx.inst_fifo_idx * 0x10) | 7)
        self.avd.poll(); self.avd.iomon.poll()

        self._wait_status(0xc00000, 0xc00000)  # 0x2843108 -> 0x2c43108
        avd_w32(0x1104060, 0x1000)
        self._wait_status(0x3000, 0x2000)      # 0x2c43108 -> 0x2c42108
        avd_w32(0x1104060, 0x400000)           # 0x2c42108 -> 0x2842108
        self.avd.poll(); self.avd.iomon.poll()

    def select_disp_frame(self, ctx, sl):
        """Return the next frame in POC order once enough frames are queued.

        Nothing is returned until ctx.num_reorder_frames frames are pending.
        Then the frame with POC last_poc + 2 wins if exactly one exists,
        otherwise the pending frame with the lowest POC.
        """
        dpb_size = ctx.num_reorder_frames
        if len(self.frames) < dpb_size:
            return None
        frames = [f for f in self.frames if f.sl.pic.poc == self.last_poc + 2]
        if (len(frames) == 1):
            return frames[0]
        # A stray bare `return` used to sit here and made this fallback
        # unreachable: whenever no frame had exactly last_poc + 2 (the very
        # first frame, or after a POC reset) nothing was ever displayed and
        # self.frames grew without bound.
        frames = sorted(self.frames, key=lambda f: (f.sl.pic.poc))
        return frames[0]
class AVDVP9Dec(AVDDec):
    """VP9 flavour of the decode flow."""

    def __init__(self, avd):
        super().__init__(avd)

    def set_insn(self, x):
        """Push one instruction word into the register at 0x1104010."""
        self.avd.avd_w32(0x1104010, x)

    def set_payload(self, ctx, sl):
        """Upload the coded payload and the probability data of `sl`."""
        write = self.avd.iowrite
        write(ctx.slice_data_addr, sl.get_payload(), stream=0)
        write(ctx.probs_addr, sl.get_probs(), stream=0)
        self.avd.iomon.poll()

    def _wait_status(self, mask, want):
        # Poll the status register up to MAX_TRIES times until the masked
        # value matches; every miss is logged and the last miss raises.
        for attempt in range(MAX_TRIES):
            status = self.avd.avd_r32(0x1104060)
            if (status & mask) == want:
                return
            self.log("[VP9] status: 0x%x" % (status))
            if attempt >= MAX_TRIES - 1:
                raise RuntimeError("error")

    def get_disp_frame(self, ctx, sl):
        """Kick the decode (once per tile) and run the status handshake."""
        kick = 0x2bfff000 | (ctx.inst_fifo_idx * 0x10) | 7
        # The first kick carries the extra 0x100 bit; one plain kick follows
        # for each additional tile.
        self.avd.avd_w32(0x1104014, kick | 0x100)
        for _ in range(len(sl.tiles) - 1):
            self.avd.avd_w32(0x1104014, kick)
        self.avd.poll(); self.avd.iomon.poll()

        self._wait_status(0xc00000, 0xc00000)  # 0x2862108 -> 0x2c62108
        self.avd.avd_w32(0x1104060, 0x20000)
        self._wait_status(0x3000, 0x2000)      # 0x2c62108 -> 0x2c42108
        self.avd.avd_w32(0x1104060, 0x400000)  # 0x2c42108 -> 0x2842108
        self.avd.poll(); self.avd.iomon.poll()