moonlight-qt/app/streaming/video/ffmpeg-renderers/vaapi.cpp
Cameron Gutman e6c4332445 Revert "Use Rec 2020 colorspace for WCG support even if HDR is off on the host"
Rec 2020 conversion causes colors to be blown out in SDR

This reverts commit 472e8ee92e.
2022-10-13 01:19:49 -05:00

1123 lines
43 KiB
C++

#include <QString>
// HACK: Include before vaapi.h to prevent conflicts with Xlib.h
#include <streaming/session.h>
#include "vaapi.h"
#include "utils.h"
#include <streaming/streamutils.h>
#include <SDL_syswm.h>
#include <unistd.h>
#include <fcntl.h>
#include <vector>
// Creates a renderer that owns no hardware context, no overlay mutex and
// no overlay resources yet.
VAAPIRenderer::VAAPIRenderer()
    : m_HwContext(nullptr),
      m_BlacklistedForDirectRendering(false),
      m_OverlayMutex(nullptr)
{
    // No subpicture format is selected and no overlay image/subpicture exists
    SDL_zero(m_OverlayFormat);
    SDL_zero(m_OverlaySubpicture);
    SDL_zero(m_OverlayImage);

#ifdef HAVE_EGL
    // EGL entry points and extension support are resolved later
    m_eglDestroyImageKHR = nullptr;
    m_eglDestroyImage = nullptr;
    m_eglCreateImageKHR = nullptr;
    m_eglCreateImage = nullptr;
    m_EGLExtDmaBuf = false;

    // Nothing has been exported yet
    m_PrimeDescriptor.num_objects = 0;
    m_PrimeDescriptor.num_layers = 0;
#endif
}
// Releases the overlay subpictures/images, the FFmpeg hardware context,
// the VA display and finally the overlay mutex.
VAAPIRenderer::~VAAPIRenderer()
{
    if (m_HwContext != nullptr) {
        auto hwDevice = (AVHWDeviceContext*)m_HwContext->data;
        auto vaDevice = (AVVAAPIDeviceContext*)hwDevice->hwctx;

        // Save the VADisplay now: it is still needed to terminate VAAPI
        // after the hardware context reference has been dropped.
        VADisplay vaDisplay = vaDevice->display;

        for (int overlay = 0; overlay < Overlay::OverlayMax; ++overlay) {
            const VASubpictureID subpictureId = m_OverlaySubpicture[overlay];
            if (subpictureId != 0) {
                vaDestroySubpicture(vaDisplay, subpictureId);
            }

            const VAImageID imageId = m_OverlayImage[overlay].image_id;
            if (imageId != 0) {
                vaDestroyImage(vaDisplay, imageId);
            }
        }

        av_buffer_unref(&m_HwContext);

        if (vaDisplay != nullptr) {
            vaTerminate(vaDisplay);
        }
    }

    if (m_OverlayMutex != nullptr) {
        SDL_DestroyMutex(m_OverlayMutex);
    }
}
// Obtains a VADisplay for the windowing system that backs the given SDL
// window. Records the window subsystem in m_WindowSystem (and the X11 window
// in m_XWindow on X11). Returns nullptr after logging if the subsystem is
// unsupported, support was not compiled in, or libva returns no display.
VADisplay
VAAPIRenderer::openDisplay(SDL_Window* window)
{
    SDL_SysWMinfo wmInfo;
    VADisplay display;

    SDL_VERSION(&wmInfo.version);

    if (!SDL_GetWindowWMInfo(window, &wmInfo)) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "SDL_GetWindowWMInfo() failed: %s",
                     SDL_GetError());
        return nullptr;
    }

    m_WindowSystem = wmInfo.subsystem;

    switch (wmInfo.subsystem) {
    case SDL_SYSWM_X11:
#ifdef HAVE_LIBVA_X11
        m_XWindow = wmInfo.info.x11.window;
        display = vaGetDisplay(wmInfo.info.x11.display);
        if (display == nullptr) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "Unable to open X11 display for VAAPI");
            return nullptr;
        }
        break;
#else
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Moonlight not compiled with VAAPI X11 support!");
        return nullptr;
#endif

    case SDL_SYSWM_WAYLAND:
#ifdef HAVE_LIBVA_WAYLAND
        display = vaGetDisplayWl(wmInfo.info.wl.display);
        if (display == nullptr) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "Unable to open Wayland display for VAAPI");
            return nullptr;
        }
        break;
#else
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Moonlight not compiled with VAAPI Wayland support!");
        return nullptr;
#endif

#if defined(SDL_VIDEO_DRIVER_KMSDRM) && defined(HAVE_LIBVA_DRM) && SDL_VERSION_ATLEAST(2, 0, 15)
    case SDL_SYSWM_KMSDRM:
        SDL_assert(wmInfo.info.kmsdrm.drm_fd >= 0);
        display = vaGetDisplayDRM(wmInfo.info.kmsdrm.drm_fd);
        if (display == nullptr) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "Unable to open DRM display for VAAPI");
            return nullptr;
        }
        break;
#endif

    default:
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Unsupported VAAPI rendering subsystem: %d",
                     wmInfo.subsystem);
        return nullptr;
    }

    return display;
}
// Initializes the VAAPI renderer for the stream described by params.
//
// Steps, in order:
//  1. Record the video format/size and the window size.
//  2. Allocate an FFmpeg VAAPI device context and attach a VADisplay
//     obtained from openDisplay().
//  3. Call vaInitialize(), retrying with fallback driver names and then
//     fallback driver search paths (both via environment variables).
//  4. Reject driver/platform combinations that another renderer handles
//     better (see the individual checks below).
//  5. Finish FFmpeg device context initialization, create the overlay
//     mutex and pick a 32-bit RGB+alpha subpicture format for overlays.
//
// Returns true on success. Returns false (after logging) on any failure or
// when VAAPI is deliberately avoided. Resources allocated before a failure
// are left in the members for the destructor to release.
bool
VAAPIRenderer::initialize(PDECODER_PARAMETERS params)
{
int err;
m_VideoFormat = params->videoFormat;
m_VideoWidth = params->width;
m_VideoHeight = params->height;
SDL_GetWindowSize(params->window, &m_DisplayWidth, &m_DisplayHeight);
m_HwContext = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VAAPI);
if (!m_HwContext) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate VAAPI context");
return false;
}
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwContext->data;
AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)deviceContext->hwctx;
vaDeviceContext->display = openDisplay(params->window);
if (vaDeviceContext->display == nullptr) {
// openDisplay() logs the error
return false;
}
int major, minor;
VAStatus status;
// Tracks whether LIBVA_DRIVERS_PATH was set by this function (so it can be unset on failure)
bool setPathVar = false;
// This loop runs at most twice: once with the existing driver search path and,
// if that fails and LIBVA_DRIVERS_PATH was empty, once more with fallback paths.
for (;;) {
status = vaInitialize(vaDeviceContext->display, &major, &minor);
// Only try fallback driver names if the user hasn't chosen a driver explicitly
if (status != VA_STATUS_SUCCESS && qEnvironmentVariableIsEmpty("LIBVA_DRIVER_NAME")) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Trying fallback VAAPI driver names");
// It would be nice to use vaSetDriverName() here, but there's no way to unset
// it and get back to the default driver selection logic once we've overridden
// the driver name using that API. As a result, we must use LIBVA_DRIVER_NAME.
if (status != VA_STATUS_SUCCESS) {
// The iHD driver supports newer hardware like Ice Lake and Comet Lake.
// It should be picked by default on those platforms, but that doesn't
// always seem to be the case for some reason.
qputenv("LIBVA_DRIVER_NAME", "iHD");
status = vaInitialize(vaDeviceContext->display, &major, &minor);
}
if (status != VA_STATUS_SUCCESS) {
// The Iris driver in Mesa 20.0 returns a bogus VA driver (iris_drv_video.so)
// even though the correct driver is still i965. If we hit this path, we'll
// explicitly try i965 to handle this case.
qputenv("LIBVA_DRIVER_NAME", "i965");
status = vaInitialize(vaDeviceContext->display, &major, &minor);
}
if (status != VA_STATUS_SUCCESS) {
// The RadeonSI driver is compatible with XWayland but can't be detected by libva
// so try it too if all else fails.
qputenv("LIBVA_DRIVER_NAME", "radeonsi");
status = vaInitialize(vaDeviceContext->display, &major, &minor);
}
if (status != VA_STATUS_SUCCESS
#if defined(HAVE_CUDA) || defined(HAVE_LIBVDPAU)
&& m_WindowSystem != SDL_SYSWM_X11
#endif
) {
// The unofficial nvidia VAAPI driver over NVDEC/CUDA works well on Wayland,
// but we'd rather use CUDA for XWayland and VDPAU for regular X11.
qputenv("LIBVA_DRIVER_NAME", "nvidia");
status = vaInitialize(vaDeviceContext->display, &major, &minor);
}
if (status != VA_STATUS_SUCCESS) {
// Unset LIBVA_DRIVER_NAME if none of the drivers we tried worked. This ensures
// we will get a fresh start using the default driver selection behavior after
// setting LIBVA_DRIVERS_PATH in the code below.
qunsetenv("LIBVA_DRIVER_NAME");
}
}
if (status == VA_STATUS_SUCCESS) {
// Success!
break;
}
if (qEnvironmentVariableIsEmpty("LIBVA_DRIVERS_PATH")) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Trying fallback VAAPI driver paths");
// The adjacent string literals below are concatenated into a single
// colon-separated search path; which entries are present depends on
// the target architecture.
qputenv("LIBVA_DRIVERS_PATH",
#if Q_PROCESSOR_WORDSIZE == 8
"/usr/lib64/dri:" // Fedora x86_64
"/usr/lib64/va/drivers:" // Gentoo x86_64
#endif
"/usr/lib/dri:" // Arch i386 & x86_64, Fedora i386
"/usr/lib/va/drivers:" // Gentoo i386
#if defined(Q_PROCESSOR_X86_64)
"/usr/lib/x86_64-linux-gnu/dri:" // Ubuntu/Debian x86_64
#elif defined(Q_PROCESSOR_X86_32)
"/usr/lib/i386-linux-gnu/dri:" // Ubuntu/Debian i386
#endif
);
setPathVar = true;
}
else {
if (setPathVar) {
// Unset LIBVA_DRIVERS_PATH if we set it ourselves
// and we didn't find any working VAAPI drivers.
qunsetenv("LIBVA_DRIVERS_PATH");
}
// Give up
break;
}
}
if (status != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize VAAPI: %d",
status);
return false;
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Initialized VAAPI %d.%d",
major, minor);
// The vendor string drives the driver-specific decisions below
const char* vendorString = vaQueryVendorString(vaDeviceContext->display);
QString vendorStr(vendorString);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Driver: %s",
vendorString ? vendorString : "<unknown>");
// Older versions of the Gallium VAAPI driver have a nasty memory leak that
// causes memory to be leaked for each submitted frame. I believe this is
// resolved in the libva2 drivers (VAAPI 1.x).
// If we're using Wayland, we have no choice but to use VAAPI because VDPAU
// is only supported under X11 or XWayland.
if (major == 0 && qgetenv("FORCE_VAAPI") != "1" && !WMUtils::isRunningWayland()) {
if (vendorStr.contains("AMD", Qt::CaseInsensitive) ||
vendorStr.contains("Radeon", Qt::CaseInsensitive)) {
// Fail and let VDPAU pick this up
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Avoiding VAAPI on AMD driver. Set FORCE_VAAPI=1 to override.");
return false;
}
}
#if defined(HAVE_CUDA) || defined(HAVE_LIBVDPAU)
if (m_WindowSystem == SDL_SYSWM_X11 && qgetenv("FORCE_VAAPI") != "1" && vendorStr.contains("VA-API NVDEC", Qt::CaseInsensitive)) {
// Prefer CUDA for XWayland and VDPAU for regular X11.
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Avoiding VAAPI for NVIDIA driver on X11/XWayland. Set FORCE_VAAPI=1 to override.");
return false;
}
#endif
if (WMUtils::isRunningWayland()) {
// The iHD VAAPI driver can initialize on XWayland but it crashes in
// vaPutSurface() so we must also not directly render on XWayland.
m_BlacklistedForDirectRendering = vendorStr.contains("iHD");
}
// This will populate the driver_quirks
err = av_hwdevice_ctx_init(m_HwContext);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to initialize VAAPI context: %d",
err);
return false;
}
// This quirk is set for the VDPAU wrapper which doesn't work with our VAAPI renderer
if (vaDeviceContext->driver_quirks & AV_VAAPI_DRIVER_QUIRK_SURFACE_ATTRIBUTES) {
// Fail and let our VDPAU renderer pick this up
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Avoiding VDPAU wrapper for VAAPI decoding");
return false;
}
// Allocate mutex to synchronize overlay updates and rendering
m_OverlayMutex = SDL_CreateMutex();
if (m_OverlayMutex == nullptr) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to create overlay mutex");
return false;
}
// Pick the first supported subpicture format that is 32-bit RGB with alpha and
// remember the SDL pixel format that has the same in-memory layout. A failure
// to find one is not fatal here; m_OverlayFormat is simply not assigned.
unsigned int formatCount = vaMaxNumSubpictureFormats(vaDeviceContext->display);
if (formatCount != 0) {
auto formats = new VAImageFormat[formatCount];
auto flags = new unsigned int[formatCount];
status = vaQuerySubpictureFormats(vaDeviceContext->display, formats, flags, &formatCount);
if (status == VA_STATUS_SUCCESS) {
for (unsigned int i = 0; i < formatCount; i++) {
// Format must have 32-bit color depth
if (formats[i].depth != 32) {
continue;
}
// Select an RGB format with alpha
if (formats[i].byte_order == VA_MSB_FIRST) {
switch (formats[i].fourcc) {
case VA_FOURCC_RGBA:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_RGBA8888;
break;
case VA_FOURCC_ARGB:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_ARGB8888;
break;
case VA_FOURCC_BGRA:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_BGRA8888;
break;
case VA_FOURCC_ABGR:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_ABGR8888;
break;
default:
// Not an RGB+alpha format we handle; try the next one
continue;
}
}
else {
// For LSB-first formats the SDL component order is the reverse of the VA fourcc
SDL_assert(formats[i].byte_order == VA_LSB_FIRST);
switch (formats[i].fourcc) {
case VA_FOURCC_RGBA:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_ABGR8888;
break;
case VA_FOURCC_ARGB:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_BGRA8888;
break;
case VA_FOURCC_BGRA:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_ARGB8888;
break;
case VA_FOURCC_ABGR:
m_OverlaySdlPixelFormat = SDL_PIXELFORMAT_RGBA8888;
break;
default:
// Not an RGB+alpha format we handle; try the next one
continue;
}
}
// If we made it here, we found a format that works for us
m_OverlayFormat = formats[i];
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Selected overlay subpicture format: %c%c%c%c8888",
(m_OverlayFormat.fourcc >> 0) & 0xff,
(m_OverlayFormat.fourcc >> 8) & 0xff,
(m_OverlayFormat.fourcc >> 16) & 0xff,
(m_OverlayFormat.fourcc >> 24) & 0xff);
break;
}
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaQuerySubpictureFormats() failed: %d",
status);
}
delete[] formats;
delete[] flags;
}
return true;
}
// Attaches a new reference to our VAAPI hardware device context to the
// decoder's codec context so FFmpeg decodes through VAAPI.
//
// context: the codec context about to be opened.
// The AVDictionary** options argument is unused.
//
// Returns true on success, or false if the reference could not be created.
bool
VAAPIRenderer::prepareDecoderContext(AVCodecContext* context, AVDictionary**)
{
    context->hw_device_ctx = av_buffer_ref(m_HwContext);
    if (context->hw_device_ctx == nullptr) {
        // av_buffer_ref() returns NULL on failure. Without a device context
        // the decoder would not use VAAPI, so report the failure instead of
        // claiming success.
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Failed to reference VAAPI hardware device context");
        return false;
    }

    SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
                "Using VAAPI accelerated renderer on %s",
                SDL_GetCurrentVideoDriver());

    return true;
}
// Reports whether a test frame must be decoded before this renderer is used.
bool
VAAPIRenderer::needsTestFrame()
{
    // Decoding a test frame is how we find out whether this VAAPI
    // driver supports the profile used for streaming.
    const bool testFrameRequired = true;
    return testFrameRequired;
}
bool
VAAPIRenderer::isDirectRenderingSupported()
{
if (qgetenv("VAAPI_FORCE_DIRECT") == "1") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using direct rendering due to environment variable");
return true;
}
else if (qgetenv("VAAPI_FORCE_INDIRECT") == "1") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using indirect rendering due to environment variable");
return false;
}
// We only support direct rendering on X11 with VAEntrypointVideoProc support
if (m_WindowSystem != SDL_SYSWM_X11 || m_BlacklistedForDirectRendering) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using indirect rendering due to WM or blacklist");
return false;
}
else if (m_VideoFormat & VIDEO_FORMAT_MASK_10BIT) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using indirect rendering for 10-bit video");
return false;
}
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwContext->data;
AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)deviceContext->hwctx;
VAEntrypoint entrypoints[vaMaxNumEntrypoints(vaDeviceContext->display)];
int entrypointCount;
VAStatus status = vaQueryConfigEntrypoints(vaDeviceContext->display, VAProfileNone, entrypoints, &entrypointCount);
if (status == VA_STATUS_SUCCESS) {
for (int i = 0; i < entrypointCount; i++) {
// Without VAEntrypointVideoProc support, the driver will crash inside vaPutSurface()
if (entrypoints[i] == VAEntrypointVideoProc) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using direct rendering with VAEntrypointVideoProc");
if (m_OverlayFormat.fourcc == 0) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Unable to find supported subpicture format. Overlays will be unavailable!");
}
return true;
}
}
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using indirect rendering due to lack of VAEntrypointVideoProc");
return false;
}
// Returns the colorspace this renderer asks the host to encode with.
int VAAPIRenderer::getDecoderColorspace()
{
    // Rec 601 is requested because:
    //  - Gallium drivers don't support Rec 709 yet - https://gitlab.freedesktop.org/mesa/mesa/issues/1915
    //  - Intel-vaapi-driver defaults to Rec 601 - https://github.com/intel/intel-vaapi-driver/blob/021bcb79d1bd873bbd9fbca55f40320344bab866/src/i965_output_dri.c#L186
    const int colorspace = COLORSPACE_REC_601;
    return colorspace;
}
// Returns the capability flags advertised by this renderer.
int VAAPIRenderer::getDecoderCapabilities()
{
    // Only reference frame invalidation for HEVC is advertised
    const int capabilities = CAPABILITY_REFERENCE_FRAME_INVALIDATION_HEVC;
    return capabilities;
}
// Rebuilds the VA image and subpicture for one overlay after the overlay
// manager reports a change.
//
// type: which overlay changed; used as the index into the per-overlay arrays.
//
// Behavior:
//  - Does nothing if no subpicture format was selected (fourcc == 0).
//  - Does nothing if the overlay is enabled but has no updated surface.
//  - Otherwise detaches and destroys the current image/subpicture and, if
//    the overlay is still enabled, uploads the new surface into a fresh
//    VAImage, wraps it in a subpicture and publishes both under the mutex.
//
// The SDL surface returned by the overlay manager is freed on every path
// that reaches past the initial checks.
void VAAPIRenderer::notifyOverlayUpdated(Overlay::OverlayType type)
{
    AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwContext->data;
    AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)deviceContext->hwctx;
    VAStatus status;

    if (m_OverlayFormat.fourcc == 0) {
        // We already logged for this in isDirectRenderingSupported()
        return;
    }

    SDL_Surface* newSurface = Session::get()->getOverlayManager().getUpdatedOverlaySurface(type);
    if (newSurface == nullptr && Session::get()->getOverlayManager().isOverlayEnabled(type)) {
        // There's no updated surface and the overlay is enabled, so just leave the old surface alone.
        return;
    }

    // Destroy the old image and subpicture
    // NB: The mutex ensures the overlay is not currently being read for rendering.
    // NB 2: It is safe to unlock here because this thread is the only surface producer.
    SDL_LockMutex(m_OverlayMutex);
    VAImageID oldImageId = m_OverlayImage[type].image_id;
    SDL_zero(m_OverlayImage[type]);
    VASubpictureID oldSubpictureId = m_OverlaySubpicture[type];
    m_OverlaySubpicture[type] = 0;
    SDL_UnlockMutex(m_OverlayMutex);

    if (oldSubpictureId != 0) {
        status = vaDestroySubpicture(vaDeviceContext->display, oldSubpictureId);
        if (status != VA_STATUS_SUCCESS) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "vaDestroySubpicture() failed: %d",
                         status);
        }
    }

    if (oldImageId != 0) {
        status = vaDestroyImage(vaDeviceContext->display, oldImageId);
        if (status != VA_STATUS_SUCCESS) {
            SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                         "vaDestroyImage() failed: %d",
                         status);
        }
    }

    if (!Session::get()->getOverlayManager().isOverlayEnabled(type)) {
        // Overlay was turned off: the old resources are gone and nothing replaces them
        SDL_FreeSurface(newSurface);
        return;
    }

    if (newSurface == nullptr) {
        // Nothing to upload
        return;
    }

    VAImage newImage;

    SDL_assert(!SDL_MUSTLOCK(newSurface));

    status = vaCreateImage(vaDeviceContext->display, &m_OverlayFormat, newSurface->w, newSurface->h, &newImage);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaCreateImage() failed: %d",
                     status);
        SDL_FreeSurface(newSurface);
        return;
    }

    void* imagePixels;
    status = vaMapBuffer(vaDeviceContext->display, newImage.buf, &imagePixels);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaMapBuffer() failed: %d",
                     status);
        SDL_FreeSurface(newSurface);
        vaDestroyImage(vaDeviceContext->display, newImage.image_id);
        return;
    }

    // Convert the surface to the proper format for the VAImage
    const int convertResult = SDL_ConvertPixels(newSurface->w, newSurface->h, newSurface->format->format,
                                                newSurface->pixels, newSurface->pitch, m_OverlaySdlPixelFormat,
                                                imagePixels, (int)newImage.pitches[0]);
    if (convertResult != 0) {
        // Log right away so SDL's error string is still the one from the conversion
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "SDL_ConvertPixels() failed: %s",
                     SDL_GetError());
    }

    // The buffer is unmapped whether or not the conversion succeeded
    status = vaUnmapBuffer(vaDeviceContext->display, newImage.buf);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaUnmapBuffer() failed: %d",
                     status);
    }

    if (convertResult != 0 || status != VA_STATUS_SUCCESS) {
        // Don't publish an image whose contents were never (fully) written
        SDL_FreeSurface(newSurface);
        vaDestroyImage(vaDeviceContext->display, newImage.image_id);
        return;
    }

    // Zero-initialized so an overlay type not handled below still gets a
    // defined position (top left) rather than indeterminate coordinates.
    SDL_Rect overlayRect = {};

    if (type == Overlay::OverlayStatusUpdate) {
        // Bottom Left
        overlayRect.x = 0;
        overlayRect.y = m_DisplayHeight - newSurface->h;
    }
    else if (type == Overlay::OverlayDebug) {
        // Top left
        overlayRect.x = 0;
        overlayRect.y = 0;
    }

    overlayRect.w = newSurface->w;
    overlayRect.h = newSurface->h;

    // Surface data is no longer needed
    SDL_FreeSurface(newSurface);

    VASubpictureID newSubpicture;
    status = vaCreateSubpicture(vaDeviceContext->display, newImage.image_id, &newSubpicture);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaCreateSubpicture() failed: %d",
                     status);
        vaDestroyImage(vaDeviceContext->display, newImage.image_id);
        return;
    }

    // Publish the new overlay for the render path to pick up
    SDL_LockMutex(m_OverlayMutex);
    m_OverlayImage[type] = newImage;
    m_OverlaySubpicture[type] = newSubpicture;
    m_OverlayRect[type] = overlayRect;
    SDL_UnlockMutex(m_OverlayMutex);
}
// Presents a decoded VAAPI frame directly to the window (X11 only).
//
// frame: decoded frame; frame->data[3] carries the VASurfaceID and
//        frame->colorspace selects the color conversion flag.
//
// On X11 (when built with HAVE_LIBVA_X11) the current overlay subpictures
// are associated with the surface, the surface is drawn with vaPutSurface(),
// and the subpictures are deassociated again. The overlay mutex is released
// while vaPutSurface() runs, so the overlay state is handed over to locals
// beforehand and reconciled with the members afterwards.
// On any other window system this function only asserts.
void
VAAPIRenderer::renderFrame(AVFrame* frame)
{
VASurfaceID surface = (VASurfaceID)(uintptr_t)frame->data[3];
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwContext->data;
AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)deviceContext->hwctx;
// Source is the full video frame, destination is the full window;
// the helper call below is given both rectangles to fit one to the other.
SDL_Rect src, dst;
src.x = src.y = 0;
src.w = m_VideoWidth;
src.h = m_VideoHeight;
dst.x = dst.y = 0;
dst.w = m_DisplayWidth;
dst.h = m_DisplayHeight;
StreamUtils::scaleSourceToDestinationSurface(&src, &dst);
if (m_WindowSystem == SDL_SYSWM_X11) {
#ifdef HAVE_LIBVA_X11
unsigned int flags = 0;
// NB: Not all VAAPI drivers respect these flags. Many drivers
// just ignore them and do the color conversion as Rec 601.
switch (frame->colorspace) {
case AVCOL_SPC_BT709:
flags |= VA_SRC_BT709;
break;
case AVCOL_SPC_BT470BG:
case AVCOL_SPC_SMPTE170M:
flags |= VA_SRC_BT601;
break;
case AVCOL_SPC_SMPTE240M:
flags |= VA_SRC_SMPTE_240;
break;
default:
// Unknown colorspace
SDL_assert(false);
break;
}
SDL_LockMutex(m_OverlayMutex);
// Local copies of the overlays we associate below; entries stay zero for
// overlays that are absent or failed to associate.
VAImage associatedOverlayImages[Overlay::OverlayMax] = {};
VASubpictureID associatedOverlaySubpictures[Overlay::OverlayMax] = {};
// Associate our overlay subpictures to the current surface
for (int type = 0; type < Overlay::OverlayMax; type++) {
VAStatus status;
if (m_OverlaySubpicture[type] == 0) {
continue;
}
status = vaAssociateSubpicture(vaDeviceContext->display,
m_OverlaySubpicture[type],
&surface,
1,
0,
0,
m_OverlayImage[type].width,
m_OverlayImage[type].height,
m_OverlayRect[type].x,
m_OverlayRect[type].y,
m_OverlayRect[type].w,
m_OverlayRect[type].h,
0);
if (status == VA_STATUS_SUCCESS) {
// Take temporary ownership of the overlay to prevent notifyOverlayUpdated()
// from freeing them from underneath us. We need to release the lock while
// we render for performance reasons.
associatedOverlayImages[type] = m_OverlayImage[type];
associatedOverlaySubpictures[type] = m_OverlaySubpicture[type];
SDL_zero(m_OverlayImage[type]);
m_OverlaySubpicture[type] = 0;
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaAssociateSubpicture() failed: %d",
status);
}
}
SDL_UnlockMutex(m_OverlayMutex);
// This will draw the surface and any associated subpictures
// NB: This can take a full VBlank period to complete!
vaPutSurface(vaDeviceContext->display,
surface,
m_XWindow,
0, 0,
m_VideoWidth, m_VideoHeight,
dst.x, dst.y,
dst.w, dst.h,
NULL, 0, flags);
SDL_LockMutex(m_OverlayMutex);
// Now that we've reacquired the lock, we will need to reconcile the current
// state of the overlay with our saved state from before we unlocked.
for (int type = 0; type < Overlay::OverlayMax; type++) {
VAStatus status;
if (associatedOverlaySubpictures[type] == 0) {
continue;
}
// Deassociate the subpicture so it can be safely destroyed/replaced
status = vaDeassociateSubpicture(vaDeviceContext->display, associatedOverlaySubpictures[type], &surface, 1);
if (status != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaDeassociateSubpicture() failed: %d",
status);
}
// If a new subpicture was populated while we were unlocked, free the old one we took ownership of
if (m_OverlaySubpicture[type] != 0) {
status = vaDestroySubpicture(vaDeviceContext->display, associatedOverlaySubpictures[type]);
if (status != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaDestroySubpicture() failed: %d",
status);
}
}
else {
// If no new subpicture was populated, return ownership of this one
m_OverlaySubpicture[type] = associatedOverlaySubpictures[type];
}
// If a new image was populated while we were unlocked, free the old one we took ownership of
if (m_OverlayImage[type].image_id != 0) {
status = vaDestroyImage(vaDeviceContext->display, associatedOverlayImages[type].image_id);
if (status != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaDestroyImage() failed: %d",
status);
}
}
else {
// If no new image was populated, return ownership of this one
m_OverlayImage[type] = associatedOverlayImages[type];
}
}
SDL_UnlockMutex(m_OverlayMutex);
#endif
}
else if (m_WindowSystem == SDL_SYSWM_WAYLAND) {
// We don't support direct rendering on Wayland, so we should
// never get called there. Many common Wayland compositors don't
// support YUV surfaces, so direct rendering would fail.
SDL_assert(false);
}
else {
// We don't accept anything else in initialize().
SDL_assert(false);
}
}
#if defined(HAVE_EGL) || defined(HAVE_DRM)
// Ensure that vaExportSurfaceHandle() is supported by the VA-API driver
bool
VAAPIRenderer::canExportSurfaceHandle(int layerTypeFlag) {
AVHWDeviceContext* deviceContext = (AVHWDeviceContext*)m_HwContext->data;
AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)deviceContext->hwctx;
VASurfaceID surfaceId;
VAStatus st;
VADRMPRIMESurfaceDescriptor descriptor;
VASurfaceAttrib attrs[2];
int attributeCount = 0;
if (qgetenv("VAAPI_FORCE_DIRECT") == "1") {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using direct rendering due to environment variable");
return false;
}
// FFmpeg handles setting these quirk flags for us
if (!(vaDeviceContext->driver_quirks & AV_VAAPI_DRIVER_QUIRK_ATTRIB_MEMTYPE)) {
attrs[attributeCount].type = VASurfaceAttribMemoryType;
attrs[attributeCount].flags = VA_SURFACE_ATTRIB_SETTABLE;
attrs[attributeCount].value.type = VAGenericValueTypeInteger;
attrs[attributeCount].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
attributeCount++;
}
// These attributes are required for i965 to create a surface that can
// be successfully exported via vaExportSurfaceHandle(). iHD doesn't
// need these, but it doesn't seem to hurt either.
attrs[attributeCount].type = VASurfaceAttribPixelFormat;
attrs[attributeCount].flags = VA_SURFACE_ATTRIB_SETTABLE;
attrs[attributeCount].value.type = VAGenericValueTypeInteger;
attrs[attributeCount].value.value.i = (m_VideoFormat & VIDEO_FORMAT_MASK_10BIT) ?
VA_FOURCC_P010 : VA_FOURCC_NV12;
attributeCount++;
st = vaCreateSurfaces(vaDeviceContext->display,
(m_VideoFormat & VIDEO_FORMAT_MASK_10BIT) ?
VA_RT_FORMAT_YUV420_10 : VA_RT_FORMAT_YUV420,
1280,
720,
&surfaceId,
1,
attrs,
attributeCount);
if (st != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaCreateSurfaces() failed: %d", st);
return false;
}
st = vaExportSurfaceHandle(vaDeviceContext->display,
surfaceId,
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY | layerTypeFlag,
&descriptor);
vaDestroySurfaces(vaDeviceContext->display, &surfaceId, 1);
if (st != VA_STATUS_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"vaExportSurfaceHandle() failed: %d", st);
return false;
}
for (size_t i = 0; i < descriptor.num_objects; ++i) {
close(descriptor.objects[i].fd);
}
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"VAAPI driver supports exporting DRM PRIME surface handles with %s layers",
layerTypeFlag == VA_EXPORT_SURFACE_COMPOSED_LAYERS ? "composed" : "separate");
return true;
}
#endif
#ifdef HAVE_EGL
// Reports whether surfaces can be exported in the layout the EGL path needs.
bool
VAAPIRenderer::canExportEGL() {
    // The EGL export logic works on separately exported layers
    const bool exportable = canExportSurfaceHandle(VA_EXPORT_SURFACE_SEPARATE_LAYERS);
    return exportable;
}
// Returns the pixel format of exported EGL images: P010 for 10-bit video,
// NV12 otherwise.
AVPixelFormat VAAPIRenderer::getEGLImagePixelFormat() {
    if (m_VideoFormat & VIDEO_FORMAT_MASK_10BIT) {
        return AV_PIX_FMT_P010;
    }

    return AV_PIX_FMT_NV12;
}
// Checks the EGL extensions needed for DMA-BUF import and resolves the
// image create/destroy entry points.
//
// The EGLDisplay argument is unused.
// ext: EGL extension list to query.
//
// Returns false if EGL_EXT_image_dma_buf_import is missing, or if neither
// the core (eglCreateImage/eglDestroyImage) nor the KHR
// (eglCreateImageKHR/eglDestroyImageKHR) pair is fully available.
bool
VAAPIRenderer::initializeEGL(EGLDisplay,
                             const EGLExtensions &ext) {
    if (!ext.isSupported("EGL_EXT_image_dma_buf_import")) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "VAAPI-EGL: DMABUF unsupported");
        return false;
    }

    // Format modifiers are optional; they are only passed when supported
    m_EGLExtDmaBuf = ext.isSupported("EGL_EXT_image_dma_buf_import_modifiers");

    // NB: eglCreateImage() and eglCreateImageKHR() have slightly different definitions
    // decltype (standard C++) is used instead of the GNU-only typeof extension
    m_eglCreateImage = reinterpret_cast<decltype(m_eglCreateImage)>(eglGetProcAddress("eglCreateImage"));
    m_eglCreateImageKHR = reinterpret_cast<decltype(m_eglCreateImageKHR)>(eglGetProcAddress("eglCreateImageKHR"));
    m_eglDestroyImage = reinterpret_cast<decltype(m_eglDestroyImage)>(eglGetProcAddress("eglDestroyImage"));
    m_eglDestroyImageKHR = reinterpret_cast<decltype(m_eglDestroyImageKHR)>(eglGetProcAddress("eglDestroyImageKHR"));

    if (!(m_eglCreateImage && m_eglDestroyImage) &&
            !(m_eglCreateImageKHR && m_eglDestroyImageKHR)) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "Missing eglCreateImage()/eglDestroyImage() in EGL driver");
        return false;
    }

    return true;
}
// Exports the VA surface behind a decoded frame as DRM PRIME layers and
// wraps each layer in an EGLImage.
//
// frame:  decoded frame; frame->data[3] carries the VASurfaceID.
// dpy:    EGL display used to create the images.
// images: output array with room for EGL_MAX_PLANES images.
//
// Returns the number of images written to images[], or -1 on failure.
// On success the exported descriptor stays in m_PrimeDescriptor until
// freeEGLImages() is called. On failure after a successful export, any
// images created so far are destroyed and the exported file descriptors
// are closed before returning.
ssize_t
VAAPIRenderer::exportEGLImages(AVFrame *frame, EGLDisplay dpy,
                               EGLImage images[EGL_MAX_PLANES]) {
    // EGL attribute names for each DMA-BUF plane, indexed by plane number
    struct PlaneAttribNames {
        EGLint fd;
        EGLint offset;
        EGLint pitch;
        EGLint modifierLo;
        EGLint modifierHi;
    };
    static const PlaneAttribNames k_PlaneAttribNames[] = {
        { EGL_DMA_BUF_PLANE0_FD_EXT, EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGL_DMA_BUF_PLANE0_PITCH_EXT,
          EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT },
        { EGL_DMA_BUF_PLANE1_FD_EXT, EGL_DMA_BUF_PLANE1_OFFSET_EXT, EGL_DMA_BUF_PLANE1_PITCH_EXT,
          EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT },
        { EGL_DMA_BUF_PLANE2_FD_EXT, EGL_DMA_BUF_PLANE2_OFFSET_EXT, EGL_DMA_BUF_PLANE2_PITCH_EXT,
          EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT },
        { EGL_DMA_BUF_PLANE3_FD_EXT, EGL_DMA_BUF_PLANE3_OFFSET_EXT, EGL_DMA_BUF_PLANE3_PITCH_EXT,
          EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT, EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT },
    };

    // Number of EGL images successfully created so far
    ssize_t count = 0;
    auto hwFrameCtx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
    AVVAAPIDeviceContext* vaDeviceContext = (AVVAAPIDeviceContext*)hwFrameCtx->device_ctx->hwctx;
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)frame->data[3];

    VAStatus st = vaExportSurfaceHandle(vaDeviceContext->display,
                                        surface_id,
                                        VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
                                        VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_SEPARATE_LAYERS,
                                        &m_PrimeDescriptor);
    if (st != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaExportSurfaceHandle failed: %d", st);
        return -1;
    }

    // images[] only has room for EGL_MAX_PLANES entries, so refuse to go
    // past that in release builds too (previously this was assert-only).
    if (m_PrimeDescriptor.num_layers > EGL_MAX_PLANES) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaExportSurfaceHandle returned too many layers: %u",
                     (unsigned int)m_PrimeDescriptor.num_layers);
        goto fail;
    }

    st = vaSyncSurface(vaDeviceContext->display, surface_id);
    if (st != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaSyncSurface failed: %d", st);
        goto fail;
    }

    for (size_t i = 0; i < m_PrimeDescriptor.num_layers; ++i) {
        const auto &layer = m_PrimeDescriptor.layers[i];

        // Max 30 attributes (1 key + 1 value for each)
        const int EGL_ATTRIB_COUNT = 30 * 2;
        // Layers after the first are created at half width and half height
        EGLAttrib attribs[EGL_ATTRIB_COUNT] = {
            EGL_LINUX_DRM_FOURCC_EXT, static_cast<EGLAttrib>(layer.drm_format),
            EGL_WIDTH, i == 0 ? frame->width : frame->width / 2,
            EGL_HEIGHT, i == 0 ? frame->height : frame->height / 2,
        };
        int attribIndex = 6;

        for (size_t j = 0; j < layer.num_planes; j++) {
            if (j >= SDL_arraysize(k_PlaneAttribNames)) {
                // EGL only defines attributes for planes 0 through 3
                Q_UNREACHABLE();
            }

            const auto &names = k_PlaneAttribNames[j];
            const auto &object = m_PrimeDescriptor.objects[layer.object_index[j]];

            attribs[attribIndex++] = names.fd;
            attribs[attribIndex++] = object.fd;
            attribs[attribIndex++] = names.offset;
            attribs[attribIndex++] = layer.offset[j];
            attribs[attribIndex++] = names.pitch;
            attribs[attribIndex++] = layer.pitch[j];
            if (m_EGLExtDmaBuf) {
                // The 64-bit modifier is passed as two 32-bit halves
                attribs[attribIndex++] = names.modifierLo;
                attribs[attribIndex++] = (EGLint)(object.drm_format_modifier & 0xFFFFFFFF);
                attribs[attribIndex++] = names.modifierHi;
                attribs[attribIndex++] = (EGLint)(object.drm_format_modifier >> 32);
            }
        }

        // Terminate the attribute list
        attribs[attribIndex++] = EGL_NONE;
        SDL_assert(attribIndex <= EGL_ATTRIB_COUNT);

        if (m_eglCreateImage) {
            images[i] = m_eglCreateImage(dpy, EGL_NO_CONTEXT,
                                         EGL_LINUX_DMA_BUF_EXT,
                                         nullptr, attribs);
            if (!images[i]) {
                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                             "eglCreateImage() Failed: %d", eglGetError());
                goto fail;
            }
        }
        else {
            // Cast the EGLAttrib array elements to EGLint for the KHR extension
            EGLint intAttribs[EGL_ATTRIB_COUNT];
            for (int k = 0; k < attribIndex; k++) {
                intAttribs[k] = (EGLint)attribs[k];
            }

            images[i] = m_eglCreateImageKHR(dpy, EGL_NO_CONTEXT,
                                            EGL_LINUX_DMA_BUF_EXT,
                                            nullptr, intAttribs);
            if (!images[i]) {
                SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                             "eglCreateImageKHR() Failed: %d", eglGetError());
                goto fail;
            }
        }

        ++count;
    }

    return count;

fail:
    // freeEGLImages() destroys num_layers images, so limit it to the ones we
    // actually created (possibly zero). This keeps it from touching entries
    // of images[] that were never written, while still closing the exported
    // file descriptors.
    m_PrimeDescriptor.num_layers = count;
    freeEGLImages(dpy, images);
    return -1;
}
// Destroys the EGL images described by m_PrimeDescriptor, closes the
// exported file descriptors and marks the descriptor as empty.
//
// dpy:    EGL display the images were created on.
// images: images to destroy; the first num_layers entries are used.
void
VAAPIRenderer::freeEGLImages(EGLDisplay dpy, EGLImage images[EGL_MAX_PLANES]) {
    const size_t layerCount = m_PrimeDescriptor.num_layers;
    for (size_t layer = 0; layer != layerCount; ++layer) {
        // Use the core entry point when available, the KHR one otherwise
        if (!m_eglDestroyImage) {
            m_eglDestroyImageKHR(dpy, images[layer]);
        }
        else {
            m_eglDestroyImage(dpy, images[layer]);
        }
    }

    const size_t objectCount = m_PrimeDescriptor.num_objects;
    for (size_t object = 0; object != objectCount; ++object) {
        close(m_PrimeDescriptor.objects[object].fd);
    }

    // Nothing is exported anymore
    m_PrimeDescriptor.num_objects = 0;
    m_PrimeDescriptor.num_layers = 0;
}
#endif
#ifdef HAVE_DRM
// Reports whether surfaces can be exported in the layout the DRM path needs.
bool VAAPIRenderer::canExportDrmPrime()
{
    // The DRM renderer works on composed layers
    const bool exportable = canExportSurfaceHandle(VA_EXPORT_SURFACE_COMPOSED_LAYERS);
    return exportable;
}
// Exports the VA surface behind a decoded frame as a composed DRM PRIME
// descriptor and copies it into FFmpeg's AVDRMFrameDescriptor layout.
//
// frame:         decoded frame; frame->data[3] carries the VASurfaceID.
// drmDescriptor: output descriptor filled in on success.
//
// Returns true on success. On success the exported file descriptors are
// handed to drmDescriptor. Returns false if the export or the surface sync
// fails; in the latter case the exported file descriptors are closed here.
bool VAAPIRenderer::mapDrmPrimeFrame(AVFrame* frame, AVDRMFrameDescriptor* drmDescriptor)
{
    auto framesContext = (AVHWFramesContext*)frame->hw_frames_ctx->data;
    auto vaDevice = (AVVAAPIDeviceContext*)framesContext->device_ctx->hwctx;
    const VASurfaceID surfaceId = (VASurfaceID)(uintptr_t)frame->data[3];

    VADRMPRIMESurfaceDescriptor exported;
    VAStatus status = vaExportSurfaceHandle(vaDevice->display,
                                            surfaceId,
                                            VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
                                            VA_EXPORT_SURFACE_READ_ONLY | VA_EXPORT_SURFACE_COMPOSED_LAYERS,
                                            &exported);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaExportSurfaceHandle() failed: %d", status);
        return false;
    }

    status = vaSyncSurface(vaDevice->display, surfaceId);
    if (status != VA_STATUS_SUCCESS) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
                     "vaSyncSurface() failed: %d", status);

        // The caller never sees these descriptors, so close them here
        for (uint32_t obj = 0; obj < exported.num_objects; ++obj) {
            close(exported.objects[obj].fd);
        }
        return false;
    }

    // Translate the VADRMPRIMESurfaceDescriptor into the AVDRMFrameDescriptor
    // that our caller expects: objects first...
    drmDescriptor->nb_objects = exported.num_objects;
    for (uint32_t obj = 0; obj < exported.num_objects; ++obj) {
        const auto& srcObject = exported.objects[obj];
        auto& dstObject = drmDescriptor->objects[obj];

        dstObject.fd = srcObject.fd;
        dstObject.size = srcObject.size;
        dstObject.format_modifier = srcObject.drm_format_modifier;
    }

    // ...then layers and the planes inside each layer
    drmDescriptor->nb_layers = exported.num_layers;
    for (uint32_t lyr = 0; lyr < exported.num_layers; ++lyr) {
        const auto& srcLayer = exported.layers[lyr];
        auto& dstLayer = drmDescriptor->layers[lyr];

        dstLayer.format = srcLayer.drm_format;
        dstLayer.nb_planes = srcLayer.num_planes;

        for (uint32_t plane = 0; plane < srcLayer.num_planes; ++plane) {
            auto& dstPlane = dstLayer.planes[plane];

            dstPlane.object_index = srcLayer.object_index[plane];
            dstPlane.offset = srcLayer.offset[plane];
            dstPlane.pitch = srcLayer.pitch[plane];
        }
    }

    return true;
}
// Closes every file descriptor listed in the given DRM frame descriptor.
void VAAPIRenderer::unmapDrmPrimeFrame(AVDRMFrameDescriptor* drmDescriptor)
{
    int objectIndex = 0;
    while (objectIndex < drmDescriptor->nb_objects) {
        close(drmDescriptor->objects[objectIndex].fd);
        ++objectIndex;
    }
}
#endif