Prefer D3D11VA in non-FSE modes for improved performance

This commit is contained in:
Cameron Gutman 2022-08-21 17:50:56 -05:00
parent feaae7b684
commit bf4332b9e7
5 changed files with 55 additions and 11 deletions

View file

@ -73,8 +73,9 @@ typedef struct _CSC_CONST_BUF
} CSC_CONST_BUF, *PCSC_CONST_BUF;
static_assert(sizeof(CSC_CONST_BUF) % 16 == 0, "Constant buffer sizes must be a multiple of 16");
D3D11VARenderer::D3D11VARenderer()
: m_Factory(nullptr),
D3D11VARenderer::D3D11VARenderer(int decoderSelectionPass)
: m_DecoderSelectionPass(decoderSelectionPass),
m_Factory(nullptr),
m_Device(nullptr),
m_SwapChain(nullptr),
m_DeviceContext(nullptr),

View file

@ -13,7 +13,7 @@ extern "C" {
class D3D11VARenderer : public IFFmpegRenderer
{
public:
D3D11VARenderer();
D3D11VARenderer(int decoderSelectionPass);
virtual ~D3D11VARenderer() override;
virtual bool initialize(PDECODER_PARAMETERS params) override;
virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary**) override;
@ -36,6 +36,8 @@ private:
bool checkDecoderSupport(IDXGIAdapter* adapter);
bool createDeviceByAdapterIndex(int adapterIndex, bool* adapterNotFound = nullptr);
int m_DecoderSelectionPass;
IDXGIFactory5* m_Factory;
ID3D11Device* m_Device;
IDXGISwapChain4* m_SwapChain;

View file

@ -28,7 +28,8 @@ typedef struct _VERTEX
float tu, tv;
} VERTEX, *PVERTEX;
DXVA2Renderer::DXVA2Renderer() :
DXVA2Renderer::DXVA2Renderer(int decoderSelectionPass) :
m_DecoderSelectionPass(decoderSelectionPass),
m_DecService(nullptr),
m_Decoder(nullptr),
m_SurfacesUsed(0),
@ -434,6 +435,13 @@ bool DXVA2Renderer::initializeDeviceQuirks()
// For other GPUs, we'll avoid populating it as was our previous behavior.
m_DeviceQuirks |= DXVA2_QUIRK_SET_DEST_FORMAT;
}
// Tag this display device if it has a WDDM 2.0+ driver for the decoder selection logic
if (HIWORD(id.DriverVersion.HighPart) >= 20) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Detected WDDM 2.0 or later display driver");
m_DeviceQuirks |= DXVA2_QUIRK_WDDM_20_PLUS;
}
}
return true;
@ -707,7 +715,7 @@ bool DXVA2Renderer::initialize(PDECODER_PARAMETERS params)
return false;
}
#else
else if (qgetenv("DXVA2_ENABLED") != "1") {
else if (qgetenv("DXVA2_ENABLED") != "1" && m_DecoderSelectionPass == 0) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"DXVA2 is disabled by default on ARM64. Set DXVA2_ENABLED=1 to override.");
return false;
@ -759,6 +767,26 @@ bool DXVA2Renderer::initialize(PDECODER_PARAMETERS params)
return false;
}
// If we have a WDDM 2.0 or later display driver and we're not running in
// full-screen exclusive mode, prefer the D3D11VA renderer.
//
// D3D11VA is better in this case because it can enable tearing in non-FSE
// modes when the user has V-Sync disabled. In non-FSE V-Sync cases, D3D11VA
// provides lower display latency on systems that support Independent Flip
// in windowed mode. When using D3D9, DWM will not promote us to IFlip unless
// we're full-screen (exclusive or not).
//
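// NB: We only do this for WDDM 2.0 and later because older AMD drivers
// (such as those for the HD 5570) render garbage when using the D3D11VA
// renderer.
//
// SDL_WINDOW_FULLSCREEN_DESKTOP includes the SDL_WINDOW_FULLSCREEN bit, so
// masking with it and comparing against SDL_WINDOW_FULLSCREEN matches only
// full-screen exclusive mode (not borderless full-screen desktop mode).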
if (m_DecoderSelectionPass == 0 &&
(m_DeviceQuirks & DXVA2_QUIRK_WDDM_20_PLUS) &&
!((SDL_GetWindowFlags(params->window) & SDL_WINDOW_FULLSCREEN_DESKTOP) == SDL_WINDOW_FULLSCREEN)) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Defaulting to D3D11VA for non-FSE mode");
return false;
}
if (!initializeDecoder()) {
return false;
}

View file

@ -13,7 +13,7 @@ extern "C" {
class DXVA2Renderer : public IFFmpegRenderer
{
public:
DXVA2Renderer();
DXVA2Renderer(int decoderSelectionPass);
virtual ~DXVA2Renderer() override;
virtual bool initialize(PDECODER_PARAMETERS params) override;
virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary** options) override;
@ -44,6 +44,8 @@ private:
static
int ffGetBuffer2(AVCodecContext* context, AVFrame* frame, int flags);
int m_DecoderSelectionPass;
int m_VideoFormat;
int m_VideoWidth;
int m_VideoHeight;
@ -77,5 +79,6 @@ private:
#define DXVA2_QUIRK_NO_VP 0x01
#define DXVA2_QUIRK_SET_DEST_FORMAT 0x02
#define DXVA2_QUIRK_WDDM_20_PLUS 0x04
int m_DeviceQuirks;
};

View file

@ -601,13 +601,14 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
if (pass == 0) {
switch (hwDecodeCfg->device_type) {
#ifdef Q_OS_WIN32
// DXVA2 appears in the hwaccel list before D3D11VA, so we will implicitly
// prefer it. When we want to switch to D3D11VA by default, we'll need to
// move it into the second pass set below.
// DXVA2 appears in the hwaccel list before D3D11VA, so we will prefer it.
//
// There is logic in DXVA2 that may elect to fail on the first selection pass
// to allow D3D11VA to be used in cases where it is known to be better.
case AV_HWDEVICE_TYPE_DXVA2:
return new DXVA2Renderer();
return new DXVA2Renderer(pass);
case AV_HWDEVICE_TYPE_D3D11VA:
return new D3D11VARenderer();
return new D3D11VARenderer(pass);
#endif
#ifdef Q_OS_DARWIN
case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
@ -636,6 +637,15 @@ IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig
case AV_HWDEVICE_TYPE_CUDA:
// CUDA should only be used to cover the NVIDIA+Wayland case
return new CUDARenderer();
#endif
#ifdef Q_OS_WIN32
// This gives DXVA2 and D3D11VA another shot at handling cases where they
// chose to purposefully fail in the first selection pass to allow a more
// optimal decoder to be tried.
case AV_HWDEVICE_TYPE_DXVA2:
return new DXVA2Renderer(pass);
case AV_HWDEVICE_TYPE_D3D11VA:
return new D3D11VARenderer(pass);
#endif
default:
return nullptr;