mirror of
https://github.com/moonlight-stream/moonlight-qt
synced 2024-12-15 13:52:28 +00:00
Cleanup D3D11VA renderer by removing unnecessary codepaths
Waitable swapchains seem to be worse than our existing DXVsyncSource for synchronizing with VBlank. Full-screen exclusive mode isn't beneficial vs Independent Flip on modern Win10/Win11 systems.
This commit is contained in:
parent
fd563726f0
commit
747d3eedf1
2 changed files with 30 additions and 148 deletions
|
@ -80,7 +80,6 @@ D3D11VARenderer::D3D11VARenderer()
|
|||
m_LastColorSpace(AVCOL_SPC_UNSPECIFIED),
|
||||
m_LastColorRange(AVCOL_RANGE_UNSPECIFIED),
|
||||
m_AllowTearing(false),
|
||||
m_FrameWaitableObject(nullptr),
|
||||
m_VideoGenericPixelShader(nullptr),
|
||||
m_VideoBt601LimPixelShader(nullptr),
|
||||
m_VideoBt2020LimPixelShader(nullptr),
|
||||
|
@ -127,15 +126,6 @@ D3D11VARenderer::~D3D11VARenderer()
|
|||
SAFE_COM_RELEASE(m_OverlayPixelShader);
|
||||
|
||||
SAFE_COM_RELEASE(m_RenderTargetView);
|
||||
|
||||
if (m_FrameWaitableObject != nullptr) {
|
||||
CloseHandle(m_FrameWaitableObject);
|
||||
}
|
||||
|
||||
if (m_SwapChain != nullptr && !m_Windowed) {
|
||||
// It's illegal to destroy a full-screen swapchain. Make sure we're in windowed mode.
|
||||
m_SwapChain->SetFullscreenState(FALSE, nullptr);
|
||||
}
|
||||
SAFE_COM_RELEASE(m_SwapChain);
|
||||
|
||||
if (m_HwFramesContext != nullptr) {
|
||||
|
@ -224,20 +214,13 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
adapter->Release();
|
||||
adapter = nullptr;
|
||||
|
||||
#if 0
|
||||
m_Windowed = (SDL_GetWindowFlags(params->window) & SDL_WINDOW_FULLSCREEN_DESKTOP) != SDL_WINDOW_FULLSCREEN;
|
||||
#else
|
||||
// Always use windowed or borderless windowed mode for now. SDL does mode-setting for us
|
||||
// in full-screen exclusive mode, so this actually works out okay.
|
||||
m_Windowed = true;
|
||||
#endif
|
||||
|
||||
DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {};
|
||||
swapChainDesc.Stereo = FALSE;
|
||||
swapChainDesc.SampleDesc.Count = 1;
|
||||
swapChainDesc.SampleDesc.Quality = 0;
|
||||
swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
swapChainDesc.Scaling = DXGI_SCALING_STRETCH;
|
||||
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
||||
swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED;
|
||||
swapChainDesc.Flags = 0;
|
||||
|
||||
|
@ -247,42 +230,20 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
//
|
||||
// Even though we allocate 3 front buffers for pre-rendered frames,
|
||||
// they won't actually increase presentation latency because we
|
||||
// always use SyncInterval 0 which replaces the last one. See
|
||||
// the SetMaximumFrameLatency comment below for more details.
|
||||
// always use SyncInterval 0 which replaces the last one.
|
||||
//
|
||||
// IDXGIDevice1 has a SetMaximumFrameLatency() function, but counter-
|
||||
// intuitively we must avoid it to reduce latency. If we set our max
|
||||
// frame latency to 1 on the device, our SyncInterval 0 Present() calls
|
||||
// will block on DWM (acting like SyncInterval 1) rather than doing
|
||||
// the non-blocking present we expect.
|
||||
//
|
||||
// NB: 3 total buffers seems sufficient on NVIDIA hardware but
|
||||
// causes performance issues (buffer starvation) on AMD GPUs.
|
||||
swapChainDesc.BufferCount = 3 + 1 + 1;
|
||||
|
||||
DXGI_SWAP_CHAIN_FULLSCREEN_DESC fullScreenDesc = {};
|
||||
|
||||
if (m_Windowed) {
|
||||
// Use the current window size as the swapchain size
|
||||
SDL_GetWindowSize(params->window, (int*)&swapChainDesc.Width, (int*)&swapChainDesc.Height);
|
||||
}
|
||||
else {
|
||||
// Use the current display mode as the swapchain size
|
||||
SDL_DisplayMode sdlMode;
|
||||
if (SDL_GetWindowDisplayMode(params->window, &sdlMode) < 0) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||
"SDL_GetWindowDisplayMode() failed: %s",
|
||||
SDL_GetError());
|
||||
return false;
|
||||
}
|
||||
|
||||
swapChainDesc.Width = sdlMode.w;
|
||||
swapChainDesc.Height = sdlMode.h;
|
||||
|
||||
// Leave these unspecified to ensure we don't end up in an inefficient "proxy"
|
||||
// full-screen swapchain with DXGI doing format conversion behind our backs.
|
||||
// https://youtu.be/E3wTajGZOsA?t=1489
|
||||
fullScreenDesc.RefreshRate.Numerator = 0;
|
||||
fullScreenDesc.RefreshRate.Denominator = 0;
|
||||
fullScreenDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
|
||||
fullScreenDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
|
||||
|
||||
fullScreenDesc.Windowed = FALSE;
|
||||
}
|
||||
|
||||
m_DisplayWidth = swapChainDesc.Width;
|
||||
m_DisplayHeight = swapChainDesc.Height;
|
||||
|
@ -294,10 +255,9 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
}
|
||||
|
||||
// Use DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING with flip mode for non-vsync case, if possible
|
||||
// Use DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING with flip mode for non-vsync case, if possible.
|
||||
// NOTE: This is only possible in windowed or borderless windowed mode.
|
||||
if (!params->enableVsync) {
|
||||
// DXGI_PRESENT_ALLOW_TEARING may only be used in windowed mode
|
||||
if (m_Windowed) {
|
||||
BOOL allowTearing = FALSE;
|
||||
hr = m_Factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING,
|
||||
&allowTearing,
|
||||
|
@ -305,30 +265,11 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
if (SUCCEEDED(hr)) {
|
||||
// Use flip discard with allow tearing mode if possible.
|
||||
swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
|
||||
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
||||
m_AllowTearing = true;
|
||||
}
|
||||
else {
|
||||
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||
"GPU driver doesn't support DXGI_FEATURE_PRESENT_ALLOW_TEARING");
|
||||
|
||||
// Without tearing support, we'll have to use regular discard mode to get tearing
|
||||
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// In full-screen exclusive mode, we'll have to use regular discard mode
|
||||
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// In V-sync mode, we can always use flip discard
|
||||
swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
||||
|
||||
// We'll use a waitable swapchain to pace frame delivery
|
||||
// NB: We can only use this option in windowed mode (or borderless fullscreen).
|
||||
if (m_Windowed && params->enableFramePacing) {
|
||||
swapChainDesc.Flags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -337,11 +278,13 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
SDL_GetWindowWMInfo(params->window, &info);
|
||||
SDL_assert(info.subsystem == SDL_SYSWM_WINDOWS);
|
||||
|
||||
// Always use windowed or borderless windowed mode. SDL does mode-setting for us in
|
||||
// full-screen exclusive mode (SDL_WINDOW_FULLSCREEN), so this actually works out okay.
|
||||
IDXGISwapChain1* swapChain;
|
||||
hr = m_Factory->CreateSwapChainForHwnd(m_Device,
|
||||
info.info.win.window,
|
||||
&swapChainDesc,
|
||||
m_Windowed ? nullptr : &fullScreenDesc,
|
||||
nullptr,
|
||||
nullptr,
|
||||
&swapChain);
|
||||
|
||||
|
@ -449,29 +392,6 @@ bool D3D11VARenderer::initialize(PDECODER_PARAMETERS params)
|
|||
}
|
||||
}
|
||||
|
||||
// We have to set the maximum frame latency on waitable swapchains.
|
||||
//
|
||||
// NB: IDXGIDevice1 has a SetMaximumFrameLatency() function, but counterintuitively
|
||||
// we must avoid it to reduce latency. If we set our max frame latency to 1 on the
|
||||
// device, our SyncInterval 0 Present() calls will block on DWM (acting like
|
||||
// SyncInterval 1) rather than doing the non-blocking present we expect.
|
||||
if (swapChainDesc.Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) {
|
||||
SDL_assert(params->enableVsync);
|
||||
SDL_assert(m_Windowed);
|
||||
|
||||
// We only want one buffered frame on our waitable swapchain to pace properly
|
||||
hr = m_SwapChain->SetMaximumFrameLatency(1);
|
||||
if (FAILED(hr)) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||
"IDXGISwapChain::SetMaximumFrameLatency() failed: %x",
|
||||
hr);
|
||||
return false;
|
||||
}
|
||||
|
||||
m_FrameWaitableObject = m_SwapChain->GetFrameLatencyWaitableObject();
|
||||
SDL_assert(m_FrameWaitableObject != nullptr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -560,23 +480,6 @@ void D3D11VARenderer::setHdrMode(bool enabled)
|
|||
unlockContext(this);
|
||||
}
|
||||
|
||||
void D3D11VARenderer::waitToRender()
|
||||
{
|
||||
if (m_FrameWaitableObject != nullptr) {
|
||||
SDL_assert(m_Windowed);
|
||||
SDL_assert(m_DecoderParams.enableVsync);
|
||||
SDL_assert(m_DecoderParams.enableFramePacing);
|
||||
|
||||
// Wait for the pipeline to be ready for the next frame in pacing mode.
|
||||
//
|
||||
// This callback happens before selecting the next frame to render, so
|
||||
// we can wait for the previous frame to finish prior to picking the
|
||||
// next one to display. This reduces the effective display latency
|
||||
// by ensuring we always render the most recent frame immediately.
|
||||
WaitForSingleObjectEx(m_FrameWaitableObject, 500, FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
void D3D11VARenderer::renderFrame(AVFrame* frame)
|
||||
{
|
||||
// Acquire the context lock for rendering to prevent concurrent
|
||||
|
@ -603,33 +506,15 @@ void D3D11VARenderer::renderFrame(AVFrame* frame)
|
|||
|
||||
if (m_AllowTearing) {
|
||||
SDL_assert(!m_DecoderParams.enableVsync);
|
||||
SDL_assert(m_Windowed);
|
||||
|
||||
// If tearing is allowed, use DXGI_PRESENT_ALLOW_TEARING with syncInterval 0.
|
||||
// It is not valid to use any other syncInterval values in tearing mode.
|
||||
flags = DXGI_PRESENT_ALLOW_TEARING;
|
||||
}
|
||||
else if (!m_DecoderParams.enableVsync) {
|
||||
// In any other non-vsync mode, just render with syncInterval 0.
|
||||
// We'll probably have a non-flip swapchain here.
|
||||
flags = 0;
|
||||
}
|
||||
else if (m_FrameWaitableObject != nullptr) {
|
||||
SDL_assert(m_Windowed);
|
||||
SDL_assert(m_DecoderParams.enableVsync);
|
||||
SDL_assert(m_DecoderParams.enableFramePacing);
|
||||
|
||||
// With frame pacing, we'll have a waitable swapchain, so we can
|
||||
// use syncInterval 0 and the wait will sync us with VBlank.
|
||||
flags = 0;
|
||||
}
|
||||
else {
|
||||
SDL_assert(m_DecoderParams.enableVsync);
|
||||
SDL_assert(!m_DecoderParams.enableFramePacing);
|
||||
SDL_assert(m_FrameWaitableObject == nullptr);
|
||||
|
||||
// With vsync enabled but frame pacing disabled, we'll submit as
|
||||
// fast as possible and DWM will discard excess frames for us.
|
||||
// Otherwise, we'll submit as fast as possible and DWM will discard excess
|
||||
// frames for us. If frame pacing is also enabled, our Vsync source will keep
|
||||
// us in sync with VBlank.
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
|
@ -1046,8 +931,8 @@ bool D3D11VARenderer::checkDecoderSupport(IDXGIAdapter* adapter)
|
|||
|
||||
int D3D11VARenderer::getRendererAttributes()
|
||||
{
|
||||
// This renderer supports HDR and can frame pace with waitToRender()
|
||||
return RENDERER_ATTRIBUTE_HDR_SUPPORT | RENDERER_ATTRIBUTE_SELF_PACING;
|
||||
// This renderer supports HDR
|
||||
return RENDERER_ATTRIBUTE_HDR_SUPPORT;
|
||||
}
|
||||
|
||||
void D3D11VARenderer::lockContext(void *lock_ctx)
|
||||
|
|
|
@ -19,7 +19,6 @@ public:
|
|||
virtual bool prepareDecoderContext(AVCodecContext* context, AVDictionary**) override;
|
||||
virtual bool prepareDecoderContextInGetFormat(AVCodecContext* context, AVPixelFormat pixelFormat) override;
|
||||
virtual void renderFrame(AVFrame* frame) override;
|
||||
virtual void waitToRender() override;
|
||||
virtual void notifyOverlayUpdated(Overlay::OverlayType) override;
|
||||
virtual void setHdrMode(bool enabled) override;
|
||||
virtual int getRendererAttributes() override;
|
||||
|
@ -46,12 +45,10 @@ private:
|
|||
int m_TextureAlignment;
|
||||
int m_DisplayWidth;
|
||||
int m_DisplayHeight;
|
||||
bool m_Windowed;
|
||||
AVColorSpace m_LastColorSpace;
|
||||
AVColorRange m_LastColorRange;
|
||||
|
||||
bool m_AllowTearing;
|
||||
HANDLE m_FrameWaitableObject;
|
||||
|
||||
ID3D11PixelShader* m_VideoGenericPixelShader;
|
||||
ID3D11PixelShader* m_VideoBt601LimPixelShader;
|
||||
|
|
Loading…
Reference in a new issue