mirror of
https://github.com/moonlight-stream/moonlight-qt
synced 2024-12-15 05:42:28 +00:00
Optimize CUDA GL interop and provide fallback
This commit is contained in:
parent
c4d85cf928
commit
36e0468a99
4 changed files with 168 additions and 62 deletions
|
@ -1,13 +1,7 @@
|
|||
#include "cuda.h"
|
||||
|
||||
#include <ffnvcodec/dynlink_loader.h>
|
||||
|
||||
#include <SDL_opengl.h>
|
||||
|
||||
extern "C" {
|
||||
#include <libavutil/hwcontext_cuda.h>
|
||||
}
|
||||
|
||||
CUDARenderer::CUDARenderer()
|
||||
: m_HwContext(nullptr)
|
||||
{
|
||||
|
@ -63,35 +57,47 @@ bool CUDARenderer::isDirectRenderingSupported()
|
|||
return false;
|
||||
}
|
||||
|
||||
bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
|
||||
CUDAGLInteropHelper::CUDAGLInteropHelper(AVHWDeviceContext* context)
|
||||
: m_Funcs(nullptr),
|
||||
m_Context((AVCUDADeviceContext*)context->hwctx)
|
||||
{
|
||||
static CudaFunctions* funcs;
|
||||
CUresult err;
|
||||
AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx);
|
||||
bool ret = false;
|
||||
memset(m_Resources, 0, sizeof(m_Resources));
|
||||
|
||||
if (!funcs) {
|
||||
// One-time init of CUDA library
|
||||
cuda_load_functions(&funcs, nullptr);
|
||||
if (!funcs) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library");
|
||||
return false;
|
||||
}
|
||||
// One-time init of CUDA library
|
||||
cuda_load_functions(&m_Funcs, nullptr);
|
||||
if (m_Funcs == nullptr) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases everything the helper acquired: first any CUDA graphics resources
// that are still registered, then the dynamically loaded CUDA function table.
CUDAGLInteropHelper::~CUDAGLInteropHelper()
{
    // Must run before the function table is freed, because unregistering
    // goes through m_Funcs (and is a no-op when m_Funcs was never loaded).
    unregisterTextures();

    // Nothing more to release if the CUDA library failed to load.
    if (m_Funcs == nullptr) {
        return;
    }

    cuda_free_functions(&m_Funcs);
}
|
||||
|
||||
bool CUDAGLInteropHelper::registerBoundTextures()
|
||||
{
|
||||
int err;
|
||||
|
||||
if (m_Funcs == nullptr) {
|
||||
// Already logged in constructor
|
||||
return false;
|
||||
}
|
||||
|
||||
SDL_assert(frame->format == AV_PIX_FMT_CUDA);
|
||||
|
||||
// Push FFmpeg's CUDA context to use for our CUDA operations
|
||||
err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx);
|
||||
err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
// NV12 has 2 planes
|
||||
for (int i = 0; i < 2; i++) {
|
||||
CUgraphicsResource cudaResource;
|
||||
CUarray cudaArray;
|
||||
// Register each plane as a separate resource
|
||||
for (int i = 0; i < NV12_PLANES; i++) {
|
||||
GLint tex;
|
||||
|
||||
// Get the ID of this plane's texture
|
||||
|
@ -99,29 +105,86 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
|
|||
glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex);
|
||||
|
||||
// Register it with CUDA
|
||||
err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
|
||||
err = m_Funcs->cuGraphicsGLRegisterImage(&m_Resources[i], tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err);
|
||||
m_Resources[i] = 0;
|
||||
unregisterTextures();
|
||||
goto Exit;
|
||||
}
|
||||
}
|
||||
|
||||
// Map it to allow us to use it as a copy destination
|
||||
err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err);
|
||||
funcs->cuGraphicsUnregisterResource(cudaResource);
|
||||
goto Exit;
|
||||
Exit:
|
||||
{
|
||||
CUcontext dummy;
|
||||
m_Funcs->cuCtxPopCurrent(&dummy);
|
||||
}
|
||||
return err == CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
void CUDAGLInteropHelper::unregisterTextures()
|
||||
{
|
||||
int err;
|
||||
|
||||
if (m_Funcs == nullptr) {
|
||||
// Already logged in constructor
|
||||
return;
|
||||
}
|
||||
|
||||
// Push FFmpeg's CUDA context to use for our CUDA operations
|
||||
err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < NV12_PLANES; i++) {
|
||||
if (m_Resources[i] != 0) {
|
||||
m_Funcs->cuGraphicsUnregisterResource(m_Resources[i]);
|
||||
m_Resources[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Get a pointer to the mapped array
|
||||
err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0);
|
||||
{
|
||||
CUcontext dummy;
|
||||
m_Funcs->cuCtxPopCurrent(&dummy);
|
||||
}
|
||||
}
|
||||
|
||||
bool CUDAGLInteropHelper::copyCudaFrameToTextures(AVFrame* frame)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (m_Funcs == nullptr) {
|
||||
// Already logged in constructor
|
||||
return false;
|
||||
}
|
||||
|
||||
// Push FFmpeg's CUDA context to use for our CUDA operations
|
||||
err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Map our resources
|
||||
err = m_Funcs->cuGraphicsMapResources(NV12_PLANES, m_Resources, m_Context->stream);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err);
|
||||
goto PopCtxExit;
|
||||
}
|
||||
|
||||
for (int i = 0; i < NV12_PLANES; i++) {
|
||||
CUarray cudaArray;
|
||||
|
||||
// Get a pointer to the mapped array for this plane
|
||||
err = m_Funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, m_Resources[i], 0, 0);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err);
|
||||
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
|
||||
funcs->cuGraphicsUnregisterResource(cudaResource);
|
||||
goto Exit;
|
||||
goto UnmapExit;
|
||||
}
|
||||
|
||||
// Do the copy
|
||||
CUDA_MEMCPY2D cu2d = {
|
||||
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
|
||||
.srcDevice = (CUdeviceptr)frame->data[i],
|
||||
|
@ -132,27 +195,19 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
|
|||
.WidthInBytes = (size_t)frame->width,
|
||||
.Height = (size_t)frame->height >> i
|
||||
};
|
||||
|
||||
// Do the copy
|
||||
err = funcs->cuMemcpy2D(&cu2d);
|
||||
err = m_Funcs->cuMemcpy2D(&cu2d);
|
||||
if (err != CUDA_SUCCESS) {
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err);
|
||||
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
|
||||
funcs->cuGraphicsUnregisterResource(cudaResource);
|
||||
goto Exit;
|
||||
goto UnmapExit;
|
||||
}
|
||||
|
||||
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
|
||||
funcs->cuGraphicsUnregisterResource(cudaResource);
|
||||
}
|
||||
|
||||
ret = true;
|
||||
|
||||
Exit:
|
||||
UnmapExit:
|
||||
m_Funcs->cuGraphicsUnmapResources(NV12_PLANES, m_Resources, m_Context->stream);
|
||||
PopCtxExit:
|
||||
{
|
||||
CUcontext dummy;
|
||||
funcs->cuCtxPopCurrent(&dummy);
|
||||
m_Funcs->cuCtxPopCurrent(&dummy);
|
||||
}
|
||||
return ret;
|
||||
return err == CUDA_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,12 @@
|
|||
|
||||
#include "renderer.h"
|
||||
|
||||
#include <ffnvcodec/dynlink_loader.h>
|
||||
|
||||
extern "C" {
|
||||
#include <libavutil/hwcontext_cuda.h>
|
||||
}
|
||||
|
||||
class CUDARenderer : public IFFmpegRenderer {
|
||||
public:
|
||||
CUDARenderer();
|
||||
|
@ -12,10 +18,25 @@ public:
|
|||
virtual bool needsTestFrame() override;
|
||||
virtual bool isDirectRenderingSupported() override;
|
||||
|
||||
// Helper function used by SDLRenderer to read our CUDA frame
|
||||
static bool copyCudaFrameToBoundTexture(AVFrame* frame);
|
||||
|
||||
private:
|
||||
AVBufferRef* m_HwContext;
|
||||
};
|
||||
|
||||
#define NV12_PLANES 2
|
||||
|
||||
// Helper class used by SDLRenderer to read our CUDA frame
|
||||
class CUDAGLInteropHelper {
|
||||
public:
|
||||
CUDAGLInteropHelper(AVHWDeviceContext* context);
|
||||
~CUDAGLInteropHelper();
|
||||
|
||||
bool registerBoundTextures();
|
||||
void unregisterTextures();
|
||||
|
||||
bool copyCudaFrameToTextures(AVFrame* frame);
|
||||
|
||||
private:
|
||||
CudaFunctions* m_Funcs;
|
||||
AVCUDADeviceContext* m_Context;
|
||||
CUgraphicsResource m_Resources[NV12_PLANES];
|
||||
};
|
||||
|
|
|
@ -5,20 +5,26 @@
|
|||
|
||||
#include <Limelight.h>
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "cuda.h"
|
||||
#endif
|
||||
|
||||
// Constructs the renderer with no SDL renderer or texture yet, no software
// pixel format selected, no overlay textures, and (in CUDA builds) no
// CUDA/GL interop helper.
SdlRenderer::SdlRenderer()
    : m_Renderer(nullptr),
      m_Texture(nullptr),
      m_SwPixelFormat(AV_PIX_FMT_NONE)
#ifdef HAVE_CUDA
      , m_CudaGLHelper(nullptr)
#endif
{
    // Overlay textures start out null.
    SDL_zero(m_OverlayTextures);
}
|
||||
|
||||
SdlRenderer::~SdlRenderer()
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
if (m_CudaGLHelper != nullptr) {
|
||||
delete m_CudaGLHelper;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < Overlay::OverlayMax; i++) {
|
||||
if (m_OverlayTextures[i] != nullptr) {
|
||||
SDL_DestroyTexture(m_OverlayTextures[i]);
|
||||
|
@ -208,6 +214,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
|
|||
}
|
||||
|
||||
if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) {
|
||||
ReadbackRetry:
|
||||
// If we are acting as the frontend for a hardware
|
||||
// accelerated decoder, we'll need to read the frame
|
||||
// back to render it.
|
||||
|
@ -293,13 +300,28 @@ void SdlRenderer::renderFrame(AVFrame* frame)
|
|||
SDL_GetError());
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
if (frame->format == AV_PIX_FMT_CUDA) {
|
||||
SDL_assert(m_CudaGLHelper == nullptr);
|
||||
m_CudaGLHelper = new CUDAGLInteropHelper(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx);
|
||||
|
||||
SDL_GL_BindTexture(m_Texture, nullptr, nullptr);
|
||||
if (!m_CudaGLHelper->registerBoundTextures()) {
|
||||
// If we can't register textures, fall back to normal read-back rendering
|
||||
delete m_CudaGLHelper;
|
||||
m_CudaGLHelper = nullptr;
|
||||
}
|
||||
SDL_GL_UnbindTexture(m_Texture);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (frame->format == AV_PIX_FMT_CUDA) {
|
||||
#ifdef HAVE_CUDA
|
||||
SDL_GL_BindTexture(m_Texture, nullptr, nullptr);
|
||||
CUDARenderer::copyCudaFrameToBoundTexture(frame);
|
||||
SDL_GL_UnbindTexture(m_Texture);
|
||||
if (m_CudaGLHelper == nullptr || !m_CudaGLHelper->copyCudaFrameToTextures(frame)) {
|
||||
goto ReadbackRetry;
|
||||
}
|
||||
#else
|
||||
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
|
||||
"Got CUDA frame, but not built with CUDA support!");
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
#include "renderer.h"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include "cuda.h"
|
||||
#endif
|
||||
|
||||
class SdlRenderer : public IFFmpegRenderer {
|
||||
public:
|
||||
SdlRenderer();
|
||||
|
@ -21,5 +25,9 @@ private:
|
|||
int m_SwPixelFormat;
|
||||
SDL_Texture* m_OverlayTextures[Overlay::OverlayMax];
|
||||
SDL_Rect m_OverlayRects[Overlay::OverlayMax];
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
CUDAGLInteropHelper* m_CudaGLHelper;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue