Optimize CUDA GL interop and provide fallback

This commit is contained in:
Cameron Gutman 2021-12-13 21:41:00 -06:00
parent c4d85cf928
commit 36e0468a99
4 changed files with 168 additions and 62 deletions

View file

@ -1,13 +1,7 @@
#include "cuda.h" #include "cuda.h"
#include <ffnvcodec/dynlink_loader.h>
#include <SDL_opengl.h> #include <SDL_opengl.h>
extern "C" {
#include <libavutil/hwcontext_cuda.h>
}
CUDARenderer::CUDARenderer() CUDARenderer::CUDARenderer()
: m_HwContext(nullptr) : m_HwContext(nullptr)
{ {
@ -63,35 +57,47 @@ bool CUDARenderer::isDirectRenderingSupported()
return false; return false;
} }
bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame) CUDAGLInteropHelper::CUDAGLInteropHelper(AVHWDeviceContext* context)
: m_Funcs(nullptr),
m_Context((AVCUDADeviceContext*)context->hwctx)
{ {
static CudaFunctions* funcs; memset(m_Resources, 0, sizeof(m_Resources));
CUresult err;
AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx);
bool ret = false;
if (!funcs) { // One-time init of CUDA library
// One-time init of CUDA library cuda_load_functions(&m_Funcs, nullptr);
cuda_load_functions(&funcs, nullptr); if (m_Funcs == nullptr) {
if (!funcs) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library");
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library"); return;
return false; }
} }
// Tears down the helper: drops any CUDA registrations of GL textures first,
// then releases the CUDA function table if the constructor managed to load it.
CUDAGLInteropHelper::~CUDAGLInteropHelper()
{
    // Must run while m_Funcs is still valid, because unregisterTextures()
    // calls through the function table (and is a no-op when it is null).
    unregisterTextures();

    if (m_Funcs == nullptr) {
        // The CUDA library never loaded, so there is nothing to free
        return;
    }

    cuda_free_functions(&m_Funcs);
}
bool CUDAGLInteropHelper::registerBoundTextures()
{
int err;
if (m_Funcs == nullptr) {
// Already logged in constructor
return false;
} }
SDL_assert(frame->format == AV_PIX_FMT_CUDA);
// Push FFmpeg's CUDA context to use for our CUDA operations // Push FFmpeg's CUDA context to use for our CUDA operations
err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx); err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
if (err != CUDA_SUCCESS) { if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err); SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
return false; return false;
} }
// NV12 has 2 planes // Register each plane as a separate resource
for (int i = 0; i < 2; i++) { for (int i = 0; i < NV12_PLANES; i++) {
CUgraphicsResource cudaResource;
CUarray cudaArray;
GLint tex; GLint tex;
// Get the ID of this plane's texture // Get the ID of this plane's texture
@ -99,29 +105,86 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex); glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex);
// Register it with CUDA // Register it with CUDA
err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD); err = m_Funcs->cuGraphicsGLRegisterImage(&m_Resources[i], tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD);
if (err != CUDA_SUCCESS) { if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err); SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err);
m_Resources[i] = 0;
unregisterTextures();
goto Exit; goto Exit;
} }
}
// Map it to allow us to use it as a copy destination Exit:
err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream); {
if (err != CUDA_SUCCESS) { CUcontext dummy;
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err); m_Funcs->cuCtxPopCurrent(&dummy);
funcs->cuGraphicsUnregisterResource(cudaResource); }
goto Exit; return err == CUDA_SUCCESS;
}
void CUDAGLInteropHelper::unregisterTextures()
{
int err;
if (m_Funcs == nullptr) {
// Already logged in constructor
return;
}
// Push FFmpeg's CUDA context to use for our CUDA operations
err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
return;
}
for (int i = 0; i < NV12_PLANES; i++) {
if (m_Resources[i] != 0) {
m_Funcs->cuGraphicsUnregisterResource(m_Resources[i]);
m_Resources[i] = 0;
} }
}
// Get a pointer to the mapped array {
err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0); CUcontext dummy;
m_Funcs->cuCtxPopCurrent(&dummy);
}
}
bool CUDAGLInteropHelper::copyCudaFrameToTextures(AVFrame* frame)
{
int err;
if (m_Funcs == nullptr) {
// Already logged in constructor
return false;
}
// Push FFmpeg's CUDA context to use for our CUDA operations
err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx);
if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err);
return false;
}
// Map our resources
err = m_Funcs->cuGraphicsMapResources(NV12_PLANES, m_Resources, m_Context->stream);
if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err);
goto PopCtxExit;
}
for (int i = 0; i < NV12_PLANES; i++) {
CUarray cudaArray;
// Get a pointer to the mapped array for this plane
err = m_Funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, m_Resources[i], 0, 0);
if (err != CUDA_SUCCESS) { if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err); SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err);
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream); goto UnmapExit;
funcs->cuGraphicsUnregisterResource(cudaResource);
goto Exit;
} }
// Do the copy
CUDA_MEMCPY2D cu2d = { CUDA_MEMCPY2D cu2d = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE, .srcMemoryType = CU_MEMORYTYPE_DEVICE,
.srcDevice = (CUdeviceptr)frame->data[i], .srcDevice = (CUdeviceptr)frame->data[i],
@ -132,27 +195,19 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame)
.WidthInBytes = (size_t)frame->width, .WidthInBytes = (size_t)frame->width,
.Height = (size_t)frame->height >> i .Height = (size_t)frame->height >> i
}; };
err = m_Funcs->cuMemcpy2D(&cu2d);
// Do the copy
err = funcs->cuMemcpy2D(&cu2d);
if (err != CUDA_SUCCESS) { if (err != CUDA_SUCCESS) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err); SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err);
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream); goto UnmapExit;
funcs->cuGraphicsUnregisterResource(cudaResource);
goto Exit;
} }
funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream);
funcs->cuGraphicsUnregisterResource(cudaResource);
} }
ret = true; UnmapExit:
m_Funcs->cuGraphicsUnmapResources(NV12_PLANES, m_Resources, m_Context->stream);
Exit: PopCtxExit:
{ {
CUcontext dummy; CUcontext dummy;
funcs->cuCtxPopCurrent(&dummy); m_Funcs->cuCtxPopCurrent(&dummy);
} }
return ret; return err == CUDA_SUCCESS;
} }

View file

@ -2,6 +2,12 @@
#include "renderer.h" #include "renderer.h"
#include <ffnvcodec/dynlink_loader.h>
extern "C" {
#include <libavutil/hwcontext_cuda.h>
}
class CUDARenderer : public IFFmpegRenderer { class CUDARenderer : public IFFmpegRenderer {
public: public:
CUDARenderer(); CUDARenderer();
@ -12,10 +18,25 @@ public:
virtual bool needsTestFrame() override; virtual bool needsTestFrame() override;
virtual bool isDirectRenderingSupported() override; virtual bool isDirectRenderingSupported() override;
// Helper function used by SDLRenderer to read our CUDA frame
static bool copyCudaFrameToBoundTexture(AVFrame* frame);
private: private:
AVBufferRef* m_HwContext; AVBufferRef* m_HwContext;
}; };
#define NV12_PLANES 2
// Helper class used by SDLRenderer to read our CUDA frame.
//
// Owns the dynamically loaded CUDA function table and one CUDA graphics
// resource per NV12 plane. The destructor unregisters any resources that are
// still registered and then frees the function table.
class CUDAGLInteropHelper {
public:
    // Loads the CUDA library and remembers the CUDA device context found in
    // context->hwctx. If loading fails the error is logged and every other
    // method becomes a failing no-op.
    CUDAGLInteropHelper(AVHWDeviceContext* context);
    ~CUDAGLInteropHelper();

    // Non-copyable: a copy would share m_Funcs and m_Resources with the
    // original, so both destructors would unregister the same graphics
    // resources and free the same function table.
    CUDAGLInteropHelper(const CUDAGLInteropHelper&) = delete;
    CUDAGLInteropHelper& operator=(const CUDAGLInteropHelper&) = delete;

    // Registers, for each NV12 plane, the GL texture reported by
    // GL_TEXTURE_BINDING_2D as a CUDA graphics resource. On failure, anything
    // registered so far is unregistered again and false is returned.
    bool registerBoundTextures();

    // Unregisters every plane resource that is currently registered and
    // resets its slot to 0. Safe to call when nothing is registered.
    void unregisterTextures();

    // Maps the registered resources, copies each plane of the frame into its
    // mapped array with cuMemcpy2D, then unmaps. Returns true only if every
    // step succeeded.
    bool copyCudaFrameToTextures(AVFrame* frame);

private:
    // CUDA driver entry points; nullptr if the library failed to load
    CudaFunctions* m_Funcs;

    // Taken from the AVHWDeviceContext passed to the constructor; this class
    // never frees it
    AVCUDADeviceContext* m_Context;

    // One registered graphics resource per plane; 0 means "not registered"
    CUgraphicsResource m_Resources[NV12_PLANES];
};

View file

@ -5,20 +5,26 @@
#include <Limelight.h> #include <Limelight.h>
#ifdef HAVE_CUDA
#include "cuda.h"
#endif
SdlRenderer::SdlRenderer() SdlRenderer::SdlRenderer()
: m_Renderer(nullptr), : m_Renderer(nullptr),
m_Texture(nullptr), m_Texture(nullptr),
m_SwPixelFormat(AV_PIX_FMT_NONE) m_SwPixelFormat(AV_PIX_FMT_NONE)
{ {
SDL_zero(m_OverlayTextures); SDL_zero(m_OverlayTextures);
#ifdef HAVE_CUDA
m_CudaGLHelper = nullptr;
#endif
} }
SdlRenderer::~SdlRenderer() SdlRenderer::~SdlRenderer()
{ {
#ifdef HAVE_CUDA
if (m_CudaGLHelper != nullptr) {
delete m_CudaGLHelper;
}
#endif
for (int i = 0; i < Overlay::OverlayMax; i++) { for (int i = 0; i < Overlay::OverlayMax; i++) {
if (m_OverlayTextures[i] != nullptr) { if (m_OverlayTextures[i] != nullptr) {
SDL_DestroyTexture(m_OverlayTextures[i]); SDL_DestroyTexture(m_OverlayTextures[i]);
@ -208,6 +214,7 @@ void SdlRenderer::renderFrame(AVFrame* frame)
} }
if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) { if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) {
ReadbackRetry:
// If we are acting as the frontend for a hardware // If we are acting as the frontend for a hardware
// accelerated decoder, we'll need to read the frame // accelerated decoder, we'll need to read the frame
// back to render it. // back to render it.
@ -293,13 +300,28 @@ void SdlRenderer::renderFrame(AVFrame* frame)
SDL_GetError()); SDL_GetError());
goto Exit; goto Exit;
} }
#ifdef HAVE_CUDA
if (frame->format == AV_PIX_FMT_CUDA) {
SDL_assert(m_CudaGLHelper == nullptr);
m_CudaGLHelper = new CUDAGLInteropHelper(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx);
SDL_GL_BindTexture(m_Texture, nullptr, nullptr);
if (!m_CudaGLHelper->registerBoundTextures()) {
// If we can't register textures, fall back to normal read-back rendering
delete m_CudaGLHelper;
m_CudaGLHelper = nullptr;
}
SDL_GL_UnbindTexture(m_Texture);
}
#endif
} }
if (frame->format == AV_PIX_FMT_CUDA) { if (frame->format == AV_PIX_FMT_CUDA) {
#ifdef HAVE_CUDA #ifdef HAVE_CUDA
SDL_GL_BindTexture(m_Texture, nullptr, nullptr); if (m_CudaGLHelper == nullptr || !m_CudaGLHelper->copyCudaFrameToTextures(frame)) {
CUDARenderer::copyCudaFrameToBoundTexture(frame); goto ReadbackRetry;
SDL_GL_UnbindTexture(m_Texture); }
#else #else
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Got CUDA frame, but not built with CUDA support!"); "Got CUDA frame, but not built with CUDA support!");

View file

@ -2,6 +2,10 @@
#include "renderer.h" #include "renderer.h"
#ifdef HAVE_CUDA
#include "cuda.h"
#endif
class SdlRenderer : public IFFmpegRenderer { class SdlRenderer : public IFFmpegRenderer {
public: public:
SdlRenderer(); SdlRenderer();
@ -21,5 +25,9 @@ private:
int m_SwPixelFormat; int m_SwPixelFormat;
SDL_Texture* m_OverlayTextures[Overlay::OverlayMax]; SDL_Texture* m_OverlayTextures[Overlay::OverlayMax];
SDL_Rect m_OverlayRects[Overlay::OverlayMax]; SDL_Rect m_OverlayRects[Overlay::OverlayMax];
#ifdef HAVE_CUDA
CUDAGLInteropHelper* m_CudaGLHelper;
#endif
}; };