From 36e0468a993778b534076c2e44c31739894eafb3 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Mon, 13 Dec 2021 21:41:00 -0600 Subject: [PATCH] Optimize CUDA GL interop and provide fallback --- app/streaming/video/ffmpeg-renderers/cuda.cpp | 159 ++++++++++++------ app/streaming/video/ffmpeg-renderers/cuda.h | 27 ++- .../video/ffmpeg-renderers/sdlvid.cpp | 36 +++- app/streaming/video/ffmpeg-renderers/sdlvid.h | 8 + 4 files changed, 168 insertions(+), 62 deletions(-) diff --git a/app/streaming/video/ffmpeg-renderers/cuda.cpp b/app/streaming/video/ffmpeg-renderers/cuda.cpp index c1679301..035a51b9 100644 --- a/app/streaming/video/ffmpeg-renderers/cuda.cpp +++ b/app/streaming/video/ffmpeg-renderers/cuda.cpp @@ -1,13 +1,7 @@ #include "cuda.h" -#include - #include -extern "C" { - #include -} - CUDARenderer::CUDARenderer() : m_HwContext(nullptr) { @@ -63,35 +57,47 @@ bool CUDARenderer::isDirectRenderingSupported() return false; } -bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame) +CUDAGLInteropHelper::CUDAGLInteropHelper(AVHWDeviceContext* context) + : m_Funcs(nullptr), + m_Context((AVCUDADeviceContext*)context->hwctx) { - static CudaFunctions* funcs; - CUresult err; - AVCUDADeviceContext* devCtx = (AVCUDADeviceContext*)(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx->hwctx); - bool ret = false; + memset(m_Resources, 0, sizeof(m_Resources)); - if (!funcs) { - // One-time init of CUDA library - cuda_load_functions(&funcs, nullptr); - if (!funcs) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library"); - return false; - } + // One-time init of CUDA library + cuda_load_functions(&m_Funcs, nullptr); + if (m_Funcs == nullptr) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize CUDA library"); + return; + } +} + +CUDAGLInteropHelper::~CUDAGLInteropHelper() +{ + unregisterTextures(); + + if (m_Funcs != nullptr) { + cuda_free_functions(&m_Funcs); + } +} + +bool CUDAGLInteropHelper::registerBoundTextures() +{ + int err; + + if (m_Funcs == nullptr) { + // Already logged in constructor + return false; } - SDL_assert(frame->format == AV_PIX_FMT_CUDA); - // Push FFmpeg's CUDA context to use for our CUDA operations - err = funcs->cuCtxPushCurrent(devCtx->cuda_ctx); + err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx); if (err != CUDA_SUCCESS) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err); return false; } - // NV12 has 2 planes - for (int i = 0; i < 2; i++) { - CUgraphicsResource cudaResource; - CUarray cudaArray; + // Register each plane as a separate resource + for (int i = 0; i < NV12_PLANES; i++) { GLint tex; // Get the ID of this plane's texture @@ -99,29 +105,86 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame) glGetIntegerv(GL_TEXTURE_BINDING_2D, &tex); // Register it with CUDA - err = funcs->cuGraphicsGLRegisterImage(&cudaResource, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD); + err = m_Funcs->cuGraphicsGLRegisterImage(&m_Resources[i], tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD); if (err != CUDA_SUCCESS) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsGLRegisterImage() failed: %d", err); + m_Resources[i] = 0; + unregisterTextures(); goto Exit; } + } - // Map it to allow us to use it as a copy destination - err = funcs->cuGraphicsMapResources(1, &cudaResource, devCtx->stream); - if (err != CUDA_SUCCESS) { - SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err); - funcs->cuGraphicsUnregisterResource(cudaResource); - goto Exit; +Exit: + { + CUcontext dummy; + m_Funcs->cuCtxPopCurrent(&dummy); + } + return err == CUDA_SUCCESS; +} + +void CUDAGLInteropHelper::unregisterTextures() +{ + int err; + + if (m_Funcs == nullptr) { + // Already logged in constructor + return; + } + + // Push FFmpeg's CUDA context to use for our CUDA operations + err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx); + if (err != CUDA_SUCCESS) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err); + return; + } + + for (int i = 0; i < NV12_PLANES; i++) { + if (m_Resources[i] != 0) { + m_Funcs->cuGraphicsUnregisterResource(m_Resources[i]); + m_Resources[i] = 0; } + } - // Get a pointer to the mapped array - err = funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, cudaResource, 0, 0); + { + CUcontext dummy; + m_Funcs->cuCtxPopCurrent(&dummy); + } +} + +bool CUDAGLInteropHelper::copyCudaFrameToTextures(AVFrame* frame) +{ + int err; + + if (m_Funcs == nullptr) { + // Already logged in constructor + return false; + } + + // Push FFmpeg's CUDA context to use for our CUDA operations + err = m_Funcs->cuCtxPushCurrent(m_Context->cuda_ctx); + if (err != CUDA_SUCCESS) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuCtxPushCurrent() failed: %d", err); + return false; + } + + // Map our resources + err = m_Funcs->cuGraphicsMapResources(NV12_PLANES, m_Resources, m_Context->stream); + if (err != CUDA_SUCCESS) { + SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsMapResources() failed: %d", err); + goto PopCtxExit; + } + + for (int i = 0; i < NV12_PLANES; i++) { + CUarray cudaArray; + + // Get a pointer to the mapped array for this plane + err = m_Funcs->cuGraphicsSubResourceGetMappedArray(&cudaArray, m_Resources[i], 0, 0); if (err != CUDA_SUCCESS) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuGraphicsSubResourceGetMappedArray() failed: %d", err); - funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream); - funcs->cuGraphicsUnregisterResource(cudaResource); - goto Exit; + goto UnmapExit; } + // Do the copy CUDA_MEMCPY2D cu2d = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = (CUdeviceptr)frame->data[i], @@ -132,27 +195,19 @@ bool CUDARenderer::copyCudaFrameToBoundTexture(AVFrame* frame) .WidthInBytes = (size_t)frame->width, .Height = (size_t)frame->height >> i }; - - // Do the copy - err = funcs->cuMemcpy2D(&cu2d); + err = m_Funcs->cuMemcpy2D(&cu2d); if (err != CUDA_SUCCESS) { SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "cuMemcpy2D() failed: %d", err); - funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream); - funcs->cuGraphicsUnregisterResource(cudaResource); - goto Exit; + goto UnmapExit; } - - funcs->cuGraphicsUnmapResources(1, &cudaResource, devCtx->stream); - funcs->cuGraphicsUnregisterResource(cudaResource); } - ret = true; - -Exit: +UnmapExit: + m_Funcs->cuGraphicsUnmapResources(NV12_PLANES, m_Resources, m_Context->stream); +PopCtxExit: { CUcontext dummy; - funcs->cuCtxPopCurrent(&dummy); + m_Funcs->cuCtxPopCurrent(&dummy); } - return ret; + return err == CUDA_SUCCESS; } - diff --git a/app/streaming/video/ffmpeg-renderers/cuda.h b/app/streaming/video/ffmpeg-renderers/cuda.h index 7a0f5af7..086abcbf 100644 --- a/app/streaming/video/ffmpeg-renderers/cuda.h +++ b/app/streaming/video/ffmpeg-renderers/cuda.h @@ -2,6 +2,12 @@ #include "renderer.h" +#include + +extern "C" { + #include +} + class CUDARenderer : public IFFmpegRenderer { public: CUDARenderer(); @@ -12,10 +18,25 @@ public: virtual bool needsTestFrame() override; virtual bool isDirectRenderingSupported() override; - // Helper function used by SDLRenderer to read our CUDA frame - static bool copyCudaFrameToBoundTexture(AVFrame* frame); - private: AVBufferRef* m_HwContext; }; +#define NV12_PLANES 2 + +// Helper class used by SDLRenderer to read our CUDA frame +class CUDAGLInteropHelper { +public: + CUDAGLInteropHelper(AVHWDeviceContext* context); + ~CUDAGLInteropHelper(); + + bool registerBoundTextures(); + void unregisterTextures(); + + bool copyCudaFrameToTextures(AVFrame* frame); + +private: + CudaFunctions* m_Funcs; + AVCUDADeviceContext* m_Context; + CUgraphicsResource m_Resources[NV12_PLANES]; +}; diff --git a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp index 3048d8d2..27f4f0bb 100644 --- a/app/streaming/video/ffmpeg-renderers/sdlvid.cpp +++ b/app/streaming/video/ffmpeg-renderers/sdlvid.cpp @@ -5,20 +5,26 @@ #include -#ifdef HAVE_CUDA -#include "cuda.h" -#endif - SdlRenderer::SdlRenderer() : m_Renderer(nullptr), m_Texture(nullptr), m_SwPixelFormat(AV_PIX_FMT_NONE) { SDL_zero(m_OverlayTextures); + +#ifdef HAVE_CUDA + m_CudaGLHelper = nullptr; +#endif } SdlRenderer::~SdlRenderer() { +#ifdef HAVE_CUDA + if (m_CudaGLHelper != nullptr) { + delete m_CudaGLHelper; + } +#endif + for (int i = 0; i < Overlay::OverlayMax; i++) { if (m_OverlayTextures[i] != nullptr) { SDL_DestroyTexture(m_OverlayTextures[i]); @@ -208,6 +214,7 @@ void SdlRenderer::renderFrame(AVFrame* frame) } if (frame->hw_frames_ctx != nullptr && frame->format != AV_PIX_FMT_CUDA) { +ReadbackRetry: // If we are acting as the frontend for a hardware // accelerated decoder, we'll need to read the frame // back to render it. @@ -293,13 +300,28 @@ void SdlRenderer::renderFrame(AVFrame* frame) SDL_GetError()); goto Exit; } + +#ifdef HAVE_CUDA + if (frame->format == AV_PIX_FMT_CUDA) { + SDL_assert(m_CudaGLHelper == nullptr); + m_CudaGLHelper = new CUDAGLInteropHelper(((AVHWFramesContext*)frame->hw_frames_ctx->data)->device_ctx); + + SDL_GL_BindTexture(m_Texture, nullptr, nullptr); + if (!m_CudaGLHelper->registerBoundTextures()) { + // If we can't register textures, fall back to normal read-back rendering + delete m_CudaGLHelper; + m_CudaGLHelper = nullptr; + } + SDL_GL_UnbindTexture(m_Texture); + } +#endif } if (frame->format == AV_PIX_FMT_CUDA) { #ifdef HAVE_CUDA - SDL_GL_BindTexture(m_Texture, nullptr, nullptr); - CUDARenderer::copyCudaFrameToBoundTexture(frame); - SDL_GL_UnbindTexture(m_Texture); + if (m_CudaGLHelper == nullptr || !m_CudaGLHelper->copyCudaFrameToTextures(frame)) { + goto ReadbackRetry; + } #else SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Got CUDA frame, but not built with CUDA support!"); diff --git a/app/streaming/video/ffmpeg-renderers/sdlvid.h b/app/streaming/video/ffmpeg-renderers/sdlvid.h index 819435c3..9c827ce6 100644 --- a/app/streaming/video/ffmpeg-renderers/sdlvid.h +++ b/app/streaming/video/ffmpeg-renderers/sdlvid.h @@ -2,6 +2,10 @@ #include "renderer.h" +#ifdef HAVE_CUDA +#include "cuda.h" +#endif + class SdlRenderer : public IFFmpegRenderer { public: SdlRenderer(); @@ -21,5 +25,9 @@ private: int m_SwPixelFormat; SDL_Texture* m_OverlayTextures[Overlay::OverlayMax]; SDL_Rect m_OverlayRects[Overlay::OverlayMax]; + +#ifdef HAVE_CUDA + CUDAGLInteropHelper* m_CudaGLHelper; +#endif };