// moonlight-qt/app/streaming/video/ffmpeg.cpp

#include <Limelight.h>
#include "ffmpeg.h"
#include "streaming/streamutils.h"
#include "streaming/session.h"
#include <h264_stream.h>
#include "ffmpeg-renderers/sdlvid.h"
#ifdef Q_OS_WIN32
#include "ffmpeg-renderers/dxva2.h"
#include "ffmpeg-renderers/d3d11va.h"
#endif
#ifdef Q_OS_DARWIN
#include "ffmpeg-renderers/vt.h"
#endif
#ifdef HAVE_LIBVA
#include "ffmpeg-renderers/vaapi.h"
#endif
#ifdef HAVE_LIBVDPAU
#include "ffmpeg-renderers/vdpau.h"
#endif
#ifdef HAVE_MMAL
#include "ffmpeg-renderers/mmal.h"
#endif
#ifdef HAVE_DRM
#include "ffmpeg-renderers/drm.h"
#endif
#ifdef HAVE_EGL
#include "ffmpeg-renderers/eglvid.h"
#endif
#ifdef HAVE_CUDA
#include "ffmpeg-renderers/cuda.h"
#endif
// This is gross but it allows us to use sizeof()
#include "ffmpeg_videosamples.cpp"
#define MAX_SPS_EXTRA_SIZE 16
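// Number of consecutive decode failures after which we push an
// SDL_RENDER_DEVICE_RESET event to tear down and recreate the decoder.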
#define FAILED_DECODES_RESET_THRESHOLD 20
bool FFmpegVideoDecoder::isHardwareAccelerated()
{
return m_HwDecodeCfg != nullptr ||
(m_VideoDecoderCtx->codec->capabilities & AV_CODEC_CAP_HARDWARE) != 0;
}
bool FFmpegVideoDecoder::isAlwaysFullScreen()
{
return m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_FULLSCREEN_ONLY;
}
bool FFmpegVideoDecoder::isHdrSupported()
{
return m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_HDR_SUPPORT;
}
void FFmpegVideoDecoder::setHdrMode(bool enabled)
{
m_FrontendRenderer->setHdrMode(enabled);
}
int FFmpegVideoDecoder::getDecoderCapabilities()
{
int capabilities = m_BackendRenderer->getDecoderCapabilities();
if (!isHardwareAccelerated()) {
// Slice up to 4 times for parallel CPU decoding, one slice per core
int slices = qMin(MAX_SLICES, SDL_GetCPUCount());
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Encoder configured for %d slices per frame",
slices);
capabilities |= CAPABILITY_SLICES_PER_FRAME(slices);
}
// We use our own decoder thread with the "pull" model
capabilities |= CAPABILITY_PULL_RENDERER;
return capabilities;
}
int FFmpegVideoDecoder::getDecoderColorspace()
{
return m_FrontendRenderer->getDecoderColorspace();
}
int FFmpegVideoDecoder::getDecoderColorRange()
{
return m_FrontendRenderer->getDecoderColorRange();
}
QSize FFmpegVideoDecoder::getDecoderMaxResolution()
{
if (m_BackendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_1080P_MAX) {
return QSize(1920, 1080);
}
else {
// No known maximum
return QSize(0, 0);
}
}
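// get_format() callback invoked by FFmpeg with a list of candidate pixel
// formats terminated by AV_PIX_FMT_NONE. Returning AV_PIX_FMT_NONE rejects
// them all and causes decoding to fail rather than silently falling back
// to software decoding.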
enum AVPixelFormat FFmpegVideoDecoder::ffGetFormat(AVCodecContext* context,
const enum AVPixelFormat* pixFmts)
{
FFmpegVideoDecoder* decoder = (FFmpegVideoDecoder*)context->opaque;
const enum AVPixelFormat *p;
for (p = pixFmts; *p != AV_PIX_FMT_NONE; p++) {
// Only match our hardware decoding codec or preferred SW pixel
// format (if not using hardware decoding). It's crucial
// to override the default get_format() which will try
// to gracefully fall back to software decode and break us.
if (*p == (decoder->m_HwDecodeCfg ? decoder->m_HwDecodeCfg->pix_fmt : context->pix_fmt) &&
decoder->m_BackendRenderer->prepareDecoderContextInGetFormat(context, *p)) {
return *p;
}
}
// Failed to match the preferred pixel formats. Try non-preferred options for non-hwaccel decoders.
if (decoder->m_HwDecodeCfg == nullptr) {
for (p = pixFmts; *p != AV_PIX_FMT_NONE; p++) {
if (decoder->m_FrontendRenderer->isPixelFormatSupported(decoder->m_VideoFormat, *p) &&
decoder->m_BackendRenderer->prepareDecoderContextInGetFormat(context, *p)) {
return *p;
}
}
}
return AV_PIX_FMT_NONE;
}
FFmpegVideoDecoder::FFmpegVideoDecoder(bool testOnly)
: m_Pkt(av_packet_alloc()),
m_VideoDecoderCtx(nullptr),
m_DecodeBuffer(1024 * 1024, 0),
m_HwDecodeCfg(nullptr),
m_BackendRenderer(nullptr),
m_FrontendRenderer(nullptr),
m_ConsecutiveFailedDecodes(0),
m_Pacer(nullptr),
m_FramesIn(0),
m_FramesOut(0),
m_LastFrameNumber(0),
m_StreamFps(0),
m_VideoFormat(0),
m_NeedsSpsFixup(false),
m_TestOnly(testOnly),
m_DecoderThread(nullptr)
{
SDL_zero(m_ActiveWndVideoStats);
SDL_zero(m_LastWndVideoStats);
SDL_zero(m_GlobalVideoStats);
SDL_AtomicSet(&m_DecoderThreadShouldQuit, 0);
// Use linear filtering when renderer scaling is required
SDL_SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "1");
}
FFmpegVideoDecoder::~FFmpegVideoDecoder()
{
reset();
// Set log level back to default.
// NB: We don't do this in reset() because we want
// to preserve the log level across reset() during
// test initialization.
av_log_set_level(AV_LOG_INFO);
av_packet_free(&m_Pkt);
}
IFFmpegRenderer* FFmpegVideoDecoder::getBackendRenderer()
{
return m_BackendRenderer;
}
void FFmpegVideoDecoder::reset()
{
// Terminate the decoder thread before doing anything else.
// It might be touching things we're about to free.
if (m_DecoderThread != nullptr) {
SDL_AtomicSet(&m_DecoderThreadShouldQuit, 1);
LiWakeWaitForVideoFrame();
SDL_WaitThread(m_DecoderThread, NULL);
SDL_AtomicSet(&m_DecoderThreadShouldQuit, 0);
m_DecoderThread = nullptr;
}
m_FramesIn = m_FramesOut = 0;
m_FrameInfoQueue.clear();
delete m_Pacer;
m_Pacer = nullptr;
// This must be called after deleting Pacer because it
// may be holding AVFrames to free in its destructor.
// However, it must be called before deleting the IFFmpegRenderer
// since the codec context may be referencing objects that we
// need to delete in the renderer destructor.
avcodec_free_context(&m_VideoDecoderCtx);
if (!m_TestOnly) {
Session::get()->getOverlayManager().setOverlayRenderer(nullptr);
}
// If we have a separate frontend renderer, free that first
if (m_FrontendRenderer != m_BackendRenderer) {
delete m_FrontendRenderer;
}
delete m_BackendRenderer;
m_FrontendRenderer = m_BackendRenderer = nullptr;
if (!m_TestOnly) {
logVideoStats(m_GlobalVideoStats, "Global video stats");
}
else {
// Test-only decoders can't have any frames submitted
SDL_assert(m_GlobalVideoStats.totalFrames == 0);
}
}
bool FFmpegVideoDecoder::createFrontendRenderer(PDECODER_PARAMETERS params, bool useAlternateFrontend)
{
if (useAlternateFrontend) {
#ifdef HAVE_DRM
// If we're trying to stream HDR, we need to use the DRM renderer in direct
// rendering mode so it can set the HDR metadata on the display. EGL does
// not currently support this (and even if it did, Mesa and Wayland don't
// currently have protocols to actually get that metadata to the display).
if ((params->videoFormat & VIDEO_FORMAT_MASK_10BIT) && m_BackendRenderer->canExportDrmPrime()) {
m_FrontendRenderer = new DrmRenderer(m_BackendRenderer);
if (m_FrontendRenderer->initialize(params)) {
return true;
}
delete m_FrontendRenderer;
m_FrontendRenderer = nullptr;
}
#endif
#ifdef HAVE_EGL
if (m_BackendRenderer->canExportEGL()) {
m_FrontendRenderer = new EGLRenderer(m_BackendRenderer);
if (m_FrontendRenderer->initialize(params)) {
return true;
}
delete m_FrontendRenderer;
m_FrontendRenderer = nullptr;
}
#endif
// If we made it here, we failed to create an alternate (DRM/EGL) frontend renderer
return false;
}
if (m_BackendRenderer->isDirectRenderingSupported()) {
// The backend renderer can render to the display
m_FrontendRenderer = m_BackendRenderer;
}
else {
// The backend renderer cannot directly render to the display, so
// we will create an SDL renderer to draw the frames.
m_FrontendRenderer = new SdlRenderer();
if (!m_FrontendRenderer->initialize(params)) {
return false;
}
}
return true;
}
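// Completes decoder setup once a backend renderer has been initialized:
// creates the frontend renderer and Pacer, opens the codec context, and
// (when requested) performs a trial decode of a canned test frame before
// committing to this decoder.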
bool FFmpegVideoDecoder::completeInitialization(const AVCodec* decoder, PDECODER_PARAMETERS params, bool testFrame, bool useAlternateFrontend)
{
// In test-only mode, we should only see test frames
SDL_assert(!m_TestOnly || testFrame);
// Create the frontend renderer based on the capabilities of the backend renderer
if (!createFrontendRenderer(params, useAlternateFrontend)) {
return false;
}
m_StreamFps = params->frameRate;
m_VideoFormat = params->videoFormat;
// Don't bother initializing Pacer if we're not actually going to render
if (!testFrame) {
m_Pacer = new Pacer(m_FrontendRenderer, &m_ActiveWndVideoStats);
if (!m_Pacer->initialize(params->window, params->frameRate,
params->enableFramePacing || (params->enableVsync && (m_FrontendRenderer->getRendererAttributes() & RENDERER_ATTRIBUTE_FORCE_PACING)))) {
return false;
}
}
m_VideoDecoderCtx = avcodec_alloc_context3(decoder);
if (!m_VideoDecoderCtx) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unable to allocate video decoder context");
return false;
}
// Always request low delay decoding
m_VideoDecoderCtx->flags |= AV_CODEC_FLAG_LOW_DELAY;
// Allow display of corrupt frames and frames missing references
m_VideoDecoderCtx->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT;
m_VideoDecoderCtx->flags2 |= AV_CODEC_FLAG2_SHOW_ALL;
// Report decoding errors to allow us to request a key frame
//
// With HEVC streams, FFmpeg can drop a frame (hwaccel->start_frame() fails)
// without telling us. Since we have an infinite GOP length, this causes artifacts
// on screen that persist for a long time. It's easy to cause this condition
// by using NVDEC and delaying 100 ms randomly in the render path so the decoder
// runs out of output buffers.
m_VideoDecoderCtx->err_recognition = AV_EF_EXPLODE;
// Enable slice multi-threading for software decoding
if (!isHardwareAccelerated()) {
m_VideoDecoderCtx->thread_type = FF_THREAD_SLICE;
m_VideoDecoderCtx->thread_count = qMin(MAX_SLICES, SDL_GetCPUCount());
}
else {
// No threading for HW decode
m_VideoDecoderCtx->thread_count = 1;
}
// Set up decoding parameters
m_VideoDecoderCtx->width = params->width;
m_VideoDecoderCtx->height = params->height;
m_VideoDecoderCtx->pix_fmt = m_FrontendRenderer->getPreferredPixelFormat(params->videoFormat);
m_VideoDecoderCtx->get_format = ffGetFormat;
AVDictionary* options = nullptr;
// Allow the backend renderer to attach data to this decoder
if (!m_BackendRenderer->prepareDecoderContext(m_VideoDecoderCtx, &options)) {
return false;
}
// Ensure nothing has overridden our ffGetFormat
SDL_assert(m_VideoDecoderCtx->get_format == ffGetFormat);
// Stash a pointer to this object in the context
SDL_assert(m_VideoDecoderCtx->opaque == nullptr);
m_VideoDecoderCtx->opaque = this;
int err = avcodec_open2(m_VideoDecoderCtx, decoder, &options);
av_dict_free(&options);
if (err < 0) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unable to open decoder for format: %x",
params->videoFormat);
return false;
}
// FFmpeg doesn't completely initialize the codec until the codec
// config data comes in. This would be too late for us to change
// our minds on the selected video codec, so we'll do a trial run
// now to see if things will actually work when the video stream
// comes in.
if (testFrame) {
switch (params->videoFormat) {
case VIDEO_FORMAT_H264:
m_Pkt->data = (uint8_t*)k_H264TestFrame;
m_Pkt->size = sizeof(k_H264TestFrame);
break;
case VIDEO_FORMAT_H265:
m_Pkt->data = (uint8_t*)k_HEVCMainTestFrame;
m_Pkt->size = sizeof(k_HEVCMainTestFrame);
break;
case VIDEO_FORMAT_H265_MAIN10:
m_Pkt->data = (uint8_t*)k_HEVCMain10TestFrame;
m_Pkt->size = sizeof(k_HEVCMain10TestFrame);
break;
default:
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"No test frame for format: %x",
params->videoFormat);
return false;
}
AVFrame* frame = av_frame_alloc();
if (!frame) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate frame");
return false;
}
// Some decoders won't output on the first frame, so we'll submit
// a few test frames if we get an EAGAIN error.
for (int retries = 0; retries < 5; retries++) {
// Most FFmpeg decoders process input using a "push" model.
// We'll see those fail here if the format is not supported.
err = avcodec_send_packet(m_VideoDecoderCtx, m_Pkt);
if (err < 0) {
av_frame_free(&frame);
char errorstring[512];
av_strerror(err, errorstring, sizeof(errorstring));
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Test decode failed (avcodec_send_packet): %s", errorstring);
return false;
}
// A few FFmpeg decoders (h264_mmal) process here using a "pull" model.
// Those decoders will fail here if the format is not supported.
err = avcodec_receive_frame(m_VideoDecoderCtx, frame);
if (err == AVERROR(EAGAIN)) {
// Wait a little while to let the hardware work
SDL_Delay(100);
}
else {
// Done!
break;
}
}
if (err < 0) {
char errorstring[512];
av_strerror(err, errorstring, sizeof(errorstring));
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Test decode failed (avcodec_receive_frame): %s", errorstring);
av_frame_free(&frame);
return false;
}
// Allow the renderer to do any validation it wants on this frame
if (!m_FrontendRenderer->testRenderFrame(frame)) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Test decode failed (testRenderFrame)");
av_frame_free(&frame);
return false;
}
av_frame_free(&frame);
}
else {
if ((params->videoFormat & VIDEO_FORMAT_MASK_H264) &&
!(m_BackendRenderer->getDecoderCapabilities() & CAPABILITY_REFERENCE_FRAME_INVALIDATION_AVC)) {
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"Using H.264 SPS fixup");
m_NeedsSpsFixup = true;
}
else {
m_NeedsSpsFixup = false;
}
// Tell overlay manager to use this frontend renderer
Session::get()->getOverlayManager().setOverlayRenderer(m_FrontendRenderer);
// Only create the decoder thread when instantiating the decoder for real. It will use APIs from
// moonlight-common-c that can only be legally called with an established connection.
m_DecoderThread = SDL_CreateThread(FFmpegVideoDecoder::decoderThreadProcThunk, "FFDecoder", (void*)this);
if (m_DecoderThread == nullptr) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Failed to create decoder thread: %s", SDL_GetError());
return false;
}
}
return true;
}
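// Accumulates one stats window into another and recomputes the derived
// FPS values over the combined measurement period.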
void FFmpegVideoDecoder::addVideoStats(VIDEO_STATS& src, VIDEO_STATS& dst)
{
dst.receivedFrames += src.receivedFrames;
dst.decodedFrames += src.decodedFrames;
dst.renderedFrames += src.renderedFrames;
dst.totalFrames += src.totalFrames;
dst.networkDroppedFrames += src.networkDroppedFrames;
dst.pacerDroppedFrames += src.pacerDroppedFrames;
dst.totalReassemblyTime += src.totalReassemblyTime;
dst.totalDecodeTime += src.totalDecodeTime;
dst.totalPacerTime += src.totalPacerTime;
dst.totalRenderTime += src.totalRenderTime;
if (!LiGetEstimatedRttInfo(&dst.lastRtt, &dst.lastRttVariance)) {
dst.lastRtt = 0;
dst.lastRttVariance = 0;
}
else {
// Our logic to determine if RTT is valid depends on us never
// getting an RTT of 0. ENet currently ensures RTTs are >= 1.
SDL_assert(dst.lastRtt > 0);
}
Uint32 now = SDL_GetTicks();
// Initialize the measurement start point if this is the first video stat window
if (!dst.measurementStartTimestamp) {
dst.measurementStartTimestamp = src.measurementStartTimestamp;
}
// The following code assumes the global measure was already started first
SDL_assert(dst.measurementStartTimestamp <= src.measurementStartTimestamp);
dst.totalFps = (float)dst.totalFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.receivedFps = (float)dst.receivedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.decodedFps = (float)dst.decodedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
dst.renderedFps = (float)dst.renderedFrames / ((float)(now - dst.measurementStartTimestamp) / 1000);
}
void FFmpegVideoDecoder::stringifyVideoStats(VIDEO_STATS& stats, char* output)
{
int offset = 0;
const char* codecString;
// Start with an empty string
output[offset] = 0;
switch (m_VideoFormat)
{
case VIDEO_FORMAT_H264:
codecString = "H.264";
break;
case VIDEO_FORMAT_H265:
codecString = "HEVC";
break;
case VIDEO_FORMAT_H265_MAIN10:
if (LiGetCurrentHostDisplayHdrMode()) {
codecString = "HEVC Main 10 HDR";
}
else {
codecString = "HEVC Main 10 SDR";
}
break;
default:
SDL_assert(false);
codecString = "UNKNOWN";
break;
}
if (stats.receivedFps > 0) {
if (m_VideoDecoderCtx != nullptr) {
offset += sprintf(&output[offset],
"Video stream: %dx%d %.2f FPS (Codec: %s)\n",
m_VideoDecoderCtx->width,
m_VideoDecoderCtx->height,
stats.totalFps,
codecString);
}
offset += sprintf(&output[offset],
"Incoming frame rate from network: %.2f FPS\n"
"Decoding frame rate: %.2f FPS\n"
"Rendering frame rate: %.2f FPS\n",
stats.receivedFps,
stats.decodedFps,
stats.renderedFps);
}
if (stats.renderedFrames != 0) {
char rttString[32];
if (stats.lastRtt != 0) {
sprintf(rttString, "%u ms (variance: %u ms)", stats.lastRtt, stats.lastRttVariance);
}
else {
sprintf(rttString, "N/A");
}
offset += sprintf(&output[offset],
"Frames dropped by your network connection: %.2f%%\n"
"Frames dropped due to network jitter: %.2f%%\n"
"Average network latency: %s\n"
"Average decoding time: %.2f ms\n"
"Average frame queue delay: %.2f ms\n"
"Average rendering time (including monitor V-sync latency): %.2f ms\n",
(float)stats.networkDroppedFrames / stats.totalFrames * 100,
(float)stats.pacerDroppedFrames / stats.decodedFrames * 100,
rttString,
(float)stats.totalDecodeTime / stats.decodedFrames,
(float)stats.totalPacerTime / stats.renderedFrames,
(float)stats.totalRenderTime / stats.renderedFrames);
}
}
void FFmpegVideoDecoder::logVideoStats(VIDEO_STATS& stats, const char* title)
{
if (stats.renderedFps > 0 || stats.renderedFrames != 0) {
char videoStatsStr[512];
stringifyVideoStats(stats, videoStatsStr);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"%s", title);
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION,
"----------------------------------------------------------\n%s",
videoStatsStr);
}
}
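// Creates a renderer for a hwaccel config. Selection runs in two passes:
// pass 0 covers our preferred hwaccel implementations, while pass 1 picks
// up second-tier options (such as CUDA) that we only try as a last resort.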
IFFmpegRenderer* FFmpegVideoDecoder::createHwAccelRenderer(const AVCodecHWConfig* hwDecodeCfg, int pass)
{
if (!(hwDecodeCfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) {
return nullptr;
}
// First pass using our top-tier hwaccel implementations
if (pass == 0) {
switch (hwDecodeCfg->device_type) {
#ifdef Q_OS_WIN32
// DXVA2 appears in the hwaccel list before D3D11VA, so we will prefer it.
//
// There is logic in DXVA2 that may elect to fail on the first selection pass
// to allow D3D11VA to be used in cases where it is known to be better.
case AV_HWDEVICE_TYPE_DXVA2:
return new DXVA2Renderer(pass);
case AV_HWDEVICE_TYPE_D3D11VA:
return new D3D11VARenderer(pass);
#endif
#ifdef Q_OS_DARWIN
case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
return VTRendererFactory::createRenderer();
#endif
#ifdef HAVE_LIBVA
case AV_HWDEVICE_TYPE_VAAPI:
return new VAAPIRenderer();
#endif
#ifdef HAVE_LIBVDPAU
case AV_HWDEVICE_TYPE_VDPAU:
return new VDPAURenderer();
#endif
#ifdef HAVE_DRM
case AV_HWDEVICE_TYPE_DRM:
return new DrmRenderer();
#endif
default:
return nullptr;
}
}
// Second pass for our second-tier hwaccel implementations
else if (pass == 1) {
switch (hwDecodeCfg->device_type) {
#ifdef HAVE_CUDA
case AV_HWDEVICE_TYPE_CUDA:
// CUDA should only be used to cover the NVIDIA+Wayland case
return new CUDARenderer();
#endif
#ifdef Q_OS_WIN32
// This gives DXVA2 and D3D11VA another shot at handling cases where they
// chose to purposefully fail in the first selection pass to allow a more
// optimal decoder to be tried.
case AV_HWDEVICE_TYPE_DXVA2:
return new DXVA2Renderer(pass);
case AV_HWDEVICE_TYPE_D3D11VA:
return new D3D11VARenderer(pass);
#endif
default:
return nullptr;
}
}
else {
SDL_assert(false);
return nullptr;
}
}
bool FFmpegVideoDecoder::tryInitializeRenderer(const AVCodec* decoder,
PDECODER_PARAMETERS params,
const AVCodecHWConfig* hwConfig,
std::function<IFFmpegRenderer*()> createRendererFunc)
{
m_HwDecodeCfg = hwConfig;
// i == 0 - Indirect via EGL or DRM frontend with zero-copy DMA-BUF passing
// i == 1 - Direct rendering or indirect via SDL read-back
#ifdef HAVE_EGL
for (int i = 0; i < 2; i++) {
#else
for (int i = 1; i < 2; i++) {
#endif
SDL_assert(m_BackendRenderer == nullptr);
if ((m_BackendRenderer = createRendererFunc()) != nullptr &&
m_BackendRenderer->initialize(params) &&
completeInitialization(decoder, params, m_TestOnly || m_BackendRenderer->needsTestFrame(), i == 0 /* EGL/DRM */)) {
if (m_TestOnly) {
// This decoder is only for testing capabilities, so don't bother
// creating a usable renderer
return true;
}
if (m_BackendRenderer->needsTestFrame()) {
// The test worked, so now let's initialize it for real
reset();
if ((m_BackendRenderer = createRendererFunc()) != nullptr &&
m_BackendRenderer->initialize(params) &&
completeInitialization(decoder, params, false, i == 0 /* EGL/DRM */)) {
return true;
}
else {
SDL_LogCritical(SDL_LOG_CATEGORY_APPLICATION,
"Decoder failed to initialize after successful test");
reset();
}
}
else {
// No test required. Good to go now.
return true;
}
}
else {
// Failed to initialize, so keep looking
reset();
}
}
// reset() must be called before we reach this point!
SDL_assert(m_BackendRenderer == nullptr);
return false;
}
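// NB: These macros are expanded inside tryInitializeRendererForDecoderByName()
// and rely on `decoder`, `params`, and the loop index `i` being in scope at
// the expansion site.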
#define TRY_PREFERRED_PIXEL_FORMAT(RENDERER_TYPE) \
{ \
RENDERER_TYPE renderer; \
if (renderer.getPreferredPixelFormat(params->videoFormat) == decoder->pix_fmts[i]) { \
if (tryInitializeRenderer(decoder, params, nullptr, \
[]() -> IFFmpegRenderer* { return new RENDERER_TYPE(); })) { \
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, \
"Chose " #RENDERER_TYPE " for codec %s due to preferred pixel format: 0x%x", \
decoder->name, decoder->pix_fmts[i]); \
return true; \
} \
} \
}
#define TRY_SUPPORTED_NON_PREFERRED_PIXEL_FORMAT(RENDERER_TYPE) \
{ \
RENDERER_TYPE renderer; \
if (decoder->pix_fmts[i] != renderer.getPreferredPixelFormat(params->videoFormat) && \
renderer.isPixelFormatSupported(params->videoFormat, decoder->pix_fmts[i])) { \
if (tryInitializeRenderer(decoder, params, nullptr, \
[]() -> IFFmpegRenderer* { return new RENDERER_TYPE(); })) { \
SDL_LogInfo(SDL_LOG_CATEGORY_APPLICATION, \
"Chose " #RENDERER_TYPE " for codec %s due to compatible pixel format: 0x%x", \
decoder->name, decoder->pix_fmts[i]); \
return true; \
} \
} \
}
bool FFmpegVideoDecoder::tryInitializeRendererForDecoderByName(const char *decoderName,
PDECODER_PARAMETERS params)
{
const AVCodec* decoder = avcodec_find_decoder_by_name(decoderName);
if (decoder == nullptr) {
return false;
}
// This might be a hwaccel decoder, so try any hw configs first
for (int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
if (!config) {
// No remaining hwaccel options
break;
}
// Initialize the hardware codec and submit a test frame if the renderer needs it
if (tryInitializeRenderer(decoder, params, config,
[config]() -> IFFmpegRenderer* { return createHwAccelRenderer(config, 0); })) {
return true;
}
}
if (decoder->pix_fmts == NULL) {
// Supported output pixel formats are unknown. We'll just try SDL and hope it can cope.
return tryInitializeRenderer(decoder, params, nullptr,
[]() -> IFFmpegRenderer* { return new SdlRenderer(); });
}
#ifdef HAVE_MMAL
// HACK: Avoid using YUV420P on h264_mmal. It can cause a deadlock inside the MMAL libraries.
// Even if it didn't completely deadlock us, the performance would likely be atrocious.
if (strcmp(decoderName, "h264_mmal") == 0) {
for (int i = 0; decoder->pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
TRY_PREFERRED_PIXEL_FORMAT(MmalRenderer);
}
for (int i = 0; decoder->pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
TRY_SUPPORTED_NON_PREFERRED_PIXEL_FORMAT(MmalRenderer);
}
// Give up if we can't use MmalRenderer for h264_mmal
return false;
}
#endif
// Check if any of our decoders prefer any of the pixel formats first
for (int i = 0; decoder->pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
#ifdef HAVE_DRM
TRY_PREFERRED_PIXEL_FORMAT(DrmRenderer);
#endif
TRY_PREFERRED_PIXEL_FORMAT(SdlRenderer);
}
// Nothing prefers any of them. Let's see if anyone will tolerate one.
for (int i = 0; decoder->pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
#ifdef HAVE_DRM
TRY_SUPPORTED_NON_PREFERRED_PIXEL_FORMAT(DrmRenderer);
#endif
TRY_SUPPORTED_NON_PREFERRED_PIXEL_FORMAT(SdlRenderer);
}
// If we made it here, we couldn't find anything
return false;
}
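// Top-level decoder selection. The search order is: user-specified decoder
// hints, hwaccel decoders (pass 0), special non-hwaccel hardware decoders
// (MMAL, RKMPP, NVV4L2, NVMPI, V4L2M2M), hwaccel decoders again (pass 1),
// and finally software decoding if the user's preference allows it.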
bool FFmpegVideoDecoder::initialize(PDECODER_PARAMETERS params)
{
// Increase log level until the first frame is decoded
av_log_set_level(AV_LOG_DEBUG);
// First try decoders that the user has manually specified via environment variables.
// These must output surfaces in one of the formats that one of our renderers supports,
// which is currently:
// - AV_PIX_FMT_DRM_PRIME
// - AV_PIX_FMT_MMAL
// - AV_PIX_FMT_YUV420P
// - AV_PIX_FMT_YUVJ420P
// - AV_PIX_FMT_NV12
// - AV_PIX_FMT_NV21
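//
// For example, forcing a specific decoder at launch (a usage sketch; the
// exact executable name depends on the platform and packaging):
//   $ H264_DECODER_HINT=h264_v4l2m2m ./moonlight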
{
QString h264DecoderHint = qgetenv("H264_DECODER_HINT");
if (!h264DecoderHint.isEmpty() && (params->videoFormat & VIDEO_FORMAT_MASK_H264)) {
QByteArray decoderString = h264DecoderHint.toLocal8Bit();
if (tryInitializeRendererForDecoderByName(decoderString.constData(), params)) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Using custom H.264 decoder (H264_DECODER_HINT): %s",
decoderString.constData());
return true;
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Custom H.264 decoder (H264_DECODER_HINT) failed to load: %s",
decoderString.constData());
}
}
}
{
QString hevcDecoderHint = qgetenv("HEVC_DECODER_HINT");
if (!hevcDecoderHint.isEmpty() && (params->videoFormat & VIDEO_FORMAT_MASK_H265)) {
QByteArray decoderString = hevcDecoderHint.toLocal8Bit();
if (tryInitializeRendererForDecoderByName(decoderString.constData(), params)) {
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Using custom HEVC decoder (HEVC_DECODER_HINT): %s",
decoderString.constData());
return true;
}
else {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Custom HEVC decoder (HEVC_DECODER_HINT) failed to load: %s",
decoderString.constData());
}
}
}
const AVCodec* decoder;
if (params->videoFormat & VIDEO_FORMAT_MASK_H264) {
decoder = avcodec_find_decoder(AV_CODEC_ID_H264);
}
else if (params->videoFormat & VIDEO_FORMAT_MASK_H265) {
decoder = avcodec_find_decoder(AV_CODEC_ID_HEVC);
}
else {
Q_ASSERT(false);
decoder = nullptr;
}
if (!decoder) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Unable to find decoder for format: %x",
params->videoFormat);
return false;
}
// Look for a hardware decoder first unless software-only
if (params->vds != StreamingPreferences::VDS_FORCE_SOFTWARE) {
// Look for the first matching hwaccel hardware decoder (pass 0)
for (int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
if (!config) {
// No remaining hwaccel options
break;
}
// Initialize the hardware codec and submit a test frame if the renderer needs it
if (tryInitializeRenderer(decoder, params, config,
[config]() -> IFFmpegRenderer* { return createHwAccelRenderer(config, 0); })) {
return true;
}
}
// Continue with special non-hwaccel hardware decoders
if (params->videoFormat & VIDEO_FORMAT_MASK_H264) {
QList<const char *> knownAvcCodecs = {
#ifdef HAVE_MMAL
"h264_mmal",
#endif
"h264_rkmpp",
"h264_nvv4l2",
"h264_nvmpi",
#ifndef HAVE_MMAL
// Only enable V4L2M2M by default on non-MMAL (RPi) builds. The performance
// of the V4L2M2M wrapper around MMAL is not enough for 1080p 60 FPS, so we
// would rather show the missing hardware acceleration warning when the user
// is in Full KMS mode rather than try to use a poorly performing hwaccel.
// See discussion on https://github.com/jc-kynesim/rpi-ffmpeg/pull/25
"h264_v4l2m2m",
#endif
};
for (const char* codec : knownAvcCodecs) {
if (tryInitializeRendererForDecoderByName(codec, params)) {
return true;
}
}
}
else {
QList<const char *> knownHevcCodecs = { "hevc_rkmpp", "hevc_nvv4l2", "hevc_nvmpi", "hevc_v4l2m2m" };
for (const char* codec : knownHevcCodecs) {
if (tryInitializeRendererForDecoderByName(codec, params)) {
return true;
}
}
}
// Look for the first matching hwaccel hardware decoder (pass 1)
// This picks up "second-tier" hwaccels like CUDA.
for (int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
if (!config) {
// No remaining hwaccel options
break;
}
// Initialize the hardware codec and submit a test frame if the renderer needs it
if (tryInitializeRenderer(decoder, params, config,
[config]() -> IFFmpegRenderer* { return createHwAccelRenderer(config, 1); })) {
return true;
}
}
}
// Fall back to software if no matching hardware decoder was found
// and if software fallback is allowed
if (params->vds != StreamingPreferences::VDS_FORCE_HARDWARE) {
if (tryInitializeRenderer(decoder, params, nullptr,
[]() -> IFFmpegRenderer* { return new SdlRenderer(); })) {
return true;
}
}
// No decoder worked
return false;
}
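// Copies one buffer-list entry into m_DecodeBuffer at the given offset,
// rewriting the SPS on the fly when the fixup for hardware decoders that
// lack AVC reference frame invalidation is required.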
void FFmpegVideoDecoder::writeBuffer(PLENTRY entry, int& offset)
{
if (m_NeedsSpsFixup && entry->bufferType == BUFFER_TYPE_SPS) {
h264_stream_t* stream = h264_new();
int nalStart, nalEnd;
// Read the old NALU
find_nal_unit((uint8_t*)entry->data, entry->length, &nalStart, &nalEnd);
read_nal_unit(stream,
(unsigned char *)&entry->data[nalStart],
nalEnd - nalStart);
SDL_assert(nalStart == 3 || nalStart == 4); // 3 or 4 byte Annex B start sequence
SDL_assert(nalEnd == entry->length);
// Fix up the SPS to what OS X needs to use hardware acceleration
stream->sps->num_ref_frames = 1;
stream->sps->vui.max_dec_frame_buffering = 1;
int initialOffset = offset;
// Copy the modified NALU data. This clobbers byte 0 and starts NALU data at byte 1.
// Since it prepended one extra byte, subtract one from the returned length.
offset += write_nal_unit(stream, (uint8_t*)&m_DecodeBuffer.data()[initialOffset + nalStart - 1],
MAX_SPS_EXTRA_SIZE + entry->length - nalStart) - 1;
// Copy the NALU prefix over from the original SPS
memcpy(&m_DecodeBuffer.data()[initialOffset], entry->data, nalStart);
offset += nalStart;
h264_free(stream);
}
else {
// Write the buffer as-is
memcpy(&m_DecodeBuffer.data()[offset],
entry->data,
entry->length);
offset += entry->length;
}
}
int FFmpegVideoDecoder::decoderThreadProcThunk(void *context)
{
((FFmpegVideoDecoder*)context)->decoderThreadProc();
return 0;
}
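// Decoder thread loop implementing the pull model: block for input while
// the pipeline is empty, then drain output frames from the codec, submitting
// additional input opportunistically whenever the decoder reports EAGAIN.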
void FFmpegVideoDecoder::decoderThreadProc()
{
while (!SDL_AtomicGet(&m_DecoderThreadShouldQuit)) {
if (m_FramesIn == m_FramesOut) {
VIDEO_FRAME_HANDLE handle;
PDECODE_UNIT du;
// Waiting for input. All output frames have been received.
// Block until we receive a new frame from the host.
if (!LiWaitForNextVideoFrame(&handle, &du)) {
// This might be a signal from the main thread to exit
continue;
}
LiCompleteVideoFrame(handle, submitDecodeUnit(du));
}
if (m_FramesIn != m_FramesOut) {
SDL_assert(m_FramesIn > m_FramesOut);
// We have output frames to receive. Let's poll until we get one,
// and submit new input data if/when we get it.
AVFrame* frame = av_frame_alloc();
if (!frame) {
// Failed to allocate a frame, but the decode unit was already
// completed successfully, so just try again on the next iteration
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"Failed to allocate frame");
continue;
}
int err;
do {
err = avcodec_receive_frame(m_VideoDecoderCtx, frame);
if (err == 0) {
SDL_assert(m_FrameInfoQueue.size() == m_FramesIn - m_FramesOut);
m_FramesOut++;
// Reset failed decodes count if we reached this far
m_ConsecutiveFailedDecodes = 0;
// Restore default log level after a successful decode
av_log_set_level(AV_LOG_INFO);
// Capture a frame timestamp to measure pacing delay
frame->pkt_dts = SDL_GetTicks();
if (!m_FrameInfoQueue.isEmpty()) {
// Data buffers in the DU are not valid here!
DECODE_UNIT du = m_FrameInfoQueue.dequeue();
// Count time in avcodec_send_packet() and avcodec_receive_frame()
// as time spent decoding. Also count time spent in the decode unit
// queue because that's directly caused by decoder latency.
m_ActiveWndVideoStats.totalDecodeTime += LiGetMillis() - du.enqueueTimeMs;
// Store the presentation time
frame->pts = du.presentationTimeMs;
}
m_ActiveWndVideoStats.decodedFrames++;
// Queue the frame for rendering (or render now if pacer is disabled)
m_Pacer->submitFrame(frame);
}
else if (err == AVERROR(EAGAIN)) {
VIDEO_FRAME_HANDLE handle;
PDECODE_UNIT du;
// No output data, so let's try to submit more input data,
// while we're waiting for this frame to come back.
if (LiPollNextVideoFrame(&handle, &du)) {
// FIXME: Handle EAGAIN on avcodec_send_packet() properly?
LiCompleteVideoFrame(handle, submitDecodeUnit(du));
}
else {
// No output data or input data. Let's wait a little bit.
SDL_Delay(2);
}
}
else {
char errorstring[512];
// FIXME: Should we pop an entry off m_FrameInfoQueue here?
av_strerror(err, errorstring, sizeof(errorstring));
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"avcodec_receive_frame() failed: %s (frame %d)",
errorstring,
!m_FrameInfoQueue.isEmpty() ? m_FrameInfoQueue.head().frameNumber : -1);
if (++m_ConsecutiveFailedDecodes == FAILED_DECODES_RESET_THRESHOLD) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Resetting decoder due to consistent failure");
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
// Don't consume any additional data
SDL_AtomicSet(&m_DecoderThreadShouldQuit, 1);
}
// Just in case the error resulted in the loss of the frame,
// request an IDR frame to reset our decoder state.
LiRequestIdrFrame();
}
} while (err == AVERROR(EAGAIN) && !SDL_AtomicGet(&m_DecoderThreadShouldQuit));
if (err != 0) {
// Free the frame if we failed to submit it
av_frame_free(&frame);
}
}
}
}
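// Called on the decoder thread for each decode unit pulled from
// moonlight-common-c. Reassembles the buffer list into a single packet,
// updates the stats windows, and feeds the packet to the codec. Returns
// DR_OK on success or DR_NEED_IDR to request a recovery IDR frame.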
int FFmpegVideoDecoder::submitDecodeUnit(PDECODE_UNIT du)
{
PLENTRY entry = du->bufferList;
int err;
SDL_assert(!m_TestOnly);
// If this is the first frame, reject anything that's not an IDR frame
if (m_FramesIn == 0 && du->frameType != FRAME_TYPE_IDR) {
return DR_NEED_IDR;
}
if (!m_LastFrameNumber) {
m_ActiveWndVideoStats.measurementStartTimestamp = SDL_GetTicks();
m_LastFrameNumber = du->frameNumber;
}
else {
// Any frame number greater than m_LastFrameNumber + 1 represents a dropped frame
m_ActiveWndVideoStats.networkDroppedFrames += du->frameNumber - (m_LastFrameNumber + 1);
m_ActiveWndVideoStats.totalFrames += du->frameNumber - (m_LastFrameNumber + 1);
m_LastFrameNumber = du->frameNumber;
}
// Flip stats windows roughly every second
if (SDL_TICKS_PASSED(SDL_GetTicks(), m_ActiveWndVideoStats.measurementStartTimestamp + 1000)) {
// Update overlay stats if it's enabled
if (Session::get()->getOverlayManager().isOverlayEnabled(Overlay::OverlayDebug)) {
VIDEO_STATS lastTwoWndStats = {};
addVideoStats(m_LastWndVideoStats, lastTwoWndStats);
addVideoStats(m_ActiveWndVideoStats, lastTwoWndStats);
stringifyVideoStats(lastTwoWndStats, Session::get()->getOverlayManager().getOverlayText(Overlay::OverlayDebug));
Session::get()->getOverlayManager().setOverlayTextUpdated(Overlay::OverlayDebug);
}
// Accumulate these values into the global stats
addVideoStats(m_ActiveWndVideoStats, m_GlobalVideoStats);
// Move this window into the last window slot and clear it for next window
SDL_memcpy(&m_LastWndVideoStats, &m_ActiveWndVideoStats, sizeof(m_ActiveWndVideoStats));
SDL_zero(m_ActiveWndVideoStats);
m_ActiveWndVideoStats.measurementStartTimestamp = SDL_GetTicks();
}
m_ActiveWndVideoStats.receivedFrames++;
m_ActiveWndVideoStats.totalFrames++;
int requiredBufferSize = du->fullLength;
if (du->frameType == FRAME_TYPE_IDR) {
// Add some extra space in case we need to do an SPS fixup
requiredBufferSize += MAX_SPS_EXTRA_SIZE;
}
// Ensure the decoder buffer is large enough
m_DecodeBuffer.reserve(requiredBufferSize + AV_INPUT_BUFFER_PADDING_SIZE);
int offset = 0;
while (entry != nullptr) {
writeBuffer(entry, offset);
entry = entry->next;
}
m_Pkt->data = reinterpret_cast<uint8_t*>(m_DecodeBuffer.data());
m_Pkt->size = offset;
if (du->frameType == FRAME_TYPE_IDR) {
m_Pkt->flags = AV_PKT_FLAG_KEY;
}
else {
m_Pkt->flags = 0;
}
m_ActiveWndVideoStats.totalReassemblyTime += du->enqueueTimeMs - du->receiveTimeMs;
err = avcodec_send_packet(m_VideoDecoderCtx, m_Pkt);
if (err < 0) {
char errorstring[512];
av_strerror(err, errorstring, sizeof(errorstring));
SDL_LogWarn(SDL_LOG_CATEGORY_APPLICATION,
"avcodec_send_packet() failed: %s (frame %d)",
errorstring,
du->frameNumber);
// If we've failed a bunch of decodes in a row, the decoder/renderer is
// clearly unhealthy, so let's generate a synthetic reset event to trigger
// the event loop to destroy and recreate the decoder.
if (++m_ConsecutiveFailedDecodes == FAILED_DECODES_RESET_THRESHOLD) {
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
"Resetting decoder due to consistent failure");
SDL_Event event;
event.type = SDL_RENDER_DEVICE_RESET;
SDL_PushEvent(&event);
// Don't consume any additional data
SDL_AtomicSet(&m_DecoderThreadShouldQuit, 1);
}
return DR_NEED_IDR;
}
m_FrameInfoQueue.enqueue(*du);
m_FramesIn++;
return DR_OK;
}
void FFmpegVideoDecoder::renderFrameOnMainThread()
{
m_Pacer->renderOnMainThread();
}