From d8d03cca35e08563fbd9d2c0e73753f1f44aaf19 Mon Sep 17 00:00:00 2001 From: Dmitry Ermilov Date: Thu, 26 Nov 2020 11:43:28 -0500 Subject: [PATCH] Custom latency checker output example: Total encode latency on session 2: 3303.009000 on 200 frames, average 16.515045, max 33.943000 Total encode latency on session 1: 3290.410000 on 200 frames, average 16.452050, max 29.489000 Total decode latency on session 0: 222.491000 on 201 frames, average 1.106920, max 6.213000 cmd line example: sample_multi_transcode -par par_1N.file -latency-stat par_1N.file example: -i::h264 /msdk/MEDIASDK_STREAMS/transcoder/H264/Jets_1280x720_60Hz_main_IPPP_f200_15M_FS0.264 -o::sink -i::source -b 2000 -o::h264 /dev/null -async 1 -i::source -b 2000 -o::h264 /dev/null -async 1 Signed-off-by: Dmitry Ermilov --- .../include/pipeline_transcode.h | 14 ++++ .../include/transcode_utils.h | 1 + .../src/pipeline_transcode.cpp | 66 +++++++++++++++++-- .../src/transcode_utils.cpp | 6 ++ 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/samples/sample_multi_transcode/include/pipeline_transcode.h b/samples/sample_multi_transcode/include/pipeline_transcode.h index df6befec69..98b6e54c65 100644 --- a/samples/sample_multi_transcode/include/pipeline_transcode.h +++ b/samples/sample_multi_transcode/include/pipeline_transcode.h @@ -325,6 +325,7 @@ namespace TranscodingSample mfxI32 monitorType; bool shouldUseGreedyFormula; + bool bLatencyMeasurement; bool enableQSVFF; bool bSingleTexture; @@ -389,6 +390,15 @@ namespace TranscodingSample mfxENCOutput encOutput; }; + struct Latency { + std::mutex mutex; + std::map data; + mfxF64 value = 0.; + mfxU64 amount = 0; + + mfxF64 maxLatency = 0; + }; + struct ExtendedSurface { mfxFrameSurface1 *pSurface; @@ -891,6 +901,7 @@ namespace TranscodingSample CIOStat outputStatistics; bool shouldUseGreedyFormula; + bool m_bLatencyMeasurement; #if MFX_VERSION >= 1022 // ROI data @@ -916,6 +927,9 @@ namespace TranscodingSample msdk_string m_strMfxParamsDumpFile; + Latency m_decLatency; + Latency m_encLatency; + void FillMBQPBuffer(mfxExtMBQP &qpMap, mfxU16 pictStruct); #endif //MFX_VERSION >= 1022 diff --git a/samples/sample_multi_transcode/include/transcode_utils.h b/samples/sample_multi_transcode/include/transcode_utils.h index 005cc514f8..341cec226e 100644 --- a/samples/sample_multi_transcode/include/transcode_utils.h +++ b/samples/sample_multi_transcode/include/transcode_utils.h @@ -92,6 +92,7 @@ namespace TranscodingSample bool bRobustFlag; bool bSoftRobustFlag; bool shouldUseGreedyFormula; + bool bLatencyMeasurement; std::vector m_lines; private: DISALLOW_COPY_AND_ASSIGN(CmdProcessor); diff --git a/samples/sample_multi_transcode/src/pipeline_transcode.cpp b/samples/sample_multi_transcode/src/pipeline_transcode.cpp index 238af9cf04..da2009cc19 100644 --- a/samples/sample_multi_transcode/src/pipeline_transcode.cpp +++ b/samples/sample_multi_transcode/src/pipeline_transcode.cpp @@ -153,6 +153,7 @@ CTranscodingPipeline::CTranscodingPipeline(): m_nReqFrameTime(0), m_nOutputFramesNum(0), shouldUseGreedyFormula(false), + m_bLatencyMeasurement(false), m_nRotationAngle(0) { MSDK_ZERO_MEMORY(m_RotateParam); @@ -544,6 +545,8 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) CTimer DevBusyTimer; DevBusyTimer.Start(); + + mfxU64 latencyTimestamp = 0; while (MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE < sts) { if (m_rawInput) @@ -578,6 +581,8 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) MSDK_CHECK_POINTER_SAFE(pmfxSurface, MFX_ERR_MEMORY_ALLOC, msdk_printf(MSDK_STRING("ERROR: No free surfaces in decoder pool (during long period)\n"))); // return an error if a free surface wasn't found } + latencyTimestamp = msdk_time_get_tick(); + if (!m_rawInput) { sts = m_pmfxDEC->DecodeFrameAsync(m_pmfxBS, pmfxSurface, &pExtSurface->pSurface, &pExtSurface->Syncp); @@ -602,13 +607,21 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) } //while processing - // HEVC SW requires additional synchronization - if( MFX_ERR_NONE == sts && isHEVCSW) + if (MFX_ERR_NONE == sts) { sts = m_pmfxSession->SyncOperation(pExtSurface->Syncp, MSDK_WAIT_INTERVAL); HandlePossibleGpuHang(sts); MSDK_CHECK_ERR_NONE_STATUS(sts, MFX_ERR_ABORTED, "Decode: SyncOperation failed"); } + + if (m_bLatencyMeasurement) + { + mfxU64 finish = msdk_time_get_tick(); + auto latency = (finish - latencyTimestamp) / 1000.; + m_decLatency.value += latency; + m_decLatency.maxLatency = latency > m_decLatency.maxLatency ? latency : m_decLatency.maxLatency; + m_decLatency.amount += 1; + } return sts; } // mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) @@ -656,8 +669,7 @@ mfxStatus CTranscodingPipeline::DecodeLastFrame(ExtendedSurface *pExtSurface) } } - // HEVC SW requires additional synchronization - if( MFX_ERR_NONE == sts && isHEVCSW) + if (MFX_ERR_NONE == sts) { sts = m_pmfxSession->SyncOperation(pExtSurface->Syncp, MSDK_WAIT_INTERVAL); HandlePossibleGpuHang(sts); @@ -741,9 +753,20 @@ mfxStatus CTranscodingPipeline::EncodeOneFrame(ExtendedSurface *pExtSurface, mfx for (;;) { + mfxU64 latencyTimestamp = msdk_time_get_tick(); + // at this point surface for encoder contains either a frame from file or a frame processed by vpp sts = m_pmfxENC->EncodeFrameAsync(pExtSurface->pEncCtrl, pExtSurface->pSurface, pBS, &pExtSurface->Syncp); + if (m_bLatencyMeasurement) + { + if (sts != MFX_WRN_DEVICE_BUSY && pExtSurface->pSurface) + { + std::lock_guard guard(m_encLatency.mutex); + m_encLatency.data[pExtSurface->pSurface->Data.TimeStamp] = latencyTimestamp; + } + } + if (MFX_ERR_NONE < sts && !pExtSurface->Syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) @@ -1934,6 +1957,24 @@ mfxStatus CTranscodingPipeline::PutBS() MSDK_CHECK_ERR_NONE_STATUS(sts, MFX_ERR_ABORTED, "Encode: SyncOperation failed"); } + if (m_bLatencyMeasurement) + { + mfxU64 finish = msdk_time_get_tick(); + for (auto p = m_encLatency.data.begin(); p != m_encLatency.data.end(); ++p) + { + if (p->first == pBitstreamEx->Bitstream.TimeStamp) + { + std::lock_guard guard(m_encLatency.mutex); + auto latency = (finish - p->second) / 1000.; + m_encLatency.value += latency; + m_encLatency.amount += 1; + m_encLatency.maxLatency = latency > m_encLatency.maxLatency ? latency : m_encLatency.maxLatency; + m_encLatency.data.erase(p); + break; + } + } + } + m_nOutputFramesNum++; //--- Time measurements @@ -2288,6 +2329,9 @@ mfxStatus CTranscodingPipeline::InitDecMfxParams(sInputParams *pInParams) decPostProc->Out.CropH = pInParams->nVppCompDstH; } #endif //MFX_VERSION >= 1022 + + m_mfxDecParams.mfx.DecodedOrder = true; + return MFX_ERR_NONE; }// mfxStatus CTranscodingPipeline::InitDecMfxParams() @@ -3598,6 +3642,7 @@ mfxStatus CTranscodingPipeline::Init(sInputParams *pParams, m_pParentPipeline = pParentPipeline; shouldUseGreedyFormula = pParams->shouldUseGreedyFormula; + m_bLatencyMeasurement = pParams->bLatencyMeasurement; m_nTimeout = pParams->nTimeout; @@ -4197,6 +4242,19 @@ size_t CTranscodingPipeline::GetRobustFlag() void CTranscodingPipeline::Close() { + if (m_bLatencyMeasurement) + { + if (m_decLatency.value) + msdk_printf( + MSDK_STRING("Total decode latency on session %u: %lf on %llu frames, average %lf, max %lf\n"), + GetPipelineID(), m_decLatency.value, m_decLatency.amount, m_decLatency.value / m_decLatency.amount, m_decLatency.maxLatency); + + if (m_encLatency.value) + msdk_printf( + MSDK_STRING("Total encode latency on session %u: %lf on %llu frames, average %lf, max %lf\n\n"), + GetPipelineID(), m_encLatency.value, m_encLatency.amount, m_encLatency.value / m_encLatency.amount, m_encLatency.maxLatency); + } + if (m_pmfxDEC.get()) m_pmfxDEC->Close(); diff --git a/samples/sample_multi_transcode/src/transcode_utils.cpp b/samples/sample_multi_transcode/src/transcode_utils.cpp index 529c790cc9..2a374ac432 100644 --- a/samples/sample_multi_transcode/src/transcode_utils.cpp +++ b/samples/sample_multi_transcode/src/transcode_utils.cpp @@ -464,6 +464,7 @@ CmdProcessor::CmdProcessor() shouldUseGreedyFormula=false; bRobustFlag = false; bSoftRobustFlag = false; + bLatencyMeasurement = false; } //CmdProcessor::CmdProcessor() @@ -619,6 +620,10 @@ mfxStatus CmdProcessor::ParseCmdLine(int argc, msdk_char *argv[]) } DumpLogFileName = argv[0]; } + else if (0 == msdk_strcmp(argv[0], MSDK_STRING("-latency-stat"))) + { + bLatencyMeasurement = true; + } else { break; @@ -1491,6 +1496,7 @@ mfxStatus CmdProcessor::ParseParamsForOneSession(mfxU32 argc, msdk_char *argv[]) InputParams.bSoftRobustFlag = true; InputParams.shouldUseGreedyFormula = shouldUseGreedyFormula; + InputParams.bLatencyMeasurement = bLatencyMeasurement; InputParams.statisticsWindowSize = statisticsWindowSize; InputParams.statisticsLogFile = statisticsLogFile;