diff --git a/samples/sample_multi_transcode/include/pipeline_transcode.h b/samples/sample_multi_transcode/include/pipeline_transcode.h index 6553d92e11..ad6a9799f8 100644 --- a/samples/sample_multi_transcode/include/pipeline_transcode.h +++ b/samples/sample_multi_transcode/include/pipeline_transcode.h @@ -329,6 +329,7 @@ namespace TranscodingSample mfxI32 monitorType; bool shouldUseGreedyFormula; + bool bLatencyMeasurement; bool enableQSVFF; bool bSingleTexture; @@ -399,6 +400,17 @@ namespace TranscodingSample mfxENCOutput encOutput; }; + struct Latency { + std::mutex mutex; + std::map data; + mfxF64 value = 0.; + mfxU64 amount = 0; + + mfxF64 maxLatency = 0; + + std::vector perFrameLatency; + }; + struct ExtendedSurface { mfxFrameSurface1 *pSurface; @@ -904,6 +916,7 @@ namespace TranscodingSample CIOStat outputStatistics; bool shouldUseGreedyFormula; + bool m_bLatencyMeasurement; #if MFX_VERSION >= 1022 // ROI data @@ -929,6 +942,9 @@ namespace TranscodingSample msdk_string m_strMfxParamsDumpFile; + Latency m_decLatency; + Latency m_encLatency; + void FillMBQPBuffer(mfxExtMBQP &qpMap, mfxU16 pictStruct); #endif //MFX_VERSION >= 1022 diff --git a/samples/sample_multi_transcode/include/transcode_utils.h b/samples/sample_multi_transcode/include/transcode_utils.h index 005cc514f8..341cec226e 100644 --- a/samples/sample_multi_transcode/include/transcode_utils.h +++ b/samples/sample_multi_transcode/include/transcode_utils.h @@ -92,6 +92,7 @@ namespace TranscodingSample bool bRobustFlag; bool bSoftRobustFlag; bool shouldUseGreedyFormula; + bool bLatencyMeasurement; std::vector m_lines; private: DISALLOW_COPY_AND_ASSIGN(CmdProcessor); diff --git a/samples/sample_multi_transcode/src/pipeline_transcode.cpp b/samples/sample_multi_transcode/src/pipeline_transcode.cpp index f757cddf99..2a712a7d5d 100644 --- a/samples/sample_multi_transcode/src/pipeline_transcode.cpp +++ b/samples/sample_multi_transcode/src/pipeline_transcode.cpp @@ -167,6 +167,7 @@ CTranscodingPipeline::CTranscodingPipeline(): m_nReqFrameTime(0), m_nOutputFramesNum(0), shouldUseGreedyFormula(false), + m_bLatencyMeasurement(true), m_nRotationAngle(0) { MSDK_ZERO_MEMORY(m_RotateParam); @@ -558,6 +559,8 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) CTimer DevBusyTimer; DevBusyTimer.Start(); + + mfxU64 latencyTimestamp = 0; while (MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE < sts) { if (m_rawInput) @@ -594,6 +597,8 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) MSDK_CHECK_POINTER_SAFE(pmfxSurface, MFX_ERR_MEMORY_ALLOC, msdk_printf(MSDK_STRING("ERROR: No free surfaces in decoder pool (during long period)\n"))); // return an error if a free surface wasn't found } + latencyTimestamp = msdk_time_get_tick(); + if (!m_rawInput) { sts = m_pmfxDEC->DecodeFrameAsync(m_pmfxBS, pmfxSurface, &pExtSurface->pSurface, &pExtSurface->Syncp); @@ -625,6 +630,16 @@ mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) HandlePossibleGpuHang(sts); MSDK_CHECK_ERR_NONE_STATUS(sts, MFX_ERR_ABORTED, "Decode: SyncOperation failed"); } + + if (m_bLatencyMeasurement) + { + mfxU64 finish = msdk_time_get_tick(); + auto latency = (finish - latencyTimestamp) / 1000.; + m_decLatency.value += latency; + m_decLatency.maxLatency = latency > m_decLatency.maxLatency ? latency : m_decLatency.maxLatency; + m_decLatency.amount += 1; + m_decLatency.perFrameLatency.push_back(latency); + } return sts; } // mfxStatus CTranscodingPipeline::DecodeOneFrame(ExtendedSurface *pExtSurface) @@ -757,9 +772,20 @@ mfxStatus CTranscodingPipeline::EncodeOneFrame(ExtendedSurface *pExtSurface, mfx for (;;) { + mfxU64 latencyTimestamp = msdk_time_get_tick(); + // at this point surface for encoder contains either a frame from file or a frame processed by vpp sts = m_pmfxENC->EncodeFrameAsync(pExtSurface->pEncCtrl, pExtSurface->pSurface, pBS, &pExtSurface->Syncp); + if (m_bLatencyMeasurement) + { + if (sts != MFX_WRN_DEVICE_BUSY && pExtSurface->pSurface) + { + std::lock_guard guard(m_encLatency.mutex); + m_encLatency.data[pExtSurface->pSurface->Data.TimeStamp] = latencyTimestamp; + } + } + if (MFX_ERR_NONE < sts && !pExtSurface->Syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) @@ -1950,6 +1976,25 @@ mfxStatus CTranscodingPipeline::PutBS() MSDK_CHECK_ERR_NONE_STATUS(sts, MFX_ERR_ABORTED, "Encode: SyncOperation failed"); } + if (m_bLatencyMeasurement) + { + mfxU64 finish = msdk_time_get_tick(); + for (auto p = m_encLatency.data.begin(); p != m_encLatency.data.end(); ++p) + { + if (p->first == pBitstreamEx->Bitstream.TimeStamp) + { + std::lock_guard guard(m_encLatency.mutex); + auto latency = (finish - p->second) / 1000.; + m_encLatency.value += latency; + m_encLatency.amount += 1; + m_encLatency.maxLatency = latency > m_encLatency.maxLatency ? latency : m_encLatency.maxLatency; + m_encLatency.data.erase(p); + m_encLatency.perFrameLatency.push_back(latency); + break; + } + } + } + m_nOutputFramesNum++; //--- Time measurements @@ -3675,6 +3720,7 @@ mfxStatus CTranscodingPipeline::Init(sInputParams *pParams, m_pParentPipeline = pParentPipeline; shouldUseGreedyFormula = pParams->shouldUseGreedyFormula; + m_bLatencyMeasurement = pParams->bLatencyMeasurement; m_nTimeout = pParams->nTimeout; @@ -4306,6 +4352,33 @@ size_t CTranscodingPipeline::GetRobustFlag() void CTranscodingPipeline::Close() { + if (m_bLatencyMeasurement) + { + if (m_decLatency.value) { + msdk_printf( + MSDK_STRING("Total decode latency on session %u: %lf on %llu frames, average %lf, max %lf\n"), + GetPipelineID(), m_decLatency.value, m_decLatency.amount, m_decLatency.value / m_decLatency.amount, m_decLatency.maxLatency); + + msdk_printf(MSDK_STRING("Per frame latency (ms): ")); + for (auto v : m_decLatency.perFrameLatency) { + msdk_printf(MSDK_STRING("%.2f,"), v); + } + msdk_printf(MSDK_STRING("\n\n")); + } + + if (m_encLatency.value) { + msdk_printf( + MSDK_STRING("Total encode latency on session %u: %lf on %llu frames, average %lf, max %lf\n"), + GetPipelineID(), m_encLatency.value, m_encLatency.amount, m_encLatency.value / m_encLatency.amount, m_encLatency.maxLatency); + + msdk_printf(MSDK_STRING("Per frame latency (ms): ")); + for (auto v : m_encLatency.perFrameLatency) { + msdk_printf(MSDK_STRING("%.2f,"), v); + } + msdk_printf(MSDK_STRING("\n\n")); + } + } + if (m_pmfxDEC.get()) m_pmfxDEC->Close(); diff --git a/samples/sample_multi_transcode/src/transcode_utils.cpp b/samples/sample_multi_transcode/src/transcode_utils.cpp index 578f37c75a..a43f3d6233 100644 --- a/samples/sample_multi_transcode/src/transcode_utils.cpp +++ b/samples/sample_multi_transcode/src/transcode_utils.cpp @@ -478,6 +478,7 @@ CmdProcessor::CmdProcessor() shouldUseGreedyFormula=false; bRobustFlag = false; bSoftRobustFlag = false; + bLatencyMeasurement = true; } //CmdProcessor::CmdProcessor() @@ -633,6 +634,10 @@ mfxStatus CmdProcessor::ParseCmdLine(int argc, msdk_char *argv[]) } DumpLogFileName = argv[0]; } + else if (0 == msdk_strcmp(argv[0], MSDK_STRING("-latency-stat"))) + { + bLatencyMeasurement = true; + } else { break; @@ -1612,6 +1617,7 @@ mfxStatus CmdProcessor::ParseParamsForOneSession(mfxU32 argc, msdk_char *argv[]) InputParams.bSoftRobustFlag = true; InputParams.shouldUseGreedyFormula = shouldUseGreedyFormula; + InputParams.bLatencyMeasurement = bLatencyMeasurement; InputParams.statisticsWindowSize = statisticsWindowSize; InputParams.statisticsLogFile = statisticsLogFile;