Multilayer Decoding Support (and minimum FFmpeg version bump) #446

Open: wants to merge 2 commits into master
configure.ac (4 changes: 2 additions & 2 deletions)
@@ -6,7 +6,7 @@ AM_INIT_AUTOMAKE([1.11 subdir-objects])
AM_SILENT_RULES([yes])
AM_MAINTAINER_MODE([disable])

VERSION_INFO="5:0:0"
VERSION_INFO="5:1:0"

AC_MSG_CHECKING([if debug build is enabled])

@@ -88,7 +88,7 @@ PKG_PROG_PKG_CONFIG([0.22])
pkgconfigdir="\$(libdir)/pkgconfig"
AC_SUBST(pkgconfigdir)

PKG_CHECK_MODULES(FFMPEG, [libavformat >= 60.16.0 libavcodec >= 60.31.0 libswscale >= 7.5.0 libavutil >= 58.29.0 libswresample >= 4.12.0])
PKG_CHECK_MODULES(FFMPEG, [libavformat >= 61.7.0 libavcodec >= 61.19.0 libswscale >= 8.3.0 libavutil >= 59.39.0 libswresample >= 5.3.0])

dnl As of 0eec06ed8747923faa6a98e474f224d922dc487d ffmpeg only adds -lrt to lavc's
dnl LIBS, but lavu needs it, so move it to the end if it's present
doc/ffms2-changelog.md (4 changes: 4 additions & 0 deletions)
@@ -1,4 +1,8 @@
# FFmpegSource2 Changelog
- 5.1
- FFmpeg 7.1 is now the minimum requirement.
- Added layered decoding support, e.g. for spatial MV-HEVC.

- 5.0
- Fixed all issues with FFmpeg 6.1 which is now the minimum requirement
- Fixed av1 decoding
include/ffms.h (14 changes: 13 additions & 1 deletion)
@@ -22,7 +22,7 @@
#define FFMS_H

// Version format: major - minor - micro - bump
#define FFMS_VERSION ((5 << 24) | (0 << 16) | (0 << 8) | 0)
#define FFMS_VERSION ((5 << 24) | (1 << 16) | (0 << 8) | 0)

#include <stdint.h>
#include <stddef.h>
@@ -344,6 +344,18 @@ typedef struct FFMS_Frame {
/* Introduced in FFMS_VERSION ((3 << 24) | (1 << 16) | (1 << 8) | 0) */
uint8_t *HDR10Plus;
int HDR10PlusSize;

/*
* If these buffers are not NULL, left and right eye data is present.
* In such a case, the main buffer points to the primary eye's buffer,
* for use in monoscopic encoding.
*
* Introduced in FFMS_VERSION ((5 << 24) | (1 << 16) | (0 << 8) | 0)
*/
const uint8_t *LeftEyeData[4];
int LeftEyeLinesize[4];
const uint8_t *RightEyeData[4];
int RightEyeLinesize[4];
} FFMS_Frame;

typedef struct FFMS_TrackTimeBase {
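For illustration, here is a minimal consumer-side sketch (not part of this diff) of how client code could check the new per-eye fields. The helper name is hypothetical; it assumes a valid FFMS_VideoSource *vs and frame number n obtained through the usual indexing calls.

#include <ffms.h>
#include <cstdio>

void PrintEyeInfo(FFMS_VideoSource *vs, int n) {
    char ErrMsg[1024];
    FFMS_ErrorInfo E = { FFMS_ERROR_SUCCESS, FFMS_ERROR_SUCCESS, (int)sizeof(ErrMsg), ErrMsg };
    const FFMS_Frame *F = FFMS_GetFrame(vs, n, &E);
    if (!F) {
        std::fprintf(stderr, "GetFrame failed: %s\n", ErrMsg);
        return;
    }
    // Data[] keeps pointing at the primary eye, so existing callers are unaffected;
    // the per-eye pointers are only populated for layered (e.g. MV-HEVC) sources.
    if (F->LeftEyeData[0] && F->RightEyeData[0])
        std::printf("frame %d is stereoscopic (left stride %d, right stride %d)\n",
                    n, F->LeftEyeLinesize[0], F->RightEyeLinesize[0]);
    else
        std::printf("frame %d is monoscopic\n", n);
}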
src/core/videosource.cpp (96 changes: 95 additions & 1 deletion)
@@ -97,6 +97,32 @@ FFMS_Frame *FFMS_VideoSource::OutputFrame(AVFrame *Frame) {
}
}

if (IsLayered) {
if ((PrimaryEyeIsLeft && !EyesInverted) || (!PrimaryEyeIsLeft && EyesInverted)) {
for (int i = 0; i < 4; i++) {
LocalFrame.Data[i] = LeftEyeFrameData[i];
LocalFrame.Linesize[i] = LeftEyeLinesize[i];
}
} else {
for (int i = 0; i < 4; i++) {
LocalFrame.Data[i] = RightEyeFrameData[i];
LocalFrame.Linesize[i] = RightEyeLinesize[i];
}
}
for (int i = 0; i < 4; i++) {
LocalFrame.LeftEyeData[i] = LeftEyeFrameData[i];
LocalFrame.LeftEyeLinesize[i] = LeftEyeLinesize[i];
LocalFrame.RightEyeData[i] = RightEyeFrameData[i];
LocalFrame.RightEyeLinesize[i] = RightEyeLinesize[i];
}
if (EyesInverted) {
for (int i = 0; i < 4; i++) {
std::swap(LocalFrame.RightEyeData[i], LocalFrame.LeftEyeData[i]);
std::swap(LocalFrame.RightEyeLinesize[i], LocalFrame.LeftEyeLinesize[i]);
}
}
}

LocalFrame.EncodedWidth = Frame->width;
LocalFrame.EncodedHeight = Frame->height;
LocalFrame.EncodedPixelFormat = Frame->format;
@@ -258,10 +284,31 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
if (CodecContext->codec_id == AV_CODEC_ID_H264 && CodecContext->has_b_frames)
CodecContext->has_b_frames = 15; // the maximum possible value for h264

if (avcodec_open2(CodecContext, Codec, nullptr) < 0)
// Are we layered?
if (FormatContext->streams[VideoTrack]->disposition & AV_DISPOSITION_MULTILAYER) {
IsLayered = true;
// See if we can figure out the primary (base) eye based on side data
for (int i = 0; i < FormatContext->streams[VideoTrack]->codecpar->nb_coded_side_data; i++) {
if (FormatContext->streams[VideoTrack]->codecpar->coded_side_data[i].type == AV_PKT_DATA_STEREO3D) {
const AVStereo3D *StereoSideData = (const AVStereo3D *)FormatContext->streams[VideoTrack]->codecpar->coded_side_data[i].data;
// If 'right', set it as such, otherwise it is left.
PrimaryEyeIsLeft = !(StereoSideData->primary_eye == AV_PRIMARY_EYE_RIGHT);
EyesInverted = !!(StereoSideData->flags & AV_STEREO3D_FLAG_INVERT);
}
}
}

// Just ask for all views possible if we're layered
AVDictionary *CodecDict = nullptr;
if (IsLayered)
av_dict_set(&CodecDict, "view_ids", "-1", 0);

if (avcodec_open2(CodecContext, Codec, &CodecDict) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Could not open video codec");

av_dict_free(&CodecDict);

// Similar yet different to h264 workaround above
// vc1 simply sets has_b_frames to 1 no matter how many there are so instead we set it to the max value
// in order to not confuse our own delay guesses later
@@ -674,6 +721,24 @@ bool FFMS_VideoSource::HasPendingDelayedFrames() {
return false;
}

void FFMS_VideoSource::CopyEye(AVStereo3DView view) {
if (view == AV_STEREO3D_VIEW_LEFT) {
av_freep(&LeftEyeFrameData[0]);
if (av_image_alloc(LeftEyeFrameData, LeftEyeLinesize, DecodeFrame->width, DecodeFrame->height, (enum AVPixelFormat) DecodeFrame->format, 16) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
"Could not allocate left eye buffer");
av_image_copy(LeftEyeFrameData, LeftEyeLinesize, DecodeFrame->data, DecodeFrame->linesize, (enum AVPixelFormat) DecodeFrame->format, DecodeFrame->width, DecodeFrame->height);
} else if (view == AV_STEREO3D_VIEW_RIGHT) {
av_freep(&RightEyeFrameData[0]);
if (av_image_alloc(RightEyeFrameData, RightEyeLinesize, DecodeFrame->width, DecodeFrame->height, (enum AVPixelFormat) DecodeFrame->format, 16) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
"Could not allocate right eye buffer");
av_image_copy(RightEyeFrameData, RightEyeLinesize, DecodeFrame->data, DecodeFrame->linesize, (enum AVPixelFormat) DecodeFrame->format, DecodeFrame->width, DecodeFrame->height);
} else {
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Layered decode with invalid view.");
}
}

bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {
std::swap(DecodeFrame, LastDecodedFrame);
ResendPacket = false;
@@ -692,6 +757,33 @@

Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
if (Ret == 0) {
if (IsLayered) {
const AVFrameSideData *sd = av_frame_get_side_data(DecodeFrame, AV_FRAME_DATA_STEREO3D);
if (!sd)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing Stereo3D for layered decode.");

const AVStereo3D *stereo3d = (const AVStereo3D *)sd->data;
AVStereo3DView first_view = stereo3d->view;
CopyEye(stereo3d->view);

Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
if (Ret != 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing second view for layered decode.");

sd = av_frame_get_side_data(DecodeFrame, AV_FRAME_DATA_STEREO3D);
if (!sd)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing Stereo3D for layered decode second layer.");

stereo3d = (const AVStereo3D *)sd->data;
if ((first_view == AV_STEREO3D_VIEW_LEFT && stereo3d->view != AV_STEREO3D_VIEW_RIGHT) ||
(first_view == AV_STEREO3D_VIEW_RIGHT && stereo3d->view != AV_STEREO3D_VIEW_LEFT)) {
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Unmatched left/right views in layered decode.");
}
CopyEye(stereo3d->view);
}
Delay.Decrement();
} else {
std::swap(DecodeFrame, LastDecodedFrame);
@@ -740,6 +832,8 @@ void FFMS_VideoSource::Free() {
if (SWS)
sws_freeContext(SWS);
av_freep(&SWSFrameData[0]);
av_freep(&LeftEyeFrameData[0]);
av_freep(&RightEyeFrameData[0]);
av_frame_free(&DecodeFrame);
av_frame_free(&LastDecodedFrame);
av_packet_free(&StashedPacket);
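For context, a standalone sketch (illustrative only, not part of this diff) of the FFmpeg 7.1 behaviour the decode path above relies on: with the hevc decoder's "view_ids" option set to "-1", each coded picture of an MV-HEVC stream yields one AVFrame per view, each tagged with AV_FRAME_DATA_STEREO3D side data. The helper name and surrounding setup are assumptions.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/stereo3d.h>
}

// Feed one packet and classify every view the decoder returns for it.
static int decode_views(AVCodecContext *ctx, AVPacket *pkt, AVFrame *frame) {
    int ret = avcodec_send_packet(ctx, pkt);
    if (ret < 0)
        return ret;
    while ((ret = avcodec_receive_frame(ctx, frame)) == 0) {
        const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_STEREO3D);
        const AVStereo3D *s3d = sd ? (const AVStereo3D *)sd->data : nullptr;
        if (s3d && s3d->view == AV_STEREO3D_VIEW_LEFT) {
            // left-eye picture: copy it out here (cf. CopyEye above)
        } else if (s3d && s3d->view == AV_STEREO3D_VIEW_RIGHT) {
            // right-eye picture
        }
        av_frame_unref(frame);
    }
    return ret == AVERROR(EAGAIN) ? 0 : ret;
}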
src/core/videosource.h (8 changes: 8 additions & 0 deletions)
@@ -92,6 +92,12 @@ struct FFMS_VideoSource {

uint8_t *SWSFrameData[4] = {};
int SWSFrameLinesize[4] = {};
bool EyesInverted = false;
bool PrimaryEyeIsLeft = true;
uint8_t *LeftEyeFrameData[4] = {};
int LeftEyeLinesize[4] = {};
uint8_t *RightEyeFrameData[4] = {};
int RightEyeLinesize[4] = {};

AVPacket *StashedPacket = nullptr;
bool ResendPacket = false;
Expand All @@ -118,6 +124,7 @@ struct FFMS_VideoSource {
int SeekMode;
bool SeekByPos = false;
bool HaveSeenInterlacedFrame = false;
bool IsLayered = false;

void ReAdjustOutputFormat(AVFrame *Frame);
FFMS_Frame *OutputFrame(AVFrame *Frame);
@@ -135,6 +142,7 @@
FFMS_Track *GetTrack() { return &Frames; }
FFMS_Frame *GetFrame(int n);
void GetFrameCheck(int n);
void CopyEye(AVStereo3DView view);
FFMS_Frame *GetFrameByTime(double Time);
void SetOutputFormat(const AVPixelFormat *TargetFormats, int Width, int Height, int Resizer);
void ResetOutputFormat();