Skip to content

Commit

Permalink
Add layered decoding support
Browse files Browse the repository at this point in the history
If the eye buffers are present, then the encode is layered.

The main buffer is the primary eye.

Signed-off-by: Derek Buitenhuis <[email protected]>
  • Loading branch information
dwbuiten committed Nov 18, 2024
1 parent be8f328 commit f1bdf16
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 3 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ AM_INIT_AUTOMAKE([1.11 subdir-objects])
AM_SILENT_RULES([yes])
AM_MAINTAINER_MODE([disable])

VERSION_INFO="5:0:0"
VERSION_INFO="5:1:0"

AC_MSG_CHECKING([if debug build is enabled])

Expand Down
14 changes: 13 additions & 1 deletion include/ffms.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#define FFMS_H

// Version format: major - minor - micro - bump
#define FFMS_VERSION ((5 << 24) | (0 << 16) | (0 << 8) | 0)
#define FFMS_VERSION ((5 << 24) | (1 << 16) | (0 << 8) | 0)

#include <stdint.h>
#include <stddef.h>
Expand Down Expand Up @@ -344,6 +344,18 @@ typedef struct FFMS_Frame {
/* Introduced in FFMS_VERSION ((3 << 24) | (1 << 16) | (1 << 8) | 0) */
uint8_t *HDR10Plus;
int HDR10PlusSize;

/*
* If these buffers are not NULL, left and right eye data is present.
* In such a case, the main buffer points to the primary eye's buffer,
* for use in monoscopic encoding.
*
* Introduced in FFMS_VERSION ((5 << 24) | (1 << 16) | (0 << 8) | 0)
*/
const uint8_t *LeftEyeData[4];
int LeftEyeLinesize[4];
const uint8_t *RightEyeData[4];
int RightEyeLinesize[4];
} FFMS_Frame;

typedef struct FFMS_TrackTimeBase {
Expand Down
96 changes: 95 additions & 1 deletion src/core/videosource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,32 @@ FFMS_Frame *FFMS_VideoSource::OutputFrame(AVFrame *Frame) {
}
}

if (IsLayered) {
if ((PrimaryEyeIsLeft && !EyesInverted) || (!PrimaryEyeIsLeft && EyesInverted)) {
for (int i = 0; i < 4; i++) {
LocalFrame.Data[i] = LeftEyeFrameData[i];
LocalFrame.Linesize[i] = LeftEyeLinesize[i];
}
} else {
for (int i = 0; i < 4; i++) {
LocalFrame.Data[i] = RightEyeFrameData[i];
LocalFrame.Linesize[i] = RightEyeLinesize[i];
}
}
for (int i = 0; i < 4; i++) {
LocalFrame.LeftEyeData[i] = LeftEyeFrameData[i];
LocalFrame.LeftEyeLinesize[i] = LeftEyeLinesize[i];
LocalFrame.RightEyeData[i] = RightEyeFrameData[i];
LocalFrame.RightEyeLinesize[i] = RightEyeLinesize[i];
}
if (EyesInverted) {
for (int i = 0; i < 4; i++) {
std::swap(LocalFrame.RightEyeData[i], LocalFrame.LeftEyeData[i]);
std::swap(LocalFrame.RightEyeLinesize[i], LocalFrame.LeftEyeLinesize[i]);
}
}
}

LocalFrame.EncodedWidth = Frame->width;
LocalFrame.EncodedHeight = Frame->height;
LocalFrame.EncodedPixelFormat = Frame->format;
Expand Down Expand Up @@ -258,10 +284,31 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
if (CodecContext->codec_id == AV_CODEC_ID_H264 && CodecContext->has_b_frames)
CodecContext->has_b_frames = 15; // the maximum possible value for h264

if (avcodec_open2(CodecContext, Codec, nullptr) < 0)
// Are we layered?
if (FormatContext->streams[VideoTrack]->disposition & AV_DISPOSITION_MULTILAYER) {
IsLayered = true;
// See if we can figure out the primary (base) eye based on side data
for (int i = 0; i < FormatContext->streams[VideoTrack]->codecpar->nb_coded_side_data; i++) {
if (FormatContext->streams[VideoTrack]->codecpar->coded_side_data[i].type == AV_PKT_DATA_STEREO3D) {
const AVStereo3D *StereoSideData = (const AVStereo3D *)FormatContext->streams[VideoTrack]->codecpar->coded_side_data[i].data;
// If 'right', set it as such, otherwise it is left.
PrimaryEyeIsLeft = !(StereoSideData->primary_eye == AV_PRIMARY_EYE_RIGHT);
EyesInverted = !!(StereoSideData->flags & AV_STEREO3D_FLAG_INVERT);
}
}
}

// Just ask for all views possible if we're layered
AVDictionary *CodecDict = nullptr;
if (IsLayered)
av_dict_set(&CodecDict, "view_ids", "-1", 0);

if (avcodec_open2(CodecContext, Codec, &CodecDict) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Could not open video codec");

av_dict_free(&CodecDict);

// Similar yet different to h264 workaround above
// vc1 simply sets has_b_frames to 1 no matter how many there are so instead we set it to the max value
// in order to not confuse our own delay guesses later
Expand Down Expand Up @@ -674,6 +721,24 @@ bool FFMS_VideoSource::HasPendingDelayedFrames() {
return false;
}

void FFMS_VideoSource::CopyEye(AVStereo3DView view) {
if (view == AV_STEREO3D_VIEW_LEFT) {
av_freep(&LeftEyeFrameData[0]);
if (av_image_alloc(LeftEyeFrameData, LeftEyeLinesize, DecodeFrame->width, DecodeFrame->height, (enum AVPixelFormat) DecodeFrame->format, 16) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
"Could not allocate left eye buffer");
av_image_copy(LeftEyeFrameData, LeftEyeLinesize, DecodeFrame->data, DecodeFrame->linesize, (enum AVPixelFormat) DecodeFrame->format, DecodeFrame->width, DecodeFrame->height);
} else if (view == AV_STEREO3D_VIEW_RIGHT) {
av_freep(&RightEyeFrameData[0]);
if (av_image_alloc(RightEyeFrameData, RightEyeLinesize, DecodeFrame->width, DecodeFrame->height, (enum AVPixelFormat) DecodeFrame->format, 16) < 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_ALLOCATION_FAILED,
"Could not allocate right eye buffer");
av_image_copy(RightEyeFrameData, RightEyeLinesize, DecodeFrame->data, DecodeFrame->linesize, (enum AVPixelFormat) DecodeFrame->format, DecodeFrame->width, DecodeFrame->height);
} else {
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Layered decode with invalid view.");
}
}

bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {
std::swap(DecodeFrame, LastDecodedFrame);
ResendPacket = false;
Expand All @@ -692,6 +757,33 @@ bool FFMS_VideoSource::DecodePacket(AVPacket *Packet) {

Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
if (Ret == 0) {
if (IsLayered) {
const AVFrameSideData *sd = av_frame_get_side_data(DecodeFrame, AV_FRAME_DATA_STEREO3D);
if (!sd)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing Stereo3D for layered decode.");

const AVStereo3D *stereo3d = (const AVStereo3D *)sd->data;
AVStereo3DView first_view = stereo3d->view;
CopyEye(stereo3d->view);

Ret = avcodec_receive_frame(CodecContext, DecodeFrame);
if (Ret != 0)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing second view for layered decode.");

sd = av_frame_get_side_data(DecodeFrame, AV_FRAME_DATA_STEREO3D);
if (!sd)
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC,
"Missing Stereo3D for layered decode second layer.");

stereo3d = (const AVStereo3D *)sd->data;
if ((first_view == AV_STEREO3D_VIEW_LEFT && stereo3d->view != AV_STEREO3D_VIEW_RIGHT) ||
(first_view == AV_STEREO3D_VIEW_RIGHT && stereo3d->view != AV_STEREO3D_VIEW_LEFT)) {
throw FFMS_Exception(FFMS_ERROR_DECODING, FFMS_ERROR_CODEC, "Unmatched left/right views in layered decode.");
}
CopyEye(stereo3d->view);
}
Delay.Decrement();
} else {
std::swap(DecodeFrame, LastDecodedFrame);
Expand Down Expand Up @@ -740,6 +832,8 @@ void FFMS_VideoSource::Free() {
if (SWS)
sws_freeContext(SWS);
av_freep(&SWSFrameData[0]);
av_freep(&LeftEyeFrameData[0]);
av_freep(&RightEyeFrameData[0]);
av_frame_free(&DecodeFrame);
av_frame_free(&LastDecodedFrame);
av_packet_free(&StashedPacket);
Expand Down
8 changes: 8 additions & 0 deletions src/core/videosource.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ struct FFMS_VideoSource {

uint8_t *SWSFrameData[4] = {};
int SWSFrameLinesize[4] = {};
bool EyesInverted = false;
bool PrimaryEyeIsLeft = true;
uint8_t *LeftEyeFrameData[4] = {};
int LeftEyeLinesize[4] = {};
uint8_t *RightEyeFrameData[4] = {};
int RightEyeLinesize[4] = {};

AVPacket *StashedPacket = nullptr;
bool ResendPacket = false;
Expand All @@ -118,6 +124,7 @@ struct FFMS_VideoSource {
int SeekMode;
bool SeekByPos = false;
bool HaveSeenInterlacedFrame = false;
bool IsLayered = false;

void ReAdjustOutputFormat(AVFrame *Frame);
FFMS_Frame *OutputFrame(AVFrame *Frame);
Expand All @@ -135,6 +142,7 @@ struct FFMS_VideoSource {
FFMS_Track *GetTrack() { return &Frames; }
FFMS_Frame *GetFrame(int n);
void GetFrameCheck(int n);
void CopyEye(AVStereo3DView view);
FFMS_Frame *GetFrameByTime(double Time);
void SetOutputFormat(const AVPixelFormat *TargetFormats, int Width, int Height, int Resizer);
void ResetOutputFormat();
Expand Down

0 comments on commit f1bdf16

Please sign in to comment.