forked from FFmpeg/FFmpeg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
avcodec: [loongarch] Optimize vp9_mc/intra with LSX.
Benchmark: ffmpeg -i ../10_vp9_1080p_30fps_3Mbps.webm -f rawvideo -y /dev/null -an — before: 170 fps, after: 294 fps. Reviewed-by: Shiyou Yin <[email protected]> Signed-off-by: Michael Niedermayer <[email protected]>
- Loading branch information
1 parent
72bcbe2
commit 2fd914e
Showing
7 changed files
with
3,379 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Copyright (c) 2021 Loongson Technology Corporation Limited | ||
* Contributed by Hao Chen <[email protected]> | ||
* | ||
* This file is part of FFmpeg. | ||
* | ||
* FFmpeg is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* FFmpeg is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public | ||
* License along with FFmpeg; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
*/ | ||
|
||
#include "libavutil/loongarch/cpu.h" | ||
#include "libavutil/attributes.h" | ||
#include "libavcodec/vp9dsp.h" | ||
#include "vp9dsp_loongarch.h" | ||
|
||
/*
 * Store the LSX 8-tap kernels for one block size and one direction into
 * dsp->mc[idx1][filter][idx2][idxh][idxv], once for each of the SMOOTH,
 * REGULAR and SHARP filters.  The function name is pasted together from
 * type, the filter name, sz and dir.  `dsp` is taken from the scope in
 * which the macro is expanded.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)                  \
    do {                                                                     \
        dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] =               \
            ff_##type##_8tap_smooth_##sz##dir##_lsx;                         \
        dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] =               \
            ff_##type##_8tap_regular_##sz##dir##_lsx;                        \
        dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] =               \
            ff_##type##_8tap_sharp_##sz##dir##_lsx;                          \
    } while (0)
|
||
/*
 * Apply init_subpel1 for every block size handled here: first mc index
 * 0..4 paired with sizes 64, 32, 16, 8 and 4.  idx, idxh, idxv, dir and
 * type are forwarded unchanged.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement;
 * invoke it with a trailing ';'.
 */
#define init_subpel2(idx, idxh, idxv, dir, type)                             \
    do {                                                                     \
        init_subpel1(0, idx, idxh, idxv, 64, dir, type);                     \
        init_subpel1(1, idx, idxh, idxv, 32, dir, type);                     \
        init_subpel1(2, idx, idxh, idxv, 16, dir, type);                     \
        init_subpel1(3, idx, idxh, idxv,  8, dir, type);                     \
        init_subpel1(4, idx, idxh, idxv,  4, dir, type);                     \
    } while (0)
|
||
/*
 * Apply init_subpel2 for the three subpel directions:
 *   (idxh, idxv) = (1, 0) -> "h" kernels
 *   (idxh, idxv) = (0, 1) -> "v" kernels
 *   (idxh, idxv) = (1, 1) -> "hv" kernels
 * idx and type are forwarded unchanged.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement;
 * invoke it with a trailing ';'.
 */
#define init_subpel3(idx, type)                                              \
    do {                                                                     \
        init_subpel2(idx, 1, 0, h,  type);                                   \
        init_subpel2(idx, 0, 1, v,  type);                                   \
        init_subpel2(idx, 1, 1, hv, type);                                   \
    } while (0)
|
||
/*
 * Store ff_<type><sz>_lsx into the [0][0] (no subpel offset) slot of
 * dsp->mc[idx1][filter][idx2] for all four filters: SMOOTH, REGULAR,
 * SHARP and BILINEAR.  The same function is used for every filter.
 * `dsp` is taken from the scope in which the macro is expanded.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 */
#define init_fpel(idx1, idx2, sz, type)                                      \
    do {                                                                     \
        dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = ff_##type##sz##_lsx; \
        dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = ff_##type##sz##_lsx; \
        dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = ff_##type##sz##_lsx; \
        dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_lsx; \
    } while (0)
|
||
/*
 * Install the full-pel functions for one block size through init_fpel:
 * ff_copy<sz>_lsx at second mc index 0 and ff_avg<sz>_lsx at second mc
 * index 1.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement;
 * invoke it with a trailing ';'.
 */
#define init_copy(idx, sz)                                                   \
    do {                                                                     \
        init_fpel(idx, 0, sz, copy);                                         \
        init_fpel(idx, 1, sz, avg);                                          \
    } while (0)
|
||
/*
 * Fill dsp->intra_pred[tx] with the LSX predictors named ff_*_<sz>_lsx for
 * the modes VERT, HOR, DC, LEFT_DC, TOP_DC, DC_128, DC_127, DC_129 and
 * TM_VP8.  `dsp` is taken from the scope in which the macro is expanded.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 * The last line deliberately has no line continuation, so nothing that
 * follows the definition can be spliced into the macro body.
 */
#define init_intra_pred1_lsx(tx, sz)                                         \
    do {                                                                     \
        dsp->intra_pred[tx][VERT_PRED]    = ff_vert_##sz##_lsx;              \
        dsp->intra_pred[tx][HOR_PRED]     = ff_hor_##sz##_lsx;               \
        dsp->intra_pred[tx][DC_PRED]      = ff_dc_##sz##_lsx;                \
        dsp->intra_pred[tx][LEFT_DC_PRED] = ff_dc_left_##sz##_lsx;           \
        dsp->intra_pred[tx][TOP_DC_PRED]  = ff_dc_top_##sz##_lsx;            \
        dsp->intra_pred[tx][DC_128_PRED]  = ff_dc_128_##sz##_lsx;            \
        dsp->intra_pred[tx][DC_127_PRED]  = ff_dc_127_##sz##_lsx;            \
        dsp->intra_pred[tx][DC_129_PRED]  = ff_dc_129_##sz##_lsx;            \
        dsp->intra_pred[tx][TM_VP8_PRED]  = ff_tm_##sz##_lsx;                \
    } while (0)
|
||
/*
 * Reduced variant of the intra-predictor setup: fills dsp->intra_pred[tx]
 * only for the modes DC, LEFT_DC, TOP_DC and TM_VP8, using the LSX
 * functions named ff_*_<sz>_lsx.  Other modes of that row are not touched.
 * `dsp` is taken from the scope in which the macro is expanded.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * stays correct under an unbraced if/else; invoke it with a trailing ';'.
 * The last line deliberately has no line continuation, so nothing that
 * follows the definition can be spliced into the macro body.
 */
#define init_intra_pred2_lsx(tx, sz)                                         \
    do {                                                                     \
        dsp->intra_pred[tx][DC_PRED]      = ff_dc_##sz##_lsx;                \
        dsp->intra_pred[tx][LEFT_DC_PRED] = ff_dc_left_##sz##_lsx;           \
        dsp->intra_pred[tx][TOP_DC_PRED]  = ff_dc_top_##sz##_lsx;            \
        dsp->intra_pred[tx][TM_VP8_PRED]  = ff_tm_##sz##_lsx;                \
    } while (0)
|
||
/**
 * Install the LoongArch LSX implementations into a VP9 DSP context.
 *
 * Nothing is written unless have_lsx() accepts the flags returned by
 * av_get_cpu_flags() and bpp equals 8.  When both hold, the following
 * entries of dsp are overwritten:
 *   - mc: 8-tap subpel put/avg kernels (h, v, hv) for sizes 64..4, and the
 *     full-pel copy/avg functions for sizes 64..8;
 *   - intra_pred: the nine-mode set for TX_16X16 and TX_32X32, and the
 *     DC / LEFT_DC / TOP_DC / TM_VP8 subset for TX_4X4 and TX_8X8.
 *
 * @param dsp  context whose function pointers are updated
 * @param bpp  value compared against 8 to select the 8-bit kernels
 */
av_cold void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp)
{
    const int flags = av_get_cpu_flags();

    if (!have_lsx(flags)) {
        return;
    }
    if (bpp != 8) {
        return;
    }

    /* Motion compensation: subpel filters, then full-pel copy/avg. */
    init_subpel3(0, put);
    init_subpel3(1, avg);
    init_copy(0, 64);
    init_copy(1, 32);
    init_copy(2, 16);
    init_copy(3, 8);

    /* Intra prediction: full set for large transforms, subset for small. */
    init_intra_pred1_lsx(TX_16X16, 16x16);
    init_intra_pred1_lsx(TX_32X32, 32x32);
    init_intra_pred2_lsx(TX_4X4, 4x4);
    init_intra_pred2_lsx(TX_8X8, 8x8);
}

/* The setup macros are local to this file; drop them in definition order. */
#undef init_subpel1
#undef init_subpel2
#undef init_subpel3
#undef init_fpel
#undef init_copy
#undef init_intra_pred1_lsx
#undef init_intra_pred2_lsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
/* | ||
* Copyright (c) 2021 Loongson Technology Corporation Limited | ||
* Contributed by Hao Chen <[email protected]> | ||
* | ||
* This file is part of FFmpeg. | ||
* | ||
* FFmpeg is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2.1 of the License, or (at your option) any later version. | ||
* | ||
* FFmpeg is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public | ||
* License along with FFmpeg; if not, write to the Free Software | ||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
*/ | ||
|
||
#ifndef AVCODEC_LOONGARCH_VP9DSP_LOONGARCH_H | ||
#define AVCODEC_LOONGARCH_VP9DSP_LOONGARCH_H | ||
|
||
/*
 * Declare the six LSX 8-tap motion-compensation functions for one block
 * width and one filter: put and avg variants, each in h, v and hv form,
 * named ff_{put,avg}_8tap_<type>_<SIZE>{h,v,hv}_lsx.
 *
 * type_idx is accepted but not used by the expansion; it is kept so that
 * existing three-argument invocations remain valid.
 *
 * The last declaration intentionally has no terminating ';': the ';' that
 * follows each invocation supplies it, which avoids a stray empty
 * declaration at file scope.
 */
#define VP9_8TAP_LOONGARCH_LSX_FUNC(SIZE, type, type_idx)                      \
void ff_put_8tap_##type##_##SIZE##h_lsx(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my);                \
                                                                               \
void ff_put_8tap_##type##_##SIZE##v_lsx(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my);                \
                                                                               \
void ff_put_8tap_##type##_##SIZE##hv_lsx(uint8_t *dst, ptrdiff_t dststride,    \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my);               \
                                                                               \
void ff_avg_8tap_##type##_##SIZE##h_lsx(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my);                \
                                                                               \
void ff_avg_8tap_##type##_##SIZE##v_lsx(uint8_t *dst, ptrdiff_t dststride,     \
                                        const uint8_t *src,                    \
                                        ptrdiff_t srcstride,                   \
                                        int h, int mx, int my);                \
                                                                               \
void ff_avg_8tap_##type##_##SIZE##hv_lsx(uint8_t *dst, ptrdiff_t dststride,    \
                                         const uint8_t *src,                   \
                                         ptrdiff_t srcstride,                  \
                                         int h, int mx, int my)
|
||
/*
 * Declare the two LSX full-pel functions for one block width:
 * ff_copy<SIZE>_lsx and ff_avg<SIZE>_lsx.  Both share the parameter list
 * used by the 8-tap functions (mx and my included).
 *
 * The last declaration intentionally has no terminating ';': the ';' that
 * follows each invocation supplies it, which avoids a stray empty
 * declaration at file scope.
 */
#define VP9_COPY_LOONGARCH_LSX_FUNC(SIZE)                                      \
void ff_copy##SIZE##_lsx(uint8_t *dst, ptrdiff_t dststride,                    \
                         const uint8_t *src, ptrdiff_t srcstride,              \
                         int h, int mx, int my);                               \
                                                                               \
void ff_avg##SIZE##_lsx(uint8_t *dst, ptrdiff_t dststride,                     \
                        const uint8_t *src, ptrdiff_t srcstride,               \
                        int h, int mx, int my)
|
||
VP9_8TAP_LOONGARCH_LSX_FUNC(64, regular, FILTER_8TAP_REGULAR); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(32, regular, FILTER_8TAP_REGULAR); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(16, regular, FILTER_8TAP_REGULAR); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(8, regular, FILTER_8TAP_REGULAR); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(4, regular, FILTER_8TAP_REGULAR); | ||
|
||
VP9_8TAP_LOONGARCH_LSX_FUNC(64, sharp, FILTER_8TAP_SHARP); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(32, sharp, FILTER_8TAP_SHARP); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(16, sharp, FILTER_8TAP_SHARP); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(8, sharp, FILTER_8TAP_SHARP); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(4, sharp, FILTER_8TAP_SHARP); | ||
|
||
VP9_8TAP_LOONGARCH_LSX_FUNC(64, smooth, FILTER_8TAP_SMOOTH); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(32, smooth, FILTER_8TAP_SMOOTH); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(16, smooth, FILTER_8TAP_SMOOTH); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(8, smooth, FILTER_8TAP_SMOOTH); | ||
VP9_8TAP_LOONGARCH_LSX_FUNC(4, smooth, FILTER_8TAP_SMOOTH); | ||
|
||
VP9_COPY_LOONGARCH_LSX_FUNC(64); | ||
VP9_COPY_LOONGARCH_LSX_FUNC(32); | ||
VP9_COPY_LOONGARCH_LSX_FUNC(16); | ||
VP9_COPY_LOONGARCH_LSX_FUNC(8); | ||
|
||
#undef VP9_8TAP_LOONGARCH_LSX_FUNC | ||
#undef VP9_COPY_LOONGARCH_LSX_FUNC | ||
|
||
/*
 * LSX intra-prediction functions.  Every predictor has the same parameter
 * list, so the prototypes are generated from one declarator macro; each
 * line below declares ff_<pred>_<size>_lsx.
 */
#define VP9_INTRA_LOONGARCH_LSX_FUNC(pred, size)                               \
void ff_##pred##_##size##_lsx(uint8_t *dst, ptrdiff_t stride,                  \
                              const uint8_t *left, const uint8_t *top)

/* Vertical and horizontal: 16x16 and 32x32 only. */
VP9_INTRA_LOONGARCH_LSX_FUNC(vert, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(vert, 32x32);
VP9_INTRA_LOONGARCH_LSX_FUNC(hor,  16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(hor,  32x32);

/* DC, left-DC and top-DC: all four sizes. */
VP9_INTRA_LOONGARCH_LSX_FUNC(dc, 4x4);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc, 8x8);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc, 32x32);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_left, 4x4);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_left, 8x8);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_left, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_left, 32x32);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_top, 4x4);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_top, 8x8);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_top, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_top, 32x32);

/* DC_128, DC_127 and DC_129: 16x16 and 32x32 only. */
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_128, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_128, 32x32);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_127, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_127, 32x32);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_129, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(dc_129, 32x32);

/* TM: all four sizes. */
VP9_INTRA_LOONGARCH_LSX_FUNC(tm, 4x4);
VP9_INTRA_LOONGARCH_LSX_FUNC(tm, 8x8);
VP9_INTRA_LOONGARCH_LSX_FUNC(tm, 16x16);
VP9_INTRA_LOONGARCH_LSX_FUNC(tm, 32x32);

#undef VP9_INTRA_LOONGARCH_LSX_FUNC
|
||
#endif /* AVCODEC_LOONGARCH_VP9DSP_LOONGARCH_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters