-rw-r--r--.travis.yml30
-rw-r--r--Changelog7
-rw-r--r--MAINTAINERS1
-rw-r--r--Makefile5
-rwxr-xr-xconfigure79
-rw-r--r--doc/APIchanges23
-rw-r--r--doc/Makefile2
-rw-r--r--doc/encoders.texi27
-rw-r--r--doc/examples/demux_decode.c4
-rw-r--r--doc/examples/qsv_transcode.c9
-rw-r--r--doc/examples/vaapi_transcode.c4
-rw-r--r--doc/ffmpeg.texi89
-rw-r--r--doc/filters.texi331
-rw-r--r--doc/multithreading.txt15
-rw-r--r--doc/muxers.texi138
-rw-r--r--doc/protocols.texi139
-rw-r--r--doc/texidep.pl2
-rw-r--r--doc/utils.texi72
-rw-r--r--ffbuild/arch.mak1
-rwxr-xr-xffbuild/libversion.sh1
-rw-r--r--fftools/cmdutils.c2
-rw-r--r--fftools/ffmpeg.c9
-rw-r--r--fftools/ffmpeg.h72
-rw-r--r--fftools/ffmpeg_enc.c3
-rw-r--r--fftools/ffmpeg_filter.c473
-rw-r--r--fftools/ffmpeg_mux.c22
-rw-r--r--fftools/ffmpeg_mux.h5
-rw-r--r--fftools/ffmpeg_mux_init.c355
-rw-r--r--fftools/ffmpeg_opt.c12
-rw-r--r--fftools/ffmpeg_sched.c94
-rw-r--r--fftools/ffmpeg_sched.h6
-rw-r--r--fftools/ffplay.c9
-rw-r--r--fftools/ffprobe.c6
-rw-r--r--fftools/opt_common.c14
-rw-r--r--libavcodec/012v.c3
-rw-r--r--libavcodec/Makefile19
-rw-r--r--libavcodec/aac.h17
-rw-r--r--libavcodec/aac/Makefile7
-rw-r--r--libavcodec/aac/aacdec.c (renamed from libavcodec/aacdec_template.c)1343
-rw-r--r--libavcodec/aac/aacdec.h (renamed from libavcodec/aacdec.h)150
-rw-r--r--libavcodec/aac/aacdec_dsp_template.c640
-rw-r--r--libavcodec/aac/aacdec_fixed.c103
-rw-r--r--libavcodec/aac/aacdec_fixed_coupling.h137
-rw-r--r--libavcodec/aac/aacdec_fixed_dequant.h174
-rw-r--r--libavcodec/aac/aacdec_fixed_prediction.h151
-rw-r--r--libavcodec/aac/aacdec_float.c178
-rw-r--r--libavcodec/aac/aacdec_float_coupling.h90
-rw-r--r--libavcodec/aac/aacdec_float_prediction.h100
-rw-r--r--libavcodec/aac/aacdec_latm.h (renamed from libavcodec/aacdec.c)266
-rw-r--r--libavcodec/aac/aacdec_proc_template.c448
-rw-r--r--libavcodec/aac/aacdec_tab.c (renamed from libavcodec/aacdec_common.c)11
-rw-r--r--libavcodec/aac/aacdec_tab.h (renamed from libavcodec/aacdectab.h)8
-rw-r--r--libavcodec/aac_ac3_parser.c7
-rw-r--r--libavcodec/aac_ac3_parser.h10
-rw-r--r--libavcodec/aac_defines.h30
-rw-r--r--libavcodec/aac_parser.c16
-rw-r--r--libavcodec/aacdec_fixed.c515
-rw-r--r--libavcodec/aacenc.c8
-rw-r--r--libavcodec/aacenc.h14
-rw-r--r--libavcodec/aacenc_tns.c7
-rw-r--r--libavcodec/aacpsdsp.h1
-rw-r--r--libavcodec/aacpsdsp_template.c2
-rw-r--r--libavcodec/aacpsy.c4
-rw-r--r--libavcodec/aacsbr.c4
-rw-r--r--libavcodec/aacsbr.h41
-rw-r--r--libavcodec/aacsbr_template.c54
-rw-r--r--libavcodec/aacsbrdata.h164
-rw-r--r--libavcodec/aactab.c2
-rw-r--r--libavcodec/aarch64/Makefile4
-rw-r--r--libavcodec/aarch64/ac3dsp_init_aarch64.c50
-rw-r--r--libavcodec/aarch64/ac3dsp_neon.S111
-rw-r--r--libavcodec/aarch64/fdct.h26
-rw-r--r--libavcodec/aarch64/fdctdsp_init_aarch64.c39
-rw-r--r--libavcodec/aarch64/fdctdsp_neon.S368
-rw-r--r--libavcodec/aarch64/idctdsp_init_aarch64.c2
-rw-r--r--libavcodec/aarch64/opusdsp_init.c2
-rw-r--r--libavcodec/aarch64/opusdsp_neon.S28
-rw-r--r--libavcodec/ac3_parser.c18
-rw-r--r--libavcodec/ac3_parser_internal.h13
-rw-r--r--libavcodec/ac3dec.c18
-rw-r--r--libavcodec/ac3dsp.c4
-rw-r--r--libavcodec/ac3dsp.h3
-rw-r--r--libavcodec/ac3enc.c906
-rw-r--r--libavcodec/ac3enc.h68
-rw-r--r--libavcodec/ac3enc_fixed.c23
-rw-r--r--libavcodec/ac3enc_float.c21
-rw-r--r--libavcodec/ac3enc_template.c103
-rw-r--r--libavcodec/adts_header.c17
-rw-r--r--libavcodec/adts_header.h30
-rw-r--r--libavcodec/adts_parser.c23
-rw-r--r--libavcodec/aic.c2
-rw-r--r--libavcodec/aliaspixdec.c3
-rw-r--r--libavcodec/allcodecs.c11
-rw-r--r--libavcodec/amrwbdec.c3
-rw-r--r--libavcodec/arm/ac3dsp_neon.S63
-rw-r--r--libavcodec/ass.c47
-rw-r--r--libavcodec/asvdec.c2
-rw-r--r--libavcodec/atrac9dec.c8
-rw-r--r--libavcodec/av1dec.c169
-rw-r--r--libavcodec/av1dec.h22
-rw-r--r--libavcodec/avcodec.c12
-rw-r--r--libavcodec/avcodec.h3
-rw-r--r--libavcodec/avcodec_internal.h31
-rw-r--r--libavcodec/avfft.c2
-rw-r--r--libavcodec/avrndec.c2
-rw-r--r--libavcodec/avs2_parser.c4
-rw-r--r--libavcodec/avs3_parser.c3
-rw-r--r--libavcodec/avuidec.c3
-rw-r--r--libavcodec/bitpacked_dec.c3
-rw-r--r--libavcodec/bitstream_filters.c2
-rw-r--r--libavcodec/bmp.c2
-rw-r--r--libavcodec/brenderpix.c2
-rw-r--r--libavcodec/bsf/aac_adtstoasc.c6
-rw-r--r--libavcodec/bsf/dts2pts.c4
-rw-r--r--libavcodec/cbs_av1.c2
-rw-r--r--libavcodec/cbs_h2645.c4
-rw-r--r--libavcodec/cbs_h266_syntax_template.c28
-rw-r--r--libavcodec/cdxl.c2
-rw-r--r--libavcodec/cljrdec.c2
-rw-r--r--libavcodec/cllc.c3
-rw-r--r--libavcodec/codec_desc.c9
-rw-r--r--libavcodec/codec_internal.h7
-rw-r--r--libavcodec/codec_par.c1
-rw-r--r--libavcodec/cri.c3
-rw-r--r--libavcodec/dds.c2
-rw-r--r--libavcodec/decode.c332
-rw-r--r--libavcodec/decode.h24
-rw-r--r--libavcodec/diracdec.c3
-rw-r--r--libavcodec/dnxhddec.c2
-rw-r--r--libavcodec/dovi_rpu.c639
-rw-r--r--libavcodec/dovi_rpu.h86
-rw-r--r--libavcodec/dovi_rpudec.c635
-rw-r--r--libavcodec/dovi_rpuenc.c743
-rw-r--r--libavcodec/dv.c4
-rw-r--r--libavcodec/dv_internal.h2
-rw-r--r--libavcodec/dvdec.c8
-rw-r--r--libavcodec/dvenc.c6
-rw-r--r--libavcodec/dxtory.c2
-rw-r--r--libavcodec/dxv.c2
-rw-r--r--libavcodec/dxva2_av1.c10
-rw-r--r--libavcodec/dxva2_vp9.c4
-rw-r--r--libavcodec/eac3dec.c6
-rw-r--r--libavcodec/eac3enc.c154
-rw-r--r--libavcodec/eac3enc.h10
-rw-r--r--libavcodec/eatgq.c2
-rw-r--r--libavcodec/evc.h6
-rw-r--r--libavcodec/exr.c2
-rw-r--r--libavcodec/fdctdsp.c4
-rw-r--r--libavcodec/fdctdsp.h2
-rw-r--r--libavcodec/ffv1.h5
-rw-r--r--libavcodec/ffv1dec.c80
-rw-r--r--libavcodec/fitsdec.c3
-rw-r--r--libavcodec/flacdec.c8
-rw-r--r--libavcodec/flacdsp.c16
-rw-r--r--libavcodec/flacdsp.h3
-rw-r--r--libavcodec/flacenc.c9
-rw-r--r--libavcodec/fmvc.c1
-rw-r--r--libavcodec/fraps.c3
-rw-r--r--libavcodec/frwu.c3
-rw-r--r--libavcodec/ftr_parser.c7
-rw-r--r--libavcodec/h263dec.c1
-rw-r--r--libavcodec/h263dec.h5
-rw-r--r--libavcodec/h2645_sei.c243
-rw-r--r--libavcodec/h2645_sei.h2
-rw-r--r--libavcodec/h264_slice.c5
-rw-r--r--libavcodec/h264dec.c46
-rw-r--r--libavcodec/h264dec.h1
-rw-r--r--libavcodec/h264dsp.c2
-rw-r--r--libavcodec/h264dsp.h2
-rw-r--r--libavcodec/hapdec.c2
-rw-r--r--libavcodec/hdrdec.c3
-rw-r--r--libavcodec/hevc_filter.c8
-rw-r--r--libavcodec/hevc_mvs.c6
-rw-r--r--libavcodec/hevc_ps.c41
-rw-r--r--libavcodec/hevc_ps.h87
-rw-r--r--libavcodec/hevc_refs.c26
-rw-r--r--libavcodec/hevcdec.c61
-rw-r--r--libavcodec/hevcdec.h14
-rw-r--r--libavcodec/hq_hqa.c3
-rw-r--r--libavcodec/hqx.c3
-rw-r--r--libavcodec/huffyuv.c29
-rw-r--r--libavcodec/huffyuv.h2
-rw-r--r--libavcodec/huffyuvdec.c79
-rw-r--r--libavcodec/huffyuvenc.c224
-rw-r--r--libavcodec/intelh263dec.c2
-rw-r--r--libavcodec/internal.h15
-rw-r--r--libavcodec/jni.c4
-rw-r--r--libavcodec/jpeg2000dec.c5
-rw-r--r--libavcodec/jvdec.c2
-rw-r--r--libavcodec/lagarith.c3
-rw-r--r--libavcodec/lcldec.c3
-rw-r--r--libavcodec/leaddec.c3
-rw-r--r--libavcodec/libaomenc.c77
-rw-r--r--libavcodec/libdav1d.c10
-rw-r--r--libavcodec/libkvazaar.c4
-rw-r--r--libavcodec/liblc3dec.c12
-rw-r--r--libavcodec/libsvtav1.c47
-rw-r--r--libavcodec/libvpxenc.c2
-rw-r--r--libavcodec/libx264.c56
-rw-r--r--libavcodec/libx265.c44
-rw-r--r--libavcodec/libxevd.c8
-rw-r--r--libavcodec/loco.c1
-rw-r--r--libavcodec/lpc.c6
-rw-r--r--libavcodec/m101.c2
-rw-r--r--libavcodec/magicyuv.c3
-rw-r--r--libavcodec/mdec.c2
-rw-r--r--libavcodec/mediacodec_wrapper.c165
-rw-r--r--libavcodec/mediacodecenc.c194
-rw-r--r--libavcodec/mimic.c61
-rw-r--r--libavcodec/mips/Makefile7
-rw-r--r--libavcodec/mips/aacdec_mips.c443
-rw-r--r--libavcodec/mips/aacdec_mips.h253
-rw-r--r--libavcodec/mips/aacpsdsp_mips.c465
-rw-r--r--libavcodec/mips/aacpsy_mips.h238
-rw-r--r--libavcodec/mips/aacsbr_mips.c624
-rw-r--r--libavcodec/mips/aacsbr_mips.h496
-rw-r--r--libavcodec/mips/sbrdsp_mips.c912
-rw-r--r--libavcodec/mlpdec.c1
-rw-r--r--libavcodec/motion_est.c15
-rw-r--r--libavcodec/mpeg12.c2
-rw-r--r--libavcodec/mpeg12dec.c70
-rw-r--r--libavcodec/mpeg4videodec.c5
-rw-r--r--libavcodec/mpeg4videoenc.c1
-rw-r--r--libavcodec/mpegpicture.c2
-rw-r--r--libavcodec/mpegutils.h34
-rw-r--r--libavcodec/mpegvideo.c3
-rw-r--r--libavcodec/mpegvideo.h12
-rw-r--r--libavcodec/mpegvideo_dec.c130
-rw-r--r--libavcodec/mpegvideo_enc.c15
-rw-r--r--libavcodec/mpegvideoenc.h19
-rw-r--r--libavcodec/mscc.c9
-rw-r--r--libavcodec/msp2dec.c3
-rw-r--r--libavcodec/mvcdec.c3
-rw-r--r--libavcodec/mvha.c2
-rw-r--r--libavcodec/mwsc.c11
-rw-r--r--libavcodec/notchlc.c3
-rw-r--r--libavcodec/nvdec_av1.c8
-rw-r--r--libavcodec/nvdec_mpeg12.c1
-rw-r--r--libavcodec/nvenc.c19
-rw-r--r--libavcodec/nvenc.h2
-rw-r--r--libavcodec/nvenc_av1.c8
-rw-r--r--libavcodec/nvenc_hevc.c11
-rw-r--r--libavcodec/options.c2
-rw-r--r--libavcodec/options_table.h1
-rw-r--r--libavcodec/opusdec_celt.c6
-rw-r--r--libavcodec/opusdsp.c6
-rw-r--r--libavcodec/opusdsp.h4
-rw-r--r--libavcodec/opusenc.c5
-rw-r--r--libavcodec/opustab.c28
-rw-r--r--libavcodec/opustab.h2
-rw-r--r--libavcodec/pgxdec.c2
-rw-r--r--libavcodec/photocd.c3
-rw-r--r--libavcodec/pixlet.c2
-rw-r--r--libavcodec/pngdec.c72
-rw-r--r--libavcodec/pnmdec.c2
-rw-r--r--libavcodec/progressframe.h145
-rw-r--r--libavcodec/proresdec.c2
-rw-r--r--libavcodec/prosumer.c2
-rw-r--r--libavcodec/pthread.c1
-rw-r--r--libavcodec/pthread_frame.c334
-rw-r--r--libavcodec/qdrw.c3
-rw-r--r--libavcodec/qoidec.c3
-rw-r--r--libavcodec/qsv.c117
-rw-r--r--libavcodec/qsv_internal.h11
-rw-r--r--libavcodec/qsvdec.c74
-rw-r--r--libavcodec/qsvenc.c119
-rw-r--r--libavcodec/qsvenc.h9
-rw-r--r--libavcodec/r210dec.c2
-rw-r--r--libavcodec/ratecontrol.c1
-rw-r--r--libavcodec/rawdec.c3
-rw-r--r--libavcodec/riscv/Makefile13
-rw-r--r--libavcodec/riscv/aacpsdsp_rvv.S2
-rw-r--r--libavcodec/riscv/ac3dsp_init.c24
-rw-r--r--libavcodec/riscv/ac3dsp_rvb.S21
-rw-r--r--libavcodec/riscv/ac3dsp_rvv.S102
-rw-r--r--libavcodec/riscv/ac3dsp_rvvb.S43
-rw-r--r--libavcodec/riscv/blockdsp_init.c8
-rw-r--r--libavcodec/riscv/blockdsp_rvv.S21
-rw-r--r--libavcodec/riscv/flacdsp_init.c24
-rw-r--r--libavcodec/riscv/flacdsp_rvv.S66
-rw-r--r--libavcodec/riscv/g722dsp_init.c2
-rw-r--r--libavcodec/riscv/h264_chroma_init_riscv.c2
-rw-r--r--libavcodec/riscv/h264dsp_init.c45
-rw-r--r--libavcodec/riscv/huffyuvdsp_init.c3
-rw-r--r--libavcodec/riscv/huffyuvdsp_rvv.S6
-rw-r--r--libavcodec/riscv/idctdsp_init.c2
-rw-r--r--libavcodec/riscv/me_cmp_init.c2
-rw-r--r--libavcodec/riscv/opusdsp_rvv.S2
-rw-r--r--libavcodec/riscv/pixblockdsp_init.c2
-rw-r--r--libavcodec/riscv/rv34dsp_init.c2
-rw-r--r--libavcodec/riscv/rv40dsp_init.c51
-rw-r--r--libavcodec/riscv/rv40dsp_rvv.S371
-rw-r--r--libavcodec/riscv/sbrdsp_init.c7
-rw-r--r--libavcodec/riscv/sbrdsp_rvv.S25
-rw-r--r--libavcodec/riscv/startcode_rvb.S83
-rw-r--r--libavcodec/riscv/startcode_rvv.S44
-rw-r--r--libavcodec/riscv/vc1dsp_init.c37
-rw-r--r--libavcodec/riscv/vc1dsp_rvi.S47
-rw-r--r--libavcodec/riscv/vc1dsp_rvv.S101
-rw-r--r--libavcodec/riscv/vp8dsp.h75
-rw-r--r--libavcodec/riscv/vp8dsp_init.c75
-rw-r--r--libavcodec/riscv/vp8dsp_rvi.S61
-rw-r--r--libavcodec/riscv/vp8dsp_rvv.S174
-rw-r--r--libavcodec/riscv/vp9_intra_rvi.S71
-rw-r--r--libavcodec/riscv/vp9_intra_rvv.S293
-rw-r--r--libavcodec/riscv/vp9_mc_rvi.S105
-rw-r--r--libavcodec/riscv/vp9_mc_rvv.S58
-rw-r--r--libavcodec/riscv/vp9dsp.h194
-rw-r--r--libavcodec/riscv/vp9dsp_init.c123
-rw-r--r--libavcodec/rkmppdec.c85
-rw-r--r--libavcodec/rtv1.c3
-rw-r--r--libavcodec/rv10.c1
-rw-r--r--libavcodec/rv30.c1
-rw-r--r--libavcodec/rv34dsp.h1
-rw-r--r--libavcodec/rv40.c1
-rw-r--r--libavcodec/rv40dsp.c2
-rw-r--r--libavcodec/sbrdsp.h1
-rw-r--r--libavcodec/sbrdsp_template.c2
-rw-r--r--libavcodec/sgidec.c2
-rw-r--r--libavcodec/sgirledec.c3
-rw-r--r--libavcodec/sheervideo.c3
-rw-r--r--libavcodec/speedhqdec.c9
-rw-r--r--libavcodec/targa_y216dec.c3
-rw-r--r--libavcodec/tests/aarch64/dct.c2
-rw-r--r--libavcodec/tests/avcodec.c6
-rw-r--r--libavcodec/tests/x86/dct.c4
-rw-r--r--libavcodec/thread.h55
-rw-r--r--libavcodec/threadprogress.c79
-rw-r--r--libavcodec/threadprogress.h92
-rw-r--r--libavcodec/tiff.c5
-rw-r--r--libavcodec/tmv.c2
-rw-r--r--libavcodec/truemotion2rt.c2
-rw-r--r--libavcodec/utils.c8
-rw-r--r--libavcodec/utvideodec.c2
-rw-r--r--libavcodec/v210dec.c3
-rw-r--r--libavcodec/v210x.c2
-rw-r--r--libavcodec/v308dec.c3
-rw-r--r--libavcodec/v408dec.c7
-rw-r--r--libavcodec/v408enc.c5
-rw-r--r--libavcodec/v410dec.c3
-rw-r--r--libavcodec/v4l2_buffers.c7
-rw-r--r--libavcodec/v4l2_buffers.h6
-rw-r--r--libavcodec/v4l2_m2m.c25
-rw-r--r--libavcodec/v4l2_m2m.h5
-rw-r--r--libavcodec/vaapi_av1.c8
-rw-r--r--libavcodec/vaapi_decode.c41
-rw-r--r--libavcodec/vaapi_encode_av1.c103
-rw-r--r--libavcodec/vaapi_encode_h264.c2
-rw-r--r--libavcodec/vaapi_encode_h265.c5
-rw-r--r--libavcodec/vaapi_vp9.c2
-rw-r--r--libavcodec/vble.c7
-rw-r--r--libavcodec/vbndec.c3
-rw-r--r--libavcodec/vc1_parser.c5
-rw-r--r--libavcodec/vc1dec.c1
-rw-r--r--libavcodec/vcr1.c2
-rw-r--r--libavcodec/vdpau_av1.c11
-rw-r--r--libavcodec/version.h2
-rw-r--r--libavcodec/vmixdec.c3
-rw-r--r--libavcodec/vp3.c151
-rw-r--r--libavcodec/vp8.c116
-rw-r--r--libavcodec/vp8.h5
-rw-r--r--libavcodec/vp8dsp.c2
-rw-r--r--libavcodec/vp8dsp.h1
-rw-r--r--libavcodec/vp9.c156
-rw-r--r--libavcodec/vp9_mc_template.c2
-rw-r--r--libavcodec/vp9block.c5
-rw-r--r--libavcodec/vp9dec.h6
-rw-r--r--libavcodec/vp9dsp.c2
-rw-r--r--libavcodec/vp9dsp.h1
-rw-r--r--libavcodec/vp9lpf.c1
-rw-r--r--libavcodec/vp9mvs.c4
-rw-r--r--libavcodec/vp9recon.c19
-rw-r--r--libavcodec/vp9shared.h9
-rw-r--r--libavcodec/vqcdec.c5
-rw-r--r--libavcodec/vulkan_av1.c46
-rw-r--r--libavcodec/vulkan_decode.c9
-rw-r--r--libavcodec/vulkan_decode.h4
-rw-r--r--libavcodec/vulkan_hevc.c16
-rw-r--r--libavcodec/vvc/ctu.c24
-rw-r--r--libavcodec/vvc/ctu.h14
-rw-r--r--libavcodec/vvc/data.c83
-rw-r--r--libavcodec/vvc/data.h10
-rw-r--r--libavcodec/vvc/dec.c37
-rw-r--r--libavcodec/vvc/dec.h29
-rw-r--r--libavcodec/vvc/dsp.h13
-rw-r--r--libavcodec/vvc/filter.c22
-rw-r--r--libavcodec/vvc/inter.c622
-rw-r--r--libavcodec/vvc/inter_template.c168
-rw-r--r--libavcodec/vvc/intra.c24
-rw-r--r--libavcodec/vvc/mvs.c39
-rw-r--r--libavcodec/vvc/ps.c40
-rw-r--r--libavcodec/vvc/ps.h5
-rw-r--r--libavcodec/vvc/refs.c57
-rw-r--r--libavcodec/vvc/thread.c53
-rw-r--r--libavcodec/vvc/thread.h2
-rw-r--r--libavcodec/vvc_parser.c9
-rw-r--r--libavcodec/wavarc.c2
-rw-r--r--libavcodec/wavpack.c156
-rw-r--r--libavcodec/wavpack.h1
-rw-r--r--libavcodec/wbmpdec.c3
-rw-r--r--libavcodec/webp.c3
-rw-r--r--libavcodec/webvttdec.c4
-rw-r--r--libavcodec/wnv1.c1
-rw-r--r--libavcodec/x86/Makefile2
-rw-r--r--libavcodec/x86/ac3dsp.asm7
-rw-r--r--libavcodec/x86/blockdsp.asm33
-rw-r--r--libavcodec/x86/blockdsp_init.c13
-rw-r--r--libavcodec/x86/flacdsp.asm72
-rw-r--r--libavcodec/x86/flacdsp_init.c8
-rw-r--r--libavcodec/x86/opusdsp.asm9
-rw-r--r--libavcodec/x86/opusdsp_init.c2
-rw-r--r--libavcodec/x86/vp3dsp_init.c2
-rw-r--r--libavcodec/x86/vvc/Makefile3
-rw-r--r--libavcodec/x86/vvc/vvc_alf.asm810
-rw-r--r--libavcodec/x86/vvc/vvcdsp_init.c209
-rw-r--r--libavcodec/xbmdec.c3
-rw-r--r--libavcodec/xl.c2
-rw-r--r--libavcodec/xpmdec.c3
-rw-r--r--libavcodec/xwddec.c3
-rw-r--r--libavcodec/y41pdec.c3
-rw-r--r--libavcodec/ylc.c2
-rw-r--r--libavcodec/yop.c1
-rw-r--r--libavcodec/yuv4dec.c3
-rw-r--r--libavdevice/v4l2.c6
-rw-r--r--libavfilter/Makefile17
-rw-r--r--libavfilter/af_channelsplit.c4
-rw-r--r--libavfilter/af_volume.c1
-rw-r--r--libavfilter/allfilters.c2
-rw-r--r--libavfilter/avfilter.c7
-rw-r--r--libavfilter/avfiltergraph.c2
-rw-r--r--libavfilter/blend.h10
-rw-r--r--libavfilter/blend_modes.c2
-rw-r--r--libavfilter/buffersrc.c2
-rw-r--r--libavfilter/convolution.h2
-rw-r--r--libavfilter/dnn/dnn_backend_common.h13
-rw-r--r--libavfilter/dnn/dnn_backend_openvino.c146
-rw-r--r--libavfilter/dnn/dnn_backend_tf.c82
-rw-r--r--libavfilter/dnn/dnn_backend_torch.cpp68
-rw-r--r--libavfilter/dnn/dnn_interface.c90
-rw-r--r--libavfilter/dnn_filter_common.c38
-rw-r--r--libavfilter/dnn_filter_common.h39
-rw-r--r--libavfilter/dnn_interface.h67
-rw-r--r--libavfilter/f_select.c12
-rw-r--r--libavfilter/f_sidedata.c10
-rw-r--r--libavfilter/framesync.c11
-rw-r--r--libavfilter/framesync.h1
-rw-r--r--libavfilter/qsvvpp.c83
-rw-r--r--libavfilter/qsvvpp.h2
-rw-r--r--libavfilter/riscv/Makefile4
-rw-r--r--libavfilter/signature_lookup.c8
-rw-r--r--libavfilter/vaapi_vpp.c7
-rw-r--r--libavfilter/version.h2
-rw-r--r--libavfilter/vf_blend.c40
-rw-r--r--libavfilter/vf_blend_init.h4
-rw-r--r--libavfilter/vf_colorspace.c62
-rw-r--r--libavfilter/vf_convolution.c2
-rw-r--r--libavfilter/vf_curves.c6
-rw-r--r--libavfilter/vf_derain.c6
-rw-r--r--libavfilter/vf_dnn_classify.c4
-rw-r--r--libavfilter/vf_dnn_detect.c14
-rw-r--r--libavfilter/vf_dnn_processing.c4
-rw-r--r--libavfilter/vf_drawbox_vaapi.c369
-rw-r--r--libavfilter/vf_geq.c20
-rw-r--r--libavfilter/vf_lut3d.c4
-rw-r--r--libavfilter/vf_overlay_qsv.c9
-rw-r--r--libavfilter/vf_pad_vaapi.c283
-rw-r--r--libavfilter/vf_scale.c233
-rw-r--r--libavfilter/vf_signalstats.c410
-rw-r--r--libavfilter/vf_smartblur.c43
-rw-r--r--libavfilter/vf_sr.c6
-rw-r--r--libavfilter/vf_stack_qsv.c9
-rw-r--r--libavfilter/vf_thumbnail_cuda.c2
-rw-r--r--libavfilter/vf_tonemap_vaapi.c190
-rw-r--r--libavfilter/vf_v360.c2
-rw-r--r--libavfilter/vf_vpp_qsv.c2
-rw-r--r--libavfilter/vf_xmedian.c8
-rw-r--r--libavfilter/vsrc_testsrc.c20
-rw-r--r--libavfilter/x86/Makefile4
-rw-r--r--libavfilter/x86/vf_blend_init.c2
-rw-r--r--libavformat/Makefile19
-rw-r--r--libavformat/aacdec.c14
-rw-r--r--libavformat/allformats.c2
-rw-r--r--libavformat/avisynth.c15
-rw-r--r--libavformat/bitstream.c1
-rw-r--r--libavformat/chromaprint.c12
-rw-r--r--libavformat/concatdec.c5
-rw-r--r--libavformat/demux.c13
-rw-r--r--libavformat/evc.c4
-rw-r--r--libavformat/file.c7
-rw-r--r--libavformat/flacdec.c11
-rw-r--r--libavformat/hlsenc.c12
-rw-r--r--libavformat/http.c136
-rw-r--r--libavformat/iamf_reader.c3
-rw-r--r--libavformat/iamf_writer.c11
-rw-r--r--libavformat/iamfdec.c3
-rw-r--r--libavformat/img2.c148
-rw-r--r--libavformat/img2.h2
-rw-r--r--libavformat/internal.h3
-rw-r--r--libavformat/ivfenc.c4
-rw-r--r--libavformat/kvag.c5
-rw-r--r--libavformat/lc3.c244
-rw-r--r--libavformat/lrc.c2
-rw-r--r--libavformat/lrcdec.c2
-rw-r--r--libavformat/lrcenc.c2
-rw-r--r--libavformat/matroskaenc.c26
-rw-r--r--libavformat/mov.c117
-rw-r--r--libavformat/mov_chan.c455
-rw-r--r--libavformat/mov_chan.h7
-rw-r--r--libavformat/movenc.c128
-rw-r--r--libavformat/mp3dec.c20
-rw-r--r--libavformat/mpc.c2
-rw-r--r--libavformat/mxfdec.c3
-rw-r--r--libavformat/network.c9
-rw-r--r--libavformat/oggenc.c2
-rw-r--r--libavformat/riff.c1
-rw-r--r--libavformat/rtsp.c1
-rw-r--r--libavformat/tee.c82
-rw-r--r--libavformat/tests/movenc.c65
-rw-r--r--libavformat/tls_gnutls.c16
-rw-r--r--libavformat/tls_mbedtls.c4
-rw-r--r--libavformat/tls_openssl.c51
-rw-r--r--libavformat/utils.c13
-rw-r--r--libavformat/version.h2
-rw-r--r--libavformat/vvc.c110
-rw-r--r--libavformat/wvdec.c2
-rw-r--r--libavutil/aarch64/cpu.c25
-rw-r--r--libavutil/base64.c6
-rw-r--r--libavutil/cpu.c2
-rw-r--r--libavutil/cpu.h2
-rw-r--r--libavutil/error.c1
-rw-r--r--libavutil/error.h1
-rw-r--r--libavutil/frame.c85
-rw-r--r--libavutil/frame.h62
-rw-r--r--libavutil/hwcontext_qsv.c386
-rw-r--r--libavutil/hwcontext_qsv.h27
-rw-r--r--libavutil/hwcontext_vaapi.c12
-rw-r--r--libavutil/iamf.c30
-rw-r--r--libavutil/mastering_display_metadata.c13
-rw-r--r--libavutil/mastering_display_metadata.h9
-rw-r--r--libavutil/opt.c94
-rw-r--r--libavutil/opt.h23
-rw-r--r--libavutil/ppc/cpu.c6
-rw-r--r--libavutil/riscv/asm.S180
-rw-r--r--libavutil/riscv/cpu.c83
-rw-r--r--libavutil/riscv/cpu.h21
-rw-r--r--libavutil/tests/base64.c10
-rw-r--r--libavutil/tests/cpu.c2
-rw-r--r--libavutil/tests/opt.c120
-rw-r--r--libavutil/tests/side_data_array.c52
-rw-r--r--libavutil/version.h2
-rw-r--r--libswscale/loongarch/Makefile1
-rw-r--r--libswscale/loongarch/input.S495
-rw-r--r--libswscale/loongarch/input_lasx.c43
-rw-r--r--libswscale/loongarch/input_lsx.c65
-rw-r--r--libswscale/loongarch/output.S254
-rw-r--r--libswscale/loongarch/output_lasx.c23
-rw-r--r--libswscale/loongarch/output_lsx.c22
-rw-r--r--libswscale/loongarch/swscale.S368
-rw-r--r--libswscale/loongarch/swscale_init_loongarch.c65
-rw-r--r--libswscale/loongarch/swscale_loongarch.h84
-rw-r--r--libswscale/output.c60
-rw-r--r--libswscale/riscv/rgb2rgb_rvv.S4
-rw-r--r--libswscale/swscale_internal.h1
-rw-r--r--libswscale/utils.c6
-rw-r--r--tests/checkasm/Makefile4
-rw-r--r--tests/checkasm/ac3dsp.c133
-rw-r--r--tests/checkasm/blockdsp.c25
-rw-r--r--tests/checkasm/checkasm.c33
-rw-r--r--tests/checkasm/checkasm.h10
-rw-r--r--tests/checkasm/fdctdsp.c71
-rw-r--r--tests/checkasm/flacdsp.c68
-rw-r--r--tests/checkasm/h264chroma.c34
-rw-r--r--tests/checkasm/h264dsp.c22
-rw-r--r--tests/checkasm/hevc_add_res.c2
-rw-r--r--tests/checkasm/hevc_deblock.c6
-rw-r--r--tests/checkasm/hevc_pel.c28
-rw-r--r--tests/checkasm/hevc_sao.c8
-rw-r--r--tests/checkasm/huffyuvdsp.c2
-rw-r--r--tests/checkasm/llauddsp.c4
-rw-r--r--tests/checkasm/llviddsp.c4
-rw-r--r--tests/checkasm/motion.c4
-rw-r--r--tests/checkasm/opusdsp.c9
-rw-r--r--tests/checkasm/rv40dsp.c75
-rw-r--r--tests/checkasm/svq1enc.c5
-rw-r--r--tests/checkasm/sw_gbrp.c34
-rw-r--r--tests/checkasm/vc1dsp.c37
-rw-r--r--tests/checkasm/vf_blend.c2
-rw-r--r--tests/checkasm/vf_bwdif.c6
-rw-r--r--tests/checkasm/vf_colorspace.c8
-rw-r--r--tests/checkasm/vp8dsp.c3
-rw-r--r--tests/checkasm/vvc_alf.c185
-rw-r--r--tests/checkasm/vvc_mc.c18
-rwxr-xr-xtests/fate-run.sh2
-rwxr-xr-xtests/fate.sh6
-rw-r--r--tests/fate/checkasm.mak3
-rw-r--r--tests/fate/iamf.mak14
-rw-r--r--tests/fate/lavf-container.mak2
-rw-r--r--tests/filtergraphs/scale2ref_keep_aspect3
-rw-r--r--tests/ref/fate/exif-image-tiff2
-rw-r--r--tests/ref/fate/iamf-5_1-copy305
-rw-r--r--tests/ref/fate/iamf-5_1-demux305
-rw-r--r--tests/ref/fate/jv-demux14
-rw-r--r--tests/ref/fate/mov-mp4-pcm2
-rw-r--r--tests/ref/fate/movenc10
-rw-r--r--tests/ref/fate/opt90
-rw-r--r--tests/ref/fate/side_data_array22
-rw-r--r--tests/ref/fate/source3
-rw-r--r--tests/ref/fate/sub-webvtt2
-rw-r--r--tests/ref/fate/sub-webvtt22
-rw-r--r--tests/ref/lavf-fate/vvc.mp43
-rw-r--r--tests/ref/vsynth/vsynth1-mpeg4-thread6
-rw-r--r--tests/ref/vsynth/vsynth2-mpeg2-ivlc-qprd6
-rw-r--r--tests/ref/vsynth/vsynth2-mpeg4-adap8
-rw-r--r--tests/ref/vsynth/vsynth2-mpeg4-qprd6
-rw-r--r--tests/ref/vsynth/vsynth2-mpeg4-thread6
-rw-r--r--tests/ref/vsynth/vsynth_lena-mpeg4-rc4
-rw-r--r--tools/Makefile3
-rw-r--r--tools/target_enc_fuzzer.c206
618 files changed, 21190 insertions, 12289 deletions
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 784b7bdf73..0000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-language: c
-sudo: false
-os:
- - linux
- - osx
-addons:
- apt:
- packages:
- - nasm
- - diffutils
-compiler:
- - clang
- - gcc
-matrix:
- exclude:
- - os: osx
- compiler: gcc
-cache:
- directories:
- - ffmpeg-samples
-before_install:
- - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update; fi
-install:
- - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install nasm; fi
-script:
- - mkdir -p ffmpeg-samples
- - ./configure --samples=ffmpeg-samples --cc=$CC
- - make -j 8
- - make fate-rsync
- - make check -j 8
diff --git a/Changelog b/Changelog
index 18e83b99a1..12770e4296 100644
--- a/Changelog
+++ b/Changelog
@@ -4,6 +4,13 @@ releases are sorted from youngest to oldest.
version <next>:
- Raw Captions with Time (RCWT) closed caption demuxer
- LC3/LC3plus decoding/encoding using external library liblc3
+- ffmpeg CLI filtergraph chaining
+- LC3/LC3plus demuxer and muxer
+- pad_vaapi, drawbox_vaapi filters
+- vf_scale supports secondary ref input and framesync options
+- vf_scale2ref deprecated
+- qsv_params option added for QSV encoders
+- VVC decoder compatible with DVB test content
version 7.0:
diff --git a/MAINTAINERS b/MAINTAINERS
index dd633f37e8..41a98744ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -488,6 +488,7 @@ Resamplers:
Operating systems / CPU architectures
=====================================
+*BSD Brad Smith
Alpha Falk Hueffner
MIPS Manojkumar Bhosale, Shiyou Yin
LoongArch Shiyou Yin
diff --git a/Makefile b/Makefile
index b309dbc4db..4c3af09fec 100644
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,9 @@ $(TOOLS): %$(EXESUF): %.o
target_dec_%_fuzzer$(EXESUF): target_dec_%_fuzzer.o $(FF_DEP_LIBS)
$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(ELIBS) $(FF_EXTRALIBS) $(LIBFUZZER_PATH)
+target_enc_%_fuzzer$(EXESUF): target_enc_%_fuzzer.o $(FF_DEP_LIBS)
+ $(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(ELIBS) $(FF_EXTRALIBS) $(LIBFUZZER_PATH)
+
tools/target_bsf_%_fuzzer$(EXESUF): tools/target_bsf_%_fuzzer.o $(FF_DEP_LIBS)
$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $^ $(ELIBS) $(FF_EXTRALIBS) $(LIBFUZZER_PATH)
@@ -98,7 +101,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \
ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \
ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \
MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \
- MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS \
+ MMI-OBJS LSX-OBJS LASX-OBJS RV-OBJS RVV-OBJS RVVB-OBJS \
OBJS SLIBOBJS SHLIBOBJS STLIBOBJS HOSTOBJS TESTOBJS
define RESET
diff --git a/configure b/configure
index 493fe3cffd..b16722d83d 100755
--- a/configure
+++ b/configure
@@ -2222,6 +2222,7 @@ ARCH_EXT_LIST_PPC="
ARCH_EXT_LIST_RISCV="
rv
rvv
+ rv_zvbb
"
ARCH_EXT_LIST_X86="
@@ -2272,6 +2273,7 @@ HAVE_LIST_PUB="
HEADERS_LIST="
arpa_inet_h
+ asm_hwprobe_h
asm_types_h
cdio_paranoia_h
cdio_paranoia_paranoia_h
@@ -2298,6 +2300,7 @@ HEADERS_LIST="
OpenGL_gl3_h
poll_h
pthread_np_h
+ sys_hwprobe_h
sys_param_h
sys_resource_h
sys_select_h
@@ -2489,6 +2492,7 @@ TYPES_LIST="
struct_sockaddr_storage
struct_stat_st_mtim_tv_nsec
struct_v4l2_frmivalenum_discrete
+ struct_mfxConfigInterface
"
HAVE_LIST="
@@ -2550,7 +2554,8 @@ CONFIG_EXTRA="
deflate_wrapper
dirac_parse
dnn
- dovi_rpu
+ dovi_rpudec
+ dovi_rpuenc
dvprofile
evcparse
exif
@@ -2582,6 +2587,7 @@ CONFIG_EXTRA="
inflate_wrapper
intrax8
iso_media
+ iso_writer
ividsp
jpegtables
lgplv3
@@ -2757,6 +2763,7 @@ power8_deps="vsx"
rv_deps="riscv"
rvv_deps="rv"
+rv_zvbb_deps="rvv"
loongson2_deps="mips"
loongson3_deps="mips"
@@ -2841,7 +2848,8 @@ cbs_vp8_select="cbs"
cbs_vp9_select="cbs"
deflate_wrapper_deps="zlib"
dirac_parse_select="golomb"
-dovi_rpu_select="golomb"
+dovi_rpudec_select="golomb"
+dovi_rpuenc_select="golomb"
dnn_suggest="libtensorflow libopenvino libtorch"
dnn_deps="avformat swscale"
error_resilience_select="me_cmp"
@@ -2851,6 +2859,7 @@ faandct_select="fdctdsp"
faanidct_deps="faan"
faanidct_select="idctdsp"
h264dsp_select="startcode"
+h264parse_select="golomb"
h264_sei_select="atsc_a53 golomb"
hevcparse_select="golomb"
hevc_sei_select="atsc_a53 golomb"
@@ -2870,6 +2879,7 @@ mpegvideoenc_select="aandcttables fdctdsp me_cmp mpegvideo pixblockdsp"
msmpeg4dec_select="h263_decoder"
msmpeg4enc_select="h263_encoder"
vc1dsp_select="h264chroma qpeldsp startcode"
+wmv2dsp_select="qpeldsp"
# decoders / encoders
aac_decoder_select="adts_header mpeg4audio sinewin"
@@ -2901,7 +2911,9 @@ asv1_encoder_select="aandcttables bswapdsp fdctdsp pixblockdsp"
asv2_decoder_select="blockdsp bswapdsp idctdsp"
asv2_encoder_select="aandcttables bswapdsp fdctdsp pixblockdsp"
atrac1_decoder_select="sinewin"
-av1_decoder_select="atsc_a53 cbs_av1 dovi_rpu"
+atrac3p_decoder_select="sinewin"
+atrac3pal_decoder_select="sinewin"
+av1_decoder_select="atsc_a53 cbs_av1 dovi_rpudec"
bink_decoder_select="blockdsp hpeldsp"
binkaudio_dct_decoder_select="wma_freqs"
binkaudio_rdft_decoder_select="wma_freqs"
@@ -2913,7 +2925,7 @@ cook_decoder_select="audiodsp sinewin"
cri_decoder_select="mjpeg_decoder"
cscd_decoder_suggest="zlib"
dds_decoder_select="texturedsp"
-dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc"
+dirac_decoder_select="dirac_parse dwt golomb mpegvideoenc qpeldsp videodsp"
dnxhd_decoder_select="blockdsp idctdsp"
dnxhd_encoder_select="blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp videodsp"
dvvideo_decoder_select="dvprofile idctdsp"
@@ -2942,6 +2954,7 @@ flv_decoder_select="h263_decoder"
flv_encoder_select="h263_encoder"
fourxm_decoder_select="blockdsp bswapdsp"
fraps_decoder_select="bswapdsp huffman"
+ftr_decoder_select="adts_header"
g2m_decoder_deps="zlib"
g2m_decoder_select="blockdsp idctdsp jpegtables"
g729_decoder_select="audiodsp"
@@ -2957,7 +2970,7 @@ h264_decoder_suggest="error_resilience"
hap_decoder_select="snappy texturedsp"
hap_encoder_deps="libsnappy"
hap_encoder_select="texturedspenc"
-hevc_decoder_select="bswapdsp cabac dovi_rpu golomb hevcparse hevc_sei videodsp"
+hevc_decoder_select="bswapdsp cabac dovi_rpudec golomb hevcparse hevc_sei videodsp"
huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
hymt_decoder_select="huffyuv_decoder"
@@ -3027,7 +3040,7 @@ notchlc_decoder_select="lzf"
nuv_decoder_select="idctdsp"
opus_decoder_deps="swresample"
opus_encoder_select="audio_frame_queue"
-pdv_decoder_deps="zlib"
+pdv_decoder_select="inflate_wrapper"
png_decoder_select="inflate_wrapper"
png_encoder_select="deflate_wrapper llvidencdsp"
prores_decoder_select="blockdsp idctdsp"
@@ -3131,7 +3144,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext"
d3d12va_deps="dxva_h ID3D12Device ID3D12VideoDecoder"
dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
ffnvcodec_deps_any="libdl LoadLibrary"
-mediacodec_deps="android"
+mediacodec_deps="android mediandk"
nvdec_deps="ffnvcodec"
vaapi_x11_deps="xlib_x11"
videotoolbox_hwaccel_deps="videotoolbox pthreads"
@@ -3313,6 +3326,7 @@ ac3_mf_encoder_deps="mediafoundation"
av1_cuvid_decoder_deps="cuvid CUVIDAV1PICPARAMS"
av1_mediacodec_decoder_deps="mediacodec"
av1_mediacodec_encoder_deps="mediacodec"
+av1_mediacodec_encoder_select="extract_extradata_bsf"
av1_nvenc_encoder_deps="nvenc NV_ENC_PIC_PARAMS_AV1"
av1_nvenc_encoder_select="atsc_a53"
h263_v4l2m2m_decoder_deps="v4l2_m2m h263_v4l2_m2m"
@@ -3323,7 +3337,7 @@ h264_cuvid_decoder_select="h264_mp4toannexb_bsf"
h264_mediacodec_decoder_deps="mediacodec"
h264_mediacodec_decoder_select="h264_mp4toannexb_bsf h264_parser"
h264_mediacodec_encoder_deps="mediacodec"
-h264_mediacodec_encoder_select="h264_metadata"
+h264_mediacodec_encoder_select="extract_extradata_bsf h264_metadata"
h264_mf_encoder_deps="mediafoundation"
h264_mmal_decoder_deps="mmal"
h264_nvenc_encoder_deps="nvenc"
@@ -3343,7 +3357,7 @@ hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf"
hevc_mediacodec_decoder_deps="mediacodec"
hevc_mediacodec_decoder_select="hevc_mp4toannexb_bsf hevc_parser"
hevc_mediacodec_encoder_deps="mediacodec"
-hevc_mediacodec_encoder_select="hevc_metadata"
+hevc_mediacodec_encoder_select="extract_extradata_bsf hevc_metadata"
hevc_mf_encoder_deps="mediafoundation"
hevc_nvenc_encoder_deps="nvenc"
hevc_nvenc_encoder_select="atsc_a53"
@@ -3375,6 +3389,7 @@ mpeg2_v4l2m2m_decoder_deps="v4l2_m2m mpeg2_v4l2_m2m"
mpeg4_cuvid_decoder_deps="cuvid"
mpeg4_mediacodec_decoder_deps="mediacodec"
mpeg4_mediacodec_encoder_deps="mediacodec"
+mpeg4_mediacodec_encoder_select="extract_extradata_bsf"
mpeg4_mmal_decoder_deps="mmal"
mpeg4_omx_encoder_deps="omx"
mpeg4_v4l2m2m_decoder_deps="v4l2_m2m mpeg4_v4l2_m2m"
@@ -3413,6 +3428,7 @@ av1_vaapi_encoder_select="cbs_av1 vaapi_encode"
aac_parser_select="adts_header mpeg4audio"
av1_parser_select="cbs_av1"
evc_parser_select="evcparse"
+ftr_parser_select="adts_header mpeg4audio"
h264_parser_select="golomb h264dsp h264parse h264_sei"
hevc_parser_select="hevcparse hevc_sei"
mpegaudio_parser_select="mpegaudioheader"
@@ -3483,14 +3499,14 @@ prores_videotoolbox_encoder_deps="pthreads"
prores_videotoolbox_encoder_select="videotoolbox_encoder"
libaom_av1_decoder_deps="libaom"
libaom_av1_encoder_deps="libaom"
-libaom_av1_encoder_select="extract_extradata_bsf"
+libaom_av1_encoder_select="extract_extradata_bsf dovi_rpuenc"
libaribb24_decoder_deps="libaribb24"
libaribcaption_decoder_deps="libaribcaption"
libcelt_decoder_deps="libcelt"
libcodec2_decoder_deps="libcodec2"
libcodec2_encoder_deps="libcodec2"
libdav1d_decoder_deps="libdav1d"
-libdav1d_decoder_select="atsc_a53 dovi_rpu"
+libdav1d_decoder_select="atsc_a53 dovi_rpudec"
libdavs2_decoder_deps="libdavs2"
libdavs2_decoder_select="avs2_parser"
libfdk_aac_decoder_deps="libfdk_aac"
@@ -3532,6 +3548,7 @@ libspeex_decoder_deps="libspeex"
libspeex_encoder_deps="libspeex"
libspeex_encoder_select="audio_frame_queue"
libsvtav1_encoder_deps="libsvtav1"
+libsvtav1_encoder_select="dovi_rpuenc"
libtheora_encoder_deps="libtheora"
libtwolame_encoder_deps="libtwolame"
libuavs3d_decoder_deps="libuavs3d"
@@ -3551,7 +3568,7 @@ libx264_encoder_select="atsc_a53 golomb"
libx264rgb_encoder_deps="libx264"
libx264rgb_encoder_select="libx264_encoder"
libx265_encoder_deps="libx265"
-libx265_encoder_select="atsc_a53"
+libx265_encoder_select="atsc_a53 dovi_rpuenc"
libxavs_encoder_deps="libxavs"
libxavs2_encoder_deps="libxavs2"
libxevd_decoder_deps="libxevd"
@@ -3595,7 +3612,7 @@ evc_demuxer_select="evc_frame_merge_bsf evc_parser"
f4v_muxer_select="mov_muxer"
fifo_muxer_deps="threads"
flac_demuxer_select="flac_parser"
-flv_muxer_select="aac_adtstoasc_bsf"
+flv_muxer_select="aac_adtstoasc_bsf iso_writer"
gxf_muxer_select="pcm_rechunk_bsf"
hds_muxer_select="flv_muxer"
hls_demuxer_select="aac_demuxer ac3_demuxer adts_header ac3_parser eac3_demuxer mov_demuxer mpegts_demuxer"
@@ -3613,12 +3630,12 @@ latm_muxer_select="aac_adtstoasc_bsf mpeg4audio"
matroska_audio_muxer_select="matroska_muxer"
matroska_demuxer_select="riffdec"
matroska_demuxer_suggest="bzlib zlib"
-matroska_muxer_select="mpeg4audio riffenc aac_adtstoasc_bsf pgs_frame_merge_bsf vp9_superframe_bsf"
+matroska_muxer_select="iso_writer mpeg4audio riffenc aac_adtstoasc_bsf pgs_frame_merge_bsf vp9_superframe_bsf"
mlp_demuxer_select="mlp_parser"
mmf_muxer_select="riffenc"
mov_demuxer_select="iso_media riffdec"
mov_demuxer_suggest="iamfdec zlib"
-mov_muxer_select="iso_media riffenc rtpenc_chain vp9_superframe_bsf aac_adtstoasc_bsf ac3_parser"
+mov_muxer_select="iso_media iso_writer riffenc rtpenc_chain vp9_superframe_bsf aac_adtstoasc_bsf ac3_parser"
mov_muxer_suggest="iamfenc"
mp3_demuxer_select="mpegaudio_parser"
mp3_muxer_select="mpegaudioheader"
@@ -3626,7 +3643,7 @@ mp4_muxer_select="mov_muxer"
mpegts_demuxer_select="iso_media"
mpegts_muxer_select="ac3_parser adts_muxer latm_muxer h264_mp4toannexb_bsf hevc_mp4toannexb_bsf vvc_mp4toannexb_bsf"
mpegtsraw_demuxer_select="mpegts_demuxer"
-mxf_muxer_select="pcm_rechunk_bsf rangecoder"
+mxf_muxer_select="iso_writer pcm_rechunk_bsf rangecoder"
mxf_d10_muxer_select="mxf_muxer"
mxf_opatom_muxer_select="mxf_muxer"
nut_muxer_select="riffenc"
@@ -3639,6 +3656,7 @@ ogv_muxer_select="ogg_muxer"
opus_muxer_select="ogg_muxer"
psp_muxer_select="mov_muxer"
rtp_demuxer_select="sdp_demuxer"
+rtp_muxer_select="iso_writer"
rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer"
rtpdec_select="asf_demuxer mov_demuxer mpegts_demuxer rm_demuxer rtp_protocol srtp"
rtsp_demuxer_select="http_protocol rtpdec"
@@ -3850,6 +3868,7 @@ histeq_filter_deps="gpl"
hqdn3d_filter_deps="gpl"
iccdetect_filter_deps="lcms2"
iccgen_filter_deps="lcms2"
+identity_filter_select="scene_sad"
interlace_filter_deps="gpl"
kerndeint_filter_deps="gpl"
ladspa_filter_deps="ladspa libdl"
@@ -3956,6 +3975,8 @@ vstack_qsv_filter_deps="libmfx"
vstack_qsv_filter_select="qsvvpp"
xstack_qsv_filter_deps="libmfx"
xstack_qsv_filter_select="qsvvpp"
+pad_vaapi_filter_deps="vaapi_1"
+drawbox_vaapi_filter_deps="vaapi_1"
# examples
avio_http_serve_files_deps="avformat avutil fork"
@@ -4737,7 +4758,7 @@ chmod +x $TMPE
# make sure we can execute files in $TMPDIR
cat > $TMPSH 2>> $logfile <<EOF
-#! /bin/sh
+#!/bin/sh
EOF
chmod +x $TMPSH >> $logfile 2>&1
if ! $TMPSH >> $logfile 2>&1; then
@@ -5029,7 +5050,12 @@ probe_cc(){
else
_ident=$($_cc --version 2>/dev/null | head -n1 | tr -d '\r')
fi
- _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 | awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
+ if [ -x "$(command -v wslpath)" ]; then
+ _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 | awk '\''/including/ { sub(/^.*file: */, ""); if (!match($$0, / /)) { print $$0 } }'\'' | xargs -r -d\\n -n1 wslpath -u | awk '\''BEGIN { printf "%s:", "$@" }; { sub(/\r/,""); printf " %s", $$0 }; END { print "" }'\'' > $(@:.o=.d)'
+
+ else
+ _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 | awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
+ fi
_DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -showIncludes -Zs'
_cflags_speed="-O2"
_cflags_size="-O1"
@@ -5525,6 +5551,9 @@ elif enabled ppc; then
elif enabled riscv; then
+ check_headers asm/hwprobe.h
+ check_headers sys/hwprobe.h
+
if test_cpp_condition stddef.h "__riscv_zbb"; then
enable fast_clz
fi
@@ -6120,11 +6149,7 @@ extern_prefix=${sym%%ff_extern*}
check_cc pragma_deprecated "" '_Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")'
-# The global variable ensures the bits appear unchanged in the object file.
-test_cc <<EOF || die "endian test failed"
-unsigned int endian = 'B' << 24 | 'I' << 16 | 'G' << 8 | 'E';
-EOF
-od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian
+test_cpp_condition stdlib.h "defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)" && enable bigendian
check_cc const_nan math.h "struct { double d; } static const bar[] = { { NAN } }"
@@ -6367,6 +6392,7 @@ elif enabled riscv; then
enabled rv && check_inline_asm rv '".option arch, +zbb\nrev8 t0, t1"'
enabled rvv && check_inline_asm rvv '".option arch, +v\nvsetivli zero, 0, e8, m1, ta, ma"'
+ enabled rv_zvbb && check_inline_asm rv_zvbb '".option arch, +zvbb\nvclz.v v0, v8"'
elif enabled x86; then
@@ -6644,7 +6670,7 @@ check_lib shell32 "windows.h shellapi.h" CommandLineToArgvW -lshell32
check_lib psapi "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
check_lib android android/native_window.h ANativeWindow_acquire -landroid
-check_lib mediandk "stdint.h media/NdkImage.h" AImage_delete -lmediandk
+check_lib mediandk "stdint.h media/NdkMediaFormat.h" AMediaFormat_new -lmediandk
check_lib camera2ndk "stdbool.h stdint.h camera/NdkCameraManager.h" ACameraManager_create -lcamera2ndk
enabled appkit && check_apple_framework AppKit
@@ -6834,7 +6860,7 @@ enabled gnutls && require_pkg_config gnutls gnutls gnutls/gnutls.h gn
enabled jni && { [ $target_os = "android" ] && check_headers jni.h && enabled pthreads || die "ERROR: jni not found"; }
enabled ladspa && require_headers "ladspa.h dlfcn.h"
enabled lcms2 && require_pkg_config lcms2 "lcms2 >= 2.13" lcms2.h cmsCreateContext
-enabled libaom && require_pkg_config libaom "aom >= 1.0.0" aom/aom_codec.h aom_codec_version
+enabled libaom && require_pkg_config libaom "aom >= 2.0.0" aom/aom_codec.h aom_codec_version
enabled libaribb24 && { check_pkg_config libaribb24 "aribb24 > 1.0.3" "aribb24/aribb24.h" arib_instance_new ||
{ enabled gpl && require_pkg_config libaribb24 aribb24 "aribb24/aribb24.h" arib_instance_new; } ||
die "ERROR: libaribb24 requires version higher than 1.0.3 or --enable-gpl."; }
@@ -6910,6 +6936,7 @@ elif enabled libvpl; then
check_pkg_config libmfx "vpl >= 2.6" "mfxvideo.h mfxdispatcher.h" MFXLoad || \
die "ERROR: libvpl >= 2.6 not found"
add_cflags -DMFX_DEPRECATED_OFF
+ check_type "vpl/mfxdefs.h vpl/mfxvideo.h" "struct mfxConfigInterface"
fi
if enabled libmfx; then
@@ -7007,7 +7034,7 @@ enabled libwebp && {
enabled libwebp_encoder && require_pkg_config libwebp "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
enabled libwebp_anim_encoder && check_pkg_config libwebp_anim_encoder "libwebpmux >= 0.4.0" webp/mux.h WebPAnimEncoderOptionsInit; }
enabled libx264 && require_pkg_config libx264 x264 "stdint.h x264.h" x264_encoder_encode &&
- require_cpp_condition libx264 x264.h "X264_BUILD >= 122" && {
+ require_cpp_condition libx264 x264.h "X264_BUILD >= 155" && {
[ "$toolchain" != "msvc" ] ||
require_cpp_condition libx264 x264.h "X264_BUILD >= 158"; } &&
check_cpp_condition libx264_hdr10 x264.h "X264_BUILD >= 163" &&
diff --git a/doc/APIchanges b/doc/APIchanges
index 0a39b6d7ab..269fd36559 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,29 @@ The last version increases of all libraries were on 2024-03-07
API changes, most recent first:
+2024-05-xx - xxxxxxxxxx - lavu 59.19.100 - hwcontext_qsv.h
+ Add AVQSVFramesContext.info
+
+2024-05-10 - xxxxxxxxx - lavu 59.18.100 - cpu.h
+ Add AV_CPU_FLAG_RV_ZVBB.
+
+2024-05-04 - xxxxxxxxxx - lavu 59.17.100 - opt.h
+ Add AV_OPT_TYPE_UINT and av_opt_eval_uint().
+
+2024-04-24 - 8616cfe0890 - lavu 59.16.100 - opt.h
+ Add AV_OPT_SERIALIZE_SEARCH_CHILDREN.
+
+2024-04-11 - xxxxxxxxxx - lavc 61.5.102 - avcodec.h
+ AVCodecContext.decoded_side_data may now be set by libavcodec after
+ calling avcodec_open2().
+
+2024-04-11 - xxxxxxxxxx - lavu 59.15.100 - frame.h
+ Add av_mastering_display_metadata_alloc_size().
+
+2024-04-11 - xxxxxxxxxx - lavu 59.14.100 - frame.h
+ Add av_frame_side_data_add() and av_frame_side_data_remove().
+ Add AV_FRAME_SIDE_DATA_FLAG_REPLACE.
+
2024-04-03 - xxxxxxxxxx - lavu 59.13.100 - pixfmt.h
Add AVCOL_SPC_IPT_C2, AVCOL_SPC_YCGCO_RE and AVCOL_SPC_YCGCO_RO
to map new matrix coefficients defined by H.273 v3.
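As a rough illustration of the CPU-flag addition above (a minimal sketch, not part of this patch): the flag name is taken from the lavu 59.18.100 entry, and av_get_cpu_flags() is the long-standing query function.

    /* Illustration only -- probes for the new RISC-V Zvbb CPU flag. */
    #include <stdio.h>
    #include <libavutil/cpu.h>

    int main(void)
    {
        int flags = av_get_cpu_flags();

    #ifdef AV_CPU_FLAG_RV_ZVBB
        /* Only ever set on RISC-V builds where the Zvbb vector
         * bit-manipulation extension is reported at run time. */
        printf("Zvbb: %s\n",
               (flags & AV_CPU_FLAG_RV_ZVBB) ? "available" : "not available");
    #else
        (void)flags;
        printf("this libavutil predates AV_CPU_FLAG_RV_ZVBB\n");
    #endif
        return 0;
    }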
diff --git a/doc/Makefile b/doc/Makefile
index 67586e4b74..98d29f1c66 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -60,7 +60,7 @@ GENTEXI := $(GENTEXI:%=doc/avoptions_%.texi)
$(GENTEXI): TAG = GENTEXI
$(GENTEXI): doc/avoptions_%.texi: doc/print_options$(HOSTEXESUF)
- $(M)doc/print_options $* > $@
+ $(M)doc/print_options$(HOSTEXESUF) $* > $@
doc/%.html: TAG = HTML
doc/%-all.html: TAG = HTML
diff --git a/doc/encoders.texi b/doc/encoders.texi
index 66847191e1..c82f316f94 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -144,8 +144,7 @@ If this option is unspecified it is set to @samp{aac_low}.
AC-3 audio encoders.
-These encoders implement part of ATSC A/52:2010 and ETSI TS 102 366, as well as
-the undocumented RealAudio 3 (a.k.a. dnet).
+These encoders implement part of ATSC A/52:2010 and ETSI TS 102 366.
The @var{ac3} encoder uses floating-point math, while the @var{ac3_fixed}
encoder only uses fixed-point integer math. This does not mean that one is
@@ -3102,9 +3101,6 @@ Enable high quality AC prediction.
@item gray
Only encode grayscale.
-@item gmc
-Enable the use of global motion compensation (GMC).
-
@item qpel
Enable quarter-pixel motion compensation.
@@ -3116,7 +3112,9 @@ Place global headers in extradata instead of every keyframe.
@end table
-@item trellis
+@item gmc
+Enable the use of global motion compensation (GMC). Default is 0
+(disabled).
@item me_quality
Set motion estimation quality level. Possible values in decreasing order of
@@ -3171,6 +3169,9 @@ be better than any of the two specified individually. In other
words, the resulting quality will be the worse one of the two
effects.
+@item trellis
+Set rate-distortion optimal quantization.
+
@item ssim
Set structural similarity (SSIM) displaying method. Possible values:
@@ -3545,6 +3546,20 @@ Change these value to reset qsv codec's bitrate control configuration.
@item @var{pic_timing_sei}
Supported in h264_qsv and hevc_qsv.
Change this value to reset qsv codec's pic_timing_sei configuration.
+
+@item @var{qsv_params}
+Set QSV encoder parameters as a colon-separated list of key-value pairs.
+
+The @option{qsv_params} option should be formatted as @code{key1=value1:key2=value2:...}.
+
+These parameters are passed directly to the underlying Intel Quick Sync Video (QSV) encoder using the MFXSetParameter function.
+
+Example:
+@example
+ffmpeg -i input.mp4 -c:v h264_qsv -qsv_params "CodingOption1=1:CodingOption2=2" output.mp4
+@end example
+
+This option allows fine-grained control over various encoder-specific settings provided by the QSV encoder.
@end table
@subsection H264 options
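As a rough programmatic counterpart of the CLI example above (a sketch only, not from this patch): it assumes a build with QSV enabled so that the h264_qsv encoder is registered, and sets the new qsv_params option through the generic AVOptions API.

    /* Illustration only -- not part of the patch. */
    #include <libavcodec/avcodec.h>
    #include <libavutil/opt.h>

    /* Returns an unopened encoder context with qsv_params applied, or NULL. */
    static AVCodecContext *alloc_qsv_encoder_with_params(void)
    {
        const AVCodec *codec = avcodec_find_encoder_by_name("h264_qsv");
        AVCodecContext *ctx;

        if (!codec)
            return NULL;                      /* build without QSV support */

        ctx = avcodec_alloc_context3(codec);
        if (!ctx)
            return NULL;

        /* Same colon-separated key=value list as the -qsv_params example above;
         * AV_OPT_SEARCH_CHILDREN lets the lookup reach the encoder's private options. */
        if (av_opt_set(ctx, "qsv_params", "CodingOption1=1:CodingOption2=2",
                       AV_OPT_SEARCH_CHILDREN) < 0) {
            avcodec_free_context(&ctx);
            return NULL;
        }

        /* The caller still sets width/height/pix_fmt/time_base and calls avcodec_open2(). */
        return ctx;
    }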
diff --git a/doc/examples/demux_decode.c b/doc/examples/demux_decode.c
index f26611d8f4..64f5547bc4 100644
--- a/doc/examples/demux_decode.c
+++ b/doc/examples/demux_decode.c
@@ -138,11 +138,9 @@ static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
ret = output_audio_frame(frame);
av_frame_unref(frame);
- if (ret < 0)
- return ret;
}
- return 0;
+ return ret;
}
static int open_codec_context(int *stream_idx,
diff --git a/doc/examples/qsv_transcode.c b/doc/examples/qsv_transcode.c
index 8e7d2899f1..665a76af2e 100644
--- a/doc/examples/qsv_transcode.c
+++ b/doc/examples/qsv_transcode.c
@@ -76,8 +76,7 @@ static int str_to_dict(char* optstr, AVDictionary **opt)
if (value == NULL)
return AVERROR(EINVAL);
av_dict_set(opt, key, value, 0);
- } while(key != NULL);
- return 0;
+ } while(1);
}
static int dynamic_set_parameter(AVCodecContext *avctx)
@@ -335,17 +334,15 @@ static int dec_enc(AVPacket *pkt, const AVCodec *enc_codec, char *optstr)
fail:
av_frame_free(&frame);
- if (ret < 0)
- return ret;
}
- return 0;
+ return ret;
}
int main(int argc, char **argv)
{
const AVCodec *enc_codec;
int ret = 0;
- AVPacket *dec_pkt;
+ AVPacket *dec_pkt = NULL;
if (argc < 5 || (argc - 5) % 2) {
av_log(NULL, AV_LOG_ERROR, "Usage: %s <input file> <encoder> <output file>"
diff --git a/doc/examples/vaapi_transcode.c b/doc/examples/vaapi_transcode.c
index 8367cb3040..e1b7a43883 100644
--- a/doc/examples/vaapi_transcode.c
+++ b/doc/examples/vaapi_transcode.c
@@ -215,10 +215,8 @@ static int dec_enc(AVPacket *pkt, const AVCodec *enc_codec)
fail:
av_frame_free(&frame);
- if (ret < 0)
- return ret;
}
- return 0;
+ return ret;
}
int main(int argc, char **argv)
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 801c083705..da37e3ad37 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -663,10 +663,11 @@ Not all muxers support embedded thumbnails, and those who do, only support a few
Creates a program with the specified @var{title}, @var{program_num} and adds the specified
@var{stream}(s) to it.
-@item -stream_group type=@var{type}:st=@var{stream}[:st=@var{stream}][:stg=@var{stream_group}][:id=@var{stream_group_id}...] (@emph{output})
+@item -stream_group [map=@var{input_file_id}=@var{stream_group}][type=@var{type}:]st=@var{stream}[:st=@var{stream}][:stg=@var{stream_group}][:id=@var{stream_group_id}...] (@emph{output})
-Creates a stream group of the specified @var{type}, @var{stream_group_id} and adds the specified
-@var{stream}(s) and/or previously defined @var{stream_group}(s) to it.
+Creates a stream group of the specified @var{type} and @var{stream_group_id}, or by
+@var{map}ping an input group, adding the specified @var{stream}(s) and/or previously
+defined @var{stream_group}(s) to it.
@var{type} can be one of the following:
@table @option
@@ -863,6 +864,27 @@ all sub-mix element's @var{annotations}s
@end table
+E.g. to create a scalable 5.1 IAMF file from several WAV input files
+@example
+ffmpeg -i front.wav -i back.wav -i center.wav -i lfe.wav
+-map 0:0 -map 1:0 -map 2:0 -map 3:0 -c:a opus
+-stream_group type=iamf_audio_element:id=1:st=0:st=1:st=2:st=3,
+demixing=parameter_id=998,
+recon_gain=parameter_id=101,
+layer=ch_layout=stereo,
+layer=ch_layout=5.1,
+-stream_group type=iamf_mix_presentation:id=2:stg=0:annotations=en-us=Mix_Presentation,
+submix=parameter_id=100:parameter_rate=48000|element=stg=0:parameter_id=100:annotations=en-us=Scalable_Submix|layout=sound_system=stereo|layout=sound_system=5.1
+-streamid 0:0 -streamid 1:1 -streamid 2:2 -streamid 3:3 output.iamf
+@end example
+
+To copy the two stream groups (Audio Element and Mix Presentation) from an input IAMF file with four
+streams into an mp4 output
+@example
+ffmpeg -i input.iamf -c:a copy -stream_group map=0=0:st=0:st=1:st=2:st=3 -stream_group map=0=1:stg=0
+-streamid 0:0 -streamid 1:1 -streamid 2:2 -streamid 3:3 output.mp4
+@end example
+
@item -target @var{type} (@emph{output})
Specify target file type (@code{vcd}, @code{svcd}, @code{dvd}, @code{dv},
@code{dv50}). @var{type} may be prefixed with @code{pal-}, @code{ntsc-} or
@@ -2143,16 +2165,65 @@ Define a complex filtergraph, i.e. one with arbitrary number of inputs and/or
outputs. For simple graphs -- those with one input and one output of the same
type -- see the @option{-filter} options. @var{filtergraph} is a description of
the filtergraph, as described in the ``Filtergraph syntax'' section of the
-ffmpeg-filters manual.
+ffmpeg-filters manual. This option may be specified multiple times - each use
+creates a new complex filtergraph.
-Input link labels must refer to either input streams or loopback decoders. For
-input streams, use the @code{[file_index:stream_specifier]} syntax (i.e. the
-same as @option{-map} uses). If @var{stream_specifier} matches multiple streams,
-the first one will be used.
+Inputs to a complex filtergraph may come from different source types,
+distinguished by the format of the corresponding link label:
+@itemize
+@item
+To connect an input stream, use @code{[file_index:stream_specifier]} (i.e. the
+same syntax as @option{-map}). If @var{stream_specifier} matches multiple
+streams, the first one will be used.
-For decoders, the link label must be [dec:@var{dec_idx}], where @var{dec_idx} is
+@item
+To connect a loopback decoder use [dec:@var{dec_idx}], where @var{dec_idx} is
the index of the loopback decoder to be connected to given input.
+@item
+To connect an output from another complex filtergraph, use its link label, as
+in the following example:
+
+@example
+ffmpeg -i input.mkv \
+ -filter_complex '[0:v]scale=size=hd1080,split=outputs=2[for_enc][orig_scaled]' \
+ -c:v libx264 -map '[for_enc]' output.mkv \
+ -dec 0:0 \
+ -filter_complex '[dec:0][orig_scaled]hstack[stacked]' \
+ -map '[stacked]' -c:v ffv1 comparison.mkv
+@end example
+
+reads an input video and
+@itemize
+@item
+(line 2) uses a complex filtergraph with one input and two outputs
+to scale the video to 1920x1080 and duplicate the result to both
+outputs;
+
+@item
+(line 3) encodes one scaled output with @code{libx264} and writes the result to
+@file{output.mkv};
+
+@item
+(line 4) decodes this encoded stream with a loopback decoder;
+
+@item
+(line 5) places the output of the loopback decoder (i.e. the
+@code{libx264}-encoded video) side by side with the scaled original input;
+
+@item
+(line 6) the combined video is then losslessly encoded and written into
+@file{comparison.mkv}.
+
+@end itemize
+
+Note that the two filtergraphs cannot be combined into one, because then there
+would be a cycle in the transcoding pipeline (filtergraph output goes to
+encoding, from there to decoding, then back to the same graph), and such cycles
+are not allowed.
+
+@end itemize
+
An unlabeled input will be connected to the first unused input stream of the
matching type.
diff --git a/doc/filters.texi b/doc/filters.texi
index bfa8ccec8b..f5bf475d13 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -20989,8 +20989,8 @@ the next filter, the scale filter will convert the input to the
requested format.
@subsection Options
-The filter accepts the following options, or any of the options
-supported by the libswscale scaler.
+The filter accepts the following options, any of the options supported
+by the libswscale scaler, as well as any of the @ref{framesync} options.
See @ref{scaler_options,,the ffmpeg-scaler manual,ffmpeg-scaler} for
the complete list of scaler options.
@@ -21228,6 +21228,16 @@ The position (byte offset) of the frame in the input stream, or NaN if
this information is unavailable and/or meaningless (for example in case of synthetic video).
Only available with @code{eval=frame}.
Deprecated, do not use.
+
+@item ref_w, rw
+@item ref_h, rh
+@item ref_a
+@item ref_dar, rdar
+@item ref_n
+@item ref_t
+@item ref_pos
+Equivalent to the above, but for a second reference input. If any of these
+variables are present, this filter accepts two inputs.
@end table
@subsection Examples
@@ -21329,6 +21339,20 @@ making sure the resulting resolution is even (required by some codecs):
@example
scale='trunc(ih*dar/2)*2:trunc(ih/2)*2',setsar=1/1
@end example
+
+@item
+Scale a subtitle stream (sub) to match the main video (main) in size before
+overlaying. ("scale2ref")
+@example
+'[main]split[a][b]; [ref][a]scale=rw:rh[c]; [b][c]overlay'
+@end example
+
+@item
+Scale a logo to 1/10th the height of a video, while preserving its display
+aspect ratio.
+@example
+[logo-in][video-in]scale=w=oh*dar:h=rh/10[logo-out]
+@end example
@end itemize
@subsection Commands
@@ -21564,79 +21588,6 @@ Only available with @code{eval=frame}.
Deprecated, do not use.
@end table
-@section scale2ref
-
-Scale (resize) the input video, based on a reference video.
-
-See the scale filter for available options, scale2ref supports the same but
-uses the reference video instead of the main input as basis. scale2ref also
-supports the following additional constants for the @option{w} and
-@option{h} options:
-
-@table @var
-@item main_w
-@item main_h
-The main input video's width and height
-
-@item main_a
-The same as @var{main_w} / @var{main_h}
-
-@item main_sar
-The main input video's sample aspect ratio
-
-@item main_dar, mdar
-The main input video's display aspect ratio. Calculated from
-@code{(main_w / main_h) * main_sar}.
-
-@item main_hsub
-@item main_vsub
-The main input video's horizontal and vertical chroma subsample values.
-For example for the pixel format "yuv422p" @var{hsub} is 2 and @var{vsub}
-is 1.
-
-@item main_n
-The (sequential) number of the main input frame, starting from 0.
-Only available with @code{eval=frame}.
-
-@item main_t
-The presentation timestamp of the main input frame, expressed as a number of
-seconds. Only available with @code{eval=frame}.
-
-@item main_pos
-The position (byte offset) of the frame in the main input stream, or NaN if
-this information is unavailable and/or meaningless (for example in case of synthetic video).
-Only available with @code{eval=frame}.
-@end table
-
-@subsection Examples
-
-@itemize
-@item
-Scale a subtitle stream (b) to match the main video (a) in size before overlaying
-@example
-'scale2ref[b][a];[a][b]overlay'
-@end example
-
-@item
-Scale a logo to 1/10th the height of a video, while preserving its display aspect ratio.
-@example
-[logo-in][video-in]scale2ref=w=oh*mdar:h=ih/10[logo-out][video-out]
-@end example
-@end itemize
-
-@subsection Commands
-
-This filter supports the following commands:
-@table @option
-@item width, w
-@item height, h
-Set the output video dimension expression.
-The command accepts the same syntax of the corresponding option.
-
-If the specified expression is not valid, it is kept at its current
-value.
-@end table
-
@section scale2ref_npp
Use the NVIDIA Performance Primitives (libnpp) to scale (resize) the input
@@ -22702,9 +22653,27 @@ whether a pixel should be blurred or not. The option value must be an
integer in the range [-30,30]. A value of 0 will filter all the image,
a value included in [0,30] will filter flat areas and a value included
in [-30,0] will filter edges. Default value is @option{luma_threshold}.
+
+@item alpha_radius, ar
+Set the alpha radius. The option value must be a float number in
+the range [0.1,5.0] that specifies the variance of the gaussian filter
+used to blur the image (slower if larger). Default value is @option{luma_radius}.
+
+@item alpha_strength, as
+Set the alpha strength. The option value must be a float number
+in the range [-1.0,1.0] that configures the blurring. A value included
+in [0.0,1.0] will blur the image whereas a value included in
+[-1.0,0.0] will sharpen the image. Default value is @option{luma_strength}.
+
+@item alpha_threshold, at
+Set the alpha threshold used as a coefficient to determine
+whether a pixel should be blurred or not. The option value must be an
+integer in the range [-30,30]. A value of 0 will filter all the image,
+a value included in [0,30] will filter flat areas and a value included
+in [-30,0] will filter edges. Default value is @option{luma_threshold}.
@end table
-If a chroma option is not explicitly set, the corresponding luma value
+If a chroma or alpha option is not explicitly set, the corresponding luma value
is set.
@section sobel
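The new alpha options can be exercised from the libavfilter API just like the luma and chroma ones; the following sketch (not part of this patch, and requiring a libavfilter that already contains it) merely parses a smartblur instance using the option names documented above.

    /* Illustration only -- parse (but do not configure) a smartblur instance
     * that also blurs the alpha plane, using the new alpha_radius and
     * alpha_strength options. */
    #include <libavfilter/avfilter.h>
    #include <libavutil/error.h>

    static int check_smartblur_alpha(void)
    {
        AVFilterGraph *graph = avfilter_graph_alloc();
        AVFilterInOut *inputs = NULL, *outputs = NULL;
        int ret;

        if (!graph)
            return AVERROR(ENOMEM);

        ret = avfilter_graph_parse2(graph,
                  "smartblur=luma_radius=2:alpha_radius=2:alpha_strength=0.6",
                  &inputs, &outputs);

        avfilter_inout_free(&inputs);
        avfilter_inout_free(&outputs);
        avfilter_graph_free(&graph);
        return ret;
    }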
@@ -27832,8 +27801,7 @@ The inputs have same memory layout for color channels, the overlay has additiona
@section tonemap_vaapi
-Perform HDR(High Dynamic Range) to SDR(Standard Dynamic Range) conversion with tone-mapping.
-It maps the dynamic range of HDR10 content to the SDR content.
+Perform HDR-to-SDR or HDR-to-HDR tone-mapping.
It currently only accepts HDR10 as input.
It accepts the following parameters:
@@ -27842,28 +27810,42 @@ It accepts the following parameters:
@item format
Specify the output pixel format.
-Currently supported formats are:
-@table @var
-@item p010
-@item nv12
-@end table
-
-Default is nv12.
+Default is nv12 for HDR-to-SDR tone-mapping and p010 for HDR-to-HDR
+tone-mapping.
@item primaries, p
Set the output color primaries.
-Default is same as input.
+Default is bt709 for HDR-to-SDR tone-mapping and same as input for HDR-to-HDR
+tone-mapping.
@item transfer, t
Set the output transfer characteristics.
-Default is bt709.
+Default is bt709 for HDR-to-SDR tone-mapping and same as input for HDR-to-HDR
+tone-mapping.
@item matrix, m
Set the output colorspace matrix.
-Default is same as input.
+Default is bt709 for HDR-to-SDR tone-mapping and same as input for HDR-to-HDR
+tone-mapping.
+
+@item display
+Set the output mastering display colour volume. It is given by a '|'-separated
+list of value pairs, with the two values in each pair separated by a space. The
+pairs set the display primaries x & y in G, B, R order, then the white point
+x & y, and finally the nominal minimum & maximum display luminances.
+
+HDR-to-HDR tone-mapping will be performed when this option is set.
+
+@item light
+Set the output content light level information. It accepts 2 space-separated
+values: the first is the maximum light level and the second is the maximum
+average light level.
+
+It is ignored for HDR-to-SDR tone-mapping, and optional for HDR-to-HDR
+tone-mapping.
@end table
@@ -27875,6 +27857,11 @@ Convert HDR(HDR10) video to bt2020-transfer-characteristic p010 format
@example
tonemap_vaapi=format=p010:t=bt2020-10
@end example
+@item
+Convert HDR video to HDR video
+@example
+tonemap_vaapi=display=7500\ 3000|34000\ 16000|13250\ 34500|15635\ 16450|500\ 10000000
+@end example
@end itemize
@section hstack_vaapi
@@ -27955,6 +27942,168 @@ first input stream. For the syntax of this option, check the
See @ref{xstack}.
@end table
+@section pad_vaapi
+
+Add padding to the input image, and place the original input at the
+provided @var{x}, @var{y} coordinates.
+
+It accepts the following options:
+
+@table @option
+@item width, w
+@item height, h
+Specify an expression for the size of the output image with the
+paddings added. If the value for @var{width} or @var{height} is 0, the
+corresponding input size is used for the output.
+
+The @var{width} expression can reference the value set by the
+@var{height} expression, and vice versa.
+
+The default value of @var{width} and @var{height} is 0.
+
+@item x
+@item y
+Specify the offsets to place the input image at within the padded area,
+with respect to the top/left border of the output image.
+
+The @var{x} expression can reference the value set by the @var{y}
+expression, and vice versa.
+
+The default value of @var{x} and @var{y} is 0.
+
+If @var{x} or @var{y} evaluates to a negative number, they'll be changed
+so the input image is centered on the padded area.
+
+@item color
+Specify the color of the padded area. For the syntax of this option,
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils
+manual,ffmpeg-utils}.
+
+@item aspect
+Pad to an aspect ratio instead of a resolution.
+@end table
+
+The value for the @var{width}, @var{height}, @var{x}, and @var{y}
+options are expressions containing the following constants:
+
+@table @option
+@item in_w
+@item in_h
+The input video width and height.
+
+@item iw
+@item ih
+These are the same as @var{in_w} and @var{in_h}.
+
+@item out_w
+@item out_h
+The output width and height (the size of the padded area), as
+specified by the @var{width} and @var{height} expressions.
+
+@item ow
+@item oh
+These are the same as @var{out_w} and @var{out_h}.
+
+@item x
+@item y
+The x and y offsets as specified by the @var{x} and @var{y}
+expressions, or NAN if not yet specified.
+
+@item a
+same as @var{iw} / @var{ih}
+
+@item sar
+input sample aspect ratio
+
+@item dar
+input display aspect ratio, it is the same as (@var{iw} / @var{ih}) * @var{sar}
+@end table
+
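+A minimal usage sketch (assumed values; the device setup and surrounding
+@code{hwupload}/@code{hwdownload} chain follow the other VAAPI examples),
+padding 50 pixels on every side and centering the input:
+@example
+pad_vaapi=w=iw+100:h=ih+100:x=-1:y=-1:color=black
+@end example
+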
+@section drawbox_vaapi
+
+Draw a colored box on the input image.
+
+It accepts the following parameters:
+
+@table @option
+@item x
+@item y
+The expressions which specify the top left corner coordinates of the box. They default to 0.
+
+@item width, w
+@item height, h
+The expressions which specify the width and height of the box; if 0 they are interpreted as
+the input width and height. They default to 0.
+
+@item color, c
+Specify the color of the box to write. For the general syntax of this option,
+check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}.
+
+@item thickness, t
+The expression which sets the thickness of the box edge.
+A value of @code{fill} will create a filled box. Default value is @code{3}.
+
+See below for the list of accepted constants.
+
+@item replace
+With value @code{1}, the pixels of the painted box will overwrite the video's color and alpha pixels.
+Default is @code{0}, which composites the box onto the input video.
+@end table
+
+The parameters for @var{x}, @var{y}, @var{w} and @var{h} and @var{t} are expressions containing the
+following constants:
+
+@table @option
+@item in_h, ih
+@item in_w, iw
+The input width and height.
+
+@item x
+@item y
+The x and y offset coordinates where the box is drawn.
+
+@item w
+@item h
+The width and height of the drawn box.
+
+@item t
+The thickness of the drawn box.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Draw a black box around the edge of the input image:
+@example
+drawbox
+@end example
+
+@item
+Draw a box with color red and an opacity of 50%:
+@example
+drawbox=10:20:200:60:red@@0.5
+@end example
+
+The previous example can be specified as:
+@example
+drawbox=x=10:y=20:w=200:h=60:color=red@@0.5
+@end example
+
+@item
+Fill the box with pink color:
+@example
+drawbox=x=10:y=10:w=100:h=100:color=pink@@0.5:t=fill
+@end example
+
+@item
+Draw a 2-pixel red 2.40:1 mask:
+@example
+drawbox=x=-t:y=0.5*(ih-iw/2.4)-t:w=iw+t*2:h=iw/2.4+t*2:t=2:c=red
+@end example
+@end itemize
+
@c man end VAAPI VIDEO FILTERS
@chapter Vulkan Video Filters
@@ -30751,6 +30900,12 @@ missing.
That basically means that an input frame is selected if its pts is within the
interval set by the concat demuxer.
+@item iw @emph{(video only)}
+Represents the width of the input video frame.
+
+@item ih @emph{(video only)}
+Represents the height of the input video frame.
+
@end table
The default value of the select expression is "1".
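+
+For example, to keep only frames that are wider than they are tall (a
+minimal illustration using the new constants):
+@example
+select='gt(iw,ih)'
+@end example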
diff --git a/doc/multithreading.txt b/doc/multithreading.txt
index 6c65ca9651..842d331e4f 100644
--- a/doc/multithreading.txt
+++ b/doc/multithreading.txt
@@ -36,9 +36,9 @@ Frame threading -
* Codecs similar to ffv1, whose streams don't reset across frames,
will not work because their bitstreams cannot be decoded in parallel.
-* The contents of buffers must not be read before ff_thread_await_progress()
+* The contents of buffers must not be read before ff_progress_frame_await()
has been called on them. reget_buffer() and buffer age optimizations no longer work.
-* The contents of buffers must not be written to after ff_thread_report_progress()
+* The contents of buffers must not be written to after ff_progress_frame_report()
has been called on them. This includes draw_edges().
Porting codecs to frame threading
@@ -53,14 +53,13 @@ thread.
Add AV_CODEC_CAP_FRAME_THREADS to the codec capabilities. There will be very little
speed gain at this point but it should work.
-If there are inter-frame dependencies, so the codec calls
-ff_thread_report/await_progress(), set FF_CODEC_CAP_ALLOCATE_PROGRESS in
-FFCodec.caps_internal and use ff_thread_get_buffer() to allocate frames.
-Otherwise decode directly into the user-supplied frames.
+Use ff_thread_get_buffer() (or ff_progress_frame_get_buffer()
+in case you have inter-frame dependencies and use the ProgressFrame API)
+to allocate frame buffers.
-Call ff_thread_report_progress() after some part of the current picture has decoded.
+Call ff_progress_frame_report() after some part of the current picture has decoded.
A good place to put this is where draw_horiz_band() is called - add this if it isn't
called anywhere, as it's useful too and the implementation is trivial when you're
doing this. Note that draw_edges() needs to be called before reporting progress.
-Before accessing a reference frame or its MVs, call ff_thread_await_progress().
+Before accessing a reference frame or its MVs, call ff_progress_frame_await().
diff --git a/doc/muxers.texi b/doc/muxers.texi
index d8a1f83309..6340c8e54d 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -2700,14 +2700,68 @@ computer-generated compositions.
This muxer accepts a single audio stream containing PCM data.
-@section matroska
+@section ivf
+On2 IVF muxer.
+
+IVF was developed by On2 Technologies (formerly known as Duck
+Corporation) to store video encoded with its internally developed codecs.
+
+This muxer accepts a single @samp{vp8}, @samp{vp9}, or @samp{av1}
+video stream.
+
+@section jacosub
+JACOsub subtitle format muxer.
+
+This muxer accepts a single @samp{jacosub} subtitles stream.
+
+For more information about the format, see
+@url{http://unicorn.us.com/jacosub/jscripts.html}.
+
+@section kvag
+Simon & Schuster Interactive VAG muxer.
+
+This custom VAG container is used by some Simon & Schuster Interactive
+games such as "Real War", and "Real War: Rogue States".
+
+This muxer accepts a single @samp{adpcm_ima_ssi} audio stream.
+
+@section lc3
+Bluetooth SIG Low Complexity Communication Codec audio (LC3), or
+ETSI TS 103 634 Low Complexity Communication Codec plus (LC3plus).
+
+This muxer accepts a single @samp{lc3} audio stream.
+
+@section lrc
+LRC lyrics file format muxer.
+
+LRC (short for LyRiCs) is a computer file format that synchronizes
+song lyrics with an audio file, such as MP3, Vorbis, or MIDI.
+
+This muxer accepts a single @samp{subrip} or @samp{text} subtitles stream.
+@subsection Metadata
+The following metadata tags are converted to the corresponding metadata of
+the output format:
+
+@table @option
+@item title
+@item album
+@item artist
+@item author
+@item creator
+@item encoder
+@item encoder_version
+@end table
+
+If @samp{encoder_version} is not explicitly set, it is automatically
+set to the libavformat version.
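+
+For example, to convert a SubRip file to LRC while tagging the output (file
+names and tag values here are hypothetical):
+@example
+ffmpeg -i lyrics.srt -metadata title="Some Song" -metadata artist="Someone" lyrics.lrc
+@end example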
+
+@section matroska
Matroska container muxer.
This muxer implements the matroska and webm container specs.
@subsection Metadata
-
The recognized metadata settings in this muxer are:
@table @option
@@ -2767,18 +2821,15 @@ ffmpeg -i sample_left_right_clip.mpg -an -c:v libvpx -metadata stereo_mode=left_
@end example
@subsection Options
-
-This muxer supports the following options:
-
@table @option
-@item reserve_index_space
+@item reserve_index_space @var{size}
By default, this muxer writes the index for seeking (called cues in Matroska
terms) at the end of the file, because it cannot know in advance how much space
to leave for the index at the beginning of the file. However for some use cases
-- e.g. streaming where seeking is possible but slow -- it is useful to put the
index at the beginning of the file.
-If this option is set to a non-zero value, the muxer will reserve a given amount
+If this option is set to a non-zero value, the muxer will reserve @var{size} bytes
of space in the file header and then try to write the cues there when the muxing
finishes. If the reserved space does not suffice, no Cues will be written, the
file will be finalized and writing the trailer will return an error.
@@ -2787,7 +2838,7 @@ A safe size for most use cases should be about 50kB per hour of video.
Note that cues are only written if the output is seekable and this option will
have no effect if it is not.
-@item cues_to_front
+@item cues_to_front @var{bool}
If set, the muxer will write the index at the beginning of the file
by shifting the main data if necessary. This can be combined with
reserve_index_space in which case the data is only shifted if
@@ -2795,8 +2846,44 @@ the initially reserved space turns out to be insufficient.
This option is ignored if the output is unseekable.
-@item default_mode
-This option controls how the FlagDefault of the output tracks will be set.
+@item cluster_size_limit @var{size}
+Store at most the provided amount of bytes in a cluster.
+
+If not specified, the limit is set automatically to a sensible
+hardcoded value.
+
+@item cluster_time_limit @var{duration}
+Store at most the provided number of milliseconds in a cluster.
+
+If not specified, the limit is set automatically to a sensible
+hardcoded value.
+
+@item dash @var{bool}
+Create a WebM file conforming to the WebM DASH specification. By default
+it is set to @code{false}.
+
+@item dash_track_number @var{index}
+Track number for the DASH stream. By default it is set to @code{1}.
+
+@item live @var{bool}
+Write the file assuming it is a live stream. By default it is set to
+@code{false}.
+
+@item allow_raw_vfw @var{bool}
+Allow raw VFW mode. By default it is set to @code{false}.
+
+@item flipped_raw_rgb @var{bool}
+If set to @code{true}, store positive height for raw RGB bitmaps, which indicates
+the bitmap is stored bottom-up. Note that this option does not flip the bitmap,
+which has to be done manually beforehand, e.g. by using the @samp{vflip} filter.
+Default is @code{false}, which indicates the bitmap is stored top down.
+
+@item write_crc32 @var{bool}
+Write a CRC32 element inside every Level 1 element. By default it is
+set to @code{true}. This option is ignored for WebM.
+
+@item default_mode @var{mode}
+Control how the FlagDefault of the output tracks will be set.
It influences which tracks players should play by default. The default mode
is @samp{passthrough}.
@table @samp
@@ -2814,37 +2901,46 @@ disposition default exists, no subtitle track will be marked as default.
In this mode the FlagDefault is set if and only if the AV_DISPOSITION_DEFAULT
flag is set in the disposition of the corresponding stream.
@end table
-
-@item flipped_raw_rgb
-If set to true, store positive height for raw RGB bitmaps, which indicates
-bitmap is stored bottom-up. Note that this option does not flip the bitmap
-which has to be done manually beforehand, e.g. by using the vflip filter.
-Default is @var{false} and indicates bitmap is stored top down.
-
@end table
@anchor{md5}
@section md5
-
MD5 testing format.
This is a variant of the @ref{hash} muxer. Unlike that muxer, it
defaults to using the MD5 hash function.
-@subsection Examples
+See also the @ref{hash} and @ref{framemd5} muxers.
+@subsection Examples
+@itemize
+@item
To compute the MD5 hash of the input converted to raw
audio and video, and store it in the file @file{out.md5}:
@example
ffmpeg -i INPUT -f md5 out.md5
@end example
-You can print the MD5 to stdout with the command:
+@item
+To print the MD5 hash to stdout:
@example
ffmpeg -i INPUT -f md5 -
@end example
+@end itemize
-See also the @ref{hash} and @ref{framemd5} muxers.
+@section microdvd
+MicroDVD subtitle format muxer.
+
+This muxer accepts a single @samp{microdvd} subtitles stream.
+
+@section mmf
+Synthetic music Mobile Application Format (SMAF) format muxer.
+
+SMAF is a music data format specified by Yamaha for portable
+electronic devices, such as mobile phones and personal digital
+assistants.
+
+This muxer accepts a single @samp{adpcm_yamaha} audio stream.
@section mp3
diff --git a/doc/protocols.texi b/doc/protocols.texi
index f54600b846..ed70af4b33 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -442,9 +442,6 @@ value is -1.
@item chunked_post
If set to 1 use chunked Transfer-Encoding for posts, default is 1.
-@item content_type
-Set a specific content type for the POST messages or for listen mode.
-
@item http_proxy
set HTTP proxy to tunnel through e.g. http://example.com:1234
@@ -452,35 +449,21 @@ set HTTP proxy to tunnel through e.g. http://example.com:1234
Set custom HTTP headers, can override built in default headers. The
value must be a string encoding the headers.
-@item multiple_requests
-Use persistent connections if set to 1, default is 0.
-
-@item post_data
-Set custom HTTP post data.
-
-@item referer
-Set the Referer header. Include 'Referer: URL' header in HTTP request.
+@item content_type
+Set a specific content type for the POST messages or for listen mode.
@item user_agent
Override the User-Agent header. If not specified the protocol will use a
string describing the libavformat build. ("Lavf/<version>")
-@item reconnect_at_eof
-If set then eof is treated like an error and causes reconnection, this is useful
-for live / endless streams.
-
-@item reconnect_streamed
-If set then even streamed/non seekable streams will be reconnected on errors.
-
-@item reconnect_on_network_error
-Reconnect automatically in case of TCP/TLS errors during connect.
+@item referer
+Set the Referer header. Include 'Referer: URL' header in HTTP request.
-@item reconnect_on_http_error
-A comma separated list of HTTP status codes to reconnect on. The list can
-include specific status codes (e.g. '503') or the strings '4xx' / '5xx'.
+@item multiple_requests
+Use persistent connections if set to 1, default is 0.
-@item reconnect_delay_max
-Sets the maximum delay in seconds after which to give up reconnecting
+@item post_data
+Set custom HTTP post data.
@item mime_type
Export the MIME type.
@@ -488,6 +471,11 @@ Export the MIME type.
@item http_version
Exports the HTTP response version number. Usually "1.0" or "1.1".
+@item cookies
+Set the cookies to be sent in future requests. The format of each cookie is the
+same as the value of a Set-Cookie HTTP response field. Multiple cookies can be
+delimited by a newline character.
+
@item icy
If set to 1 request ICY (SHOUTcast) metadata from the server. If the server
supports this, the metadata has to be retrieved by the application by reading
@@ -504,10 +492,40 @@ contains the last non-empty metadata packet sent by the server. It should be
polled in regular intervals by applications interested in mid-stream metadata
updates.
-@item cookies
-Set the cookies to be sent in future requests. The format of each cookie is the
-same as the value of a Set-Cookie HTTP response field. Multiple cookies can be
-delimited by a newline character.
+@item metadata
+An exported dictionary containing Icecast metadata from the bitstream, if present.
+Only useful with the C API.
+
+@item auth_type
+
+Set HTTP authentication type. No option for Digest, since this method requires
+getting nonce parameters from the server first and can't be used straight away like
+Basic.
+
+@table @option
+@item none
+Choose the HTTP authentication type automatically. This is the default.
+@item basic
+
+Choose the HTTP basic authentication.
+
+Basic authentication sends a Base64-encoded string that contains a user name and password
+for the client. Base64 is not a form of encryption and should be considered the same as
+sending the user name and password in clear text (Base64 is a reversible encoding).
+If a resource needs to be protected, strongly consider using an authentication scheme
+other than basic authentication. HTTPS/TLS should be used with basic authentication.
+Without these additional security enhancements, basic authentication should not be used
+to protect sensitive or valuable information.
+@end table
+
+@item send_expect_100
+Send an Expect: 100-continue header for POST. If set to 1 it will send, if set
+to 0 it won't, if set to -1 it will try to send if it is applicable. Default
+value is -1.
+
+@item location
+An exported dictionary containing the content location. Only useful with the C
+API.
@item offset
Set initial byte offset.
@@ -525,6 +543,37 @@ be given a Bad Request response.
When unset the HTTP method is not checked for now. This will be replaced by
autodetection in the future.
+@item reconnect
+Reconnect automatically when disconnected before EOF is hit.
+
+@item reconnect_at_eof
+If set then eof is treated like an error and causes reconnection, this is useful
+for live / endless streams.
+
+@item reconnect_on_network_error
+Reconnect automatically in case of TCP/TLS errors during connect.
+
+@item reconnect_on_http_error
+A comma separated list of HTTP status codes to reconnect on. The list can
+include specific status codes (e.g. '503') or the strings '4xx' / '5xx'.
+
+@item reconnect_streamed
+If set then even streamed/non seekable streams will be reconnected on errors.
+
+@item reconnect_delay_max
+Set the maximum delay in seconds after which to give up reconnecting.
+
+@item reconnect_max_retries
+Set the maximum number of times to retry a connection. Default unset.
+
+@item reconnect_delay_total_max
+Set the maximum total delay in seconds after which to give up reconnecting.
+
+@item respect_retry_after
+If enabled, and a Retry-After header is encountered, its requested reconnection
+delay will be honored, rather than using exponential backoff. Useful for 429 and
+503 errors. Default enabled.
+
@item listen
If set to 1 enables experimental HTTP server. This can be used to send data when
used as an output option, or read data from a client with HTTP POST when used as
@@ -551,32 +600,16 @@ ffmpeg -i somefile.ogg -chunked_post 0 -c copy -f ogg http://@var{server}:@var{p
wget --post-file=somefile.ogg http://@var{server}:@var{port}
@end example
-@item send_expect_100
-Send an Expect: 100-continue header for POST. If set to 1 it will send, if set
-to 0 it won't, if set to -1 it will try to send if it is applicable. Default
-value is -1.
-
-@item auth_type
-
-Set HTTP authentication type. No option for Digest, since this method requires
-getting nonce parameters from the server first and can't be used straight away like
-Basic.
-
-@table @option
-@item none
-Choose the HTTP authentication type automatically. This is the default.
-@item basic
+@item resource
+The resource requested by a client, when the experimental HTTP server is in use.
-Choose the HTTP basic authentication.
+@item reply_code
+The HTTP code returned to the client, when the experimental HTTP server is in use.
-Basic authentication sends a Base64-encoded string that contains a user name and password
-for the client. Base64 is not a form of encryption and should be considered the same as
-sending the user name and password in clear text (Base64 is a reversible encoding).
-If a resource needs to be protected, strongly consider using an authentication scheme
-other than basic authentication. HTTPS/TLS should be used with basic authentication.
-Without these additional security enhancements, basic authentication should not be used
-to protect sensitive or valuable information.
-@end table
+@item short_seek_size
+Set the threshold, in bytes, for when a readahead should be preferred over a seek and
+new HTTP request. This is useful, for example, to make sure the same connection
+is used for reading large video packets with small audio packets in between.
@end table
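+
+For example, to read an HTTP input and retry transient failures with a
+bounded backoff (the URL and file names are hypothetical):
+@example
+ffmpeg -reconnect 1 -reconnect_on_http_error 4xx,5xx -reconnect_max_retries 5 \
+       -reconnect_delay_max 10 -i https://example.com/input.mp4 -c copy output.mp4
+@end example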
diff --git a/doc/texidep.pl b/doc/texidep.pl
index 099690378e..33e6c7c53e 100644
--- a/doc/texidep.pl
+++ b/doc/texidep.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/env perl
+#!/usr/bin/env perl
# This script will print the dependency of a Texinfo file to stdout.
# texidep.pl <src-path> <input.texi> <output.ext>
diff --git a/doc/utils.texi b/doc/utils.texi
index 76e704fc3c..9968725d2a 100644
--- a/doc/utils.texi
+++ b/doc/utils.texi
@@ -801,6 +801,11 @@ The following binary operators are available: @code{+}, @code{-},
The following unary operators are available: @code{+}, @code{-}.
+Some internal variables can be used to store and load intermediate
+results. They can be accessed using the @code{ld} and @code{st}
+functions with an index argument ranging from 0 to 9 to specify which
+internal variable to access.
+
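+For instance, the following expression (a minimal illustration) stores 6 in
+the internal variable with index 0 and then evaluates to 42:
+@example
+st(0, 6); ld(0)*7
+@end example
+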
The following functions are available:
@table @option
@item abs(x)
@@ -898,9 +903,9 @@ Return 1.0 if @var{x} is +/-INFINITY, 0.0 otherwise.
@item isnan(x)
Return 1.0 if @var{x} is NAN, 0.0 otherwise.
-@item ld(var)
-Load the value of the internal variable with number
-@var{var}, which was previously stored with st(@var{var}, @var{expr}).
+@item ld(idx)
+Load the value of the internal variable with index @var{idx}, which was
+previously stored with st(@var{idx}, @var{expr}).
The function returns the loaded value.
@item lerp(x, y, z)
@@ -933,21 +938,31 @@ Compute the power of @var{x} elevated @var{y}, it is equivalent to
@item print(t)
@item print(t, l)
-Print the value of expression @var{t} with loglevel @var{l}. If
-@var{l} is not specified then a default log level is used.
-Returns the value of the expression printed.
-
-Prints t with loglevel l
+Print the value of expression @var{t} with loglevel @var{l}. If @var{l} is not
+specified then a default log level is used.
+Return the value of the expression printed.
@item random(idx)
Return a pseudo random value between 0.0 and 1.0. @var{idx} is the
-index of the internal variable which will be used to save the
-seed/state.
+index of the internal variable used to save the seed/state, which can be
+set beforehand with @code{st(idx, expr)}.
+
+To initialize the seed, you need to store the seed value as a 64-bit
+unsigned integer in the internal variable with index @var{idx}.
+
+For example, to store the seed with value @code{42} in the internal
+variable with index @code{0} and print a few random values:
+@example
+st(0,42); print(random(0)); print(random(0)); print(random(0))
+@end example
@item randomi(idx, min, max)
Return a pseudo random value in the interval between @var{min} and
-@var{max}. @var{idx} is the index of the internal variable which will
-be used to save the seed/state.
+@var{max}. @var{idx} is the index of the internal variable which will be used to
+save the seed/state, which can be set beforehand with @code{st(idx, expr)}.
+
+To initialize the seed, you need to store the seed value as a 64-bit
+unsigned integer in the internal variable with index @var{idx}.
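+
+For example, to seed the state in internal variable @code{0} and print a
+value in the interval [10,20] (mirroring the @code{random} example above):
+@example
+st(0,42); print(randomi(0, 10, 20))
+@end example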
@item root(expr, max)
Find an input value for which the function represented by @var{expr}
@@ -956,14 +971,14 @@ with argument @var{ld(0)} is 0 in the interval 0..@var{max}.
The expression in @var{expr} must denote a continuous function or the
result is undefined.
-@var{ld(0)} is used to represent the function input value, which means
-that the given expression will be evaluated multiple times with
-various input values that the expression can access through
-@code{ld(0)}. When the expression evaluates to 0 then the
-corresponding input value will be returned.
+@var{ld(0)} is used to represent the function input value, which means that the
+given expression will be evaluated multiple times with various input values that
+the expression can access through @code{ld(0)}. When the expression evaluates to
+0 then the corresponding input value will be returned.
@item round(expr)
-Round the value of expression @var{expr} to the nearest integer. For example, "round(1.5)" is "2.0".
+Round the value of expression @var{expr} to the nearest integer. For example,
+"round(1.5)" is "2.0".
@item sgn(x)
Compute sign of @var{x}.
@@ -981,12 +996,15 @@ Compute the square root of @var{expr}. This is equivalent to
@item squish(x)
Compute expression @code{1/(1 + exp(4*x))}.
-@item st(var, expr)
+@item st(idx, expr)
Store the value of the expression @var{expr} in an internal
-variable. @var{var} specifies the number of the variable where to
-store the value, and it is a value ranging from 0 to 9. The function
-returns the value stored in the internal variable.
-Note, Variables are currently not shared between expressions.
+variable. @var{idx} specifies the index of the internal variable in which to
+store the value, and it is a value ranging from 0 to 9. The function returns
+the value stored in the internal variable.
+
+The stored value can be retrieved with @code{ld(idx)}.
+
+Note: variables are currently not shared between expressions.
@item tan(x)
Compute tangent of @var{x}.
@@ -995,16 +1013,16 @@ Compute tangent of @var{x}.
Compute hyperbolic tangent of @var{x}.
@item taylor(expr, x)
-@item taylor(expr, x, id)
+@item taylor(expr, x, idx)
Evaluate a Taylor series at @var{x}, given an expression representing
-the @code{ld(id)}-th derivative of a function at 0.
+the @code{ld(idx)}-th derivative of a function at 0.
When the series does not converge the result is undefined.
-@var{ld(id)} is used to represent the derivative order in @var{expr},
+@var{ld(idx)} is used to represent the derivative order in @var{expr},
which means that the given expression will be evaluated multiple times
with various input values that the expression can access through
-@code{ld(id)}. If @var{id} is not specified then 0 is assumed.
+@code{ld(idx)}. If @var{idx} is not specified then 0 is assumed.
Note, when you have the derivatives at y instead of 0,
@code{taylor(expr, x-y)} can be used.
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index 23a3feb090..3fc40e5e5d 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -17,6 +17,7 @@ OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes)
OBJS-$(HAVE_RV) += $(RV-OBJS) $(RV-OBJS-yes)
OBJS-$(HAVE_RVV) += $(RVV-OBJS) $(RVV-OBJS-yes)
+OBJS-$(HAVE_RV_ZVBB) += $(RVVB-OBJS) $(RVVB-OBJS-yes)
OBJS-$(HAVE_MMX) += $(MMX-OBJS) $(MMX-OBJS-yes)
OBJS-$(HAVE_X86ASM) += $(X86ASM-OBJS) $(X86ASM-OBJS-yes)
diff --git a/ffbuild/libversion.sh b/ffbuild/libversion.sh
index a94ab58057..ecaa90cde6 100755
--- a/ffbuild/libversion.sh
+++ b/ffbuild/libversion.sh
@@ -1,3 +1,4 @@
+#!/bin/sh
toupper(){
echo "$@" | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ
}
diff --git a/fftools/cmdutils.c b/fftools/cmdutils.c
index 2120fc7935..a8f5c6d89b 100644
--- a/fftools/cmdutils.c
+++ b/fftools/cmdutils.c
@@ -311,7 +311,7 @@ static int write_option(void *optctx, const OptionDef *po, const char *opt,
*(int *)dst = num;
} else if (po->type == OPT_TYPE_INT64) {
- ret = parse_number(opt, arg, OPT_TYPE_INT64, INT64_MIN, INT64_MAX, &num);
+ ret = parse_number(opt, arg, OPT_TYPE_INT64, INT64_MIN, (double)INT64_MAX, &num);
if (ret < 0)
goto finish;
diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 0ee76d69b5..c86fd5065e 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -484,8 +484,8 @@ void remove_avoptions(AVDictionary **a, AVDictionary *b)
int check_avoptions(AVDictionary *m)
{
- const AVDictionaryEntry *t;
- if ((t = av_dict_get(m, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ const AVDictionaryEntry *t = av_dict_iterate(m, NULL);
+ if (t) {
av_log(NULL, AV_LOG_FATAL, "Option %s not found.\n", t->key);
return AVERROR_OPTION_NOT_FOUND;
}
@@ -787,6 +787,11 @@ static int check_keyboard_interaction(int64_t cur_time)
(n = sscanf(buf, "%63[^ ] %lf %255[^ ] %255[^\n]", target, &time, command, arg)) >= 3) {
av_log(NULL, AV_LOG_DEBUG, "Processing command target:%s time:%f command:%s arg:%s",
target, time, command, arg);
+ for (OutputStream *ost = ost_iter(NULL); ost; ost = ost_iter(ost)) {
+ if (ost->fg_simple)
+ fg_send_command(ost->fg_simple, time, target, command, arg,
+ key == 'C');
+ }
for (i = 0; i < nb_filtergraphs; i++)
fg_send_command(filtergraphs[i], time, target, command, arg,
key == 'C');
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index a575ee70d5..885a7c0c10 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -263,20 +263,67 @@ typedef struct InputFilterOptions {
AVFrame *fallback;
} InputFilterOptions;
+enum OFilterFlags {
+ OFILTER_FLAG_DISABLE_CONVERT = (1 << 0),
+ // produce 24-bit audio
+ OFILTER_FLAG_AUDIO_24BIT = (1 << 1),
+ OFILTER_FLAG_AUTOSCALE = (1 << 2),
+};
+
+typedef struct OutputFilterOptions {
+ // Caller-provided name for this output
+ char *name;
+
+ // Codec used for encoding, may be NULL
+ const AVCodec *enc;
+ // Overrides encoder pixel formats when set.
+ const enum AVPixelFormat *pix_fmts;
+
+ int64_t trim_start_us;
+ int64_t trim_duration_us;
+ int64_t ts_offset;
+
+ /* Desired output timebase.
+ * Numerator can be one of EncTimeBase values, or 0 when no preference.
+ */
+ AVRational output_tb;
+
+ AVDictionary *sws_opts;
+ AVDictionary *swr_opts;
+
+ const char *nb_threads;
+
+ // A combination of OFilterFlags.
+ unsigned flags;
+
+ int format;
+ int width;
+ int height;
+
+ enum VideoSyncMethod vsync_method;
+
+ int sample_rate;
+ AVChannelLayout ch_layout;
+} OutputFilterOptions;
+
typedef struct InputFilter {
struct FilterGraph *graph;
uint8_t *name;
} InputFilter;
typedef struct OutputFilter {
- struct OutputStream *ost;
+ const AVClass *class;
+
struct FilterGraph *graph;
uint8_t *name;
/* for filters that are not yet bound to an output stream,
* this stores the output linklabel, if any */
+ int bound;
uint8_t *linklabel;
+ char *apad;
+
enum AVMediaType type;
atomic_uint_least64_t nb_frames_dup;
@@ -515,21 +562,16 @@ typedef struct OutputStream {
AVStream *st; /* stream in the output file */
- AVRational enc_timebase;
-
Encoder *enc;
AVCodecContext *enc_ctx;
/* video only */
AVRational frame_rate;
AVRational max_frame_rate;
- enum VideoSyncMethod vsync_method;
- int is_cfr;
int force_fps;
#if FFMPEG_OPT_TOP
int top_field_first;
#endif
- int autoscale;
int bitexact;
int bits_per_raw_sample;
@@ -540,17 +582,14 @@ typedef struct OutputStream {
char *logfile_prefix;
FILE *logfile;
+ // simple filtergraph feeding this stream, if any
+ FilterGraph *fg_simple;
OutputFilter *filter;
AVDictionary *encoder_opts;
- AVDictionary *sws_dict;
- AVDictionary *swr_opts;
- char *apad;
char *attachment_filename;
- int keep_pix_fmt;
-
/* stats */
// number of packets send to the muxer
atomic_uint_least64_t packets_written;
@@ -576,7 +615,6 @@ typedef struct OutputFile {
int index;
- const AVOutputFormat *format;
const char *url;
OutputStream **streams;
@@ -585,7 +623,6 @@ typedef struct OutputFile {
int64_t recording_time; ///< desired length of the resulting file in microseconds == AV_TIME_BASE units
int64_t start_time; ///< start time in microseconds == AV_TIME_BASE units
- int shortest;
int bitexact;
} OutputFile;
@@ -618,6 +655,7 @@ extern int nb_input_files;
extern OutputFile **output_files;
extern int nb_output_files;
+// complex filtergraphs
extern FilterGraph **filtergraphs;
extern int nb_filtergraphs;
@@ -684,8 +722,9 @@ int parse_and_set_vsync(const char *arg, int *vsync_var, int file_idx, int st_id
int filtergraph_is_simple(const FilterGraph *fg);
int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
char *graph_desc,
- Scheduler *sch, unsigned sch_idx_enc);
-int fg_finalise_bindings(FilterGraph *fg);
+ Scheduler *sch, unsigned sch_idx_enc,
+ const OutputFilterOptions *opts);
+int fg_finalise_bindings(void);
/**
* Get our axiliary frame data attached to the frame, allocating it
@@ -699,7 +738,8 @@ FrameData *packet_data (AVPacket *pkt);
const FrameData *packet_data_c(AVPacket *pkt);
int ofilter_bind_ost(OutputFilter *ofilter, OutputStream *ost,
- unsigned sched_idx_enc);
+ unsigned sched_idx_enc,
+ const OutputFilterOptions *opts);
/**
* Create a new filtergraph in the global filtergraph list.
diff --git a/fftools/ffmpeg_enc.c b/fftools/ffmpeg_enc.c
index d1d1526830..618ba193ff 100644
--- a/fftools/ffmpeg_enc.c
+++ b/fftools/ffmpeg_enc.c
@@ -280,9 +280,6 @@ int enc_open(void *opaque, const AVFrame *frame)
break;
}
case AVMEDIA_TYPE_SUBTITLE:
- if (ost->enc_timebase.num)
- av_log(ost, AV_LOG_WARNING,
- "-enc_time_base not supported for subtitles, ignoring\n");
enc_ctx->time_base = AV_TIME_BASE_Q;
if (!enc_ctx->width) {
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 2308abf82a..12cca684b4 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -59,6 +59,8 @@ typedef struct FilterGraphPriv {
const char *graph_desc;
+ char *nb_threads;
+
// frame for temporarily holding output from the filtergraph
AVFrame *frame;
// frame for sending output to the encoder
@@ -175,6 +177,8 @@ typedef struct FPSConvContext {
int last_dropped;
int dropped_keyframe;
+ enum VideoSyncMethod vsync_method;
+
AVRational framerate;
AVRational framerate_max;
const AVRational *framerate_supported;
@@ -186,6 +190,11 @@ typedef struct OutputFilterPriv {
int index;
+ void *log_parent;
+ char log_name[32];
+
+ char *name;
+
AVFilterContext *filter;
/* desired output stream properties */
@@ -203,6 +212,9 @@ typedef struct OutputFilterPriv {
AVRational sample_aspect_ratio;
+ AVDictionary *sws_opts;
+ AVDictionary *swr_opts;
+
// those are only set if no format is specified and the encoder gives us multiple options
// They point directly to the relevant lists of the encoder.
const int *formats;
@@ -210,10 +222,14 @@ typedef struct OutputFilterPriv {
const int *sample_rates;
AVRational enc_timebase;
+ int64_t trim_start_us;
+ int64_t trim_duration_us;
// offset for output timestamps, in AV_TIME_BASE_Q
int64_t ts_offset;
int64_t next_pts;
FPSConvContext fps;
+
+ unsigned flags;
} OutputFilterPriv;
static OutputFilterPriv *ofp_from_ofilter(OutputFilter *ofilter)
@@ -355,11 +371,10 @@ static int choose_pix_fmts(OutputFilter *ofilter, AVBPrint *bprint,
const char **dst)
{
OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
- OutputStream *ost = ofilter->ost;
*dst = NULL;
- if (ost->keep_pix_fmt || ofp->format != AV_PIX_FMT_NONE) {
+ if (ofp->flags & OFILTER_FLAG_DISABLE_CONVERT || ofp->format != AV_PIX_FMT_NONE) {
*dst = ofp->format == AV_PIX_FMT_NONE ? NULL :
av_get_pix_fmt_name(ofp->format);
} else if (ofp->formats) {
@@ -624,7 +639,21 @@ static char *describe_filter_link(FilterGraph *fg, AVFilterInOut *inout, int in)
avfilter_pad_get_name(pads, inout->pad_idx));
}
-static OutputFilter *ofilter_alloc(FilterGraph *fg)
+static const char *ofilter_item_name(void *obj)
+{
+ OutputFilterPriv *ofp = obj;
+ return ofp->log_name;
+}
+
+static const AVClass ofilter_class = {
+ .class_name = "OutputFilter",
+ .version = LIBAVUTIL_VERSION_INT,
+ .item_name = ofilter_item_name,
+ .parent_log_context_offset = offsetof(OutputFilterPriv, log_parent),
+ .category = AV_CLASS_CATEGORY_FILTER,
+};
+
+static OutputFilter *ofilter_alloc(FilterGraph *fg, enum AVMediaType type)
{
OutputFilterPriv *ofp;
OutputFilter *ofilter;
@@ -634,10 +663,16 @@ static OutputFilter *ofilter_alloc(FilterGraph *fg)
return NULL;
ofilter = &ofp->ofilter;
+ ofilter->class = &ofilter_class;
+ ofp->log_parent = fg;
ofilter->graph = fg;
+ ofilter->type = type;
ofp->format = -1;
ofp->index = fg->nb_outputs - 1;
+ snprintf(ofp->log_name, sizeof(ofp->log_name), "%co%d",
+ av_get_media_type_string(type)[0], ofp->index);
+
return ofilter;
}
@@ -722,133 +757,139 @@ static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec)
return 0;
}
-static int set_channel_layout(OutputFilterPriv *f, OutputStream *ost)
+static int set_channel_layout(OutputFilterPriv *f, const AVChannelLayout *layouts_allowed,
+ const AVChannelLayout *layout_requested)
{
- const AVCodec *c = ost->enc_ctx->codec;
int i, err;
- if (ost->enc_ctx->ch_layout.order != AV_CHANNEL_ORDER_UNSPEC) {
+ if (layout_requested->order != AV_CHANNEL_ORDER_UNSPEC) {
/* Pass the layout through for all orders but UNSPEC */
- err = av_channel_layout_copy(&f->ch_layout, &ost->enc_ctx->ch_layout);
+ err = av_channel_layout_copy(&f->ch_layout, layout_requested);
if (err < 0)
return err;
return 0;
}
/* Requested layout is of order UNSPEC */
- if (!c->ch_layouts) {
+ if (!layouts_allowed) {
/* Use the default native layout for the requested amount of channels when the
encoder doesn't have a list of supported layouts */
- av_channel_layout_default(&f->ch_layout, ost->enc_ctx->ch_layout.nb_channels);
+ av_channel_layout_default(&f->ch_layout, layout_requested->nb_channels);
return 0;
}
/* Encoder has a list of supported layouts. Pick the first layout in it with the
same amount of channels as the requested layout */
- for (i = 0; c->ch_layouts[i].nb_channels; i++) {
- if (c->ch_layouts[i].nb_channels == ost->enc_ctx->ch_layout.nb_channels)
+ for (i = 0; layouts_allowed[i].nb_channels; i++) {
+ if (layouts_allowed[i].nb_channels == layout_requested->nb_channels)
break;
}
- if (c->ch_layouts[i].nb_channels) {
+ if (layouts_allowed[i].nb_channels) {
/* Use it if one is found */
- err = av_channel_layout_copy(&f->ch_layout, &c->ch_layouts[i]);
+ err = av_channel_layout_copy(&f->ch_layout, &layouts_allowed[i]);
if (err < 0)
return err;
return 0;
}
/* If no layout for the amount of channels requested was found, use the default
native layout for it. */
- av_channel_layout_default(&f->ch_layout, ost->enc_ctx->ch_layout.nb_channels);
+ av_channel_layout_default(&f->ch_layout, layout_requested->nb_channels);
return 0;
}
int ofilter_bind_ost(OutputFilter *ofilter, OutputStream *ost,
- unsigned sched_idx_enc)
+ unsigned sched_idx_enc,
+ const OutputFilterOptions *opts)
{
- const OutputFile *of = ost->file;
OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
FilterGraph *fg = ofilter->graph;
FilterGraphPriv *fgp = fgp_from_fg(fg);
- const AVCodec *c = ost->enc_ctx->codec;
int ret;
- av_assert0(!ofilter->ost);
+ av_assert0(!ofilter->bound);
+ av_assert0(ofilter->type == ost->type);
- ofilter->ost = ost;
+ ofilter->bound = 1;
av_freep(&ofilter->linklabel);
- ofp->ts_offset = of->start_time == AV_NOPTS_VALUE ? 0 : of->start_time;
- ofp->enc_timebase = ost->enc_timebase;
+ ofp->flags = opts->flags;
+ ofp->ts_offset = opts->ts_offset;
+ ofp->enc_timebase = opts->output_tb;
- switch (ost->enc_ctx->codec_type) {
- case AVMEDIA_TYPE_VIDEO:
- ofp->width = ost->enc_ctx->width;
- ofp->height = ost->enc_ctx->height;
- if (ost->enc_ctx->pix_fmt != AV_PIX_FMT_NONE) {
- ofp->format = ost->enc_ctx->pix_fmt;
- } else {
- ofp->formats = c->pix_fmts;
-
- // MJPEG encoder exports a full list of supported pixel formats,
- // but the full-range ones are experimental-only.
- // Restrict the auto-conversion list unless -strict experimental
- // has been specified.
- if (!strcmp(c->name, "mjpeg")) {
- // FIXME: YUV420P etc. are actually supported with full color range,
- // yet the latter information isn't available here.
- static const enum AVPixelFormat mjpeg_formats[] =
- { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
- AV_PIX_FMT_NONE };
-
- const AVDictionaryEntry *strict = av_dict_get(ost->encoder_opts, "strict", NULL, 0);
- int strict_val = ost->enc_ctx->strict_std_compliance;
-
- if (strict) {
- const AVOption *o = av_opt_find(ost->enc_ctx, strict->key, NULL, 0, 0);
- av_assert0(o);
- av_opt_eval_int(ost->enc_ctx, o, strict->value, &strict_val);
- }
+ ofp->trim_start_us = opts->trim_start_us;
+ ofp->trim_duration_us = opts->trim_duration_us;
- if (strict_val > FF_COMPLIANCE_UNOFFICIAL)
- ofp->formats = mjpeg_formats;
- }
- }
+ ofp->name = av_strdup(opts->name);
+ if (!ofp->name)
+ return AVERROR(EINVAL);
+
+ ret = av_dict_copy(&ofp->sws_opts, opts->sws_opts, 0);
+ if (ret < 0)
+ return ret;
- fgp->disable_conversions |= ost->keep_pix_fmt;
+ ret = av_dict_copy(&ofp->swr_opts, opts->swr_opts, 0);
+ if (ret < 0)
+ return ret;
+
+ if (opts->flags & OFILTER_FLAG_AUDIO_24BIT)
+ av_dict_set(&ofp->swr_opts, "output_sample_bits", "24", 0);
+
+ if (fgp->is_simple) {
+ // for simple filtergraph there is just one output,
+ // so use only graph-level information for logging
+ ofp->log_parent = NULL;
+ av_strlcpy(ofp->log_name, fgp->log_name, sizeof(ofp->log_name));
+ } else
+ av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofp->name);
+
+ switch (ofilter->type) {
+ case AVMEDIA_TYPE_VIDEO:
+ ofp->width = opts->width;
+ ofp->height = opts->height;
+ if (opts->format != AV_PIX_FMT_NONE) {
+ ofp->format = opts->format;
+ } else if (opts->pix_fmts)
+ ofp->formats = opts->pix_fmts;
+ else if (opts->enc)
+ ofp->formats = opts->enc->pix_fmts;
+
+ fgp->disable_conversions |= !!(ofp->flags & OFILTER_FLAG_DISABLE_CONVERT);
ofp->fps.last_frame = av_frame_alloc();
if (!ofp->fps.last_frame)
return AVERROR(ENOMEM);
+ ofp->fps.vsync_method = opts->vsync_method;
ofp->fps.framerate = ost->frame_rate;
ofp->fps.framerate_max = ost->max_frame_rate;
- ofp->fps.framerate_supported = ost->force_fps ?
- NULL : c->supported_framerates;
+ ofp->fps.framerate_supported = ost->force_fps || !opts->enc ?
+ NULL : opts->enc->supported_framerates;
// reduce frame rate for mpeg4 to be within the spec limits
- if (c->id == AV_CODEC_ID_MPEG4)
+ if (opts->enc && opts->enc->id == AV_CODEC_ID_MPEG4)
ofp->fps.framerate_clip = 65535;
ofp->fps.dup_warning = 1000;
break;
case AVMEDIA_TYPE_AUDIO:
- if (ost->enc_ctx->sample_fmt != AV_SAMPLE_FMT_NONE) {
- ofp->format = ost->enc_ctx->sample_fmt;
- } else {
- ofp->formats = c->sample_fmts;
- }
- if (ost->enc_ctx->sample_rate) {
- ofp->sample_rate = ost->enc_ctx->sample_rate;
- } else {
- ofp->sample_rates = c->supported_samplerates;
- }
- if (ost->enc_ctx->ch_layout.nb_channels) {
- int ret = set_channel_layout(ofp, ost);
+ if (opts->format != AV_SAMPLE_FMT_NONE) {
+ ofp->format = opts->format;
+ } else if (opts->enc) {
+ ofp->formats = opts->enc->sample_fmts;
+ }
+ if (opts->sample_rate) {
+ ofp->sample_rate = opts->sample_rate;
+ } else if (opts->enc) {
+ ofp->sample_rates = opts->enc->supported_samplerates;
+ }
+ if (opts->ch_layout.nb_channels) {
+ int ret = set_channel_layout(ofp, opts->enc ? opts->enc->ch_layouts : NULL,
+ &opts->ch_layout);
if (ret < 0)
return ret;
- } else if (c->ch_layouts) {
- ofp->ch_layouts = c->ch_layouts;
+ } else if (opts->enc) {
+ ofp->ch_layouts = opts->enc->ch_layouts;
}
break;
}
@@ -861,6 +902,63 @@ int ofilter_bind_ost(OutputFilter *ofilter, OutputStream *ost,
return 0;
}
+static int ofilter_bind_ifilter(OutputFilter *ofilter, InputFilterPriv *ifp,
+ const OutputFilterOptions *opts)
+{
+ OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
+
+ av_assert0(!ofilter->bound);
+ av_assert0(ofilter->type == ifp->type);
+
+ ofilter->bound = 1;
+ av_freep(&ofilter->linklabel);
+
+ ofp->name = av_strdup(opts->name);
+ if (!ofp->name)
+ return AVERROR(EINVAL);
+
+ av_strlcatf(ofp->log_name, sizeof(ofp->log_name), "->%s", ofp->name);
+
+ return 0;
+}
+
+static int ifilter_bind_fg(InputFilterPriv *ifp, FilterGraph *fg_src, int out_idx)
+{
+ FilterGraphPriv *fgp = fgp_from_fg(ifp->ifilter.graph);
+ OutputFilter *ofilter_src = fg_src->outputs[out_idx];
+ OutputFilterOptions opts;
+ char name[32];
+ int ret;
+
+ av_assert0(!ifp->bound);
+ ifp->bound = 1;
+
+ if (ifp->type != ofilter_src->type) {
+ av_log(fgp, AV_LOG_ERROR, "Tried to connect %s output to %s input\n",
+ av_get_media_type_string(ofilter_src->type),
+ av_get_media_type_string(ifp->type));
+ return AVERROR(EINVAL);
+ }
+
+ ifp->type_src = ifp->type;
+
+ memset(&opts, 0, sizeof(opts));
+
+ snprintf(name, sizeof(name), "fg:%d:%d", fgp->fg.index, ifp->index);
+ opts.name = name;
+
+ ret = ofilter_bind_ifilter(ofilter_src, ifp, &opts);
+ if (ret < 0)
+ return ret;
+
+ ret = sch_connect(fgp->sch, SCH_FILTER_OUT(fg_src->index, out_idx),
+ SCH_FILTER_IN(fgp->sch_idx, ifp->index));
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static InputFilter *ifilter_alloc(FilterGraph *fg)
{
InputFilterPriv *ifp;
@@ -925,14 +1023,19 @@ void fg_free(FilterGraph **pfg)
OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
av_frame_free(&ofp->fps.last_frame);
+ av_dict_free(&ofp->sws_opts);
+ av_dict_free(&ofp->swr_opts);
av_freep(&ofilter->linklabel);
av_freep(&ofilter->name);
+ av_freep(&ofilter->apad);
+ av_freep(&ofp->name);
av_channel_layout_uninit(&ofp->ch_layout);
av_freep(&fg->outputs[j]);
}
av_freep(&fg->outputs);
av_freep(&fgp->graph_desc);
+ av_freep(&fgp->nb_threads);
av_frame_free(&fgp->frame);
av_frame_free(&fgp->frame_enc);
@@ -963,16 +1066,25 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch)
AVFilterGraph *graph;
int ret = 0;
- fgp = allocate_array_elem(&filtergraphs, sizeof(*fgp), &nb_filtergraphs);
+ fgp = av_mallocz(sizeof(*fgp));
if (!fgp)
return AVERROR(ENOMEM);
fg = &fgp->fg;
- if (pfg)
+ if (pfg) {
*pfg = fg;
+ fg->index = -1;
+ } else {
+ ret = av_dynarray_add_nofree(&filtergraphs, &nb_filtergraphs, fgp);
+ if (ret < 0) {
+ av_freep(&fgp);
+ return ret;
+ }
+
+ fg->index = nb_filtergraphs - 1;
+ }
fg->class = &fg_class;
- fg->index = nb_filtergraphs - 1;
fgp->graph_desc = graph_desc;
fgp->disable_conversions = !auto_conversion_filters;
fgp->sch = sch;
@@ -1035,7 +1147,9 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch)
}
for (AVFilterInOut *cur = outputs; cur; cur = cur->next) {
- OutputFilter *const ofilter = ofilter_alloc(fg);
+ const enum AVMediaType type = avfilter_pad_get_type(cur->filter_ctx->output_pads,
+ cur->pad_idx);
+ OutputFilter *const ofilter = ofilter_alloc(fg, type);
if (!ofilter) {
ret = AVERROR(ENOMEM);
@@ -1045,8 +1159,6 @@ int fg_create(FilterGraph **pfg, char *graph_desc, Scheduler *sch)
ofilter->linklabel = cur->name;
cur->name = NULL;
- ofilter->type = avfilter_pad_get_type(cur->filter_ctx->output_pads,
- cur->pad_idx);
ofilter->name = describe_filter_link(fg, cur, 0);
if (!ofilter->name) {
ret = AVERROR(ENOMEM);
@@ -1079,22 +1191,23 @@ fail:
int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
char *graph_desc,
- Scheduler *sch, unsigned sched_idx_enc)
+ Scheduler *sch, unsigned sched_idx_enc,
+ const OutputFilterOptions *opts)
{
FilterGraph *fg;
FilterGraphPriv *fgp;
int ret;
- ret = fg_create(&fg, graph_desc, sch);
+ ret = fg_create(&ost->fg_simple, graph_desc, sch);
if (ret < 0)
return ret;
+ fg = ost->fg_simple;
fgp = fgp_from_fg(fg);
fgp->is_simple = 1;
- snprintf(fgp->log_name, sizeof(fgp->log_name), "%cf#%d:%d",
- av_get_media_type_string(ost->type)[0],
- ost->file->index, ost->index);
+ snprintf(fgp->log_name, sizeof(fgp->log_name), "%cf%s",
+ av_get_media_type_string(ost->type)[0], opts->name);
if (fg->nb_inputs != 1 || fg->nb_outputs != 1) {
av_log(fg, AV_LOG_ERROR, "Simple filtergraph '%s' was expected "
@@ -1104,6 +1217,13 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
graph_desc, fg->nb_inputs, fg->nb_outputs);
return AVERROR(EINVAL);
}
+ if (fg->outputs[0]->type != ost->type) {
+ av_log(fg, AV_LOG_ERROR, "Filtergraph has a %s output, cannot connect "
+ "it to %s output stream\n",
+ av_get_media_type_string(fg->outputs[0]->type),
+ av_get_media_type_string(ost->type));
+ return AVERROR(EINVAL);
+ }
ost->filter = fg->outputs[0];
@@ -1111,10 +1231,17 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
if (ret < 0)
return ret;
- ret = ofilter_bind_ost(fg->outputs[0], ost, sched_idx_enc);
+ ret = ofilter_bind_ost(fg->outputs[0], ost, sched_idx_enc, opts);
if (ret < 0)
return ret;
+ if (opts->nb_threads) {
+ av_freep(&fgp->nb_threads);
+ fgp->nb_threads = av_strdup(opts->nb_threads);
+ if (!fgp->nb_threads)
+ return AVERROR(ENOMEM);
+ }
+
return 0;
}
@@ -1143,12 +1270,38 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
ifilter->name);
return ret;
} else if (ifp->linklabel) {
- // bind to an explicitly specified demuxer stream
AVFormatContext *s;
AVStream *st = NULL;
char *p;
- int file_idx = strtol(ifp->linklabel, &p, 0);
+ int file_idx;
+ // try finding an unbound filtergraph output with this label
+ for (int i = 0; i < nb_filtergraphs; i++) {
+ FilterGraph *fg_src = filtergraphs[i];
+
+ if (fg == fg_src)
+ continue;
+
+ for (int j = 0; j < fg_src->nb_outputs; j++) {
+ OutputFilter *ofilter = fg_src->outputs[j];
+
+ if (!ofilter->bound && ofilter->linklabel &&
+ !strcmp(ofilter->linklabel, ifp->linklabel)) {
+ av_log(fg, AV_LOG_VERBOSE,
+ "Binding input with label '%s' to filtergraph output %d:%d\n",
+ ifp->linklabel, i, j);
+
+ ret = ifilter_bind_fg(ifp, fg_src, j);
+ if (ret < 0)
+ av_log(fg, AV_LOG_ERROR, "Error binding filtergraph input %s\n",
+ ifp->linklabel);
+ return ret;
+ }
+ }
+ }
+
+ // bind to an explicitly specified demuxer stream
+ file_idx = strtol(ifp->linklabel, &p, 0);
if (file_idx < 0 || file_idx >= nb_input_files) {
av_log(fg, AV_LOG_FATAL, "Invalid file index %d in filtergraph description %s.\n",
file_idx, fgp->graph_desc);
@@ -1202,9 +1355,9 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
return 0;
}
-int fg_finalise_bindings(FilterGraph *fg)
+static int bind_inputs(FilterGraph *fg)
{
- // bind filtergraph inputs to input streams
+ // bind filtergraph inputs to input streams or other filtergraphs
for (int i = 0; i < fg->nb_inputs; i++) {
InputFilterPriv *ifp = ifp_from_ifilter(fg->inputs[i]);
int ret;
@@ -1217,14 +1370,33 @@ int fg_finalise_bindings(FilterGraph *fg)
return ret;
}
- for (int i = 0; i < fg->nb_outputs; i++) {
- OutputFilter *output = fg->outputs[i];
- if (!output->ost) {
- av_log(filtergraphs[i], AV_LOG_FATAL,
- "Filter %s has an unconnected output\n", output->name);
- return AVERROR(EINVAL);
+ return 0;
+}
+
+int fg_finalise_bindings(void)
+{
+ int ret;
+
+ for (int i = 0; i < nb_filtergraphs; i++) {
+ ret = bind_inputs(filtergraphs[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ // check that all outputs were bound
+ for (int i = 0; i < nb_filtergraphs; i++) {
+ FilterGraph *fg = filtergraphs[i];
+
+ for (int j = 0; j < fg->nb_outputs; j++) {
+ OutputFilter *output = fg->outputs[j];
+ if (!output->bound) {
+ av_log(filtergraphs[j], AV_LOG_FATAL,
+ "Filter %s has an unconnected output\n", output->name);
+ return AVERROR(EINVAL);
+ }
}
}
+
return 0;
}
@@ -1305,8 +1477,6 @@ static int configure_output_video_filter(FilterGraph *fg, AVFilterGraph *graph,
OutputFilter *ofilter, AVFilterInOut *out)
{
OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
- OutputStream *ost = ofilter->ost;
- OutputFile *of = ost->file;
AVFilterContext *last_filter = out->filter_ctx;
AVBPrint bprint;
int pad_idx = out->pad_idx;
@@ -1314,7 +1484,7 @@ static int configure_output_video_filter(FilterGraph *fg, AVFilterGraph *graph,
const char *pix_fmts;
char name[255];
- snprintf(name, sizeof(name), "out_%d_%d", ost->file->index, ost->index);
+ snprintf(name, sizeof(name), "out_%s", ofp->name);
ret = avfilter_graph_create_filter(&ofp->filter,
avfilter_get_by_name("buffersink"),
name, NULL, NULL, graph);
@@ -1322,7 +1492,7 @@ static int configure_output_video_filter(FilterGraph *fg, AVFilterGraph *graph,
if (ret < 0)
return ret;
- if ((ofp->width || ofp->height) && ofilter->ost->autoscale) {
+ if ((ofp->width || ofp->height) && (ofp->flags & OFILTER_FLAG_AUTOSCALE)) {
char args[255];
AVFilterContext *filter;
const AVDictionaryEntry *e = NULL;
@@ -1330,12 +1500,11 @@ static int configure_output_video_filter(FilterGraph *fg, AVFilterGraph *graph,
snprintf(args, sizeof(args), "%d:%d",
ofp->width, ofp->height);
- while ((e = av_dict_iterate(ost->sws_dict, e))) {
+ while ((e = av_dict_iterate(ofp->sws_opts, e))) {
av_strlcatf(args, sizeof(args), ":%s=%s", e->key, e->value);
}
- snprintf(name, sizeof(name), "scaler_out_%d_%d",
- ost->file->index, ost->index);
+ snprintf(name, sizeof(name), "scaler_out_%s", ofp->name);
if ((ret = avfilter_graph_create_filter(&filter, avfilter_get_by_name("scale"),
name, args, NULL, graph)) < 0)
return ret;
@@ -1367,9 +1536,8 @@ static int configure_output_video_filter(FilterGraph *fg, AVFilterGraph *graph,
pad_idx = 0;
}
- snprintf(name, sizeof(name), "trim_out_%d_%d",
- ost->file->index, ost->index);
- ret = insert_trim(of->start_time, of->recording_time,
+ snprintf(name, sizeof(name), "trim_out_%s", ofp->name);
+ ret = insert_trim(ofp->trim_start_us, ofp->trim_duration_us,
&last_filter, &pad_idx, name);
if (ret < 0)
return ret;
@@ -1385,15 +1553,13 @@ static int configure_output_audio_filter(FilterGraph *fg, AVFilterGraph *graph,
OutputFilter *ofilter, AVFilterInOut *out)
{
OutputFilterPriv *ofp = ofp_from_ofilter(ofilter);
- OutputStream *ost = ofilter->ost;
- OutputFile *of = ost->file;
AVFilterContext *last_filter = out->filter_ctx;
int pad_idx = out->pad_idx;
AVBPrint args;
char name[255];
int ret;
- snprintf(name, sizeof(name), "out_%d_%d", ost->file->index, ost->index);
+ snprintf(name, sizeof(name), "out_%s", ofp->name);
ret = avfilter_graph_create_filter(&ofp->filter,
avfilter_get_by_name("abuffersink"),
name, NULL, NULL, graph);
@@ -1405,7 +1571,7 @@ static int configure_output_audio_filter(FilterGraph *fg, AVFilterGraph *graph,
#define AUTO_INSERT_FILTER(opt_name, filter_name, arg) do { \
AVFilterContext *filt_ctx; \
\
- av_log(fg, AV_LOG_INFO, opt_name " is forwarded to lavfi " \
+ av_log(ofilter, AV_LOG_INFO, opt_name " is forwarded to lavfi " \
"similarly to -af " filter_name "=%s.\n", arg); \
\
ret = avfilter_graph_create_filter(&filt_ctx, \
@@ -1433,8 +1599,7 @@ static int configure_output_audio_filter(FilterGraph *fg, AVFilterGraph *graph,
if (args.len) {
AVFilterContext *format;
- snprintf(name, sizeof(name), "format_out_%d_%d",
- ost->file->index, ost->index);
+ snprintf(name, sizeof(name), "format_out_%s", ofp->name);
ret = avfilter_graph_create_filter(&format,
avfilter_get_by_name("aformat"),
name, args.str, NULL, graph);
@@ -1449,21 +1614,11 @@ static int configure_output_audio_filter(FilterGraph *fg, AVFilterGraph *graph,
pad_idx = 0;
}
- if (ost->apad && of->shortest) {
- int i;
-
- for (i = 0; i < of->nb_streams; i++)
- if (of->streams[i]->st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
- break;
-
- if (i < of->nb_streams) {
- AUTO_INSERT_FILTER("-apad", "apad", ost->apad);
- }
- }
+ if (ofilter->apad)
+ AUTO_INSERT_FILTER("-apad", "apad", ofilter->apad);
- snprintf(name, sizeof(name), "trim for output stream %d:%d",
- ost->file->index, ost->index);
- ret = insert_trim(of->start_time, of->recording_time,
+ snprintf(name, sizeof(name), "trim for output %s", ofp->name);
+ ret = insert_trim(ofp->trim_start_us, ofp->trim_duration_us,
&last_filter, &pad_idx, name);
if (ret < 0)
goto fail;
@@ -1479,12 +1634,7 @@ fail:
static int configure_output_filter(FilterGraph *fg, AVFilterGraph *graph,
OutputFilter *ofilter, AVFilterInOut *out)
{
- if (!ofilter->ost) {
- av_log(fg, AV_LOG_FATAL, "Filter %s has an unconnected output\n", ofilter->name);
- return AVERROR(EINVAL);
- }
-
- switch (avfilter_pad_get_type(out->filter_ctx->output_pads, out->pad_idx)) {
+ switch (ofilter->type) {
case AVMEDIA_TYPE_VIDEO: return configure_output_video_filter(fg, graph, ofilter, out);
case AVMEDIA_TYPE_AUDIO: return configure_output_audio_filter(fg, graph, ofilter, out);
default: av_assert0(0); return 0;
@@ -1706,30 +1856,29 @@ static int configure_filtergraph(FilterGraph *fg, FilterGraphThread *fgt)
return AVERROR(ENOMEM);
if (simple) {
- OutputStream *ost = fg->outputs[0]->ost;
+ OutputFilterPriv *ofp = ofp_from_ofilter(fg->outputs[0]);
if (filter_nbthreads) {
ret = av_opt_set(fgt->graph, "threads", filter_nbthreads, 0);
if (ret < 0)
goto fail;
- } else {
- const AVDictionaryEntry *e = NULL;
- e = av_dict_get(ost->encoder_opts, "threads", NULL, 0);
- if (e)
- av_opt_set(fgt->graph, "threads", e->value, 0);
+ } else if (fgp->nb_threads) {
+ ret = av_opt_set(fgt->graph, "threads", fgp->nb_threads, 0);
+ if (ret < 0)
+ return ret;
}
- if (av_dict_count(ost->sws_dict)) {
- ret = av_dict_get_string(ost->sws_dict,
+ if (av_dict_count(ofp->sws_opts)) {
+ ret = av_dict_get_string(ofp->sws_opts,
&fgt->graph->scale_sws_opts,
'=', ':');
if (ret < 0)
goto fail;
}
- if (av_dict_count(ost->swr_opts)) {
+ if (av_dict_count(ofp->swr_opts)) {
char *args;
- ret = av_dict_get_string(ost->swr_opts, &args, '=', ':');
+ ret = av_dict_get_string(ofp->swr_opts, &args, '=', ':');
if (ret < 0)
goto fail;
av_opt_set(fgt->graph, "aresample_swr_opts", args, 0);
@@ -1941,7 +2090,7 @@ static int choose_out_timebase(OutputFilterPriv *ofp, AVFrame *frame)
// apply -enc_time_base
if (ofp->enc_timebase.num == ENC_TIME_BASE_DEMUX &&
(fd->dec.tb.num <= 0 || fd->dec.tb.den <= 0)) {
- av_log(ofilter->ost, AV_LOG_ERROR,
+ av_log(ofp, AV_LOG_ERROR,
"Demuxing timebase not available - cannot use it for encoding\n");
return AVERROR(EINVAL);
}
@@ -1965,10 +2114,10 @@ static int choose_out_timebase(OutputFilterPriv *ofp, AVFrame *frame)
fr = fr_sink;
}
- if (ofilter->ost->is_cfr) {
+ if (fps->vsync_method == VSYNC_CFR || fps->vsync_method == VSYNC_VSCFR) {
if (!fr.num && !fps->framerate_max.num) {
fr = (AVRational){25, 1};
- av_log(ofilter->ost, AV_LOG_WARNING,
+ av_log(ofp, AV_LOG_WARNING,
"No information "
"about the input framerate is available. Falling "
"back to a default value of 25fps. Use the -r option "
@@ -2051,7 +2200,6 @@ static void video_sync_process(OutputFilterPriv *ofp, AVFrame *frame,
int64_t *nb_frames, int64_t *nb_frames_prev)
{
OutputFilter *ofilter = &ofp->ofilter;
- OutputStream *ost = ofilter->ost;
FPSConvContext *fps = &ofp->fps;
double delta0, delta, sync_ipts, duration;
@@ -2084,24 +2232,24 @@ static void video_sync_process(OutputFilterPriv *ofp, AVFrame *frame,
if (delta0 < 0 &&
delta > 0 &&
- ost->vsync_method != VSYNC_PASSTHROUGH
+ fps->vsync_method != VSYNC_PASSTHROUGH
#if FFMPEG_OPT_VSYNC_DROP
- && ost->vsync_method != VSYNC_DROP
+ && fps->vsync_method != VSYNC_DROP
#endif
) {
if (delta0 < -0.6) {
- av_log(ost, AV_LOG_VERBOSE, "Past duration %f too large\n", -delta0);
+ av_log(ofp, AV_LOG_VERBOSE, "Past duration %f too large\n", -delta0);
} else
- av_log(ost, AV_LOG_DEBUG, "Clipping frame in rate conversion by %f\n", -delta0);
+ av_log(ofp, AV_LOG_DEBUG, "Clipping frame in rate conversion by %f\n", -delta0);
sync_ipts = ofp->next_pts;
duration += delta0;
delta0 = 0;
}
- switch (ost->vsync_method) {
+ switch (fps->vsync_method) {
case VSYNC_VSCFR:
if (fps->frame_number == 0 && delta0 >= 0.5) {
- av_log(ost, AV_LOG_DEBUG, "Not duplicating %d initial frames\n", (int)lrintf(delta0));
+ av_log(ofp, AV_LOG_DEBUG, "Not duplicating %d initial frames\n", (int)lrintf(delta0));
delta = duration;
delta0 = 0;
ofp->next_pts = llrint(sync_ipts);
@@ -2145,23 +2293,23 @@ finish:
if (*nb_frames_prev == 0 && fps->last_dropped) {
atomic_fetch_add(&ofilter->nb_frames_drop, 1);
- av_log(ost, AV_LOG_VERBOSE,
+ av_log(ofp, AV_LOG_VERBOSE,
"*** dropping frame %"PRId64" at ts %"PRId64"\n",
fps->frame_number, fps->last_frame->pts);
}
if (*nb_frames > (*nb_frames_prev && fps->last_dropped) + (*nb_frames > *nb_frames_prev)) {
uint64_t nb_frames_dup;
if (*nb_frames > dts_error_threshold * 30) {
- av_log(ost, AV_LOG_ERROR, "%"PRId64" frame duplication too large, skipping\n", *nb_frames - 1);
+ av_log(ofp, AV_LOG_ERROR, "%"PRId64" frame duplication too large, skipping\n", *nb_frames - 1);
atomic_fetch_add(&ofilter->nb_frames_drop, 1);
*nb_frames = 0;
return;
}
nb_frames_dup = atomic_fetch_add(&ofilter->nb_frames_dup,
*nb_frames - (*nb_frames_prev && fps->last_dropped) - (*nb_frames > *nb_frames_prev));
- av_log(ost, AV_LOG_VERBOSE, "*** %"PRId64" dup!\n", *nb_frames - 1);
+ av_log(ofp, AV_LOG_VERBOSE, "*** %"PRId64" dup!\n", *nb_frames - 1);
if (nb_frames_dup > fps->dup_warning) {
- av_log(ost, AV_LOG_WARNING, "More than %"PRIu64" frames duplicated\n", fps->dup_warning);
+ av_log(ofp, AV_LOG_WARNING, "More than %"PRIu64" frames duplicated\n", fps->dup_warning);
fps->dup_warning *= 10;
}
}
@@ -2203,7 +2351,7 @@ static int close_output(OutputFilterPriv *ofp, FilterGraphThread *fgt)
av_assert0(!frame->buf[0]);
- av_log(ofp->ofilter.ost, AV_LOG_WARNING,
+ av_log(ofp, AV_LOG_WARNING,
"No filtered frames for output stream, trying to "
"initialize anyway.\n");
@@ -2307,7 +2455,6 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt,
AVFrame *frame)
{
FilterGraphPriv *fgp = fgp_from_fg(ofp->ofilter.graph);
- OutputStream *ost = ofp->ofilter.ost;
AVFilterContext *filter = ofp->filter;
FrameData *fd;
int ret;
@@ -2320,7 +2467,7 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt,
} else if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
return 1;
} else if (ret < 0) {
- av_log(fgp, AV_LOG_WARNING,
+ av_log(ofp, AV_LOG_WARNING,
"Error in retrieving a frame from the filtergraph: %s\n",
av_err2str(ret));
return ret;
@@ -2334,7 +2481,7 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt,
frame->time_base = av_buffersink_get_time_base(filter);
if (debug_ts)
- av_log(fgp, AV_LOG_INFO, "filter_raw -> pts:%s pts_time:%s time_base:%d/%d\n",
+ av_log(ofp, AV_LOG_INFO, "filter_raw -> pts:%s pts_time:%s time_base:%d/%d\n",
av_ts2str(frame->pts), av_ts2timestr(frame->pts, &frame->time_base),
frame->time_base.num, frame->time_base.den);
@@ -2342,7 +2489,7 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt,
if (!ofp->tb_out_locked) {
ret = choose_out_timebase(ofp, frame);
if (ret < 0) {
- av_log(ost, AV_LOG_ERROR, "Could not choose an output time base\n");
+ av_log(ofp, AV_LOG_ERROR, "Could not choose an output time base\n");
av_frame_unref(frame);
return ret;
}
@@ -2361,7 +2508,7 @@ static int fg_output_step(OutputFilterPriv *ofp, FilterGraphThread *fgt,
if (!fgp->is_meta)
fd->bits_per_raw_sample = 0;
- if (ost->type == AVMEDIA_TYPE_VIDEO) {
+ if (ofp->ofilter.type == AVMEDIA_TYPE_VIDEO) {
if (!frame->duration) {
AVRational fr = av_buffersink_get_frame_rate(filter);
if (fr.num > 0 && fr.den > 0)
@@ -2708,10 +2855,10 @@ static void fg_thread_set_name(const FilterGraph *fg)
{
char name[16];
if (filtergraph_is_simple(fg)) {
- OutputStream *ost = fg->outputs[0]->ost;
- snprintf(name, sizeof(name), "%cf#%d:%d",
- av_get_media_type_string(ost->type)[0],
- ost->file->index, ost->index);
+ OutputFilterPriv *ofp = ofp_from_ofilter(fg->outputs[0]);
+ snprintf(name, sizeof(name), "%cf%s",
+ av_get_media_type_string(ofp->ofilter.type)[0],
+ ofp->name);
} else {
snprintf(name, sizeof(name), "fc%d", fg->index);
}
diff --git a/fftools/ffmpeg_mux.c b/fftools/ffmpeg_mux.c
index e8e5c677b8..a1583edd61 100644
--- a/fftools/ffmpeg_mux.c
+++ b/fftools/ffmpeg_mux.c
@@ -140,7 +140,7 @@ static int mux_fixup_ts(Muxer *mux, MuxStream *ms, AVPacket *pkt)
OutputStream *ost = &ms->ost;
#if FFMPEG_OPT_VSYNC_DROP
- if (ost->type == AVMEDIA_TYPE_VIDEO && ost->vsync_method == VSYNC_DROP)
+ if (ost->type == AVMEDIA_TYPE_VIDEO && ms->ts_drop)
pkt->pts = pkt->dts = AV_NOPTS_VALUE;
#endif
@@ -370,10 +370,11 @@ fail:
return ret;
}
-static void thread_set_name(OutputFile *of)
+static void thread_set_name(Muxer *mux)
{
char name[16];
- snprintf(name, sizeof(name), "mux%d:%s", of->index, of->format->name);
+ snprintf(name, sizeof(name), "mux%d:%s",
+ mux->of.index, mux->fc->oformat->name);
ff_thread_setname(name);
}
@@ -417,7 +418,7 @@ int muxer_thread(void *arg)
if (ret < 0)
goto finish;
- thread_set_name(of);
+ thread_set_name(mux);
while (1) {
OutputStream *ost;
@@ -515,8 +516,10 @@ int print_sdp(const char *filename)
if (!avc)
return AVERROR(ENOMEM);
for (int i = 0; i < nb_output_files; i++) {
- if (!strcmp(output_files[i]->format->name, "rtp")) {
- avc[j] = mux_from_of(output_files[i])->fc;
+ Muxer *mux = mux_from_of(output_files[i]);
+
+ if (!strcmp(mux->fc->oformat->name, "rtp")) {
+ avc[j] = mux->fc;
j++;
}
}
@@ -756,7 +759,7 @@ int of_write_trailer(OutputFile *of)
mux->last_filesize = filesize(fc->pb);
- if (!(of->format->flags & AVFMT_NOFILE)) {
+ if (!(fc->oformat->flags & AVFMT_NOFILE)) {
ret = avio_closep(&fc->pb);
if (ret < 0) {
av_log(mux, AV_LOG_ERROR, "Error closing file: %s\n", av_err2str(ret));
@@ -794,6 +797,7 @@ static void ost_free(OutputStream **post)
ms = ms_from_ost(ost);
enc_free(&ost->enc);
+ fg_free(&ost->fg_simple);
if (ost->logfile) {
if (fclose(ost->logfile))
@@ -815,13 +819,9 @@ static void ost_free(OutputStream **post)
av_expr_free(ost->kf.pexpr);
av_freep(&ost->logfile_prefix);
- av_freep(&ost->apad);
av_freep(&ost->attachment_filename);
- av_dict_free(&ost->sws_dict);
- av_dict_free(&ost->swr_opts);
-
if (ost->enc_ctx)
av_freep(&ost->enc_ctx->stats_in);
avcodec_free_context(&ost->enc_ctx);
diff --git a/fftools/ffmpeg_mux.h b/fftools/ffmpeg_mux.h
index 16af6d38ba..1e9ea35412 100644
--- a/fftools/ffmpeg_mux.h
+++ b/fftools/ffmpeg_mux.h
@@ -75,6 +75,11 @@ typedef struct MuxStream {
int copy_initial_nonkeyframes;
int copy_prior_start;
int streamcopy_started;
+#if FFMPEG_OPT_VSYNC_DROP
+ int ts_drop;
+#endif
+
+ char *apad;
} MuxStream;
typedef struct Muxer {
diff --git a/fftools/ffmpeg_mux_init.c b/fftools/ffmpeg_mux_init.c
index d3d7d022ff..8797265145 100644
--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@@ -580,8 +580,10 @@ static enum AVPixelFormat pix_fmt_parse(OutputStream *ost, const char *name)
}
static int new_stream_video(Muxer *mux, const OptionsContext *o,
- OutputStream *ost)
+ OutputStream *ost, int *keep_pix_fmt,
+ enum VideoSyncMethod *vsync_method)
{
+ MuxStream *ms = ms_from_ost(ost);
AVFormatContext *oc = mux->fc;
AVStream *st;
char *frame_rate = NULL, *max_frame_rate = NULL, *frame_aspect_ratio = NULL;
@@ -638,7 +640,7 @@ static int new_stream_video(Muxer *mux, const OptionsContext *o,
MATCH_PER_STREAM_OPT(frame_pix_fmts, str, frame_pix_fmt, oc, st);
if (frame_pix_fmt && *frame_pix_fmt == '+') {
- ost->keep_pix_fmt = 1;
+ *keep_pix_fmt = 1;
if (!*++frame_pix_fmt)
frame_pix_fmt = NULL;
}
@@ -773,49 +775,51 @@ static int new_stream_video(Muxer *mux, const OptionsContext *o,
#endif
#if FFMPEG_OPT_VSYNC
- ost->vsync_method = video_sync_method;
+ *vsync_method = video_sync_method;
#else
- ost->vsync_method = VSYNC_AUTO;
+ *vsync_method = VSYNC_AUTO;
#endif
MATCH_PER_STREAM_OPT(fps_mode, str, fps_mode, oc, st);
if (fps_mode) {
- ret = parse_and_set_vsync(fps_mode, &ost->vsync_method, ost->file->index, ost->index, 0);
+ ret = parse_and_set_vsync(fps_mode, vsync_method, ost->file->index, ost->index, 0);
if (ret < 0)
return ret;
}
if ((ost->frame_rate.num || ost->max_frame_rate.num) &&
- !(ost->vsync_method == VSYNC_AUTO ||
- ost->vsync_method == VSYNC_CFR || ost->vsync_method == VSYNC_VSCFR)) {
+ !(*vsync_method == VSYNC_AUTO ||
+ *vsync_method == VSYNC_CFR || *vsync_method == VSYNC_VSCFR)) {
av_log(ost, AV_LOG_FATAL, "One of -r/-fpsmax was specified "
"together a non-CFR -vsync/-fps_mode. This is contradictory.\n");
return AVERROR(EINVAL);
}
- if (ost->vsync_method == VSYNC_AUTO) {
+ if (*vsync_method == VSYNC_AUTO) {
if (ost->frame_rate.num || ost->max_frame_rate.num) {
- ost->vsync_method = VSYNC_CFR;
+ *vsync_method = VSYNC_CFR;
} else if (!strcmp(oc->oformat->name, "avi")) {
- ost->vsync_method = VSYNC_VFR;
+ *vsync_method = VSYNC_VFR;
} else {
- ost->vsync_method = (oc->oformat->flags & AVFMT_VARIABLE_FPS) ?
- ((oc->oformat->flags & AVFMT_NOTIMESTAMPS) ?
- VSYNC_PASSTHROUGH : VSYNC_VFR) :
- VSYNC_CFR;
+ *vsync_method = (oc->oformat->flags & AVFMT_VARIABLE_FPS) ?
+ ((oc->oformat->flags & AVFMT_NOTIMESTAMPS) ?
+ VSYNC_PASSTHROUGH : VSYNC_VFR) : VSYNC_CFR;
}
- if (ost->ist && ost->vsync_method == VSYNC_CFR) {
+ if (ost->ist && *vsync_method == VSYNC_CFR) {
const InputFile *ifile = ost->ist->file;
if (ifile->nb_streams == 1 && ifile->input_ts_offset == 0)
- ost->vsync_method = VSYNC_VSCFR;
+ *vsync_method = VSYNC_VSCFR;
}
- if (ost->vsync_method == VSYNC_CFR && copy_ts) {
- ost->vsync_method = VSYNC_VSCFR;
+ if (*vsync_method == VSYNC_CFR && copy_ts) {
+ *vsync_method = VSYNC_VSCFR;
}
}
- ost->is_cfr = (ost->vsync_method == VSYNC_CFR || ost->vsync_method == VSYNC_VSCFR);
+#if FFMPEG_OPT_VSYNC_DROP
+ if (*vsync_method == VSYNC_DROP)
+ ms->ts_drop = 1;
+#endif
}
return 0;
@@ -824,6 +828,7 @@ static int new_stream_video(Muxer *mux, const OptionsContext *o,
static int new_stream_audio(Muxer *mux, const OptionsContext *o,
OutputStream *ost)
{
+ MuxStream *ms = ms_from_ost(ost);
AVFormatContext *oc = mux->fc;
AVStream *st = ost->st;
@@ -832,7 +837,6 @@ static int new_stream_audio(Muxer *mux, const OptionsContext *o,
int channels = 0;
char *layout = NULL;
char *sample_fmt = NULL;
- const char *apad = NULL;
MATCH_PER_STREAM_OPT(audio_channels, i, channels, oc, st);
if (channels) {
@@ -855,12 +859,7 @@ static int new_stream_audio(Muxer *mux, const OptionsContext *o,
MATCH_PER_STREAM_OPT(audio_sample_rate, i, audio_enc->sample_rate, oc, st);
- MATCH_PER_STREAM_OPT(apad, str, apad, oc, st);
- if (apad) {
- ost->apad = av_strdup(apad);
- if (!ost->apad)
- return AVERROR(ENOMEM);
- }
+ MATCH_PER_STREAM_OPT(apad, str, ms->apad, oc, st);
}
return 0;
@@ -1041,7 +1040,9 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
OutputStream *ost;
const AVCodec *enc;
AVStream *st;
- int ret = 0;
+ int ret = 0, keep_pix_fmt = 0, autoscale = 1;
+ AVRational enc_tb = { 0, 0 };
+ enum VideoSyncMethod vsync_method = VSYNC_AUTO;
const char *bsfs = NULL, *time_base = NULL;
char *filters = NULL, *next, *codec_tag = NULL;
double qscale = -1;
@@ -1164,8 +1165,8 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
return ret;
MATCH_PER_STREAM_OPT(presets, str, preset, oc, st);
- ost->autoscale = 1;
- MATCH_PER_STREAM_OPT(autoscale, i, ost->autoscale, oc, st);
+
+ MATCH_PER_STREAM_OPT(autoscale, i, autoscale, oc, st);
if (preset && (!(ret = get_preset_file_2(preset, enc->codec->name, &s)))) {
AVBPrint bprint;
av_bprint_init(&bprint, 0, AV_BPRINT_SIZE_UNLIMITED);
@@ -1233,8 +1234,12 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
}
MATCH_PER_STREAM_OPT(enc_time_bases, str, enc_time_base, oc, st);
- if (enc_time_base) {
+ if (enc_time_base && type == AVMEDIA_TYPE_SUBTITLE)
+ av_log(ost, AV_LOG_WARNING,
+ "-enc_time_base not supported for subtitles, ignoring\n");
+ else if (enc_time_base) {
AVRational q;
+
if (!strcmp(enc_time_base, "demux")) {
q = (AVRational){ ENC_TIME_BASE_DEMUX, 0 };
} else if (!strcmp(enc_time_base, "filter")) {
@@ -1256,7 +1261,7 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
#endif
}
- ost->enc_timebase = q;
+ enc_tb = q;
}
} else {
ret = filter_codec_opts(o->g->codec_opts, AV_CODEC_ID_NONE, oc, st,
@@ -1346,17 +1351,11 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
if (oc->oformat->flags & AVFMT_GLOBALHEADER && ost->enc_ctx)
ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
- av_dict_copy(&ost->sws_dict, o->g->sws_dict, 0);
-
- av_dict_copy(&ost->swr_opts, o->g->swr_opts, 0);
- if (ost->enc_ctx && av_get_exact_bits_per_sample(ost->enc_ctx->codec_id) == 24)
- av_dict_set(&ost->swr_opts, "output_sample_bits", "24", 0);
-
MATCH_PER_STREAM_OPT(copy_initial_nonkeyframes, i,
ms->copy_initial_nonkeyframes, oc, st);
switch (type) {
- case AVMEDIA_TYPE_VIDEO: ret = new_stream_video (mux, o, ost); break;
+ case AVMEDIA_TYPE_VIDEO: ret = new_stream_video (mux, o, ost, &keep_pix_fmt, &vsync_method); break;
case AVMEDIA_TYPE_AUDIO: ret = new_stream_audio (mux, o, ost); break;
case AVMEDIA_TYPE_SUBTITLE: ret = new_stream_subtitle (mux, o, ost); break;
}
@@ -1371,14 +1370,68 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
if (ost->enc &&
(type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO)) {
+ const AVDictionaryEntry *e;
+ char name[16];
+ OutputFilterOptions opts = {
+ .enc = enc,
+ .name = name,
+ .format = (type == AVMEDIA_TYPE_VIDEO) ?
+ ost->enc_ctx->pix_fmt : ost->enc_ctx->sample_fmt,
+ .width = ost->enc_ctx->width,
+ .height = ost->enc_ctx->height,
+ .vsync_method = vsync_method,
+ .sample_rate = ost->enc_ctx->sample_rate,
+ .ch_layout = ost->enc_ctx->ch_layout,
+ .sws_opts = o->g->sws_dict,
+ .swr_opts = o->g->swr_opts,
+ .output_tb = enc_tb,
+ .trim_start_us = mux->of.start_time,
+ .trim_duration_us = mux->of.recording_time,
+ .ts_offset = mux->of.start_time == AV_NOPTS_VALUE ?
+ 0 : mux->of.start_time,
+ .flags = OFILTER_FLAG_DISABLE_CONVERT * !!keep_pix_fmt |
+ OFILTER_FLAG_AUTOSCALE * !!autoscale |
+ OFILTER_FLAG_AUDIO_24BIT * !!(av_get_exact_bits_per_sample(ost->enc_ctx->codec_id) == 24),
+ };
+
+ snprintf(name, sizeof(name), "#%d:%d", mux->of.index, ost->index);
+
+ e = av_dict_get(ost->encoder_opts, "threads", NULL, 0);
+ if (e)
+ opts.nb_threads = e->value;
+
+ // MJPEG encoder exports a full list of supported pixel formats,
+ // but the full-range ones are experimental-only.
+ // Restrict the auto-conversion list unless -strict experimental
+ // has been specified.
+ if (!strcmp(enc->name, "mjpeg")) {
+ // FIXME: YUV420P etc. are actually supported with full color range,
+ // yet the latter information isn't available here.
+ static const enum AVPixelFormat mjpeg_formats[] =
+ { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
+ AV_PIX_FMT_NONE };
+
+ const AVDictionaryEntry *strict = av_dict_get(ost->encoder_opts, "strict", NULL, 0);
+ int strict_val = ost->enc_ctx->strict_std_compliance;
+
+ if (strict) {
+ const AVOption *o = av_opt_find(ost->enc_ctx, strict->key, NULL, 0, 0);
+ av_assert0(o);
+ av_opt_eval_int(ost->enc_ctx, o, strict->value, &strict_val);
+ }
+
+ if (strict_val > FF_COMPLIANCE_UNOFFICIAL)
+ opts.pix_fmts = mjpeg_formats;
+ }
+
if (ofilter) {
ost->filter = ofilter;
- ret = ofilter_bind_ost(ofilter, ost, ms->sch_idx_enc);
+ ret = ofilter_bind_ost(ofilter, ost, ms->sch_idx_enc, &opts);
if (ret < 0)
return ret;
} else {
ret = init_simple_filtergraph(ost->ist, ost, filters,
- mux->sch, ms->sch_idx_enc);
+ mux->sch, ms->sch_idx_enc, &opts);
if (ret < 0) {
av_log(ost, AV_LOG_ERROR,
"Error initializing a simple filtergraph\n");
@@ -1774,7 +1827,7 @@ static int create_streams(Muxer *mux, const OptionsContext *o)
for (int j = 0; j < fg->nb_outputs; j++) {
OutputFilter *ofilter = fg->outputs[j];
- if (ofilter->linklabel || ofilter->ost)
+ if (ofilter->linklabel || ofilter->bound)
continue;
auto_disable |= 1 << ofilter->type;
@@ -1837,6 +1890,33 @@ static int create_streams(Muxer *mux, const OptionsContext *o)
}
}
+ // handle -apad
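+ // -apad is only honoured when -shortest was given and the file also
+ // contains a video stream: the audio is padded so that -shortest ends the
+ // output on the video rather than on a shorter audio stream. The
+ // per-stream value is forwarded to the corresponding output filter below.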
+ if (o->shortest) {
+ int have_video = 0;
+
+ for (unsigned i = 0; i < mux->of.nb_streams; i++)
+ if (mux->of.streams[i]->type == AVMEDIA_TYPE_VIDEO) {
+ have_video = 1;
+ break;
+ }
+
+ for (unsigned i = 0; have_video && i < mux->of.nb_streams; i++) {
+ MuxStream *ms = ms_from_ost(mux->of.streams[i]);
+ OutputFilter *ofilter = ms->ost.filter;
+
+ if (ms->ost.type != AVMEDIA_TYPE_AUDIO || !ms->apad || !ofilter)
+ continue;
+
+ ofilter->apad = av_strdup(ms->apad);
+ if (!ofilter->apad)
+ return AVERROR(ENOMEM);
+ }
+ }
+ for (unsigned i = 0; i < mux->of.nb_streams; i++) {
+ MuxStream *ms = ms_from_ost(mux->of.streams[i]);
+ ms->apad = NULL;
+ }
+
if (!oc->nb_streams && !(oc->oformat->flags & AVFMT_NOSTREAMS)) {
av_dump_format(oc, nb_output_files - 1, oc->url, 1);
av_log(mux, AV_LOG_ERROR, "Output file does not contain any stream\n");
@@ -1846,7 +1926,8 @@ static int create_streams(Muxer *mux, const OptionsContext *o)
return 0;
}
-static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_us)
+static int setup_sync_queues(Muxer *mux, AVFormatContext *oc,
+ int64_t buf_size_us, int shortest)
{
OutputFile *of = &mux->of;
int nb_av_enc = 0, nb_audio_fs = 0, nb_interleaved = 0;
@@ -1872,7 +1953,7 @@ static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_u
limit_frames_av_enc |= (ms->max_frames < INT64_MAX) && IS_AV_ENC(ost, type);
}
- if (!((nb_interleaved > 1 && of->shortest) ||
+ if (!((nb_interleaved > 1 && shortest) ||
(nb_interleaved > 0 && limit_frames) ||
nb_audio_fs))
return 0;
@@ -1888,7 +1969,7 @@ static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_u
* different encoders run in different threads and need external
* synchronization, while muxer sync queues can be handled inside the muxer
*/
- if ((of->shortest && nb_av_enc > 1) || limit_frames_av_enc || nb_audio_fs) {
+ if ((shortest && nb_av_enc > 1) || limit_frames_av_enc || nb_audio_fs) {
int sq_idx, ret;
sq_idx = sch_add_sq_enc(mux->sch, buf_size_us, mux);
@@ -1904,7 +1985,7 @@ static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_u
continue;
ret = sch_sq_add_enc(mux->sch, sq_idx, ms->sch_idx_enc,
- of->shortest || ms->max_frames < INT64_MAX,
+ shortest || ms->max_frames < INT64_MAX,
ms->max_frames);
if (ret < 0)
return ret;
@@ -1931,7 +2012,7 @@ static int setup_sync_queues(Muxer *mux, AVFormatContext *oc, int64_t buf_size_u
continue;
ms->sq_idx_mux = sq_add_stream(mux->sq_mux,
- of->shortest || ms->max_frames < INT64_MAX);
+ shortest || ms->max_frames < INT64_MAX);
if (ms->sq_idx_mux < 0)
return ms->sq_idx_mux;
@@ -2151,11 +2232,160 @@ fail:
return ret;
}
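+/* Append all non-default AVOptions of obj (child objects included) to bp as
+ * ':'-separated key=value pairs. Returns the number of characters written on
+ * success, a negative error code on failure. */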
+static int of_serialize_options(Muxer *mux, void *obj, AVBPrint *bp)
+{
+ char *ptr;
+ int ret;
+
+ ret = av_opt_serialize(obj, 0, AV_OPT_SERIALIZE_SKIP_DEFAULTS | AV_OPT_SERIALIZE_SEARCH_CHILDREN,
+ &ptr, '=', ':');
+ if (ret < 0) {
+ av_log(mux, AV_LOG_ERROR, "Failed to serialize group\n");
+ return ret;
+ }
+
+ av_bprintf(bp, "%s", ptr);
+ ret = strlen(ptr);
+ av_free(ptr);
+
+ return ret;
+}
+
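+/* Helpers around of_serialize_options(): SERIALIZE emits one child object of
+ * 'parent', SERIALIZE_LOOP_SUBBLOCK emits every subblock of an IAMF parameter
+ * definition, and SERIALIZE_LOOP emits an array of children, prefixing each
+ * entry with the given separator and child name. All of them store the result
+ * in the local 'ret' and return from the calling function on error. */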
+#define SERIALIZE(parent, child) do { \
+ ret = of_serialize_options(mux, parent->child, bp); \
+ if (ret < 0) \
+ return ret; \
+} while (0)
+
+#define SERIALIZE_LOOP_SUBBLOCK(obj) do { \
+ for (int k = 0; k < obj->nb_subblocks; k++) { \
+ ret = of_serialize_options(mux, \
+ av_iamf_param_definition_get_subblock(obj, k), bp); \
+ if (ret < 0) \
+ return ret; \
+ } \
+} while (0)
+
+#define SERIALIZE_LOOP(parent, child, suffix, separator) do { \
+ for (int j = 0; j < parent->nb_## child ## suffix; j++) { \
+ av_bprintf(bp, separator#child "="); \
+ SERIALIZE(parent, child ## suffix[j]); \
+ } \
+} while (0)
+
+static int64_t get_stream_group_index_from_id(Muxer *mux, int64_t id)
+{
+ AVFormatContext *oc = mux->fc;
+
+ for (unsigned i = 0; i < oc->nb_stream_groups; i++)
+ if (oc->stream_groups[i]->id == id)
+ return oc->stream_groups[i]->index;
+
+ return AVERROR(EINVAL);
+}
+
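+/* Handle the "map" key of a -stream_group specification: the value names an
+ * input file index and a stream group index within that file. The referenced
+ * input group's parameters (including, for IAMF groups, their layers,
+ * submixes and parameter-definition subblocks) are serialized into bp/dict so
+ * that an equivalent group can be created for the output file. */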
+static int of_map_group(Muxer *mux, AVDictionary **dict, AVBPrint *bp, const char *map)
+{
+ AVStreamGroup *stg;
+ int ret, file_idx, stream_idx;
+ char *ptr;
+
+ file_idx = strtol(map, &ptr, 0);
+ if (file_idx >= nb_input_files || file_idx < 0 || map == ptr) {
+ av_log(mux, AV_LOG_ERROR, "Invalid input file index: %d.\n", file_idx);
+ return AVERROR(EINVAL);
+ }
+
+ stream_idx = strtol(*ptr == '=' ? ptr + 1 : ptr, &ptr, 0);
+ if (*ptr || stream_idx >= input_files[file_idx]->ctx->nb_stream_groups || stream_idx < 0) {
+ av_log(mux, AV_LOG_ERROR, "Invalid input stream group index: %d.\n", stream_idx);
+ return AVERROR(EINVAL);
+ }
+
+ stg = input_files[file_idx]->ctx->stream_groups[stream_idx];
+ ret = of_serialize_options(mux, stg, bp);
+ if (ret < 0)
+ return ret;
+
+ ret = av_dict_parse_string(dict, bp->str, "=", ":", 0);
+ if (ret < 0)
+ av_log(mux, AV_LOG_ERROR, "Error parsing mapped group specification %s\n", ptr);
+ av_dict_set_int(dict, "type", stg->type, 0);
+
+ av_bprint_clear(bp);
+ switch(stg->type) {
+ case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: {
+ AVIAMFAudioElement *audio_element = stg->params.iamf_audio_element;
+
+ if (audio_element->demixing_info) {
+ AVIAMFParamDefinition *demixing_info = audio_element->demixing_info;
+ av_bprintf(bp, ",demixing=");
+ SERIALIZE(audio_element, demixing_info);
+ if (ret && demixing_info->nb_subblocks)
+ av_bprintf(bp, ":");
+ SERIALIZE_LOOP_SUBBLOCK(demixing_info);
+ }
+ if (audio_element->recon_gain_info) {
+ AVIAMFParamDefinition *recon_gain_info = audio_element->recon_gain_info;
+ av_bprintf(bp, ",recon_gain=");
+ SERIALIZE(audio_element, recon_gain_info);
+ if (ret && recon_gain_info->nb_subblocks)
+ av_bprintf(bp, ":");
+ SERIALIZE_LOOP_SUBBLOCK(recon_gain_info);
+ }
+ SERIALIZE_LOOP(audio_element, layer, s, ",");
+ break;
+ }
+ case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: {
+ AVIAMFMixPresentation *mix = stg->params.iamf_mix_presentation;
+
+ for (int i = 0; i < mix->nb_submixes; i++) {
+ AVIAMFSubmix *submix = mix->submixes[i];
+ AVIAMFParamDefinition *output_mix_config = submix->output_mix_config;
+
+ av_bprintf(bp, ",submix=");
+ SERIALIZE(mix, submixes[i]);
+ if (ret && output_mix_config->nb_subblocks)
+ av_bprintf(bp, ":");
+ SERIALIZE_LOOP_SUBBLOCK(output_mix_config);
+ for (int j = 0; j < submix->nb_elements; j++) {
+ AVIAMFSubmixElement *element = submix->elements[j];
+ AVIAMFParamDefinition *element_mix_config = element->element_mix_config;
+ int64_t id = get_stream_group_index_from_id(mux, element->audio_element_id);
+
+ if (id < 0) {
+ av_log(mux, AV_LOG_ERROR, "Invalid or missing stream group index in"
+ "submix element");
+ return id;
+ }
+
+ av_bprintf(bp, "|element=");
+ SERIALIZE(submix, elements[j]);
+ if (ret && element_mix_config->nb_subblocks)
+ av_bprintf(bp, ":");
+ SERIALIZE_LOOP_SUBBLOCK(element_mix_config);
+ if (ret)
+ av_bprintf(bp, ":");
+ av_bprintf(bp, "stg=%"PRId64, id);
+ }
+ SERIALIZE_LOOP(submix, layout, s, "|");
+ }
+ break;
+ }
+ default:
+ av_log(mux, AV_LOG_ERROR, "Unsupported mapped group type %d.\n", stg->type);
+ ret = AVERROR(EINVAL);
+ break;
+ }
+ return 0;
+}
+
static int of_parse_group_token(Muxer *mux, const char *token, char *ptr)
{
AVFormatContext *oc = mux->fc;
AVStreamGroup *stg;
AVDictionary *dict = NULL, *tmp = NULL;
+ char *mapped_string = NULL;
const AVDictionaryEntry *e;
const AVOption opts[] = {
{ "type", "Set group type", offsetof(AVStreamGroup, type), AV_OPT_TYPE_INT,
@@ -2181,8 +2411,31 @@ static int of_parse_group_token(Muxer *mux, const char *token, char *ptr)
return ret;
}
+ av_dict_copy(&tmp, dict, 0);
+ e = av_dict_get(dict, "map", NULL, 0);
+ if (e) {
+ AVBPrint bp;
+
+ if (ptr) {
+ av_log(mux, AV_LOG_ERROR, "Unexpected extra parameters when mapping a"
+ " stream group\n");
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+
+ av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC);
+ ret = of_map_group(mux, &tmp, &bp, e->value);
+ if (ret < 0) {
+ av_bprint_finalize(&bp, NULL);
+ goto end;
+ }
+
+ av_bprint_finalize(&bp, &mapped_string);
+ ptr = mapped_string;
+ }
+
// "type" is not a user settable AVOption in AVStreamGroup, so handle it here
- e = av_dict_get(dict, "type", NULL, 0);
+ e = av_dict_get(tmp, "type", NULL, 0);
if (!e) {
av_log(mux, AV_LOG_ERROR, "No type specified for Stream Group in \"%s\"\n", token);
ret = AVERROR(EINVAL);
@@ -2197,7 +2450,6 @@ static int of_parse_group_token(Muxer *mux, const char *token, char *ptr)
goto end;
}
- av_dict_copy(&tmp, dict, 0);
stg = avformat_stream_group_create(oc, type, &tmp);
if (!stg) {
ret = AVERROR(ENOMEM);
@@ -2250,6 +2502,7 @@ static int of_parse_group_token(Muxer *mux, const char *token, char *ptr)
// make sure that nothing but "st" and "stg" entries are left in the dict
e = NULL;
+ av_dict_set(&tmp, "map", NULL, 0);
av_dict_set(&tmp, "type", NULL, 0);
while (e = av_dict_iterate(tmp, e)) {
if (!strcmp(e->key, "st") || !strcmp(e->key, "stg"))
@@ -2262,6 +2515,7 @@ static int of_parse_group_token(Muxer *mux, const char *token, char *ptr)
ret = 0;
end:
+ av_free(mapped_string);
av_dict_free(&dict);
av_dict_free(&tmp);
@@ -2968,7 +3222,6 @@ int of_open(const OptionsContext *o, const char *filename, Scheduler *sch)
of->recording_time = recording_time;
of->start_time = o->start_time;
- of->shortest = o->shortest;
mux->limit_filesize = o->limit_filesize;
av_dict_copy(&mux->opts, o->g->format_opts, 0);
@@ -2988,7 +3241,6 @@ int of_open(const OptionsContext *o, const char *filename, Scheduler *sch)
av_strlcat(mux->log_name, oc->oformat->name, sizeof(mux->log_name));
- of->format = oc->oformat;
if (recording_time != INT64_MAX)
oc->duration = recording_time;
@@ -3084,7 +3336,8 @@ int of_open(const OptionsContext *o, const char *filename, Scheduler *sch)
return err;
}
- err = setup_sync_queues(mux, oc, o->shortest_buf_duration * AV_TIME_BASE);
+ err = setup_sync_queues(mux, oc, o->shortest_buf_duration * AV_TIME_BASE,
+ o->shortest);
if (err < 0) {
av_log(mux, AV_LOG_FATAL, "Error setting up output sync queues\n");
return err;
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index f764da1ed4..910e4a336b 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -317,7 +317,7 @@ static int opt_filter_threads(void *optctx, const char *opt, const char *arg)
static int opt_abort_on(void *optctx, const char *opt, const char *arg)
{
static const AVOption opts[] = {
- { "abort_on" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
+ { "abort_on" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, (double)INT64_MAX, .unit = "flags" },
{ "empty_output" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = ABORT_ON_FLAG_EMPTY_OUTPUT }, .unit = "flags" },
{ "empty_output_stream", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = ABORT_ON_FLAG_EMPTY_OUTPUT_STREAM }, .unit = "flags" },
{ NULL },
@@ -1264,12 +1264,10 @@ int ffmpeg_parse_options(int argc, char **argv, Scheduler *sch)
}
// bind unbound filtergraph inputs/outputs and check consistency
- for (int i = 0; i < nb_filtergraphs; i++) {
- ret = fg_finalise_bindings(filtergraphs[i]);
- if (ret < 0) {
- errmsg = "binding filtergraph inputs/outputs";
- goto fail;
- }
+ ret = fg_finalise_bindings();
+ if (ret < 0) {
+ errmsg = "binding filtergraph inputs/outputs";
+ goto fail;
}
correct_input_start_times();
diff --git a/fftools/ffmpeg_sched.c b/fftools/ffmpeg_sched.c
index f8485db30b..e58b00ea97 100644
--- a/fftools/ffmpeg_sched.c
+++ b/fftools/ffmpeg_sched.c
@@ -983,20 +983,40 @@ int sch_connect(Scheduler *sch, SchedulerNode src, SchedulerNode dst)
}
case SCH_NODE_TYPE_FILTER_OUT: {
SchFilterOut *fo;
- SchEnc *enc;
av_assert0(src.idx < sch->nb_filters &&
src.idx_stream < sch->filters[src.idx].nb_outputs);
- // filtered frames go to encoding
- av_assert0(dst.type == SCH_NODE_TYPE_ENC &&
- dst.idx < sch->nb_enc);
+ fo = &sch->filters[src.idx].outputs[src.idx_stream];
- fo = &sch->filters[src.idx].outputs[src.idx_stream];
- enc = &sch->enc[dst.idx];
+ av_assert0(!fo->dst.type);
+ fo->dst = dst;
+
+ // filtered frames go to encoding or another filtergraph
+ switch (dst.type) {
+ case SCH_NODE_TYPE_ENC: {
+ SchEnc *enc;
+
+ av_assert0(dst.idx < sch->nb_enc);
+ enc = &sch->enc[dst.idx];
+
+ av_assert0(!enc->src.type);
+ enc->src = src;
+ break;
+ }
+ case SCH_NODE_TYPE_FILTER_IN: {
+ SchFilterIn *fi;
+
+ av_assert0(dst.idx < sch->nb_filters &&
+ dst.idx_stream < sch->filters[dst.idx].nb_inputs);
+ fi = &sch->filters[dst.idx].inputs[dst.idx_stream];
+
+ av_assert0(!fi->src.type);
+ fi->src = src;
+ break;
+ }
+ default: av_assert0(0);
+ }
- av_assert0(!fo->dst.type && !enc->src.type);
- fo->dst = dst;
- enc->src = src;
break;
}
@@ -1351,24 +1371,13 @@ static int check_acyclic(Scheduler *sch)
goto fail;
}
- // trace the transcoding graph upstream from every output stream
- // fed by a filtergraph
- for (unsigned i = 0; i < sch->nb_mux; i++) {
- SchMux *mux = &sch->mux[i];
-
- for (unsigned j = 0; j < mux->nb_streams; j++) {
- SchMuxStream *ms = &mux->streams[j];
- SchedulerNode src = ms->src_sched;
-
- if (src.type != SCH_NODE_TYPE_FILTER_OUT)
- continue;
- src.idx_stream = 0;
-
- ret = check_acyclic_for_output(sch, src, filters_visited, filters_stack);
- if (ret < 0) {
- av_log(mux, AV_LOG_ERROR, "Transcoding graph has a cycle\n");
- goto fail;
- }
+ // trace the transcoding graph upstream from every filtergraph
+ for (unsigned i = 0; i < sch->nb_filters; i++) {
+ ret = check_acyclic_for_output(sch, (SchedulerNode){ .idx = i },
+ filters_visited, filters_stack);
+ if (ret < 0) {
+ av_log(&sch->filters[i], AV_LOG_ERROR, "Transcoding graph has a cycle\n");
+ goto fail;
}
}
@@ -1484,13 +1493,18 @@ static int start_prepare(Scheduler *sch)
"Filtergraph input %u not connected to a source\n", j);
return AVERROR(EINVAL);
}
- av_assert0(fi->src.type == SCH_NODE_TYPE_DEC);
- dec = &sch->dec[fi->src.idx];
- switch (dec->src.type) {
- case SCH_NODE_TYPE_DEMUX: fi->src_sched = dec->src; break;
- case SCH_NODE_TYPE_ENC: fi->src_sched = sch->enc[dec->src.idx].src; break;
- default: av_assert0(0);
+ if (fi->src.type == SCH_NODE_TYPE_FILTER_OUT)
+ fi->src_sched = fi->src;
+ else {
+ av_assert0(fi->src.type == SCH_NODE_TYPE_DEC);
+ dec = &sch->dec[fi->src.idx];
+
+ switch (dec->src.type) {
+ case SCH_NODE_TYPE_DEMUX: fi->src_sched = dec->src; break;
+ case SCH_NODE_TYPE_ENC: fi->src_sched = sch->enc[dec->src.idx].src; break;
+ default: av_assert0(0);
+ }
}
}
@@ -2379,12 +2393,17 @@ void sch_filter_receive_finish(Scheduler *sch, unsigned fg_idx, unsigned in_idx)
int sch_filter_send(Scheduler *sch, unsigned fg_idx, unsigned out_idx, AVFrame *frame)
{
SchFilterGraph *fg;
+ SchedulerNode dst;
av_assert0(fg_idx < sch->nb_filters);
fg = &sch->filters[fg_idx];
av_assert0(out_idx < fg->nb_outputs);
- return send_to_enc(sch, &sch->enc[fg->outputs[out_idx].dst.idx], frame);
+ dst = fg->outputs[out_idx].dst;
+
+ return (dst.type == SCH_NODE_TYPE_ENC) ?
+ send_to_enc (sch, &sch->enc[dst.idx], frame) :
+ send_to_filter(sch, &sch->filters[dst.idx], dst.idx_stream, frame);
}
static int filter_done(Scheduler *sch, unsigned fg_idx)
@@ -2396,8 +2415,11 @@ static int filter_done(Scheduler *sch, unsigned fg_idx)
tq_receive_finish(fg->queue, i);
for (unsigned i = 0; i < fg->nb_outputs; i++) {
- SchEnc *enc = &sch->enc[fg->outputs[i].dst.idx];
- int err = send_to_enc(sch, enc, NULL);
+ SchedulerNode dst = fg->outputs[i].dst;
+ int err = (dst.type == SCH_NODE_TYPE_ENC) ?
+ send_to_enc (sch, &sch->enc[dst.idx], NULL) :
+ send_to_filter(sch, &sch->filters[dst.idx], dst.idx_stream, NULL);
+
if (err < 0 && err != AVERROR_EOF)
ret = err_merge(ret, err);
}
diff --git a/fftools/ffmpeg_sched.h b/fftools/ffmpeg_sched.h
index e51c26cec9..7cd839016c 100644
--- a/fftools/ffmpeg_sched.h
+++ b/fftools/ffmpeg_sched.h
@@ -41,7 +41,8 @@
* - filtergraphs, each containing zero or more inputs (0 in case the
* filtergraph contains a lavfi source filter), and one or more outputs; the
* inputs and outputs need not have matching media types;
- * each filtergraph input receives decoded frames from some decoder;
+ * each filtergraph input receives decoded frames from some decoder or another
+ * filtergraph output;
* filtered frames from each output are sent to some encoder;
* - encoders, which receive decoded frames from some decoder (subtitles) or
* some filtergraph output (audio/video), encode them, and send encoded
@@ -51,6 +52,9 @@
* encoder (transcoding); those packets are interleaved and written out by the
* muxer.
*
+ * The structure formed by the above components is a directed acyclic graph
+ * (absence of cycles is checked at startup).
+ *
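+ * For example, a chain demuxer -> decoder -> filtergraph -> filtergraph ->
+ * encoder -> muxer is a valid configuration, while a filtergraph that
+ * (directly or through other filtergraphs) feeds one of its own inputs is
+ * rejected.
+ *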
* There must be at least one muxer instance, otherwise the transcode produces
* no output and is meaningless. Otherwise, in a generic transcoding scenario
* there may be arbitrary number of instances of any of the above components,
diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index d4300d5d46..5a66bfa38d 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -2391,12 +2391,13 @@ static int audio_decode_frame(VideoState *is)
av_channel_layout_compare(&af->frame->ch_layout, &is->audio_src.ch_layout) ||
af->frame->sample_rate != is->audio_src.freq ||
(wanted_nb_samples != af->frame->nb_samples && !is->swr_ctx)) {
+ int ret;
swr_free(&is->swr_ctx);
- swr_alloc_set_opts2(&is->swr_ctx,
+ ret = swr_alloc_set_opts2(&is->swr_ctx,
&is->audio_tgt.ch_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
&af->frame->ch_layout, af->frame->format, af->frame->sample_rate,
0, NULL);
- if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
+ if (ret < 0 || swr_init(is->swr_ctx) < 0) {
av_log(NULL, AV_LOG_ERROR,
"Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
af->frame->sample_rate, av_get_sample_fmt_name(af->frame->format), af->frame->ch_layout.nb_channels,
@@ -2693,7 +2694,7 @@ static int stream_component_open(VideoState *is, int stream_index)
if ((ret = avcodec_open2(avctx, codec, &opts)) < 0) {
goto fail;
}
- if ((t = av_dict_get(opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ if ((t = av_dict_iterate(opts, NULL))) {
av_log(NULL, AV_LOG_ERROR, "Option %s not found.\n", t->key);
ret = AVERROR_OPTION_NOT_FOUND;
goto fail;
@@ -2861,7 +2862,7 @@ static int read_thread(void *arg)
if (scan_all_pmts_set)
av_dict_set(&format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE);
- if ((t = av_dict_get(format_opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ if ((t = av_dict_iterate(format_opts, NULL))) {
av_log(NULL, AV_LOG_ERROR, "Option %s not found.\n", t->key);
ret = AVERROR_OPTION_NOT_FOUND;
goto fail;
diff --git a/fftools/ffprobe.c b/fftools/ffprobe.c
index 0d4cd0b048..2d38e5dfdc 100644
--- a/fftools/ffprobe.c
+++ b/fftools/ffprobe.c
@@ -3324,8 +3324,8 @@ static int show_stream(WriterContext *w, AVFormatContext *fmt_ctx, int stream_id
if (sar.num) {
print_q("sample_aspect_ratio", sar, ':');
av_reduce(&dar.num, &dar.den,
- par->width * sar.num,
- par->height * sar.den,
+ (int64_t) par->width * sar.num,
+ (int64_t) par->height * sar.den,
1024*1024);
print_q("display_aspect_ratio", dar, ':');
} else {
@@ -3951,7 +3951,7 @@ static int open_input_file(InputFile *ifile, const char *filename,
exit(1);
}
- if ((t = av_dict_get(opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ if ((t = av_dict_iterate(opts, NULL))) {
av_log(NULL, AV_LOG_ERROR, "Option %s for input stream %d not found\n",
t->key, stream->index);
return AVERROR_OPTION_NOT_FOUND;
diff --git a/fftools/opt_common.c b/fftools/opt_common.c
index 947a226d8d..9d2d5184a0 100644
--- a/fftools/opt_common.c
+++ b/fftools/opt_common.c
@@ -724,10 +724,13 @@ int show_codecs(void *optctx, const char *opt, const char *arg)
return 0;
}
-static void print_codecs(int encoder)
+static int print_codecs(int encoder)
{
const AVCodecDescriptor **codecs;
- unsigned i, nb_codecs = get_codecs_sorted(&codecs);
+ int i, nb_codecs = get_codecs_sorted(&codecs);
+
+ if (nb_codecs < 0)
+ return nb_codecs;
printf("%s:\n"
" V..... = Video\n"
@@ -762,18 +765,17 @@ static void print_codecs(int encoder)
}
}
av_free(codecs);
+ return 0;
}
int show_decoders(void *optctx, const char *opt, const char *arg)
{
- print_codecs(0);
- return 0;
+ return print_codecs(0);
}
int show_encoders(void *optctx, const char *opt, const char *arg)
{
- print_codecs(1);
- return 0;
+ return print_codecs(1);
}
int show_bsfs(void *optctx, const char *opt, const char *arg)
diff --git a/libavcodec/012v.c b/libavcodec/012v.c
index fa5eb0f95e..7bb60219cc 100644
--- a/libavcodec/012v.c
+++ b/libavcodec/012v.c
@@ -64,9 +64,6 @@ static int zero12v_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
-
line_end = avpkt->data + stride;
for (line = 0; line < avctx->height; line++) {
uint16_t y_temp[6] = {0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000};
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7f6de4470e..2443d2c6fd 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -56,6 +56,7 @@ OBJS = ac3_parser.o \
qsv_api.o \
raw.o \
refstruct.o \
+ threadprogress.o \
utils.o \
version.o \
vlc.o \
@@ -63,6 +64,7 @@ OBJS = ac3_parser.o \
xiph.o \
# subsystems
+include $(SRC_PATH)/libavcodec/aac/Makefile
include $(SRC_PATH)/libavcodec/vvc/Makefile
-include $(SRC_PATH)/libavcodec/$(ARCH)/vvc/Makefile
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
@@ -85,7 +87,8 @@ OBJS-$(CONFIG_CBS_MPEG2) += cbs_mpeg2.o
OBJS-$(CONFIG_CBS_VP8) += cbs_vp8.o vp8data.o
OBJS-$(CONFIG_CBS_VP9) += cbs_vp9.o
OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o
-OBJS-$(CONFIG_DOVI_RPU) += dovi_rpu.o
+OBJS-$(CONFIG_DOVI_RPUDEC) += dovi_rpu.o dovi_rpudec.o
+OBJS-$(CONFIG_DOVI_RPUENC) += dovi_rpu.o dovi_rpuenc.o
OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o
OBJS-$(CONFIG_EVCPARSE) += evc_parse.o evc_ps.o
OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o
@@ -97,8 +100,8 @@ OBJS-$(CONFIG_GOLOMB) += golomb.o
OBJS-$(CONFIG_H263DSP) += h263dsp.o
OBJS-$(CONFIG_H264CHROMA) += h264chroma.o
OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o
-OBJS-$(CONFIG_H264PARSE) += h264_parse.o h264_ps.o h2645data.o \
- h2645_parse.o h2645_vui.o
+OBJS-$(CONFIG_H264PARSE) += h264_parse.o h264_ps.o h264data.o \
+ h2645data.o h2645_parse.o h2645_vui.o
OBJS-$(CONFIG_H264PRED) += h264pred.o
OBJS-$(CONFIG_H264QPEL) += h264qpel.o
OBJS-$(CONFIG_H264_SEI) += h264_sei.o h2645_sei.o
@@ -113,7 +116,7 @@ OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o
OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o
OBJS-$(CONFIG_IIRFILTER) += iirfilter.o
OBJS-$(CONFIG_INFLATE_WRAPPER) += zlib_wrapper.o
-OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o msmpeg4data.o
+OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o msmpeg4_vc1_data.o
OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o
OBJS-$(CONFIG_JNI) += ffjni.o jni.o
OBJS-$(CONFIG_JPEGTABLES) += jpegtables.o
@@ -177,11 +180,11 @@ OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o
OBJS-$(CONFIG_ZERO12V_DECODER) += 012v.o
OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o
OBJS-$(CONFIG_A64MULTI5_ENCODER) += a64multienc.o elbg.o
-OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aacdec_common.o aactab.o \
+OBJS-$(CONFIG_AAC_DECODER) += aactab.o \
aacsbr.o aacps_common.o aacps_float.o \
kbdwin.o \
sbrdsp.o aacpsdsp_float.o cbrt_data.o
-OBJS-$(CONFIG_AAC_FIXED_DECODER) += aacdec_fixed.o aacdec_common.o aactab.o \
+OBJS-$(CONFIG_AAC_FIXED_DECODER) += aactab.o \
aacsbr_fixed.o aacps_common.o aacps_fixed.o \
kbdwin.o \
sbrdsp_fixed.o aacpsdsp_fixed.o cbrt_data_fixed.o
@@ -265,8 +268,6 @@ OBJS-$(CONFIG_AVRP_ENCODER) += r210enc.o
OBJS-$(CONFIG_AVS_DECODER) += avs.o
OBJS-$(CONFIG_AVUI_DECODER) += avuidec.o
OBJS-$(CONFIG_AVUI_ENCODER) += avuienc.o
-OBJS-$(CONFIG_AYUV_DECODER) += v408dec.o
-OBJS-$(CONFIG_AYUV_ENCODER) += v408enc.o
OBJS-$(CONFIG_BETHSOFTVID_DECODER) += bethsoftvideo.o
OBJS-$(CONFIG_BFI_DECODER) += bfi.o
OBJS-$(CONFIG_BINK_DECODER) += bink.o binkdsp.o
@@ -284,7 +285,7 @@ OBJS-$(CONFIG_BRENDER_PIX_DECODER) += brenderpix.o
OBJS-$(CONFIG_C93_DECODER) += c93.o
OBJS-$(CONFIG_CAVS_DECODER) += cavs.o cavsdec.o cavsdsp.o \
cavsdata.o
-OBJS-$(CONFIG_CBD2_DECODER) += dpcm.o
+OBJS-$(CONFIG_CBD2_DPCM_DECODER) += dpcm.o
OBJS-$(CONFIG_CCAPTION_DECODER) += ccaption_dec.o ass.o
OBJS-$(CONFIG_CDGRAPHICS_DECODER) += cdgraphics.o
OBJS-$(CONFIG_CDTOONS_DECODER) += cdtoons.o
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 89f838eab5..9508760fa6 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -30,9 +30,6 @@
#ifndef AVCODEC_AAC_H
#define AVCODEC_AAC_H
-
-#include "aac_defines.h"
-
#define MAX_CHANNELS 64
#define MAX_ELEM_ID 16
@@ -85,20 +82,6 @@ enum ChannelPosition {
AAC_CHANNEL_CC = 5,
};
-/**
- * Predictor State
- */
-typedef struct PredictorState {
- AAC_FLOAT cor0;
- AAC_FLOAT cor1;
- AAC_FLOAT var0;
- AAC_FLOAT var1;
- AAC_FLOAT r0;
- AAC_FLOAT r1;
- AAC_FLOAT k1;
- AAC_FLOAT x_est;
-} PredictorState;
-
#define MAX_PREDICTORS 672
#define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to scale difference in 512 times
diff --git a/libavcodec/aac/Makefile b/libavcodec/aac/Makefile
new file mode 100644
index 0000000000..c3e525d373
--- /dev/null
+++ b/libavcodec/aac/Makefile
@@ -0,0 +1,7 @@
+clean::
+ $(RM) $(CLEANSUFFIXES:%=libavcodec/aac/%)
+
+OBJS-$(CONFIG_AAC_DECODER) += aac/aacdec.o aac/aacdec_tab.o \
+ aac/aacdec_float.o
+OBJS-$(CONFIG_AAC_FIXED_DECODER) += aac/aacdec.o aac/aacdec_tab.o \
+ aac/aacdec_fixed.o
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aac/aacdec.c
index b2f069f83a..7457fe6c97 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aac/aacdec.c
@@ -1,5 +1,5 @@
/*
- * AAC decoder
+ * Common parts of the AAC decoders
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
* Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
@@ -29,16 +29,38 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/**
- * @file
- * AAC decoder
- * @author Oded Shimon ( ods15 ods15 dyndns org )
- * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
- *
- * AAC decoder fixed-point implementation
- * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
- * @author Nedeljko Babic ( nedeljko.babic imgtec com )
- */
+/* We use several quantization functions here (Q31, Q30),
+ * for which we need this to be defined for them to work as expected. */
+#define USE_FIXED 1
+
+#include "config_components.h"
+
+#include <limits.h>
+#include <stddef.h>
+
+#include "aacdec.h"
+#include "aacdec_tab.h"
+
+#include "libavcodec/aac.h"
+#include "libavcodec/aac_defines.h"
+#include "libavcodec/aacsbr.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/adts_header.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/internal.h"
+#include "libavcodec/codec_internal.h"
+#include "libavcodec/decode.h"
+#include "libavcodec/profiles.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/error.h"
+#include "libavutil/log.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/tx.h"
+#include "libavutil/version.h"
/*
* supported tools
@@ -89,13 +111,6 @@
Parametric Stereo.
*/
-#include "libavutil/channel_layout.h"
-#include "libavutil/mem.h"
-#include "libavutil/thread.h"
-#include "decode.h"
-#include "internal.h"
-#include "lpc_functions.h"
-
static int output_configure(AACDecContext *ac,
uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
enum OCStatus oc_type, int get_new_frame);
@@ -134,10 +149,7 @@ static av_cold int che_configure(AACDecContext *ac,
return AVERROR_INVALIDDATA;
if (che_pos) {
if (!ac->che[type][id]) {
- int ret;
- if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
- return AVERROR(ENOMEM);
- ret = AAC_RENAME(ff_aac_sbr_ctx_init)(ac, &ac->che[type][id]->sbr, type);
+ int ret = ac->proc.sbr_ctx_alloc_init(ac, &ac->che[type][id], type);
if (ret < 0)
return ret;
}
@@ -153,8 +165,9 @@ static av_cold int che_configure(AACDecContext *ac,
}
}
} else {
- if (ac->che[type][id])
- AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][id]->sbr);
+ if (ac->che[type][id]) {
+ ac->proc.sbr_ctx_close(ac->che[type][id]);
+ }
av_freep(&ac->che[type][id]);
}
return 0;
@@ -170,8 +183,8 @@ static int frame_configure_elements(AVCodecContext *avctx)
for (id = 0; id < MAX_ELEM_ID; id++) {
ChannelElement *che = ac->che[type][id];
if (che) {
- che->ch[0].ret = che->ch[0].ret_buf;
- che->ch[1].ret = che->ch[1].ret_buf;
+ che->ch[0].output = che->ch[0].ret_buf;
+ che->ch[1].output = che->ch[1].ret_buf;
}
}
}
@@ -188,7 +201,7 @@ static int frame_configure_elements(AVCodecContext *avctx)
/* map output channel pointers to AVFrame data */
for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
if (ac->output_element[ch])
- ac->output_element[ch]->ret = (INTFLOAT *)ac->frame->extended_data[ch];
+ ac->output_element[ch]->output = (void *)ac->frame->extended_data[ch];
}
return 0;
@@ -511,7 +524,7 @@ static int output_configure(AACDecContext *ac,
return 0;
}
-static void flush(AVCodecContext *avctx)
+static av_cold void flush(AVCodecContext *avctx)
{
AACDecContext *ac= avctx->priv_data;
int type, i, j;
@@ -1075,26 +1088,6 @@ static int decode_audio_specific_config(AACDecContext *ac,
sync_extension);
}
-/**
- * linear congruential pseudorandom number generator
- *
- * @param previous_val pointer to the current state of the generator
- *
- * @return Returns a 32-bit pseudorandom integer
- */
-static av_always_inline int lcg_random(unsigned previous_val)
-{
- union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
- return v.s;
-}
-
-static void reset_all_predictors(PredictorState *ps)
-{
- int i;
- for (i = 0; i < MAX_PREDICTORS; i++)
- reset_predict_state(&ps[i]);
-}
-
static int sample_rate_idx (int rate)
{
if (92017 <= rate) return 0;
@@ -1111,64 +1104,82 @@ static int sample_rate_idx (int rate)
else return 11;
}
-static void reset_predictor_group(PredictorState *ps, int group_num)
+static av_cold int decode_close(AVCodecContext *avctx)
{
- int i;
- for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
- reset_predict_state(&ps[i]);
-}
-
-static void aacdec_init(AACDecContext *ac);
+ AACDecContext *ac = avctx->priv_data;
-static av_cold void aac_static_table_init(void)
-{
- AAC_RENAME(ff_aac_sbr_init)();
+ for (int type = 0; type < FF_ARRAY_ELEMS(ac->che); type++) {
+ for (int i = 0; i < MAX_ELEM_ID; i++) {
+ if (ac->che[type][i]) {
+ ac->proc.sbr_ctx_close(ac->che[type][i]);
+ av_freep(&ac->che[type][i]);
+ }
+ }
+ }
- ff_aacdec_common_init_once();
+ av_tx_uninit(&ac->mdct120);
+ av_tx_uninit(&ac->mdct128);
+ av_tx_uninit(&ac->mdct480);
+ av_tx_uninit(&ac->mdct512);
+ av_tx_uninit(&ac->mdct960);
+ av_tx_uninit(&ac->mdct1024);
+ av_tx_uninit(&ac->mdct_ltp);
- // window initialization
- AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
- AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
-
-#if !USE_FIXED
- AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960);
- AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120);
- AAC_RENAME(ff_init_ff_sine_windows)(9);
- ff_aac_float_common_init();
-#else
- AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
- AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
- init_sine_windows_fixed();
-#endif
+ // Compiler will optimize this branch away.
+ if (ac->is_fixed)
+ av_freep(&ac->RENAME_FIXED(fdsp));
+ else
+ av_freep(&ac->fdsp);
- AAC_RENAME(ff_cbrt_tableinit)();
+ return 0;
}
-static AVOnce aac_table_init = AV_ONCE_INIT;
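+/* Initialize the inverse MDCTs for all supported transform sizes (120-1024)
+ * and the forward MDCT used by long-term prediction. Transform type and
+ * scale depend on whether this is the fixed-point (INT32) or the float
+ * decoder; only the scale selected by ac->is_fixed is actually used. */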
+static av_cold int init_dsp(AVCodecContext *avctx)
+{
+ AACDecContext *ac = avctx->priv_data;
+ int is_fixed = ac->is_fixed, ret;
+ float scale_fixed, scale_float;
+ const float *const scalep = is_fixed ? &scale_fixed : &scale_float;
+ enum AVTXType tx_type = is_fixed ? AV_TX_INT32_MDCT : AV_TX_FLOAT_MDCT;
+
+#define MDCT_INIT(s, fn, len, sval) \
+ scale_fixed = (sval) * 128.0f; \
+ scale_float = (sval) / 32768.0f; \
+ ret = av_tx_init(&s, &fn, tx_type, 1, len, scalep, 0); \
+ if (ret < 0) \
+ return ret
+
+ MDCT_INIT(ac->mdct120, ac->mdct120_fn, 120, 1.0/120);
+ MDCT_INIT(ac->mdct128, ac->mdct128_fn, 128, 1.0/128);
+ MDCT_INIT(ac->mdct480, ac->mdct480_fn, 480, 1.0/480);
+ MDCT_INIT(ac->mdct512, ac->mdct512_fn, 512, 1.0/512);
+ MDCT_INIT(ac->mdct960, ac->mdct960_fn, 960, 1.0/960);
+ MDCT_INIT(ac->mdct1024, ac->mdct1024_fn, 1024, 1.0/1024);
+#undef MDCT_INIT
+
+ /* LTP forward MDCT */
+ scale_fixed = -1.0;
+ scale_float = -32786.0*2 + 36;
+ ret = av_tx_init(&ac->mdct_ltp, &ac->mdct_ltp_fn, tx_type, 0, 1024, scalep, 0);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
-static av_cold int aac_decode_init(AVCodecContext *avctx)
+av_cold int ff_aac_decode_init(AVCodecContext *avctx)
{
- float scale;
AACDecContext *ac = avctx->priv_data;
int ret;
if (avctx->sample_rate > 96000)
return AVERROR_INVALIDDATA;
- ret = ff_thread_once(&aac_table_init, &aac_static_table_init);
- if (ret != 0)
- return AVERROR_UNKNOWN;
+ ff_aacdec_common_init_once();
ac->avctx = avctx;
ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
- aacdec_init(ac);
-#if USE_FIXED
- avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
-#else
- avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
-#endif /* USE_FIXED */
-
if (avctx->extradata_size > 0) {
if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
avctx->extradata,
@@ -1210,38 +1221,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
-#if USE_FIXED
- ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
-#else
- ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
-#endif /* USE_FIXED */
- if (!ac->fdsp) {
- return AVERROR(ENOMEM);
- }
-
ac->random_state = 0x1f2e3d4c;
-#define MDCT_INIT(s, fn, len, sval) \
- scale = sval; \
- ret = av_tx_init(&s, &fn, TX_TYPE, 1, len, &scale, 0); \
- if (ret < 0) \
- return ret;
-
- MDCT_INIT(ac->mdct120, ac->mdct120_fn, 120, TX_SCALE(1.0/120))
- MDCT_INIT(ac->mdct128, ac->mdct128_fn, 128, TX_SCALE(1.0/128))
- MDCT_INIT(ac->mdct480, ac->mdct480_fn, 480, TX_SCALE(1.0/480))
- MDCT_INIT(ac->mdct512, ac->mdct512_fn, 512, TX_SCALE(1.0/512))
- MDCT_INIT(ac->mdct960, ac->mdct960_fn, 960, TX_SCALE(1.0/960))
- MDCT_INIT(ac->mdct1024, ac->mdct1024_fn, 1024, TX_SCALE(1.0/1024))
-#undef MDCT_INIT
-
- /* LTP forward MDCT */
- scale = USE_FIXED ? -1.0 : -32786.0*2 + 36;
- ret = av_tx_init(&ac->mdct_ltp, &ac->mdct_ltp_fn, TX_TYPE, 0, 1024, &scale, 0);
- if (ret < 0)
- return ret;
-
- return 0;
+ return init_dsp(avctx);
}
/**
@@ -1286,13 +1268,17 @@ static int decode_prediction(AACDecContext *ac, IndividualChannelStream *ics,
/**
* Decode Long Term Prediction data; reference: table 4.xx.
*/
-static void decode_ltp(LongTermPrediction *ltp,
+static void decode_ltp(AACDecContext *ac, LongTermPrediction *ltp,
GetBitContext *gb, uint8_t max_sfb)
{
int sfb;
ltp->lag = get_bits(gb, 11);
- ltp->coef = AAC_RENAME2(ltp_coef)[get_bits(gb, 3)];
+ if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
+ ltp->coef_fixed = Q30(ff_ltp_coef[get_bits(gb, 3)]);
+ else if (CONFIG_AAC_DECODER)
+ ltp->coef = ff_ltp_coef[get_bits(gb, 3)];
+
for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
ltp->used[sfb] = get_bits1(gb);
}
@@ -1399,7 +1385,7 @@ static int decode_ics_info(AACDecContext *ac, IndividualChannelStream *ics,
goto fail;
}
if ((ics->ltp.present = get_bits(gb, 1)))
- decode_ltp(&ics->ltp, gb, ics->max_sfb);
+ decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
}
}
}
@@ -1475,7 +1461,8 @@ static int decode_band_types(AACDecContext *ac, enum BandType band_type[120],
*
* @return Returns error status. 0 - OK, !0 - error
*/
-static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], GetBitContext *gb,
+static int decode_scalefactors(AACDecContext *ac, int sfo[120],
+ GetBitContext *gb,
unsigned int global_gain,
IndividualChannelStream *ics,
enum BandType band_type[120],
@@ -1488,11 +1475,13 @@ static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], GetBitContex
for (g = 0; g < ics->num_window_groups; g++) {
for (i = 0; i < ics->max_sfb;) {
int run_end = band_type_run_end[idx];
- if (band_type[idx] == ZERO_BT) {
+ switch (band_type[idx]) {
+ case ZERO_BT:
for (; i < run_end; i++, idx++)
- sf[idx] = FIXR(0.);
- } else if ((band_type[idx] == INTENSITY_BT) ||
- (band_type[idx] == INTENSITY_BT2)) {
+ sfo[idx] = 0;
+ break;
+ case INTENSITY_BT: /* fallthrough */
+ case INTENSITY_BT2:
for (; i < run_end; i++, idx++) {
offset[2] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO;
clipped_offset = av_clip(offset[2], -155, 100);
@@ -1502,13 +1491,10 @@ static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], GetBitContex
"Clipped intensity stereo position (%d -> %d)",
offset[2], clipped_offset);
}
-#if USE_FIXED
- sf[idx] = 100 - clipped_offset;
-#else
- sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
-#endif /* USE_FIXED */
+ sfo[idx] = clipped_offset;
}
- } else if (band_type[idx] == NOISE_BT) {
+ break;
+ case NOISE_BT:
for (; i < run_end; i++, idx++) {
if (noise_flag-- > 0)
offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
@@ -1521,13 +1507,10 @@ static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], GetBitContex
"Clipped noise gain (%d -> %d)",
offset[1], clipped_offset);
}
-#if USE_FIXED
- sf[idx] = -(100 + clipped_offset);
-#else
- sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
-#endif /* USE_FIXED */
+ sfo[idx] = clipped_offset;
}
- } else {
+ break;
+ default:
for (; i < run_end; i++, idx++) {
offset[0] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO;
if (offset[0] > 255U) {
@@ -1535,12 +1518,9 @@ static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], GetBitContex
"Scalefactor (%d) out of range.\n", offset[0]);
return AVERROR_INVALIDDATA;
}
-#if USE_FIXED
- sf[idx] = -offset[0];
-#else
- sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
-#endif /* USE_FIXED */
+ sfo[idx] = offset[0];
}
+ break;
}
}
}
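
decode_scalefactors() no longer produces representation-specific values. It stores the raw, range-checked integer offsets parsed from the bitstream into sce->sfo[] (the clipped intensity stereo position, the clipped noise gain offset, or the plain scalefactor, depending on the band type), and the conversion into the decoder's native sf[] values is deferred to the per-variant dequant_scalefactors() introduced further down in aacdec_dsp_template.c. For an ordinary band the float path's gain magnitude works out to 2^((sfo - 100) / 4), with the stored sf[] carrying a negated sign as an implementation convention kept from the old code. A small illustration of that magnitude, not code from the patch:

    #include <math.h>

    /* illustration only: magnitude of the float-path gain derived from a
     * raw scalefactor offset; sfo == 100 gives unity gain */
    static float sf_gain_magnitude(int sfo)
    {
        return powf(2.0f, (sfo - 100) / 4.0f);
    }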
@@ -1604,8 +1584,12 @@ static int decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns,
coef_len = coef_res + 3 - coef_compress;
tmp2_idx = 2 * coef_compress + coef_res;
- for (i = 0; i < tns->order[w][filt]; i++)
- tns->coef[w][filt][i] = AAC_RENAME2(tns_tmp2_map)[tmp2_idx][get_bits(gb, coef_len)];
+ for (i = 0; i < tns->order[w][filt]; i++) {
+ if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
+ tns->coef_fixed[w][filt][i] = Q31(ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]);
+ else if (CONFIG_AAC_DECODER)
+ tns->coef[w][filt][i] = ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
+ }
}
}
}
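
As in decode_ltp(), the TNS coefficients are now stored per variant: the float decoder keeps the table value as-is, the fixed-point decoder converts it with Q31(). The Q30()/Q31() helpers scale a double into a signed 32-bit fixed-point value with 30 or 31 fractional bits; roughly (see libavcodec/aac_defines.h for the authoritative definitions):

    /* approximate shape of the fixed-point conversion macros */
    #define Q30(x) (int)((x) * 1073741824.0 + 0.5)   /* 2^30 */
    #define Q31(x) (int)((x) * 2147483648.0 + 0.5)   /* 2^31 */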
@@ -1633,341 +1617,6 @@ static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
}
}
-/**
- * Decode spectral data; reference: table 4.50.
- * Dequantize and scale spectral data; reference: 4.6.3.3.
- *
- * @param coef array of dequantized, scaled spectral data
- * @param sf array of scalefactors or intensity stereo positions
- * @param pulse_present set if pulses are present
- * @param pulse pointer to pulse data struct
- * @param band_type array of the used band type
- *
- * @return Returns error status. 0 - OK, !0 - error
- */
-static int decode_spectrum_and_dequant(AACDecContext *ac, INTFLOAT coef[1024],
- GetBitContext *gb, const INTFLOAT sf[120],
- int pulse_present, const Pulse *pulse,
- const IndividualChannelStream *ics,
- enum BandType band_type[120])
-{
- int i, k, g, idx = 0;
- const int c = 1024 / ics->num_windows;
- const uint16_t *offsets = ics->swb_offset;
- INTFLOAT *coef_base = coef;
-
- for (g = 0; g < ics->num_windows; g++)
- memset(coef + g * 128 + offsets[ics->max_sfb], 0,
- sizeof(INTFLOAT) * (c - offsets[ics->max_sfb]));
-
- for (g = 0; g < ics->num_window_groups; g++) {
- unsigned g_len = ics->group_len[g];
-
- for (i = 0; i < ics->max_sfb; i++, idx++) {
- const unsigned cbt_m1 = band_type[idx] - 1;
- INTFLOAT *cfo = coef + offsets[i];
- int off_len = offsets[i + 1] - offsets[i];
- int group;
-
- if (cbt_m1 >= INTENSITY_BT2 - 1) {
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- memset(cfo, 0, off_len * sizeof(*cfo));
- }
- } else if (cbt_m1 == NOISE_BT - 1) {
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- INTFLOAT band_energy;
-#if USE_FIXED
- for (k = 0; k < off_len; k++) {
- ac->random_state = lcg_random(ac->random_state);
- cfo[k] = ac->random_state >> 3;
- }
-
- band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len);
- band_energy = fixed_sqrt(band_energy, 31);
- noise_scale(cfo, sf[idx], band_energy, off_len);
-#else
- float scale;
-
- for (k = 0; k < off_len; k++) {
- ac->random_state = lcg_random(ac->random_state);
- cfo[k] = ac->random_state;
- }
-
- band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
- scale = sf[idx] / sqrtf(band_energy);
- ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
-#endif /* USE_FIXED */
- }
- } else {
-#if !USE_FIXED
- const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
-#endif /* !USE_FIXED */
- const VLCElem *vlc_tab = ff_vlc_spectral[cbt_m1];
- OPEN_READER(re, gb);
-
- switch (cbt_m1 >> 1) {
- case 0:
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- INTFLOAT *cf = cfo;
- int len = off_len;
-
- do {
- int code;
- unsigned cb_idx;
-
- UPDATE_CACHE(re, gb);
- GET_VLC(code, re, gb, vlc_tab, 8, 2);
- cb_idx = code;
-#if USE_FIXED
- cf = DEC_SQUAD(cf, cb_idx);
-#else
- cf = VMUL4(cf, vq, cb_idx, sf + idx);
-#endif /* USE_FIXED */
- } while (len -= 4);
- }
- break;
-
- case 1:
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- INTFLOAT *cf = cfo;
- int len = off_len;
-
- do {
- int code;
- unsigned nnz;
- unsigned cb_idx;
- uint32_t bits;
-
- UPDATE_CACHE(re, gb);
- GET_VLC(code, re, gb, vlc_tab, 8, 2);
- cb_idx = code;
- nnz = cb_idx >> 8 & 15;
- bits = nnz ? GET_CACHE(re, gb) : 0;
- LAST_SKIP_BITS(re, gb, nnz);
-#if USE_FIXED
- cf = DEC_UQUAD(cf, cb_idx, bits);
-#else
- cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
-#endif /* USE_FIXED */
- } while (len -= 4);
- }
- break;
-
- case 2:
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- INTFLOAT *cf = cfo;
- int len = off_len;
-
- do {
- int code;
- unsigned cb_idx;
-
- UPDATE_CACHE(re, gb);
- GET_VLC(code, re, gb, vlc_tab, 8, 2);
- cb_idx = code;
-#if USE_FIXED
- cf = DEC_SPAIR(cf, cb_idx);
-#else
- cf = VMUL2(cf, vq, cb_idx, sf + idx);
-#endif /* USE_FIXED */
- } while (len -= 2);
- }
- break;
-
- case 3:
- case 4:
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
- INTFLOAT *cf = cfo;
- int len = off_len;
-
- do {
- int code;
- unsigned nnz;
- unsigned cb_idx;
- unsigned sign;
-
- UPDATE_CACHE(re, gb);
- GET_VLC(code, re, gb, vlc_tab, 8, 2);
- cb_idx = code;
- nnz = cb_idx >> 8 & 15;
- sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
- LAST_SKIP_BITS(re, gb, nnz);
-#if USE_FIXED
- cf = DEC_UPAIR(cf, cb_idx, sign);
-#else
- cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
-#endif /* USE_FIXED */
- } while (len -= 2);
- }
- break;
-
- default:
- for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
-#if USE_FIXED
- int *icf = cfo;
- int v;
-#else
- float *cf = cfo;
- uint32_t *icf = (uint32_t *) cf;
-#endif /* USE_FIXED */
- int len = off_len;
-
- do {
- int code;
- unsigned nzt, nnz;
- unsigned cb_idx;
- uint32_t bits;
- int j;
-
- UPDATE_CACHE(re, gb);
- GET_VLC(code, re, gb, vlc_tab, 8, 2);
- cb_idx = code;
-
- if (cb_idx == 0x0000) {
- *icf++ = 0;
- *icf++ = 0;
- continue;
- }
-
- nnz = cb_idx >> 12;
- nzt = cb_idx >> 8;
- bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
- LAST_SKIP_BITS(re, gb, nnz);
-
- for (j = 0; j < 2; j++) {
- if (nzt & 1<<j) {
- uint32_t b;
- int n;
- /* The total length of escape_sequence must be < 22 bits according
- to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
- UPDATE_CACHE(re, gb);
- b = GET_CACHE(re, gb);
- b = 31 - av_log2(~b);
-
- if (b > 8) {
- av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
- return AVERROR_INVALIDDATA;
- }
-
- SKIP_BITS(re, gb, b + 1);
- b += 4;
- n = (1 << b) + SHOW_UBITS(re, gb, b);
- LAST_SKIP_BITS(re, gb, b);
-#if USE_FIXED
- v = n;
- if (bits & 1U<<31)
- v = -v;
- *icf++ = v;
-#else
- *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31);
-#endif /* USE_FIXED */
- bits <<= 1;
- } else {
-#if USE_FIXED
- v = cb_idx & 15;
- if (bits & 1U<<31)
- v = -v;
- *icf++ = v;
-#else
- unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
- *icf++ = (bits & 1U<<31) | v;
-#endif /* USE_FIXED */
- bits <<= !!v;
- }
- cb_idx >>= 4;
- }
- } while (len -= 2);
-#if !USE_FIXED
- ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
-#endif /* !USE_FIXED */
- }
- }
-
- CLOSE_READER(re, gb);
- }
- }
- coef += g_len << 7;
- }
-
- if (pulse_present) {
- idx = 0;
- for (i = 0; i < pulse->num_pulse; i++) {
- INTFLOAT co = coef_base[ pulse->pos[i] ];
- while (offsets[idx + 1] <= pulse->pos[i])
- idx++;
- if (band_type[idx] != NOISE_BT && sf[idx]) {
- INTFLOAT ico = -pulse->amp[i];
-#if USE_FIXED
- if (co) {
- ico = co + (co > 0 ? -ico : ico);
- }
- coef_base[ pulse->pos[i] ] = ico;
-#else
- if (co) {
- co /= sf[idx];
- ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
- }
- coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
-#endif /* USE_FIXED */
- }
- }
- }
-#if USE_FIXED
- coef = coef_base;
- idx = 0;
- for (g = 0; g < ics->num_window_groups; g++) {
- unsigned g_len = ics->group_len[g];
-
- for (i = 0; i < ics->max_sfb; i++, idx++) {
- const unsigned cbt_m1 = band_type[idx] - 1;
- int *cfo = coef + offsets[i];
- int off_len = offsets[i + 1] - offsets[i];
- int group;
-
- if (cbt_m1 < NOISE_BT - 1) {
- for (group = 0; group < (int)g_len; group++, cfo+=128) {
- ac->vector_pow43(cfo, off_len);
- ac->subband_scale(cfo, cfo, sf[idx], 34, off_len, ac->avctx);
- }
- }
- }
- coef += g_len << 7;
- }
-#endif /* USE_FIXED */
- return 0;
-}
-
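
decode_spectrum_and_dequant() was the largest remaining block switched by USE_FIXED preprocessor conditionals in this file. It is replaced by two template-generated implementations reached through the new AACDecProc table, so the surrounding parser is compiled only once. Its interface also changes slightly at the call site: instead of a pulse_present flag plus an always-passed struct, the pulse data becomes a nullable pointer, and the scalefactors are taken from the element itself rather than passed in:

    /* caller's view after the refactor (see ff_aac_decode_ics below) */
    ret = ac->proc.decode_spectrum_and_dequant(ac, gb,
                                               pulse_present ? &pulse : NULL,
                                               sce);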
-/**
- * Apply AAC-Main style frequency domain prediction.
- */
-static void apply_prediction(AACDecContext *ac, SingleChannelElement *sce)
-{
- int sfb, k;
-
- if (!sce->ics.predictor_initialized) {
- reset_all_predictors(sce->predictor_state);
- sce->ics.predictor_initialized = 1;
- }
-
- if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
- for (sfb = 0;
- sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
- sfb++) {
- for (k = sce->ics.swb_offset[sfb];
- k < sce->ics.swb_offset[sfb + 1];
- k++) {
- predict(&sce->predictor_state[k], &sce->coeffs[k],
- sce->ics.predictor_present &&
- sce->ics.prediction_used[sfb]);
- }
- }
- if (sce->ics.predictor_reset_group)
- reset_predictor_group(sce->predictor_state,
- sce->ics.predictor_reset_group);
- } else
- reset_all_predictors(sce->predictor_state);
-}
-
static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb)
{
// wd_num, wd_test, aloc_size
@@ -2003,13 +1652,12 @@ static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb)
*
* @return Returns error status. 0 - OK, !0 - error
*/
-static int decode_ics(AACDecContext *ac, SingleChannelElement *sce,
+int ff_aac_decode_ics(AACDecContext *ac, SingleChannelElement *sce,
GetBitContext *gb, int common_window, int scale_flag)
{
Pulse pulse;
TemporalNoiseShaping *tns = &sce->tns;
IndividualChannelStream *ics = &sce->ics;
- INTFLOAT *out = sce->coeffs;
int global_gain, eld_syntax, er_syntax, pulse_present = 0;
int ret;
@@ -2035,10 +1683,12 @@ static int decode_ics(AACDecContext *ac, SingleChannelElement *sce,
if ((ret = decode_band_types(ac, sce->band_type,
sce->band_type_run_end, gb, ics)) < 0)
goto fail;
- if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
- sce->band_type, sce->band_type_run_end)) < 0)
+ if ((ret = decode_scalefactors(ac, sce->sfo, gb, global_gain, ics,
+ sce->band_type, sce->band_type_run_end)) < 0)
goto fail;
+ ac->dsp.dequant_scalefactors(sce);
+
pulse_present = 0;
if (!scale_flag) {
if (!eld_syntax && (pulse_present = get_bits1(gb))) {
@@ -2077,13 +1727,14 @@ static int decode_ics(AACDecContext *ac, SingleChannelElement *sce,
}
}
- ret = decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
- &pulse, ics, sce->band_type);
+ ret = ac->proc.decode_spectrum_and_dequant(ac, gb,
+ pulse_present ? &pulse : NULL,
+ sce);
if (ret < 0)
goto fail;
if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
- apply_prediction(ac, sce);
+ ac->dsp.apply_prediction(ac, sce);
return 0;
fail:
@@ -2092,92 +1743,6 @@ fail:
}
/**
- * Mid/Side stereo decoding; reference: 4.6.8.1.3.
- */
-static void apply_mid_side_stereo(AACDecContext *ac, ChannelElement *cpe)
-{
- const IndividualChannelStream *ics = &cpe->ch[0].ics;
- INTFLOAT *ch0 = cpe->ch[0].coeffs;
- INTFLOAT *ch1 = cpe->ch[1].coeffs;
- int g, i, group, idx = 0;
- const uint16_t *offsets = ics->swb_offset;
- for (g = 0; g < ics->num_window_groups; g++) {
- for (i = 0; i < ics->max_sfb; i++, idx++) {
- if (cpe->ms_mask[idx] &&
- cpe->ch[0].band_type[idx] < NOISE_BT &&
- cpe->ch[1].band_type[idx] < NOISE_BT) {
-#if USE_FIXED
- for (group = 0; group < ics->group_len[g]; group++) {
- ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
- ch1 + group * 128 + offsets[i],
- offsets[i+1] - offsets[i]);
-#else
- for (group = 0; group < ics->group_len[g]; group++) {
- ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
- ch1 + group * 128 + offsets[i],
- offsets[i+1] - offsets[i]);
-#endif /* USE_FIXED */
- }
- }
- }
- ch0 += ics->group_len[g] * 128;
- ch1 += ics->group_len[g] * 128;
- }
-}
-
-/**
- * intensity stereo decoding; reference: 4.6.8.2.3
- *
- * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
- * [1] mask is decoded from bitstream; [2] mask is all 1s;
- * [3] reserved for scalable AAC
- */
-static void apply_intensity_stereo(AACDecContext *ac,
- ChannelElement *cpe, int ms_present)
-{
- const IndividualChannelStream *ics = &cpe->ch[1].ics;
- SingleChannelElement *sce1 = &cpe->ch[1];
- INTFLOAT *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
- const uint16_t *offsets = ics->swb_offset;
- int g, group, i, idx = 0;
- int c;
- INTFLOAT scale;
- for (g = 0; g < ics->num_window_groups; g++) {
- for (i = 0; i < ics->max_sfb;) {
- if (sce1->band_type[idx] == INTENSITY_BT ||
- sce1->band_type[idx] == INTENSITY_BT2) {
- const int bt_run_end = sce1->band_type_run_end[idx];
- for (; i < bt_run_end; i++, idx++) {
- c = -1 + 2 * (sce1->band_type[idx] - 14);
- if (ms_present)
- c *= 1 - 2 * cpe->ms_mask[idx];
- scale = c * sce1->sf[idx];
- for (group = 0; group < ics->group_len[g]; group++)
-#if USE_FIXED
- ac->subband_scale(coef1 + group * 128 + offsets[i],
- coef0 + group * 128 + offsets[i],
- scale,
- 23,
- offsets[i + 1] - offsets[i] ,ac->avctx);
-#else
- ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
- coef0 + group * 128 + offsets[i],
- scale,
- offsets[i + 1] - offsets[i]);
-#endif /* USE_FIXED */
- }
- } else {
- int bt_run_end = sce1->band_type_run_end[idx];
- idx += bt_run_end - i;
- i = bt_run_end;
- }
- }
- coef0 += ics->group_len[g] * 128;
- coef1 += ics->group_len[g] * 128;
- }
-}
-
-/**
* Decode a channel_pair_element; reference: table 4.4.
*
* @return Returns error status. 0 - OK, !0 - error
@@ -2197,7 +1762,7 @@ static int decode_cpe(AACDecContext *ac, GetBitContext *gb, ChannelElement *cpe)
if (cpe->ch[1].ics.predictor_present &&
(ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
- decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
+ decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
ms_present = get_bits(gb, 2);
if (ms_present == 3) {
av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
@@ -2205,112 +1770,21 @@ static int decode_cpe(AACDecContext *ac, GetBitContext *gb, ChannelElement *cpe)
} else if (ms_present)
decode_mid_side_stereo(cpe, gb, ms_present);
}
- if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
+ if ((ret = ff_aac_decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
return ret;
- if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
+ if ((ret = ff_aac_decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
return ret;
if (common_window) {
if (ms_present)
- apply_mid_side_stereo(ac, cpe);
+ ac->dsp.apply_mid_side_stereo(ac, cpe);
if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
- apply_prediction(ac, &cpe->ch[0]);
- apply_prediction(ac, &cpe->ch[1]);
+ ac->dsp.apply_prediction(ac, &cpe->ch[0]);
+ ac->dsp.apply_prediction(ac, &cpe->ch[1]);
}
}
- apply_intensity_stereo(ac, cpe, ms_present);
- return 0;
-}
-
-static const float cce_scale[] = {
- 1.09050773266525765921, //2^(1/8)
- 1.18920711500272106672, //2^(1/4)
- M_SQRT2,
- 2,
-};
-
-/**
- * Decode coupling_channel_element; reference: table 4.8.
- *
- * @return Returns error status. 0 - OK, !0 - error
- */
-static int decode_cce(AACDecContext *ac, GetBitContext *gb, ChannelElement *che)
-{
- int num_gain = 0;
- int c, g, sfb, ret;
- int sign;
- INTFLOAT scale;
- SingleChannelElement *sce = &che->ch[0];
- ChannelCoupling *coup = &che->coup;
-
- coup->coupling_point = 2 * get_bits1(gb);
- coup->num_coupled = get_bits(gb, 3);
- for (c = 0; c <= coup->num_coupled; c++) {
- num_gain++;
- coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
- coup->id_select[c] = get_bits(gb, 4);
- if (coup->type[c] == TYPE_CPE) {
- coup->ch_select[c] = get_bits(gb, 2);
- if (coup->ch_select[c] == 3)
- num_gain++;
- } else
- coup->ch_select[c] = 2;
- }
- coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
-
- sign = get_bits(gb, 1);
-#if USE_FIXED
- scale = get_bits(gb, 2);
-#else
- scale = cce_scale[get_bits(gb, 2)];
-#endif
-
- if ((ret = decode_ics(ac, sce, gb, 0, 0)))
- return ret;
-
- for (c = 0; c < num_gain; c++) {
- int idx = 0;
- int cge = 1;
- int gain = 0;
- INTFLOAT gain_cache = FIXR10(1.);
- if (c) {
- cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
- gain = cge ? get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 60: 0;
- gain_cache = GET_GAIN(scale, gain);
-#if USE_FIXED
- if ((abs(gain_cache)-1024) >> 3 > 30)
- return AVERROR(ERANGE);
-#endif
- }
- if (coup->coupling_point == AFTER_IMDCT) {
- coup->gain[c][0] = gain_cache;
- } else {
- for (g = 0; g < sce->ics.num_window_groups; g++) {
- for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
- if (sce->band_type[idx] != ZERO_BT) {
- if (!cge) {
- int t = get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 60;
- if (t) {
- int s = 1;
- t = gain += t;
- if (sign) {
- s -= 2 * (t & 0x1);
- t >>= 1;
- }
- gain_cache = GET_GAIN(scale, t) * s;
-#if USE_FIXED
- if ((abs(gain_cache)-1024) >> 3 > 30)
- return AVERROR(ERANGE);
-#endif
- }
- }
- coup->gain[c][idx] = gain_cache;
- }
- }
- }
- }
- }
+ ac->dsp.apply_intensity_stereo(ac, cpe, ms_present);
return 0;
}
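
The stereo tools and coupling-element parsing move behind the new function tables for the same reason: apply_mid_side_stereo() and apply_intensity_stereo() write directly into the coefficient buffers, whose element type differs between the builds (float vs. 32-bit fixed point), so they become ac->dsp members, while decode_cce() does bitstream parsing with variant-specific scaling and becomes ac->proc.decode_cce. For reference, the mid/side reconstruction applied per band in the float path amounts to the following (roughly what fdsp->butterflies_float() performs):

    /* L/R reconstruction from mid/side, done in place over one band */
    static void ms_butterfly(float *ch0, float *ch1, int len)
    {
        for (int i = 0; i < len; i++) {
            float side = ch1[i];
            ch1[i]  = ch0[i] - side;   /* right = mid - side */
            ch0[i] += side;            /* left  = mid + side */
        }
    }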
@@ -2461,7 +1935,9 @@ static int decode_extension_payload(AACDecContext *ac, GetBitContext *gb, int cn
ac->oc[1].m4ac.sbr = 1;
ac->avctx->profile = AV_PROFILE_AAC_HE;
}
- res = AAC_RENAME(ff_decode_sbr_extension)(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
+
+ ac->proc.sbr_decode_extension(ac, che, gb, crc_flag, cnt, elem_type);
+
if (ac->oc[1].m4ac.ps == 1 && !ac->warned_he_aac_mono) {
av_log(ac->avctx, AV_LOG_VERBOSE, "Treating HE-AAC mono as stereo.\n");
ac->warned_he_aac_mono = 1;
@@ -2483,381 +1959,6 @@ static int decode_extension_payload(AACDecContext *ac, GetBitContext *gb, int cn
}
/**
- * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
- *
- * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
- * @param coef spectral coefficients
- */
-static void apply_tns(INTFLOAT coef_param[1024], TemporalNoiseShaping *tns,
- IndividualChannelStream *ics, int decode)
-{
- const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
- int w, filt, m, i;
- int bottom, top, order, start, end, size, inc;
- INTFLOAT lpc[TNS_MAX_ORDER];
- INTFLOAT tmp[TNS_MAX_ORDER+1];
- UINTFLOAT *coef = coef_param;
-
- if(!mmm)
- return;
-
- for (w = 0; w < ics->num_windows; w++) {
- bottom = ics->num_swb;
- for (filt = 0; filt < tns->n_filt[w]; filt++) {
- top = bottom;
- bottom = FFMAX(0, top - tns->length[w][filt]);
- order = tns->order[w][filt];
- if (order == 0)
- continue;
-
- // tns_decode_coef
- compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
-
- start = ics->swb_offset[FFMIN(bottom, mmm)];
- end = ics->swb_offset[FFMIN( top, mmm)];
- if ((size = end - start) <= 0)
- continue;
- if (tns->direction[w][filt]) {
- inc = -1;
- start = end - 1;
- } else {
- inc = 1;
- }
- start += w * 128;
-
- if (decode) {
- // ar filter
- for (m = 0; m < size; m++, start += inc)
- for (i = 1; i <= FFMIN(m, order); i++)
- coef[start] -= AAC_MUL26((INTFLOAT)coef[start - i * inc], lpc[i - 1]);
- } else {
- // ma filter
- for (m = 0; m < size; m++, start += inc) {
- tmp[0] = coef[start];
- for (i = 1; i <= FFMIN(m, order); i++)
- coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]);
- for (i = order; i > 0; i--)
- tmp[i] = tmp[i - 1];
- }
- }
- }
- }
-}
-
-/**
- * Apply windowing and MDCT to obtain the spectral
- * coefficient from the predicted sample by LTP.
- */
-static void windowing_and_mdct_ltp(AACDecContext *ac, INTFLOAT *out,
- INTFLOAT *in, IndividualChannelStream *ics)
-{
- const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
- const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
- const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
- const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
-
- if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
- ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
- } else {
- memset(in, 0, 448 * sizeof(*in));
- ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
- }
- if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
- ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
- } else {
- ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
- memset(in + 1024 + 576, 0, 448 * sizeof(*in));
- }
- ac->mdct_ltp_fn(ac->mdct_ltp, out, in, sizeof(INTFLOAT));
-}
-
-/**
- * Apply the long term prediction
- */
-static void apply_ltp(AACDecContext *ac, SingleChannelElement *sce)
-{
- const LongTermPrediction *ltp = &sce->ics.ltp;
- const uint16_t *offsets = sce->ics.swb_offset;
- int i, sfb;
-
- if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
- INTFLOAT *predTime = sce->ret;
- INTFLOAT *predFreq = ac->buf_mdct;
- int16_t num_samples = 2048;
-
- if (ltp->lag < 1024)
- num_samples = ltp->lag + 1024;
- for (i = 0; i < num_samples; i++)
- predTime[i] = AAC_MUL30(sce->ltp_state[i + 2048 - ltp->lag], ltp->coef);
- memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime));
-
- ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
-
- if (sce->tns.present)
- ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
-
- for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
- if (ltp->used[sfb])
- for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
- sce->coeffs[i] += (UINTFLOAT)predFreq[i];
- }
-}
-
-/**
- * Update the LTP buffer for next frame
- */
-static void update_ltp(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- INTFLOAT *saved = sce->saved;
- INTFLOAT *saved_ltp = sce->coeffs;
- const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
- const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
- int i;
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- memcpy(saved_ltp, saved, 512 * sizeof(*saved_ltp));
- memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp));
- ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
-
- for (i = 0; i < 64; i++)
- saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]);
- } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- memcpy(saved_ltp, ac->buf_mdct + 512, 448 * sizeof(*saved_ltp));
- memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp));
- ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
-
- for (i = 0; i < 64; i++)
- saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]);
- } else { // LONG_STOP or ONLY_LONG
- ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
-
- for (i = 0; i < 512; i++)
- saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], lwindow[511 - i]);
- }
-
- memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
- memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state));
- memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state));
-}
-
-/**
- * Conduct IMDCT and windowing.
- */
-static void imdct_and_windowing(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- INTFLOAT *in = sce->coeffs;
- INTFLOAT *out = sce->ret;
- INTFLOAT *saved = sce->saved;
- const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
- const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
- const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
- INTFLOAT *buf = ac->buf_mdct;
- INTFLOAT *temp = ac->temp;
- int i;
-
- // imdct
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- for (i = 0; i < 1024; i += 128)
- ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(INTFLOAT));
- } else {
- ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(INTFLOAT));
- }
-
- /* window overlapping
- * NOTE: To simplify the overlapping code, all 'meaningless' short to long
- * and long to short transitions are considered to be short to short
- * transitions. This leaves just two cases (long to long and short to short)
- * with a little special sauce for EIGHT_SHORT_SEQUENCE.
- */
- if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
- (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
- ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
- } else {
- memcpy( out, saved, 448 * sizeof(*out));
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
- ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
- ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
- ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
- ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
- memcpy( out + 448 + 4*128, temp, 64 * sizeof(*out));
- } else {
- ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
- memcpy( out + 576, buf + 64, 448 * sizeof(*out));
- }
- }
-
- // buffer update
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- memcpy( saved, temp + 64, 64 * sizeof(*saved));
- ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
- ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
- ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
- memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved));
- } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- memcpy( saved, buf + 512, 448 * sizeof(*saved));
- memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved));
- } else { // LONG_STOP or ONLY_LONG
- memcpy( saved, buf + 512, 512 * sizeof(*saved));
- }
-}
-
-/**
- * Conduct IMDCT and windowing.
- */
-static void imdct_and_windowing_960(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- INTFLOAT *in = sce->coeffs;
- INTFLOAT *out = sce->ret;
- INTFLOAT *saved = sce->saved;
- const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
- const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_long_960) : AAC_RENAME(sine_960);
- const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
- INTFLOAT *buf = ac->buf_mdct;
- INTFLOAT *temp = ac->temp;
- int i;
-
- // imdct
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- for (i = 0; i < 8; i++)
- ac->mdct120_fn(ac->mdct120, buf + i * 120, in + i * 128, sizeof(INTFLOAT));
- } else {
- ac->mdct960_fn(ac->mdct960, buf, in, sizeof(INTFLOAT));
- }
-
- /* window overlapping
- * NOTE: To simplify the overlapping code, all 'meaningless' short to long
- * and long to short transitions are considered to be short to short
- * transitions. This leaves just two cases (long to long and short to short)
- * with a little special sauce for EIGHT_SHORT_SEQUENCE.
- */
-
- if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
- (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
- ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 480);
- } else {
- memcpy( out, saved, 420 * sizeof(*out));
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- ac->fdsp->vector_fmul_window(out + 420 + 0*120, saved + 420, buf + 0*120, swindow_prev, 60);
- ac->fdsp->vector_fmul_window(out + 420 + 1*120, buf + 0*120 + 60, buf + 1*120, swindow, 60);
- ac->fdsp->vector_fmul_window(out + 420 + 2*120, buf + 1*120 + 60, buf + 2*120, swindow, 60);
- ac->fdsp->vector_fmul_window(out + 420 + 3*120, buf + 2*120 + 60, buf + 3*120, swindow, 60);
- ac->fdsp->vector_fmul_window(temp, buf + 3*120 + 60, buf + 4*120, swindow, 60);
- memcpy( out + 420 + 4*120, temp, 60 * sizeof(*out));
- } else {
- ac->fdsp->vector_fmul_window(out + 420, saved + 420, buf, swindow_prev, 60);
- memcpy( out + 540, buf + 60, 420 * sizeof(*out));
- }
- }
-
- // buffer update
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- memcpy( saved, temp + 60, 60 * sizeof(*saved));
- ac->fdsp->vector_fmul_window(saved + 60, buf + 4*120 + 60, buf + 5*120, swindow, 60);
- ac->fdsp->vector_fmul_window(saved + 180, buf + 5*120 + 60, buf + 6*120, swindow, 60);
- ac->fdsp->vector_fmul_window(saved + 300, buf + 6*120 + 60, buf + 7*120, swindow, 60);
- memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved));
- } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- memcpy( saved, buf + 480, 420 * sizeof(*saved));
- memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved));
- } else { // LONG_STOP or ONLY_LONG
- memcpy( saved, buf + 480, 480 * sizeof(*saved));
- }
-}
-static void imdct_and_windowing_ld(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- INTFLOAT *in = sce->coeffs;
- INTFLOAT *out = sce->ret;
- INTFLOAT *saved = sce->saved;
- INTFLOAT *buf = ac->buf_mdct;
-
- // imdct
- ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
-
- // window overlapping
- if (ics->use_kb_window[1]) {
- // AAC LD uses a low overlap sine window instead of a KBD window
- memcpy(out, saved, 192 * sizeof(*out));
- ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, AAC_RENAME2(sine_128), 64);
- memcpy( out + 320, buf + 64, 192 * sizeof(*out));
- } else {
- ac->fdsp->vector_fmul_window(out, saved, buf, AAC_RENAME2(sine_512), 256);
- }
-
- // buffer update
- memcpy(saved, buf + 256, 256 * sizeof(*saved));
-}
-
-static void imdct_and_windowing_eld(AACDecContext *ac, SingleChannelElement *sce)
-{
- UINTFLOAT *in = sce->coeffs;
- INTFLOAT *out = sce->ret;
- INTFLOAT *saved = sce->saved;
- INTFLOAT *buf = ac->buf_mdct;
- int i;
- const int n = ac->oc[1].m4ac.frame_length_short ? 480 : 512;
- const int n2 = n >> 1;
- const int n4 = n >> 2;
- const INTFLOAT *const window = n == 480 ? AAC_RENAME(ff_aac_eld_window_480) :
- AAC_RENAME(ff_aac_eld_window_512);
-
- // Inverse transform, mapped to the conventional IMDCT by
- // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
- // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
- // International Conference on Audio, Language and Image Processing, ICALIP 2008.
- // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
- for (i = 0; i < n2; i+=2) {
- INTFLOAT temp;
- temp = in[i ]; in[i ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
- temp = -in[i + 1]; in[i + 1] = in[n - 2 - i]; in[n - 2 - i] = temp;
- }
-
- if (n == 480)
- ac->mdct480_fn(ac->mdct480, buf, in, sizeof(INTFLOAT));
- else
- ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
-
- for (i = 0; i < n; i+=2) {
- buf[i + 0] = -(UINTFLOAT)(USE_FIXED + 1)*buf[i + 0];
- buf[i + 1] = (UINTFLOAT)(USE_FIXED + 1)*buf[i + 1];
- }
- // Like with the regular IMDCT at this point we still have the middle half
- // of a transform but with even symmetry on the left and odd symmetry on
- // the right
-
- // window overlapping
- // The spec says to use samples [0..511] but the reference decoder uses
- // samples [128..639].
- for (i = n4; i < n2; i ++) {
- out[i - n4] = AAC_MUL31( buf[ n2 - 1 - i] , window[i - n4]) +
- AAC_MUL31( saved[ i + n2] , window[i + n - n4]) +
- AAC_MUL31(-saved[n + n2 - 1 - i] , window[i + 2*n - n4]) +
- AAC_MUL31(-saved[ 2*n + n2 + i] , window[i + 3*n - n4]);
- }
- for (i = 0; i < n2; i ++) {
- out[n4 + i] = AAC_MUL31( buf[ i] , window[i + n2 - n4]) +
- AAC_MUL31(-saved[ n - 1 - i] , window[i + n2 + n - n4]) +
- AAC_MUL31(-saved[ n + i] , window[i + n2 + 2*n - n4]) +
- AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]);
- }
- for (i = 0; i < n4; i ++) {
- out[n2 + n4 + i] = AAC_MUL31( buf[ i + n2] , window[i + n - n4]) +
- AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) +
- AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]);
- }
-
- // buffer update
- memmove(saved + n, saved, 2 * n * sizeof(*saved));
- memcpy( saved, buf, n * sizeof(*saved));
-}
-
-/**
* channel coupling transformation interface
*
* @param apply_coupling_method pointer to (in)dependent coupling function
@@ -2901,64 +2002,57 @@ static void spectral_to_sample(AACDecContext *ac, int samples)
void (*imdct_and_window)(AACDecContext *ac, SingleChannelElement *sce);
switch (ac->oc[1].m4ac.object_type) {
case AOT_ER_AAC_LD:
- imdct_and_window = imdct_and_windowing_ld;
+ imdct_and_window = ac->dsp.imdct_and_windowing_ld;
break;
case AOT_ER_AAC_ELD:
- imdct_and_window = imdct_and_windowing_eld;
+ imdct_and_window = ac->dsp.imdct_and_windowing_eld;
break;
default:
if (ac->oc[1].m4ac.frame_length_short)
- imdct_and_window = imdct_and_windowing_960;
+ imdct_and_window = ac->dsp.imdct_and_windowing_960;
else
- imdct_and_window = ac->imdct_and_windowing;
+ imdct_and_window = ac->dsp.imdct_and_windowing;
}
for (type = 3; type >= 0; type--) {
for (i = 0; i < MAX_ELEM_ID; i++) {
ChannelElement *che = ac->che[type][i];
if (che && che->present) {
if (type <= TYPE_CPE)
- apply_channel_coupling(ac, che, type, i, BEFORE_TNS, AAC_RENAME(apply_dependent_coupling));
+ apply_channel_coupling(ac, che, type, i, BEFORE_TNS, ac->dsp.apply_dependent_coupling);
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
if (che->ch[0].ics.predictor_present) {
if (che->ch[0].ics.ltp.present)
- ac->apply_ltp(ac, &che->ch[0]);
+ ac->dsp.apply_ltp(ac, &che->ch[0]);
if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
- ac->apply_ltp(ac, &che->ch[1]);
+ ac->dsp.apply_ltp(ac, &che->ch[1]);
}
}
if (che->ch[0].tns.present)
- ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
+ ac->dsp.apply_tns(che->ch[0].coeffs,
+ &che->ch[0].tns, &che->ch[0].ics, 1);
if (che->ch[1].tns.present)
- ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
+ ac->dsp.apply_tns(che->ch[1].coeffs,
+ &che->ch[1].tns, &che->ch[1].ics, 1);
if (type <= TYPE_CPE)
- apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, AAC_RENAME(apply_dependent_coupling));
+ apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, ac->dsp.apply_dependent_coupling);
if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
imdct_and_window(ac, &che->ch[0]);
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
- ac->update_ltp(ac, &che->ch[0]);
+ ac->dsp.update_ltp(ac, &che->ch[0]);
if (type == TYPE_CPE) {
imdct_and_window(ac, &che->ch[1]);
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
- ac->update_ltp(ac, &che->ch[1]);
+ ac->dsp.update_ltp(ac, &che->ch[1]);
}
if (ac->oc[1].m4ac.sbr > 0) {
- AAC_RENAME(ff_sbr_apply)(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
+ ac->proc.sbr_apply(ac, che, type,
+ che->ch[0].output,
+ che->ch[1].output);
}
}
if (type <= TYPE_CCE)
- apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, AAC_RENAME(apply_independent_coupling));
-
-#if USE_FIXED
- {
- int j;
- /* preparation for resampler */
- for(j = 0; j<samples; j++){
- che->ch[0].ret[j] = (int32_t)av_clip64((int64_t)che->ch[0].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
- if (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))
- che->ch[1].ret[j] = (int32_t)av_clip64((int64_t)che->ch[1].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
- }
- }
-#endif /* USE_FIXED */
+ apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, ac->dsp.apply_independent_coupling);
+ ac->dsp.clip_output(ac, che, type, samples);
che->present = 0;
} else if (che) {
av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i);
@@ -3073,13 +2167,13 @@ static int aac_decode_er_frame(AVCodecContext *avctx, AVFrame *frame,
skip_bits(gb, 4);
switch (elem_type) {
case TYPE_SCE:
- err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0);
break;
case TYPE_CPE:
err = decode_cpe(ac, gb, che);
break;
case TYPE_LFE:
- err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0);
break;
}
if (err < 0)
@@ -3174,7 +2268,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
switch (elem_type) {
case TYPE_SCE:
- err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0);
audio_found = 1;
sce_count++;
break;
@@ -3185,11 +2279,11 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
break;
case TYPE_CCE:
- err = decode_cce(ac, gb, che);
+ err = ac->proc.decode_cce(ac, gb, che);
break;
case TYPE_LFE:
- err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ err = ff_aac_decode_ics(ac, &che->ch[0], gb, 0, 0);
audio_found = 1;
break;
@@ -3374,55 +2468,16 @@ static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
return buf_size > buf_offset ? buf_consumed : buf_size;
}
-static av_cold int aac_decode_close(AVCodecContext *avctx)
-{
- AACDecContext *ac = avctx->priv_data;
- int i, type;
-
- for (i = 0; i < MAX_ELEM_ID; i++) {
- for (type = 0; type < 4; type++) {
- if (ac->che[type][i])
- AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][i]->sbr);
- av_freep(&ac->che[type][i]);
- }
- }
-
- av_tx_uninit(&ac->mdct120);
- av_tx_uninit(&ac->mdct128);
- av_tx_uninit(&ac->mdct480);
- av_tx_uninit(&ac->mdct512);
- av_tx_uninit(&ac->mdct960);
- av_tx_uninit(&ac->mdct1024);
- av_tx_uninit(&ac->mdct_ltp);
-
- av_freep(&ac->fdsp);
- return 0;
-}
-
-static void aacdec_init(AACDecContext *c)
-{
- c->imdct_and_windowing = imdct_and_windowing;
- c->apply_ltp = apply_ltp;
- c->apply_tns = apply_tns;
- c->windowing_and_mdct_ltp = windowing_and_mdct_ltp;
- c->update_ltp = update_ltp;
-#if USE_FIXED
- c->vector_pow43 = vector_pow43;
- c->subband_scale = subband_scale;
+#if CONFIG_AAC_LATM_DECODER
+#include "aacdec_latm.h"
#endif
-#if !USE_FIXED
-#if ARCH_MIPS
- ff_aacdec_init_mips(c);
-#endif
-#endif /* !USE_FIXED */
-}
-/**
- * AVOptions for Japanese DTV specific extensions (ADTS only)
- */
#define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
#define OFF(field) offsetof(AACDecContext, field)
static const AVOption options[] = {
+ /**
+ * AVOptions for Japanese DTV specific extensions (ADTS only)
+ */
{"dual_mono_mode", "Select the channel to decode for dual mono",
OFF(force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
AACDEC_FLAGS, .unit = "dual_mono_mode"},
@@ -3443,9 +2498,53 @@ static const AVOption options[] = {
{NULL},
};
-static const AVClass aac_decoder_class = {
+static const AVClass decoder_class = {
.class_name = "AAC decoder",
.item_name = av_default_item_name,
.option = options,
.version = LIBAVUTIL_VERSION_INT,
};
+
+#if CONFIG_AAC_DECODER
+const FFCodec ff_aac_decoder = {
+ .p.name = "aac",
+ CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
+ .p.type = AVMEDIA_TYPE_AUDIO,
+ .p.id = AV_CODEC_ID_AAC,
+ .p.priv_class = &decoder_class,
+ .priv_data_size = sizeof(AACDecContext),
+ .init = ff_aac_decode_init_float,
+ .close = decode_close,
+ FF_CODEC_DECODE_CB(aac_decode_frame),
+ .p.sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
+ },
+ .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
+ .p.ch_layouts = ff_aac_ch_layout,
+ .flush = flush,
+ .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+};
+#endif
+
+#if CONFIG_AAC_FIXED_DECODER
+const FFCodec ff_aac_fixed_decoder = {
+ .p.name = "aac_fixed",
+ CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
+ .p.type = AVMEDIA_TYPE_AUDIO,
+ .p.id = AV_CODEC_ID_AAC,
+ .p.priv_class = &decoder_class,
+ .priv_data_size = sizeof(AACDecContext),
+ .init = ff_aac_decode_init_fixed,
+ .close = decode_close,
+ FF_CODEC_DECODE_CB(aac_decode_frame),
+ .p.sample_fmts = (const enum AVSampleFormat[]) {
+ AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
+ },
+ .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
+ .p.ch_layouts = ff_aac_ch_layout,
+ .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+ .flush = flush,
+};
+#endif
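
Both public decoders are now thin registrations over the same frame decoder and option table; they differ only in the init callback and the advertised output format (AV_SAMPLE_FMT_FLTP for "aac", AV_SAMPLE_FMT_S32P for "aac_fixed"), and either can still be selected explicitly, e.g. through avcodec_find_decoder_by_name("aac_fixed"). The per-variant init wrappers are expected to do little more than mark the variant and hand off to the shared ff_aac_decode_init() declared in aacdec.h; a plausible shape, illustrative rather than the patch's actual body:

    /* illustrative sketch of the per-variant init split */
    int ff_aac_decode_init_fixed(AVCodecContext *avctx)
    {
        AACDecContext *ac = avctx->priv_data;

        ac->is_fixed      = 1;
        avctx->sample_fmt = AV_SAMPLE_FMT_S32P;

        return ff_aac_decode_init(avctx);   /* shared setup */
    }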
diff --git a/libavcodec/aacdec.h b/libavcodec/aac/aacdec.h
index 1b245f9258..eed53c6c96 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -27,8 +27,8 @@
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
-#ifndef AVCODEC_AACDEC_H
-#define AVCODEC_AACDEC_H
+#ifndef AVCODEC_AAC_AACDEC_H
+#define AVCODEC_AAC_AACDEC_H
#include <stdint.h>
@@ -38,10 +38,10 @@
#include "libavutil/mem_internal.h"
#include "libavutil/tx.h"
-#include "aac.h"
-#include "aac_defines.h"
-#include "mpeg4audio.h"
-#include "sbr.h"
+#include "libavcodec/aac.h"
+#include "libavcodec/mpeg4audio.h"
+
+typedef struct AACDecContext AACDecContext;
/**
* Output configuration status
@@ -68,13 +68,27 @@ enum CouplingPoint {
AFTER_IMDCT = 3,
};
+// Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
+#define RENAME_FIXED(name) name ## _fixed
+
+#define INTFLOAT_UNION(name, elems) \
+ union { \
+ int RENAME_FIXED(name) elems; \
+ float name elems; \
+ }
+
+#define INTFLOAT_ALIGNED_UNION(alignment, name, nb_elems) \
+ union { \
+ DECLARE_ALIGNED(alignment, int, RENAME_FIXED(name))[nb_elems]; \
+ DECLARE_ALIGNED(alignment, float, name)[nb_elems]; \
+ }
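
These union macros are what allow one context layout to serve both decoders: every field that used to be typed INTFLOAT gains a float member and an int member of identical size sharing the same storage, with template code selecting the member via AAC_RENAME()/RENAME_FIXED(). Expanded by hand for the LTP coefficient below:

    /* INTFLOAT_UNION(coef,); expands to: */
    union {
        int   coef_fixed;   /* used by the fixed-point decoder */
        float coef;         /* used by the float decoder       */
    };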
/**
* Long Term Prediction
*/
typedef struct LongTermPrediction {
int8_t present;
int16_t lag;
- INTFLOAT coef;
+ INTFLOAT_UNION(coef,);
int8_t used[MAX_LTP_LONG_SFB];
} LongTermPrediction;
@@ -108,7 +122,7 @@ typedef struct TemporalNoiseShaping {
int length[8][4];
int direction[8][4];
int order[8][4];
- INTFLOAT coef[8][4][TNS_MAX_ORDER];
+ INTFLOAT_UNION(coef, [8][4][TNS_MAX_ORDER]);
} TemporalNoiseShaping;
/**
@@ -122,7 +136,7 @@ typedef struct ChannelCoupling {
int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel;
* [2] list of gains for left channel; [3] lists of gains for both channels
*/
- INTFLOAT gain[16][120];
+ INTFLOAT_UNION(gain, [16][120]);
} ChannelCoupling;
/**
@@ -133,13 +147,20 @@ typedef struct SingleChannelElement {
TemporalNoiseShaping tns;
enum BandType band_type[128]; ///< band types
int band_type_run_end[120]; ///< band type run end points
- INTFLOAT sf[120]; ///< scalefactors
- DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed
- DECLARE_ALIGNED(32, INTFLOAT, saved)[1536]; ///< overlap
- DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer
- DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP
- PredictorState predictor_state[MAX_PREDICTORS];
- INTFLOAT *ret; ///< PCM output
+ int sfo[120]; ///< scalefactor offsets
+ INTFLOAT_UNION(sf, [120]); ///< scalefactors
+ INTFLOAT_ALIGNED_UNION(32, coeffs, 1024); ///< coefficients for IMDCT, maybe processed
+ INTFLOAT_ALIGNED_UNION(32, saved, 1536); ///< overlap
+ INTFLOAT_ALIGNED_UNION(32, ret_buf, 2048); ///< PCM output buffer
+ INTFLOAT_ALIGNED_UNION(16, ltp_state, 3072); ///< time signal for LTP
+ union {
+ struct PredictorStateFixed *RENAME_FIXED(predictor_state);
+ struct PredictorState *predictor_state;
+ };
+ union {
+ float *output; ///< PCM output
+ int *RENAME_FIXED(output); ///< PCM output
+ };
} SingleChannelElement;
/**
@@ -153,7 +174,6 @@ typedef struct ChannelElement {
SingleChannelElement ch[2];
// CCE specific
ChannelCoupling coup;
- SpectralBandReplication sbr;
} ChannelElement;
typedef struct OutputConfiguration {
@@ -181,11 +201,67 @@ typedef struct DynamicRangeControl {
} DynamicRangeControl;
/**
+ * Decode-specific primitives
+ */
+typedef struct AACDecProc {
+ int (*decode_spectrum_and_dequant)(AACDecContext *ac,
+ GetBitContext *gb,
+ const Pulse *pulse,
+ SingleChannelElement *sce);
+
+ int (*decode_cce)(AACDecContext *ac, GetBitContext *gb, ChannelElement *che);
+
+ int (*sbr_ctx_alloc_init)(AACDecContext *ac, ChannelElement **che, int id_aac);
+ int (*sbr_decode_extension)(AACDecContext *ac, ChannelElement *che,
+ GetBitContext *gb, int crc, int cnt, int id_aac);
+ void (*sbr_apply)(AACDecContext *ac, ChannelElement *che,
+ int id_aac, void /* INTFLOAT */ *L, void /* INTFLOAT */ *R);
+ void (*sbr_ctx_close)(ChannelElement *che);
+} AACDecProc;
+
+/**
+ * DSP-specific primitives
+ */
+typedef struct AACDecDSP {
+ void (*dequant_scalefactors)(SingleChannelElement *sce);
+
+ void (*apply_mid_side_stereo)(AACDecContext *ac, ChannelElement *cpe);
+ void (*apply_intensity_stereo)(AACDecContext *ac, ChannelElement *cpe,
+ int ms_present);
+
+ void (*apply_tns)(void *_coef_param, TemporalNoiseShaping *tns,
+ IndividualChannelStream *ics, int decode);
+
+ void (*apply_ltp)(AACDecContext *ac, SingleChannelElement *sce);
+ void (*update_ltp)(AACDecContext *ac, SingleChannelElement *sce);
+
+ void (*apply_prediction)(AACDecContext *ac, SingleChannelElement *sce);
+
+ void (*apply_dependent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index);
+ void (*apply_independent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index);
+
+ void (*imdct_and_windowing)(AACDecContext *ac, SingleChannelElement *sce);
+ void (*imdct_and_windowing_960)(AACDecContext *ac, SingleChannelElement *sce);
+ void (*imdct_and_windowing_ld)(AACDecContext *ac, SingleChannelElement *sce);
+ void (*imdct_and_windowing_eld)(AACDecContext *ac, SingleChannelElement *sce);
+
+ void (*clip_output)(AACDecContext *ac, ChannelElement *che, int type, int samples);
+} AACDecDSP;
+
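
The two tables split the per-variant code along a natural line: AACDecProc holds the bitstream-level routines (spectrum decode, CCE, the SBR glue), AACDecDSP the sample-domain processing. Each template build fills one instance of them, and the shared parser only ever calls through ac->proc and ac->dsp. Presumably the tables are populated with designated initializers pointing at the AAC_RENAME()'d template functions, along these lines (illustrative; the real tables live in the new per-variant sources of this patch):

    static const AACDecDSP aac_dsp_fixed = {
        .dequant_scalefactors  = dequant_scalefactors_fixed,
        .apply_mid_side_stereo = apply_mid_side_stereo_fixed,
        .apply_tns             = apply_tns_fixed,
        .imdct_and_windowing   = imdct_and_windowing_fixed,
        /* ... */
    };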
+/**
* main AAC decoding context
*/
-typedef struct AACDecContext {
+struct AACDecContext {
const struct AVClass *class;
struct AVCodecContext *avctx;
+
+ AACDecDSP dsp;
+ AACDecProc proc;
+
struct AVFrame *frame;
int is_saved; ///< Set if elements have stored overlap from previous frame.
@@ -206,8 +282,8 @@ typedef struct AACDecContext {
* (We do not want to have these on the stack.)
* @{
*/
- DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024];
- DECLARE_ALIGNED(32, INTFLOAT, temp)[128];
+ INTFLOAT_ALIGNED_UNION(32, buf_mdct, 1024);
+ INTFLOAT_ALIGNED_UNION(32, temp, 128);
/** @} */
/**
@@ -229,11 +305,10 @@ typedef struct AACDecContext {
av_tx_fn mdct960_fn;
av_tx_fn mdct1024_fn;
av_tx_fn mdct_ltp_fn;
-#if USE_FIXED
- AVFixedDSPContext *fdsp;
-#else
- AVFloatDSPContext *fdsp;
-#endif /* USE_FIXED */
+ union {
+ AVFixedDSPContext *RENAME_FIXED(fdsp);
+ AVFloatDSPContext *fdsp;
+ };
int random_state;
/** @} */
@@ -262,18 +337,17 @@ typedef struct AACDecContext {
int warned_gain_control;
int warned_he_aac_mono;
- /* aacdec functions pointers */
- void (*imdct_and_windowing)(struct AACDecContext *ac, SingleChannelElement *sce);
- void (*apply_ltp)(struct AACDecContext *ac, SingleChannelElement *sce);
- void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns,
- IndividualChannelStream *ics, int decode);
- void (*windowing_and_mdct_ltp)(struct AACDecContext *ac, INTFLOAT *out,
- INTFLOAT *in, IndividualChannelStream *ics);
- void (*update_ltp)(struct AACDecContext *ac, SingleChannelElement *sce);
- void (*vector_pow43)(int *coefs, int len);
- void (*subband_scale)(int *dst, int *src, int scale, int offset, int len, void *log_context);
-} AACDecContext;
+ int is_fixed;
+};
+
+#if defined(USE_FIXED) && USE_FIXED
+#define fdsp RENAME_FIXED(fdsp)
+#endif
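
This define keeps the template sources readable: in a USE_FIXED build every reference to the fdsp member resolves to the fdsp_fixed union member (an AVFixedDSPContext *), so template bodies can keep writing ac->fdsp->... unchanged for both variants.

    /* with USE_FIXED set:
     *     ac->fdsp->vector_fmul_reverse(dst, src, win, len);
     * is preprocessed into
     *     ac->fdsp_fixed->vector_fmul_reverse(dst, src, win, len);
     * so no per-variant spelling is needed at the call sites.
     */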
-void ff_aacdec_init_mips(AACDecContext *c);
+int ff_aac_decode_init(struct AVCodecContext *avctx);
+int ff_aac_decode_init_float(struct AVCodecContext *avctx);
+int ff_aac_decode_init_fixed(struct AVCodecContext *avctx);
+int ff_aac_decode_ics(AACDecContext *ac, SingleChannelElement *sce,
+ GetBitContext *gb, int common_window, int scale_flag);
-#endif /* AVCODEC_AACDEC_H */
+#endif /* AVCODEC_AAC_AACDEC_H */
diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
new file mode 100644
index 0000000000..621baef8ca
--- /dev/null
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -0,0 +1,640 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec.h"
+#include "libavcodec/lpc_functions.h"
+
+#include "libavcodec/aactab.h"
+
+/**
+ * Convert integer scalefactors to the decoder's native expected
+ * scalefactor values.
+ */
+static void AAC_RENAME(dequant_scalefactors)(SingleChannelElement *sce)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ const enum BandType *band_type = sce->band_type;
+ const int *band_type_run_end = sce->band_type_run_end;
+ const int *sfo = sce->sfo;
+ INTFLOAT *sf = sce->AAC_RENAME(sf);
+
+ int g, i, idx = 0;
+ for (g = 0; g < ics->num_window_groups; g++) {
+ for (i = 0; i < ics->max_sfb;) {
+ int run_end = band_type_run_end[idx];
+ switch (band_type[idx]) {
+ case ZERO_BT:
+ for (; i < run_end; i++, idx++)
+ sf[idx] = FIXR(0.);
+ break;
+ case INTENSITY_BT: /* fallthrough */
+ case INTENSITY_BT2:
+ for (; i < run_end; i++, idx++) {
+#if USE_FIXED
+ sf[idx] = 100 - sfo[idx];
+#else
+ sf[idx] = ff_aac_pow2sf_tab[-sfo[idx] + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+ }
+ break;
+ case NOISE_BT:
+ for (; i < run_end; i++, idx++) {
+#if USE_FIXED
+ sf[idx] = -(100 + sfo[idx]);
+#else
+ sf[idx] = -ff_aac_pow2sf_tab[sfo[idx] + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+ }
+ break;
+ default:
+ for (; i < run_end; i++, idx++) {
+#if USE_FIXED
+ sf[idx] = -sfo[idx];
+#else
+ sf[idx] = -ff_aac_pow2sf_tab[sfo[idx] - 100 + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+ }
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * Mid/Side stereo decoding; reference: 4.6.8.1.3.
+ */
+static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement *cpe)
+{
+ const IndividualChannelStream *ics = &cpe->ch[0].ics;
+ INTFLOAT *ch0 = cpe->ch[0].AAC_RENAME(coeffs);
+ INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
+ int g, i, group, idx = 0;
+ const uint16_t *offsets = ics->swb_offset;
+ for (g = 0; g < ics->num_window_groups; g++) {
+ for (i = 0; i < ics->max_sfb; i++, idx++) {
+ if (cpe->ms_mask[idx] &&
+ cpe->ch[0].band_type[idx] < NOISE_BT &&
+ cpe->ch[1].band_type[idx] < NOISE_BT) {
+#if USE_FIXED
+ for (group = 0; group < ics->group_len[g]; group++) {
+ ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
+ ch1 + group * 128 + offsets[i],
+ offsets[i+1] - offsets[i]);
+#else
+ for (group = 0; group < ics->group_len[g]; group++) {
+ ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
+ ch1 + group * 128 + offsets[i],
+ offsets[i+1] - offsets[i]);
+#endif /* USE_FIXED */
+ }
+ }
+ }
+ ch0 += ics->group_len[g] * 128;
+ ch1 += ics->group_len[g] * 128;
+ }
+}
+
+/**
+ * intensity stereo decoding; reference: 4.6.8.2.3
+ *
+ * @param ms_present Indicates mid/side stereo presence. [0] mask is all 0s;
+ * [1] mask is decoded from bitstream; [2] mask is all 1s;
+ * [3] reserved for scalable AAC
+ */
+static void AAC_RENAME(apply_intensity_stereo)(AACDecContext *ac,
+ ChannelElement *cpe, int ms_present)
+{
+ const IndividualChannelStream *ics = &cpe->ch[1].ics;
+ SingleChannelElement *sce1 = &cpe->ch[1];
+ INTFLOAT *coef0 = cpe->ch[0].AAC_RENAME(coeffs), *coef1 = cpe->ch[1].AAC_RENAME(coeffs);
+ const uint16_t *offsets = ics->swb_offset;
+ int g, group, i, idx = 0;
+ int c;
+ INTFLOAT scale;
+ for (g = 0; g < ics->num_window_groups; g++) {
+ for (i = 0; i < ics->max_sfb;) {
+ if (sce1->band_type[idx] == INTENSITY_BT ||
+ sce1->band_type[idx] == INTENSITY_BT2) {
+ const int bt_run_end = sce1->band_type_run_end[idx];
+ for (; i < bt_run_end; i++, idx++) {
+ c = -1 + 2 * (sce1->band_type[idx] - 14);
+ if (ms_present)
+ c *= 1 - 2 * cpe->ms_mask[idx];
+ scale = c * sce1->AAC_RENAME(sf)[idx];
+ for (group = 0; group < ics->group_len[g]; group++)
+#if USE_FIXED
+ subband_scale(coef1 + group * 128 + offsets[i],
+ coef0 + group * 128 + offsets[i],
+ scale,
+ 23,
+ offsets[i + 1] - offsets[i] ,ac->avctx);
+#else
+ ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
+ coef0 + group * 128 + offsets[i],
+ scale,
+ offsets[i + 1] - offsets[i]);
+#endif /* USE_FIXED */
+ }
+ } else {
+ int bt_run_end = sce1->band_type_run_end[idx];
+ idx += bt_run_end - i;
+ i = bt_run_end;
+ }
+ }
+ coef0 += ics->group_len[g] * 128;
+ coef1 += ics->group_len[g] * 128;
+ }
+}
+
+/**
+ * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
+ *
+ * @param decode 1 if tool is used normally, 0 if tool is used in LTP.
+ * @param coef spectral coefficients
+ */
+static void AAC_RENAME(apply_tns)(void *_coef_param, TemporalNoiseShaping *tns,
+ IndividualChannelStream *ics, int decode)
+{
+ const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
+ int w, filt, m, i;
+ int bottom, top, order, start, end, size, inc;
+ INTFLOAT *coef_param = _coef_param;
+ INTFLOAT lpc[TNS_MAX_ORDER];
+ INTFLOAT tmp[TNS_MAX_ORDER+1];
+ UINTFLOAT *coef = coef_param;
+
+ if(!mmm)
+ return;
+
+ for (w = 0; w < ics->num_windows; w++) {
+ bottom = ics->num_swb;
+ for (filt = 0; filt < tns->n_filt[w]; filt++) {
+ top = bottom;
+ bottom = FFMAX(0, top - tns->length[w][filt]);
+ order = tns->order[w][filt];
+ if (order == 0)
+ continue;
+
+ // tns_decode_coef
+ compute_lpc_coefs(tns->AAC_RENAME(coef)[w][filt], order, lpc, 0, 0, 0);
+
+ start = ics->swb_offset[FFMIN(bottom, mmm)];
+ end = ics->swb_offset[FFMIN( top, mmm)];
+ if ((size = end - start) <= 0)
+ continue;
+ if (tns->direction[w][filt]) {
+ inc = -1;
+ start = end - 1;
+ } else {
+ inc = 1;
+ }
+ start += w * 128;
+
+ if (decode) {
+ // ar filter
+ for (m = 0; m < size; m++, start += inc)
+ for (i = 1; i <= FFMIN(m, order); i++)
+ coef[start] -= AAC_MUL26((INTFLOAT)coef[start - i * inc], lpc[i - 1]);
+ } else {
+ // ma filter
+ for (m = 0; m < size; m++, start += inc) {
+ tmp[0] = coef[start];
+ for (i = 1; i <= FFMIN(m, order); i++)
+ coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]);
+ for (i = order; i > 0; i--)
+ tmp[i] = tmp[i - 1];
+ }
+ }
+ }
+ }
+}
+
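
compute_lpc_coefs() performs the tns_decode_coef step here: it turns the transmitted, table-mapped reflection coefficients into direct-form LPC coefficients, which are then applied as an all-pole (AR) filter during normal decoding, or as an all-zero (MA) filter when TNS is run on the LTP prediction signal. The textbook step-up recursion it corresponds to looks like this; sign conventions differ between implementations, so treat it as illustrative only:

    #include <string.h>

    /* step-up recursion: reflection coefficients k[0..order-1] to
     * direct-form LPC coefficients a[0..order-1]; order <= 32 assumed */
    static void reflection_to_lpc(const float *k, float *a, int order)
    {
        float tmp[32];

        for (int m = 0; m < order; m++) {
            for (int i = 0; i < m; i++)
                tmp[i] = a[i] + k[m] * a[m - 1 - i];
            memcpy(a, tmp, m * sizeof(*a));
            a[m] = k[m];
        }
    }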
+/**
+ * Apply windowing and MDCT to obtain the spectral
+ * coefficient from the predicted sample by LTP.
+ */
+static inline void AAC_RENAME(windowing_and_mdct_ltp)(AACDecContext *ac,
+ INTFLOAT *out, INTFLOAT *in,
+ IndividualChannelStream *ics)
+{
+ const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+ const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+ const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+ const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+
+ if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
+ ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
+ } else {
+ memset(in, 0, 448 * sizeof(*in));
+ ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
+ }
+ if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
+ ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
+ } else {
+ ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
+ memset(in + 1024 + 576, 0, 448 * sizeof(*in));
+ }
+ ac->mdct_ltp_fn(ac->mdct_ltp, out, in, sizeof(INTFLOAT));
+}
+
+/**
+ * Apply the long term prediction
+ */
+static void AAC_RENAME(apply_ltp)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ const LongTermPrediction *ltp = &sce->ics.ltp;
+ const uint16_t *offsets = sce->ics.swb_offset;
+ int i, sfb;
+
+ if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+ INTFLOAT *predTime = sce->AAC_RENAME(output);
+ INTFLOAT *predFreq = ac->AAC_RENAME(buf_mdct);
+ int16_t num_samples = 2048;
+
+ if (ltp->lag < 1024)
+ num_samples = ltp->lag + 1024;
+ for (i = 0; i < num_samples; i++)
+ predTime[i] = AAC_MUL30(sce->AAC_RENAME(ltp_state)[i + 2048 - ltp->lag], ltp->AAC_RENAME(coef));
+ memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime));
+
+ AAC_RENAME(windowing_and_mdct_ltp)(ac, predFreq, predTime, &sce->ics);
+
+ if (sce->tns.present)
+ AAC_RENAME(apply_tns)(predFreq, &sce->tns, &sce->ics, 0);
+
+ for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
+ if (ltp->used[sfb])
+ for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
+ sce->AAC_RENAME(coeffs)[i] += (UINTFLOAT)predFreq[i];
+ }
+}
+
+/**
+ * Update the LTP buffer for the next frame.
+ */
+static void AAC_RENAME(update_ltp)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ INTFLOAT *saved = sce->AAC_RENAME(saved);
+ INTFLOAT *saved_ltp = sce->AAC_RENAME(coeffs);
+ const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+ const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+ int i;
+
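+ /* ltp_state keeps three 1024-sample blocks of history: the previous frame's
+ * output, the current output and a windowed extension (saved_ltp) built from
+ * the current frame's IMDCT buffer. */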
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ memcpy(saved_ltp, saved, 512 * sizeof(*saved_ltp));
+ memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp));
+ ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->AAC_RENAME(buf_mdct) + 960, &swindow[64], 64);
+
+ for (i = 0; i < 64; i++)
+ saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], swindow[63 - i]);
+ } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+ memcpy(saved_ltp, ac->AAC_RENAME(buf_mdct) + 512, 448 * sizeof(*saved_ltp));
+ memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp));
+ ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->AAC_RENAME(buf_mdct) + 960, &swindow[64], 64);
+
+ for (i = 0; i < 64; i++)
+ saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], swindow[63 - i]);
+ } else { // LONG_STOP or ONLY_LONG
+ ac->fdsp->vector_fmul_reverse(saved_ltp, ac->AAC_RENAME(buf_mdct) + 512, &lwindow[512], 512);
+
+ for (i = 0; i < 512; i++)
+ saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], lwindow[511 - i]);
+ }
+
+ memcpy(sce->AAC_RENAME(ltp_state), sce->AAC_RENAME(ltp_state)+1024,
+ 1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+ memcpy(sce->AAC_RENAME(ltp_state) + 1024, sce->AAC_RENAME(output),
+ 1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+ memcpy(sce->AAC_RENAME(ltp_state) + 2048, saved_ltp,
+ 1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+}
+
+/**
+ * Conduct IMDCT and windowing.
+ */
+static void AAC_RENAME(imdct_and_windowing)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ INTFLOAT *in = sce->AAC_RENAME(coeffs);
+ INTFLOAT *out = sce->AAC_RENAME(output);
+ INTFLOAT *saved = sce->AAC_RENAME(saved);
+ const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+ const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+ const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+ INTFLOAT *buf = ac->AAC_RENAME(buf_mdct);
+ INTFLOAT *temp = ac->AAC_RENAME(temp);
+ int i;
+
+ // imdct
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ for (i = 0; i < 1024; i += 128)
+ ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(INTFLOAT));
+ } else {
+ ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(INTFLOAT));
+ }
+
+ /* window overlapping
+ * NOTE: To simplify the overlapping code, all 'meaningless' short to long
+ * and long to short transitions are considered to be short to short
+ * transitions. This leaves just two cases (long to long and short to short)
+ * with a little special sauce for EIGHT_SHORT_SEQUENCE.
+ */
+ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
+ (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
+ ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
+ } else {
+ memcpy( out, saved, 448 * sizeof(*out));
+
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
+ ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64);
+ ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
+ ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
+ ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
+ memcpy( out + 448 + 4*128, temp, 64 * sizeof(*out));
+ } else {
+ ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
+ memcpy( out + 576, buf + 64, 448 * sizeof(*out));
+ }
+ }
+
+ // buffer update
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ memcpy( saved, temp + 64, 64 * sizeof(*saved));
+ ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
+ ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
+ ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
+ memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved));
+ } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+ memcpy( saved, buf + 512, 448 * sizeof(*saved));
+ memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(*saved));
+ } else { // LONG_STOP or ONLY_LONG
+ memcpy( saved, buf + 512, 512 * sizeof(*saved));
+ }
+}
+
+/**
+ * Conduct IMDCT and windowing for the 960-sample frame length.
+ */
+static void AAC_RENAME(imdct_and_windowing_960)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ INTFLOAT *in = sce->AAC_RENAME(coeffs);
+ INTFLOAT *out = sce->AAC_RENAME(output);
+ INTFLOAT *saved = sce->AAC_RENAME(saved);
+ const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
+ const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_long_960) : AAC_RENAME(sine_960);
+ const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
+ INTFLOAT *buf = ac->AAC_RENAME(buf_mdct);
+ INTFLOAT *temp = ac->AAC_RENAME(temp);
+ int i;
+
+ // imdct
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ for (i = 0; i < 8; i++)
+ ac->mdct120_fn(ac->mdct120, buf + i * 120, in + i * 128, sizeof(INTFLOAT));
+ } else {
+ ac->mdct960_fn(ac->mdct960, buf, in, sizeof(INTFLOAT));
+ }
+
+ /* window overlapping
+ * NOTE: To simplify the overlapping code, all 'meaningless' short to long
+ * and long to short transitions are considered to be short to short
+ * transitions. This leaves just two cases (long to long and short to short)
+ * with a little special sauce for EIGHT_SHORT_SEQUENCE.
+ */
+
+ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
+ (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
+ ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 480);
+ } else {
+ memcpy( out, saved, 420 * sizeof(*out));
+
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ ac->fdsp->vector_fmul_window(out + 420 + 0*120, saved + 420, buf + 0*120, swindow_prev, 60);
+ ac->fdsp->vector_fmul_window(out + 420 + 1*120, buf + 0*120 + 60, buf + 1*120, swindow, 60);
+ ac->fdsp->vector_fmul_window(out + 420 + 2*120, buf + 1*120 + 60, buf + 2*120, swindow, 60);
+ ac->fdsp->vector_fmul_window(out + 420 + 3*120, buf + 2*120 + 60, buf + 3*120, swindow, 60);
+ ac->fdsp->vector_fmul_window(temp, buf + 3*120 + 60, buf + 4*120, swindow, 60);
+ memcpy( out + 420 + 4*120, temp, 60 * sizeof(*out));
+ } else {
+ ac->fdsp->vector_fmul_window(out + 420, saved + 420, buf, swindow_prev, 60);
+ memcpy( out + 540, buf + 60, 420 * sizeof(*out));
+ }
+ }
+
+ // buffer update
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ memcpy( saved, temp + 60, 60 * sizeof(*saved));
+ ac->fdsp->vector_fmul_window(saved + 60, buf + 4*120 + 60, buf + 5*120, swindow, 60);
+ ac->fdsp->vector_fmul_window(saved + 180, buf + 5*120 + 60, buf + 6*120, swindow, 60);
+ ac->fdsp->vector_fmul_window(saved + 300, buf + 6*120 + 60, buf + 7*120, swindow, 60);
+ memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved));
+ } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+ memcpy( saved, buf + 480, 420 * sizeof(*saved));
+ memcpy( saved + 420, buf + 7*120 + 60, 60 * sizeof(*saved));
+ } else { // LONG_STOP or ONLY_LONG
+ memcpy( saved, buf + 480, 480 * sizeof(*saved));
+ }
+}
+
+static void AAC_RENAME(imdct_and_windowing_ld)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ INTFLOAT *in = sce->AAC_RENAME(coeffs);
+ INTFLOAT *out = sce->AAC_RENAME(output);
+ INTFLOAT *saved = sce->AAC_RENAME(saved);
+ INTFLOAT *buf = ac->AAC_RENAME(buf_mdct);
+
+ // imdct
+ ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
+
+ // window overlapping
+ if (ics->use_kb_window[1]) {
+ // AAC LD uses a low overlap sine window instead of a KBD window
+ memcpy(out, saved, 192 * sizeof(*out));
+ ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, AAC_RENAME2(sine_128), 64);
+ memcpy( out + 320, buf + 64, 192 * sizeof(*out));
+ } else {
+ ac->fdsp->vector_fmul_window(out, saved, buf, AAC_RENAME2(sine_512), 256);
+ }
+
+ // buffer update
+ memcpy(saved, buf + 256, 256 * sizeof(*saved));
+}
+
+static void AAC_RENAME(imdct_and_windowing_eld)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ UINTFLOAT *in = sce->AAC_RENAME(coeffs);
+ INTFLOAT *out = sce->AAC_RENAME(output);
+ INTFLOAT *saved = sce->AAC_RENAME(saved);
+ INTFLOAT *buf = ac->AAC_RENAME(buf_mdct);
+ int i;
+ const int n = ac->oc[1].m4ac.frame_length_short ? 480 : 512;
+ const int n2 = n >> 1;
+ const int n4 = n >> 2;
+ const INTFLOAT *const window = n == 480 ? AAC_RENAME(ff_aac_eld_window_480) :
+ AAC_RENAME(ff_aac_eld_window_512);
+
+ // Inverse transform, mapped to the conventional IMDCT by
+ // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
+ // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
+ // International Conference on Audio, Language and Image Processing, ICALIP 2008.
+ // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
+ for (i = 0; i < n2; i+=2) {
+ INTFLOAT temp;
+ temp = in[i ]; in[i ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
+ temp = -in[i + 1]; in[i + 1] = in[n - 2 - i]; in[n - 2 - i] = temp;
+ }
+
+ if (n == 480)
+ ac->mdct480_fn(ac->mdct480, buf, in, sizeof(INTFLOAT));
+ else
+ ac->mdct512_fn(ac->mdct512, buf, in, sizeof(INTFLOAT));
+
+ for (i = 0; i < n; i+=2) {
+ buf[i + 0] = -(UINTFLOAT)(USE_FIXED + 1)*buf[i + 0];
+ buf[i + 1] = (UINTFLOAT)(USE_FIXED + 1)*buf[i + 1];
+ }
+ // Like with the regular IMDCT at this point we still have the middle half
+ // of a transform but with even symmetry on the left and odd symmetry on
+ // the right
+
+ // window overlapping
+ // The spec says to use samples [0..511] but the reference decoder uses
+ // samples [128..639].
+ for (i = n4; i < n2; i ++) {
+ out[i - n4] = AAC_MUL31( buf[ n2 - 1 - i] , window[i - n4]) +
+ AAC_MUL31( saved[ i + n2] , window[i + n - n4]) +
+ AAC_MUL31(-saved[n + n2 - 1 - i] , window[i + 2*n - n4]) +
+ AAC_MUL31(-saved[ 2*n + n2 + i] , window[i + 3*n - n4]);
+ }
+ for (i = 0; i < n2; i ++) {
+ out[n4 + i] = AAC_MUL31( buf[ i] , window[i + n2 - n4]) +
+ AAC_MUL31(-saved[ n - 1 - i] , window[i + n2 + n - n4]) +
+ AAC_MUL31(-saved[ n + i] , window[i + n2 + 2*n - n4]) +
+ AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]);
+ }
+ for (i = 0; i < n4; i ++) {
+ out[n2 + n4 + i] = AAC_MUL31( buf[ i + n2] , window[i + n - n4]) +
+ AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) +
+ AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]);
+ }
+
+ // buffer update
+ memmove(saved + n, saved, 2 * n * sizeof(*saved));
+ memcpy( saved, buf, n * sizeof(*saved));
+}
+
+static void AAC_RENAME(clip_output)(AACDecContext *ac, ChannelElement *che,
+ int type, int samples)
+{
+#if USE_FIXED
+ /* preparation for resampler */
+ for (int j = 0; j < samples; j++){
+ che->ch[0].output_fixed[j] = (int32_t)av_clip64((int64_t)che->ch[0].output_fixed[j]*128,
+ INT32_MIN, INT32_MAX-0x8000)+0x8000;
+ if (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))
+ che->ch[1].output_fixed[j] = (int32_t)av_clip64((int64_t)che->ch[1].output_fixed[j]*128,
+ INT32_MIN, INT32_MAX-0x8000)+0x8000;
+ }
+#endif
+}
+
+static inline void reset_all_predictors(PredictorState *ps)
+{
+ int i;
+ for (i = 0; i < MAX_PREDICTORS; i++)
+ reset_predict_state(&ps[i]);
+}
+
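+/* Reset every 30th predictor state, starting at index group_num - 1
+ * (predictor reset groups are numbered 1..30). */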
+static inline void reset_predictor_group(PredictorState *ps, int group_num)
+{
+ int i;
+ for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
+ reset_predict_state(&ps[i]);
+}
+
+/**
+ * Apply AAC-Main style frequency domain prediction.
+ */
+static void AAC_RENAME(apply_prediction)(AACDecContext *ac, SingleChannelElement *sce)
+{
+ int sfb, k;
+
+ if (!sce->ics.predictor_initialized) {
+ reset_all_predictors(sce->AAC_RENAME(predictor_state));
+ sce->ics.predictor_initialized = 1;
+ }
+
+ if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+ for (sfb = 0;
+ sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
+ sfb++) {
+ for (k = sce->ics.swb_offset[sfb];
+ k < sce->ics.swb_offset[sfb + 1];
+ k++) {
+ predict(&sce->AAC_RENAME(predictor_state)[k],
+ &sce->AAC_RENAME(coeffs)[k],
+ sce->ics.predictor_present &&
+ sce->ics.prediction_used[sfb]);
+ }
+ }
+ if (sce->ics.predictor_reset_group)
+ reset_predictor_group(sce->AAC_RENAME(predictor_state),
+ sce->ics.predictor_reset_group);
+ } else
+ reset_all_predictors(sce->AAC_RENAME(predictor_state));
+}
+
+static av_cold void AAC_RENAME(aac_dsp_init)(AACDecDSP *aac_dsp)
+{
+#define SET(member) aac_dsp->member = AAC_RENAME(member)
+ SET(dequant_scalefactors);
+ SET(apply_mid_side_stereo);
+ SET(apply_intensity_stereo);
+ SET(apply_tns);
+ SET(apply_ltp);
+ SET(update_ltp);
+
+ SET(apply_prediction);
+
+ SET(imdct_and_windowing);
+ SET(imdct_and_windowing_960);
+ SET(imdct_and_windowing_ld);
+ SET(imdct_and_windowing_eld);
+
+ SET(apply_dependent_coupling);
+ SET(apply_independent_coupling);
+
+ SET(clip_output);
+#undef SET
+}
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
new file mode 100644
index 0000000000..de90880884
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -0,0 +1,103 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+
+#include "libavutil/thread.h"
+
+#include "libavcodec/aac_defines.h"
+
+#include "libavcodec/avcodec.h"
+#include "aacdec.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/sinewin_fixed_tablegen.h"
+#include "libavcodec/kbdwin.h"
+#include "libavcodec/cbrt_data.h"
+#include "libavcodec/aacsbr.h"
+
+DECLARE_ALIGNED(32, static int, aac_kbd_long_1024_fixed)[1024];
+DECLARE_ALIGNED(32, static int, aac_kbd_short_128_fixed)[128];
+DECLARE_ALIGNED(32, static int, aac_kbd_long_960_fixed)[960];
+DECLARE_ALIGNED(32, static int, aac_kbd_short_120_fixed)[120];
+
+static void init_tables_fixed_fn(void)
+{
+ ff_cbrt_tableinit_fixed();
+
+ ff_kbd_window_init_fixed(aac_kbd_long_1024_fixed, 4.0, 1024);
+ ff_kbd_window_init_fixed(aac_kbd_short_128_fixed, 6.0, 128);
+
+ ff_kbd_window_init_fixed(aac_kbd_long_960_fixed, 4.0, 960);
+ ff_kbd_window_init_fixed(aac_kbd_short_120_fixed, 6.0, 120);
+
+ ff_aac_sbr_init_fixed();
+
+ init_sine_windows_fixed();
+}
+
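+/* Fractional part of the dependent/independent coupling gain 2^(gain/8): the
+ * low three bits of the gain magnitude select an entry here, the remaining
+ * bits become a power-of-two shift in the coupling functions below. */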
+static const int cce_scale_fixed[8] = {
+ Q30(1.0), //2^(0/8)
+ Q30(1.0905077327), //2^(1/8)
+ Q30(1.1892071150), //2^(2/8)
+ Q30(1.2968395547), //2^(3/8)
+ Q30(1.4142135624), //2^(4/8)
+ Q30(1.5422108254), //2^(5/8)
+ Q30(1.6817928305), //2^(6/8)
+ Q30(1.8340080864), //2^(7/8)
+};
+
+/** Dequantization-related */
+#include "aacdec_fixed_dequant.h"
+
+#include "aacdec_fixed_coupling.h"
+#include "aacdec_fixed_prediction.h"
+#include "aacdec_dsp_template.c"
+#include "aacdec_proc_template.c"
+
+av_cold int ff_aac_decode_init_fixed(AVCodecContext *avctx)
+{
+ static AVOnce init_fixed_once = AV_ONCE_INIT;
+ AACDecContext *ac = avctx->priv_data;
+
+ ac->is_fixed = 1;
+ avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+
+ aac_dsp_init_fixed(&ac->dsp);
+ aac_proc_init_fixed(&ac->proc);
+
+ ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+ if (!ac->fdsp)
+ return AVERROR(ENOMEM);
+
+ ff_thread_once(&init_fixed_once, init_tables_fixed_fn);
+
+ return ff_aac_decode_init(avctx);
+}
diff --git a/libavcodec/aac/aacdec_fixed_coupling.h b/libavcodec/aac/aacdec_fixed_coupling.h
new file mode 100644
index 0000000000..add4cd69da
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_coupling.h
@@ -0,0 +1,137 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
+#define AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
+
+#include "aacdec.h"
+
+/**
+ * Apply dependent channel coupling (applied before IMDCT).
+ *
+ * @param index index into coupling gain array
+ */
+static void AAC_RENAME(apply_dependent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index)
+{
+ IndividualChannelStream *ics = &cce->ch[0].ics;
+ const uint16_t *offsets = ics->swb_offset;
+ int *dest = target->coeffs_fixed;
+ const int *src = cce->ch[0].coeffs_fixed;
+ int g, i, group, k, idx = 0;
+ if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "Dependent coupling is not supported together with LTP\n");
+ return;
+ }
+ for (g = 0; g < ics->num_window_groups; g++) {
+ for (i = 0; i < ics->max_sfb; i++, idx++) {
+ if (cce->ch[0].band_type[idx] != ZERO_BT) {
+ const int gain = cce->coup.gain[index][idx];
+ int shift, round, c, tmp;
+
+ if (gain < 0) {
+ c = -cce_scale_fixed[-gain & 7];
+ shift = (-gain-1024) >> 3;
+ }
+ else {
+ c = cce_scale_fixed[gain & 7];
+ shift = (gain-1024) >> 3;
+ }
+
+ if (shift < -31) {
+ // Nothing to do
+ } else if (shift < 0) {
+ shift = -shift;
+ round = 1 << (shift - 1);
+
+ for (group = 0; group < ics->group_len[g]; group++) {
+ for (k = offsets[i]; k < offsets[i + 1]; k++) {
+ tmp = (int)(((int64_t)src[group * 128 + k] * c + \
+ (int64_t)0x1000000000) >> 37);
+ dest[group * 128 + k] += (tmp + (int64_t)round) >> shift;
+ }
+ }
+ }
+ else {
+ for (group = 0; group < ics->group_len[g]; group++) {
+ for (k = offsets[i]; k < offsets[i + 1]; k++) {
+ tmp = (int)(((int64_t)src[group * 128 + k] * c + \
+ (int64_t)0x1000000000) >> 37);
+ dest[group * 128 + k] += tmp * (1U << shift);
+ }
+ }
+ }
+ }
+ }
+ dest += ics->group_len[g] * 128;
+ src += ics->group_len[g] * 128;
+ }
+}
+
+/**
+ * Apply independent channel coupling (applied after IMDCT).
+ *
+ * @param index index into coupling gain array
+ */
+static void AAC_RENAME(apply_independent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index)
+{
+ int i, c, shift, round, tmp;
+ const int gain = cce->coup.gain[index][0];
+ const int *src = cce->ch[0].output_fixed;
+ unsigned int *dest = target->output_fixed;
+ const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
+
+ c = cce_scale_fixed[gain & 7];
+ shift = (gain-1024) >> 3;
+ if (shift < -31) {
+ return;
+ } else if (shift < 0) {
+ shift = -shift;
+ round = 1 << (shift - 1);
+
+ for (i = 0; i < len; i++) {
+ tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
+ dest[i] += (tmp + round) >> shift;
+ }
+ }
+ else {
+ for (i = 0; i < len; i++) {
+ tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
+ dest[i] += tmp * (1U << shift);
+ }
+ }
+}
+
+#endif /* AVCODEC_AAC_AACDEC_FIXED_COUPLING_H */
diff --git a/libavcodec/aac/aacdec_fixed_dequant.h b/libavcodec/aac/aacdec_fixed_dequant.h
new file mode 100644
index 0000000000..5fb84fbed0
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_dequant.h
@@ -0,0 +1,174 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FIXED_DEQUANT_H
+#define AVCODEC_AAC_AACDEC_FIXED_DEQUANT_H
+
+#include "aacdec_tab.h"
+
+static inline void vector_pow43(int *coefs, int len)
+{
+ int i, coef;
+
+ for (i=0; i<len; i++) {
+ coef = coefs[i];
+ if (coef < 0)
+ coef = -(int)ff_cbrt_tab_fixed[(-coef) & 8191];
+ else
+ coef = (int)ff_cbrt_tab_fixed[ coef & 8191];
+ coefs[i] = coef;
+ }
+}
+
+/* 2^0, 2^0.25, 2^0.5, 2^0.75 */
+static const int exp2tab[4] = {
+ Q31(1.0000000000/2), Q31(1.1892071150/2),
+ Q31(1.4142135624/2), Q31(1.6817928305/2)
+};
+
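+/* subband_scale() and noise_scale() split a gain of 2^(scale/4) into the
+ * fractional factor above (stored as Q31 of value/2) and an integer shift. */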
+static inline void subband_scale(int *dst, int *src, int scale,
+ int offset, int len, void *log_context)
+{
+ int ssign = scale < 0 ? -1 : 1;
+ int s = FFABS(scale);
+ unsigned int round;
+ int i, out, c = exp2tab[s & 3];
+
+ s = offset - (s >> 2);
+
+ if (s > 31) {
+ for (i=0; i<len; i++) {
+ dst[i] = 0;
+ }
+ } else if (s > 0) {
+ round = 1 << (s-1);
+ for (i=0; i<len; i++) {
+ out = (int)(((int64_t)src[i] * c) >> 32);
+ dst[i] = ((int)(out+round) >> s) * ssign;
+ }
+ } else if (s > -32) {
+ s = s + 32;
+ round = 1U << (s-1);
+ for (i=0; i<len; i++) {
+ out = (int)((int64_t)((int64_t)src[i] * c + round) >> s);
+ dst[i] = out * (unsigned)ssign;
+ }
+ } else {
+ av_log(log_context, AV_LOG_ERROR, "Overflow in subband_scale()\n");
+ }
+}
+
+static void noise_scale(int *coefs, int scale, int band_energy, int len)
+{
+ int s = -scale;
+ unsigned int round;
+ int i, out, c = exp2tab[s & 3];
+ int nlz = 0;
+
+ av_assert0(s >= 0);
+ while (band_energy > 0x7fff) {
+ band_energy >>= 1;
+ nlz++;
+ }
+ c /= band_energy;
+ s = 21 + nlz - (s >> 2);
+
+ if (s > 31) {
+ for (i=0; i<len; i++) {
+ coefs[i] = 0;
+ }
+ } else if (s >= 0) {
+ round = s ? 1 << (s-1) : 0;
+ for (i=0; i<len; i++) {
+ out = (int)(((int64_t)coefs[i] * c) >> 32);
+ coefs[i] = -((int)(out+round) >> s);
+ }
+ }
+ else {
+ s = s + 32;
+ if (s > 0) {
+ round = 1 << (s-1);
+ for (i=0; i<len; i++) {
+ out = (int)((int64_t)((int64_t)coefs[i] * c + round) >> s);
+ coefs[i] = -out;
+ }
+ } else {
+ for (i=0; i<len; i++)
+ coefs[i] = -(int64_t)coefs[i] * c * (1 << -s);
+ }
+ }
+}
+
+static inline int *DEC_SPAIR(int *dst, unsigned idx)
+{
+ dst[0] = (idx & 15) - 4;
+ dst[1] = (idx >> 4 & 15) - 4;
+
+ return dst + 2;
+}
+
+static inline int *DEC_SQUAD(int *dst, unsigned idx)
+{
+ dst[0] = (idx & 3) - 1;
+ dst[1] = (idx >> 2 & 3) - 1;
+ dst[2] = (idx >> 4 & 3) - 1;
+ dst[3] = (idx >> 6 & 3) - 1;
+
+ return dst + 4;
+}
+
+static inline int *DEC_UPAIR(int *dst, unsigned idx, unsigned sign)
+{
+ dst[0] = (idx & 15) * (1 - (sign & 0xFFFFFFFE));
+ dst[1] = (idx >> 4 & 15) * (1 - ((sign & 1) * 2));
+
+ return dst + 2;
+}
+
+static inline int *DEC_UQUAD(int *dst, unsigned idx, unsigned sign)
+{
+ unsigned nz = idx >> 12;
+
+ dst[0] = (idx & 3) * (1 + (((int)sign >> 31) * 2));
+ sign <<= nz & 1;
+ nz >>= 1;
+ dst[1] = (idx >> 2 & 3) * (1 + (((int)sign >> 31) * 2));
+ sign <<= nz & 1;
+ nz >>= 1;
+ dst[2] = (idx >> 4 & 3) * (1 + (((int)sign >> 31) * 2));
+ sign <<= nz & 1;
+ nz >>= 1;
+ dst[3] = (idx >> 6 & 3) * (1 + (((int)sign >> 31) * 2));
+
+ return dst + 4;
+}
+
+#endif /* AVCODEC_AAC_AACDEC_FIXED_DEQUANT_H */
diff --git a/libavcodec/aac/aacdec_fixed_prediction.h b/libavcodec/aac/aacdec_fixed_prediction.h
new file mode 100644
index 0000000000..6fb3354865
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_prediction.h
@@ -0,0 +1,151 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FIXED_PREDICTION_H
+#define AVCODEC_AAC_AACDEC_FIXED_PREDICTION_H
+
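+/* The flt16_* helpers emulate the reduced-precision arithmetic of the
+ * AAC-Main predictor state by keeping only the top bits of the SoftFloat
+ * mantissa (rounded to nearest, rounded to even, or truncated). */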
+static av_always_inline SoftFloat flt16_round(SoftFloat pf)
+{
+ SoftFloat tmp;
+ int s;
+
+ tmp.exp = pf.exp;
+ s = pf.mant >> 31;
+ tmp.mant = (pf.mant ^ s) - s;
+ tmp.mant = (tmp.mant + 0x00200000U) & 0xFFC00000U;
+ tmp.mant = (tmp.mant ^ s) - s;
+
+ return tmp;
+}
+
+static av_always_inline SoftFloat flt16_even(SoftFloat pf)
+{
+ SoftFloat tmp;
+ int s;
+
+ tmp.exp = pf.exp;
+ s = pf.mant >> 31;
+ tmp.mant = (pf.mant ^ s) - s;
+ tmp.mant = (tmp.mant + 0x001FFFFFU + (tmp.mant & 0x00400000U >> 16)) & 0xFFC00000U;
+ tmp.mant = (tmp.mant ^ s) - s;
+
+ return tmp;
+}
+
+static av_always_inline SoftFloat flt16_trunc(SoftFloat pf)
+{
+ SoftFloat pun;
+ int s;
+
+ pun.exp = pf.exp;
+ s = pf.mant >> 31;
+ pun.mant = (pf.mant ^ s) - s;
+ pun.mant = pun.mant & 0xFFC00000U;
+ pun.mant = (pun.mant ^ s) - s;
+
+ return pun;
+}
+
+static av_always_inline void predict(PredictorState *ps, int *coef,
+ int output_enable)
+{
+ const SoftFloat a = { 1023410176, 0 }; // 61.0 / 64
+ const SoftFloat alpha = { 973078528, 0 }; // 29.0 / 32
+ SoftFloat e0, e1;
+ SoftFloat pv;
+ SoftFloat k1, k2;
+ SoftFloat r0 = ps->r0, r1 = ps->r1;
+ SoftFloat cor0 = ps->cor0, cor1 = ps->cor1;
+ SoftFloat var0 = ps->var0, var1 = ps->var1;
+ SoftFloat tmp;
+
+ if (var0.exp > 1 || (var0.exp == 1 && var0.mant > 0x20000000)) {
+ k1 = av_mul_sf(cor0, flt16_even(av_div_sf(a, var0)));
+ }
+ else {
+ k1.mant = 0;
+ k1.exp = 0;
+ }
+
+ if (var1.exp > 1 || (var1.exp == 1 && var1.mant > 0x20000000)) {
+ k2 = av_mul_sf(cor1, flt16_even(av_div_sf(a, var1)));
+ }
+ else {
+ k2.mant = 0;
+ k2.exp = 0;
+ }
+
+ tmp = av_mul_sf(k1, r0);
+ pv = flt16_round(av_add_sf(tmp, av_mul_sf(k2, r1)));
+ if (output_enable) {
+ int shift = 28 - pv.exp;
+
+ if (shift < 31) {
+ if (shift > 0) {
+ *coef += (unsigned)((pv.mant + (1 << (shift - 1))) >> shift);
+ } else
+ *coef += (unsigned)pv.mant << -shift;
+ }
+ }
+
+ e0 = av_int2sf(*coef, 2);
+ e1 = av_sub_sf(e0, tmp);
+
+ ps->cor1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor1), av_mul_sf(r1, e1)));
+ tmp = av_add_sf(av_mul_sf(r1, r1), av_mul_sf(e1, e1));
+ tmp.exp--;
+ ps->var1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var1), tmp));
+ ps->cor0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor0), av_mul_sf(r0, e0)));
+ tmp = av_add_sf(av_mul_sf(r0, r0), av_mul_sf(e0, e0));
+ tmp.exp--;
+ ps->var0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var0), tmp));
+
+ ps->r1 = flt16_trunc(av_mul_sf(a, av_sub_sf(r0, av_mul_sf(k1, e0))));
+ ps->r0 = flt16_trunc(av_mul_sf(a, e0));
+}
+
+static av_always_inline void reset_predict_state(PredictorState *ps)
+{
+ ps->r0.mant = 0;
+ ps->r0.exp = 0;
+ ps->r1.mant = 0;
+ ps->r1.exp = 0;
+ ps->cor0.mant = 0;
+ ps->cor0.exp = 0;
+ ps->cor1.mant = 0;
+ ps->cor1.exp = 0;
+ ps->var0.mant = 0x20000000;
+ ps->var0.exp = 1;
+ ps->var1.mant = 0x20000000;
+ ps->var1.exp = 1;
+}
+
+#endif /* AVCODEC_AAC_AACDEC_FIXED_PREDICTION_H */
diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
new file mode 100644
index 0000000000..03ec264c50
--- /dev/null
+++ b/libavcodec/aac/aacdec_float.c
@@ -0,0 +1,178 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 0
+
+#include "libavutil/thread.h"
+
+#include "libavcodec/aac_defines.h"
+
+#include "libavcodec/avcodec.h"
+#include "aacdec.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/sinewin.h"
+#include "libavcodec/kbdwin.h"
+#include "libavcodec/cbrt_data.h"
+#include "libavutil/mathematics.h"
+#include "libavcodec/aacsbr.h"
+
+DECLARE_ALIGNED(32, static float, sine_120)[120];
+DECLARE_ALIGNED(32, static float, sine_960)[960];
+DECLARE_ALIGNED(32, static float, aac_kbd_long_960)[960];
+DECLARE_ALIGNED(32, static float, aac_kbd_short_120)[120];
+
+static void init_tables_float_fn(void)
+{
+ ff_cbrt_tableinit();
+
+ ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
+ ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
+
+ ff_kbd_window_init(aac_kbd_long_960, 4.0, 960);
+ ff_kbd_window_init(aac_kbd_short_120, 6.0, 120);
+
+ ff_sine_window_init(sine_960, 960);
+ ff_sine_window_init(sine_120, 120);
+ ff_init_ff_sine_windows(9);
+
+ ff_aac_sbr_init();
+
+ ff_aac_float_common_init();
+}
+
+static const float cce_scale[] = {
+ 1.09050773266525765921, //2^(1/8)
+ 1.18920711500272106672, //2^(1/4)
+ M_SQRT2,
+ 2,
+};
+
+/** Dequantization-related */
+#include "aacdec_tab.h"
+#include "libavutil/intfloat.h"
+
+#include "config.h"
+#if ARCH_ARM
+#include "libavcodec/arm/aac.h"
+#endif
+
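+/* Scalar fallbacks for the codebook-vector expansion helpers; architecture
+ * specific headers (e.g. arm/aac.h above) may provide SIMD replacements,
+ * hence the #ifndef guards. */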
+#ifndef VMUL2
+static inline float *VMUL2(float *dst, const float *v, unsigned idx,
+ const float *scale)
+{
+ float s = *scale;
+ *dst++ = v[idx & 15] * s;
+ *dst++ = v[idx>>4 & 15] * s;
+ return dst;
+}
+#endif
+
+#ifndef VMUL4
+static inline float *VMUL4(float *dst, const float *v, unsigned idx,
+ const float *scale)
+{
+ float s = *scale;
+ *dst++ = v[idx & 3] * s;
+ *dst++ = v[idx>>2 & 3] * s;
+ *dst++ = v[idx>>4 & 3] * s;
+ *dst++ = v[idx>>6 & 3] * s;
+ return dst;
+}
+#endif
+
+#ifndef VMUL2S
+static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
+ unsigned sign, const float *scale)
+{
+ union av_intfloat32 s0, s1;
+
+ s0.f = s1.f = *scale;
+ s0.i ^= sign >> 1 << 31;
+ s1.i ^= sign << 31;
+
+ *dst++ = v[idx & 15] * s0.f;
+ *dst++ = v[idx>>4 & 15] * s1.f;
+
+ return dst;
+}
+#endif
+
+#ifndef VMUL4S
+static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
+ unsigned sign, const float *scale)
+{
+ unsigned nz = idx >> 12;
+ union av_intfloat32 s = { .f = *scale };
+ union av_intfloat32 t;
+
+ t.i = s.i ^ (sign & 1U<<31);
+ *dst++ = v[idx & 3] * t.f;
+
+ sign <<= nz & 1; nz >>= 1;
+ t.i = s.i ^ (sign & 1U<<31);
+ *dst++ = v[idx>>2 & 3] * t.f;
+
+ sign <<= nz & 1; nz >>= 1;
+ t.i = s.i ^ (sign & 1U<<31);
+ *dst++ = v[idx>>4 & 3] * t.f;
+
+ sign <<= nz & 1;
+ t.i = s.i ^ (sign & 1U<<31);
+ *dst++ = v[idx>>6 & 3] * t.f;
+
+ return dst;
+}
+#endif
+
+#include "aacdec_float_coupling.h"
+#include "aacdec_float_prediction.h"
+#include "aacdec_dsp_template.c"
+#include "aacdec_proc_template.c"
+
+av_cold int ff_aac_decode_init_float(AVCodecContext *avctx)
+{
+ static AVOnce init_float_once = AV_ONCE_INIT;
+ AACDecContext *ac = avctx->priv_data;
+
+ ac->is_fixed = 0;
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+ aac_dsp_init(&ac->dsp);
+ aac_proc_init(&ac->proc);
+
+ ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+ if (!ac->fdsp)
+ return AVERROR(ENOMEM);
+
+ ff_thread_once(&init_float_once, init_tables_float_fn);
+
+ return ff_aac_decode_init(avctx);
+}
diff --git a/libavcodec/aac/aacdec_float_coupling.h b/libavcodec/aac/aacdec_float_coupling.h
new file mode 100644
index 0000000000..50ad76eda2
--- /dev/null
+++ b/libavcodec/aac/aacdec_float_coupling.h
@@ -0,0 +1,90 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FLOAT_COUPLING_H
+#define AVCODEC_AAC_AACDEC_FLOAT_COUPLING_H
+
+#include "aacdec.h"
+
+/**
+ * Apply dependent channel coupling (applied before IMDCT).
+ *
+ * @param index index into coupling gain array
+ */
+static void AAC_RENAME(apply_dependent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index)
+{
+ IndividualChannelStream *ics = &cce->ch[0].ics;
+ const uint16_t *offsets = ics->swb_offset;
+ float *dest = target->coeffs;
+ const float *src = cce->ch[0].coeffs;
+ int g, i, group, k, idx = 0;
+ if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "Dependent coupling is not supported together with LTP\n");
+ return;
+ }
+ for (g = 0; g < ics->num_window_groups; g++) {
+ for (i = 0; i < ics->max_sfb; i++, idx++) {
+ if (cce->ch[0].band_type[idx] != ZERO_BT) {
+ const float gain = cce->coup.gain[index][idx];
+ for (group = 0; group < ics->group_len[g]; group++) {
+ for (k = offsets[i]; k < offsets[i + 1]; k++) {
+ // FIXME: SIMDify
+ dest[group * 128 + k] += gain * src[group * 128 + k];
+ }
+ }
+ }
+ }
+ dest += ics->group_len[g] * 128;
+ src += ics->group_len[g] * 128;
+ }
+}
+
+/**
+ * Apply independent channel coupling (applied after IMDCT).
+ *
+ * @param index index into coupling gain array
+ */
+static void AAC_RENAME(apply_independent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int index)
+{
+ const float gain = cce->coup.gain[index][0];
+ const float *src = cce->ch[0].output;
+ float *dest = target->output;
+ const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
+
+ ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
+}
+
+#endif /* AVCODEC_AAC_AACDEC_FLOAT_COUPLING_H */
diff --git a/libavcodec/aac/aacdec_float_prediction.h b/libavcodec/aac/aacdec_float_prediction.h
new file mode 100644
index 0000000000..a4100fd36c
--- /dev/null
+++ b/libavcodec/aac/aacdec_float_prediction.h
@@ -0,0 +1,100 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FLOAT_PREDICTION_H
+#define AVCODEC_AAC_AACDEC_FLOAT_PREDICTION_H
+
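+/* The flt16_* helpers keep only the top 16 bits of the IEEE-754 bit pattern
+ * (rounded, rounded to even, or truncated), matching the reduced precision
+ * of the AAC-Main predictor state variables. */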
+static av_always_inline float flt16_round(float pf)
+{
+ union av_intfloat32 tmp;
+ tmp.f = pf;
+ tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
+ return tmp.f;
+}
+
+static av_always_inline float flt16_even(float pf)
+{
+ union av_intfloat32 tmp;
+ tmp.f = pf;
+ tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
+ return tmp.f;
+}
+
+static av_always_inline float flt16_trunc(float pf)
+{
+ union av_intfloat32 pun;
+ pun.f = pf;
+ pun.i &= 0xFFFF0000U;
+ return pun.f;
+}
+
+static av_always_inline void predict(PredictorState *ps, float *coef,
+ int output_enable)
+{
+ const float a = 0.953125; // 61.0 / 64
+ const float alpha = 0.90625; // 29.0 / 32
+ float e0, e1;
+ float pv;
+ float k1, k2;
+ float r0 = ps->r0, r1 = ps->r1;
+ float cor0 = ps->cor0, cor1 = ps->cor1;
+ float var0 = ps->var0, var1 = ps->var1;
+
+ k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
+ k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
+
+ pv = flt16_round(k1 * r0 + k2 * r1);
+ if (output_enable)
+ *coef += pv;
+
+ e0 = *coef;
+ e1 = e0 - k1 * r0;
+
+ ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
+ ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
+ ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
+ ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
+
+ ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
+ ps->r0 = flt16_trunc(a * e0);
+}
+
+static av_always_inline void reset_predict_state(PredictorState *ps)
+{
+ ps->r0 = 0.0f;
+ ps->r1 = 0.0f;
+ ps->cor0 = 0.0f;
+ ps->cor1 = 0.0f;
+ ps->var0 = 1.0f;
+ ps->var1 = 1.0f;
+}
+
+#endif /* AVCODEC_AAC_AACDEC_FLOAT_PREDICTION_H */
diff --git a/libavcodec/aacdec.c b/libavcodec/aac/aacdec_latm.h
index b4870a6b1f..e40a2fe1a7 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aac/aacdec_latm.h
@@ -8,6 +8,10 @@
* Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
* Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
*
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -25,240 +29,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/**
- * @file
- * AAC decoder
- * @author Oded Shimon ( ods15 ods15 dyndns org )
- * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
- */
-
-#define USE_FIXED 0
-#define TX_TYPE AV_TX_FLOAT_MDCT
-
-#include "libavutil/float_dsp.h"
-#include "libavutil/mem.h"
-#include "libavutil/opt.h"
-#include "avcodec.h"
-#include "codec_internal.h"
-#include "get_bits.h"
-#include "kbdwin.h"
-#include "sinewin.h"
-
-#include "aac.h"
-#include "aacdec.h"
-#include "aactab.h"
-#include "aacdectab.h"
-#include "adts_header.h"
-#include "cbrt_data.h"
-#include "sbr.h"
-#include "aacsbr.h"
-#include "mpeg4audio.h"
-#include "profiles.h"
-#include "libavutil/intfloat.h"
-
-#include <errno.h>
-#include <math.h>
-#include <stdint.h>
-#include <string.h>
-
-#if ARCH_ARM
-# include "arm/aac.h"
-#elif ARCH_MIPS
-# include "mips/aacdec_mips.h"
-#endif
-
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
-
-static av_always_inline void reset_predict_state(PredictorState *ps)
-{
- ps->r0 = 0.0f;
- ps->r1 = 0.0f;
- ps->cor0 = 0.0f;
- ps->cor1 = 0.0f;
- ps->var0 = 1.0f;
- ps->var1 = 1.0f;
-}
-
-#ifndef VMUL2
-static inline float *VMUL2(float *dst, const float *v, unsigned idx,
- const float *scale)
-{
- float s = *scale;
- *dst++ = v[idx & 15] * s;
- *dst++ = v[idx>>4 & 15] * s;
- return dst;
-}
-#endif
-
-#ifndef VMUL4
-static inline float *VMUL4(float *dst, const float *v, unsigned idx,
- const float *scale)
-{
- float s = *scale;
- *dst++ = v[idx & 3] * s;
- *dst++ = v[idx>>2 & 3] * s;
- *dst++ = v[idx>>4 & 3] * s;
- *dst++ = v[idx>>6 & 3] * s;
- return dst;
-}
-#endif
-
-#ifndef VMUL2S
-static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
- unsigned sign, const float *scale)
-{
- union av_intfloat32 s0, s1;
-
- s0.f = s1.f = *scale;
- s0.i ^= sign >> 1 << 31;
- s1.i ^= sign << 31;
-
- *dst++ = v[idx & 15] * s0.f;
- *dst++ = v[idx>>4 & 15] * s1.f;
-
- return dst;
-}
-#endif
-
-#ifndef VMUL4S
-static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
- unsigned sign, const float *scale)
-{
- unsigned nz = idx >> 12;
- union av_intfloat32 s = { .f = *scale };
- union av_intfloat32 t;
-
- t.i = s.i ^ (sign & 1U<<31);
- *dst++ = v[idx & 3] * t.f;
-
- sign <<= nz & 1; nz >>= 1;
- t.i = s.i ^ (sign & 1U<<31);
- *dst++ = v[idx>>2 & 3] * t.f;
-
- sign <<= nz & 1; nz >>= 1;
- t.i = s.i ^ (sign & 1U<<31);
- *dst++ = v[idx>>4 & 3] * t.f;
-
- sign <<= nz & 1;
- t.i = s.i ^ (sign & 1U<<31);
- *dst++ = v[idx>>6 & 3] * t.f;
-
- return dst;
-}
-#endif
-
-static av_always_inline float flt16_round(float pf)
-{
- union av_intfloat32 tmp;
- tmp.f = pf;
- tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
- return tmp.f;
-}
-
-static av_always_inline float flt16_even(float pf)
-{
- union av_intfloat32 tmp;
- tmp.f = pf;
- tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
- return tmp.f;
-}
-
-static av_always_inline float flt16_trunc(float pf)
-{
- union av_intfloat32 pun;
- pun.f = pf;
- pun.i &= 0xFFFF0000U;
- return pun.f;
-}
-
-static av_always_inline void predict(PredictorState *ps, float *coef,
- int output_enable)
-{
- const float a = 0.953125; // 61.0 / 64
- const float alpha = 0.90625; // 29.0 / 32
- float e0, e1;
- float pv;
- float k1, k2;
- float r0 = ps->r0, r1 = ps->r1;
- float cor0 = ps->cor0, cor1 = ps->cor1;
- float var0 = ps->var0, var1 = ps->var1;
-
- k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
- k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
-
- pv = flt16_round(k1 * r0 + k2 * r1);
- if (output_enable)
- *coef += pv;
-
- e0 = *coef;
- e1 = e0 - k1 * r0;
-
- ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
- ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
- ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
- ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
-
- ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
- ps->r0 = flt16_trunc(a * e0);
-}
-
-/**
- * Apply dependent channel coupling (applied before IMDCT).
- *
- * @param index index into coupling gain array
- */
-static void apply_dependent_coupling(AACDecContext *ac,
- SingleChannelElement *target,
- ChannelElement *cce, int index)
-{
- IndividualChannelStream *ics = &cce->ch[0].ics;
- const uint16_t *offsets = ics->swb_offset;
- float *dest = target->coeffs;
- const float *src = cce->ch[0].coeffs;
- int g, i, group, k, idx = 0;
- if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
- av_log(ac->avctx, AV_LOG_ERROR,
- "Dependent coupling is not supported together with LTP\n");
- return;
- }
- for (g = 0; g < ics->num_window_groups; g++) {
- for (i = 0; i < ics->max_sfb; i++, idx++) {
- if (cce->ch[0].band_type[idx] != ZERO_BT) {
- const float gain = cce->coup.gain[index][idx];
- for (group = 0; group < ics->group_len[g]; group++) {
- for (k = offsets[i]; k < offsets[i + 1]; k++) {
- // FIXME: SIMDify
- dest[group * 128 + k] += gain * src[group * 128 + k];
- }
- }
- }
- }
- dest += ics->group_len[g] * 128;
- src += ics->group_len[g] * 128;
- }
-}
-
-/**
- * Apply independent channel coupling (applied after IMDCT).
- *
- * @param index index into coupling gain array
- */
-static void apply_independent_coupling(AACDecContext *ac,
- SingleChannelElement *target,
- ChannelElement *cce, int index)
-{
- const float gain = cce->coup.gain[index][0];
- const float *src = cce->ch[0].ret;
- float *dest = target->ret;
- const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
-
- ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
-}
-
-#include "aacdec_template.c"
+#ifndef AVCODEC_AAC_AACDEC_LATM_H
+#define AVCODEC_AAC_AACDEC_LATM_H
#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
@@ -543,7 +315,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
static av_cold int latm_decode_init(AVCodecContext *avctx)
{
struct LATMContext *latmctx = avctx->priv_data;
- int ret = aac_decode_init(avctx);
+ int ret = ff_aac_decode_init_float(avctx);
if (avctx->extradata_size > 0)
latmctx->initialized = !ret;
@@ -551,26 +323,6 @@ static av_cold int latm_decode_init(AVCodecContext *avctx)
return ret;
}
-const FFCodec ff_aac_decoder = {
- .p.name = "aac",
- CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
- .p.type = AVMEDIA_TYPE_AUDIO,
- .p.id = AV_CODEC_ID_AAC,
- .priv_data_size = sizeof(AACDecContext),
- .init = aac_decode_init,
- .close = aac_decode_close,
- FF_CODEC_DECODE_CB(aac_decode_frame),
- .p.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
- },
- .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
- .p.ch_layouts = ff_aac_ch_layout,
- .flush = flush,
- .p.priv_class = &aac_decoder_class,
- .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
-};
-
/*
Note: This decoder filter is intended to decode LATM streams transferred
in MPEG transport streams which only contain one program.
@@ -583,7 +335,7 @@ const FFCodec ff_aac_latm_decoder = {
.p.id = AV_CODEC_ID_AAC_LATM,
.priv_data_size = sizeof(struct LATMContext),
.init = latm_decode_init,
- .close = aac_decode_close,
+ .close = decode_close,
FF_CODEC_DECODE_CB(latm_decode_frame),
.p.sample_fmts = (const enum AVSampleFormat[]) {
AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
@@ -594,3 +346,5 @@ const FFCodec ff_aac_latm_decoder = {
.flush = flush,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};
+
+#endif /* AVCODEC_AAC_AACDEC_LATM_H */
diff --git a/libavcodec/aac/aacdec_proc_template.c b/libavcodec/aac/aacdec_proc_template.c
new file mode 100644
index 0000000000..327f3117b5
--- /dev/null
+++ b/libavcodec/aac/aacdec_proc_template.c
@@ -0,0 +1,448 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ * MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * linear congruential pseudorandom number generator
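+ * The recurrence used is state = state * 1664525 + 1013904223 (mod 2^32).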
+ *
+ * @param previous_val previous state of the generator
+ *
+ * @return Returns a 32-bit pseudorandom integer
+ */
+static av_always_inline int lcg_random(unsigned previous_val)
+{
+ union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
+ return v.s;
+}
+
+/**
+ * Decode spectral data; reference: table 4.50.
+ * Dequantize and scale spectral data; reference: 4.6.3.3.
+ *
+ * @param coef array of dequantized, scaled spectral data
+ * @param sf array of scalefactors or intensity stereo positions
+ * @param pulse_present set if pulses are present
+ * @param pulse pointer to pulse data struct
+ * @param band_type array of the used band type
+ *
+ * @return Returns error status. 0 - OK, !0 - error
+ */
+static int AAC_RENAME(decode_spectrum_and_dequant)(AACDecContext *ac,
+ GetBitContext *gb,
+ const Pulse *pulse,
+ SingleChannelElement *sce)
+{
+ int i, k, g, idx = 0;
+ INTFLOAT *coef = sce->AAC_RENAME(coeffs);
+ IndividualChannelStream *ics = &sce->ics;
+ const int c = 1024 / ics->num_windows;
+ const uint16_t *offsets = ics->swb_offset;
+ const INTFLOAT *sf = sce->AAC_RENAME(sf);
+ const enum BandType *band_type = sce->band_type;
+ INTFLOAT *coef_base = coef;
+
+ for (g = 0; g < ics->num_windows; g++)
+ memset(coef + g * 128 + offsets[ics->max_sfb], 0,
+ sizeof(INTFLOAT) * (c - offsets[ics->max_sfb]));
+
+ for (g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+
+ for (i = 0; i < ics->max_sfb; i++, idx++) {
+ const unsigned cbt_m1 = band_type[idx] - 1;
+ INTFLOAT *cfo = coef + offsets[i];
+ int off_len = offsets[i + 1] - offsets[i];
+ int group;
+
+ if (cbt_m1 >= INTENSITY_BT2 - 1) {
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ memset(cfo, 0, off_len * sizeof(*cfo));
+ }
+ } else if (cbt_m1 == NOISE_BT - 1) {
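+ /* Perceptual noise substitution: fill the band with LCG noise, then scale
+ * it to the band energy derived from sf[idx]. */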
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ INTFLOAT band_energy;
+#if USE_FIXED
+ for (k = 0; k < off_len; k++) {
+ ac->random_state = lcg_random(ac->random_state);
+ cfo[k] = ac->random_state >> 3;
+ }
+
+ band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len);
+ band_energy = fixed_sqrt(band_energy, 31);
+ noise_scale(cfo, sf[idx], band_energy, off_len);
+#else
+ float scale;
+
+ for (k = 0; k < off_len; k++) {
+ ac->random_state = lcg_random(ac->random_state);
+ cfo[k] = ac->random_state;
+ }
+
+ band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
+ scale = sf[idx] / sqrtf(band_energy);
+ ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
+#endif /* USE_FIXED */
+ }
+ } else {
+#if !USE_FIXED
+ const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
+#endif /* !USE_FIXED */
+ const VLCElem *vlc_tab = ff_vlc_spectral[cbt_m1];
+ OPEN_READER(re, gb);
+
+ switch (cbt_m1 >> 1) {
+ case 0:
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ INTFLOAT *cf = cfo;
+ int len = off_len;
+
+ do {
+ int code;
+ unsigned cb_idx;
+
+ UPDATE_CACHE(re, gb);
+ GET_VLC(code, re, gb, vlc_tab, 8, 2);
+ cb_idx = code;
+#if USE_FIXED
+ cf = DEC_SQUAD(cf, cb_idx);
+#else
+ cf = VMUL4(cf, vq, cb_idx, sf + idx);
+#endif /* USE_FIXED */
+ } while (len -= 4);
+ }
+ break;
+
+ case 1:
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ INTFLOAT *cf = cfo;
+ int len = off_len;
+
+ do {
+ int code;
+ unsigned nnz;
+ unsigned cb_idx;
+ uint32_t bits;
+
+ UPDATE_CACHE(re, gb);
+ GET_VLC(code, re, gb, vlc_tab, 8, 2);
+ cb_idx = code;
+ nnz = cb_idx >> 8 & 15;
+ bits = nnz ? GET_CACHE(re, gb) : 0;
+ LAST_SKIP_BITS(re, gb, nnz);
+#if USE_FIXED
+ cf = DEC_UQUAD(cf, cb_idx, bits);
+#else
+ cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
+#endif /* USE_FIXED */
+ } while (len -= 4);
+ }
+ break;
+
+ case 2:
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ INTFLOAT *cf = cfo;
+ int len = off_len;
+
+ do {
+ int code;
+ unsigned cb_idx;
+
+ UPDATE_CACHE(re, gb);
+ GET_VLC(code, re, gb, vlc_tab, 8, 2);
+ cb_idx = code;
+#if USE_FIXED
+ cf = DEC_SPAIR(cf, cb_idx);
+#else
+ cf = VMUL2(cf, vq, cb_idx, sf + idx);
+#endif /* USE_FIXED */
+ } while (len -= 2);
+ }
+ break;
+
+ case 3:
+ case 4:
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+ INTFLOAT *cf = cfo;
+ int len = off_len;
+
+ do {
+ int code;
+ unsigned nnz;
+ unsigned cb_idx;
+ unsigned sign;
+
+ UPDATE_CACHE(re, gb);
+ GET_VLC(code, re, gb, vlc_tab, 8, 2);
+ cb_idx = code;
+ nnz = cb_idx >> 8 & 15;
+ sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
+ LAST_SKIP_BITS(re, gb, nnz);
+#if USE_FIXED
+ cf = DEC_UPAIR(cf, cb_idx, sign);
+#else
+ cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
+#endif /* USE_FIXED */
+ } while (len -= 2);
+ }
+ break;
+
+ default:
+ for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+#if USE_FIXED
+ int *icf = cfo;
+ int v;
+#else
+ float *cf = cfo;
+ uint32_t *icf = (uint32_t *) cf;
+#endif /* USE_FIXED */
+ int len = off_len;
+
+ do {
+ int code;
+ unsigned nzt, nnz;
+ unsigned cb_idx;
+ uint32_t bits;
+ int j;
+
+ UPDATE_CACHE(re, gb);
+ GET_VLC(code, re, gb, vlc_tab, 8, 2);
+ cb_idx = code;
+
+ if (cb_idx == 0x0000) {
+ *icf++ = 0;
+ *icf++ = 0;
+ continue;
+ }
+
+ nnz = cb_idx >> 12;
+ nzt = cb_idx >> 8;
+ bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
+ LAST_SKIP_BITS(re, gb, nnz);
+
+ for (j = 0; j < 2; j++) {
+ if (nzt & 1<<j) {
+ uint32_t b;
+ int n;
+ /* The total length of escape_sequence must be < 22 bits according
+ to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
+ UPDATE_CACHE(re, gb);
+ b = GET_CACHE(re, gb);
+ b = 31 - av_log2(~b);
+
+ if (b > 8) {
+ av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ SKIP_BITS(re, gb, b + 1);
+ b += 4;
+ n = (1 << b) + SHOW_UBITS(re, gb, b);
+ LAST_SKIP_BITS(re, gb, b);
+#if USE_FIXED
+ v = n;
+ if (bits & 1U<<31)
+ v = -v;
+ *icf++ = v;
+#else
+ *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31);
+#endif /* USE_FIXED */
+ bits <<= 1;
+ } else {
+#if USE_FIXED
+ v = cb_idx & 15;
+ if (bits & 1U<<31)
+ v = -v;
+ *icf++ = v;
+#else
+ unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
+ *icf++ = (bits & 1U<<31) | v;
+#endif /* USE_FIXED */
+ bits <<= !!v;
+ }
+ cb_idx >>= 4;
+ }
+ } while (len -= 2);
+#if !USE_FIXED
+ ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
+#endif /* !USE_FIXED */
+ }
+ }
+
+ CLOSE_READER(re, gb);
+ }
+ }
+ coef += g_len << 7;
+ }
+
+ if (pulse) {
+ idx = 0;
+ for (i = 0; i < pulse->num_pulse; i++) {
+ INTFLOAT co = coef_base[ pulse->pos[i] ];
+ while (offsets[idx + 1] <= pulse->pos[i])
+ idx++;
+ if (band_type[idx] != NOISE_BT && sf[idx]) {
+ INTFLOAT ico = -pulse->amp[i];
+#if USE_FIXED
+ if (co) {
+ ico = co + (co > 0 ? -ico : ico);
+ }
+ coef_base[ pulse->pos[i] ] = ico;
+#else
+ if (co) {
+ co /= sf[idx];
+ ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
+ }
+ coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
+#endif /* USE_FIXED */
+ }
+ }
+ }
+#if USE_FIXED
+ coef = coef_base;
+ idx = 0;
+ for (g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+
+ for (i = 0; i < ics->max_sfb; i++, idx++) {
+ const unsigned cbt_m1 = band_type[idx] - 1;
+ int *cfo = coef + offsets[i];
+ int off_len = offsets[i + 1] - offsets[i];
+ int group;
+
+ if (cbt_m1 < NOISE_BT - 1) {
+ for (group = 0; group < (int)g_len; group++, cfo+=128) {
+ vector_pow43(cfo, off_len);
+ subband_scale(cfo, cfo, sf[idx], 34, off_len, ac->avctx);
+ }
+ }
+ }
+ coef += g_len << 7;
+ }
+#endif /* USE_FIXED */
+ return 0;
+}
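Note on the dequantization above: the float path expands each Huffman-decoded integer q to sign(q)*|q|^(4/3) via the cube-root table (ff_cbrt_tab stores q*cbrt(q) as IEEE float bits) and then applies the band gain with vector_fmul_scalar, while the fixed path defers both steps to the vector_pow43/subband_scale pass at the end of the function. A minimal scalar sketch of that mapping (illustrative helper, not libavcodec API):

    #include <math.h>

    /* sign(q) * |q|^(4/3), scaled by the band's scalefactor gain */
    static float aac_dequant_sketch(int q, float sf_gain)
    {
        return q * cbrtf(fabsf((float)q)) * sf_gain;
    }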
+
+/**
+ * Decode coupling_channel_element; reference: table 4.8.
+ *
+ * @return Returns error status. 0 - OK, !0 - error
+ */
+static int AAC_RENAME(decode_cce)(AACDecContext *ac, GetBitContext *gb, ChannelElement *che)
+{
+ int num_gain = 0;
+ int c, g, sfb, ret;
+ int sign;
+ INTFLOAT scale;
+ SingleChannelElement *sce = &che->ch[0];
+ ChannelCoupling *coup = &che->coup;
+
+ coup->coupling_point = 2 * get_bits1(gb);
+ coup->num_coupled = get_bits(gb, 3);
+ for (c = 0; c <= coup->num_coupled; c++) {
+ num_gain++;
+ coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
+ coup->id_select[c] = get_bits(gb, 4);
+ if (coup->type[c] == TYPE_CPE) {
+ coup->ch_select[c] = get_bits(gb, 2);
+ if (coup->ch_select[c] == 3)
+ num_gain++;
+ } else
+ coup->ch_select[c] = 2;
+ }
+ coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
+
+ sign = get_bits(gb, 1);
+#if USE_FIXED
+ scale = get_bits(gb, 2);
+#else
+ scale = cce_scale[get_bits(gb, 2)];
+#endif
+
+ if ((ret = ff_aac_decode_ics(ac, sce, gb, 0, 0)))
+ return ret;
+
+ for (c = 0; c < num_gain; c++) {
+ int idx = 0;
+ int cge = 1;
+ int gain = 0;
+ INTFLOAT gain_cache = FIXR10(1.);
+ if (c) {
+ cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
+ gain = cge ? get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 60: 0;
+ gain_cache = GET_GAIN(scale, gain);
+#if USE_FIXED
+ if ((abs(gain_cache)-1024) >> 3 > 30)
+ return AVERROR(ERANGE);
+#endif
+ }
+ if (coup->coupling_point == AFTER_IMDCT) {
+ coup->gain[c][0] = gain_cache;
+ } else {
+ for (g = 0; g < sce->ics.num_window_groups; g++) {
+ for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
+ if (sce->band_type[idx] != ZERO_BT) {
+ if (!cge) {
+ int t = get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 60;
+ if (t) {
+ int s = 1;
+ t = gain += t;
+ if (sign) {
+ s -= 2 * (t & 0x1);
+ t >>= 1;
+ }
+ gain_cache = GET_GAIN(scale, t) * s;
+#if USE_FIXED
+ if ((abs(gain_cache)-1024) >> 3 > 30)
+ return AVERROR(ERANGE);
+#endif
+ }
+ }
+ coup->gain[c][idx] = gain_cache;
+ }
+ }
+ }
+ }
+ }
+ return 0;
+}
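In the float build, GET_GAIN(scale, gain) is powf(scale, -gain), so each coupled band's gain_cache is scale raised to the negative accumulated delta; when the element signals signed gains, the LSB of the accumulated value selects the sign and the magnitude is halved, exactly as in the inner loop above. A hedged scalar rendering (helper name is illustrative):

    #include <math.h>

    /* gain for one band: t is the accumulated delta from the scalefactor VLC */
    static float cce_band_gain_sketch(float scale, int t, int signed_gains)
    {
        int s = 1;
        if (signed_gains) {
            s -= 2 * (t & 1);   /* LSB carries the sign */
            t >>= 1;
        }
        return powf(scale, -(float)t) * s;
    }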
+
+static av_cold void AAC_RENAME(aac_proc_init)(AACDecProc *aac_proc)
+{
+#define SET(member) aac_proc->member = AAC_RENAME(member)
+ SET(decode_spectrum_and_dequant);
+ SET(decode_cce);
+#undef SET
+#define SET(member) aac_proc->member = AV_JOIN(ff_aac_, AAC_RENAME(member));
+ SET(sbr_ctx_alloc_init);
+ SET(sbr_decode_extension);
+ SET(sbr_apply);
+ SET(sbr_ctx_close);
+#undef SET
+}
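aac_proc_init fills the per-precision ops table: AAC_RENAME appends _fixed in the fixed-point build (and leaves the name untouched in the float build), and AV_JOIN pastes the ff_aac_ prefix onto the SBR entry points, so the same template compiles into two decoders that differ only in which function set is wired in. A compressed sketch of that token-pasting pattern under assumed names (JOIN, RENAME and Ops are illustrative, not the FFmpeg macros):

    #define JOIN(a, b)   a ## b
    #define RENAME(name) JOIN(name, _fixed)   /* expands to plain `name` in the float build */

    typedef struct Ops {
        int (*decode)(void *ctx);
    } Ops;

    static int RENAME(decode)(void *ctx) { (void)ctx; return 0; }

    static void RENAME(ops_init)(Ops *ops)
    {
        ops->decode = RENAME(decode);         /* resolves to decode_fixed here */
    }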
diff --git a/libavcodec/aacdec_common.c b/libavcodec/aac/aacdec_tab.c
index 145c718047..45a84a9a72 100644
--- a/libavcodec/aacdec_common.c
+++ b/libavcodec/aac/aacdec_tab.c
@@ -25,11 +25,12 @@
* Common code and tables of the AAC fixed- and floating-point decoders
*/
-#include "aac.h"
-#include "aacdectab.h"
-#include "aacps.h"
-#include "aactab.h"
-#include "vlc.h"
+#include "aacdec_tab.h"
+
+#include "libavcodec/aac.h"
+#include "libavcodec/aacps.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/vlc.h"
#include "libavutil/attributes.h"
#include "libavutil/thread.h"
diff --git a/libavcodec/aacdectab.h b/libavcodec/aac/aacdec_tab.h
index 184508f2f3..70e49af202 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aac/aacdec_tab.h
@@ -25,12 +25,12 @@
* @author Maxim Gavrilov ( maxim.gavrilov gmail com )
*/
-#ifndef AVCODEC_AACDECTAB_H
-#define AVCODEC_AACDECTAB_H
+#ifndef AVCODEC_AAC_AACDEC_TAB_H
+#define AVCODEC_AAC_AACDEC_TAB_H
#include <stdint.h>
-#include "vlc.h"
+#include "libavcodec/vlc.h"
#include "libavutil/attributes_internal.h"
#include "libavutil/channel_layout.h"
@@ -52,4 +52,4 @@ extern const int16_t ff_aac_channel_map[3][4][6];
extern const AVChannelLayout ff_aac_ch_layout[];
FF_VISIBILITY_POP_HIDDEN
-#endif /* AVCODEC_AACDECTAB_H */
+#endif /* AVCODEC_AAC_AACDEC_TAB_H */
diff --git a/libavcodec/aac_ac3_parser.c b/libavcodec/aac_ac3_parser.c
index 9305b4c50f..f45631d09f 100644
--- a/libavcodec/aac_ac3_parser.c
+++ b/libavcodec/aac_ac3_parser.c
@@ -144,10 +144,9 @@ get_next:
#endif
} else {
#if CONFIG_AAC_PARSER
- AACADTSHeaderInfo hdr, *phrd = &hdr;
- int ret = avpriv_adts_header_parse(&phrd, buf, buf_size);
-
- if (ret < 0)
+ AACADTSHeaderInfo hdr;
+ if (buf_size < AV_AAC_ADTS_HEADER_SIZE ||
+ ff_adts_header_parse_buf(buf, &hdr) < 0)
return i;
bit_rate = hdr.bit_rate;
diff --git a/libavcodec/aac_ac3_parser.h b/libavcodec/aac_ac3_parser.h
index bc16181a19..e3259d1841 100644
--- a/libavcodec/aac_ac3_parser.h
+++ b/libavcodec/aac_ac3_parser.h
@@ -28,16 +28,6 @@
#include "avcodec.h"
#include "parser.h"
-typedef enum {
- AAC_AC3_PARSE_ERROR_SYNC = -0x1030c0a,
- AAC_AC3_PARSE_ERROR_BSID = -0x2030c0a,
- AAC_AC3_PARSE_ERROR_SAMPLE_RATE = -0x3030c0a,
- AAC_AC3_PARSE_ERROR_FRAME_SIZE = -0x4030c0a,
- AAC_AC3_PARSE_ERROR_FRAME_TYPE = -0x5030c0a,
- AAC_AC3_PARSE_ERROR_CRC = -0x6030c0a,
- AAC_AC3_PARSE_ERROR_CHANNEL_CFG = -0x7030c0a,
-} AACAC3ParseError;
-
typedef struct AACAC3ParseContext {
ParseContext pc;
int header_size;
diff --git a/libavcodec/aac_defines.h b/libavcodec/aac_defines.h
index a0c23c33ff..a3b662115b 100644
--- a/libavcodec/aac_defines.h
+++ b/libavcodec/aac_defines.h
@@ -42,7 +42,6 @@ typedef int AAC_SIGNE;
#define Q23(a) (int)((a) * 8388608.0 + 0.5)
#define Q30(x) (int)((x)*1073741824.0 + 0.5)
#define Q31(x) (int)((x)*2147483648.0 + 0.5)
-#define TX_SCALE(x) ((x) * 128.0f)
#define GET_GAIN(x, y) (-(y) * (1 << (x))) + 1024
#define AAC_MUL16(x, y) (int)(((int64_t)(x) * (y) + 0x8000) >> 16)
#define AAC_MUL26(x, y) (int)(((int64_t)(x) * (y) + 0x2000000) >> 26)
@@ -72,6 +71,20 @@ typedef int AAC_SIGNE;
0x40000000) >> 31)
#define AAC_HALF_SUM(x, y) (((x) >> 1) + ((y) >> 1))
+/**
+ * Predictor State
+ */
+typedef struct PredictorStateFixed {
+ SoftFloat cor0;
+ SoftFloat cor1;
+ SoftFloat var0;
+ SoftFloat var1;
+ SoftFloat r0;
+ SoftFloat r1;
+ SoftFloat k1;
+ SoftFloat x_est;
+} PredictorState;
+
#ifdef LPC_USE_FIXED
#error aac_defines.h must be included before lpc_functions.h for fixed point decoder
#endif
@@ -96,7 +109,6 @@ typedef unsigned AAC_SIGNE;
#define Q23(x) ((float)(x))
#define Q30(x) ((float)(x))
#define Q31(x) ((float)(x))
-#define TX_SCALE(x) ((x) / 32768.0f)
#define GET_GAIN(x, y) powf((x), -(y))
#define AAC_MUL16(x, y) ((x) * (y))
#define AAC_MUL26(x, y) ((x) * (y))
@@ -112,6 +124,20 @@ typedef unsigned AAC_SIGNE;
#define AAC_MSUB31_V3(x, y, z) ((x) - (y)) * (z)
#define AAC_HALF_SUM(x, y) ((x) + (y)) * 0.5f
+/**
+ * Predictor State
+ */
+typedef struct PredictorState {
+ float cor0;
+ float cor1;
+ float var0;
+ float var1;
+ float r0;
+ float r1;
+ float k1;
+ float x_est;
+} PredictorState;
+
#endif /* USE_FIXED */
#endif /* AVCODEC_AAC_DEFINES_H */
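The fixed-point branch of aac_defines.h represents fractions in Q30/Q31 (Q31(x) = round(x * 2^31)) and renormalizes products with a rounded 31-bit shift, while the float branch maps the same macros to plain arithmetic. A small sketch of that convention (helper names are illustrative):

    #include <stdint.h>

    static int32_t to_q31(double x)                 /* Q31(x) = round(x * 2^31) */
    {
        return (int32_t)(x * 2147483648.0 + 0.5);
    }

    static int32_t mul_q31(int32_t a, int32_t b)    /* cf. AAC_MUL31: rounded >> 31 */
    {
        return (int32_t)(((int64_t)a * b + 0x40000000) >> 31);
    }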
diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index f295dfccdd..186fcd887a 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -24,24 +24,18 @@
#include "aac_ac3_parser.h"
#include "adts_header.h"
#include "adts_parser.h"
-#include "get_bits.h"
-#include "mpeg4audio.h"
+#include "libavutil/intreadwrite.h"
static int aac_sync(uint64_t state, int *need_next_header, int *new_frame_start)
{
- GetBitContext bits;
+ uint8_t tmp[8 + AV_INPUT_BUFFER_PADDING_SIZE];
AACADTSHeaderInfo hdr;
int size;
- union {
- uint64_t u64;
- uint8_t u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
- } tmp;
- tmp.u64 = av_be2ne64(state);
- init_get_bits(&bits, tmp.u8 + 8 - AV_AAC_ADTS_HEADER_SIZE,
- AV_AAC_ADTS_HEADER_SIZE * 8);
+ AV_WB64(tmp, state);
- if ((size = ff_adts_header_parse(&bits, &hdr)) < 0)
+ size = ff_adts_header_parse_buf(tmp + 8 - AV_AAC_ADTS_HEADER_SIZE, &hdr);
+ if (size < 0)
return 0;
*need_next_header = 0;
*new_frame_start = 1;
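The new parser path writes the 64-bit sliding sync state into a padded byte buffer with AV_WB64 and hands the last AV_AAC_ADTS_HEADER_SIZE (7) bytes to ff_adts_header_parse_buf. A hedged sketch of what a buffer-based ADTS probe checks (this only validates the syncword/layer bits and extracts the 13-bit frame length; the real helper also fills AACADTSHeaderInfo):

    #include <stdint.h>

    static int adts_probe_frame_len(const uint8_t *buf)
    {
        if (buf[0] != 0xFF || (buf[1] & 0xF6) != 0xF0)   /* 12-bit syncword, layer == 0 */
            return -1;
        /* aac_frame_length: 13 bits spanning bytes 3..5 */
        return ((buf[3] & 0x03) << 11) | (buf[4] << 3) | (buf[5] >> 5);
    }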
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
deleted file mode 100644
index 305bb0ba9a..0000000000
--- a/libavcodec/aacdec_fixed.c
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Copyright (c) 2013
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * AAC decoder fixed-point implementation
- *
- * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
- * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * AAC decoder
- * @author Oded Shimon ( ods15 ods15 dyndns org )
- * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
- *
- * Fixed point implementation
- * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
- */
-
-#define USE_FIXED 1
-#define TX_TYPE AV_TX_INT32_MDCT
-
-#include "libavutil/fixed_dsp.h"
-#include "libavutil/opt.h"
-#include "avcodec.h"
-#include "codec_internal.h"
-#include "get_bits.h"
-#include "kbdwin.h"
-#include "sinewin_fixed_tablegen.h"
-
-#include "aac.h"
-#include "aacdec.h"
-#include "aactab.h"
-#include "aacdectab.h"
-#include "adts_header.h"
-#include "cbrt_data.h"
-#include "sbr.h"
-#include "aacsbr.h"
-#include "mpeg4audio.h"
-#include "profiles.h"
-#include "libavutil/intfloat.h"
-
-#include <math.h>
-#include <string.h>
-
-DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_long_1024))[1024];
-DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_short_128))[128];
-DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_long_960))[960];
-DECLARE_ALIGNED(32, static int, AAC_RENAME2(aac_kbd_short_120))[120];
-
-/* @name ltp_coef
- * Table of the LTP coefficients
- */
-static const int ltp_coef_fixed[8] = {
- Q30(0.570829), Q30(0.696616), Q30(0.813004), Q30(0.911304),
- Q30(0.984900), Q30(1.067894), Q30(1.194601), Q30(1.369533),
-};
-
-/* @name tns_tmp2_map
- * Tables of the tmp2[] arrays of LPC coefficients used for TNS.
- * The suffix _M_N[] indicate the values of coef_compress and coef_res
- * respectively.
- * @{
- */
-static const int tns_tmp2_map_1_3[4] = {
- Q31(0.00000000), Q31(-0.43388373), Q31(0.64278758), Q31(0.34202015),
-};
-
-static const int tns_tmp2_map_0_3[8] = {
- Q31(0.00000000), Q31(-0.43388373), Q31(-0.78183150), Q31(-0.97492790),
- Q31(0.98480773), Q31( 0.86602539), Q31( 0.64278758), Q31( 0.34202015),
-};
-
-static const int tns_tmp2_map_1_4[8] = {
- Q31(0.00000000), Q31(-0.20791170), Q31(-0.40673664), Q31(-0.58778524),
- Q31(0.67369562), Q31( 0.52643216), Q31( 0.36124167), Q31( 0.18374951),
-};
-
-static const int tns_tmp2_map_0_4[16] = {
- Q31( 0.00000000), Q31(-0.20791170), Q31(-0.40673664), Q31(-0.58778524),
- Q31(-0.74314481), Q31(-0.86602539), Q31(-0.95105654), Q31(-0.99452192),
- Q31( 0.99573416), Q31( 0.96182561), Q31( 0.89516330), Q31( 0.79801720),
- Q31( 0.67369562), Q31( 0.52643216), Q31( 0.36124167), Q31( 0.18374951),
-};
-
-static const int * const tns_tmp2_map_fixed[4] = {
- tns_tmp2_map_0_3,
- tns_tmp2_map_0_4,
- tns_tmp2_map_1_3,
- tns_tmp2_map_1_4
-};
-// @}
-
-static av_always_inline void reset_predict_state(PredictorState *ps)
-{
- ps->r0.mant = 0;
- ps->r0.exp = 0;
- ps->r1.mant = 0;
- ps->r1.exp = 0;
- ps->cor0.mant = 0;
- ps->cor0.exp = 0;
- ps->cor1.mant = 0;
- ps->cor1.exp = 0;
- ps->var0.mant = 0x20000000;
- ps->var0.exp = 1;
- ps->var1.mant = 0x20000000;
- ps->var1.exp = 1;
-}
-
-static const int exp2tab[4] = { Q31(1.0000000000/2), Q31(1.1892071150/2), Q31(1.4142135624/2), Q31(1.6817928305/2) }; // 2^0, 2^0.25, 2^0.5, 2^0.75
-
-static inline int *DEC_SPAIR(int *dst, unsigned idx)
-{
- dst[0] = (idx & 15) - 4;
- dst[1] = (idx >> 4 & 15) - 4;
-
- return dst + 2;
-}
-
-static inline int *DEC_SQUAD(int *dst, unsigned idx)
-{
- dst[0] = (idx & 3) - 1;
- dst[1] = (idx >> 2 & 3) - 1;
- dst[2] = (idx >> 4 & 3) - 1;
- dst[3] = (idx >> 6 & 3) - 1;
-
- return dst + 4;
-}
-
-static inline int *DEC_UPAIR(int *dst, unsigned idx, unsigned sign)
-{
- dst[0] = (idx & 15) * (1 - (sign & 0xFFFFFFFE));
- dst[1] = (idx >> 4 & 15) * (1 - ((sign & 1) * 2));
-
- return dst + 2;
-}
-
-static inline int *DEC_UQUAD(int *dst, unsigned idx, unsigned sign)
-{
- unsigned nz = idx >> 12;
-
- dst[0] = (idx & 3) * (1 + (((int)sign >> 31) * 2));
- sign <<= nz & 1;
- nz >>= 1;
- dst[1] = (idx >> 2 & 3) * (1 + (((int)sign >> 31) * 2));
- sign <<= nz & 1;
- nz >>= 1;
- dst[2] = (idx >> 4 & 3) * (1 + (((int)sign >> 31) * 2));
- sign <<= nz & 1;
- nz >>= 1;
- dst[3] = (idx >> 6 & 3) * (1 + (((int)sign >> 31) * 2));
-
- return dst + 4;
-}
-
-static void vector_pow43(int *coefs, int len)
-{
- int i, coef;
-
- for (i=0; i<len; i++) {
- coef = coefs[i];
- if (coef < 0)
- coef = -(int)ff_cbrt_tab_fixed[(-coef) & 8191];
- else
- coef = (int)ff_cbrt_tab_fixed[ coef & 8191];
- coefs[i] = coef;
- }
-}
-
-static void subband_scale(int *dst, int *src, int scale, int offset, int len, void *log_context)
-{
- int ssign = scale < 0 ? -1 : 1;
- int s = FFABS(scale);
- unsigned int round;
- int i, out, c = exp2tab[s & 3];
-
- s = offset - (s >> 2);
-
- if (s > 31) {
- for (i=0; i<len; i++) {
- dst[i] = 0;
- }
- } else if (s > 0) {
- round = 1 << (s-1);
- for (i=0; i<len; i++) {
- out = (int)(((int64_t)src[i] * c) >> 32);
- dst[i] = ((int)(out+round) >> s) * ssign;
- }
- } else if (s > -32) {
- s = s + 32;
- round = 1U << (s-1);
- for (i=0; i<len; i++) {
- out = (int)((int64_t)((int64_t)src[i] * c + round) >> s);
- dst[i] = out * (unsigned)ssign;
- }
- } else {
- av_log(log_context, AV_LOG_ERROR, "Overflow in subband_scale()\n");
- }
-}
-
-static void noise_scale(int *coefs, int scale, int band_energy, int len)
-{
- int s = -scale;
- unsigned int round;
- int i, out, c = exp2tab[s & 3];
- int nlz = 0;
-
- av_assert0(s >= 0);
- while (band_energy > 0x7fff) {
- band_energy >>= 1;
- nlz++;
- }
- c /= band_energy;
- s = 21 + nlz - (s >> 2);
-
- if (s > 31) {
- for (i=0; i<len; i++) {
- coefs[i] = 0;
- }
- } else if (s >= 0) {
- round = s ? 1 << (s-1) : 0;
- for (i=0; i<len; i++) {
- out = (int)(((int64_t)coefs[i] * c) >> 32);
- coefs[i] = -((int)(out+round) >> s);
- }
- }
- else {
- s = s + 32;
- if (s > 0) {
- round = 1 << (s-1);
- for (i=0; i<len; i++) {
- out = (int)((int64_t)((int64_t)coefs[i] * c + round) >> s);
- coefs[i] = -out;
- }
- } else {
- for (i=0; i<len; i++)
- coefs[i] = -(int64_t)coefs[i] * c * (1 << -s);
- }
- }
-}
-
-static av_always_inline SoftFloat flt16_round(SoftFloat pf)
-{
- SoftFloat tmp;
- int s;
-
- tmp.exp = pf.exp;
- s = pf.mant >> 31;
- tmp.mant = (pf.mant ^ s) - s;
- tmp.mant = (tmp.mant + 0x00200000U) & 0xFFC00000U;
- tmp.mant = (tmp.mant ^ s) - s;
-
- return tmp;
-}
-
-static av_always_inline SoftFloat flt16_even(SoftFloat pf)
-{
- SoftFloat tmp;
- int s;
-
- tmp.exp = pf.exp;
- s = pf.mant >> 31;
- tmp.mant = (pf.mant ^ s) - s;
- tmp.mant = (tmp.mant + 0x001FFFFFU + (tmp.mant & 0x00400000U >> 16)) & 0xFFC00000U;
- tmp.mant = (tmp.mant ^ s) - s;
-
- return tmp;
-}
-
-static av_always_inline SoftFloat flt16_trunc(SoftFloat pf)
-{
- SoftFloat pun;
- int s;
-
- pun.exp = pf.exp;
- s = pf.mant >> 31;
- pun.mant = (pf.mant ^ s) - s;
- pun.mant = pun.mant & 0xFFC00000U;
- pun.mant = (pun.mant ^ s) - s;
-
- return pun;
-}
-
-static av_always_inline void predict(PredictorState *ps, int *coef,
- int output_enable)
-{
- const SoftFloat a = { 1023410176, 0 }; // 61.0 / 64
- const SoftFloat alpha = { 973078528, 0 }; // 29.0 / 32
- SoftFloat e0, e1;
- SoftFloat pv;
- SoftFloat k1, k2;
- SoftFloat r0 = ps->r0, r1 = ps->r1;
- SoftFloat cor0 = ps->cor0, cor1 = ps->cor1;
- SoftFloat var0 = ps->var0, var1 = ps->var1;
- SoftFloat tmp;
-
- if (var0.exp > 1 || (var0.exp == 1 && var0.mant > 0x20000000)) {
- k1 = av_mul_sf(cor0, flt16_even(av_div_sf(a, var0)));
- }
- else {
- k1.mant = 0;
- k1.exp = 0;
- }
-
- if (var1.exp > 1 || (var1.exp == 1 && var1.mant > 0x20000000)) {
- k2 = av_mul_sf(cor1, flt16_even(av_div_sf(a, var1)));
- }
- else {
- k2.mant = 0;
- k2.exp = 0;
- }
-
- tmp = av_mul_sf(k1, r0);
- pv = flt16_round(av_add_sf(tmp, av_mul_sf(k2, r1)));
- if (output_enable) {
- int shift = 28 - pv.exp;
-
- if (shift < 31) {
- if (shift > 0) {
- *coef += (unsigned)((pv.mant + (1 << (shift - 1))) >> shift);
- } else
- *coef += (unsigned)pv.mant << -shift;
- }
- }
-
- e0 = av_int2sf(*coef, 2);
- e1 = av_sub_sf(e0, tmp);
-
- ps->cor1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor1), av_mul_sf(r1, e1)));
- tmp = av_add_sf(av_mul_sf(r1, r1), av_mul_sf(e1, e1));
- tmp.exp--;
- ps->var1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var1), tmp));
- ps->cor0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor0), av_mul_sf(r0, e0)));
- tmp = av_add_sf(av_mul_sf(r0, r0), av_mul_sf(e0, e0));
- tmp.exp--;
- ps->var0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var0), tmp));
-
- ps->r1 = flt16_trunc(av_mul_sf(a, av_sub_sf(r0, av_mul_sf(k1, e0))));
- ps->r0 = flt16_trunc(av_mul_sf(a, e0));
-}
-
-
-static const int cce_scale_fixed[8] = {
- Q30(1.0), //2^(0/8)
- Q30(1.0905077327), //2^(1/8)
- Q30(1.1892071150), //2^(2/8)
- Q30(1.2968395547), //2^(3/8)
- Q30(1.4142135624), //2^(4/8)
- Q30(1.5422108254), //2^(5/8)
- Q30(1.6817928305), //2^(6/8)
- Q30(1.8340080864), //2^(7/8)
-};
-
-/**
- * Apply dependent channel coupling (applied before IMDCT).
- *
- * @param index index into coupling gain array
- */
-static void apply_dependent_coupling_fixed(AACDecContext *ac,
- SingleChannelElement *target,
- ChannelElement *cce, int index)
-{
- IndividualChannelStream *ics = &cce->ch[0].ics;
- const uint16_t *offsets = ics->swb_offset;
- int *dest = target->coeffs;
- const int *src = cce->ch[0].coeffs;
- int g, i, group, k, idx = 0;
- if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
- av_log(ac->avctx, AV_LOG_ERROR,
- "Dependent coupling is not supported together with LTP\n");
- return;
- }
- for (g = 0; g < ics->num_window_groups; g++) {
- for (i = 0; i < ics->max_sfb; i++, idx++) {
- if (cce->ch[0].band_type[idx] != ZERO_BT) {
- const int gain = cce->coup.gain[index][idx];
- int shift, round, c, tmp;
-
- if (gain < 0) {
- c = -cce_scale_fixed[-gain & 7];
- shift = (-gain-1024) >> 3;
- }
- else {
- c = cce_scale_fixed[gain & 7];
- shift = (gain-1024) >> 3;
- }
-
- if (shift < -31) {
- // Nothing to do
- } else if (shift < 0) {
- shift = -shift;
- round = 1 << (shift - 1);
-
- for (group = 0; group < ics->group_len[g]; group++) {
- for (k = offsets[i]; k < offsets[i + 1]; k++) {
- tmp = (int)(((int64_t)src[group * 128 + k] * c + \
- (int64_t)0x1000000000) >> 37);
- dest[group * 128 + k] += (tmp + (int64_t)round) >> shift;
- }
- }
- }
- else {
- for (group = 0; group < ics->group_len[g]; group++) {
- for (k = offsets[i]; k < offsets[i + 1]; k++) {
- tmp = (int)(((int64_t)src[group * 128 + k] * c + \
- (int64_t)0x1000000000) >> 37);
- dest[group * 128 + k] += tmp * (1U << shift);
- }
- }
- }
- }
- }
- dest += ics->group_len[g] * 128;
- src += ics->group_len[g] * 128;
- }
-}
-
-/**
- * Apply independent channel coupling (applied after IMDCT).
- *
- * @param index index into coupling gain array
- */
-static void apply_independent_coupling_fixed(AACDecContext *ac,
- SingleChannelElement *target,
- ChannelElement *cce, int index)
-{
- int i, c, shift, round, tmp;
- const int gain = cce->coup.gain[index][0];
- const int *src = cce->ch[0].ret;
- unsigned int *dest = target->ret;
- const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
-
- c = cce_scale_fixed[gain & 7];
- shift = (gain-1024) >> 3;
- if (shift < -31) {
- return;
- } else if (shift < 0) {
- shift = -shift;
- round = 1 << (shift - 1);
-
- for (i = 0; i < len; i++) {
- tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
- dest[i] += (tmp + round) >> shift;
- }
- }
- else {
- for (i = 0; i < len; i++) {
- tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
- dest[i] += tmp * (1U << shift);
- }
- }
-}
-
-#include "aacdec_template.c"
-
-const FFCodec ff_aac_fixed_decoder = {
- .p.name = "aac_fixed",
- CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
- .p.type = AVMEDIA_TYPE_AUDIO,
- .p.id = AV_CODEC_ID_AAC,
- .priv_data_size = sizeof(AACDecContext),
- .init = aac_decode_init,
- .close = aac_decode_close,
- FF_CODEC_DECODE_CB(aac_decode_frame),
- .p.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
- },
- .p.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
- .p.ch_layouts = ff_aac_ch_layout,
- .p.priv_class = &aac_decoder_class,
- .p.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
- .flush = flush,
-};
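The deleted fixed-point helpers all decompose 2^(x/4)-style gains into an integer shift plus a fractional multiplier taken from the 4-entry exp2tab (2^0 … 2^0.75); subband_scale and the coupling gains both rely on this. Ignoring the saturation and rounding handled by the removed code, the decomposition looks like this (sketch, not the removed implementation; assumes arithmetic right shift for negative inputs):

    #include <math.h>

    static float quarter_pow2(int sf)               /* 2^(sf/4) */
    {
        static const float frac[4] = { 1.0f, 1.18920712f, 1.41421356f, 1.68179283f };
        return ldexpf(frac[sf & 3], sf >> 2);       /* fractional part * 2^(integer part) */
    }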
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 7feb723289..163598e938 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -538,11 +538,9 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
maxsfb = 0;
cpe->ch[ch].pulse.num_pulse = 0;
for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
- for (w2 = 0; w2 < ics->group_len[w]; w2++) {
- for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
- ;
- maxsfb = FFMAX(maxsfb, cmaxsfb);
- }
+ for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
+ ;
+ maxsfb = FFMAX(maxsfb, cmaxsfb);
}
ics->max_sfb = maxsfb;
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 8899f90ac7..d07960620e 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -49,6 +49,20 @@ typedef enum AACCoder {
AAC_CODER_NB,
}AACCoder;
+/**
+ * Predictor State
+ */
+typedef struct PredictorState {
+ float cor0;
+ float cor1;
+ float var0;
+ float var1;
+ float r0;
+ float r1;
+ float k1;
+ float x_est;
+} PredictorState;
+
typedef struct AACEncOptions {
int coder;
int pns;
diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c
index 60888fece7..fa3cd2af39 100644
--- a/libavcodec/aacenc_tns.c
+++ b/libavcodec/aacenc_tns.c
@@ -181,7 +181,7 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
for (w = 0; w < sce->ics.num_windows; w++) {
float en[2] = {0.0f, 0.0f};
- int oc_start = 0, os_start = 0;
+ int oc_start = 0;
int coef_start = sce->ics.swb_offset[sfb_start];
for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
@@ -202,12 +202,11 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
for (g = 0; g < tns->n_filt[w]; g++) {
tns->direction[w][g] = slant != 2 ? slant : en[g] < en[!g];
- tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
- tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
+ tns->order[w][g] = order/tns->n_filt[w];
+ tns->length[w][g] = sfb_len/tns->n_filt[w];
quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
tns->order[w][g], c_bits);
oc_start += tns->order[w][g];
- os_start += tns->length[w][g];
}
count++;
}
diff --git a/libavcodec/aacpsdsp.h b/libavcodec/aacpsdsp.h
index 8b32761bdb..1491212250 100644
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -54,7 +54,6 @@ typedef struct PSDSPContext {
void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s);
void ff_psdsp_init_arm(PSDSPContext *s);
void ff_psdsp_init_aarch64(PSDSPContext *s);
-void ff_psdsp_init_mips(PSDSPContext *s);
void ff_psdsp_init_riscv(PSDSPContext *s);
void ff_psdsp_init_x86(PSDSPContext *s);
diff --git a/libavcodec/aacpsdsp_template.c b/libavcodec/aacpsdsp_template.c
index 7100ae7bcb..c28ba2c9a5 100644
--- a/libavcodec/aacpsdsp_template.c
+++ b/libavcodec/aacpsdsp_template.c
@@ -226,8 +226,6 @@ av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s)
ff_psdsp_init_arm(s);
#elif ARCH_AARCH64
ff_psdsp_init_aarch64(s);
-#elif ARCH_MIPS
- ff_psdsp_init_mips(s);
#elif ARCH_RISCV
ff_psdsp_init_riscv(s);
#elif ARCH_X86
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index 84c8375d3f..019be09fa3 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -223,10 +223,6 @@ static const float psy_fir_coeffs[] = {
-5.52212e-17 * 2, -0.313819 * 2
};
-#if ARCH_MIPS
-# include "mips/aacpsy_mips.h"
-#endif /* ARCH_MIPS */
-
/**
* Calculate the ABR attack threshold from the above LAME psymodel table.
*/
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index aafc00049a..78f0aead8e 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -43,10 +43,6 @@
#include <float.h>
#include <math.h>
-#if ARCH_MIPS
-#include "mips/aacsbr_mips.h"
-#endif /* ARCH_MIPS */
-
/**
* 2^(x) for integer x
* @return correctly rounded float
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index cb680cc548..d4582d1100 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -30,14 +30,13 @@
#define AVCODEC_AACSBR_H
#include "get_bits.h"
-#include "aac_defines.h"
-#include "sbr.h"
+#include "aac/aacdec.h"
+
+#include "libavutil/attributes_internal.h"
#define ENVELOPE_ADJUSTMENT_OFFSET 2
#define NOISE_FLOOR_OFFSET 6
-struct AACDecContext;
-
/**
* SBR VLC tables
*/
@@ -68,19 +67,33 @@ enum {
EXTENSION_ID_PS = 2,
};
+FF_VISIBILITY_PUSH_HIDDEN
/** Initialize SBR. */
-void AAC_RENAME(ff_aac_sbr_init)(void);
-/** Initialize one SBR context. */
-int AAC_RENAME(ff_aac_sbr_ctx_init)(struct AACDecContext *ac, SpectralBandReplication *sbr, int id_aac);
-/** Close one SBR context. */
-void AAC_RENAME(ff_aac_sbr_ctx_close)(SpectralBandReplication *sbr);
+void ff_aac_sbr_init(void);
+void ff_aac_sbr_init_fixed(void);
+/**
+ * Allocate an ExtChannelElement (if necessary) and
+ * initialize the SBR context contained in it.
+ */
+int ff_aac_sbr_ctx_alloc_init(AACDecContext *ac, ChannelElement **che, int id_aac);
+int ff_aac_sbr_ctx_alloc_init_fixed(AACDecContext *ac, ChannelElement **che, int id_aac);
+
+/** Close the SBR context implicitly contained in a ChannelElement. */
+void ff_aac_sbr_ctx_close(ChannelElement *che);
+void ff_aac_sbr_ctx_close_fixed(ChannelElement *che);
+
/** Decode one SBR element. */
-int AAC_RENAME(ff_decode_sbr_extension)(struct AACDecContext *ac, SpectralBandReplication *sbr,
- GetBitContext *gb, int crc, int cnt, int id_aac);
+int ff_aac_sbr_decode_extension(AACDecContext *ac, ChannelElement *che,
+ GetBitContext *gb, int crc, int cnt, int id_aac);
+int ff_aac_sbr_decode_extension_fixed(AACDecContext *ac, ChannelElement *che,
+ GetBitContext *gb, int crc, int cnt, int id_aac);
+
/** Apply one SBR element to one AAC element. */
-void AAC_RENAME(ff_sbr_apply)(struct AACDecContext *ac, SpectralBandReplication *sbr, int id_aac,
- INTFLOAT* L, INTFLOAT *R);
+void ff_aac_sbr_apply(AACDecContext *ac, ChannelElement *che,
+ int id_aac, void /* float */ *L, void /* float */ *R);
+void ff_aac_sbr_apply_fixed(AACDecContext *ac, ChannelElement *che,
+ int id_aac, void /* int */ *L, void /* int */ *R);
-void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c);
+FF_VISIBILITY_POP_HIDDEN
#endif /* AVCODEC_AACSBR_H */
diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
index cdfaed636b..86f4d8c26e 100644
--- a/libavcodec/aacsbr_template.c
+++ b/libavcodec/aacsbr_template.c
@@ -32,23 +32,25 @@
* @author Zoran Basaric ( zoran.basaric@imgtec.com )
*/
-#include "aacdec.h"
-#include "aacdectab.h"
+#include "aac/aacdec.h"
+#include "aac/aacdec_tab.h"
#include "avcodec.h"
#include "libavutil/qsort.h"
+#include "libavutil/mem.h"
-static av_cold void aacsbr_tableinit(void)
-{
- int n;
+typedef struct ExtChannelElement {
+ ChannelElement ch;
+ PredictorState predictor_state[2][MAX_PREDICTORS];
+ SpectralBandReplication sbr;
+} ExtChannelElement;
- for (n = 0; n < 320; n++)
- sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
+static inline SpectralBandReplication *get_sbr(ChannelElement *ch)
+{
+ return &((ExtChannelElement*)ch)->sbr;
}
av_cold void AAC_RENAME(ff_aac_sbr_init)(void)
{
- aacsbr_tableinit();
-
AAC_RENAME(ff_ps_init)();
}
@@ -64,13 +66,20 @@ static void sbr_turnoff(SpectralBandReplication *sbr) {
memset(&sbr->spectrum_params, -1, sizeof(SpectrumParameters));
}
-av_cold int AAC_RENAME(ff_aac_sbr_ctx_init)(AACDecContext *ac, SpectralBandReplication *sbr, int id_aac)
+av_cold int AAC_RENAME(ff_aac_sbr_ctx_alloc_init)(AACDecContext *ac,
+ ChannelElement **che, int id_aac)
{
+ SpectralBandReplication *sbr;
+ ExtChannelElement *ext = av_mallocz(sizeof(*ext));
int ret;
float scale;
- if (sbr->mdct)
- return 0;
+ if (!ext)
+ return AVERROR(ENOMEM);
+ *che = &ext->ch;
+ sbr = &ext->sbr;
+ ext->ch.ch[0].AAC_RENAME(predictor_state) = ext->predictor_state[0];
+ ext->ch.ch[1].AAC_RENAME(predictor_state) = ext->predictor_state[1];
sbr->kx[0] = sbr->kx[1];
sbr->id_aac = id_aac;
@@ -102,8 +111,9 @@ av_cold int AAC_RENAME(ff_aac_sbr_ctx_init)(AACDecContext *ac, SpectralBandRepli
return 0;
}
-av_cold void AAC_RENAME(ff_aac_sbr_ctx_close)(SpectralBandReplication *sbr)
+av_cold void AAC_RENAME(ff_aac_sbr_ctx_close)(ChannelElement *che)
{
+ SpectralBandReplication *sbr = get_sbr(che);
av_tx_uninit(&sbr->mdct);
av_tx_uninit(&sbr->mdct_ana);
}
@@ -1090,9 +1100,11 @@ static void sbr_reset(AACDecContext *ac, SpectralBandReplication *sbr)
*
* @return Returns number of bytes consumed from the TYPE_FIL element.
*/
-int AAC_RENAME(ff_decode_sbr_extension)(AACDecContext *ac, SpectralBandReplication *sbr,
- GetBitContext *gb_host, int crc, int cnt, int id_aac)
+int AAC_RENAME(ff_aac_sbr_decode_extension)(AACDecContext *ac, ChannelElement *che,
+ GetBitContext *gb_host, int crc,
+ int cnt, int id_aac)
{
+ SpectralBandReplication *sbr = get_sbr(che);
unsigned int num_sbr_bits = 0, num_align_bits;
unsigned bytes_read;
GetBitContext gbc = *gb_host, *gb = &gbc;
@@ -1456,9 +1468,11 @@ static void sbr_env_estimate(AAC_FLOAT (*e_curr)[48], INTFLOAT X_high[64][40][2]
}
}
-void AAC_RENAME(ff_sbr_apply)(AACDecContext *ac, SpectralBandReplication *sbr, int id_aac,
- INTFLOAT* L, INTFLOAT* R)
+void AAC_RENAME(ff_aac_sbr_apply)(AACDecContext *ac, ChannelElement *che,
+ int id_aac, void *L_, void *R_)
{
+ INTFLOAT *L = L_, *R = R_;
+ SpectralBandReplication *sbr = get_sbr(che);
int downsampled = ac->oc[1].m4ac.ext_sample_rate < sbr->sample_rate;
int ch;
int nch = (id_aac == TYPE_CPE) ? 2 : 1;
@@ -1556,10 +1570,4 @@ static void aacsbr_func_ptr_init(AACSBRContext *c)
c->sbr_hf_assemble = sbr_hf_assemble;
c->sbr_x_gen = sbr_x_gen;
c->sbr_hf_inverse_filter = sbr_hf_inverse_filter;
-
-#if !USE_FIXED
-#if ARCH_MIPS
- ff_aacsbr_func_ptr_init_mips(c);
-#endif
-#endif
}
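The SBR restructuring replaces the embedded context with an ExtChannelElement that the SBR code allocates itself: the public ChannelElement is the first member, so existing callers keep passing ChannelElement* and get_sbr() simply casts back to the enclosing struct. A generic sketch of that container idiom (names are illustrative):

    #include <stdlib.h>

    typedef struct Public  { int id; } Public;
    typedef struct Wrapper { Public pub; int private_state[16]; } Wrapper;

    static Public *alloc_public(void)
    {
        Wrapper *w = calloc(1, sizeof(*w));
        return w ? &w->pub : NULL;
    }

    static int *get_private(Public *p)
    {
        return ((Wrapper *)p)->private_state;   /* valid: pub is the first member */
    }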
diff --git a/libavcodec/aacsbrdata.h b/libavcodec/aacsbrdata.h
index b0585309e0..9c25098240 100644
--- a/libavcodec/aacsbrdata.h
+++ b/libavcodec/aacsbrdata.h
@@ -42,7 +42,169 @@ static const int8_t sbr_offset[6][16] = {
};
///< window coefficients for analysis/synthesis QMF banks
-static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320];
+static const DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320] = {
+ Q31( 0.0000000000f), Q31(-0.0005617692f),
+ Q31(-0.0004875227f), Q31(-0.0005040714f),
+ Q31(-0.0005466565f), Q31(-0.0005870930f),
+ Q31(-0.0006312493f), Q31(-0.0006777690f),
+ Q31(-0.0007157736f), Q31(-0.0007440941f),
+ Q31(-0.0007681371f), Q31(-0.0007834332f),
+ Q31(-0.0007803664f), Q31(-0.0007757977f),
+ Q31(-0.0007530001f), Q31(-0.0007215391f),
+ Q31(-0.0006650415f), Q31(-0.0005946118f),
+ Q31(-0.0005145572f), Q31(-0.0004095121f),
+ Q31(-0.0002896981f), Q31(-0.0001446380f),
+ Q31( 0.0000134949f), Q31( 0.0002043017f),
+ Q31( 0.0004026540f), Q31( 0.0006239376f),
+ Q31( 0.0008608443f), Q31( 0.0011250155f),
+ Q31( 0.0013902494f), Q31( 0.0016868083f),
+ Q31( 0.0019841140f), Q31( 0.0023017254f),
+ Q31( 0.0026201758f), Q31( 0.0029469447f),
+ Q31( 0.0032739613f), Q31( 0.0036008268f),
+ Q31( 0.0039207432f), Q31( 0.0042264269f),
+ Q31( 0.0045209852f), Q31( 0.0047932560f),
+ Q31( 0.0050393022f), Q31( 0.0052461166f),
+ Q31( 0.0054196775f), Q31( 0.0055475714f),
+ Q31( 0.0056220643f), Q31( 0.0056389199f),
+ Q31( 0.0055917128f), Q31( 0.0054753783f),
+ Q31( 0.0052715758f), Q31( 0.0049839687f),
+ Q31( 0.0046039530f), Q31( 0.0041251642f),
+ Q31( 0.0035401246f), Q31( 0.0028446757f),
+ Q31( 0.0020274176f), Q31( 0.0010902329f),
+ Q31( 0.0000276045f), Q31(-0.0011568135f),
+ Q31(-0.0024826723f), Q31(-0.0039401124f),
+ Q31(-0.0055337211f), Q31(-0.0072615816f),
+ Q31(-0.0091325329f), Q31(-0.0111315548f),
+ Q31( 0.0132718220f), Q31( 0.0155405553f),
+ Q31( 0.0179433381f), Q31( 0.0204531793f),
+ Q31( 0.0230680169f), Q31( 0.0257875847f),
+ Q31( 0.0286072173f), Q31( 0.0315017608f),
+ Q31( 0.0344620948f), Q31( 0.0374812850f),
+ Q31( 0.0405349170f), Q31( 0.0436097542f),
+ Q31( 0.0466843027f), Q31( 0.0497385755f),
+ Q31( 0.0527630746f), Q31( 0.0557173648f),
+ Q31( 0.0585915683f), Q31( 0.0613455171f),
+ Q31( 0.0639715898f), Q31( 0.0664367512f),
+ Q31( 0.0687043828f), Q31( 0.0707628710f),
+ Q31( 0.0725682583f), Q31( 0.0741003642f),
+ Q31( 0.0753137336f), Q31( 0.0761992479f),
+ Q31( 0.0767093490f), Q31( 0.0768230011f),
+ Q31( 0.0765050718f), Q31( 0.0757305756f),
+ Q31( 0.0744664394f), Q31( 0.0726774642f),
+ Q31( 0.0703533073f), Q31( 0.0674525021f),
+ Q31( 0.0639444805f), Q31( 0.0598166570f),
+ Q31( 0.0550460034f), Q31( 0.0495978676f),
+ Q31( 0.0434768782f), Q31( 0.0366418116f),
+ Q31( 0.0290824006f), Q31( 0.0207997072f),
+ Q31( 0.0117623832f), Q31( 0.0019765601f),
+ Q31(-0.0085711749f), Q31(-0.0198834129f),
+ Q31(-0.0319531274f), Q31(-0.0447806821f),
+ Q31(-0.0583705326f), Q31(-0.0726943300f),
+ Q31(-0.0877547536f), Q31(-0.1035329531f),
+ Q31(-0.1200077984f), Q31(-0.1371551761f),
+ Q31(-0.1549607071f), Q31(-0.1733808172f),
+ Q31(-0.1923966745f), Q31(-0.2119735853f),
+ Q31(-0.2320690870f), Q31(-0.2526480309f),
+ Q31(-0.2736634040f), Q31(-0.2950716717f),
+ Q31(-0.3168278913f), Q31(-0.3388722693f),
+ Q31( 0.3611589903f), Q31( 0.3836350013f),
+ Q31( 0.4062317676f), Q31( 0.4289119920f),
+ Q31( 0.4515996535f), Q31( 0.4742453214f),
+ Q31( 0.4967708254f), Q31( 0.5191234970f),
+ Q31( 0.5412553448f), Q31( 0.5630789140f),
+ Q31( 0.5845403235f), Q31( 0.6055783538f),
+ Q31( 0.6261242695f), Q31( 0.6461269695f),
+ Q31( 0.6655139880f), Q31( 0.6842353293f),
+ Q31( 0.7022388719f), Q31( 0.7194462634f),
+ Q31( 0.7358211758f), Q31( 0.7513137456f),
+ Q31( 0.7658674865f), Q31( 0.7794287519f),
+ Q31( 0.7919735841f), Q31( 0.8034485751f),
+ Q31( 0.8138191270f), Q31( 0.8230419890f),
+ Q31( 0.8311038457f), Q31( 0.8379717337f),
+ Q31( 0.8436238281f), Q31( 0.8480315777f),
+ Q31( 0.8511971524f), Q31( 0.8531020949f),
+ Q31( 0.8537385600f), Q31( 0.8531020949f),
+ Q31( 0.8511971524f), Q31( 0.8480315777f),
+ Q31( 0.8436238281f), Q31( 0.8379717337f),
+ Q31( 0.8311038457f), Q31( 0.8230419890f),
+ Q31( 0.8138191270f), Q31( 0.8034485751f),
+ Q31( 0.7919735841f), Q31( 0.7794287519f),
+ Q31( 0.7658674865f), Q31( 0.7513137456f),
+ Q31( 0.7358211758f), Q31( 0.7194462634f),
+ Q31( 0.7022388719f), Q31( 0.6842353293f),
+ Q31( 0.6655139880f), Q31( 0.6461269695f),
+ Q31( 0.6261242695f), Q31( 0.6055783538f),
+ Q31( 0.5845403235f), Q31( 0.5630789140f),
+ Q31( 0.5412553448f), Q31( 0.5191234970f),
+ Q31( 0.4967708254f), Q31( 0.4742453214f),
+ Q31( 0.4515996535f), Q31( 0.4289119920f),
+ Q31( 0.4062317676f), Q31( 0.3836350013f),
+ -Q31( 0.3611589903f), Q31(-0.3388722693f),
+ Q31(-0.3168278913f), Q31(-0.2950716717f),
+ Q31(-0.2736634040f), Q31(-0.2526480309f),
+ Q31(-0.2320690870f), Q31(-0.2119735853f),
+ Q31(-0.1923966745f), Q31(-0.1733808172f),
+ Q31(-0.1549607071f), Q31(-0.1371551761f),
+ Q31(-0.1200077984f), Q31(-0.1035329531f),
+ Q31(-0.0877547536f), Q31(-0.0726943300f),
+ Q31(-0.0583705326f), Q31(-0.0447806821f),
+ Q31(-0.0319531274f), Q31(-0.0198834129f),
+ Q31(-0.0085711749f), Q31( 0.0019765601f),
+ Q31( 0.0117623832f), Q31( 0.0207997072f),
+ Q31( 0.0290824006f), Q31( 0.0366418116f),
+ Q31( 0.0434768782f), Q31( 0.0495978676f),
+ Q31( 0.0550460034f), Q31( 0.0598166570f),
+ Q31( 0.0639444805f), Q31( 0.0674525021f),
+ Q31( 0.0703533073f), Q31( 0.0726774642f),
+ Q31( 0.0744664394f), Q31( 0.0757305756f),
+ Q31( 0.0765050718f), Q31( 0.0768230011f),
+ Q31( 0.0767093490f), Q31( 0.0761992479f),
+ Q31( 0.0753137336f), Q31( 0.0741003642f),
+ Q31( 0.0725682583f), Q31( 0.0707628710f),
+ Q31( 0.0687043828f), Q31( 0.0664367512f),
+ Q31( 0.0639715898f), Q31( 0.0613455171f),
+ Q31( 0.0585915683f), Q31( 0.0557173648f),
+ Q31( 0.0527630746f), Q31( 0.0497385755f),
+ Q31( 0.0466843027f), Q31( 0.0436097542f),
+ Q31( 0.0405349170f), Q31( 0.0374812850f),
+ Q31( 0.0344620948f), Q31( 0.0315017608f),
+ Q31( 0.0286072173f), Q31( 0.0257875847f),
+ Q31( 0.0230680169f), Q31( 0.0204531793f),
+ Q31( 0.0179433381f), Q31( 0.0155405553f),
+ -Q31( 0.0132718220f), Q31(-0.0111315548f),
+ Q31(-0.0091325329f), Q31(-0.0072615816f),
+ Q31(-0.0055337211f), Q31(-0.0039401124f),
+ Q31(-0.0024826723f), Q31(-0.0011568135f),
+ Q31( 0.0000276045f), Q31( 0.0010902329f),
+ Q31( 0.0020274176f), Q31( 0.0028446757f),
+ Q31( 0.0035401246f), Q31( 0.0041251642f),
+ Q31( 0.0046039530f), Q31( 0.0049839687f),
+ Q31( 0.0052715758f), Q31( 0.0054753783f),
+ Q31( 0.0055917128f), Q31( 0.0056389199f),
+ Q31( 0.0056220643f), Q31( 0.0055475714f),
+ Q31( 0.0054196775f), Q31( 0.0052461166f),
+ Q31( 0.0050393022f), Q31( 0.0047932560f),
+ Q31( 0.0045209852f), Q31( 0.0042264269f),
+ Q31( 0.0039207432f), Q31( 0.0036008268f),
+ Q31( 0.0032739613f), Q31( 0.0029469447f),
+ Q31( 0.0026201758f), Q31( 0.0023017254f),
+ Q31( 0.0019841140f), Q31( 0.0016868083f),
+ Q31( 0.0013902494f), Q31( 0.0011250155f),
+ Q31( 0.0008608443f), Q31( 0.0006239376f),
+ Q31( 0.0004026540f), Q31( 0.0002043017f),
+ Q31( 0.0000134949f), Q31(-0.0001446380f),
+ Q31(-0.0002896981f), Q31(-0.0004095121f),
+ Q31(-0.0005145572f), Q31(-0.0005946118f),
+ Q31(-0.0006650415f), Q31(-0.0007215391f),
+ Q31(-0.0007530001f), Q31(-0.0007757977f),
+ Q31(-0.0007803664f), Q31(-0.0007834332f),
+ Q31(-0.0007681371f), Q31(-0.0007440941f),
+ Q31(-0.0007157736f), Q31(-0.0006777690f),
+ Q31(-0.0006312493f), Q31(-0.0005870930f),
+ Q31(-0.0005466565f), Q31(-0.0005040714f),
+ Q31(-0.0004875227f), Q31(-0.0005617692f),
+};
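The downsampled QMF window is now stored as a 320-entry constant instead of being generated at init time; the removed aacsbr_tableinit derived it from the 640-tap upsampled window by taking every second coefficient. Sketch of that derivation (float form shown; the fixed build uses the Q31 representation):

    static void make_sbr_qmf_window_ds(float ds[320], const float us[640])
    {
        for (int n = 0; n < 320; n++)
            ds[n] = us[2 * n];          /* matches the old sbr_qmf_window_ds init */
    }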
+
/* This table contains redundancy: It is symmetric about the entry #320
* with the exception of entries 384 and 512 which are negated. */
static const DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_us)[640] = {
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index 3cef9c5d2b..3718b81a07 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -105,6 +105,7 @@ av_cold void ff_aac_float_common_init(void)
static AVOnce init_static_once = AV_ONCE_INIT;
ff_thread_once(&init_static_once, aac_float_common_init);
}
+#endif
const float ff_ltp_coef[8] = {
0.570829, 0.696616, 0.813004, 0.911304,
@@ -144,7 +145,6 @@ const float * const ff_tns_tmp2_map[4] = {
tns_tmp2_map_1_3,
tns_tmp2_map_1_4
};
-#endif
const uint8_t ff_aac_num_swb_1024[] = {
41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index beb6a02f5f..a3256bb1cc 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -1,4 +1,6 @@
# subsystems
+OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
+OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
@@ -35,6 +37,8 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
# subsystems
NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o
+NEON-OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_neon.o
+NEON-OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_neon.o
NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
new file mode 100644
index 0000000000..e367353e11
--- /dev/null
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2024 Geoff Hill <geoff@geoffhill.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/ac3dsp.h"
+#include "config.h"
+
+void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
+void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
+ const int32_t *coef0,
+ const int32_t *coef1,
+ int len);
+void ff_ac3_sum_square_butterfly_float_neon(float sum[4],
+ const float *coef0,
+ const float *coef1,
+ int len);
+
+av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
+{
+ int cpu_flags = av_get_cpu_flags();
+ if (!have_neon(cpu_flags)) return;
+
+ c->ac3_exponent_min = ff_ac3_exponent_min_neon;
+ c->extract_exponents = ff_ac3_extract_exponents_neon;
+ c->float_to_fixed24 = ff_float_to_fixed24_neon;
+ c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
+ c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
+}
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
new file mode 100644
index 0000000000..7e97cc39f7
--- /dev/null
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2024 Geoff Hill <geoff@geoffhill.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+function ff_ac3_exponent_min_neon, export=1
+ cbz w1, 3f
+1: ld1 {v0.16b}, [x0]
+ mov w3, w1
+ add x4, x0, #256
+2: ld1 {v1.16b}, [x4]
+ umin v0.16b, v0.16b, v1.16b
+ add x4, x4, #256
+ subs w3, w3, #1
+ b.gt 2b
+ st1 {v0.16b}, [x0], #16
+ subs w2, w2, #16
+ b.gt 1b
+3: ret
+endfunc
+
+function ff_ac3_extract_exponents_neon, export=1
+ movi v1.4s, #8
+1: ld1 {v0.4s}, [x1], #16
+ abs v0.4s, v0.4s
+ clz v0.4s, v0.4s
+ sub v0.4s, v0.4s, v1.4s
+ xtn v0.4h, v0.4s
+ xtn v0.8b, v0.8h
+ st1 {v0.s}[0], [x0], #4
+ subs w2, w2, #4
+ b.gt 1b
+ ret
+endfunc
+
+function ff_float_to_fixed24_neon, export=1
+1: ld1 {v0.4s, v1.4s}, [x1], #32
+ fcvtzs v0.4s, v0.4s, #24
+ ld1 {v2.4s, v3.4s}, [x1], #32
+ fcvtzs v1.4s, v1.4s, #24
+ fcvtzs v2.4s, v2.4s, #24
+ st1 {v0.4s, v1.4s}, [x0], #32
+ fcvtzs v3.4s, v3.4s, #24
+ st1 {v2.4s, v3.4s}, [x0], #32
+ subs w2, w2, #16
+ b.ne 1b
+ ret
+endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1
+ movi v0.2d, #0
+ movi v1.2d, #0
+ movi v2.2d, #0
+ movi v3.2d, #0
+1: ld1 {v4.2s}, [x1], #8
+ ld1 {v5.2s}, [x2], #8
+ add v6.2s, v4.2s, v5.2s
+ sub v7.2s, v4.2s, v5.2s
+ smlal v0.2d, v4.2s, v4.2s
+ smlal v1.2d, v5.2s, v5.2s
+ smlal v2.2d, v6.2s, v6.2s
+ smlal v3.2d, v7.2s, v7.2s
+ subs w3, w3, #2
+ b.gt 1b
+ addp d0, v0.2d
+ addp d1, v1.2d
+ addp d2, v2.2d
+ addp d3, v3.2d
+ st1 {v0.1d-v3.1d}, [x0]
+ ret
+endfunc
+
+function ff_ac3_sum_square_butterfly_float_neon, export=1
+ movi v0.4s, #0
+ movi v1.4s, #0
+ movi v2.4s, #0
+ movi v3.4s, #0
+1: ld1 {v30.4s}, [x1], #16
+ ld1 {v31.4s}, [x2], #16
+ fadd v16.4s, v30.4s, v31.4s
+ fsub v17.4s, v30.4s, v31.4s
+ fmla v0.4s, v30.4s, v30.4s
+ fmla v1.4s, v31.4s, v31.4s
+ fmla v2.4s, v16.4s, v16.4s
+ fmla v3.4s, v17.4s, v17.4s
+ subs w3, w3, #4
+ b.gt 1b
+ faddp v0.4s, v0.4s, v1.4s
+ faddp v2.4s, v2.4s, v3.4s
+ faddp v0.4s, v0.4s, v2.4s
+ st1 {v0.4s}, [x0]
+ ret
+endfunc
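ff_float_to_fixed24_neon converts blocks of floats to Q8.24 with fcvtzs #24, which folds the 2^24 scaling into the float-to-int conversion and handles 16 samples per loop iteration. A scalar reference of the same mapping (sketch only; the SIMD path truncates toward zero, so rounding may differ from other implementations):

    #include <stddef.h>
    #include <stdint.h>

    static void float_to_fixed24_c(int32_t *dst, const float *src, size_t len)
    {
        for (size_t i = 0; i < len; i++)
            dst[i] = (int32_t)(src[i] * 16777216.0f);   /* scale by 2^24 */
    }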
diff --git a/libavcodec/aarch64/fdct.h b/libavcodec/aarch64/fdct.h
new file mode 100644
index 0000000000..0901b53a83
--- /dev/null
+++ b/libavcodec/aarch64/fdct.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AARCH64_FDCT_H
+#define AVCODEC_AARCH64_FDCT_H
+
+#include <stdint.h>
+
+void ff_fdct_neon(int16_t *block);
+
+#endif /* AVCODEC_AARCH64_FDCT_H */
diff --git a/libavcodec/aarch64/fdctdsp_init_aarch64.c b/libavcodec/aarch64/fdctdsp_init_aarch64.c
new file mode 100644
index 0000000000..59d91bc8fc
--- /dev/null
+++ b/libavcodec/aarch64/fdctdsp_init_aarch64.c
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fdctdsp.h"
+#include "fdct.h"
+
+av_cold void ff_fdctdsp_init_aarch64(FDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_neon(cpu_flags)) {
+ if (!high_bit_depth) {
+ if (avctx->dct_algo == FF_DCT_AUTO ||
+ avctx->dct_algo == FF_DCT_NEON) {
+ c->fdct = ff_fdct_neon;
+ }
+ }
+ }
+}
diff --git a/libavcodec/aarch64/fdctdsp_neon.S b/libavcodec/aarch64/fdctdsp_neon.S
new file mode 100644
index 0000000000..53fa4debe5
--- /dev/null
+++ b/libavcodec/aarch64/fdctdsp_neon.S
@@ -0,0 +1,368 @@
+/*
+ * Armv8 Neon optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2009-2011, Nokia Corporation and/or its subsidiary(-ies).
+ * All Rights Reserved.
+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+ * Copyright (C) 2013-2014, Linaro Limited. All Rights Reserved.
+ * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
+ * Copyright (C) 2014-2016, 2020, D. R. Commander. All Rights Reserved.
+ * Copyright (C) 2015-2016, 2018, Matthieu Darbois. All Rights Reserved.
+ * Copyright (C) 2016, Siarhei Siamashka. All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "libavutil/aarch64/asm.S"
+#include "neon.S"
+
+// #define EIGHT_BIT_SAMPLES
+
+/* Constants for jsimd_fdct_islow_neon() */
+
+#define F_0_298 2446 /* FIX(0.298631336) */
+#define F_0_390 3196 /* FIX(0.390180644) */
+#define F_0_541 4433 /* FIX(0.541196100) */
+#define F_0_765 6270 /* FIX(0.765366865) */
+#define F_0_899 7373 /* FIX(0.899976223) */
+#define F_1_175 9633 /* FIX(1.175875602) */
+#define F_1_501 12299 /* FIX(1.501321110) */
+#define F_1_847 15137 /* FIX(1.847759065) */
+#define F_1_961 16069 /* FIX(1.961570560) */
+#define F_2_053 16819 /* FIX(2.053119869) */
+#define F_2_562 20995 /* FIX(2.562915447) */
+#define F_3_072 25172 /* FIX(3.072711026) */
+
+const jsimd_fdct_islow_neon_consts, align=4
+ .short F_0_298
+ .short -F_0_390
+ .short F_0_541
+ .short F_0_765
+ .short - F_0_899
+ .short F_1_175
+ .short F_1_501
+ .short - F_1_847
+ .short - F_1_961
+ .short F_2_053
+ .short - F_2_562
+ .short F_3_072
+ .short 0 /* padding */
+ .short 0
+ .short 0
+ .short 0
+endconst
+
+#undef F_0_298
+#undef F_0_390
+#undef F_0_541
+#undef F_0_765
+#undef F_0_899
+#undef F_1_175
+#undef F_1_501
+#undef F_1_847
+#undef F_1_961
+#undef F_2_053
+#undef F_2_562
+#undef F_3_072
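The constants above are libjpeg-style fixed-point factors with CONST_BITS = 13, i.e. FIX(x) = round(x * 2^13): FIX(0.541196100) = 4433, FIX(0.298631336) = 2446, and so on. A one-liner showing the encoding (helper name is illustrative):

    #include <math.h>

    static int fix13(double x)          /* FIX(x) with CONST_BITS = 13 */
    {
        return (int)lround(x * (1 << 13));
    }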
+
+/*****************************************************************************/
+
+/*
+ * jsimd_fdct_islow_neon
+ *
+ * This file contains a slower but more accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform). The following code is based
+ * directly on the IJG's original jfdctint.c; see jfdctint.c for
+ * more details.
+ */
+
+#define CONST_BITS 13
+#ifdef EIGHT_BIT_SAMPLES
+#define PASS1_BITS 2
+#else
+#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
+#endif
+
+#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
+#define DESCALE_P2 (CONST_BITS + PASS1_BITS)
+
+#define XFIX_P_0_298 v0.h[0]
+#define XFIX_N_0_390 v0.h[1]
+#define XFIX_P_0_541 v0.h[2]
+#define XFIX_P_0_765 v0.h[3]
+#define XFIX_N_0_899 v0.h[4]
+#define XFIX_P_1_175 v0.h[5]
+#define XFIX_P_1_501 v0.h[6]
+#define XFIX_N_1_847 v0.h[7]
+#define XFIX_N_1_961 v1.h[0]
+#define XFIX_P_2_053 v1.h[1]
+#define XFIX_N_2_562 v1.h[2]
+#define XFIX_P_3_072 v1.h[3]
+
+function ff_fdct_neon, export=1
+
+ DATA .req x0
+ TMP .req x9
+
+ /* Load constants */
+ movrel TMP, jsimd_fdct_islow_neon_consts
+ ld1 {v0.8h, v1.8h}, [TMP]
+
+ /* Load all DATA into Neon registers with the following allocation:
+ * 0 1 2 3 | 4 5 6 7
+ * ---------+--------
+ * 0 | d16 | d17 | v16.8h
+ * 1 | d18 | d19 | v17.8h
+ * 2 | d20 | d21 | v18.8h
+ * 3 | d22 | d23 | v19.8h
+ * 4 | d24 | d25 | v20.8h
+ * 5 | d26 | d27 | v21.8h
+ * 6 | d28 | d29 | v22.8h
+ * 7 | d30 | d31 | v23.8h
+ */
+
+ ld1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64
+ ld1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA]
+ sub DATA, DATA, #64
+
+ /* Transpose */
+ transpose_8x8H v16, v17, v18, v19, v20, v21, v22, v23, v31, v2
+
+ /* 1-D FDCT */
+ add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */
+ sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */
+ add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */
+ sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */
+ add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */
+ sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */
+ add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */
+ sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */
+
+ /* Even part */
+ add v4.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */
+ sub v5.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */
+ add v6.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */
+ sub v7.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */
+
+ add v16.8h, v4.8h, v6.8h /* tmp10 + tmp11 */
+ sub v20.8h, v4.8h, v6.8h /* tmp10 - tmp11 */
+
+ add v18.8h, v7.8h, v5.8h /* tmp12 + tmp13 */
+
+ shl v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS); */
+ shl v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS); */
+
+ smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */
+ smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */
+ mov v22.16b, v18.16b
+ mov v25.16b, v24.16b
+
+ smlal v18.4s, v5.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */
+ smlal2 v24.4s, v5.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */
+ smlal v22.4s, v7.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */
+ smlal2 v25.4s, v7.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */
+
+ rshrn v18.4h, v18.4s, #DESCALE_P1
+ rshrn v22.4h, v22.4s, #DESCALE_P1
+ rshrn2 v18.8h, v24.4s, #DESCALE_P1 /* dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS-PASS1_BITS); */
+ rshrn2 v22.8h, v25.4s, #DESCALE_P1 /* dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS-PASS1_BITS); */
+
+ /* Odd part */
+ add v2.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */
+ add v3.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */
+ add v6.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */
+ add v7.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */
+ smull v4.4s, v6.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */
+ smull2 v5.4s, v6.8h, XFIX_P_1_175
+ smlal v4.4s, v7.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */
+ smlal2 v5.4s, v7.8h, XFIX_P_1_175
+
+ smull2 v24.4s, v28.8h, XFIX_P_0_298
+ smull2 v25.4s, v29.8h, XFIX_P_2_053
+ smull2 v26.4s, v30.8h, XFIX_P_3_072
+ smull2 v27.4s, v31.8h, XFIX_P_1_501
+ smull v23.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */
+ smull v21.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */
+ smull v19.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */
+ smull v17.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */
+
+ smull2 v28.4s, v2.8h, XFIX_N_0_899
+ smull2 v29.4s, v3.8h, XFIX_N_2_562
+ smull2 v30.4s, v6.8h, XFIX_N_1_961
+ smull2 v31.4s, v7.8h, XFIX_N_0_390
+ smull v2.4s, v2.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223); */
+ smull v3.4s, v3.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447); */
+ smull v6.4s, v6.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560); */
+ smull v7.4s, v7.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644); */
+
+ add v6.4s, v6.4s, v4.4s /* z3 += z5 */
+ add v30.4s, v30.4s, v5.4s
+ add v7.4s, v7.4s, v4.4s /* z4 += z5 */
+ add v31.4s, v31.4s, v5.4s
+
+ add v23.4s, v23.4s, v2.4s /* tmp4 += z1 */
+ add v24.4s, v24.4s, v28.4s
+ add v21.4s, v21.4s, v3.4s /* tmp5 += z2 */
+ add v25.4s, v25.4s, v29.4s
+ add v19.4s, v19.4s, v6.4s /* tmp6 += z3 */
+ add v26.4s, v26.4s, v30.4s
+ add v17.4s, v17.4s, v7.4s /* tmp7 += z4 */
+ add v27.4s, v27.4s, v31.4s
+
+ add v23.4s, v23.4s, v6.4s /* tmp4 += z3 */
+ add v24.4s, v24.4s, v30.4s
+ add v21.4s, v21.4s, v7.4s /* tmp5 += z4 */
+ add v25.4s, v25.4s, v31.4s
+ add v19.4s, v19.4s, v3.4s /* tmp6 += z2 */
+ add v26.4s, v26.4s, v29.4s
+ add v17.4s, v17.4s, v2.4s /* tmp7 += z1 */
+ add v27.4s, v27.4s, v28.4s
+
+ rshrn v23.4h, v23.4s, #DESCALE_P1
+ rshrn v21.4h, v21.4s, #DESCALE_P1
+ rshrn v19.4h, v19.4s, #DESCALE_P1
+ rshrn v17.4h, v17.4s, #DESCALE_P1
+ rshrn2 v23.8h, v24.4s, #DESCALE_P1 /* dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); */
+ rshrn2 v21.8h, v25.4s, #DESCALE_P1 /* dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); */
+ rshrn2 v19.8h, v26.4s, #DESCALE_P1 /* dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); */
+ rshrn2 v17.8h, v27.4s, #DESCALE_P1 /* dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); */
+
+ /* Transpose */
+ transpose_8x8H v16, v17, v18, v19, v20, v21, v22, v23, v31, v2
+
+ /* 1-D FDCT */
+ add v24.8h, v16.8h, v23.8h /* tmp0 = dataptr[0] + dataptr[7]; */
+ sub v31.8h, v16.8h, v23.8h /* tmp7 = dataptr[0] - dataptr[7]; */
+ add v25.8h, v17.8h, v22.8h /* tmp1 = dataptr[1] + dataptr[6]; */
+ sub v30.8h, v17.8h, v22.8h /* tmp6 = dataptr[1] - dataptr[6]; */
+ add v26.8h, v18.8h, v21.8h /* tmp2 = dataptr[2] + dataptr[5]; */
+ sub v29.8h, v18.8h, v21.8h /* tmp5 = dataptr[2] - dataptr[5]; */
+ add v27.8h, v19.8h, v20.8h /* tmp3 = dataptr[3] + dataptr[4]; */
+ sub v28.8h, v19.8h, v20.8h /* tmp4 = dataptr[3] - dataptr[4]; */
+
+ /* Even part */
+ add v4.8h, v24.8h, v27.8h /* tmp10 = tmp0 + tmp3; */
+ sub v5.8h, v24.8h, v27.8h /* tmp13 = tmp0 - tmp3; */
+ add v6.8h, v25.8h, v26.8h /* tmp11 = tmp1 + tmp2; */
+ sub v7.8h, v25.8h, v26.8h /* tmp12 = tmp1 - tmp2; */
+
+ add v16.8h, v4.8h, v6.8h /* tmp10 + tmp11 */
+ sub v20.8h, v4.8h, v6.8h /* tmp10 - tmp11 */
+
+ add v18.8h, v7.8h, v5.8h /* tmp12 + tmp13 */
+
+ srshr v16.8h, v16.8h, #PASS1_BITS /* dataptr[0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS); */
+ srshr v20.8h, v20.8h, #PASS1_BITS /* dataptr[4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS); */
+
+ smull2 v24.4s, v18.8h, XFIX_P_0_541 /* z1 hi = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */
+ smull v18.4s, v18.4h, XFIX_P_0_541 /* z1 lo = MULTIPLY(tmp12 + tmp13, XFIX_P_0_541); */
+ mov v22.16b, v18.16b
+ mov v25.16b, v24.16b
+
+ smlal v18.4s, v5.4h, XFIX_P_0_765 /* lo z1 + MULTIPLY(tmp13, XFIX_P_0_765) */
+ smlal2 v24.4s, v5.8h, XFIX_P_0_765 /* hi z1 + MULTIPLY(tmp13, XFIX_P_0_765) */
+ smlal v22.4s, v7.4h, XFIX_N_1_847 /* lo z1 + MULTIPLY(tmp12, XFIX_N_1_847) */
+ smlal2 v25.4s, v7.8h, XFIX_N_1_847 /* hi z1 + MULTIPLY(tmp12, XFIX_N_1_847) */
+
+ rshrn v18.4h, v18.4s, #DESCALE_P2
+ rshrn v22.4h, v22.4s, #DESCALE_P2
+ rshrn2 v18.8h, v24.4s, #DESCALE_P2 /* dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, XFIX_P_0_765), CONST_BITS+PASS1_BITS); */
+ rshrn2 v22.8h, v25.4s, #DESCALE_P2 /* dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, XFIX_N_1_847), CONST_BITS+PASS1_BITS); */
+
+ /* Odd part */
+ add v2.8h, v28.8h, v31.8h /* z1 = tmp4 + tmp7; */
+ add v3.8h, v29.8h, v30.8h /* z2 = tmp5 + tmp6; */
+ add v6.8h, v28.8h, v30.8h /* z3 = tmp4 + tmp6; */
+ add v7.8h, v29.8h, v31.8h /* z4 = tmp5 + tmp7; */
+
+ smull v4.4s, v6.4h, XFIX_P_1_175 /* z5 lo = z3 lo * XFIX_P_1_175 */
+ smull2 v5.4s, v6.8h, XFIX_P_1_175
+ smlal v4.4s, v7.4h, XFIX_P_1_175 /* z5 = MULTIPLY(z3 + z4, FIX_1_175875602); */
+ smlal2 v5.4s, v7.8h, XFIX_P_1_175
+
+ smull2 v24.4s, v28.8h, XFIX_P_0_298
+ smull2 v25.4s, v29.8h, XFIX_P_2_053
+ smull2 v26.4s, v30.8h, XFIX_P_3_072
+ smull2 v27.4s, v31.8h, XFIX_P_1_501
+ smull v23.4s, v28.4h, XFIX_P_0_298 /* tmp4 = MULTIPLY(tmp4, FIX_0_298631336); */
+ smull v21.4s, v29.4h, XFIX_P_2_053 /* tmp5 = MULTIPLY(tmp5, FIX_2_053119869); */
+ smull v19.4s, v30.4h, XFIX_P_3_072 /* tmp6 = MULTIPLY(tmp6, FIX_3_072711026); */
+ smull v17.4s, v31.4h, XFIX_P_1_501 /* tmp7 = MULTIPLY(tmp7, FIX_1_501321110); */
+
+ smull2 v28.4s, v2.8h, XFIX_N_0_899
+ smull2 v29.4s, v3.8h, XFIX_N_2_562
+ smull2 v30.4s, v6.8h, XFIX_N_1_961
+ smull2 v31.4s, v7.8h, XFIX_N_0_390
+ smull v2.4s, v2.4h, XFIX_N_0_899 /* z1 = MULTIPLY(z1, -FIX_0_899976223); */
+ smull v3.4s, v3.4h, XFIX_N_2_562 /* z2 = MULTIPLY(z2, -FIX_2_562915447); */
+ smull v6.4s, v6.4h, XFIX_N_1_961 /* z3 = MULTIPLY(z3, -FIX_1_961570560); */
+ smull v7.4s, v7.4h, XFIX_N_0_390 /* z4 = MULTIPLY(z4, -FIX_0_390180644); */
+
+ add v6.4s, v6.4s, v4.4s /* z3 += z5 */
+ add v30.4s, v30.4s, v5.4s
+ add v7.4s, v7.4s, v4.4s /* z4 += z5 */
+ add v31.4s, v31.4s, v5.4s
+
+ add v23.4s, v23.4s, v2.4s /* tmp4 += z1 */
+ add v24.4s, v24.4s, v28.4s
+ add v21.4s, v21.4s, v3.4s /* tmp5 += z2 */
+ add v25.4s, v25.4s, v29.4s
+ add v19.4s, v19.4s, v6.4s /* tmp6 += z3 */
+ add v26.4s, v26.4s, v30.4s
+ add v17.4s, v17.4s, v7.4s /* tmp7 += z4 */
+ add v27.4s, v27.4s, v31.4s
+
+ add v23.4s, v23.4s, v6.4s /* tmp4 += z3 */
+ add v24.4s, v24.4s, v30.4s
+ add v21.4s, v21.4s, v7.4s /* tmp5 += z4 */
+ add v25.4s, v25.4s, v31.4s
+ add v19.4s, v19.4s, v3.4s /* tmp6 += z2 */
+ add v26.4s, v26.4s, v29.4s
+ add v17.4s, v17.4s, v2.4s /* tmp7 += z1 */
+ add v27.4s, v27.4s, v28.4s
+
+ rshrn v23.4h, v23.4s, #DESCALE_P2
+ rshrn v21.4h, v21.4s, #DESCALE_P2
+ rshrn v19.4h, v19.4s, #DESCALE_P2
+ rshrn v17.4h, v17.4s, #DESCALE_P2
+ rshrn2 v23.8h, v24.4s, #DESCALE_P2 /* dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS+PASS1_BITS); */
+ rshrn2 v21.8h, v25.4s, #DESCALE_P2 /* dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS+PASS1_BITS); */
+ rshrn2 v19.8h, v26.4s, #DESCALE_P2 /* dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS+PASS1_BITS); */
+ rshrn2 v17.8h, v27.4s, #DESCALE_P2 /* dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS+PASS1_BITS); */
+
+ /* Store results */
+ st1 {v16.8h, v17.8h, v18.8h, v19.8h}, [DATA], 64
+ st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA]
+
+ ret
+
+ .unreq DATA
+ .unreq TMP
+endfunc
+
+#undef XFIX_P_0_298
+#undef XFIX_N_0_390
+#undef XFIX_P_0_541
+#undef XFIX_P_0_765
+#undef XFIX_N_0_899
+#undef XFIX_P_1_175
+#undef XFIX_P_1_501
+#undef XFIX_N_1_847
+#undef XFIX_N_1_961
+#undef XFIX_P_2_053
+#undef XFIX_N_2_562
+#undef XFIX_P_3_072
diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c
index eec21aa5a2..8efd5f5323 100644
--- a/libavcodec/aarch64/idctdsp_init_aarch64.c
+++ b/libavcodec/aarch64/idctdsp_init_aarch64.c
@@ -22,7 +22,7 @@
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
-#include "libavutil/arm/cpu.h"
+#include "libavutil/aarch64/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/idctdsp.h"
#include "idct.h"
diff --git a/libavcodec/aarch64/opusdsp_init.c b/libavcodec/aarch64/opusdsp_init.c
index bb6d71b66b..a727006593 100644
--- a/libavcodec/aarch64/opusdsp_init.c
+++ b/libavcodec/aarch64/opusdsp_init.c
@@ -23,7 +23,7 @@
#include "libavcodec/opusdsp.h"
void ff_opus_postfilter_neon(float *data, int period, float *gains, int len);
-float ff_opus_deemphasis_neon(float *out, float *in, float coeff, int len);
+float ff_opus_deemphasis_neon(float *out, float *in, float coeff, const float *weights, int len);
av_cold void ff_opus_dsp_init_aarch64(OpusDSP *ctx)
{
diff --git a/libavcodec/aarch64/opusdsp_neon.S b/libavcodec/aarch64/opusdsp_neon.S
index e933151ab4..253825aa61 100644
--- a/libavcodec/aarch64/opusdsp_neon.S
+++ b/libavcodec/aarch64/opusdsp_neon.S
@@ -18,29 +18,11 @@
#include "libavutil/aarch64/asm.S"
- // 0.85..^1 0.85..^2 0.85..^3 0.85..^4
-const tab_st, align=4
- .word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
-endconst
-const tab_x0, align=4
- .word 0x0, 0x3f599a00, 0x3f38f671, 0x3f1d382a
-endconst
-const tab_x1, align=4
- .word 0x0, 0x0, 0x3f599a00, 0x3f38f671
-endconst
-const tab_x2, align=4
- .word 0x0, 0x0, 0x0, 0x3f599a00
-endconst
-
function ff_opus_deemphasis_neon, export=1
- movrel x4, tab_st
- ld1 {v4.4s}, [x4]
- movrel x4, tab_x0
- ld1 {v5.4s}, [x4]
- movrel x4, tab_x1
- ld1 {v6.4s}, [x4]
- movrel x4, tab_x2
- ld1 {v7.4s}, [x4]
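+ /* The weight tables are now supplied by the caller: per the updated
+ prototype, x2 points to 16 floats (loaded into v4-v7) and the
+ sample count has moved from w2 to w3. */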
+ ld1 {v4.4s}, [x2], #16
+ ld1 {v5.4s}, [x2], #16
+ ld1 {v6.4s}, [x2], #16
+ ld1 {v7.4s}, [x2]
fmul v0.4s, v4.4s, v0.s[0]
@@ -63,7 +45,7 @@ function ff_opus_deemphasis_neon, export=1
st1 {v1.4s, v2.4s}, [x0], #32
fmul v0.4s, v4.4s, v2.s[3]
- subs w2, w2, #8
+ subs w3, w3, #8
b.gt 1b
mov s0, v2.s[3]
diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 4152fd4e01..69989690dd 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -81,12 +81,12 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->sync_word = get_bits(gbc, 16);
if(hdr->sync_word != 0x0B77)
- return AAC_AC3_PARSE_ERROR_SYNC;
+ return AC3_PARSE_ERROR_SYNC;
/* read ahead to bsid to distinguish between AC-3 and E-AC-3 */
hdr->bitstream_id = show_bits_long(gbc, 29) & 0x1F;
if(hdr->bitstream_id > 16)
- return AAC_AC3_PARSE_ERROR_BSID;
+ return AC3_PARSE_ERROR_BSID;
hdr->num_blocks = 6;
hdr->ac3_bit_rate_code = -1;
@@ -103,11 +103,11 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->crc1 = get_bits(gbc, 16);
hdr->sr_code = get_bits(gbc, 2);
if(hdr->sr_code == 3)
- return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
+ return AC3_PARSE_ERROR_SAMPLE_RATE;
frame_size_code = get_bits(gbc, 6);
if(frame_size_code > 37)
- return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+ return AC3_PARSE_ERROR_FRAME_SIZE;
hdr->ac3_bit_rate_code = (frame_size_code >> 1);
@@ -138,19 +138,19 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->crc1 = 0;
hdr->frame_type = get_bits(gbc, 2);
if(hdr->frame_type == EAC3_FRAME_TYPE_RESERVED)
- return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
+ return AC3_PARSE_ERROR_FRAME_TYPE;
hdr->substreamid = get_bits(gbc, 3);
hdr->frame_size = (get_bits(gbc, 11) + 1) << 1;
if(hdr->frame_size < AC3_HEADER_SIZE)
- return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+ return AC3_PARSE_ERROR_FRAME_SIZE;
hdr->sr_code = get_bits(gbc, 2);
if (hdr->sr_code == 3) {
int sr_code2 = get_bits(gbc, 2);
if(sr_code2 == 3)
- return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
+ return AC3_PARSE_ERROR_SAMPLE_RATE;
hdr->sample_rate = ff_ac3_sample_rate_tab[sr_code2] / 2;
hdr->sr_shift = 1;
} else {
@@ -204,7 +204,9 @@ int av_ac3_parse_header(const uint8_t *buf, size_t size,
AC3HeaderInfo hdr;
int err;
- init_get_bits8(&gb, buf, size);
+ err = init_get_bits8(&gb, buf, size);
+ if (err < 0)
+ return AVERROR_INVALIDDATA;
err = ff_ac3_parse_header(&gb, &hdr);
if (err < 0)
return AVERROR_INVALIDDATA;
diff --git a/libavcodec/ac3_parser_internal.h b/libavcodec/ac3_parser_internal.h
index 2ac0e67ec2..46814bfb1f 100644
--- a/libavcodec/ac3_parser_internal.h
+++ b/libavcodec/ac3_parser_internal.h
@@ -64,15 +64,22 @@ typedef struct AC3HeaderInfo {
/** @} */
} AC3HeaderInfo;
+typedef enum {
+ AC3_PARSE_ERROR_SYNC = -0x1030c0a,
+ AC3_PARSE_ERROR_BSID = -0x2030c0a,
+ AC3_PARSE_ERROR_SAMPLE_RATE = -0x3030c0a,
+ AC3_PARSE_ERROR_FRAME_SIZE = -0x4030c0a,
+ AC3_PARSE_ERROR_FRAME_TYPE = -0x5030c0a,
+ AC3_PARSE_ERROR_CRC = -0x6030c0a,
+} AC3ParseError;
+
/**
* Parse AC-3 frame header.
* Parse the header up to the lfeon element, which is the first 52 or 54 bits
* depending on the audio coding mode.
* @param[in] gbc BitContext containing the first 54 bits of the frame.
* @param[out] hdr Pointer to struct where header info is written.
- * @return Returns 0 on success, -1 if there is a sync word mismatch,
- * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
- * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
+ * @return 0 on success, or a negative AC3_PARSE_ERROR_* value otherwise.
*/
int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr);
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 2d7e11c5b8..0a4d3375ee 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -39,7 +39,6 @@
#include "libavutil/opt.h"
#include "libavutil/thread.h"
#include "bswapdsp.h"
-#include "aac_ac3_parser.h"
#include "ac3_parser_internal.h"
#include "ac3dec.h"
#include "ac3dec_data.h"
@@ -1538,19 +1537,19 @@ dependent_frame:
if (err) {
switch (err) {
- case AAC_AC3_PARSE_ERROR_SYNC:
+ case AC3_PARSE_ERROR_SYNC:
av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
return AVERROR_INVALIDDATA;
- case AAC_AC3_PARSE_ERROR_BSID:
+ case AC3_PARSE_ERROR_BSID:
av_log(avctx, AV_LOG_ERROR, "invalid bitstream id\n");
break;
- case AAC_AC3_PARSE_ERROR_SAMPLE_RATE:
+ case AC3_PARSE_ERROR_SAMPLE_RATE:
av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
break;
- case AAC_AC3_PARSE_ERROR_FRAME_SIZE:
+ case AC3_PARSE_ERROR_FRAME_SIZE:
av_log(avctx, AV_LOG_ERROR, "invalid frame size\n");
break;
- case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
+ case AC3_PARSE_ERROR_FRAME_TYPE:
/* skip frame if CRC is ok. otherwise use error concealment. */
/* TODO: add support for substreams */
if (s->substreamid) {
@@ -1563,8 +1562,7 @@ dependent_frame:
av_log(avctx, AV_LOG_ERROR, "invalid frame type\n");
}
break;
- case AAC_AC3_PARSE_ERROR_CRC:
- case AAC_AC3_PARSE_ERROR_CHANNEL_CFG:
+ case AC3_PARSE_ERROR_CRC:
break;
default: // Normal AVERROR do not try to recover.
*got_frame_ptr = 0;
@@ -1574,7 +1572,7 @@ dependent_frame:
/* check that reported frame size fits in input buffer */
if (s->frame_size > buf_size) {
av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
- err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+ err = AC3_PARSE_ERROR_FRAME_SIZE;
} else if (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_CAREFUL)) {
/* check for crc mismatch */
if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2],
@@ -1582,7 +1580,7 @@ dependent_frame:
av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");
if (avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
- err = AAC_AC3_PARSE_ERROR_CRC;
+ err = AC3_PARSE_ERROR_CRC;
}
}
}
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 8397e03d32..730fa70fff 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -389,7 +389,9 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c)
c->downmix = NULL;
c->downmix_fixed = NULL;
-#if ARCH_ARM
+#if ARCH_AARCH64
+ ff_ac3dsp_init_aarch64(c);
+#elif ARCH_ARM
ff_ac3dsp_init_arm(c);
#elif ARCH_X86
ff_ac3dsp_init_x86(c);
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index ae33b361a9..b1b2bced8f 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -106,7 +106,8 @@ typedef struct AC3DSPContext {
void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int len);
} AC3DSPContext;
-void ff_ac3dsp_init (AC3DSPContext *c);
+void ff_ac3dsp_init(AC3DSPContext *c);
+void ff_ac3dsp_init_aarch64(AC3DSPContext *c);
void ff_ac3dsp_init_arm(AC3DSPContext *c);
void ff_ac3dsp_init_x86(AC3DSPContext *c);
void ff_ac3dsp_init_mips(AC3DSPContext *c);
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 7a6bcf7900..3649289865 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -30,7 +30,6 @@
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
-#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/crc.h"
#include "libavutil/emms.h"
@@ -53,6 +52,9 @@
#include "ac3enc.h"
#include "eac3enc.h"
+#define SAMPLETYPE_SIZE(ctx) (sizeof(float) == sizeof(int32_t) ? sizeof(float) : \
+ (ctx)->fixed_point ? sizeof(int32_t) : sizeof(float))
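+/* i.e. int32_t-sized samples for the fixed-point encoder, float-sized
+ * otherwise; on common targets both are 4 bytes, so this folds to a
+ * compile-time constant. */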
+
typedef struct AC3Mant {
int16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; ///< mantissa pointers for bap=1,2,4
int mant1_cnt, mant2_cnt, mant4_cnt; ///< mantissa counts for bap=1,2,4
@@ -274,13 +276,222 @@ static const int8_t ac3_coupling_start_tab[6][3][19] = {
};
+#define FLT_OPTION_THRESHOLD 0.01
+
+static int validate_float_option(float v, const float *v_list, int v_list_size)
+{
+ int i;
+
+ for (i = 0; i < v_list_size; i++) {
+ if (v < (v_list[i] + FLT_OPTION_THRESHOLD) &&
+ v > (v_list[i] - FLT_OPTION_THRESHOLD))
+ break;
+ }
+ if (i == v_list_size)
+ return AVERROR(EINVAL);
+
+ return i;
+}
+
+
+static void validate_mix_level(void *log_ctx, const char *opt_name,
+ float *opt_param, const float *list,
+ int list_size, int default_value, int min_value,
+ int *ctx_param)
+{
+ int mixlev = validate_float_option(*opt_param, list, list_size);
+ if (mixlev < min_value) {
+ mixlev = default_value;
+ if (*opt_param >= 0.0) {
+ av_log(log_ctx, AV_LOG_WARNING, "requested %s is not valid. using "
+ "default value: %0.3f\n", opt_name, list[mixlev]);
+ }
+ }
+ *opt_param = list[mixlev];
+ *ctx_param = mixlev;
+}
+
+
+/**
+ * Validate metadata options as set by the AVOption system.
+ * These values can optionally be changed per-frame.
+ *
+ * @param s AC-3 encoder private context
+ */
+static int ac3_validate_metadata(AC3EncodeContext *s)
+{
+ AVCodecContext *avctx = s->avctx;
+ AC3EncOptions *opt = &s->options;
+
+ opt->audio_production_info = 0;
+ opt->extended_bsi_1 = 0;
+ opt->extended_bsi_2 = 0;
+ opt->eac3_mixing_metadata = 0;
+ opt->eac3_info_metadata = 0;
+
+ /* determine mixing metadata / xbsi1 use */
+ if (s->channel_mode > AC3_CHMODE_STEREO && opt->preferred_stereo_downmix != AC3ENC_OPT_NONE) {
+ opt->extended_bsi_1 = 1;
+ opt->eac3_mixing_metadata = 1;
+ }
+ if (s->has_center &&
+ (opt->ltrt_center_mix_level >= 0 || opt->loro_center_mix_level >= 0)) {
+ opt->extended_bsi_1 = 1;
+ opt->eac3_mixing_metadata = 1;
+ }
+ if (s->has_surround &&
+ (opt->ltrt_surround_mix_level >= 0 || opt->loro_surround_mix_level >= 0)) {
+ opt->extended_bsi_1 = 1;
+ opt->eac3_mixing_metadata = 1;
+ }
+
+ if (s->eac3) {
+ /* determine info metadata use */
+ if (avctx->audio_service_type != AV_AUDIO_SERVICE_TYPE_MAIN)
+ opt->eac3_info_metadata = 1;
+ if (opt->copyright != AC3ENC_OPT_NONE || opt->original != AC3ENC_OPT_NONE)
+ opt->eac3_info_metadata = 1;
+ if (s->channel_mode == AC3_CHMODE_STEREO &&
+ (opt->dolby_headphone_mode != AC3ENC_OPT_NONE || opt->dolby_surround_mode != AC3ENC_OPT_NONE))
+ opt->eac3_info_metadata = 1;
+ if (s->channel_mode >= AC3_CHMODE_2F2R && opt->dolby_surround_ex_mode != AC3ENC_OPT_NONE)
+ opt->eac3_info_metadata = 1;
+ if (opt->mixing_level != AC3ENC_OPT_NONE || opt->room_type != AC3ENC_OPT_NONE ||
+ opt->ad_converter_type != AC3ENC_OPT_NONE) {
+ opt->audio_production_info = 1;
+ opt->eac3_info_metadata = 1;
+ }
+ } else {
+ /* determine audio production info use */
+ if (opt->mixing_level != AC3ENC_OPT_NONE || opt->room_type != AC3ENC_OPT_NONE)
+ opt->audio_production_info = 1;
+
+ /* determine xbsi2 use */
+ if (s->channel_mode >= AC3_CHMODE_2F2R && opt->dolby_surround_ex_mode != AC3ENC_OPT_NONE)
+ opt->extended_bsi_2 = 1;
+ if (s->channel_mode == AC3_CHMODE_STEREO && opt->dolby_headphone_mode != AC3ENC_OPT_NONE)
+ opt->extended_bsi_2 = 1;
+ if (opt->ad_converter_type != AC3ENC_OPT_NONE)
+ opt->extended_bsi_2 = 1;
+ }
+
+ /* validate AC-3 mixing levels */
+ if (!s->eac3) {
+ if (s->has_center) {
+ validate_mix_level(avctx, "center_mix_level", &opt->center_mix_level,
+ cmixlev_options, CMIXLEV_NUM_OPTIONS, 1, 0,
+ &s->center_mix_level);
+ }
+ if (s->has_surround) {
+ validate_mix_level(avctx, "surround_mix_level", &opt->surround_mix_level,
+ surmixlev_options, SURMIXLEV_NUM_OPTIONS, 1, 0,
+ &s->surround_mix_level);
+ }
+ }
+
+ /* validate extended bsi 1 / mixing metadata */
+ if (opt->extended_bsi_1 || opt->eac3_mixing_metadata) {
+ /* default preferred stereo downmix */
+ if (opt->preferred_stereo_downmix == AC3ENC_OPT_NONE)
+ opt->preferred_stereo_downmix = AC3ENC_OPT_NOT_INDICATED;
+ if (!s->eac3 || s->has_center) {
+ /* validate Lt/Rt center mix level */
+ validate_mix_level(avctx, "ltrt_center_mix_level",
+ &opt->ltrt_center_mix_level, extmixlev_options,
+ EXTMIXLEV_NUM_OPTIONS, 5, 0,
+ &s->ltrt_center_mix_level);
+ /* validate Lo/Ro center mix level */
+ validate_mix_level(avctx, "loro_center_mix_level",
+ &opt->loro_center_mix_level, extmixlev_options,
+ EXTMIXLEV_NUM_OPTIONS, 5, 0,
+ &s->loro_center_mix_level);
+ }
+ if (!s->eac3 || s->has_surround) {
+ /* validate Lt/Rt surround mix level */
+ validate_mix_level(avctx, "ltrt_surround_mix_level",
+ &opt->ltrt_surround_mix_level, extmixlev_options,
+ EXTMIXLEV_NUM_OPTIONS, 6, 3,
+ &s->ltrt_surround_mix_level);
+ /* validate Lo/Ro surround mix level */
+ validate_mix_level(avctx, "loro_surround_mix_level",
+ &opt->loro_surround_mix_level, extmixlev_options,
+ EXTMIXLEV_NUM_OPTIONS, 6, 3,
+ &s->loro_surround_mix_level);
+ }
+ }
+
+ /* validate audio service type / channels combination */
+ if ((avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_KARAOKE &&
+ avctx->ch_layout.nb_channels == 1) ||
+ ((avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_COMMENTARY ||
+ avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_EMERGENCY ||
+ avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_VOICE_OVER)
+ && avctx->ch_layout.nb_channels > 1)) {
+ av_log(avctx, AV_LOG_ERROR, "invalid audio service type for the "
+ "specified number of channels\n");
+ return AVERROR(EINVAL);
+ }
+
+ /* validate extended bsi 2 / info metadata */
+ if (opt->extended_bsi_2 || opt->eac3_info_metadata) {
+ /* default dolby headphone mode */
+ if (opt->dolby_headphone_mode == AC3ENC_OPT_NONE)
+ opt->dolby_headphone_mode = AC3ENC_OPT_NOT_INDICATED;
+ /* default dolby surround ex mode */
+ if (opt->dolby_surround_ex_mode == AC3ENC_OPT_NONE)
+ opt->dolby_surround_ex_mode = AC3ENC_OPT_NOT_INDICATED;
+ /* default A/D converter type */
+ if (opt->ad_converter_type == AC3ENC_OPT_NONE)
+ opt->ad_converter_type = AC3ENC_OPT_ADCONV_STANDARD;
+ }
+
+ /* copyright & original defaults */
+ if (!s->eac3 || opt->eac3_info_metadata) {
+ /* default copyright */
+ if (opt->copyright == AC3ENC_OPT_NONE)
+ opt->copyright = AC3ENC_OPT_OFF;
+ /* default original */
+ if (opt->original == AC3ENC_OPT_NONE)
+ opt->original = AC3ENC_OPT_ON;
+ }
+
+ /* dolby surround mode default */
+ if (!s->eac3 || opt->eac3_info_metadata) {
+ if (opt->dolby_surround_mode == AC3ENC_OPT_NONE)
+ opt->dolby_surround_mode = AC3ENC_OPT_NOT_INDICATED;
+ }
+
+ /* validate audio production info */
+ if (opt->audio_production_info) {
+ if (opt->mixing_level == AC3ENC_OPT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "mixing_level must be set if "
+ "room_type is set\n");
+ return AVERROR(EINVAL);
+ }
+ if (opt->mixing_level < 80) {
+ av_log(avctx, AV_LOG_ERROR, "invalid mixing level. must be between "
+ "80dB and 111dB\n");
+ return AVERROR(EINVAL);
+ }
+ /* default room type */
+ if (opt->room_type == AC3ENC_OPT_NONE)
+ opt->room_type = AC3ENC_OPT_NOT_INDICATED;
+ }
+
+ /* set bitstream id for alternate bitstream syntax */
+ if (!s->eac3 && (opt->extended_bsi_1 || opt->extended_bsi_2))
+ s->bitstream_id = 6;
+
+ return 0;
+}
+
/**
* Adjust the frame size to make the average bit rate match the target bit rate.
* This is only needed for 11025, 22050, and 44100 sample rates or any E-AC-3.
*
* @param s AC-3 encoder private context
*/
-void ff_ac3_adjust_frame_size(AC3EncodeContext *s)
+static void ac3_adjust_frame_size(AC3EncodeContext *s)
{
while (s->bits_written >= s->bit_rate && s->samples_written >= s->sample_rate) {
s->bits_written -= s->bit_rate;
@@ -292,7 +503,6 @@ void ff_ac3_adjust_frame_size(AC3EncodeContext *s)
s->samples_written += AC3_BLOCK_SIZE * s->num_blocks;
}
-
/**
* Set the initial coupling strategy parameters prior to coupling analysis.
*
@@ -875,8 +1085,8 @@ static av_cold void bit_alloc_init(AC3EncodeContext *s)
/* compute real values */
/* currently none of these values change during encoding, so we can just
set them once at initialization */
- s->bit_alloc.slow_decay = ff_ac3_slow_decay_tab[s->slow_decay_code] >> s->bit_alloc.sr_shift;
- s->bit_alloc.fast_decay = ff_ac3_fast_decay_tab[s->fast_decay_code] >> s->bit_alloc.sr_shift;
+ s->bit_alloc.slow_decay = ff_ac3_slow_decay_tab[s->slow_decay_code];
+ s->bit_alloc.fast_decay = ff_ac3_fast_decay_tab[s->fast_decay_code];
s->bit_alloc.slow_gain = ff_ac3_slow_gain_tab[s->slow_gain_code];
s->bit_alloc.db_per_bit = ff_ac3_db_per_bit_tab[s->db_per_bit_code];
s->bit_alloc.floor = ff_ac3_floor_tab[s->floor_code];
@@ -1424,63 +1634,63 @@ static void ac3_quantize_mantissas(AC3EncodeContext *s)
/*
* Write the AC-3 frame header to the output bitstream.
*/
-static void ac3_output_frame_header(AC3EncodeContext *s)
+static void ac3_output_frame_header(AC3EncodeContext *s, PutBitContext *pb)
{
AC3EncOptions *opt = &s->options;
- put_bits(&s->pb, 16, 0x0b77); /* frame header */
- put_bits(&s->pb, 16, 0); /* crc1: will be filled later */
- put_bits(&s->pb, 2, s->bit_alloc.sr_code);
- put_bits(&s->pb, 6, s->frame_size_code + (s->frame_size - s->frame_size_min) / 2);
- put_bits(&s->pb, 5, s->bitstream_id);
- put_bits(&s->pb, 3, s->bitstream_mode);
- put_bits(&s->pb, 3, s->channel_mode);
+ put_bits(pb, 16, 0x0b77); /* frame header */
+ put_bits(pb, 16, 0); /* crc1: will be filled later */
+ put_bits(pb, 2, s->bit_alloc.sr_code);
+ put_bits(pb, 6, s->frame_size_code + (s->frame_size - s->frame_size_min) / 2);
+ put_bits(pb, 5, s->bitstream_id);
+ put_bits(pb, 3, s->bitstream_mode);
+ put_bits(pb, 3, s->channel_mode);
if ((s->channel_mode & 0x01) && s->channel_mode != AC3_CHMODE_MONO)
- put_bits(&s->pb, 2, s->center_mix_level);
+ put_bits(pb, 2, s->center_mix_level);
if (s->channel_mode & 0x04)
- put_bits(&s->pb, 2, s->surround_mix_level);
+ put_bits(pb, 2, s->surround_mix_level);
if (s->channel_mode == AC3_CHMODE_STEREO)
- put_bits(&s->pb, 2, opt->dolby_surround_mode);
- put_bits(&s->pb, 1, s->lfe_on); /* LFE */
- put_bits(&s->pb, 5, -opt->dialogue_level);
- put_bits(&s->pb, 1, 0); /* no compression control word */
- put_bits(&s->pb, 1, 0); /* no lang code */
- put_bits(&s->pb, 1, opt->audio_production_info);
+ put_bits(pb, 2, opt->dolby_surround_mode);
+ put_bits(pb, 1, s->lfe_on); /* LFE */
+ put_bits(pb, 5, -opt->dialogue_level);
+ put_bits(pb, 1, 0); /* no compression control word */
+ put_bits(pb, 1, 0); /* no lang code */
+ put_bits(pb, 1, opt->audio_production_info);
if (opt->audio_production_info) {
- put_bits(&s->pb, 5, opt->mixing_level - 80);
- put_bits(&s->pb, 2, opt->room_type);
+ put_bits(pb, 5, opt->mixing_level - 80);
+ put_bits(pb, 2, opt->room_type);
}
- put_bits(&s->pb, 1, opt->copyright);
- put_bits(&s->pb, 1, opt->original);
+ put_bits(pb, 1, opt->copyright);
+ put_bits(pb, 1, opt->original);
if (s->bitstream_id == 6) {
/* alternate bit stream syntax */
- put_bits(&s->pb, 1, opt->extended_bsi_1);
+ put_bits(pb, 1, opt->extended_bsi_1);
if (opt->extended_bsi_1) {
- put_bits(&s->pb, 2, opt->preferred_stereo_downmix);
- put_bits(&s->pb, 3, s->ltrt_center_mix_level);
- put_bits(&s->pb, 3, s->ltrt_surround_mix_level);
- put_bits(&s->pb, 3, s->loro_center_mix_level);
- put_bits(&s->pb, 3, s->loro_surround_mix_level);
+ put_bits(pb, 2, opt->preferred_stereo_downmix);
+ put_bits(pb, 3, s->ltrt_center_mix_level);
+ put_bits(pb, 3, s->ltrt_surround_mix_level);
+ put_bits(pb, 3, s->loro_center_mix_level);
+ put_bits(pb, 3, s->loro_surround_mix_level);
}
- put_bits(&s->pb, 1, opt->extended_bsi_2);
+ put_bits(pb, 1, opt->extended_bsi_2);
if (opt->extended_bsi_2) {
- put_bits(&s->pb, 2, opt->dolby_surround_ex_mode);
- put_bits(&s->pb, 2, opt->dolby_headphone_mode);
- put_bits(&s->pb, 1, opt->ad_converter_type);
- put_bits(&s->pb, 9, 0); /* xbsi2 and encinfo : reserved */
+ put_bits(pb, 2, opt->dolby_surround_ex_mode);
+ put_bits(pb, 2, opt->dolby_headphone_mode);
+ put_bits(pb, 1, opt->ad_converter_type);
+ put_bits(pb, 9, 0); /* xbsi2 and encinfo : reserved */
}
} else {
- put_bits(&s->pb, 1, 0); /* no time code 1 */
- put_bits(&s->pb, 1, 0); /* no time code 2 */
+ put_bits(pb, 1, 0); /* no time code 1 */
+ put_bits(pb, 1, 0); /* no time code 2 */
}
- put_bits(&s->pb, 1, 0); /* no additional bit stream info */
+ put_bits(pb, 1, 0); /* no additional bit stream info */
}
/*
* Write one audio block to the output bitstream.
*/
-static void output_audio_block(AC3EncodeContext *s, int blk)
+static void output_audio_block(AC3EncodeContext *s, PutBitContext *pb, int blk)
{
int ch, i, baie, bnd, got_cpl, av_uninit(ch0);
AC3Block *block = &s->blocks[blk];
@@ -1488,48 +1698,48 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
/* block switching */
if (!s->eac3) {
for (ch = 0; ch < s->fbw_channels; ch++)
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
}
/* dither flags */
if (!s->eac3) {
for (ch = 0; ch < s->fbw_channels; ch++)
- put_bits(&s->pb, 1, 1);
+ put_bits(pb, 1, 1);
}
/* dynamic range codes */
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
/* spectral extension */
if (s->eac3)
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
/* channel coupling */
if (!s->eac3)
- put_bits(&s->pb, 1, block->new_cpl_strategy);
+ put_bits(pb, 1, block->new_cpl_strategy);
if (block->new_cpl_strategy) {
if (!s->eac3)
- put_bits(&s->pb, 1, block->cpl_in_use);
+ put_bits(pb, 1, block->cpl_in_use);
if (block->cpl_in_use) {
int start_sub, end_sub;
if (s->eac3)
- put_bits(&s->pb, 1, 0); /* enhanced coupling */
+ put_bits(pb, 1, 0); /* enhanced coupling */
if (!s->eac3 || s->channel_mode != AC3_CHMODE_STEREO) {
for (ch = 1; ch <= s->fbw_channels; ch++)
- put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
+ put_bits(pb, 1, block->channel_in_cpl[ch]);
}
if (s->channel_mode == AC3_CHMODE_STEREO)
- put_bits(&s->pb, 1, 0); /* phase flags in use */
+ put_bits(pb, 1, 0); /* phase flags in use */
start_sub = (s->start_freq[CPL_CH] - 37) / 12;
end_sub = (s->cpl_end_freq - 37) / 12;
- put_bits(&s->pb, 4, start_sub);
- put_bits(&s->pb, 4, end_sub - 3);
+ put_bits(pb, 4, start_sub);
+ put_bits(pb, 4, end_sub - 3);
/* coupling band structure */
if (s->eac3) {
- put_bits(&s->pb, 1, 0); /* use default */
+ put_bits(pb, 1, 0); /* use default */
} else {
for (bnd = start_sub+1; bnd < end_sub; bnd++)
- put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
+ put_bits(pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
}
}
}
@@ -1539,12 +1749,12 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch]) {
if (!s->eac3 || block->new_cpl_coords[ch] != 2)
- put_bits(&s->pb, 1, block->new_cpl_coords[ch]);
+ put_bits(pb, 1, block->new_cpl_coords[ch]);
if (block->new_cpl_coords[ch]) {
- put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
+ put_bits(pb, 2, block->cpl_master_exp[ch]);
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
- put_bits(&s->pb, 4, block->cpl_coord_exp [ch][bnd]);
- put_bits(&s->pb, 4, block->cpl_coord_mant[ch][bnd]);
+ put_bits(pb, 4, block->cpl_coord_exp [ch][bnd]);
+ put_bits(pb, 4, block->cpl_coord_mant[ch][bnd]);
}
}
}
@@ -1554,26 +1764,26 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
/* stereo rematrixing */
if (s->channel_mode == AC3_CHMODE_STEREO) {
if (!s->eac3 || blk > 0)
- put_bits(&s->pb, 1, block->new_rematrixing_strategy);
+ put_bits(pb, 1, block->new_rematrixing_strategy);
if (block->new_rematrixing_strategy) {
/* rematrixing flags */
for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++)
- put_bits(&s->pb, 1, block->rematrixing_flags[bnd]);
+ put_bits(pb, 1, block->rematrixing_flags[bnd]);
}
}
/* exponent strategy */
if (!s->eac3) {
for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
- put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+ put_bits(pb, 2, s->exp_strategy[ch][blk]);
if (s->lfe_on)
- put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+ put_bits(pb, 1, s->exp_strategy[s->lfe_channel][blk]);
}
/* bandwidth */
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] != EXP_REUSE && !block->channel_in_cpl[ch])
- put_bits(&s->pb, 6, s->bandwidth_code);
+ put_bits(pb, 6, s->bandwidth_code);
}
/* exponents */
@@ -1585,58 +1795,58 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
continue;
/* DC exponent */
- put_bits(&s->pb, 4, block->grouped_exp[ch][0] >> cpl);
+ put_bits(pb, 4, block->grouped_exp[ch][0] >> cpl);
/* exponent groups */
nb_groups = exponent_group_tab[cpl][s->exp_strategy[ch][blk]-1][block->end_freq[ch]-s->start_freq[ch]];
for (i = 1; i <= nb_groups; i++)
- put_bits(&s->pb, 7, block->grouped_exp[ch][i]);
+ put_bits(pb, 7, block->grouped_exp[ch][i]);
/* gain range info */
if (ch != s->lfe_channel && !cpl)
- put_bits(&s->pb, 2, 0);
+ put_bits(pb, 2, 0);
}
/* bit allocation info */
if (!s->eac3) {
baie = (blk == 0);
- put_bits(&s->pb, 1, baie);
+ put_bits(pb, 1, baie);
if (baie) {
- put_bits(&s->pb, 2, s->slow_decay_code);
- put_bits(&s->pb, 2, s->fast_decay_code);
- put_bits(&s->pb, 2, s->slow_gain_code);
- put_bits(&s->pb, 2, s->db_per_bit_code);
- put_bits(&s->pb, 3, s->floor_code);
+ put_bits(pb, 2, s->slow_decay_code);
+ put_bits(pb, 2, s->fast_decay_code);
+ put_bits(pb, 2, s->slow_gain_code);
+ put_bits(pb, 2, s->db_per_bit_code);
+ put_bits(pb, 3, s->floor_code);
}
}
/* snr offset */
if (!s->eac3) {
- put_bits(&s->pb, 1, block->new_snr_offsets);
+ put_bits(pb, 1, block->new_snr_offsets);
if (block->new_snr_offsets) {
- put_bits(&s->pb, 6, s->coarse_snr_offset);
+ put_bits(pb, 6, s->coarse_snr_offset);
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
- put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
- put_bits(&s->pb, 3, s->fast_gain_code[ch]);
+ put_bits(pb, 4, s->fine_snr_offset[ch]);
+ put_bits(pb, 3, s->fast_gain_code[ch]);
}
}
} else {
- put_bits(&s->pb, 1, 0); /* no converter snr offset */
+ put_bits(pb, 1, 0); /* no converter snr offset */
}
/* coupling leak */
if (block->cpl_in_use) {
if (!s->eac3 || block->new_cpl_leak != 2)
- put_bits(&s->pb, 1, block->new_cpl_leak);
+ put_bits(pb, 1, block->new_cpl_leak);
if (block->new_cpl_leak) {
- put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
- put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak);
+ put_bits(pb, 3, s->bit_alloc.cpl_fast_leak);
+ put_bits(pb, 3, s->bit_alloc.cpl_slow_leak);
}
}
if (!s->eac3) {
- put_bits(&s->pb, 1, 0); /* no delta bit allocation */
- put_bits(&s->pb, 1, 0); /* no data to skip */
+ put_bits(pb, 1, 0); /* no delta bit allocation */
+ put_bits(pb, 1, 0); /* no data to skip */
}
/* mantissas */
@@ -1654,13 +1864,13 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
b = s->ref_bap[ch][blk][i];
switch (b) {
case 0: break;
- case 1: if (q != 128) put_bits (&s->pb, 5, q); break;
- case 2: if (q != 128) put_bits (&s->pb, 7, q); break;
- case 3: put_sbits(&s->pb, 3, q); break;
- case 4: if (q != 128) put_bits (&s->pb, 7, q); break;
- case 14: put_sbits(&s->pb, 14, q); break;
- case 15: put_sbits(&s->pb, 16, q); break;
- default: put_sbits(&s->pb, b-1, q); break;
+ case 1: if (q != 128) put_bits (pb, 5, q); break;
+ case 2: if (q != 128) put_bits (pb, 7, q); break;
+ case 3: put_sbits(pb, 3, q); break;
+ case 4: if (q != 128) put_bits (pb, 7, q); break;
+ case 14: put_sbits(pb, 14, q); break;
+ case 15: put_sbits(pb, 16, q); break;
+ default: put_sbits(pb, b-1, q); break;
}
}
if (ch == CPL_CH)
@@ -1707,26 +1917,26 @@ static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly)
/*
* Fill the end of the frame with 0's and compute the two CRCs.
*/
-static void output_frame_end(AC3EncodeContext *s)
+static void output_frame_end(AC3EncodeContext *s, PutBitContext *pb)
{
const AVCRC *crc_ctx = av_crc_get_table(AV_CRC_16_ANSI);
- int frame_size_58, pad_bytes, crc1, crc2_partial, crc2, crc_inv;
+ int frame_size_58, pad_bytes, crc1, crc2, crc_inv;
uint8_t *frame;
frame_size_58 = ((s->frame_size >> 2) + (s->frame_size >> 4)) << 1;
/* pad the remainder of the frame with zeros */
- av_assert2(s->frame_size * 8 - put_bits_count(&s->pb) >= 18);
- flush_put_bits(&s->pb);
- frame = s->pb.buf;
- pad_bytes = s->frame_size - (put_bits_ptr(&s->pb) - frame) - 2;
+ av_assert2(s->frame_size * 8 - put_bits_count(pb) >= 18);
+ flush_put_bits(pb);
+ frame = pb->buf;
+ pad_bytes = s->frame_size - (put_bits_ptr(pb) - frame) - 2;
av_assert2(pad_bytes >= 0);
if (pad_bytes > 0)
- memset(put_bits_ptr(&s->pb), 0, pad_bytes);
+ memset(put_bits_ptr(pb), 0, pad_bytes);
if (s->eac3) {
/* compute crc2 */
- crc2_partial = av_crc(crc_ctx, 0, frame + 2, s->frame_size - 5);
+ crc2 = av_crc(crc_ctx, 0, frame + 2, s->frame_size - 4);
} else {
/* compute crc1 */
/* this is not so easy because it is at the beginning of the data... */
@@ -1736,16 +1946,17 @@ static void output_frame_end(AC3EncodeContext *s)
AV_WB16(frame + 2, crc1);
/* compute crc2 */
- crc2_partial = av_crc(crc_ctx, 0, frame + frame_size_58,
- s->frame_size - frame_size_58 - 3);
+ crc2 = av_crc(crc_ctx, 0, frame + frame_size_58,
+ s->frame_size - frame_size_58 - 2);
}
- crc2 = av_crc(crc_ctx, crc2_partial, frame + s->frame_size - 3, 1);
+ crc2 = av_bswap16(crc2);
/* ensure crc2 does not match sync word by flipping crcrsv bit if needed */
- if (crc2 == 0x770B) {
+ if (crc2 == 0x0B77) {
+ /* The CRC generator polynomial is x^16 + x^15 + x^2 + 1,
+ * so xor'ing with 0x18005 does not affect the CRC. */
frame[s->frame_size - 3] ^= 0x1;
- crc2 = av_crc(crc_ctx, crc2_partial, frame + s->frame_size - 3, 1);
+ crc2 ^= 0x8005;
}
- crc2 = av_bswap16(crc2);
AV_WB16(frame + s->frame_size - 2, crc2);
}
@@ -1758,24 +1969,36 @@ static void output_frame_end(AC3EncodeContext *s)
*/
static void ac3_output_frame(AC3EncodeContext *s, unsigned char *frame)
{
+ PutBitContext pb;
int blk;
- init_put_bits(&s->pb, frame, s->frame_size);
+ init_put_bits(&pb, frame, s->frame_size);
- s->output_frame_header(s);
+ s->output_frame_header(s, &pb);
for (blk = 0; blk < s->num_blocks; blk++)
- output_audio_block(s, blk);
+ output_audio_block(s, &pb, blk);
- output_frame_end(s);
+ output_frame_end(s, &pb);
}
-int ff_ac3_encode_frame_common_end(AVCodecContext *avctx, AVPacket *avpkt,
- const AVFrame *frame, int *got_packet_ptr)
+int ff_ac3_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr)
{
AC3EncodeContext *const s = avctx->priv_data;
int ret;
+ if (s->options.allow_per_frame_metadata) {
+ ret = ac3_validate_metadata(s);
+ if (ret)
+ return ret;
+ }
+
+ if (s->bit_alloc.sr_code == 1 || s->eac3)
+ ac3_adjust_frame_size(s);
+
+ s->encode_frame(s, frame->extended_data);
+
ac3_apply_rematrixing(s);
ac3_process_exponents(s);
@@ -1807,17 +2030,16 @@ static void dprint_options(AC3EncodeContext *s)
#ifdef DEBUG
AVCodecContext *avctx = s->avctx;
AC3EncOptions *opt = &s->options;
+ const char *msg;
char strbuf[32];
switch (s->bitstream_id) {
- case 6: av_strlcpy(strbuf, "AC-3 (alt syntax)", 32); break;
- case 8: av_strlcpy(strbuf, "AC-3 (standard)", 32); break;
- case 9: av_strlcpy(strbuf, "AC-3 (dnet half-rate)", 32); break;
- case 10: av_strlcpy(strbuf, "AC-3 (dnet quater-rate)", 32); break;
- case 16: av_strlcpy(strbuf, "E-AC-3 (enhanced)", 32); break;
- default: snprintf(strbuf, 32, "ERROR");
- }
- ff_dlog(avctx, "bitstream_id: %s (%d)\n", strbuf, s->bitstream_id);
+ case 6: msg = "AC-3 (alt syntax)"; break;
+ case 8: msg = "AC-3 (standard)"; break;
+ case 16: msg = "E-AC-3 (enhanced)"; break;
+ default: msg = "ERROR";
+ }
+ ff_dlog(avctx, "bitstream_id: %s (%d)\n", msg, s->bitstream_id);
ff_dlog(avctx, "sample_fmt: %s\n", av_get_sample_fmt_name(avctx->sample_fmt));
av_channel_layout_describe(&avctx->ch_layout, strbuf, sizeof(strbuf));
ff_dlog(avctx, "channel_layout: %s\n", strbuf);
@@ -1842,12 +2064,14 @@ static void dprint_options(AC3EncodeContext *s)
if (opt->audio_production_info) {
ff_dlog(avctx, "mixing_level: %ddB\n", opt->mixing_level);
switch (opt->room_type) {
- case AC3ENC_OPT_NOT_INDICATED: av_strlcpy(strbuf, "notindicated", 32); break;
- case AC3ENC_OPT_LARGE_ROOM: av_strlcpy(strbuf, "large", 32); break;
- case AC3ENC_OPT_SMALL_ROOM: av_strlcpy(strbuf, "small", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->room_type);
+ case AC3ENC_OPT_NOT_INDICATED: msg = "notindicated"; break;
+ case AC3ENC_OPT_LARGE_ROOM: msg = "large"; break;
+ case AC3ENC_OPT_SMALL_ROOM: msg = "small"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->room_type);
+ msg = strbuf;
}
- ff_dlog(avctx, "room_type: %s\n", strbuf);
+ ff_dlog(avctx, "room_type: %s\n", msg);
} else {
ff_dlog(avctx, "mixing_level: {not written}\n");
ff_dlog(avctx, "room_type: {not written}\n");
@@ -1856,12 +2080,14 @@ static void dprint_options(AC3EncodeContext *s)
ff_dlog(avctx, "dialnorm: %ddB\n", opt->dialogue_level);
if (s->channel_mode == AC3_CHMODE_STEREO) {
switch (opt->dolby_surround_mode) {
- case AC3ENC_OPT_NOT_INDICATED: av_strlcpy(strbuf, "notindicated", 32); break;
- case AC3ENC_OPT_MODE_ON: av_strlcpy(strbuf, "on", 32); break;
- case AC3ENC_OPT_MODE_OFF: av_strlcpy(strbuf, "off", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_surround_mode);
+ case AC3ENC_OPT_NOT_INDICATED: msg = "notindicated"; break;
+ case AC3ENC_OPT_MODE_ON: msg = "on"; break;
+ case AC3ENC_OPT_MODE_OFF: msg = "off"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->dolby_surround_mode);
+ msg = strbuf;
}
- ff_dlog(avctx, "dsur_mode: %s\n", strbuf);
+ ff_dlog(avctx, "dsur_mode: %s\n", msg);
} else {
ff_dlog(avctx, "dsur_mode: {not written}\n");
}
@@ -1870,12 +2096,14 @@ static void dprint_options(AC3EncodeContext *s)
if (s->bitstream_id == 6) {
if (opt->extended_bsi_1) {
switch (opt->preferred_stereo_downmix) {
- case AC3ENC_OPT_NOT_INDICATED: av_strlcpy(strbuf, "notindicated", 32); break;
- case AC3ENC_OPT_DOWNMIX_LTRT: av_strlcpy(strbuf, "ltrt", 32); break;
- case AC3ENC_OPT_DOWNMIX_LORO: av_strlcpy(strbuf, "loro", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->preferred_stereo_downmix);
+ case AC3ENC_OPT_NOT_INDICATED: msg = "notindicated"; break;
+ case AC3ENC_OPT_DOWNMIX_LTRT: msg = "ltrt"; break;
+ case AC3ENC_OPT_DOWNMIX_LORO: msg = "loro"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->preferred_stereo_downmix);
+ msg = strbuf;
}
- ff_dlog(avctx, "dmix_mode: %s\n", strbuf);
+ ff_dlog(avctx, "dmix_mode: %s\n", msg);
ff_dlog(avctx, "ltrt_cmixlev: %0.3f (%d)\n",
opt->ltrt_center_mix_level, s->ltrt_center_mix_level);
ff_dlog(avctx, "ltrt_surmixlev: %0.3f (%d)\n",
@@ -1889,26 +2117,32 @@ static void dprint_options(AC3EncodeContext *s)
}
if (opt->extended_bsi_2) {
switch (opt->dolby_surround_ex_mode) {
- case AC3ENC_OPT_NOT_INDICATED: av_strlcpy(strbuf, "notindicated", 32); break;
- case AC3ENC_OPT_MODE_ON: av_strlcpy(strbuf, "on", 32); break;
- case AC3ENC_OPT_MODE_OFF: av_strlcpy(strbuf, "off", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_surround_ex_mode);
+ case AC3ENC_OPT_NOT_INDICATED: msg = "notindicated"; break;
+ case AC3ENC_OPT_MODE_ON: msg = "on"; break;
+ case AC3ENC_OPT_MODE_OFF: msg = "off"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->dolby_surround_ex_mode);
+ msg = strbuf;
}
- ff_dlog(avctx, "dsurex_mode: %s\n", strbuf);
+ ff_dlog(avctx, "dsurex_mode: %s\n", msg);
switch (opt->dolby_headphone_mode) {
- case AC3ENC_OPT_NOT_INDICATED: av_strlcpy(strbuf, "notindicated", 32); break;
- case AC3ENC_OPT_MODE_ON: av_strlcpy(strbuf, "on", 32); break;
- case AC3ENC_OPT_MODE_OFF: av_strlcpy(strbuf, "off", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->dolby_headphone_mode);
+ case AC3ENC_OPT_NOT_INDICATED: msg = "notindicated"; break;
+ case AC3ENC_OPT_MODE_ON: msg = "on"; break;
+ case AC3ENC_OPT_MODE_OFF: msg = "off"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->dolby_headphone_mode);
+ msg = strbuf;
}
- ff_dlog(avctx, "dheadphone_mode: %s\n", strbuf);
+ ff_dlog(avctx, "dheadphone_mode: %s\n", msg);
switch (opt->ad_converter_type) {
- case AC3ENC_OPT_ADCONV_STANDARD: av_strlcpy(strbuf, "standard", 32); break;
- case AC3ENC_OPT_ADCONV_HDCD: av_strlcpy(strbuf, "hdcd", 32); break;
- default: snprintf(strbuf, 32, "ERROR (%d)", opt->ad_converter_type);
+ case AC3ENC_OPT_ADCONV_STANDARD: msg = "standard"; break;
+ case AC3ENC_OPT_ADCONV_HDCD: msg = "hdcd"; break;
+ default:
+ snprintf(strbuf, sizeof(strbuf), "ERROR (%d)", opt->ad_converter_type);
+ msg = strbuf;
}
- ff_dlog(avctx, "ad_conv_type: %s\n", strbuf);
+ ff_dlog(avctx, "ad_conv_type: %s\n", msg);
} else {
ff_dlog(avctx, "extended bitstream info 2: {not written}\n");
}
@@ -1916,227 +2150,6 @@ static void dprint_options(AC3EncodeContext *s)
#endif
}
-
-#define FLT_OPTION_THRESHOLD 0.01
-
-static int validate_float_option(float v, const float *v_list, int v_list_size)
-{
- int i;
-
- for (i = 0; i < v_list_size; i++) {
- if (v < (v_list[i] + FLT_OPTION_THRESHOLD) &&
- v > (v_list[i] - FLT_OPTION_THRESHOLD))
- break;
- }
- if (i == v_list_size)
- return AVERROR(EINVAL);
-
- return i;
-}
-
-
-static void validate_mix_level(void *log_ctx, const char *opt_name,
- float *opt_param, const float *list,
- int list_size, int default_value, int min_value,
- int *ctx_param)
-{
- int mixlev = validate_float_option(*opt_param, list, list_size);
- if (mixlev < min_value) {
- mixlev = default_value;
- if (*opt_param >= 0.0) {
- av_log(log_ctx, AV_LOG_WARNING, "requested %s is not valid. using "
- "default value: %0.3f\n", opt_name, list[mixlev]);
- }
- }
- *opt_param = list[mixlev];
- *ctx_param = mixlev;
-}
-
-
-/**
- * Validate metadata options as set by AVOption system.
- * These values can optionally be changed per-frame.
- *
- * @param s AC-3 encoder private context
- */
-int ff_ac3_validate_metadata(AC3EncodeContext *s)
-{
- AVCodecContext *avctx = s->avctx;
- AC3EncOptions *opt = &s->options;
-
- opt->audio_production_info = 0;
- opt->extended_bsi_1 = 0;
- opt->extended_bsi_2 = 0;
- opt->eac3_mixing_metadata = 0;
- opt->eac3_info_metadata = 0;
-
- /* determine mixing metadata / xbsi1 use */
- if (s->channel_mode > AC3_CHMODE_STEREO && opt->preferred_stereo_downmix != AC3ENC_OPT_NONE) {
- opt->extended_bsi_1 = 1;
- opt->eac3_mixing_metadata = 1;
- }
- if (s->has_center &&
- (opt->ltrt_center_mix_level >= 0 || opt->loro_center_mix_level >= 0)) {
- opt->extended_bsi_1 = 1;
- opt->eac3_mixing_metadata = 1;
- }
- if (s->has_surround &&
- (opt->ltrt_surround_mix_level >= 0 || opt->loro_surround_mix_level >= 0)) {
- opt->extended_bsi_1 = 1;
- opt->eac3_mixing_metadata = 1;
- }
-
- if (s->eac3) {
- /* determine info metadata use */
- if (avctx->audio_service_type != AV_AUDIO_SERVICE_TYPE_MAIN)
- opt->eac3_info_metadata = 1;
- if (opt->copyright != AC3ENC_OPT_NONE || opt->original != AC3ENC_OPT_NONE)
- opt->eac3_info_metadata = 1;
- if (s->channel_mode == AC3_CHMODE_STEREO &&
- (opt->dolby_headphone_mode != AC3ENC_OPT_NONE || opt->dolby_surround_mode != AC3ENC_OPT_NONE))
- opt->eac3_info_metadata = 1;
- if (s->channel_mode >= AC3_CHMODE_2F2R && opt->dolby_surround_ex_mode != AC3ENC_OPT_NONE)
- opt->eac3_info_metadata = 1;
- if (opt->mixing_level != AC3ENC_OPT_NONE || opt->room_type != AC3ENC_OPT_NONE ||
- opt->ad_converter_type != AC3ENC_OPT_NONE) {
- opt->audio_production_info = 1;
- opt->eac3_info_metadata = 1;
- }
- } else {
- /* determine audio production info use */
- if (opt->mixing_level != AC3ENC_OPT_NONE || opt->room_type != AC3ENC_OPT_NONE)
- opt->audio_production_info = 1;
-
- /* determine xbsi2 use */
- if (s->channel_mode >= AC3_CHMODE_2F2R && opt->dolby_surround_ex_mode != AC3ENC_OPT_NONE)
- opt->extended_bsi_2 = 1;
- if (s->channel_mode == AC3_CHMODE_STEREO && opt->dolby_headphone_mode != AC3ENC_OPT_NONE)
- opt->extended_bsi_2 = 1;
- if (opt->ad_converter_type != AC3ENC_OPT_NONE)
- opt->extended_bsi_2 = 1;
- }
-
- /* validate AC-3 mixing levels */
- if (!s->eac3) {
- if (s->has_center) {
- validate_mix_level(avctx, "center_mix_level", &opt->center_mix_level,
- cmixlev_options, CMIXLEV_NUM_OPTIONS, 1, 0,
- &s->center_mix_level);
- }
- if (s->has_surround) {
- validate_mix_level(avctx, "surround_mix_level", &opt->surround_mix_level,
- surmixlev_options, SURMIXLEV_NUM_OPTIONS, 1, 0,
- &s->surround_mix_level);
- }
- }
-
- /* validate extended bsi 1 / mixing metadata */
- if (opt->extended_bsi_1 || opt->eac3_mixing_metadata) {
- /* default preferred stereo downmix */
- if (opt->preferred_stereo_downmix == AC3ENC_OPT_NONE)
- opt->preferred_stereo_downmix = AC3ENC_OPT_NOT_INDICATED;
- if (!s->eac3 || s->has_center) {
- /* validate Lt/Rt center mix level */
- validate_mix_level(avctx, "ltrt_center_mix_level",
- &opt->ltrt_center_mix_level, extmixlev_options,
- EXTMIXLEV_NUM_OPTIONS, 5, 0,
- &s->ltrt_center_mix_level);
- /* validate Lo/Ro center mix level */
- validate_mix_level(avctx, "loro_center_mix_level",
- &opt->loro_center_mix_level, extmixlev_options,
- EXTMIXLEV_NUM_OPTIONS, 5, 0,
- &s->loro_center_mix_level);
- }
- if (!s->eac3 || s->has_surround) {
- /* validate Lt/Rt surround mix level */
- validate_mix_level(avctx, "ltrt_surround_mix_level",
- &opt->ltrt_surround_mix_level, extmixlev_options,
- EXTMIXLEV_NUM_OPTIONS, 6, 3,
- &s->ltrt_surround_mix_level);
- /* validate Lo/Ro surround mix level */
- validate_mix_level(avctx, "loro_surround_mix_level",
- &opt->loro_surround_mix_level, extmixlev_options,
- EXTMIXLEV_NUM_OPTIONS, 6, 3,
- &s->loro_surround_mix_level);
- }
- }
-
- /* validate audio service type / channels combination */
- if ((avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_KARAOKE &&
- avctx->ch_layout.nb_channels == 1) ||
- ((avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_COMMENTARY ||
- avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_EMERGENCY ||
- avctx->audio_service_type == AV_AUDIO_SERVICE_TYPE_VOICE_OVER)
- && avctx->ch_layout.nb_channels > 1)) {
- av_log(avctx, AV_LOG_ERROR, "invalid audio service type for the "
- "specified number of channels\n");
- return AVERROR(EINVAL);
- }
-
- /* validate extended bsi 2 / info metadata */
- if (opt->extended_bsi_2 || opt->eac3_info_metadata) {
- /* default dolby headphone mode */
- if (opt->dolby_headphone_mode == AC3ENC_OPT_NONE)
- opt->dolby_headphone_mode = AC3ENC_OPT_NOT_INDICATED;
- /* default dolby surround ex mode */
- if (opt->dolby_surround_ex_mode == AC3ENC_OPT_NONE)
- opt->dolby_surround_ex_mode = AC3ENC_OPT_NOT_INDICATED;
- /* default A/D converter type */
- if (opt->ad_converter_type == AC3ENC_OPT_NONE)
- opt->ad_converter_type = AC3ENC_OPT_ADCONV_STANDARD;
- }
-
- /* copyright & original defaults */
- if (!s->eac3 || opt->eac3_info_metadata) {
- /* default copyright */
- if (opt->copyright == AC3ENC_OPT_NONE)
- opt->copyright = AC3ENC_OPT_OFF;
- /* default original */
- if (opt->original == AC3ENC_OPT_NONE)
- opt->original = AC3ENC_OPT_ON;
- }
-
- /* dolby surround mode default */
- if (!s->eac3 || opt->eac3_info_metadata) {
- if (opt->dolby_surround_mode == AC3ENC_OPT_NONE)
- opt->dolby_surround_mode = AC3ENC_OPT_NOT_INDICATED;
- }
-
- /* validate audio production info */
- if (opt->audio_production_info) {
- if (opt->mixing_level == AC3ENC_OPT_NONE) {
- av_log(avctx, AV_LOG_ERROR, "mixing_level must be set if "
- "room_type is set\n");
- return AVERROR(EINVAL);
- }
- if (opt->mixing_level < 80) {
- av_log(avctx, AV_LOG_ERROR, "invalid mixing level. must be between "
- "80dB and 111dB\n");
- return AVERROR(EINVAL);
- }
- /* default room type */
- if (opt->room_type == AC3ENC_OPT_NONE)
- opt->room_type = AC3ENC_OPT_NOT_INDICATED;
- }
-
- /* set bitstream id for alternate bitstream syntax */
- if (!s->eac3 && (opt->extended_bsi_1 || opt->extended_bsi_2)) {
- if (s->bitstream_id > 8 && s->bitstream_id < 11) {
- if (!s->warned_alternate_bitstream) {
- av_log(avctx, AV_LOG_WARNING, "alternate bitstream syntax is "
- "not compatible with reduced samplerates. writing of "
- "extended bitstream information will be disabled.\n");
- s->warned_alternate_bitstream = 1;
- }
- } else {
- s->bitstream_id = 6;
- }
- }
-
- return 0;
-}
-
-
/**
* Finalize encoding and free any memory allocated by the encoder.
*
@@ -2144,15 +2157,10 @@ int ff_ac3_validate_metadata(AC3EncodeContext *s)
*/
av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
{
- int blk, ch;
AC3EncodeContext *s = avctx->priv_data;
- av_freep(&s->mdct_window);
- av_freep(&s->windowed_samples);
- if (s->planar_samples)
- for (ch = 0; ch < s->channels; ch++)
- av_freep(&s->planar_samples[ch]);
- av_freep(&s->planar_samples);
+ for (int ch = 0; ch < s->channels; ch++)
+ av_freep(&s->planar_samples[ch]);
av_freep(&s->bap_buffer);
av_freep(&s->bap1_buffer);
av_freep(&s->mdct_coef_buffer);
@@ -2163,22 +2171,8 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
av_freep(&s->band_psd_buffer);
av_freep(&s->mask_buffer);
av_freep(&s->qmant_buffer);
- av_freep(&s->cpl_coord_exp_buffer);
- av_freep(&s->cpl_coord_mant_buffer);
+ av_freep(&s->cpl_coord_buffer);
av_freep(&s->fdsp);
- for (blk = 0; blk < s->num_blocks; blk++) {
- AC3Block *block = &s->blocks[blk];
- av_freep(&block->mdct_coef);
- av_freep(&block->fixed_coef);
- av_freep(&block->exp);
- av_freep(&block->grouped_exp);
- av_freep(&block->psd);
- av_freep(&block->band_psd);
- av_freep(&block->mask);
- av_freep(&block->qmant);
- av_freep(&block->cpl_coord_exp);
- av_freep(&block->cpl_coord_mant);
- }
av_tx_uninit(&s->tx);
@@ -2189,29 +2183,18 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
/*
* Set channel information during initialization.
*/
-static av_cold int set_channel_info(AVCodecContext *avctx)
+static av_cold void set_channel_info(AVCodecContext *avctx)
{
AC3EncodeContext *s = avctx->priv_data;
+ uint64_t mask = av_channel_layout_subset(&avctx->ch_layout, ~(uint64_t)0);
int channels = avctx->ch_layout.nb_channels;
- uint64_t mask = avctx->ch_layout.u.mask;
-
- if (channels < 1 || channels > AC3_MAX_CHANNELS)
- return AVERROR(EINVAL);
- if (mask > 0x7FF)
- return AVERROR(EINVAL);
-
- if (!mask)
- av_channel_layout_default(&avctx->ch_layout, channels);
- mask = avctx->ch_layout.u.mask;
s->lfe_on = !!(mask & AV_CH_LOW_FREQUENCY);
s->channels = channels;
s->fbw_channels = channels - s->lfe_on;
s->lfe_channel = s->lfe_on ? s->fbw_channels + 1 : -1;
- if (s->lfe_on)
- mask -= AV_CH_LOW_FREQUENCY;
- switch (mask) {
+ switch (mask & ~AV_CH_LOW_FREQUENCY) {
case AV_CH_LAYOUT_MONO: s->channel_mode = AC3_CHMODE_MONO; break;
case AV_CH_LAYOUT_STEREO: s->channel_mode = AC3_CHMODE_STEREO; break;
case AV_CH_LAYOUT_SURROUND: s->channel_mode = AC3_CHMODE_3F; break;
@@ -2221,55 +2204,30 @@ static av_cold int set_channel_info(AVCodecContext *avctx)
case AV_CH_LAYOUT_2_2: s->channel_mode = AC3_CHMODE_2F2R; break;
case AV_CH_LAYOUT_5POINT0:
case AV_CH_LAYOUT_5POINT0_BACK: s->channel_mode = AC3_CHMODE_3F2R; break;
- default:
- return AVERROR(EINVAL);
}
s->has_center = (s->channel_mode & 0x01) && s->channel_mode != AC3_CHMODE_MONO;
s->has_surround = s->channel_mode & 0x04;
s->channel_map = ac3_enc_channel_map[s->channel_mode][s->lfe_on];
- if (s->lfe_on)
- mask |= AV_CH_LOW_FREQUENCY;
- av_channel_layout_from_mask(&avctx->ch_layout, mask);
-
- return 0;
}
static av_cold int validate_options(AC3EncodeContext *s)
{
AVCodecContext *avctx = s->avctx;
- int i, ret, max_sr;
+ int ret;
- /* validate channel layout */
- if (!avctx->ch_layout.nb_channels) {
- av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The "
- "encoder will guess the layout, but it "
- "might be incorrect.\n");
- }
- ret = set_channel_info(avctx);
- if (ret) {
- av_log(avctx, AV_LOG_ERROR, "invalid channel layout\n");
- return ret;
- }
+ set_channel_info(avctx);
- /* validate sample rate */
- /* note: max_sr could be changed from 2 to 5 for E-AC-3 once we find a
- decoder that supports half sample rate so we can validate that
- the generated files are correct. */
- max_sr = s->eac3 ? 2 : 8;
- for (i = 0; i <= max_sr; i++) {
- if ((ff_ac3_sample_rate_tab[i % 3] >> (i / 3)) == avctx->sample_rate)
+ for (int i = 0;; i++) {
+ if (ff_ac3_sample_rate_tab[i] == avctx->sample_rate) {
+ s->bit_alloc.sr_code = i;
break;
- }
- if (i > max_sr) {
- av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
- return AVERROR(EINVAL);
+ }
+ av_assert1(ff_ac3_sample_rate_tab[i] != 0);
}
s->sample_rate = avctx->sample_rate;
- s->bit_alloc.sr_shift = i / 3;
- s->bit_alloc.sr_code = i % 3;
- s->bitstream_id = s->eac3 ? 16 : 8 + s->bit_alloc.sr_shift;
+ s->bitstream_id = s->eac3 ? 16 : 8;
/* select a default bit rate if not set by the user */
if (!avctx->bit_rate) {
@@ -2317,7 +2275,7 @@ static av_cold int validate_options(AC3EncodeContext *s)
parameter selection */
min_br_code = -1;
min_br_dist = INT64_MAX;
- for (i = 0; i < 19; i++) {
+ for (int i = 0; i < 19; i++) {
long long br_dist = llabs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate);
if (br_dist < min_br_dist) {
min_br_dist = br_dist;
@@ -2333,8 +2291,8 @@ static av_cold int validate_options(AC3EncodeContext *s)
} else {
int best_br = 0, best_code = 0;
long long best_diff = INT64_MAX;
- for (i = 0; i < 19; i++) {
- int br = (ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift) * 1000;
+ for (int i = 0; i < 19; i++) {
+ int br = ff_ac3_bitrate_tab[i] * 1000;
long long diff = llabs(br - avctx->bit_rate);
if (diff < best_diff) {
best_br = br;
@@ -2362,7 +2320,7 @@ static av_cold int validate_options(AC3EncodeContext *s)
if (s->cutoff > (s->sample_rate >> 1))
s->cutoff = s->sample_rate >> 1;
- ret = ff_ac3_validate_metadata(s);
+ ret = ac3_validate_metadata(s);
if (ret)
return ret;
@@ -2457,9 +2415,14 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
int channels = s->channels + 1; /* includes coupling channel */
int channel_blocks = channels * s->num_blocks;
int total_coefs = AC3_MAX_COEFS * channel_blocks;
+ uint8_t *cpl_coord_mant_buffer;
+ const unsigned sampletype_size = SAMPLETYPE_SIZE(s);
- if (s->allocate_sample_buffers(s))
- return AVERROR(ENOMEM);
+ for (int ch = 0; ch < s->channels; ch++) {
+ s->planar_samples[ch] = av_mallocz(AC3_BLOCK_SIZE * sampletype_size);
+ if (!s->planar_samples[ch])
+ return AVERROR(ENOMEM);
+ }
if (!FF_ALLOC_TYPED_ARRAY(s->bap_buffer, total_coefs) ||
!FF_ALLOC_TYPED_ARRAY(s->bap1_buffer, total_coefs) ||
@@ -2472,29 +2435,18 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
!FF_ALLOC_TYPED_ARRAY(s->qmant_buffer, total_coefs))
return AVERROR(ENOMEM);
+ if (!s->fixed_point) {
+ if (!FF_ALLOCZ_TYPED_ARRAY(s->fixed_coef_buffer, total_coefs))
+ return AVERROR(ENOMEM);
+ }
if (s->cpl_enabled) {
- if (!FF_ALLOC_TYPED_ARRAY(s->cpl_coord_exp_buffer, channel_blocks * 16) ||
- !FF_ALLOC_TYPED_ARRAY(s->cpl_coord_mant_buffer, channel_blocks * 16))
+ if (!FF_ALLOC_TYPED_ARRAY(s->cpl_coord_buffer, channel_blocks * 32))
return AVERROR(ENOMEM);
+ cpl_coord_mant_buffer = s->cpl_coord_buffer + 16 * channel_blocks;
}
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
- if (!FF_ALLOCZ_TYPED_ARRAY(block->mdct_coef, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->exp, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->grouped_exp, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->psd, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->band_psd, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->mask, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->qmant, channels))
- return AVERROR(ENOMEM);
-
- if (s->cpl_enabled) {
- if (!FF_ALLOCZ_TYPED_ARRAY(block->cpl_coord_exp, channels) ||
- !FF_ALLOCZ_TYPED_ARRAY(block->cpl_coord_mant, channels))
- return AVERROR(ENOMEM);
- }
-
for (ch = 0; ch < channels; ch++) {
/* arrangement: block, channel, coeff */
block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * channels + ch)];
@@ -2503,33 +2455,17 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
block->mask[ch] = &s->mask_buffer [64 * (blk * channels + ch)];
block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * channels + ch)];
if (s->cpl_enabled) {
- block->cpl_coord_exp[ch] = &s->cpl_coord_exp_buffer [16 * (blk * channels + ch)];
- block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16 * (blk * channels + ch)];
+ block->cpl_coord_exp[ch] = &s->cpl_coord_buffer [16 * (blk * channels + ch)];
+ block->cpl_coord_mant[ch] = &cpl_coord_mant_buffer[16 * (blk * channels + ch)];
}
/* arrangement: channel, block, coeff */
block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
- }
- }
-
- if (!s->fixed_point) {
- if (!FF_ALLOCZ_TYPED_ARRAY(s->fixed_coef_buffer, total_coefs))
- return AVERROR(ENOMEM);
- for (blk = 0; blk < s->num_blocks; blk++) {
- AC3Block *block = &s->blocks[blk];
- if (!FF_ALLOCZ_TYPED_ARRAY(block->fixed_coef, channels))
- return AVERROR(ENOMEM);
- for (ch = 0; ch < channels; ch++)
- block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
- }
- } else {
- for (blk = 0; blk < s->num_blocks; blk++) {
- AC3Block *block = &s->blocks[blk];
- if (!FF_ALLOCZ_TYPED_ARRAY(block->fixed_coef, channels))
- return AVERROR(ENOMEM);
- for (ch = 0; ch < channels; ch++)
+ if (s->fixed_point)
block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
+ else
+ block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
}
}
@@ -2545,8 +2481,6 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
s->avctx = avctx;
- s->eac3 = avctx->codec_id == AV_CODEC_ID_EAC3;
-
ret = validate_options(s);
if (ret)
return ret;
@@ -2569,21 +2503,13 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
s->crc_inv[1] = pow_poly((CRC16_POLY >> 1), (8 * frame_size_58) - 16, CRC16_POLY);
}
- if (CONFIG_EAC3_ENCODER && s->eac3) {
- static AVOnce init_static_once_eac3 = AV_ONCE_INIT;
- ff_thread_once(&init_static_once_eac3, ff_eac3_exponent_init);
- s->output_frame_header = ff_eac3_output_frame_header;
- } else
+ if (!s->output_frame_header)
s->output_frame_header = ac3_output_frame_header;
set_bandwidth(s);
bit_alloc_init(s);
- ret = s->mdct_init(s);
- if (ret)
- return ret;
-
ret = allocate_buffers(s);
if (ret)
return ret;
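
The set_channel_info() rework above derives the channel mask with av_channel_layout_subset() rather than reading ch_layout.u.mask directly, so custom-order layouts yield a usable native-order mask too. A minimal standalone sketch of that call (layout_has_lfe is a hypothetical helper, not part of this patch):

    #include "libavutil/channel_layout.h"

    /* Hypothetical helper: does the layout contain an LFE channel,
     * regardless of channel order? Same call as set_channel_info() above. */
    static int layout_has_lfe(const AVChannelLayout *layout)
    {
        uint64_t mask = av_channel_layout_subset(layout, ~(uint64_t)0);
        return !!(mask & AV_CH_LOW_FREQUENCY);
    }
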
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 1cb1aac4b2..5e98ad188b 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -30,6 +30,7 @@
#include <stdint.h>
+#include "libavutil/mem_internal.h"
#include "libavutil/opt.h"
#include "libavutil/tx.h"
@@ -40,7 +41,6 @@
#include "codec_internal.h"
#include "mathops.h"
#include "me_cmp.h"
-#include "put_bits.h"
#include "audiodsp.h"
#ifndef AC3ENC_FLOAT
@@ -49,7 +49,6 @@
#if AC3ENC_FLOAT
#include "libavutil/float_dsp.h"
-#define AC3_NAME(x) ff_ac3_float_ ## x
#define MAC_COEF(d,a,b) ((d)+=(a)*(b))
#define COEF_MIN (-16777215.0/16777216.0)
#define COEF_MAX ( 16777215.0/16777216.0)
@@ -59,7 +58,6 @@ typedef float CoefType;
typedef float CoefSumType;
#else
#include "libavutil/fixed_dsp.h"
-#define AC3_NAME(x) ff_ac3_fixed_ ## x
#define MAC_COEF(d,a,b) MAC64(d,a,b)
#define COEF_MIN -16777215
#define COEF_MAX 16777215
@@ -128,16 +126,16 @@ typedef struct AC3EncOptions {
* Data for a single audio block.
*/
typedef struct AC3Block {
- CoefType **mdct_coef; ///< MDCT coefficients
- int32_t **fixed_coef; ///< fixed-point MDCT coefficients
- uint8_t **exp; ///< original exponents
- uint8_t **grouped_exp; ///< grouped exponents
- int16_t **psd; ///< psd per frequency bin
- int16_t **band_psd; ///< psd per critical band
- int16_t **mask; ///< masking curve
- uint16_t **qmant; ///< quantized mantissas
- uint8_t **cpl_coord_exp; ///< coupling coord exponents (cplcoexp)
- uint8_t **cpl_coord_mant; ///< coupling coord mantissas (cplcomant)
+ CoefType *mdct_coef[AC3_MAX_CHANNELS]; ///< MDCT coefficients
+ int32_t *fixed_coef[AC3_MAX_CHANNELS]; ///< fixed-point MDCT coefficients
+ uint8_t *exp[AC3_MAX_CHANNELS]; ///< original exponents
+ uint8_t *grouped_exp[AC3_MAX_CHANNELS]; ///< grouped exponents
+ int16_t *psd[AC3_MAX_CHANNELS]; ///< psd per frequency bin
+ int16_t *band_psd[AC3_MAX_CHANNELS]; ///< psd per critical band
+ int16_t *mask[AC3_MAX_CHANNELS]; ///< masking curve
+ uint16_t *qmant[AC3_MAX_CHANNELS]; ///< quantized mantissas
+ uint8_t *cpl_coord_exp[AC3_MAX_CHANNELS]; ///< coupling coord exponents (cplcoexp)
+ uint8_t *cpl_coord_mant[AC3_MAX_CHANNELS]; ///< coupling coord mantissas (cplcomant)
uint8_t new_rematrixing_strategy; ///< send new rematrixing flags in this block
int num_rematrixing_bands; ///< number of rematrixing bands
uint8_t rematrixing_flags[4]; ///< rematrixing flags
@@ -152,6 +150,8 @@ typedef struct AC3Block {
int end_freq[AC3_MAX_CHANNELS]; ///< end frequency bin (endmant)
} AC3Block;
+struct PutBitContext;
+
/**
* AC-3 encoder private context.
*/
@@ -159,7 +159,6 @@ typedef struct AC3EncodeContext {
AVClass *av_class; ///< AVClass used for AVOption
AC3EncOptions options; ///< encoding options
AVCodecContext *avctx; ///< parent AVCodecContext
- PutBitContext pb; ///< bitstream writer context
AudioDSPContext adsp;
#if AC3ENC_FLOAT
AVFloatDSPContext *fdsp;
@@ -170,7 +169,6 @@ typedef struct AC3EncodeContext {
AC3DSPContext ac3dsp; ///< AC-3 optimized functions
AVTXContext *tx; ///< FFT context for MDCT calculation
av_tx_fn tx_fn;
- const SampleType *mdct_window; ///< MDCT window function array
AC3Block blocks[AC3_MAX_BLOCKS]; ///< per-block info
@@ -234,8 +232,7 @@ typedef struct AC3EncodeContext {
int frame_bits; ///< all frame bits except exponents and mantissas
int exponent_bits; ///< number of bits used for exponents
- SampleType *windowed_samples;
- SampleType **planar_samples;
+ uint8_t *planar_samples[AC3_MAX_CHANNELS - 1];
uint8_t *bap_buffer;
uint8_t *bap1_buffer;
CoefType *mdct_coef_buffer;
@@ -246,8 +243,7 @@ typedef struct AC3EncodeContext {
int16_t *band_psd_buffer;
int16_t *mask_buffer;
int16_t *qmant_buffer;
- uint8_t *cpl_coord_exp_buffer;
- uint8_t *cpl_coord_mant_buffer;
+ uint8_t *cpl_coord_buffer;
uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
uint8_t frame_exp_strategy[AC3_MAX_CHANNELS]; ///< frame exp strategy index
@@ -256,16 +252,20 @@ typedef struct AC3EncodeContext {
uint8_t *ref_bap [AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< bit allocation pointers (bap)
int ref_bap_set; ///< indicates if ref_bap pointers have been set
- int warned_alternate_bitstream;
-
- /* fixed vs. float function pointers */
- int (*mdct_init)(struct AC3EncodeContext *s);
-
- /* fixed vs. float templated function pointers */
- int (*allocate_sample_buffers)(struct AC3EncodeContext *s);
+ /** fixed vs. float function pointers */
+ void (*encode_frame)(struct AC3EncodeContext *s, uint8_t * const *samples);
/* AC-3 vs. E-AC-3 function pointers */
- void (*output_frame_header)(struct AC3EncodeContext *s);
+ void (*output_frame_header)(struct AC3EncodeContext *s, struct PutBitContext *pb);
+
+ union {
+ DECLARE_ALIGNED(32, float, mdct_window_float)[AC3_BLOCK_SIZE];
+ DECLARE_ALIGNED(32, int32_t, mdct_window_fixed)[AC3_BLOCK_SIZE];
+ };
+ union {
+ DECLARE_ALIGNED(32, float, windowed_samples_float)[AC3_WINDOW_SIZE];
+ DECLARE_ALIGNED(32, int32_t, windowed_samples_fixed)[AC3_WINDOW_SIZE];
+ };
} AC3EncodeContext;
extern const AVChannelLayout ff_ac3_ch_layouts[19];
@@ -278,20 +278,10 @@ int ff_ac3_float_encode_init(AVCodecContext *avctx);
int ff_ac3_encode_close(AVCodecContext *avctx);
-int ff_ac3_validate_metadata(AC3EncodeContext *s);
-
-void ff_ac3_adjust_frame_size(AC3EncodeContext *s);
void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s);
-int ff_ac3_encode_frame_common_end(AVCodecContext *avctx, AVPacket *avpkt,
- const AVFrame *frame, int *got_packet_ptr);
-
-/* prototypes for functions in ac3enc_template.c */
-
-int ff_ac3_fixed_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
- const AVFrame *frame, int *got_packet_ptr);
-int ff_ac3_float_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
- const AVFrame *frame, int *got_packet_ptr);
+int ff_ac3_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr);
#endif /* AVCODEC_AC3ENC_H */
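
The header now keeps the MDCT window and the windowed-sample scratch inside the context through anonymous unions of DECLARE_ALIGNED arrays, so the float and fixed-point builds overlay the same aligned storage instead of allocating it separately. A hedged sketch of that pattern in isolation (struct name and size are illustrative only; mem_internal.h is a private libavutil header):

    #include <stdint.h>
    #include "libavutil/mem_internal.h"

    #define WIN_SIZE 256  /* stand-in for AC3_BLOCK_SIZE */

    struct ExampleCtx {
        union {  /* both members are 32-byte aligned and share the same bytes */
            DECLARE_ALIGNED(32, float,   window_float)[WIN_SIZE];
            DECLARE_ALIGNED(32, int32_t, window_fixed)[WIN_SIZE];
        };
    };
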
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index c399d6cd09..869e1f27a2 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -27,7 +27,6 @@
*/
#define AC3ENC_FLOAT 0
-#include "libavutil/mem.h"
#include "audiodsp.h"
#include "ac3enc.h"
#include "codec_internal.h"
@@ -74,22 +73,18 @@ static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl)
* @param s AC-3 encoder private context
* @return 0 on success, negative error code on failure
*/
-static av_cold int ac3_fixed_mdct_init(AC3EncodeContext *s)
+static av_cold int ac3_fixed_mdct_init(AVCodecContext *avctx, AC3EncodeContext *s)
{
float fwin[AC3_BLOCK_SIZE];
const float scale = -1.0f;
- int32_t *iwin = av_malloc_array(AC3_BLOCK_SIZE, sizeof(*iwin));
- if (!iwin)
- return AVERROR(ENOMEM);
+ int32_t *iwin = s->mdct_window_fixed;
ff_kbd_window_init(fwin, 5.0, AC3_BLOCK_SIZE);
for (int i = 0; i < AC3_BLOCK_SIZE; i++)
iwin[i] = lrintf(fwin[i] * (1 << 22));
- s->mdct_window = iwin;
-
- s->fdsp = avpriv_alloc_fixed_dsp(s->avctx->flags & AV_CODEC_FLAG_BITEXACT);
+ s->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
if (!s->fdsp)
return AVERROR(ENOMEM);
@@ -101,9 +96,15 @@ static av_cold int ac3_fixed_mdct_init(AC3EncodeContext *s)
static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
{
AC3EncodeContext *s = avctx->priv_data;
+ int ret;
+
s->fixed_point = 1;
- s->mdct_init = ac3_fixed_mdct_init;
- s->allocate_sample_buffers = allocate_sample_buffers;
+ s->encode_frame = encode_frame;
+
+ ret = ac3_fixed_mdct_init(avctx, s);
+ if (ret < 0)
+ return ret;
+
return ff_ac3_encode_init(avctx);
}
@@ -116,7 +117,7 @@ const FFCodec ff_ac3_fixed_encoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
.priv_data_size = sizeof(AC3EncodeContext),
.init = ac3_fixed_encode_init,
- FF_CODEC_ENCODE_CB(ff_ac3_fixed_encode_frame),
+ FF_CODEC_ENCODE_CB(ff_ac3_encode_frame),
.close = ff_ac3_encode_close,
.p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 24960f318b..94e8ebc42d 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -27,7 +27,6 @@
*/
#define AC3ENC_FLOAT 1
-#include "libavutil/mem.h"
#include "audiodsp.h"
#include "ac3enc.h"
#include "codec_internal.h"
@@ -87,14 +86,8 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
static av_cold int ac3_float_mdct_init(AC3EncodeContext *s)
{
const float scale = -2.0 / AC3_WINDOW_SIZE;
- float *window = av_malloc_array(AC3_BLOCK_SIZE, sizeof(*window));
- if (!window) {
- av_log(s->avctx, AV_LOG_ERROR, "Cannot allocate memory.\n");
- return AVERROR(ENOMEM);
- }
- ff_kbd_window_init(window, 5.0, AC3_BLOCK_SIZE);
- s->mdct_window = window;
+ ff_kbd_window_init(s->mdct_window_float, 5.0, AC3_BLOCK_SIZE);
return av_tx_init(&s->tx, &s->tx_fn, AV_TX_FLOAT_MDCT, 0,
AC3_BLOCK_SIZE, &scale, 0);
@@ -104,11 +97,17 @@ static av_cold int ac3_float_mdct_init(AC3EncodeContext *s)
av_cold int ff_ac3_float_encode_init(AVCodecContext *avctx)
{
AC3EncodeContext *s = avctx->priv_data;
- s->mdct_init = ac3_float_mdct_init;
- s->allocate_sample_buffers = allocate_sample_buffers;
+ int ret;
+
+ s->encode_frame = encode_frame;
s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
if (!s->fdsp)
return AVERROR(ENOMEM);
+
+ ret = ac3_float_mdct_init(s);
+ if (ret < 0)
+ return ret;
+
return ff_ac3_encode_init(avctx);
}
@@ -120,7 +119,7 @@ const FFCodec ff_ac3_encoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
.priv_data_size = sizeof(AC3EncodeContext),
.init = ff_ac3_float_encode_init,
- FF_CODEC_ENCODE_CB(ff_ac3_float_encode_frame),
+ FF_CODEC_ENCODE_CB(ff_ac3_encode_frame),
.close = ff_ac3_encode_close,
.p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 45dbc98804..049666fdca 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -31,77 +31,53 @@
#include <stdint.h>
#include "libavutil/attributes.h"
-#include "libavutil/internal.h"
-#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
#include "libavutil/mem_internal.h"
#include "audiodsp.h"
#include "ac3enc.h"
#include "eac3enc.h"
-
-static int allocate_sample_buffers(AC3EncodeContext *s)
-{
- int ch;
-
- if (!FF_ALLOC_TYPED_ARRAY(s->windowed_samples, AC3_WINDOW_SIZE) ||
- !FF_ALLOCZ_TYPED_ARRAY(s->planar_samples, s->channels))
- return AVERROR(ENOMEM);
-
- for (ch = 0; ch < s->channels; ch++) {
- if (!(s->planar_samples[ch] = av_mallocz((AC3_FRAME_SIZE + AC3_BLOCK_SIZE) *
- sizeof(**s->planar_samples))))
- return AVERROR(ENOMEM);
- }
- return 0;
-}
-
-
-/*
- * Copy input samples.
- * Channels are reordered from FFmpeg's default order to AC-3 order.
- */
-static void copy_input_samples(AC3EncodeContext *s, SampleType **samples)
-{
- int ch;
-
- /* copy and remap input samples */
- for (ch = 0; ch < s->channels; ch++) {
- /* copy last 256 samples of previous frame to the start of the current frame */
- memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_BLOCK_SIZE * s->num_blocks],
- AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
-
- /* copy new samples for current frame */
- memcpy(&s->planar_samples[ch][AC3_BLOCK_SIZE],
- samples[s->channel_map[ch]],
- AC3_BLOCK_SIZE * s->num_blocks * sizeof(s->planar_samples[0][0]));
- }
-}
-
+#if AC3ENC_FLOAT
+#define RENAME(element) element ## _float
+#else
+#define RENAME(element) element ## _fixed
+#endif
/*
* Apply the MDCT to input samples to generate frequency coefficients.
* This applies the KBD window and normalizes the input to reduce precision
* loss due to fixed-point calculations.
*/
-static void apply_mdct(AC3EncodeContext *s)
+static void apply_mdct(AC3EncodeContext *s, uint8_t * const *samples)
{
- int blk, ch;
+ av_assert1(s->num_blocks > 0);
+
+ for (int ch = 0; ch < s->channels; ch++) {
+ const SampleType *input_samples0 = (const SampleType*)s->planar_samples[ch];
+ /* Reorder channels from native order to AC-3 order. */
+ const SampleType *input_samples1 = (const SampleType*)samples[s->channel_map[ch]];
+ int blk = 0;
- for (ch = 0; ch < s->channels; ch++) {
- for (blk = 0; blk < s->num_blocks; blk++) {
+ do {
AC3Block *block = &s->blocks[blk];
- const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
+ SampleType *windowed_samples = s->RENAME(windowed_samples);
- s->fdsp->vector_fmul(s->windowed_samples, input_samples,
- s->mdct_window, AC3_BLOCK_SIZE);
- s->fdsp->vector_fmul_reverse(s->windowed_samples + AC3_BLOCK_SIZE,
- &input_samples[AC3_BLOCK_SIZE],
- s->mdct_window, AC3_BLOCK_SIZE);
+ s->fdsp->vector_fmul(windowed_samples, input_samples0,
+ s->RENAME(mdct_window), AC3_BLOCK_SIZE);
+ s->fdsp->vector_fmul_reverse(windowed_samples + AC3_BLOCK_SIZE,
+ input_samples1,
+ s->RENAME(mdct_window), AC3_BLOCK_SIZE);
s->tx_fn(s->tx, block->mdct_coef[ch+1],
- s->windowed_samples, sizeof(float));
- }
+ windowed_samples, sizeof(*windowed_samples));
+ input_samples0 = input_samples1;
+ input_samples1 += AC3_BLOCK_SIZE;
+ } while (++blk < s->num_blocks);
+
+ /* Store last 256 samples of current frame */
+ memcpy(s->planar_samples[ch], input_samples0,
+ AC3_BLOCK_SIZE * sizeof(*input_samples0));
}
}
@@ -371,24 +347,9 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
}
-int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
- const AVFrame *frame, int *got_packet_ptr)
+static void encode_frame(AC3EncodeContext *s, uint8_t * const *samples)
{
- AC3EncodeContext *s = avctx->priv_data;
- int ret;
-
- if (s->options.allow_per_frame_metadata) {
- ret = ff_ac3_validate_metadata(s);
- if (ret)
- return ret;
- }
-
- if (s->bit_alloc.sr_code == 1 || (AC3ENC_FLOAT && s->eac3))
- ff_ac3_adjust_frame_size(s);
-
- copy_input_samples(s, (SampleType **)frame->extended_data);
-
- apply_mdct(s);
+ apply_mdct(s, samples);
s->cpl_on = s->cpl_enabled;
ff_ac3_compute_coupling_strategy(s);
@@ -401,6 +362,4 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
#if AC3ENC_FLOAT
scale_coefficients(s);
#endif
-
- return ff_ac3_encode_frame_common_end(avctx, avpkt, frame, got_packet_ptr);
}
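
The apply_mdct() rework above folds the old copy_input_samples() step into the windowing loop: each 512-sample window covers the previous 256-sample block (rising half of the KBD window) and the current block (falling half), and only the frame's last block is copied back to planar_samples[] as history for the next frame. A hedged, self-contained sketch of that overlap scheme (not lavc code; mdct512 stands in for the AVTXContext callback):

    #include <string.h>

    static void frame_windows(float *saved256, const float *input, int num_blocks,
                              const float *kbd256,
                              void (*mdct512)(float *coef, const float *win),
                              float (*coef)[256])
    {
        const float *prev = saved256, *cur = input;        /* history, then new data */
        for (int blk = 0; blk < num_blocks; blk++) {
            float win[512];
            for (int i = 0; i < 256; i++) {
                win[i]       = prev[i] * kbd256[i];        /* rising half  */
                win[256 + i] = cur[i]  * kbd256[255 - i];  /* falling half */
            }
            mdct512(coef[blk], win);
            prev = cur;                                    /* slide one block */
            cur += 256;
        }
        memcpy(saved256, prev, 256 * sizeof(*saved256));   /* tail for next frame */
    }
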
diff --git a/libavcodec/adts_header.c b/libavcodec/adts_header.c
index 00fa0a5a99..0d6be1aa0e 100644
--- a/libavcodec/adts_header.c
+++ b/libavcodec/adts_header.c
@@ -21,11 +21,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "aac_ac3_parser.h"
#include "adts_header.h"
#include "adts_parser.h"
#include "get_bits.h"
#include "mpeg4audio.h"
+#include "libavutil/avassert.h"
int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
{
@@ -35,7 +35,7 @@ int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
memset(hdr, 0, sizeof(*hdr));
if (get_bits(gbc, 12) != 0xfff)
- return AAC_AC3_PARSE_ERROR_SYNC;
+ return AAC_PARSE_ERROR_SYNC;
skip_bits1(gbc); /* id */
skip_bits(gbc, 2); /* layer */
@@ -43,7 +43,7 @@ int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
aot = get_bits(gbc, 2); /* profile_objecttype */
sr = get_bits(gbc, 4); /* sample_frequency_index */
if (!ff_mpeg4audio_sample_rates[sr])
- return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
+ return AAC_PARSE_ERROR_SAMPLE_RATE;
skip_bits1(gbc); /* private_bit */
ch = get_bits(gbc, 3); /* channel_configuration */
@@ -55,7 +55,7 @@ int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
skip_bits1(gbc); /* copyright_identification_start */
size = get_bits(gbc, 13); /* aac_frame_length */
if (size < AV_AAC_ADTS_HEADER_SIZE)
- return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
+ return AAC_PARSE_ERROR_FRAME_SIZE;
skip_bits(gbc, 11); /* adts_buffer_fullness */
rdb = get_bits(gbc, 2); /* number_of_raw_data_blocks_in_frame */
@@ -72,3 +72,12 @@ int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
return size;
}
+
+int ff_adts_header_parse_buf(const uint8_t buf[AV_AAC_ADTS_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE],
+ AACADTSHeaderInfo *hdr)
+{
+ GetBitContext gb;
+ av_unused int ret = init_get_bits8(&gb, buf, AV_AAC_ADTS_HEADER_SIZE);
+ av_assert1(ret >= 0);
+ return ff_adts_header_parse(&gb, hdr);
+}
diff --git a/libavcodec/adts_header.h b/libavcodec/adts_header.h
index 354d07e1f8..49bb74ee22 100644
--- a/libavcodec/adts_header.h
+++ b/libavcodec/adts_header.h
@@ -23,7 +23,14 @@
#ifndef AVCODEC_ADTS_HEADER_H
#define AVCODEC_ADTS_HEADER_H
-#include "get_bits.h"
+#include "adts_parser.h"
+#include "defs.h"
+
+typedef enum {
+ AAC_PARSE_ERROR_SYNC = -0x1030c0a,
+ AAC_PARSE_ERROR_SAMPLE_RATE = -0x3030c0a,
+ AAC_PARSE_ERROR_FRAME_SIZE = -0x4030c0a,
+} AACParseError;
typedef struct AACADTSHeaderInfo {
uint32_t sample_rate;
@@ -37,16 +44,24 @@ typedef struct AACADTSHeaderInfo {
uint32_t frame_length;
} AACADTSHeaderInfo;
+struct GetBitContext;
+
/**
* Parse the ADTS frame header to the end of the variable header, which is
* the first 54 bits.
* @param[in] gbc BitContext containing the first 54 bits of the frame.
* @param[out] hdr Pointer to struct where header info is written.
- * @return Returns 0 on success, -1 if there is a sync word mismatch,
- * -2 if the version element is invalid, -3 if the sample rate
- * element is invalid, or -4 if the bit rate element is invalid.
+ * @return the size in bytes of the header parsed on success and
+ * AAC_PARSE_ERROR_* values otherwise.
+ */
+int ff_adts_header_parse(struct GetBitContext *gbc, AACADTSHeaderInfo *hdr);
+
+/**
+ * Wrapper around ff_adts_header_parse() for users that don't already have
+ * a suitable GetBitContext.
*/
-int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
+int ff_adts_header_parse_buf(const uint8_t buf[AV_AAC_ADTS_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE],
+ AACADTSHeaderInfo *hdr);
/**
* Parse the ADTS frame header contained in the buffer, which is
@@ -56,9 +71,8 @@ int ff_adts_header_parse(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
* @param[out] phdr Pointer to pointer to struct AACADTSHeaderInfo for which
* memory is allocated and header info is written into it. After using the header
* information, the allocated memory must be freed by using av_free.
- * @return Returns 0 on success, -1 if there is a sync word mismatch,
- * -2 if the version element is invalid, -3 if the sample rate
- * element is invalid, or -4 if the bit rate element is invalid.
+ * @return 0 on success, AAC_PARSE_ERROR_* values on invalid input and
+ * ordinary AVERROR codes otherwise.
*/
int avpriv_adts_header_parse(AACADTSHeaderInfo **phdr, const uint8_t *buf, size_t size);
diff --git a/libavcodec/adts_parser.c b/libavcodec/adts_parser.c
index 6c22c86ef2..66b988d6f6 100644
--- a/libavcodec/adts_parser.c
+++ b/libavcodec/adts_parser.c
@@ -20,7 +20,9 @@
#include <stddef.h>
#include <stdint.h>
+#include <string.h>
+#include "libavutil/error.h"
#include "libavutil/mem.h"
#include "adts_header.h"
#include "adts_parser.h"
@@ -28,12 +30,13 @@
int av_adts_header_parse(const uint8_t *buf, uint32_t *samples, uint8_t *frames)
{
#if CONFIG_ADTS_HEADER
- GetBitContext gb;
+ uint8_t tmpbuf[AV_AAC_ADTS_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
AACADTSHeaderInfo hdr;
- int err = init_get_bits8(&gb, buf, AV_AAC_ADTS_HEADER_SIZE);
- if (err < 0)
- return err;
- err = ff_adts_header_parse(&gb, &hdr);
+ int err;
+ if (!buf)
+ return AVERROR(EINVAL);
+ memcpy(tmpbuf, buf, AV_AAC_ADTS_HEADER_SIZE);
+ err = ff_adts_header_parse_buf(tmpbuf, &hdr);
if (err < 0)
return err;
*samples = hdr.samples;
@@ -49,7 +52,6 @@ int avpriv_adts_header_parse(AACADTSHeaderInfo **phdr, const uint8_t *buf, size_
#if CONFIG_ADTS_HEADER
int ret = 0;
int allocated = 0;
- GetBitContext gb;
if (!phdr || !buf || size < AV_AAC_ADTS_HEADER_SIZE)
return AVERROR_INVALIDDATA;
@@ -61,14 +63,7 @@ int avpriv_adts_header_parse(AACADTSHeaderInfo **phdr, const uint8_t *buf, size_
if (!*phdr)
return AVERROR(ENOMEM);
- ret = init_get_bits8(&gb, buf, AV_AAC_ADTS_HEADER_SIZE);
- if (ret < 0) {
- if (allocated)
- av_freep(phdr);
- return ret;
- }
-
- ret = ff_adts_header_parse(&gb, *phdr);
+ ret = ff_adts_header_parse_buf(buf, *phdr);
if (ret < 0) {
if (allocated)
av_freep(phdr);
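
With ff_adts_header_parse_buf() copying into a padded scratch buffer, the av_adts_header_parse() implementation above reads only AV_AAC_ADTS_HEADER_SIZE bytes from the caller's buffer. A brief usage sketch of the public entry point (dump_adts_info is a hypothetical helper, not lavc API):

    #include <stdint.h>
    #include <stdio.h>
    #include "libavcodec/adts_parser.h"

    /* Hypothetical helper: print the info carried by one ADTS frame header. */
    static void dump_adts_info(const uint8_t *buf) /* >= AV_AAC_ADTS_HEADER_SIZE bytes */
    {
        uint32_t samples;
        uint8_t  frames;
        if (av_adts_header_parse(buf, &samples, &frames) >= 0)
            printf("samples per frame: %u, raw data blocks: %u\n", samples, frames);
    }
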
diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index 440c399049..3ff170b414 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c
@@ -393,8 +393,6 @@ static int aic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int slice_size;
ctx->frame = frame;
- ctx->frame->pict_type = AV_PICTURE_TYPE_I;
- ctx->frame->flags |= AV_FRAME_FLAG_KEY;
off = FFALIGN(AIC_HDR_SIZE + ctx->num_x_slices * ctx->mb_height * 2, 4);
diff --git a/libavcodec/aliaspixdec.c b/libavcodec/aliaspixdec.c
index 72f810d408..50a6b72a0a 100644
--- a/libavcodec/aliaspixdec.c
+++ b/libavcodec/aliaspixdec.c
@@ -69,9 +69,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *f,
if (ret < 0)
return ret;
- f->pict_type = AV_PICTURE_TYPE_I;
- f->flags |= AV_FRAME_FLAG_KEY;
-
x = 0;
y = 1;
out_buf = f->data[0];
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index f4705651fb..b102a8069e 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -28,7 +28,6 @@
#include <string.h>
#include "config.h"
-#include "config_components.h"
#include "libavutil/thread.h"
#include "codec.h"
#include "codec_id.h"
@@ -805,15 +804,7 @@ extern const FFCodec ff_libvpx_vp9_decoder;
extern const FFCodec ff_libwebp_anim_encoder;
extern const FFCodec ff_libwebp_encoder;
extern const FFCodec ff_libx262_encoder;
-#if CONFIG_LIBX264_ENCODER
-#include <x264.h>
-#if X264_BUILD < 153
-#define LIBX264_CONST
-#else
-#define LIBX264_CONST const
-#endif
-extern LIBX264_CONST FFCodec ff_libx264_encoder;
-#endif
+extern const FFCodec ff_libx264_encoder;
extern const FFCodec ff_libx264rgb_encoder;
extern FFCodec ff_libx265_encoder;
extern const FFCodec ff_libxeve_encoder;
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 9d75b972fa..21a730b835 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -26,6 +26,7 @@
#include "config.h"
+#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/lfg.h"
@@ -554,6 +555,8 @@ static void decode_fixed_vector(float *fixed_vector, const uint16_t *pulse_hi,
decode_6p_track(sig_pos[i], (int) pulse_lo[i] +
((int) pulse_hi[i] << 11), 4, 1);
break;
+ default:
+ av_assert2(0);
}
memset(fixed_vector, 0, sizeof(float) * AMRWB_SFR_SIZE);
diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S
index 89d0ae8048..dc829541aa 100644
--- a/libavcodec/arm/ac3dsp_neon.S
+++ b/libavcodec/arm/ac3dsp_neon.S
@@ -20,25 +20,6 @@
#include "libavutil/arm/asm.S"
-function ff_ac3_max_msb_abs_int16_neon, export=1
- vmov.i16 q0, #0
- vmov.i16 q2, #0
-1: vld1.16 {q1}, [r0,:128]!
- vabs.s16 q1, q1
- vld1.16 {q3}, [r0,:128]!
- vabs.s16 q3, q3
- vorr q0, q0, q1
- vorr q2, q2, q3
- subs r1, r1, #16
- bgt 1b
- vorr q0, q0, q2
- vorr d0, d0, d1
- vpmax.u16 d0, d0, d0
- vpmax.u16 d0, d0, d0
- vmov.u16 r0, d0[0]
- bx lr
-endfunc
-
function ff_ac3_exponent_min_neon, export=1
cmp r1, #0
it eq
@@ -59,27 +40,6 @@ function ff_ac3_exponent_min_neon, export=1
pop {pc}
endfunc
-function ff_ac3_lshift_int16_neon, export=1
- vdup.16 q0, r2
-1: vld1.16 {q1}, [r0,:128]
- vshl.s16 q1, q1, q0
- vst1.16 {q1}, [r0,:128]!
- subs r1, r1, #8
- bgt 1b
- bx lr
-endfunc
-
-function ff_ac3_rshift_int32_neon, export=1
- rsb r2, r2, #0
- vdup.32 q0, r2
-1: vld1.32 {q1}, [r0,:128]
- vshl.s32 q1, q1, q0
- vst1.32 {q1}, [r0,:128]!
- subs r1, r1, #4
- bgt 1b
- bx lr
-endfunc
-
function ff_float_to_fixed24_neon, export=1
1: vld1.32 {q0-q1}, [r1,:128]!
vcvt.s32.f32 q0, q0, #24
@@ -109,29 +69,6 @@ function ff_ac3_extract_exponents_neon, export=1
bx lr
endfunc
-function ff_apply_window_int16_neon, export=1
- push {r4,lr}
- add r4, r1, r3, lsl #1
- add lr, r0, r3, lsl #1
- sub r4, r4, #16
- sub lr, lr, #16
- mov r12, #-16
-1:
- vld1.16 {q0}, [r1,:128]!
- vld1.16 {q2}, [r2,:128]!
- vld1.16 {q1}, [r4,:128], r12
- vrev64.16 q3, q2
- vqrdmulh.s16 q0, q0, q2
- vqrdmulh.s16 d2, d2, d7
- vqrdmulh.s16 d3, d3, d6
- vst1.16 {q0}, [r0,:128]!
- vst1.16 {q1}, [lr,:128], r12
- subs r3, r3, #16
- bgt 1b
-
- pop {r4,pc}
-endfunc
-
function ff_ac3_sum_square_butterfly_int32_neon, export=1
vmov.i64 q0, #0
vmov.i64 q1, #0
diff --git a/libavcodec/ass.c b/libavcodec/ass.c
index 5ff4ac8e07..35861139ef 100644
--- a/libavcodec/ass.c
+++ b/libavcodec/ass.c
@@ -35,15 +35,15 @@ int ff_ass_subtitle_header_full(AVCodecContext *avctx,
int border_style, int alignment)
{
avctx->subtitle_header = av_asprintf(
- "[Script Info]\r\n"
- "; Script generated by FFmpeg/Lavc%s\r\n"
- "ScriptType: v4.00+\r\n"
- "PlayResX: %d\r\n"
- "PlayResY: %d\r\n"
- "ScaledBorderAndShadow: yes\r\n"
- "YCbCr Matrix: None\r\n"
- "\r\n"
- "[V4+ Styles]\r\n"
+ "[Script Info]\n"
+ "; Script generated by FFmpeg/Lavc%s\n"
+ "ScriptType: v4.00+\n"
+ "PlayResX: %d\n"
+ "PlayResY: %d\n"
+ "ScaledBorderAndShadow: yes\n"
+ "YCbCr Matrix: None\n"
+ "\n"
+ "[V4+ Styles]\n"
/* ASS (v4+) header */
"Format: Name, "
@@ -54,7 +54,7 @@ int ff_ass_subtitle_header_full(AVCodecContext *avctx,
"Spacing, Angle, "
"BorderStyle, Outline, Shadow, "
"Alignment, MarginL, MarginR, MarginV, "
- "Encoding\r\n"
+ "Encoding\n"
"Style: "
"Default," /* Name */
@@ -65,11 +65,11 @@ int ff_ass_subtitle_header_full(AVCodecContext *avctx,
"0,0," /* Spacing, Angle */
"%d,1,0," /* BorderStyle, Outline, Shadow */
"%d,10,10,10," /* Alignment, Margin[LRV] */
- "1\r\n" /* Encoding */
+ "1\n" /* Encoding */
- "\r\n"
- "[Events]\r\n"
- "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n",
+ "\n"
+ "[Events]\n"
+ "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n",
!(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
play_res_x, play_res_y, font, font_size,
primary_color, secondary_color, outline_color, back_color,
@@ -181,10 +181,21 @@ void ff_ass_bprint_text_event(AVBPrint *buf, const char *p, int size,
if (linebreaks && strchr(linebreaks, *p)) {
av_bprintf(buf, "\\N");
- /* standard ASS escaping so random characters don't get mis-interpreted
- * as ASS */
- } else if (!keep_ass_markup && strchr("{}\\", *p)) {
- av_bprintf(buf, "\\%c", *p);
+ /* cancel curly brackets to avoid bogus override tag blocks
+ * hiding text. Standard ASS has no character escapes,
+ * though (only) libass provides \{ and \}.
+ * Unpaired closing brackets don't need escaping at all though and
+ * to make the situation less bad in standard ASS insert an empty block
+ */
+ } else if (!keep_ass_markup && *p == '{') {
+ av_bprintf(buf, "\\{{}");
+
+ /* append word-joiner U+2060 as UTF-8 to break up sequences like \N */
+ } else if (!keep_ass_markup && *p == '\\') {
+ if (p_end - p <= 3 || strncmp(p + 1, "\xe2\x81\xa0", 3))
+ av_bprintf(buf, "\\\xe2\x81\xa0");
+ else
+ av_bprintf(buf, "\\");
/* some packets might end abruptly (no \0 at the end, like for example
* in some cases of demuxing from a classic video container), some
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 568881ccd2..5abe279f35 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -244,8 +244,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
if (avctx->codec_id == AV_CODEC_ID_ASV1) {
av_fast_padded_malloc(&a->bitstream_buffer, &a->bitstream_buffer_size,
diff --git a/libavcodec/atrac9dec.c b/libavcodec/atrac9dec.c
index df68407af9..e375f46fd0 100644
--- a/libavcodec/atrac9dec.c
+++ b/libavcodec/atrac9dec.c
@@ -802,7 +802,9 @@ static int atrac9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (ret < 0)
return ret;
- init_get_bits8(&gb, avpkt->data, avpkt->size);
+ ret = init_get_bits8(&gb, avpkt->data, avpkt->size);
+ if (ret < 0)
+ return ret;
for (int i = 0; i < frames; i++) {
for (int j = 0; j < s->block_config->count; j++) {
@@ -922,7 +924,9 @@ static av_cold int atrac9_decode_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
- init_get_bits8(&gb, avctx->extradata + 4, avctx->extradata_size);
+ err = init_get_bits8(&gb, avctx->extradata + 4, avctx->extradata_size);
+ if (err < 0)
+ return err;
if (get_bits(&gb, 8) != 0xFE) {
av_log(avctx, AV_LOG_ERROR, "Incorrect magic byte!\n");
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index 824725c031..dd3c7f4734 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -38,8 +38,8 @@
#include "itut35.h"
#include "hwconfig.h"
#include "profiles.h"
+#include "progressframe.h"
#include "refstruct.h"
-#include "thread.h"
/**< same with Div_Lut defined in spec 7.11.3.7 */
static const uint16_t div_lut[AV1_DIV_LUT_NUM] = {
@@ -359,6 +359,30 @@ static void coded_lossless_param(AV1DecContext *s)
}
}
+static void order_hint_info(AV1DecContext *s)
+{
+ const AV1RawFrameHeader *header = s->raw_frame_header;
+ const AV1RawSequenceHeader *seq = s->raw_seq;
+ AV1Frame *frame = &s->cur_frame;
+
+ frame->order_hint = header->order_hint;
+
+ for (int i = 0; i < AV1_REFS_PER_FRAME; i++) {
+ int ref_name = i + AV1_REF_FRAME_LAST;
+ int ref_slot = header->ref_frame_idx[i];
+ int ref_order_hint = s->ref[ref_slot].order_hint;
+
+ frame->order_hints[ref_name] = ref_order_hint;
+ if (!seq->enable_order_hint) {
+ frame->ref_frame_sign_bias[ref_name] = 0;
+ } else {
+ frame->ref_frame_sign_bias[ref_name] =
+ get_relative_dist(seq, ref_order_hint,
+ frame->order_hint) > 0;
+ }
+ }
+}
+
static void load_grain_params(AV1DecContext *s)
{
const AV1RawFrameHeader *header = s->raw_frame_header;
@@ -445,7 +469,7 @@ static int get_tiles_info(AVCodecContext *avctx, const AV1RawTileGroup *tile_gro
static enum AVPixelFormat get_sw_pixel_format(void *logctx,
const AV1RawSequenceHeader *seq)
{
- uint8_t bit_depth;
+ int bit_depth;
enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
if (seq->seq_profile == 2 && seq->color_config.high_bitdepth)
@@ -469,7 +493,7 @@ static enum AVPixelFormat get_sw_pixel_format(void *logctx,
else if (bit_depth == 12)
pix_fmt = AV_PIX_FMT_YUV444P12;
else
- av_log(logctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+ av_assert0(0);
} else if (seq->color_config.subsampling_x == 1 &&
seq->color_config.subsampling_y == 0) {
if (bit_depth == 8)
@@ -479,7 +503,7 @@ static enum AVPixelFormat get_sw_pixel_format(void *logctx,
else if (bit_depth == 12)
pix_fmt = AV_PIX_FMT_YUV422P12;
else
- av_log(logctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+ av_assert0(0);
} else if (seq->color_config.subsampling_x == 1 &&
seq->color_config.subsampling_y == 1) {
if (bit_depth == 8)
@@ -489,7 +513,7 @@ static enum AVPixelFormat get_sw_pixel_format(void *logctx,
else if (bit_depth == 12)
pix_fmt = AV_PIX_FMT_YUV420P12;
else
- av_log(logctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+ av_assert0(0);
}
} else {
if (bit_depth == 8)
@@ -499,7 +523,7 @@ static enum AVPixelFormat get_sw_pixel_format(void *logctx,
else if (bit_depth == 12)
pix_fmt = AV_PIX_FMT_GRAY12;
else
- av_log(logctx, AV_LOG_WARNING, "Unknown AV1 pixel format.\n");
+ av_assert0(0);
}
return pix_fmt;
@@ -653,7 +677,7 @@ static int get_pixel_format(AVCodecContext *avctx)
static void av1_frame_unref(AV1Frame *f)
{
- av_frame_unref(f->f);
+ ff_progress_frame_unref(&f->pf);
ff_refstruct_unref(&f->hwaccel_picture_private);
ff_refstruct_unref(&f->header_ref);
f->raw_frame_header = NULL;
@@ -664,20 +688,15 @@ static void av1_frame_unref(AV1Frame *f)
f->coded_lossless = 0;
}
-static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, const AV1Frame *src)
+static void av1_frame_replace(AV1Frame *dst, const AV1Frame *src)
{
- int ret;
+ av_assert1(dst != src);
ff_refstruct_replace(&dst->header_ref, src->header_ref);
dst->raw_frame_header = src->raw_frame_header;
- if (!src->f->buf[0])
- return 0;
-
- ret = av_frame_ref(dst->f, src->f);
- if (ret < 0)
- goto fail;
+ ff_progress_frame_replace(&dst->pf, &src->pf);
ff_refstruct_replace(&dst->hwaccel_picture_private,
src->hwaccel_picture_private);
@@ -701,11 +720,13 @@ static int av1_frame_ref(AVCodecContext *avctx, AV1Frame *dst, const AV1Frame *s
sizeof(dst->film_grain));
dst->coded_lossless = src->coded_lossless;
- return 0;
+ dst->order_hint = src->order_hint;
+ memcpy(dst->ref_frame_sign_bias, src->ref_frame_sign_bias,
+ sizeof(dst->ref_frame_sign_bias));
+ memcpy(dst->order_hints, src->order_hints,
+ sizeof(dst->order_hints));
-fail:
- av1_frame_unref(dst);
- return AVERROR(ENOMEM);
+ dst->force_integer_mv = src->force_integer_mv;
}
static av_cold int av1_decode_free(AVCodecContext *avctx)
@@ -713,16 +734,9 @@ static av_cold int av1_decode_free(AVCodecContext *avctx)
AV1DecContext *s = avctx->priv_data;
AV1RawMetadataITUTT35 itut_t35;
- for (int i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
- if (s->ref[i].f) {
- av1_frame_unref(&s->ref[i]);
- av_frame_free(&s->ref[i].f);
- }
- }
- if (s->cur_frame.f) {
- av1_frame_unref(&s->cur_frame);
- av_frame_free(&s->cur_frame.f);
- }
+ for (int i = 0; i < FF_ARRAY_ELEMS(s->ref); i++)
+ av1_frame_unref(&s->ref[i]);
+ av1_frame_unref(&s->cur_frame);
av_buffer_unref(&s->seq_data_ref);
ff_refstruct_unref(&s->seq_ref);
ff_refstruct_unref(&s->header_ref);
@@ -838,16 +852,6 @@ static av_cold int av1_decode_init(AVCodecContext *avctx)
s->pkt = avctx->internal->in_pkt;
s->pix_fmt = AV_PIX_FMT_NONE;
- for (int i = 0; i < FF_ARRAY_ELEMS(s->ref); i++) {
- s->ref[i].f = av_frame_alloc();
- if (!s->ref[i].f)
- return AVERROR(ENOMEM);
- }
-
- s->cur_frame.f = av_frame_alloc();
- if (!s->cur_frame.f)
- return AVERROR(ENOMEM);
-
ret = ff_cbs_init(&s->cbc, AV_CODEC_ID_AV1, avctx);
if (ret < 0)
return ret;
@@ -888,10 +892,10 @@ static av_cold int av1_decode_init(AVCodecContext *avctx)
}
s->dovi.logctx = avctx;
- s->dovi.dv_profile = 10; // default for AV1
+ s->dovi.cfg.dv_profile = 10; // default for AV1
sd = ff_get_coded_side_data(avctx, AV_PKT_DATA_DOVI_CONF);
- if (sd && sd->size > 0)
- ff_dovi_update_cfg(&s->dovi, (AVDOVIDecoderConfigurationRecord *) sd->data);
+ if (sd && sd->size >= sizeof(s->dovi.cfg))
+ s->dovi.cfg = *(AVDOVIDecoderConfigurationRecord *) sd->data;
return ret;
}
@@ -909,7 +913,8 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
return ret;
}
- if ((ret = ff_thread_get_buffer(avctx, f->f, AV_GET_BUFFER_FLAG_REF)) < 0)
+ ret = ff_progress_frame_get_buffer(avctx, &f->pf, AV_GET_BUFFER_FLAG_REF);
+ if (ret < 0)
goto fail;
frame = f->f;
@@ -1186,23 +1191,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
return 0;
}
-static int update_reference_list(AVCodecContext *avctx)
+static void update_reference_list(AVCodecContext *avctx)
{
AV1DecContext *s = avctx->priv_data;
const AV1RawFrameHeader *header = s->raw_frame_header;
- int ret;
for (int i = 0; i < AV1_NUM_REF_FRAMES; i++) {
- if (header->refresh_frame_flags & (1 << i)) {
- av1_frame_unref(&s->ref[i]);
- if ((ret = av1_frame_ref(avctx, &s->ref[i], &s->cur_frame)) < 0) {
- av_log(avctx, AV_LOG_ERROR,
- "Failed to update frame %d in reference list\n", i);
- return ret;
- }
- }
+ if (header->refresh_frame_flags & (1 << i))
+ av1_frame_replace(&s->ref[i], &s->cur_frame);
}
- return 0;
}
static int get_current_frame(AVCodecContext *avctx)
@@ -1257,8 +1254,14 @@ static int get_current_frame(AVCodecContext *avctx)
global_motion_params(s);
skip_mode_params(s);
coded_lossless_param(s);
+ order_hint_info(s);
load_grain_params(s);
+ s->cur_frame.force_integer_mv =
+ s->raw_frame_header->force_integer_mv ||
+ s->raw_frame_header->frame_type == AV1_FRAME_KEY ||
+ s->raw_frame_header->frame_type == AV1_FRAME_INTRA_ONLY;
+
return ret;
}
@@ -1330,29 +1333,22 @@ static int av1_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
s->raw_frame_header = &obu->obu.frame_header;
if (s->raw_frame_header->show_existing_frame) {
- av1_frame_unref(&s->cur_frame);
-
- ret = av1_frame_ref(avctx, &s->cur_frame,
- &s->ref[s->raw_frame_header->frame_to_show_map_idx]);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Failed to get reference frame.\n");
- goto end;
- }
+ av1_frame_replace(&s->cur_frame,
+ &s->ref[s->raw_frame_header->frame_to_show_map_idx]);
- ret = update_reference_list(avctx);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Failed to update reference list.\n");
- goto end;
- }
+ update_reference_list(avctx);
- if (s->cur_frame.f->buf[0]) {
+ if (s->cur_frame.f) {
ret = set_output_frame(avctx, frame);
- if (ret < 0)
+ if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "Set output frame error.\n");
+ goto end;
+ }
}
s->raw_frame_header = NULL;
i++;
+ ret = 0;
goto end;
}
@@ -1366,7 +1362,7 @@ static int av1_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
s->cur_frame.spatial_id = header->spatial_id;
s->cur_frame.temporal_id = header->temporal_id;
- if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+ if (avctx->hwaccel && s->cur_frame.f) {
ret = FF_HW_CALL(avctx, start_frame, unit->data, unit->data_size);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "HW accel start frame fail.\n");
@@ -1392,7 +1388,7 @@ static int av1_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
if (ret < 0)
goto end;
- if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+ if (avctx->hwaccel && s->cur_frame.f) {
ret = FF_HW_CALL(avctx, decode_slice, raw_tile_group->tile_data.data,
raw_tile_group->tile_data.data_size);
if (ret < 0) {
@@ -1443,7 +1439,7 @@ static int av1_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
if (raw_tile_group && (s->tile_num == raw_tile_group->tg_end + 1)) {
int show_frame = s->raw_frame_header->show_frame;
- if (avctx->hwaccel && s->cur_frame.f->buf[0]) {
+ if (avctx->hwaccel && s->cur_frame.f) {
ret = FF_HW_SIMPLE_CALL(avctx, end_frame);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "HW accel end frame fail.\n");
@@ -1451,23 +1447,22 @@ static int av1_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
}
}
- ret = update_reference_list(avctx);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Failed to update reference list.\n");
- goto end;
- }
+ update_reference_list(avctx);
- if (s->raw_frame_header->show_frame && s->cur_frame.f->buf[0]) {
- ret = set_output_frame(avctx, frame);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Set output frame error\n");
- goto end;
- }
- }
- raw_tile_group = NULL;
+ raw_tile_group = NULL;
s->raw_frame_header = NULL;
+
if (show_frame) {
+ // cur_frame.f needn't exist due to skip_frame.
+ if (s->cur_frame.f) {
+ ret = set_output_frame(avctx, frame);
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Set output frame error\n");
+ goto end;
+ }
+ }
i++;
+ ret = 0;
goto end;
}
}
@@ -1571,7 +1566,9 @@ const FFCodec ff_av1_decoder = {
.close = av1_decode_free,
FF_CODEC_RECEIVE_FRAME_CB(av1_receive_frame),
.p.capabilities = AV_CODEC_CAP_DR1,
- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
+ FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM |
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
.flush = av1_decode_flush,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_av1_profiles),
.p.priv_class = &av1_class,
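
order_hint_info() above fills ref_frame_sign_bias[] from the relative order-hint distance of each reference. A hedged sketch of that distance computation as the AV1 specification defines it (the existing get_relative_dist() helper in av1dec.c is assumed to match; order_hint_bits stands for OrderHintBits):

    /* Wrap the difference of two order hints into the signed range implied
     * by order_hint_bits; a positive result means a is later than b. */
    static int relative_dist(int a, int b, int order_hint_bits, int enable_order_hint)
    {
        int diff, m;
        if (!enable_order_hint)
            return 0;
        diff = a - b;
        m    = 1 << (order_hint_bits - 1);
        return (diff & (m - 1)) - (diff & m);
    }

    /* sign bias for a reference: 1 when the reference lies after the current
     * frame, i.e. relative_dist(ref_hint, cur_hint, bits, 1) > 0 */
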
diff --git a/libavcodec/av1dec.h b/libavcodec/av1dec.h
index 336eb61359..8b2a7b0896 100644
--- a/libavcodec/av1dec.h
+++ b/libavcodec/av1dec.h
@@ -32,9 +32,15 @@
#include "cbs.h"
#include "cbs_av1.h"
#include "dovi_rpu.h"
+#include "progressframe.h"
typedef struct AV1Frame {
- AVFrame *f;
+ union {
+ struct {
+ struct AVFrame *f;
+ };
+ ProgressFrame pf;
+ };
void *hwaccel_picture_private; ///< RefStruct reference
@@ -53,6 +59,20 @@ typedef struct AV1Frame {
AV1RawFilmGrainParams film_grain;
uint8_t coded_lossless;
+
+ // OrderHint for this frame.
+ uint8_t order_hint;
+ // RefFrameSignBias[] used when decoding this frame.
+ uint8_t ref_frame_sign_bias[AV1_TOTAL_REFS_PER_FRAME];
+ // OrderHints[] when this is the current frame, otherwise
+    // SavedOrderHints[s][] when this is the reference frame in slot s.
+ uint8_t order_hints[AV1_TOTAL_REFS_PER_FRAME];
+
+ // force_integer_mv value at the end of the frame header parsing.
+ // This is not the same as the syntax element value in
+ // raw_frame_header because the specification parsing tables
+ // override the value on intra frames.
+ uint8_t force_integer_mv;
} AV1Frame;
typedef struct TileGroupInfo {
diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c
index 0401f5be99..214dca4566 100644
--- a/libavcodec/avcodec.c
+++ b/libavcodec/avcodec.c
@@ -381,13 +381,10 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
avci->draining = 0;
avci->draining_done = 0;
- if (avci->buffer_frame)
- av_frame_unref(avci->buffer_frame);
- if (avci->buffer_pkt)
- av_packet_unref(avci->buffer_pkt);
+ av_frame_unref(avci->buffer_frame);
+ av_packet_unref(avci->buffer_pkt);
- if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME &&
- !avci->is_frame_mt)
+ if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
ff_thread_flush(avctx);
else if (ffcodec(avctx->codec)->flush)
ffcodec(avctx->codec)->flush(avctx);
@@ -444,6 +441,7 @@ av_cold void ff_codec_close(AVCodecContext *avctx)
av_frame_free(&avci->recon_frame);
ff_refstruct_unref(&avci->pool);
+ ff_refstruct_pool_uninit(&avci->progress_frame_pool);
ff_hwaccel_uninit(avctx);
@@ -464,6 +462,8 @@ av_cold void ff_codec_close(AVCodecContext *avctx)
av_freep(&avctx->coded_side_data[i].data);
av_freep(&avctx->coded_side_data);
avctx->nb_coded_side_data = 0;
+ av_frame_side_data_free(&avctx->decoded_side_data,
+ &avctx->nb_decoded_side_data);
av_buffer_unref(&avctx->hw_frames_ctx);
av_buffer_unref(&avctx->hw_device_ctx);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 83dc487251..2da63c87ea 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1538,6 +1538,7 @@ typedef struct AVCodecContext {
#define FF_DCT_MMX 3
#define FF_DCT_ALTIVEC 5
#define FF_DCT_FAAN 6
+#define FF_DCT_NEON 7
/**
* IDCT algorithm, see FF_IDCT_* below.
@@ -2071,7 +2072,7 @@ typedef struct AVCodecContext {
* - encoding: may be set by user before calling avcodec_open2() for
* encoder configuration. Afterwards owned and freed by the
* encoder.
- * - decoding: unused
+ * - decoding: may be set by libavcodec in avcodec_open2().
*/
AVFrameSideData **decoded_side_data;
int nb_decoded_side_data;
diff --git a/libavcodec/avcodec_internal.h b/libavcodec/avcodec_internal.h
index 2f0aaab93b..0a024378ae 100644
--- a/libavcodec/avcodec_internal.h
+++ b/libavcodec/avcodec_internal.h
@@ -72,35 +72,4 @@ struct AVCodecInternal *ff_encode_internal_alloc(void);
void ff_codec_close(struct AVCodecContext *avctx);
-int ff_thread_init(struct AVCodecContext *s);
-void ff_thread_free(struct AVCodecContext *s);
-
-/**
- * Wait for decoding threads to finish and reset internal state.
- * Called by avcodec_flush_buffers().
- *
- * @param avctx The context.
- */
-void ff_thread_flush(struct AVCodecContext *avctx);
-
-/**
- * Submit available packets for decoding to worker threads, return a
- * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
- *
- * Parameters are the same as FFCodec.receive_frame.
- */
-int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
-
-/**
- * Do the actual decoding and obtain a decoded frame from the decoder, if
- * available. When frame threading is used, this is invoked by the worker
- * threads, otherwise by the top layer directly.
- */
-int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
-
-/**
- * Get a packet for decoding. This gets invoked by the worker threads.
- */
-int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
-
#endif // AVCODEC_AVCODEC_INTERNAL_H
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 627fd7a0be..f6787937f6 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -158,7 +158,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
return NULL;
}
- s->stride = (trans == DFT_C2R) ? sizeof(float) : sizeof(AVComplexFloat);
+ s->stride = (trans == DFT_C2R) ? sizeof(AVComplexFloat) : sizeof(float);
s->len = 1 << nbits;
s->inv = trans == IDFT_C2R;
diff --git a/libavcodec/avrndec.c b/libavcodec/avrndec.c
index 97d2824625..02bdfb6516 100644
--- a/libavcodec/avrndec.c
+++ b/libavcodec/avrndec.c
@@ -67,8 +67,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type= AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
if(a->interlace) {
buf += (true_height - avctx->height)*avctx->width;
diff --git a/libavcodec/avs2_parser.c b/libavcodec/avs2_parser.c
index 200134f91d..0d68ab1d00 100644
--- a/libavcodec/avs2_parser.c
+++ b/libavcodec/avs2_parser.c
@@ -72,13 +72,15 @@ static void parse_avs2_seq_header(AVCodecParserContext *s, const uint8_t *buf,
unsigned aspect_ratio;
unsigned frame_rate_code;
int low_delay;
+ av_unused int ret;
// update buf_size_min if parse more deeper
const int buf_size_min = 15;
if (buf_size < buf_size_min)
return;
- init_get_bits8(&gb, buf, buf_size_min);
+ ret = init_get_bits8(&gb, buf, buf_size_min);
+ av_assert1(ret >= 0);
s->key_frame = 1;
s->pict_type = AV_PICTURE_TYPE_I;
diff --git a/libavcodec/avs3_parser.c b/libavcodec/avs3_parser.c
index a819b5783d..ea495b1c7c 100644
--- a/libavcodec/avs3_parser.c
+++ b/libavcodec/avs3_parser.c
@@ -73,7 +73,8 @@ static void parse_avs3_nal_units(AVCodecParserContext *s, const uint8_t *buf,
GetBitContext gb;
int profile, ratecode, low_delay;
- init_get_bits8(&gb, buf + 4, buf_size - 4);
+ av_unused int ret = init_get_bits(&gb, buf + 4, 100);
+ av_assert1(ret >= 0);
s->key_frame = 1;
s->pict_type = AV_PICTURE_TYPE_I;
diff --git a/libavcodec/avuidec.c b/libavcodec/avuidec.c
index 48b23d4875..64a1d019d3 100644
--- a/libavcodec/avuidec.c
+++ b/libavcodec/avuidec.c
@@ -71,9 +71,6 @@ static int avui_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
if (!interlaced) {
src += avctx->width * skip;
srca += avctx->width * skip;
diff --git a/libavcodec/bitpacked_dec.c b/libavcodec/bitpacked_dec.c
index 54c008bd86..a1d3b7b505 100644
--- a/libavcodec/bitpacked_dec.c
+++ b/libavcodec/bitpacked_dec.c
@@ -130,9 +130,6 @@ static int bitpacked_decode(AVCodecContext *avctx, AVFrame *frame,
if (res)
return res;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return buf_size;
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 12860c332b..138246c50e 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -34,6 +34,7 @@ extern const FFBitStreamFilter ff_dca_core_bsf;
extern const FFBitStreamFilter ff_dts2pts_bsf;
extern const FFBitStreamFilter ff_dv_error_marker_bsf;
extern const FFBitStreamFilter ff_eac3_core_bsf;
+extern const FFBitStreamFilter ff_evc_frame_merge_bsf;
extern const FFBitStreamFilter ff_extract_extradata_bsf;
extern const FFBitStreamFilter ff_filter_units_bsf;
extern const FFBitStreamFilter ff_h264_metadata_bsf;
@@ -67,7 +68,6 @@ extern const FFBitStreamFilter ff_vp9_superframe_bsf;
extern const FFBitStreamFilter ff_vp9_superframe_split_bsf;
extern const FFBitStreamFilter ff_vvc_metadata_bsf;
extern const FFBitStreamFilter ff_vvc_mp4toannexb_bsf;
-extern const FFBitStreamFilter ff_evc_frame_merge_bsf;
#include "libavcodec/bsf_list.c"
diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index d117c06cf4..360c103200 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -209,8 +209,6 @@ static int bmp_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
buf = buf0 + hsize;
dsize = buf_size - hsize;
diff --git a/libavcodec/brenderpix.c b/libavcodec/brenderpix.c
index 70a3e6be2a..07bb47fff9 100644
--- a/libavcodec/brenderpix.c
+++ b/libavcodec/brenderpix.c
@@ -285,8 +285,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
bytes_per_scanline,
bytes_per_scanline, hdr.height);
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/bsf/aac_adtstoasc.c b/libavcodec/bsf/aac_adtstoasc.c
index dd5e8b2a31..b821414f2a 100644
--- a/libavcodec/bsf/aac_adtstoasc.c
+++ b/libavcodec/bsf/aac_adtstoasc.c
@@ -40,7 +40,6 @@ static int aac_adtstoasc_filter(AVBSFContext *bsfc, AVPacket *pkt)
{
AACBSFContext *ctx = bsfc->priv_data;
- GetBitContext gb;
PutBitContext pb;
AACADTSHeaderInfo hdr;
int ret;
@@ -55,9 +54,7 @@ static int aac_adtstoasc_filter(AVBSFContext *bsfc, AVPacket *pkt)
if (pkt->size < AV_AAC_ADTS_HEADER_SIZE)
goto packet_too_small;
- init_get_bits(&gb, pkt->data, AV_AAC_ADTS_HEADER_SIZE * 8);
-
- if (ff_adts_header_parse(&gb, &hdr) < 0) {
+ if (ff_adts_header_parse_buf(pkt->data, &hdr) < 0) {
av_log(bsfc, AV_LOG_ERROR, "Error parsing ADTS frame header!\n");
ret = AVERROR_INVALIDDATA;
goto fail;
@@ -81,6 +78,7 @@ static int aac_adtstoasc_filter(AVBSFContext *bsfc, AVPacket *pkt)
uint8_t *extradata;
if (!hdr.chan_config) {
+ GetBitContext gb;
init_get_bits(&gb, pkt->data, pkt->size * 8);
if (get_bits(&gb, 3) != 5) {
avpriv_report_missing_feature(bsfc,
diff --git a/libavcodec/bsf/dts2pts.c b/libavcodec/bsf/dts2pts.c
index 53a54fb1cc..ba4dc43f84 100644
--- a/libavcodec/bsf/dts2pts.c
+++ b/libavcodec/bsf/dts2pts.c
@@ -269,8 +269,8 @@ static int h264_filter(AVBSFContext *ctx)
h264->sps.offset_for_non_ref_pic = sps->offset_for_non_ref_pic;
h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field;
h264->sps.poc_cycle_length = sps->num_ref_frames_in_pic_order_cnt_cycle;
- for (int i = 0; i < h264->sps.poc_cycle_length; i++)
- h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i];
+ for (int j = 0; j < h264->sps.poc_cycle_length; j++)
+ h264->sps.offset_for_ref_frame[j] = sps->offset_for_ref_frame[j];
h264->picture_structure = sps->frame_mbs_only_flag ? 3 :
(header->field_pic_flag ?
diff --git a/libavcodec/cbs_av1.c b/libavcodec/cbs_av1.c
index 1d9ac5ab44..fb82996022 100644
--- a/libavcodec/cbs_av1.c
+++ b/libavcodec/cbs_av1.c
@@ -301,7 +301,7 @@ static int cbs_av1_write_increment(CodedBitstreamContext *ctx, PutBitContext *pb
return AVERROR(ENOSPC);
if (len > 0)
- put_bits(pbc, len, (1 << len) - 1 - (value != range_max));
+ put_bits(pbc, len, (1U << len) - 1 - (value != range_max));
CBS_TRACE_WRITE_END_NO_SUBSCRIPTS();
diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
index fe2e383ff3..1a45d424ba 100644
--- a/libavcodec/cbs_h2645.c
+++ b/libavcodec/cbs_h2645.c
@@ -709,7 +709,11 @@ static int cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
start = bytestream2_tell(&gbc);
for(i = 0; i < num_nalus; i++) {
+ if (bytestream2_get_bytes_left(&gbc) < 2)
+ return AVERROR_INVALIDDATA;
size = bytestream2_get_be16(&gbc);
+ if (bytestream2_get_bytes_left(&gbc) < size)
+ return AVERROR_INVALIDDATA;
bytestream2_skip(&gbc, size);
}
end = bytestream2_tell(&gbc);
diff --git a/libavcodec/cbs_h266_syntax_template.c b/libavcodec/cbs_h266_syntax_template.c
index 725cee5b03..38685704c5 100644
--- a/libavcodec/cbs_h266_syntax_template.c
+++ b/libavcodec/cbs_h266_syntax_template.c
@@ -790,6 +790,21 @@ static int FUNC(vps) (CodedBitstreamContext *ctx, RWContext *rw,
infer(vps_each_layer_is_an_ols_flag, 1);
infer(vps_num_ptls_minus1, 0);
}
+
+ for (i = 0; i <= current->vps_num_ptls_minus1; i++) {
+ if (i > 0)
+ flags(vps_pt_present_flag[i], 1, i);
+ else
+ infer(vps_pt_present_flag[i], 1);
+
+ if (!current->vps_default_ptl_dpb_hrd_max_tid_flag)
+ us(3, vps_ptl_max_tid[i], 0, current->vps_max_sublayers_minus1, 1, i);
+ else
+ infer(vps_ptl_max_tid[i], current->vps_max_sublayers_minus1);
+ }
+ while (byte_alignment(rw) != 0)
+ fixed(1, vps_ptl_alignment_zero_bit, 0);
+
{
//calc NumMultiLayerOlss
int m;
@@ -916,19 +931,6 @@ static int FUNC(vps) (CodedBitstreamContext *ctx, RWContext *rw,
}
for (i = 0; i <= current->vps_num_ptls_minus1; i++) {
- if (i > 0)
- flags(vps_pt_present_flag[i], 1, i);
- else
- infer(vps_pt_present_flag[i], 1);
-
- if (!current->vps_default_ptl_dpb_hrd_max_tid_flag)
- us(3, vps_ptl_max_tid[i], 0, current->vps_max_sublayers_minus1, 1, i);
- else
- infer(vps_ptl_max_tid[i], current->vps_max_sublayers_minus1);
- }
- while (byte_alignment(rw) != 0)
- fixed(1, vps_ptl_alignment_zero_bit, 0);
- for (i = 0; i <= current->vps_num_ptls_minus1; i++) {
CHECK(FUNC(profile_tier_level) (ctx, rw,
current->vps_profile_tier_level + i,
current->vps_pt_present_flag[i],
diff --git a/libavcodec/cdxl.c b/libavcodec/cdxl.c
index 26f05e6280..c30e85b2ed 100644
--- a/libavcodec/cdxl.c
+++ b/libavcodec/cdxl.c
@@ -306,8 +306,6 @@ static int cdxl_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
if (encoding) {
av_fast_padded_malloc(&c->new_video, &c->new_video_size,
diff --git a/libavcodec/cljrdec.c b/libavcodec/cljrdec.c
index a4baa015f6..66e9d56e0e 100644
--- a/libavcodec/cljrdec.c
+++ b/libavcodec/cljrdec.c
@@ -50,8 +50,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
init_get_bits(&gb, buf, buf_size * 8);
diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c
index 9b07f7c78e..168b8cb3d0 100644
--- a/libavcodec/cllc.c
+++ b/libavcodec/cllc.c
@@ -461,9 +461,6 @@ static int cllc_decode_frame(AVCodecContext *avctx, AVFrame *pic,
return AVERROR_INVALIDDATA;
}
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
*got_picture_ptr = 1;
return avpkt->size;
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 7dba61dc8b..a28ef68061 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -905,7 +905,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_VIDEO,
.name = "tgq",
.long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGQ video"),
- .props = AV_CODEC_PROP_LOSSY,
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
},
{
.id = AV_CODEC_ID_TQI,
@@ -1095,7 +1095,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_VIDEO,
.name = "jv",
.long_name = NULL_IF_CONFIG_SMALL("Bitmap Brothers JV video"),
- .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ .props = AV_CODEC_PROP_LOSSY,
},
{
.id = AV_CODEC_ID_DFA,
@@ -1503,6 +1503,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_VIDEO,
.name = "avrn",
.long_name = NULL_IF_CONFIG_SMALL("Avid AVI Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
},
{
.id = AV_CODEC_ID_CPIA,
@@ -1820,7 +1821,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_VIDEO,
.name = "photocd",
.long_name = NULL_IF_CONFIG_SMALL("Kodak Photo CD"),
- .props = AV_CODEC_PROP_LOSSY,
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
},
{
.id = AV_CODEC_ID_IPU,
@@ -1869,7 +1870,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
.type = AVMEDIA_TYPE_VIDEO,
.name = "vbn",
.long_name = NULL_IF_CONFIG_SMALL("Vizrt Binary Image"),
- .props = AV_CODEC_PROP_LOSSY,
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
},
{
.id = AV_CODEC_ID_JPEGXL,
diff --git a/libavcodec/codec_internal.h b/libavcodec/codec_internal.h
index d6757e2def..1cd1f684f9 100644
--- a/libavcodec/codec_internal.h
+++ b/libavcodec/codec_internal.h
@@ -62,11 +62,10 @@
* Codec initializes slice-based threading with a main function
*/
#define FF_CODEC_CAP_SLICE_THREAD_HAS_MF (1 << 5)
-/*
- * The codec supports frame threading and has inter-frame dependencies, so it
- * uses ff_thread_report/await_progress().
+/**
+ * The decoder might make use of the ProgressFrame API.
*/
-#define FF_CODEC_CAP_ALLOCATE_PROGRESS (1 << 6)
+#define FF_CODEC_CAP_USES_PROGRESSFRAMES (1 << 6)
/**
* Codec handles avctx->thread_count == 0 (auto) internally.
*/
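
For reference, a minimal sketch of how a decoder table entry would declare the renamed capability; the codec name and id below are hypothetical, only .caps_internal illustrates the new flag:

const FFCodec ff_example_decoder = {
    .p.name        = "example",             /* hypothetical decoder */
    .p.type        = AVMEDIA_TYPE_VIDEO,
    .p.id          = AV_CODEC_ID_NONE,      /* placeholder id */
    .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES, /* requests the ProgressFrame pool */
    /* cb_type / cb.decode and the remaining callbacks as usual */
};
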
diff --git a/libavcodec/codec_par.c b/libavcodec/codec_par.c
index 212cb97d77..790ea01d10 100644
--- a/libavcodec/codec_par.c
+++ b/libavcodec/codec_par.c
@@ -250,6 +250,7 @@ int avcodec_parameters_to_context(AVCodecContext *codec,
}
av_freep(&codec->extradata);
+ codec->extradata_size = 0;
if (par->extradata) {
codec->extradata = av_mallocz(par->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
if (!codec->extradata)
diff --git a/libavcodec/cri.c b/libavcodec/cri.c
index 990e52ac99..7b9a350967 100644
--- a/libavcodec/cri.c
+++ b/libavcodec/cri.c
@@ -406,9 +406,6 @@ skip:
}
}
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return 0;
diff --git a/libavcodec/dds.c b/libavcodec/dds.c
index 89cf225f25..2af7f5c98f 100644
--- a/libavcodec/dds.c
+++ b/libavcodec/dds.c
@@ -711,8 +711,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
run_postproc(avctx, frame);
/* Frame is ready to be output. */
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index c6f62fd46d..791940648d 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -49,12 +49,28 @@
#include "hwconfig.h"
#include "internal.h"
#include "packet_internal.h"
+#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
+#include "threadprogress.h"
typedef struct DecodeContext {
AVCodecInternal avci;
+ /**
+ * This is set to AV_FRAME_FLAG_KEY for decoders of intra-only formats
+ * (those whose codec descriptor has AV_CODEC_PROP_INTRA_ONLY set)
+ * to set the flag generically.
+ */
+ int intra_only_flag;
+
+ /**
+ * This is set to AV_PICTURE_TYPE_I for intra only video decoders
+ * and to AV_PICTURE_TYPE_NONE for other decoders. It is used to set
+ * the AVFrame's pict_type before the decoder receives it.
+ */
+ enum AVPictureType initial_pict_type;
+
/* to prevent infinite loop on errors when draining */
int nb_draining_errors;
@@ -191,17 +207,14 @@ fail:
return ret;
}
-#if !HAVE_THREADS
-#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
-#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
-#endif
-
static int decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
{
AVCodecInternal *avci = avctx->internal;
int ret;
ret = av_bsf_receive_packet(avci->bsf, pkt);
+ if (ret == AVERROR_EOF)
+ avci->draining = 1;
if (ret < 0)
return ret;
@@ -229,26 +242,19 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
if (avci->draining)
return AVERROR_EOF;
- /* If we are a worker thread, get the next packet from the threading
- * context. Otherwise we are the main (user-facing) context, so we get the
- * next packet from the input filterchain.
- */
- if (avctx->internal->is_frame_mt)
- return ff_thread_get_packet(avctx, pkt);
-
while (1) {
int ret = decode_get_packet(avctx, pkt);
if (ret == AVERROR(EAGAIN) &&
(!AVPACKET_IS_EMPTY(avci->buffer_pkt) || dc->draining_started)) {
ret = av_bsf_send_packet(avci->bsf, avci->buffer_pkt);
- if (ret >= 0)
- continue;
+ if (ret < 0) {
+ av_packet_unref(avci->buffer_pkt);
+ return ret;
+ }
- av_packet_unref(avci->buffer_pkt);
+ continue;
}
- if (ret == AVERROR_EOF)
- avci->draining = 1;
return ret;
}
}
@@ -390,6 +396,7 @@ static int discard_samples(AVCodecContext *avctx, AVFrame *frame, int64_t *disca
static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame, int64_t *discarded_samples)
{
AVCodecInternal *avci = avctx->internal;
+ DecodeContext *dc = decode_ctx(avci);
AVPacket *const pkt = avci->in_pkt;
const FFCodec *const codec = ffcodec(avctx->codec);
int got_frame, consumed;
@@ -408,22 +415,29 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
return AVERROR_EOF;
if (!pkt->data &&
- !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
+ !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
+ avctx->active_thread_type & FF_THREAD_FRAME))
return AVERROR_EOF;
got_frame = 0;
- consumed = codec->cb.decode(avctx, frame, &got_frame, pkt);
+ if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
+ consumed = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
+ } else {
+ frame->pict_type = dc->initial_pict_type;
+ frame->flags |= dc->intra_only_flag;
+ consumed = codec->cb.decode(avctx, frame, &got_frame, pkt);
- if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
- frame->pkt_dts = pkt->dts;
- if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+ if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+ frame->pkt_dts = pkt->dts;
+ if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
#if FF_API_FRAME_PKT
FF_DISABLE_DEPRECATION_WARNINGS
- if(!avctx->has_b_frames)
- frame->pkt_pos = pkt->pos;
+ if(!avctx->has_b_frames)
+ frame->pkt_pos = pkt->pos;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
+ }
}
emms_c();
@@ -434,7 +448,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
} else if (avctx->codec->type == AVMEDIA_TYPE_AUDIO) {
ret = !got_frame ? AVERROR(EAGAIN)
: discard_samples(avctx, frame, discarded_samples);
- }
+ } else
+ av_assert0(0);
if (ret == AVERROR(EAGAIN))
av_frame_unref(frame);
@@ -590,15 +605,18 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
return 0;
}
-int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
{
AVCodecInternal *avci = avctx->internal;
+ DecodeContext *dc = decode_ctx(avci);
const FFCodec *const codec = ffcodec(avctx->codec);
- int ret;
+ int ret, ok;
av_assert0(!frame->buf[0]);
if (codec->cb_type == FF_CODEC_CB_TYPE_RECEIVE_FRAME) {
+ frame->pict_type = dc->initial_pict_type;
+ frame->flags |= dc->intra_only_flag;
ret = codec->cb.receive_frame(avctx, frame);
emms_c();
if (!ret) {
@@ -615,20 +633,6 @@ int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
if (ret == AVERROR_EOF)
avci->draining_done = 1;
- return ret;
-}
-
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
-{
- AVCodecInternal *avci = avctx->internal;
- DecodeContext *dc = decode_ctx(avci);
- int ret, ok;
-
- if (avctx->active_thread_type & FF_THREAD_FRAME)
- ret = ff_thread_receive_frame(avctx, frame);
- else
- ret = ff_decode_receive_frame_internal(avctx, frame);
-
/* preserve ret */
ok = detect_colorspace(avctx, frame);
if (ok < 0) {
@@ -642,8 +646,7 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
frame->width = avctx->width;
if (!frame->height)
frame->height = avctx->height;
- } else
- frame->flags |= AV_FRAME_FLAG_KEY;
+ }
ret = fill_frame_props(avctx, frame);
if (ret < 0) {
@@ -1686,12 +1689,136 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
return ret;
}
+typedef struct ProgressInternal {
+ ThreadProgress progress;
+ struct AVFrame *f;
+} ProgressInternal;
+
+static void check_progress_consistency(const ProgressFrame *f)
+{
+ av_assert1(!!f->f == !!f->progress);
+ av_assert1(!f->progress || f->progress->f == f->f);
+}
+
+static int progress_frame_get(AVCodecContext *avctx, ProgressFrame *f)
+{
+ FFRefStructPool *pool = avctx->internal->progress_frame_pool;
+
+ av_assert1(!f->f && !f->progress);
+
+ f->progress = ff_refstruct_pool_get(pool);
+ if (!f->progress)
+ return AVERROR(ENOMEM);
+
+ f->f = f->progress->f;
+ return 0;
+}
+
+int ff_progress_frame_get_buffer(AVCodecContext *avctx, ProgressFrame *f, int flags)
+{
+ int ret;
+
+ ret = progress_frame_get(avctx, f);
+ if (ret < 0)
+ return ret;
+
+ ret = ff_thread_get_buffer(avctx, f->progress->f, flags);
+ if (ret < 0) {
+ f->f = NULL;
+ ff_refstruct_unref(&f->progress);
+ return ret;
+ }
+ return 0;
+}
+
+void ff_progress_frame_ref(ProgressFrame *dst, const ProgressFrame *src)
+{
+ av_assert1(src->progress && src->f && src->f == src->progress->f);
+ av_assert1(!dst->f && !dst->progress);
+ dst->f = src->f;
+ dst->progress = ff_refstruct_ref(src->progress);
+}
+
+void ff_progress_frame_unref(ProgressFrame *f)
+{
+ check_progress_consistency(f);
+ f->f = NULL;
+ ff_refstruct_unref(&f->progress);
+}
+
+void ff_progress_frame_replace(ProgressFrame *dst, const ProgressFrame *src)
+{
+ if (dst == src)
+ return;
+ ff_progress_frame_unref(dst);
+ check_progress_consistency(src);
+ if (src->f)
+ ff_progress_frame_ref(dst, src);
+}
+
+void ff_progress_frame_report(ProgressFrame *f, int n)
+{
+ ff_thread_progress_report(&f->progress->progress, n);
+}
+
+void ff_progress_frame_await(const ProgressFrame *f, int n)
+{
+ ff_thread_progress_await(&f->progress->progress, n);
+}
+
+#if !HAVE_THREADS
+enum ThreadingStatus ff_thread_sync_ref(AVCodecContext *avctx, size_t offset)
+{
+ return FF_THREAD_NO_FRAME_THREADING;
+}
+#endif /* !HAVE_THREADS */
+
+static av_cold int progress_frame_pool_init_cb(FFRefStructOpaque opaque, void *obj)
+{
+ const AVCodecContext *avctx = opaque.nc;
+ ProgressInternal *progress = obj;
+ int ret;
+
+ ret = ff_thread_progress_init(&progress->progress, avctx->active_thread_type & FF_THREAD_FRAME);
+ if (ret < 0)
+ return ret;
+
+ progress->f = av_frame_alloc();
+ if (!progress->f)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static void progress_frame_pool_reset_cb(FFRefStructOpaque unused, void *obj)
+{
+ ProgressInternal *progress = obj;
+
+ ff_thread_progress_reset(&progress->progress);
+ av_frame_unref(progress->f);
+}
+
+static av_cold void progress_frame_pool_free_entry_cb(FFRefStructOpaque opaque, void *obj)
+{
+ ProgressInternal *progress = obj;
+
+ ff_thread_progress_destroy(&progress->progress);
+ av_frame_free(&progress->f);
+}
+
int ff_decode_preinit(AVCodecContext *avctx)
{
AVCodecInternal *avci = avctx->internal;
DecodeContext *dc = decode_ctx(avci);
int ret = 0;
+ dc->initial_pict_type = AV_PICTURE_TYPE_NONE;
+ if (avctx->codec_descriptor->props & AV_CODEC_PROP_INTRA_ONLY) {
+ dc->intra_only_flag = AV_FRAME_FLAG_KEY;
+ if (avctx->codec_type == AVMEDIA_TYPE_VIDEO)
+ dc->initial_pict_type = AV_PICTURE_TYPE_I;
+ }
+
/* if the decoder init function was already called previously,
* free the already allocated subtitle_header before overwriting it */
av_freep(&avctx->subtitle_header);
@@ -1784,6 +1911,16 @@ int ff_decode_preinit(AVCodecContext *avctx)
if (!avci->in_pkt || !avci->last_pkt_props)
return AVERROR(ENOMEM);
+ if (ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_USES_PROGRESSFRAMES) {
+ avci->progress_frame_pool =
+ ff_refstruct_pool_alloc_ext(sizeof(ProgressInternal),
+ FF_REFSTRUCT_POOL_FLAG_FREE_ON_INIT_ERROR,
+ avctx, progress_frame_pool_init_cb,
+ progress_frame_pool_reset_cb,
+ progress_frame_pool_free_entry_cb, NULL);
+ if (!avci->progress_frame_pool)
+ return AVERROR(ENOMEM);
+ }
ret = decode_bsfs_init(avctx);
if (ret < 0)
return ret;
@@ -1803,16 +1940,16 @@ int ff_decode_preinit(AVCodecContext *avctx)
* @retval 0 side data of this type can be added to frame
* @retval 1 side data of this type should not be added to frame
*/
-static int side_data_pref(const AVCodecContext *avctx, AVFrame *frame,
- enum AVFrameSideDataType type)
+static int side_data_pref(const AVCodecContext *avctx, AVFrameSideData ***sd,
+ int *nb_sd, enum AVFrameSideDataType type)
{
DecodeContext *dc = decode_ctx(avctx->internal);
// Note: could be skipped for `type` without corresponding packet sd
- if (av_frame_get_side_data(frame, type)) {
+ if (av_frame_side_data_get(*sd, *nb_sd, type)) {
if (dc->side_data_pref_mask & (1ULL << type))
return 1;
- av_frame_remove_side_data(frame, type);
+ av_frame_side_data_remove(sd, nb_sd, type);
}
return 0;
@@ -1825,7 +1962,7 @@ int ff_frame_new_side_data(const AVCodecContext *avctx, AVFrame *frame,
{
AVFrameSideData *sd;
- if (side_data_pref(avctx, frame, type)) {
+ if (side_data_pref(avctx, &frame->side_data, &frame->nb_side_data, type)) {
if (psd)
*psd = NULL;
return 0;
@@ -1838,34 +1975,71 @@ int ff_frame_new_side_data(const AVCodecContext *avctx, AVFrame *frame,
return sd ? 0 : AVERROR(ENOMEM);
}
-int ff_frame_new_side_data_from_buf(const AVCodecContext *avctx,
- AVFrame *frame, enum AVFrameSideDataType type,
- AVBufferRef **buf, AVFrameSideData **psd)
+int ff_frame_new_side_data_from_buf_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type,
+ AVBufferRef **buf)
{
- AVFrameSideData *sd = NULL;
int ret = 0;
- if (side_data_pref(avctx, frame, type))
+ if (side_data_pref(avctx, sd, nb_sd, type))
goto finish;
- sd = av_frame_new_side_data_from_buf(frame, type, *buf);
- if (sd)
- *buf = NULL;
- else
+ if (!av_frame_side_data_add(sd, nb_sd, type, buf, 0))
ret = AVERROR(ENOMEM);
finish:
av_buffer_unref(buf);
- if (psd)
- *psd = sd;
return ret;
}
+int ff_frame_new_side_data_from_buf(const AVCodecContext *avctx,
+ AVFrame *frame, enum AVFrameSideDataType type,
+ AVBufferRef **buf, AVFrameSideData **psd)
+{
+ return ff_frame_new_side_data_from_buf_ext(avctx,
+ &frame->side_data, &frame->nb_side_data,
+ type, buf);
+}
+
+int ff_decode_mastering_display_new_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ struct AVMasteringDisplayMetadata **mdm)
+{
+ AVBufferRef *buf;
+ size_t size;
+
+ if (side_data_pref(avctx, sd, nb_sd, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA)) {
+ *mdm = NULL;
+ return 0;
+ }
+
+ *mdm = av_mastering_display_metadata_alloc_size(&size);
+ if (!*mdm)
+ return AVERROR(ENOMEM);
+
+ buf = av_buffer_create((uint8_t *)*mdm, size, NULL, NULL, 0);
+ if (!buf) {
+ av_freep(mdm);
+ return AVERROR(ENOMEM);
+ }
+
+ if (!av_frame_side_data_add(sd, nb_sd, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA,
+ &buf, 0)) {
+ *mdm = NULL;
+ av_buffer_unref(&buf);
+ return AVERROR(ENOMEM);
+ }
+
+ return 0;
+}
+
int ff_decode_mastering_display_new(const AVCodecContext *avctx, AVFrame *frame,
AVMasteringDisplayMetadata **mdm)
{
- if (side_data_pref(avctx, frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA)) {
+ if (side_data_pref(avctx, &frame->side_data, &frame->nb_side_data,
+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA)) {
*mdm = NULL;
return 0;
}
@@ -1874,10 +2048,43 @@ int ff_decode_mastering_display_new(const AVCodecContext *avctx, AVFrame *frame,
return *mdm ? 0 : AVERROR(ENOMEM);
}
+int ff_decode_content_light_new_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ AVContentLightMetadata **clm)
+{
+ AVBufferRef *buf;
+ size_t size;
+
+ if (side_data_pref(avctx, sd, nb_sd, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL)) {
+ *clm = NULL;
+ return 0;
+ }
+
+ *clm = av_content_light_metadata_alloc(&size);
+ if (!*clm)
+ return AVERROR(ENOMEM);
+
+ buf = av_buffer_create((uint8_t *)*clm, size, NULL, NULL, 0);
+ if (!buf) {
+ av_freep(clm);
+ return AVERROR(ENOMEM);
+ }
+
+ if (!av_frame_side_data_add(sd, nb_sd, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
+ &buf, 0)) {
+ *clm = NULL;
+ av_buffer_unref(&buf);
+ return AVERROR(ENOMEM);
+ }
+
+ return 0;
+}
+
int ff_decode_content_light_new(const AVCodecContext *avctx, AVFrame *frame,
AVContentLightMetadata **clm)
{
- if (side_data_pref(avctx, frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL)) {
+ if (side_data_pref(avctx, &frame->side_data, &frame->nb_side_data,
+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL)) {
*clm = NULL;
return 0;
}
@@ -1941,8 +2148,7 @@ void ff_decode_flush_buffers(AVCodecContext *avctx)
dc->pts_correction_last_pts =
dc->pts_correction_last_dts = INT64_MIN;
- if (avci->bsf)
- av_bsf_flush(avci->bsf);
+ av_bsf_flush(avci->bsf);
dc->nb_draining_errors = 0;
dc->draining_started = 0;
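
A minimal sketch of how a frame-threaded decoder would use the ProgressFrame helpers defined above; the per-row slice logic is hypothetical, only the ff_progress_frame_* calls follow the signatures in this diff (ProgressFrame itself comes from progressframe.h):

static int decode_row(AVCodecContext *avctx, ProgressFrame *cur,
                      const ProgressFrame *ref, int row)
{
    int ret;

    if (!cur->f) {
        /* gets a pooled frame + ThreadProgress and allocates its buffer */
        ret = ff_progress_frame_get_buffer(avctx, cur, AV_GET_BUFFER_FLAG_REF);
        if (ret < 0)
            return ret;
    }

    /* wait until the reference frame has decoded at least this row */
    if (ref->f)
        ff_progress_frame_await(ref, row);

    /* ... reconstruct the current row using ref->f as reference ... */

    /* make the finished row visible to the other threads */
    ff_progress_frame_report(cur, row);
    return 0;
}
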
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 4ffbd9db8e..72a775ff9d 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -175,6 +175,15 @@ int ff_frame_new_side_data_from_buf(const AVCodecContext *avctx,
AVFrame *frame, enum AVFrameSideDataType type,
AVBufferRef **buf, AVFrameSideData **sd);
+/**
+ * Same as `ff_frame_new_side_data_from_buf`, but taking an AVFrameSideData
+ * array directly instead of an AVFrame.
+ */
+int ff_frame_new_side_data_from_buf_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type,
+ AVBufferRef **buf);
+
struct AVMasteringDisplayMetadata;
struct AVContentLightMetadata;
@@ -188,6 +197,14 @@ int ff_decode_mastering_display_new(const AVCodecContext *avctx, AVFrame *frame,
struct AVMasteringDisplayMetadata **mdm);
/**
+ * Same as `ff_decode_mastering_display_new`, but taking an AVFrameSideData
+ * array directly instead of an AVFrame.
+ */
+int ff_decode_mastering_display_new_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ struct AVMasteringDisplayMetadata **mdm);
+
+/**
* Wrapper around av_content_light_metadata_create_side_data(), which
* rejects side data overridden by the demuxer. Returns 0 on success, and a
* negative error code otherwise. If successful, *clm may either be a pointer to
@@ -196,4 +213,11 @@ int ff_decode_mastering_display_new(const AVCodecContext *avctx, AVFrame *frame,
int ff_decode_content_light_new(const AVCodecContext *avctx, AVFrame *frame,
struct AVContentLightMetadata **clm);
+/**
+ * Same as `ff_decode_content_light_new`, but taking an AVFrameSideData
+ * array directly instead of an AVFrame.
+ */
+int ff_decode_content_light_new_ext(const AVCodecContext *avctx,
+ AVFrameSideData ***sd, int *nb_sd,
+ struct AVContentLightMetadata **clm);
#endif /* AVCODEC_DECODE_H */
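
A hedged sketch of the new *_ext variant in use, attaching stream-global mastering-display metadata to a side-data array rather than a frame; treating avctx->decoded_side_data as the target array is an assumption here, and the luminance values are placeholders:

static int export_static_hdr(AVCodecContext *avctx)
{
    AVMasteringDisplayMetadata *mdm;
    int ret = ff_decode_mastering_display_new_ext(avctx,
                                                  &avctx->decoded_side_data,
                                                  &avctx->nb_decoded_side_data,
                                                  &mdm);
    if (ret < 0)
        return ret;
    if (!mdm)   /* NULL: side data of this type is preferred from the demuxer */
        return 0;

    mdm->max_luminance = av_make_q(1000, 1);   /* placeholder values */
    mdm->min_luminance = av_make_q(5, 10000);
    mdm->has_luminance = 1;
    return 0;
}
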
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index f1fde0b339..3a36479c59 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -37,7 +37,6 @@
#include "golomb.h"
#include "dirac_arith.h"
#include "dirac_vlc.h"
-#include "mpegpicture.h"
#include "mpegvideoencdsp.h"
#include "dirac_dwt.h"
#include "dirac.h"
@@ -45,6 +44,8 @@
#include "diracdsp.h"
#include "videodsp.h"
+#define EDGE_WIDTH 16
+
/**
* The spec limits this to 3 for frame coding, but in practice can be as high as 6
*/
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index e549b38720..fe0809a5f5 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -655,8 +655,6 @@ decode_coding_unit:
if (first_field) {
if ((ret = ff_thread_get_buffer(avctx, picture, 0)) < 0)
return ret;
- picture->pict_type = AV_PICTURE_TYPE_I;
- picture->flags |= AV_FRAME_FLAG_KEY;
}
ctx->buf_size = buf_size - ctx->data_offset;
diff --git a/libavcodec/dovi_rpu.c b/libavcodec/dovi_rpu.c
index 9f7a6b0066..b26c19dd5e 100644
--- a/libavcodec/dovi_rpu.c
+++ b/libavcodec/dovi_rpu.c
@@ -2,7 +2,7 @@
* Dolby Vision RPU decoder
*
* Copyright (C) 2021 Jan Ekström
- * Copyright (C) 2021 Niklas Haas
+ * Copyright (C) 2021-2024 Niklas Haas
*
* This file is part of FFmpeg.
*
@@ -21,29 +21,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libavutil/buffer.h"
#include "libavutil/mem.h"
-#include "libavutil/crc.h"
-#include "avcodec.h"
#include "dovi_rpu.h"
-#include "golomb.h"
-#include "get_bits.h"
#include "refstruct.h"
-enum {
- RPU_COEFF_FIXED = 0,
- RPU_COEFF_FLOAT = 1,
-};
-
-/**
- * Private contents of vdr.
- */
-typedef struct DOVIVdr {
- AVDOVIDataMapping mapping;
- AVDOVIColorMetadata color;
-} DOVIVdr;
-
void ff_dovi_ctx_unref(DOVIContext *s)
{
for (int i = 0; i < FF_ARRAY_ELEMS(s->vdr); i++)
@@ -64,7 +46,7 @@ void ff_dovi_ctx_flush(DOVIContext *s)
*s = (DOVIContext) {
.logctx = s->logctx,
- .dv_profile = s->dv_profile,
+ .cfg = s->cfg,
/* preserve temporary buffer */
.rpu_buf = s->rpu_buf,
.rpu_buf_sz = s->rpu_buf_sz,
@@ -74,61 +56,16 @@ void ff_dovi_ctx_flush(DOVIContext *s)
void ff_dovi_ctx_replace(DOVIContext *s, const DOVIContext *s0)
{
s->logctx = s0->logctx;
+ s->cfg = s0->cfg;
+ s->header = s0->header;
s->mapping = s0->mapping;
s->color = s0->color;
- s->dv_profile = s0->dv_profile;
for (int i = 0; i <= DOVI_MAX_DM_ID; i++)
ff_refstruct_replace(&s->vdr[i], s0->vdr[i]);
ff_refstruct_replace(&s->ext_blocks, s0->ext_blocks);
}
-void ff_dovi_update_cfg(DOVIContext *s, const AVDOVIDecoderConfigurationRecord *cfg)
-{
- if (!cfg)
- return;
-
- s->dv_profile = cfg->dv_profile;
-}
-
-int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame)
-{
- AVFrameSideData *sd;
- AVBufferRef *buf;
- AVDOVIMetadata *dovi;
- size_t dovi_size, ext_sz;
-
- if (!s->mapping || !s->color)
- return 0; /* incomplete dovi metadata */
-
- dovi = av_dovi_metadata_alloc(&dovi_size);
- if (!dovi)
- return AVERROR(ENOMEM);
-
- buf = av_buffer_create((uint8_t *) dovi, dovi_size, NULL, NULL, 0);
- if (!buf) {
- av_free(dovi);
- return AVERROR(ENOMEM);
- }
-
- sd = av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DOVI_METADATA, buf);
- if (!sd) {
- av_buffer_unref(&buf);
- return AVERROR(ENOMEM);
- }
-
- /* Copy only the parts of these structs known to us at compiler-time. */
-#define COPY(t, a, b, last) memcpy(a, b, offsetof(t, last) + sizeof((b)->last))
- COPY(AVDOVIRpuDataHeader, av_dovi_get_header(dovi), &s->header, disable_residual_flag);
- COPY(AVDOVIDataMapping, av_dovi_get_mapping(dovi), s->mapping, nlq_pivots);
- COPY(AVDOVIColorMetadata, av_dovi_get_color(dovi), s->color, source_diagonal);
- ext_sz = FFMIN(sizeof(AVDOVIDmData), dovi->ext_block_size);
- for (int i = 0; i < s->num_ext_blocks; i++)
- memcpy(av_dovi_get_ext(dovi, i), &s->ext_blocks[i], ext_sz);
- dovi->num_ext_blocks = s->num_ext_blocks;
- return 0;
-}
-
-static int guess_profile(const AVDOVIRpuDataHeader *hdr)
+int ff_dovi_guess_profile_hevc(const AVDOVIRpuDataHeader *hdr)
{
switch (hdr->vdr_rpu_profile) {
case 0:
@@ -149,569 +86,3 @@ static int guess_profile(const AVDOVIRpuDataHeader *hdr)
return 0; /* unknown */
}
-
-static inline uint64_t get_ue_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
-{
- uint64_t ipart;
- union { uint32_t u32; float f32; } fpart;
-
- switch (hdr->coef_data_type) {
- case RPU_COEFF_FIXED:
- ipart = get_ue_golomb_long(gb);
- fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
- return (ipart << hdr->coef_log2_denom) | fpart.u32;
-
- case RPU_COEFF_FLOAT:
- fpart.u32 = get_bits_long(gb, 32);
- return fpart.f32 * (1LL << hdr->coef_log2_denom);
- }
-
- return 0; /* unreachable */
-}
-
-static inline int64_t get_se_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
-{
- int64_t ipart;
- union { uint32_t u32; float f32; } fpart;
-
- switch (hdr->coef_data_type) {
- case RPU_COEFF_FIXED:
- ipart = get_se_golomb_long(gb);
- fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
- return ipart * (1LL << hdr->coef_log2_denom) | fpart.u32;
-
- case RPU_COEFF_FLOAT:
- fpart.u32 = get_bits_long(gb, 32);
- return fpart.f32 * (1LL << hdr->coef_log2_denom);
- }
-
- return 0; /* unreachable */
-}
-
-static inline unsigned get_variable_bits(GetBitContext *gb, int n)
-{
- unsigned int value = get_bits(gb, n);
- int read_more = get_bits1(gb);
- while (read_more) {
- value = (value + 1) << n;
- value |= get_bits(gb, n);
- read_more = get_bits1(gb);
- }
- return value;
-}
-
-#define VALIDATE(VAR, MIN, MAX) \
- do { \
- if (VAR < MIN || VAR > MAX) { \
- av_log(s->logctx, AV_LOG_ERROR, "RPU validation failed: " \
- #MIN" <= "#VAR" = %d <= "#MAX"\n", (int) VAR); \
- goto fail; \
- } \
- } while (0)
-
-static void parse_ext_v1(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm)
-{
- switch (dm->level) {
- case 1:
- dm->l1.min_pq = get_bits(gb, 12);
- dm->l1.max_pq = get_bits(gb, 12);
- dm->l1.avg_pq = get_bits(gb, 12);
- break;
- case 2:
- dm->l2.target_max_pq = get_bits(gb, 12);
- dm->l2.trim_slope = get_bits(gb, 12);
- dm->l2.trim_offset = get_bits(gb, 12);
- dm->l2.trim_power = get_bits(gb, 12);
- dm->l2.trim_chroma_weight = get_bits(gb, 12);
- dm->l2.trim_saturation_gain = get_bits(gb, 12);
- dm->l2.ms_weight = get_bits(gb, 13) - 8192;
- break;
- case 4:
- dm->l4.anchor_pq = get_bits(gb, 12);
- dm->l4.anchor_power = get_bits(gb, 12);
- break;
- case 5:
- dm->l5.left_offset = get_bits(gb, 13);
- dm->l5.right_offset = get_bits(gb, 13);
- dm->l5.top_offset = get_bits(gb, 13);
- dm->l5.bottom_offset = get_bits(gb, 13);
- break;
- case 6:
- dm->l6.max_luminance = get_bits(gb, 16);
- dm->l6.min_luminance = get_bits(gb, 16);
- dm->l6.max_cll = get_bits(gb, 16);
- dm->l6.max_fall = get_bits(gb, 16);
- break;
- case 255:
- dm->l255.dm_run_mode = get_bits(gb, 8);
- dm->l255.dm_run_version = get_bits(gb, 8);
- for (int i = 0; i < 4; i++)
- dm->l255.dm_debug[i] = get_bits(gb, 8);
- break;
- default:
- av_log(s->logctx, AV_LOG_WARNING,
- "Unknown Dolby Vision DM v1 level: %u\n", dm->level);
- }
-}
-
-static AVCIExy get_cie_xy(GetBitContext *gb)
-{
- AVCIExy xy;
- const int denom = 32767;
- xy.x = av_make_q(get_sbits(gb, 16), denom);
- xy.y = av_make_q(get_sbits(gb, 16), denom);
- return xy;
-}
-
-static void parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
- int ext_block_length)
-{
- switch (dm->level) {
- case 3:
- dm->l3.min_pq_offset = get_bits(gb, 12);
- dm->l3.max_pq_offset = get_bits(gb, 12);
- dm->l3.avg_pq_offset = get_bits(gb, 12);
- break;
- case 8:
- dm->l8.target_display_index = get_bits(gb, 8);
- dm->l8.trim_slope = get_bits(gb, 12);
- dm->l8.trim_offset = get_bits(gb, 12);
- dm->l8.trim_power = get_bits(gb, 12);
- dm->l8.trim_chroma_weight = get_bits(gb, 12);
- dm->l8.trim_saturation_gain = get_bits(gb, 12);
- dm->l8.ms_weight = get_bits(gb, 12) - 8192;
- if (ext_block_length < 12)
- break;
- dm->l8.target_mid_contrast = get_bits(gb, 12);
- if (ext_block_length < 13)
- break;
- dm->l8.clip_trim = get_bits(gb, 12);
- if (ext_block_length < 19)
- break;
- for (int i = 0; i < 6; i++)
- dm->l8.saturation_vector_field[i] = get_bits(gb, 8);
- if (ext_block_length < 25)
- break;
- for (int i = 0; i < 6; i++)
- dm->l8.hue_vector_field[i] = get_bits(gb, 8);
- break;
- case 9:
- dm->l9.source_primary_index = get_bits(gb, 8);
- if (ext_block_length < 17)
- break;
- dm->l9.source_display_primaries.prim.r = get_cie_xy(gb);
- dm->l9.source_display_primaries.prim.g = get_cie_xy(gb);
- dm->l9.source_display_primaries.prim.b = get_cie_xy(gb);
- dm->l9.source_display_primaries.wp = get_cie_xy(gb);
- break;
- case 10:
- dm->l10.target_display_index = get_bits(gb, 8);
- dm->l10.target_max_pq = get_bits(gb, 12);
- dm->l10.target_min_pq = get_bits(gb, 12);
- dm->l10.target_primary_index = get_bits(gb, 8);
- if (ext_block_length < 21)
- break;
- dm->l10.target_display_primaries.prim.r = get_cie_xy(gb);
- dm->l10.target_display_primaries.prim.g = get_cie_xy(gb);
- dm->l10.target_display_primaries.prim.b = get_cie_xy(gb);
- dm->l10.target_display_primaries.wp = get_cie_xy(gb);
- break;
- case 11:
- dm->l11.content_type = get_bits(gb, 8);
- dm->l11.whitepoint = get_bits(gb, 4);
- dm->l11.reference_mode_flag = get_bits1(gb);
- skip_bits(gb, 3); /* reserved */
- dm->l11.sharpness = get_bits(gb, 2);
- dm->l11.noise_reduction = get_bits(gb, 2);
- dm->l11.mpeg_noise_reduction = get_bits(gb, 2);
- dm->l11.frame_rate_conversion = get_bits(gb, 2);
- dm->l11.brightness = get_bits(gb, 2);
- dm->l11.color = get_bits(gb, 2);
- break;
- case 254:
- dm->l254.dm_mode = get_bits(gb, 8);
- dm->l254.dm_version_index = get_bits(gb, 8);
- break;
- default:
- av_log(s->logctx, AV_LOG_WARNING,
- "Unknown Dolby Vision DM v2 level: %u\n", dm->level);
- }
-}
-
-static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
-{
- int num_ext_blocks, ext_block_length, start_pos, parsed_bits;
-
- num_ext_blocks = get_ue_golomb_31(gb);
- align_get_bits(gb);
- if (s->num_ext_blocks + num_ext_blocks > AV_DOVI_MAX_EXT_BLOCKS)
- return AVERROR_INVALIDDATA;
-
- if (!s->ext_blocks) {
- s->ext_blocks = ff_refstruct_allocz(sizeof(AVDOVIDmData) * AV_DOVI_MAX_EXT_BLOCKS);
- if (!s->ext_blocks)
- return AVERROR(ENOMEM);
- }
-
- while (num_ext_blocks--) {
- AVDOVIDmData *dm = &s->ext_blocks[s->num_ext_blocks++];
- ext_block_length = get_ue_golomb_31(gb);
- dm->level = get_bits(gb, 8);
- start_pos = get_bits_count(gb);
-
- switch (ver) {
- case 1: parse_ext_v1(s, gb, dm); break;
- case 2: parse_ext_v2(s, gb, dm, ext_block_length); break;
- }
-
- parsed_bits = get_bits_count(gb) - start_pos;
- if (parsed_bits > ext_block_length * 8)
- return AVERROR_INVALIDDATA;
- skip_bits(gb, ext_block_length * 8 - parsed_bits);
- }
-
- return 0;
-}
-
-int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
- int err_recognition)
-{
- AVDOVIRpuDataHeader *hdr = &s->header;
- GetBitContext *gb = &(GetBitContext){0};
- DOVIVdr *vdr;
- int ret;
-
- uint8_t rpu_type;
- uint8_t vdr_seq_info_present;
- uint8_t vdr_dm_metadata_present;
- uint8_t use_prev_vdr_rpu;
- uint8_t use_nlq;
- uint8_t profile;
-
- if (rpu_size < 5)
- goto fail;
-
- /* Container */
- if (s->dv_profile == 10 /* dav1.10 */) {
- /* DV inside AV1 re-uses an EMDF container skeleton, but with fixed
- * values - so we can effectively treat this as a magic byte sequence.
- *
- * The exact fields are, as follows:
- * emdf_version : f(2) = 0
- * key_id : f(3) = 6
- * emdf_payload_id : f(5) = 31
- * emdf_payload_id_ext : var(5) = 225
- * smploffste : f(1) = 0
- * duratione : f(1) = 0
- * groupide : f(1) = 0
- * codecdatae : f(1) = 0
- * discard_unknown_payload : f(1) = 1
- */
- const unsigned header_magic = 0x01be6841u;
- unsigned emdf_header, emdf_payload_size, emdf_protection;
- if ((ret = init_get_bits8(gb, rpu, rpu_size)) < 0)
- return ret;
- emdf_header = get_bits_long(gb, 27);
- VALIDATE(emdf_header, header_magic, header_magic);
- emdf_payload_size = get_variable_bits(gb, 8);
- VALIDATE(emdf_payload_size, 6, 512);
- if (emdf_payload_size * 8 > get_bits_left(gb))
- return AVERROR_INVALIDDATA;
-
- /* The payload is not byte-aligned (off by *one* bit, curse Dolby),
- * so copy into a fresh buffer to preserve byte alignment of the
- * RPU struct */
- av_fast_padded_malloc(&s->rpu_buf, &s->rpu_buf_sz, emdf_payload_size);
- if (!s->rpu_buf)
- return AVERROR(ENOMEM);
- for (int i = 0; i < emdf_payload_size; i++)
- s->rpu_buf[i] = get_bits(gb, 8);
- rpu = s->rpu_buf;
- rpu_size = emdf_payload_size;
-
- /* Validate EMDF footer */
- emdf_protection = get_bits(gb, 5 + 12);
- VALIDATE(emdf_protection, 0x400, 0x400);
- } else {
- /* NAL RBSP with prefix and trailing zeroes */
- VALIDATE(rpu[0], 25, 25); /* NAL prefix */
- rpu++;
- rpu_size--;
- /* Strip trailing padding bytes */
- while (rpu_size && rpu[rpu_size - 1] == 0)
- rpu_size--;
- }
-
- if (!rpu_size || rpu[rpu_size - 1] != 0x80)
- goto fail;
-
- if (err_recognition & AV_EF_CRCCHECK) {
- uint32_t crc = av_bswap32(av_crc(av_crc_get_table(AV_CRC_32_IEEE),
- -1, rpu, rpu_size - 1)); /* exclude 0x80 */
- if (crc) {
- av_log(s->logctx, AV_LOG_ERROR, "RPU CRC mismatch: %X\n", crc);
- if (err_recognition & AV_EF_EXPLODE)
- goto fail;
- }
- }
-
- if ((ret = init_get_bits8(gb, rpu, rpu_size)) < 0)
- return ret;
-
- /* RPU header */
- rpu_type = get_bits(gb, 6);
- if (rpu_type != 2) {
- av_log(s->logctx, AV_LOG_WARNING, "Unrecognized RPU type "
- "%"PRIu8", ignoring\n", rpu_type);
- return 0;
- }
-
- hdr->rpu_type = rpu_type;
- hdr->rpu_format = get_bits(gb, 11);
-
- /* Values specific to RPU type 2 */
- hdr->vdr_rpu_profile = get_bits(gb, 4);
- hdr->vdr_rpu_level = get_bits(gb, 4);
-
- vdr_seq_info_present = get_bits1(gb);
- if (vdr_seq_info_present) {
- hdr->chroma_resampling_explicit_filter_flag = get_bits1(gb);
- hdr->coef_data_type = get_bits(gb, 2);
- VALIDATE(hdr->coef_data_type, RPU_COEFF_FIXED, RPU_COEFF_FLOAT);
- switch (hdr->coef_data_type) {
- case RPU_COEFF_FIXED:
- hdr->coef_log2_denom = get_ue_golomb(gb);
- VALIDATE(hdr->coef_log2_denom, 13, 32);
- break;
- case RPU_COEFF_FLOAT:
- hdr->coef_log2_denom = 32; /* arbitrary, choose maximum precision */
- break;
- }
-
- hdr->vdr_rpu_normalized_idc = get_bits(gb, 2);
- hdr->bl_video_full_range_flag = get_bits1(gb);
-
- if ((hdr->rpu_format & 0x700) == 0) {
- int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
- int el_bit_depth_minus8 = get_ue_golomb_31(gb);
- int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
- VALIDATE(bl_bit_depth_minus8, 0, 8);
- VALIDATE(el_bit_depth_minus8, 0, 8);
- VALIDATE(vdr_bit_depth_minus8, 0, 8);
- hdr->bl_bit_depth = bl_bit_depth_minus8 + 8;
- hdr->el_bit_depth = el_bit_depth_minus8 + 8;
- hdr->vdr_bit_depth = vdr_bit_depth_minus8 + 8;
- hdr->spatial_resampling_filter_flag = get_bits1(gb);
- skip_bits(gb, 3); /* reserved_zero_3bits */
- hdr->el_spatial_resampling_filter_flag = get_bits1(gb);
- hdr->disable_residual_flag = get_bits1(gb);
- }
- }
-
- if (!hdr->bl_bit_depth) {
- av_log(s->logctx, AV_LOG_ERROR, "Missing RPU VDR sequence info?\n");
- goto fail;
- }
-
- vdr_dm_metadata_present = get_bits1(gb);
- use_prev_vdr_rpu = get_bits1(gb);
- use_nlq = (hdr->rpu_format & 0x700) == 0 && !hdr->disable_residual_flag;
-
- profile = s->dv_profile ? s->dv_profile : guess_profile(hdr);
- if (profile == 5 && use_nlq) {
- av_log(s->logctx, AV_LOG_ERROR, "Profile 5 RPUs should not use NLQ\n");
- goto fail;
- }
-
- if (use_prev_vdr_rpu) {
- int prev_vdr_rpu_id = get_ue_golomb_31(gb);
- VALIDATE(prev_vdr_rpu_id, 0, DOVI_MAX_DM_ID);
- if (!s->vdr[prev_vdr_rpu_id]) {
- av_log(s->logctx, AV_LOG_ERROR, "Unknown previous RPU ID: %u\n",
- prev_vdr_rpu_id);
- goto fail;
- }
- vdr = s->vdr[prev_vdr_rpu_id];
- s->mapping = &vdr->mapping;
- } else {
- int vdr_rpu_id = get_ue_golomb_31(gb);
- VALIDATE(vdr_rpu_id, 0, DOVI_MAX_DM_ID);
- if (!s->vdr[vdr_rpu_id]) {
- s->vdr[vdr_rpu_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
- if (!s->vdr[vdr_rpu_id])
- return AVERROR(ENOMEM);
- }
-
- vdr = s->vdr[vdr_rpu_id];
- s->mapping = &vdr->mapping;
-
- vdr->mapping.vdr_rpu_id = vdr_rpu_id;
- vdr->mapping.mapping_color_space = get_ue_golomb_31(gb);
- vdr->mapping.mapping_chroma_format_idc = get_ue_golomb_31(gb);
-
- for (int c = 0; c < 3; c++) {
- AVDOVIReshapingCurve *curve = &vdr->mapping.curves[c];
- int num_pivots_minus_2 = get_ue_golomb_31(gb);
- int pivot = 0;
-
- VALIDATE(num_pivots_minus_2, 0, AV_DOVI_MAX_PIECES - 1);
- curve->num_pivots = num_pivots_minus_2 + 2;
- for (int i = 0; i < curve->num_pivots; i++) {
- pivot += get_bits(gb, hdr->bl_bit_depth);
- curve->pivots[i] = av_clip_uint16(pivot);
- }
- }
-
- if (use_nlq) {
- int nlq_pivot = 0;
- vdr->mapping.nlq_method_idc = get_bits(gb, 3);
-
- for (int i = 0; i < 2; i++) {
- nlq_pivot += get_bits(gb, hdr->bl_bit_depth);
- vdr->mapping.nlq_pivots[i] = av_clip_uint16(nlq_pivot);
- }
-
- /**
- * The patent mentions another legal value, NLQ_MU_LAW, but it's
- * not documented anywhere how to parse or apply that type of NLQ.
- */
- VALIDATE(vdr->mapping.nlq_method_idc, 0, AV_DOVI_NLQ_LINEAR_DZ);
- } else {
- vdr->mapping.nlq_method_idc = AV_DOVI_NLQ_NONE;
- }
-
- vdr->mapping.num_x_partitions = get_ue_golomb_long(gb) + 1;
- vdr->mapping.num_y_partitions = get_ue_golomb_long(gb) + 1;
- /* End of rpu_data_header(), start of vdr_rpu_data_payload() */
-
- for (int c = 0; c < 3; c++) {
- AVDOVIReshapingCurve *curve = &vdr->mapping.curves[c];
- for (int i = 0; i < curve->num_pivots - 1; i++) {
- int mapping_idc = get_ue_golomb_31(gb);
- VALIDATE(mapping_idc, 0, 1);
- curve->mapping_idc[i] = mapping_idc;
- switch (mapping_idc) {
- case AV_DOVI_MAPPING_POLYNOMIAL: {
- int poly_order_minus1 = get_ue_golomb_31(gb);
- VALIDATE(poly_order_minus1, 0, 1);
- curve->poly_order[i] = poly_order_minus1 + 1;
- if (poly_order_minus1 == 0) {
- int linear_interp_flag = get_bits1(gb);
- if (linear_interp_flag) {
- /* lack of documentation/samples */
- avpriv_request_sample(s->logctx, "Dolby Vision "
- "linear interpolation");
- ff_dovi_ctx_unref(s);
- return AVERROR_PATCHWELCOME;
- }
- }
- for (int k = 0; k <= curve->poly_order[i]; k++)
- curve->poly_coef[i][k] = get_se_coef(gb, hdr);
- break;
- }
- case AV_DOVI_MAPPING_MMR: {
- int mmr_order_minus1 = get_bits(gb, 2);
- VALIDATE(mmr_order_minus1, 0, 2);
- curve->mmr_order[i] = mmr_order_minus1 + 1;
- curve->mmr_constant[i] = get_se_coef(gb, hdr);
- for (int j = 0; j < curve->mmr_order[i]; j++) {
- for (int k = 0; k < 7; k++)
- curve->mmr_coef[i][j][k] = get_se_coef(gb, hdr);
- }
- break;
- }
- }
- }
- }
-
- if (use_nlq) {
- for (int c = 0; c < 3; c++) {
- AVDOVINLQParams *nlq = &vdr->mapping.nlq[c];
- nlq->nlq_offset = get_bits(gb, hdr->el_bit_depth);
- nlq->vdr_in_max = get_ue_coef(gb, hdr);
- switch (vdr->mapping.nlq_method_idc) {
- case AV_DOVI_NLQ_LINEAR_DZ:
- nlq->linear_deadzone_slope = get_ue_coef(gb, hdr);
- nlq->linear_deadzone_threshold = get_ue_coef(gb, hdr);
- break;
- }
- }
- }
- }
-
- if (vdr_dm_metadata_present) {
- AVDOVIColorMetadata *color;
- int affected_dm_id = get_ue_golomb_31(gb);
- int current_dm_id = get_ue_golomb_31(gb);
- VALIDATE(affected_dm_id, 0, DOVI_MAX_DM_ID);
- VALIDATE(current_dm_id, 0, DOVI_MAX_DM_ID);
- if (!s->vdr[affected_dm_id]) {
- s->vdr[affected_dm_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
- if (!s->vdr[affected_dm_id])
- return AVERROR(ENOMEM);
- }
-
- if (!s->vdr[current_dm_id]) {
- av_log(s->logctx, AV_LOG_ERROR, "Unknown previous RPU DM ID: %u\n",
- current_dm_id);
- goto fail;
- }
-
- /* Update current pointer based on current_dm_id */
- vdr = s->vdr[current_dm_id];
- s->color = &vdr->color;
-
- /* Update values of affected_dm_id */
- vdr = s->vdr[affected_dm_id];
- color = &vdr->color;
- color->dm_metadata_id = affected_dm_id;
- color->scene_refresh_flag = get_ue_golomb_31(gb);
- for (int i = 0; i < 9; i++)
- color->ycc_to_rgb_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 13);
- for (int i = 0; i < 3; i++) {
- int denom = profile == 4 ? (1 << 30) : (1 << 28);
- unsigned offset = get_bits_long(gb, 32);
- if (offset > INT_MAX) {
- /* Ensure the result fits inside AVRational */
- offset >>= 1;
- denom >>= 1;
- }
- color->ycc_to_rgb_offset[i] = av_make_q(offset, denom);
- }
- for (int i = 0; i < 9; i++)
- color->rgb_to_lms_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 14);
-
- color->signal_eotf = get_bits(gb, 16);
- color->signal_eotf_param0 = get_bits(gb, 16);
- color->signal_eotf_param1 = get_bits(gb, 16);
- color->signal_eotf_param2 = get_bits_long(gb, 32);
- color->signal_bit_depth = get_bits(gb, 5);
- VALIDATE(color->signal_bit_depth, 8, 16);
- color->signal_color_space = get_bits(gb, 2);
- color->signal_chroma_format = get_bits(gb, 2);
- color->signal_full_range_flag = get_bits(gb, 2);
- color->source_min_pq = get_bits(gb, 12);
- color->source_max_pq = get_bits(gb, 12);
- color->source_diagonal = get_bits(gb, 10);
- }
-
- /* Parse extension blocks */
- s->num_ext_blocks = 0;
- if ((ret = parse_ext_blocks(s, gb, 1)) < 0) {
- ff_dovi_ctx_unref(s);
- return ret;
- }
-
- if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
- if ((ret = parse_ext_blocks(s, gb, 2)) < 0) {
- ff_dovi_ctx_unref(s);
- return ret;
- }
- }
-
- return 0;
-
-fail:
- ff_dovi_ctx_unref(s); /* don't leak potentially invalid state */
- return AVERROR_INVALIDDATA;
-}
diff --git a/libavcodec/dovi_rpu.h b/libavcodec/dovi_rpu.h
index 9f26f332ce..8ce0c88e9d 100644
--- a/libavcodec/dovi_rpu.h
+++ b/libavcodec/dovi_rpu.h
@@ -26,26 +26,47 @@
#include "libavutil/dovi_meta.h"
#include "libavutil/frame.h"
+#include "avcodec.h"
#define DOVI_MAX_DM_ID 15
typedef struct DOVIContext {
void *logctx;
/**
- * Currently active RPU data header, updates on every dovi_rpu_parse().
+ * Enable tri-state. For encoding only. FF_DOVI_AUTOMATIC enables Dolby
+ * Vision only if avctx->decoded_side_data contains an AVDOVIMetadata.
+ */
+#define FF_DOVI_AUTOMATIC -1
+ int enable;
+
+ /**
+ * Currently active dolby vision configuration, or {0} for none.
+ * Set by the user when decoding. Generated by ff_dovi_configure()
+ * when encoding.
+ *
+ * Note: sizeof(cfg) is not part of the libavutil ABI, so users should
+ * never pass &cfg to any other library calls. This is included merely as
+ * a way to look up the values of fields known at compile time.
+ */
+ AVDOVIDecoderConfigurationRecord cfg;
+
+ /**
+ * Currently active RPU data header, updates on every ff_dovi_rpu_parse()
+ * or ff_dovi_rpu_generate().
*/
AVDOVIRpuDataHeader header;
/**
* Currently active data mappings, or NULL. Points into memory owned by the
* corresponding rpu/vdr_ref, which becomes invalid on the next call to
- * dovi_rpu_parse.
+ * ff_dovi_rpu_parse() or ff_dovi_rpu_generate().
*/
const AVDOVIDataMapping *mapping;
const AVDOVIColorMetadata *color;
/**
* Currently active extension blocks, updates on every ff_dovi_rpu_parse()
+ * or ff_dovi_rpu_generate().
*/
AVDOVIDmData *ext_blocks;
int num_ext_blocks;
@@ -56,7 +77,6 @@ typedef struct DOVIContext {
struct DOVIVdr *vdr[DOVI_MAX_DM_ID+1]; ///< RefStruct references
uint8_t *rpu_buf; ///< temporary buffer
unsigned rpu_buf_sz;
- uint8_t dv_profile;
} DOVIContext;
@@ -68,22 +88,20 @@ void ff_dovi_ctx_replace(DOVIContext *s, const DOVIContext *s0);
void ff_dovi_ctx_unref(DOVIContext *s);
/**
- * Partially reset the internal state. Resets per-frame state while preserving
- * fields parsed from the configuration record.
+ * Partially reset the internal state. Resets per-frame state, but preserves
+ * the stream-wide configuration record.
*/
void ff_dovi_ctx_flush(DOVIContext *s);
/**
- * Read the contents of an AVDOVIDecoderConfigurationRecord (usually provided
- * by stream side data) and update internal state accordingly.
- */
-void ff_dovi_update_cfg(DOVIContext *s, const AVDOVIDecoderConfigurationRecord *cfg);
-
-/**
* Parse the contents of a Dovi RPU NAL and update the parsed values in the
* DOVIContext struct.
*
* Returns 0 or an error code.
+ *
+ * Note: `DOVIContext.cfg` should be initialized before calling into this
+ * function. If not done, the profile will be guessed according to HEVC
+ * semantics.
*/
int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
int err_recognition);
@@ -93,4 +111,50 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
*/
int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame);
+/**
+ * Configure the encoder for Dolby Vision encoding. Generates a configuration
+ * record in s->cfg, and attaches it to avctx->coded_side_data. Sets the correct
+ * profile and compatibility ID based on the tagged AVCodecContext colorspace
+ * metadata, and the correct level based on the resolution and tagged framerate.
+ *
+ * Returns 0 or a negative error code.
+ */
+int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx);
+
+
+/***************************************************
+ * The following section is for internal use only. *
+ ***************************************************/
+
+typedef struct DOVIVdr {
+ AVDOVIDataMapping mapping;
+ AVDOVIColorMetadata color;
+} DOVIVdr;
+
+enum {
+ RPU_COEFF_FIXED = 0,
+ RPU_COEFF_FLOAT = 1,
+};
+
+/**
+ * Synthesize a Dolby Vision RPU reflecting the current state. Note that this
+ * assumes all previous calls to `ff_dovi_rpu_generate` have been appropriately
+ * signalled, i.e. it will not re-send already transmitted redundant data.
+ *
+ * Mutates the internal state of DOVIContext to reflect the change.
+ * Returns 0 or a negative error code.
+ *
+ * This generates a fully formed RPU ready for inclusion in the bitstream,
+ * including the EMDF header (profile 10) or NAL encapsulation (otherwise).
+ */
+int ff_dovi_rpu_generate(DOVIContext *s, const AVDOVIMetadata *metadata,
+ uint8_t **out_rpu, int *out_size);
+
+/**
+ * Internal helper function to guess the correct DV profile for HEVC.
+ *
+ * Returns the profile number or 0 if unknown.
+ */
+int ff_dovi_guess_profile_hevc(const AVDOVIRpuDataHeader *hdr);
+
#endif /* AVCODEC_DOVI_RPU_H */
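
With ff_dovi_update_cfg() removed, a caller is now expected to fill DOVIContext.cfg itself before parsing RPUs; a minimal sketch, assuming the configuration record is available as AV_PKT_DATA_DOVI_CONF coded side data on the codec context:

static void dovi_load_cfg(DOVIContext *s, const AVCodecContext *avctx)
{
    const AVPacketSideData *sd =
        av_packet_side_data_get(avctx->coded_side_data,
                                avctx->nb_coded_side_data,
                                AV_PKT_DATA_DOVI_CONF);
    if (sd)
        s->cfg = *(const AVDOVIDecoderConfigurationRecord *) sd->data;
    /* otherwise leave s->cfg zeroed; ff_dovi_rpu_parse() then falls back to
     * guessing the profile via ff_dovi_guess_profile_hevc() */
}
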
diff --git a/libavcodec/dovi_rpudec.c b/libavcodec/dovi_rpudec.c
new file mode 100644
index 0000000000..7c7eda9d09
--- /dev/null
+++ b/libavcodec/dovi_rpudec.c
@@ -0,0 +1,635 @@
+/*
+ * Dolby Vision RPU decoder
+ *
+ * Copyright (C) 2021 Jan Ekström
+ * Copyright (C) 2021-2024 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "libavutil/crc.h"
+
+#include "avcodec.h"
+#include "dovi_rpu.h"
+#include "golomb.h"
+#include "get_bits.h"
+#include "refstruct.h"
+
+int ff_dovi_attach_side_data(DOVIContext *s, AVFrame *frame)
+{
+ AVFrameSideData *sd;
+ AVBufferRef *buf;
+ AVDOVIMetadata *dovi;
+ size_t dovi_size, ext_sz;
+
+ if (!s->mapping || !s->color)
+ return 0; /* incomplete dovi metadata */
+
+ dovi = av_dovi_metadata_alloc(&dovi_size);
+ if (!dovi)
+ return AVERROR(ENOMEM);
+
+ buf = av_buffer_create((uint8_t *) dovi, dovi_size, NULL, NULL, 0);
+ if (!buf) {
+ av_free(dovi);
+ return AVERROR(ENOMEM);
+ }
+
+ sd = av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DOVI_METADATA, buf);
+ if (!sd) {
+ av_buffer_unref(&buf);
+ return AVERROR(ENOMEM);
+ }
+
+ /* Copy only the parts of these structs known to us at compiler-time. */
+#define COPY(t, a, b, last) memcpy(a, b, offsetof(t, last) + sizeof((b)->last))
+ COPY(AVDOVIRpuDataHeader, av_dovi_get_header(dovi), &s->header, disable_residual_flag);
+ COPY(AVDOVIDataMapping, av_dovi_get_mapping(dovi), s->mapping, nlq_pivots);
+ COPY(AVDOVIColorMetadata, av_dovi_get_color(dovi), s->color, source_diagonal);
+ ext_sz = FFMIN(sizeof(AVDOVIDmData), dovi->ext_block_size);
+ for (int i = 0; i < s->num_ext_blocks; i++)
+ memcpy(av_dovi_get_ext(dovi, i), &s->ext_blocks[i], ext_sz);
+ dovi->num_ext_blocks = s->num_ext_blocks;
+ return 0;
+}
+
+static inline uint64_t get_ue_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
+{
+ uint64_t ipart;
+ union { uint32_t u32; float f32; } fpart;
+
+ switch (hdr->coef_data_type) {
+ case RPU_COEFF_FIXED:
+ ipart = get_ue_golomb_long(gb);
+ fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
+ return (ipart << hdr->coef_log2_denom) | fpart.u32;
+
+ case RPU_COEFF_FLOAT:
+ fpart.u32 = get_bits_long(gb, 32);
+ return fpart.f32 * (1LL << hdr->coef_log2_denom);
+ }
+
+ return 0; /* unreachable */
+}
+
+static inline int64_t get_se_coef(GetBitContext *gb, const AVDOVIRpuDataHeader *hdr)
+{
+ int64_t ipart;
+ union { uint32_t u32; float f32; } fpart;
+
+ switch (hdr->coef_data_type) {
+ case RPU_COEFF_FIXED:
+ ipart = get_se_golomb_long(gb);
+ fpart.u32 = get_bits_long(gb, hdr->coef_log2_denom);
+ return ipart * (1LL << hdr->coef_log2_denom) | fpart.u32;
+
+ case RPU_COEFF_FLOAT:
+ fpart.u32 = get_bits_long(gb, 32);
+ return fpart.f32 * (1LL << hdr->coef_log2_denom);
+ }
+
+ return 0; /* unreachable */
+}
+
+static inline unsigned get_variable_bits(GetBitContext *gb, int n)
+{
+ unsigned int value = get_bits(gb, n);
+ int read_more = get_bits1(gb);
+ while (read_more) {
+ value = (value + 1) << n;
+ value |= get_bits(gb, n);
+ read_more = get_bits1(gb);
+ }
+ return value;
+}
+
+#define VALIDATE(VAR, MIN, MAX) \
+ do { \
+ if (VAR < MIN || VAR > MAX) { \
+ av_log(s->logctx, AV_LOG_ERROR, "RPU validation failed: " \
+ #MIN" <= "#VAR" = %d <= "#MAX"\n", (int) VAR); \
+ goto fail; \
+ } \
+ } while (0)
+
+static void parse_ext_v1(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm)
+{
+ switch (dm->level) {
+ case 1:
+ dm->l1.min_pq = get_bits(gb, 12);
+ dm->l1.max_pq = get_bits(gb, 12);
+ dm->l1.avg_pq = get_bits(gb, 12);
+ break;
+ case 2:
+ dm->l2.target_max_pq = get_bits(gb, 12);
+ dm->l2.trim_slope = get_bits(gb, 12);
+ dm->l2.trim_offset = get_bits(gb, 12);
+ dm->l2.trim_power = get_bits(gb, 12);
+ dm->l2.trim_chroma_weight = get_bits(gb, 12);
+ dm->l2.trim_saturation_gain = get_bits(gb, 12);
+ dm->l2.ms_weight = get_bits(gb, 13) - 8192;
+ break;
+ case 4:
+ dm->l4.anchor_pq = get_bits(gb, 12);
+ dm->l4.anchor_power = get_bits(gb, 12);
+ break;
+ case 5:
+ dm->l5.left_offset = get_bits(gb, 13);
+ dm->l5.right_offset = get_bits(gb, 13);
+ dm->l5.top_offset = get_bits(gb, 13);
+ dm->l5.bottom_offset = get_bits(gb, 13);
+ break;
+ case 6:
+ dm->l6.max_luminance = get_bits(gb, 16);
+ dm->l6.min_luminance = get_bits(gb, 16);
+ dm->l6.max_cll = get_bits(gb, 16);
+ dm->l6.max_fall = get_bits(gb, 16);
+ break;
+ case 255:
+ dm->l255.dm_run_mode = get_bits(gb, 8);
+ dm->l255.dm_run_version = get_bits(gb, 8);
+ for (int i = 0; i < 4; i++)
+ dm->l255.dm_debug[i] = get_bits(gb, 8);
+ break;
+ default:
+ av_log(s->logctx, AV_LOG_WARNING,
+ "Unknown Dolby Vision DM v1 level: %u\n", dm->level);
+ }
+}
+
+static AVCIExy get_cie_xy(GetBitContext *gb)
+{
+ AVCIExy xy;
+ const int denom = 32767;
+ xy.x = av_make_q(get_sbits(gb, 16), denom);
+ xy.y = av_make_q(get_sbits(gb, 16), denom);
+ return xy;
+}
+
+static void parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
+ int ext_block_length)
+{
+ switch (dm->level) {
+ case 3:
+ dm->l3.min_pq_offset = get_bits(gb, 12);
+ dm->l3.max_pq_offset = get_bits(gb, 12);
+ dm->l3.avg_pq_offset = get_bits(gb, 12);
+ break;
+ case 8:
+ dm->l8.target_display_index = get_bits(gb, 8);
+ dm->l8.trim_slope = get_bits(gb, 12);
+ dm->l8.trim_offset = get_bits(gb, 12);
+ dm->l8.trim_power = get_bits(gb, 12);
+ dm->l8.trim_chroma_weight = get_bits(gb, 12);
+ dm->l8.trim_saturation_gain = get_bits(gb, 12);
+ dm->l8.ms_weight = get_bits(gb, 12) - 8192;
+ if (ext_block_length < 12)
+ break;
+ dm->l8.target_mid_contrast = get_bits(gb, 12);
+ if (ext_block_length < 13)
+ break;
+ dm->l8.clip_trim = get_bits(gb, 12);
+ if (ext_block_length < 19)
+ break;
+ for (int i = 0; i < 6; i++)
+ dm->l8.saturation_vector_field[i] = get_bits(gb, 8);
+ if (ext_block_length < 25)
+ break;
+ for (int i = 0; i < 6; i++)
+ dm->l8.hue_vector_field[i] = get_bits(gb, 8);
+ break;
+ case 9:
+ dm->l9.source_primary_index = get_bits(gb, 8);
+ if (ext_block_length < 17)
+ break;
+ dm->l9.source_display_primaries.prim.r = get_cie_xy(gb);
+ dm->l9.source_display_primaries.prim.g = get_cie_xy(gb);
+ dm->l9.source_display_primaries.prim.b = get_cie_xy(gb);
+ dm->l9.source_display_primaries.wp = get_cie_xy(gb);
+ break;
+ case 10:
+ dm->l10.target_display_index = get_bits(gb, 8);
+ dm->l10.target_max_pq = get_bits(gb, 12);
+ dm->l10.target_min_pq = get_bits(gb, 12);
+ dm->l10.target_primary_index = get_bits(gb, 8);
+ if (ext_block_length < 21)
+ break;
+ dm->l10.target_display_primaries.prim.r = get_cie_xy(gb);
+ dm->l10.target_display_primaries.prim.g = get_cie_xy(gb);
+ dm->l10.target_display_primaries.prim.b = get_cie_xy(gb);
+ dm->l10.target_display_primaries.wp = get_cie_xy(gb);
+ break;
+ case 11:
+ dm->l11.content_type = get_bits(gb, 8);
+ dm->l11.whitepoint = get_bits(gb, 4);
+ dm->l11.reference_mode_flag = get_bits1(gb);
+ skip_bits(gb, 3); /* reserved */
+ dm->l11.sharpness = get_bits(gb, 2);
+ dm->l11.noise_reduction = get_bits(gb, 2);
+ dm->l11.mpeg_noise_reduction = get_bits(gb, 2);
+ dm->l11.frame_rate_conversion = get_bits(gb, 2);
+ dm->l11.brightness = get_bits(gb, 2);
+ dm->l11.color = get_bits(gb, 2);
+ break;
+ case 254:
+ dm->l254.dm_mode = get_bits(gb, 8);
+ dm->l254.dm_version_index = get_bits(gb, 8);
+ break;
+ default:
+ av_log(s->logctx, AV_LOG_WARNING,
+ "Unknown Dolby Vision DM v2 level: %u\n", dm->level);
+ }
+}
+
+static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
+{
+ int num_ext_blocks, ext_block_length, start_pos, parsed_bits;
+
+ num_ext_blocks = get_ue_golomb_31(gb);
+ align_get_bits(gb);
+ if (s->num_ext_blocks + num_ext_blocks > AV_DOVI_MAX_EXT_BLOCKS)
+ return AVERROR_INVALIDDATA;
+
+ if (!s->ext_blocks) {
+ s->ext_blocks = ff_refstruct_allocz(sizeof(AVDOVIDmData) * AV_DOVI_MAX_EXT_BLOCKS);
+ if (!s->ext_blocks)
+ return AVERROR(ENOMEM);
+ }
+
+ while (num_ext_blocks--) {
+ AVDOVIDmData *dm = &s->ext_blocks[s->num_ext_blocks++];
+ ext_block_length = get_ue_golomb_31(gb);
+ dm->level = get_bits(gb, 8);
+ start_pos = get_bits_count(gb);
+
+ switch (ver) {
+ case 1: parse_ext_v1(s, gb, dm); break;
+ case 2: parse_ext_v2(s, gb, dm, ext_block_length); break;
+ }
+
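+ /* Each extension block's payload is ext_block_length bytes; reject
+ * overruns and skip any bits the level-specific parser left unread */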
+ parsed_bits = get_bits_count(gb) - start_pos;
+ if (parsed_bits > ext_block_length * 8)
+ return AVERROR_INVALIDDATA;
+ skip_bits(gb, ext_block_length * 8 - parsed_bits);
+ }
+
+ return 0;
+}
+
+int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
+ int err_recognition)
+{
+ AVDOVIRpuDataHeader *hdr = &s->header;
+ GetBitContext *gb = &(GetBitContext){0};
+ DOVIVdr *vdr;
+ int ret;
+
+ uint8_t rpu_type;
+ uint8_t vdr_seq_info_present;
+ uint8_t vdr_dm_metadata_present;
+ uint8_t use_prev_vdr_rpu;
+ uint8_t use_nlq;
+ uint8_t profile;
+
+ if (rpu_size < 5)
+ goto fail;
+
+ /* Container */
+ if (s->cfg.dv_profile == 10 /* dav1.10 */) {
+ /* DV inside AV1 re-uses an EMDF container skeleton, but with fixed
+ * values - so we can effectively treat this as a magic byte sequence.
+ *
+ * The exact fields are as follows:
+ * emdf_version : f(2) = 0
+ * key_id : f(3) = 6
+ * emdf_payload_id : f(5) = 31
+ * emdf_payload_id_ext : var(5) = 225
+ * smploffste : f(1) = 0
+ * duratione : f(1) = 0
+ * groupide : f(1) = 0
+ * codecdatae : f(1) = 0
+ * discard_unknown_payload : f(1) = 1
+ */
+ const unsigned header_magic = 0x01be6841u;
+ unsigned emdf_header, emdf_payload_size, emdf_protection;
+ if ((ret = init_get_bits8(gb, rpu, rpu_size)) < 0)
+ return ret;
+ emdf_header = get_bits_long(gb, 27);
+ VALIDATE(emdf_header, header_magic, header_magic);
+ emdf_payload_size = get_variable_bits(gb, 8);
+ VALIDATE(emdf_payload_size, 6, 512);
+ if (emdf_payload_size * 8 > get_bits_left(gb))
+ return AVERROR_INVALIDDATA;
+
+ /* The payload is not byte-aligned (off by *one* bit, curse Dolby),
+ * so copy into a fresh buffer to preserve byte alignment of the
+ * RPU struct */
+ av_fast_padded_malloc(&s->rpu_buf, &s->rpu_buf_sz, emdf_payload_size);
+ if (!s->rpu_buf)
+ return AVERROR(ENOMEM);
+ for (int i = 0; i < emdf_payload_size; i++)
+ s->rpu_buf[i] = get_bits(gb, 8);
+ rpu = s->rpu_buf;
+ rpu_size = emdf_payload_size;
+
+ /* Validate EMDF footer */
+ emdf_protection = get_bits(gb, 5 + 12);
+ VALIDATE(emdf_protection, 0x400, 0x400);
+ } else {
+ /* NAL RBSP with prefix and trailing zeroes */
+ VALIDATE(rpu[0], 25, 25); /* NAL prefix */
+ rpu++;
+ rpu_size--;
+ /* Strip trailing padding bytes */
+ while (rpu_size && rpu[rpu_size - 1] == 0)
+ rpu_size--;
+ }
+
+ if (!rpu_size || rpu[rpu_size - 1] != 0x80)
+ goto fail;
+
+ if (err_recognition & AV_EF_CRCCHECK) {
+ uint32_t crc = av_bswap32(av_crc(av_crc_get_table(AV_CRC_32_IEEE),
+ -1, rpu, rpu_size - 1)); /* exclude 0x80 */
+ if (crc) {
+ av_log(s->logctx, AV_LOG_ERROR, "RPU CRC mismatch: %X\n", crc);
+ if (err_recognition & AV_EF_EXPLODE)
+ goto fail;
+ }
+ }
+
+ if ((ret = init_get_bits8(gb, rpu, rpu_size)) < 0)
+ return ret;
+
+ /* RPU header */
+ rpu_type = get_bits(gb, 6);
+ if (rpu_type != 2) {
+ av_log(s->logctx, AV_LOG_WARNING, "Unrecognized RPU type "
+ "%"PRIu8", ignoring\n", rpu_type);
+ return 0;
+ }
+
+ hdr->rpu_type = rpu_type;
+ hdr->rpu_format = get_bits(gb, 11);
+
+ /* Values specific to RPU type 2 */
+ hdr->vdr_rpu_profile = get_bits(gb, 4);
+ hdr->vdr_rpu_level = get_bits(gb, 4);
+
+ vdr_seq_info_present = get_bits1(gb);
+ if (vdr_seq_info_present) {
+ hdr->chroma_resampling_explicit_filter_flag = get_bits1(gb);
+ hdr->coef_data_type = get_bits(gb, 2);
+ VALIDATE(hdr->coef_data_type, RPU_COEFF_FIXED, RPU_COEFF_FLOAT);
+ switch (hdr->coef_data_type) {
+ case RPU_COEFF_FIXED:
+ hdr->coef_log2_denom = get_ue_golomb(gb);
+ VALIDATE(hdr->coef_log2_denom, 13, 32);
+ break;
+ case RPU_COEFF_FLOAT:
+ hdr->coef_log2_denom = 32; /* arbitrary, choose maximum precision */
+ break;
+ }
+
+ hdr->vdr_rpu_normalized_idc = get_bits(gb, 2);
+ hdr->bl_video_full_range_flag = get_bits1(gb);
+
+ if ((hdr->rpu_format & 0x700) == 0) {
+ int bl_bit_depth_minus8 = get_ue_golomb_31(gb);
+ int el_bit_depth_minus8 = get_ue_golomb_31(gb);
+ int vdr_bit_depth_minus8 = get_ue_golomb_31(gb);
+ VALIDATE(bl_bit_depth_minus8, 0, 8);
+ VALIDATE(el_bit_depth_minus8, 0, 8);
+ VALIDATE(vdr_bit_depth_minus8, 0, 8);
+ hdr->bl_bit_depth = bl_bit_depth_minus8 + 8;
+ hdr->el_bit_depth = el_bit_depth_minus8 + 8;
+ hdr->vdr_bit_depth = vdr_bit_depth_minus8 + 8;
+ hdr->spatial_resampling_filter_flag = get_bits1(gb);
+ skip_bits(gb, 3); /* reserved_zero_3bits */
+ hdr->el_spatial_resampling_filter_flag = get_bits1(gb);
+ hdr->disable_residual_flag = get_bits1(gb);
+ }
+ } else {
+ /* lack of documentation/samples */
+ avpriv_request_sample(s->logctx, "Missing RPU VDR sequence info");
+ ff_dovi_ctx_unref(s);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ vdr_dm_metadata_present = get_bits1(gb);
+ use_prev_vdr_rpu = get_bits1(gb);
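+ /* NLQ (non-linear quantization) data is only present when the RPU
+ * carries enhancement-layer residuals */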
+ use_nlq = (hdr->rpu_format & 0x700) == 0 && !hdr->disable_residual_flag;
+
+ profile = s->cfg.dv_profile ? s->cfg.dv_profile : ff_dovi_guess_profile_hevc(hdr);
+ if (profile == 5 && use_nlq) {
+ av_log(s->logctx, AV_LOG_ERROR, "Profile 5 RPUs should not use NLQ\n");
+ goto fail;
+ }
+
+ if (use_prev_vdr_rpu) {
+ int prev_vdr_rpu_id = get_ue_golomb_31(gb);
+ VALIDATE(prev_vdr_rpu_id, 0, DOVI_MAX_DM_ID);
+ if (!s->vdr[prev_vdr_rpu_id]) {
+ av_log(s->logctx, AV_LOG_ERROR, "Unknown previous RPU ID: %u\n",
+ prev_vdr_rpu_id);
+ goto fail;
+ }
+ vdr = s->vdr[prev_vdr_rpu_id];
+ s->mapping = &vdr->mapping;
+ } else {
+ int vdr_rpu_id = get_ue_golomb_31(gb);
+ VALIDATE(vdr_rpu_id, 0, DOVI_MAX_DM_ID);
+ if (!s->vdr[vdr_rpu_id]) {
+ s->vdr[vdr_rpu_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
+ if (!s->vdr[vdr_rpu_id])
+ return AVERROR(ENOMEM);
+ }
+
+ vdr = s->vdr[vdr_rpu_id];
+ s->mapping = &vdr->mapping;
+
+ vdr->mapping.vdr_rpu_id = vdr_rpu_id;
+ vdr->mapping.mapping_color_space = get_ue_golomb_31(gb);
+ vdr->mapping.mapping_chroma_format_idc = get_ue_golomb_31(gb);
+
+ for (int c = 0; c < 3; c++) {
+ AVDOVIReshapingCurve *curve = &vdr->mapping.curves[c];
+ int num_pivots_minus_2 = get_ue_golomb_31(gb);
+ int pivot = 0;
+
+ VALIDATE(num_pivots_minus_2, 0, AV_DOVI_MAX_PIECES - 1);
+ curve->num_pivots = num_pivots_minus_2 + 2;
+ for (int i = 0; i < curve->num_pivots; i++) {
+ pivot += get_bits(gb, hdr->bl_bit_depth);
+ curve->pivots[i] = av_clip_uint16(pivot);
+ }
+ }
+
+ if (use_nlq) {
+ int nlq_pivot = 0;
+ vdr->mapping.nlq_method_idc = get_bits(gb, 3);
+
+ for (int i = 0; i < 2; i++) {
+ nlq_pivot += get_bits(gb, hdr->bl_bit_depth);
+ vdr->mapping.nlq_pivots[i] = av_clip_uint16(nlq_pivot);
+ }
+
+ /**
+ * The patent mentions another legal value, NLQ_MU_LAW, but it's
+ * not documented anywhere how to parse or apply that type of NLQ.
+ */
+ VALIDATE(vdr->mapping.nlq_method_idc, 0, AV_DOVI_NLQ_LINEAR_DZ);
+ } else {
+ vdr->mapping.nlq_method_idc = AV_DOVI_NLQ_NONE;
+ }
+
+ vdr->mapping.num_x_partitions = get_ue_golomb_long(gb) + 1;
+ vdr->mapping.num_y_partitions = get_ue_golomb_long(gb) + 1;
+ /* End of rpu_data_header(), start of vdr_rpu_data_payload() */
+
+ for (int c = 0; c < 3; c++) {
+ AVDOVIReshapingCurve *curve = &vdr->mapping.curves[c];
+ for (int i = 0; i < curve->num_pivots - 1; i++) {
+ int mapping_idc = get_ue_golomb_31(gb);
+ VALIDATE(mapping_idc, 0, 1);
+ curve->mapping_idc[i] = mapping_idc;
+ switch (mapping_idc) {
+ case AV_DOVI_MAPPING_POLYNOMIAL: {
+ int poly_order_minus1 = get_ue_golomb_31(gb);
+ VALIDATE(poly_order_minus1, 0, 1);
+ curve->poly_order[i] = poly_order_minus1 + 1;
+ if (poly_order_minus1 == 0) {
+ int linear_interp_flag = get_bits1(gb);
+ if (linear_interp_flag) {
+ /* lack of documentation/samples */
+ avpriv_request_sample(s->logctx, "Dolby Vision "
+ "linear interpolation");
+ ff_dovi_ctx_unref(s);
+ return AVERROR_PATCHWELCOME;
+ }
+ }
+ for (int k = 0; k <= curve->poly_order[i]; k++)
+ curve->poly_coef[i][k] = get_se_coef(gb, hdr);
+ break;
+ }
+ case AV_DOVI_MAPPING_MMR: {
+ int mmr_order_minus1 = get_bits(gb, 2);
+ VALIDATE(mmr_order_minus1, 0, 2);
+ curve->mmr_order[i] = mmr_order_minus1 + 1;
+ curve->mmr_constant[i] = get_se_coef(gb, hdr);
+ for (int j = 0; j < curve->mmr_order[i]; j++) {
+ for (int k = 0; k < 7; k++)
+ curve->mmr_coef[i][j][k] = get_se_coef(gb, hdr);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if (use_nlq) {
+ for (int c = 0; c < 3; c++) {
+ AVDOVINLQParams *nlq = &vdr->mapping.nlq[c];
+ nlq->nlq_offset = get_bits(gb, hdr->el_bit_depth);
+ nlq->vdr_in_max = get_ue_coef(gb, hdr);
+ switch (vdr->mapping.nlq_method_idc) {
+ case AV_DOVI_NLQ_LINEAR_DZ:
+ nlq->linear_deadzone_slope = get_ue_coef(gb, hdr);
+ nlq->linear_deadzone_threshold = get_ue_coef(gb, hdr);
+ break;
+ }
+ }
+ }
+ }
+
+ if (vdr_dm_metadata_present) {
+ AVDOVIColorMetadata *color;
+ int affected_dm_id = get_ue_golomb_31(gb);
+ int current_dm_id = get_ue_golomb_31(gb);
+ VALIDATE(affected_dm_id, 0, DOVI_MAX_DM_ID);
+ VALIDATE(current_dm_id, 0, DOVI_MAX_DM_ID);
+ if (!s->vdr[affected_dm_id]) {
+ s->vdr[affected_dm_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
+ if (!s->vdr[affected_dm_id])
+ return AVERROR(ENOMEM);
+ }
+
+ if (!s->vdr[current_dm_id]) {
+ av_log(s->logctx, AV_LOG_ERROR, "Unknown previous RPU DM ID: %u\n",
+ current_dm_id);
+ goto fail;
+ }
+
+ /* Update current pointer based on current_dm_id */
+ vdr = s->vdr[current_dm_id];
+ s->color = &vdr->color;
+
+ /* Update values of affected_dm_id */
+ vdr = s->vdr[affected_dm_id];
+ color = &vdr->color;
+ color->dm_metadata_id = affected_dm_id;
+ color->scene_refresh_flag = get_ue_golomb_31(gb);
+ for (int i = 0; i < 9; i++)
+ color->ycc_to_rgb_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 13);
+ for (int i = 0; i < 3; i++) {
+ int denom = profile == 4 ? (1 << 30) : (1 << 28);
+ unsigned offset = get_bits_long(gb, 32);
+ if (offset > INT_MAX) {
+ /* Ensure the result fits inside AVRational */
+ offset >>= 1;
+ denom >>= 1;
+ }
+ color->ycc_to_rgb_offset[i] = av_make_q(offset, denom);
+ }
+ for (int i = 0; i < 9; i++)
+ color->rgb_to_lms_matrix[i] = av_make_q(get_sbits(gb, 16), 1 << 14);
+
+ color->signal_eotf = get_bits(gb, 16);
+ color->signal_eotf_param0 = get_bits(gb, 16);
+ color->signal_eotf_param1 = get_bits(gb, 16);
+ color->signal_eotf_param2 = get_bits_long(gb, 32);
+ color->signal_bit_depth = get_bits(gb, 5);
+ VALIDATE(color->signal_bit_depth, 8, 16);
+ color->signal_color_space = get_bits(gb, 2);
+ color->signal_chroma_format = get_bits(gb, 2);
+ color->signal_full_range_flag = get_bits(gb, 2);
+ color->source_min_pq = get_bits(gb, 12);
+ color->source_max_pq = get_bits(gb, 12);
+ color->source_diagonal = get_bits(gb, 10);
+ }
+
+ /* Parse extension blocks */
+ s->num_ext_blocks = 0;
+ if ((ret = parse_ext_blocks(s, gb, 1)) < 0) {
+ ff_dovi_ctx_unref(s);
+ return ret;
+ }
+
+ if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
+ if ((ret = parse_ext_blocks(s, gb, 2)) < 0) {
+ ff_dovi_ctx_unref(s);
+ return ret;
+ }
+ }
+
+ return 0;
+
+fail:
+ ff_dovi_ctx_unref(s); /* don't leak potentially invalid state */
+ return AVERROR_INVALIDDATA;
+}
diff --git a/libavcodec/dovi_rpuenc.c b/libavcodec/dovi_rpuenc.c
new file mode 100644
index 0000000000..3c3e0f84c0
--- /dev/null
+++ b/libavcodec/dovi_rpuenc.c
@@ -0,0 +1,743 @@
+/*
+ * Dolby Vision RPU encoder
+ *
+ * Copyright (C) 2024 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/crc.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "dovi_rpu.h"
+#include "itut35.h"
+#include "put_bits.h"
+#include "put_golomb.h"
+#include "refstruct.h"
+
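+/* Dolby Vision level limits, indexed by dv_level */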
+static struct {
+ uint64_t pps; // maximum pixels per second
+ int width; // maximum width
+ int main; // maximum bitrate in main tier
+ int high; // maximum bitrate in high tier
+} dv_levels[] = {
+ [1] = {1280*720*24, 1280, 20, 50},
+ [2] = {1280*720*30, 1280, 20, 50},
+ [3] = {1920*1080*24, 1920, 20, 70},
+ [4] = {1920*1080*30, 2560, 20, 70},
+ [5] = {1920*1080*60, 3840, 20, 70},
+ [6] = {3840*2160*24, 3840, 25, 130},
+ [7] = {3840*2160*30, 3840, 25, 130},
+ [8] = {3840*2160*48, 3840, 40, 130},
+ [9] = {3840*2160*60, 3840, 40, 130},
+ [10] = {3840*2160*120, 3840, 60, 240},
+ [11] = {3840*2160*120, 7680, 60, 240},
+ [12] = {7680*4320*60, 7680, 120, 450},
+ [13] = {7680*4320*120u, 7680, 240, 800},
+};
+
+int ff_dovi_configure(DOVIContext *s, AVCodecContext *avctx)
+{
+ AVDOVIDecoderConfigurationRecord *cfg;
+ const AVDOVIRpuDataHeader *hdr = NULL;
+ const AVFrameSideData *sd;
+ int dv_profile, dv_level, bl_compat_id = -1;
+ size_t cfg_size;
+ uint64_t pps;
+
+ if (!s->enable)
+ goto skip;
+
+ sd = av_frame_side_data_get(avctx->decoded_side_data,
+ avctx->nb_decoded_side_data, AV_FRAME_DATA_DOVI_METADATA);
+
+ if (sd)
+ hdr = av_dovi_get_header((const AVDOVIMetadata *) sd->data);
+
+ if (s->enable == FF_DOVI_AUTOMATIC && !hdr)
+ goto skip;
+
+ switch (avctx->codec_id) {
+ case AV_CODEC_ID_AV1: dv_profile = 10; break;
+ case AV_CODEC_ID_H264: dv_profile = 9; break;
+ case AV_CODEC_ID_HEVC: dv_profile = hdr ? ff_dovi_guess_profile_hevc(hdr) : 8; break;
+ default:
+ /* No other encoder should be calling this! */
+ av_assert0(0);
+ return AVERROR_BUG;
+ }
+
+ if (avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
+ if (dv_profile == 9) {
+ if (avctx->pix_fmt != AV_PIX_FMT_YUV420P)
+ dv_profile = 0;
+ } else {
+ if (avctx->pix_fmt != AV_PIX_FMT_YUV420P10)
+ dv_profile = 0;
+ }
+ }
+
+ switch (dv_profile) {
+ case 4: /* HEVC with enhancement layer */
+ case 7:
+ if (s->enable > 0) {
+ av_log(s->logctx, AV_LOG_ERROR, "Coding of Dolby Vision enhancement "
+ "layers is currently unsupported.");
+ return AVERROR_PATCHWELCOME;
+ } else {
+ goto skip;
+ }
+ case 5: /* HEVC with proprietary IPTPQc2 */
+ bl_compat_id = 0;
+ break;
+ case 10:
+ /* FIXME: check for proper H.273 tags once those are added */
+ if (hdr && hdr->bl_video_full_range_flag) {
+ /* AV1 with proprietary IPTPQc2 */
+ bl_compat_id = 0;
+ break;
+ }
+ /* fall through */
+ case 8: /* HEVC (or AV1) with BL compatibility */
+ if (avctx->colorspace == AVCOL_SPC_BT2020_NCL &&
+ avctx->color_primaries == AVCOL_PRI_BT2020 &&
+ avctx->color_trc == AVCOL_TRC_SMPTE2084) {
+ bl_compat_id = 1;
+ } else if (avctx->colorspace == AVCOL_SPC_BT2020_NCL &&
+ avctx->color_primaries == AVCOL_PRI_BT2020 &&
+ avctx->color_trc == AVCOL_TRC_ARIB_STD_B67) {
+ bl_compat_id = 4;
+ } else if (avctx->colorspace == AVCOL_SPC_BT709 &&
+ avctx->color_primaries == AVCOL_PRI_BT709 &&
+ avctx->color_trc == AVCOL_TRC_BT709) {
+ bl_compat_id = 2;
+ }
+ }
+
+ if (!dv_profile || bl_compat_id < 0) {
+ if (s->enable > 0) {
+ av_log(s->logctx, AV_LOG_ERROR, "Dolby Vision enabled, but could "
+ "not determine profile and compatibility mode. Double-check "
+ "colorspace and format settings for compatibility?\n");
+ return AVERROR(EINVAL);
+ }
+ goto skip;
+ }
+
+ pps = avctx->width * avctx->height;
+ if (avctx->framerate.num) {
+ pps = pps * avctx->framerate.num / avctx->framerate.den;
+ } else {
+ pps *= 25; /* sanity fallback */
+ }
+
+ dv_level = 0;
+ for (int i = 1; i < FF_ARRAY_ELEMS(dv_levels); i++) {
+ if (pps > dv_levels[i].pps)
+ continue;
+ if (avctx->width > dv_levels[i].width)
+ continue;
+ /* In theory, we should also test the bitrate when known, and
+ * distinguish between main and high tier. In practice, just ignore
+ * the bitrate constraints and hope they work out. This would ideally
+ * be handled by either the encoder or muxer directly. */
+ dv_level = i;
+ break;
+ }
+
+ if (!dv_level) {
+ if (avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
+ av_log(s->logctx, AV_LOG_ERROR, "Coded PPS (%"PRIu64") and width (%d) "
+ "exceed Dolby Vision limitations\n", pps, avctx->width);
+ return AVERROR(EINVAL);
+ } else {
+ av_log(s->logctx, AV_LOG_WARNING, "Coded PPS (%"PRIu64") and width (%d) "
+ "exceed Dolby Vision limitations. Ignoring, resulting file "
+ "may be non-conforming.\n", pps, avctx->width);
+ dv_level = FF_ARRAY_ELEMS(dv_levels) - 1;
+ }
+ }
+
+ cfg = av_dovi_alloc(&cfg_size);
+ if (!cfg)
+ return AVERROR(ENOMEM);
+
+ if (!av_packet_side_data_add(&avctx->coded_side_data, &avctx->nb_coded_side_data,
+ AV_PKT_DATA_DOVI_CONF, cfg, cfg_size, 0)) {
+ av_free(cfg);
+ return AVERROR(ENOMEM);
+ }
+
+ cfg->dv_version_major = 1;
+ cfg->dv_version_minor = 0;
+ cfg->dv_profile = dv_profile;
+ cfg->dv_level = dv_level;
+ cfg->rpu_present_flag = 1;
+ cfg->el_present_flag = 0;
+ cfg->bl_present_flag = 1;
+ cfg->dv_bl_signal_compatibility_id = bl_compat_id;
+
+ s->cfg = *cfg;
+ return 0;
+
+skip:
+ s->cfg = (AVDOVIDecoderConfigurationRecord) {0};
+ return 0;
+}
+
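+/* Serialize an unsigned coefficient: fixed-point coefficients are split into
+ * a ue(v) integer part plus coef_log2_denom fractional bits; float
+ * coefficients are scaled down by 2^coef_log2_denom and written as raw
+ * single-precision bits */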
+static inline void put_ue_coef(PutBitContext *pb, const AVDOVIRpuDataHeader *hdr,
+ uint64_t coef)
+{
+ union { uint32_t u32; float f32; } fpart;
+
+ switch (hdr->coef_data_type) {
+ case RPU_COEFF_FIXED:
+ set_ue_golomb(pb, coef >> hdr->coef_log2_denom);
+ put_bits64(pb, hdr->coef_log2_denom,
+ coef & ((1LL << hdr->coef_log2_denom) - 1));
+ break;
+ case RPU_COEFF_FLOAT:
+ fpart.f32 = coef / (float) (1LL << hdr->coef_log2_denom);
+ put_bits64(pb, hdr->coef_log2_denom, fpart.u32);
+ break;
+ }
+}
+
+static inline void put_se_coef(PutBitContext *pb, const AVDOVIRpuDataHeader *hdr,
+ uint64_t coef)
+{
+ union { uint32_t u32; float f32; } fpart;
+
+ switch (hdr->coef_data_type) {
+ case RPU_COEFF_FIXED:
+ set_se_golomb(pb, coef >> hdr->coef_log2_denom);
+ put_bits64(pb, hdr->coef_log2_denom,
+ coef & ((1LL << hdr->coef_log2_denom) - 1));
+ break;
+ case RPU_COEFF_FLOAT:
+ fpart.f32 = coef / (float) (1LL << hdr->coef_log2_denom);
+ put_bits64(pb, hdr->coef_log2_denom, fpart.u32);
+ break;
+ }
+}
+
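+/* Convert a rational to a fixed-point numerator with the given denominator,
+ * rounding to nearest */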
+static int av_q2den(AVRational q, int den)
+{
+ if (!q.den || q.den == den)
+ return q.num;
+ q = av_mul_q(q, av_make_q(den, 1));
+ return (q.num + (q.den >> 1)) / q.den;
+}
+
+static void generate_ext_v1(PutBitContext *pb, const AVDOVIDmData *dm)
+{
+ int ext_block_length, start_pos, pad_bits;
+
+ switch (dm->level) {
+ case 1: ext_block_length = 5; break;
+ case 2: ext_block_length = 11; break;
+ case 4: ext_block_length = 3; break;
+ case 5: ext_block_length = 7; break;
+ case 6: ext_block_length = 8; break;
+ case 255: ext_block_length = 6; break;
+ default: return;
+ }
+
+ set_ue_golomb(pb, ext_block_length);
+ put_bits(pb, 8, dm->level);
+ start_pos = put_bits_count(pb);
+
+ switch (dm->level) {
+ case 1:
+ put_bits(pb, 12, dm->l1.min_pq);
+ put_bits(pb, 12, dm->l1.max_pq);
+ put_bits(pb, 12, dm->l1.avg_pq);
+ break;
+ case 2:
+ put_bits(pb, 12, dm->l2.target_max_pq);
+ put_bits(pb, 12, dm->l2.trim_slope);
+ put_bits(pb, 12, dm->l2.trim_offset);
+ put_bits(pb, 12, dm->l2.trim_power);
+ put_bits(pb, 12, dm->l2.trim_chroma_weight);
+ put_bits(pb, 12, dm->l2.trim_saturation_gain);
+ put_bits(pb, 13, dm->l2.ms_weight + 8192);
+ break;
+ case 4:
+ put_bits(pb, 12, dm->l4.anchor_pq);
+ put_bits(pb, 12, dm->l4.anchor_power);
+ break;
+ case 5:
+ put_bits(pb, 13, dm->l5.left_offset);
+ put_bits(pb, 13, dm->l5.right_offset);
+ put_bits(pb, 13, dm->l5.top_offset);
+ put_bits(pb, 13, dm->l5.bottom_offset);
+ break;
+ case 6:
+ put_bits(pb, 16, dm->l6.max_luminance);
+ put_bits(pb, 16, dm->l6.min_luminance);
+ put_bits(pb, 16, dm->l6.max_cll);
+ put_bits(pb, 16, dm->l6.max_fall);
+ break;
+ case 255:
+ put_bits(pb, 8, dm->l255.dm_run_mode);
+ put_bits(pb, 8, dm->l255.dm_run_version);
+ for (int i = 0; i < 4; i++)
+ put_bits(pb, 8, dm->l255.dm_debug[i]);
+ break;
+ }
+
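+ /* Zero-pad the block to its declared ext_block_length */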
+ pad_bits = ext_block_length * 8 - (put_bits_count(pb) - start_pos);
+ av_assert1(pad_bits >= 0);
+ put_bits(pb, pad_bits, 0);
+}
+
+static void put_cie_xy(PutBitContext *pb, AVCIExy xy)
+{
+ const int denom = 32767;
+ put_sbits(pb, 16, av_q2den(xy.x, denom));
+ put_sbits(pb, 16, av_q2den(xy.y, denom));
+}
+
+#define ANY6(arr) (arr[0] || arr[1] || arr[2] || arr[3] || arr[4] || arr[5])
+#define ANY_XY(xy) (xy.x.num || xy.y.num)
+#define ANY_CSP(csp) (ANY_XY(csp.prim.r) || ANY_XY(csp.prim.g) || \
+ ANY_XY(csp.prim.b) || ANY_XY(csp.wp))
+
+static void generate_ext_v2(PutBitContext *pb, const AVDOVIDmData *dm)
+{
+ int ext_block_length, start_pos, pad_bits;
+
+ switch (dm->level) {
+ case 3: ext_block_length = 5; break;
+ case 8:
+ if (ANY6(dm->l8.hue_vector_field)) {
+ ext_block_length = 25;
+ } else if (ANY6(dm->l8.saturation_vector_field)) {
+ ext_block_length = 19;
+ } else if (dm->l8.clip_trim) {
+ ext_block_length = 13;
+ } else if (dm->l8.target_mid_contrast) {
+ ext_block_length = 12;
+ } else {
+ ext_block_length = 10;
+ }
+ break;
+ case 9:
+ if (ANY_CSP(dm->l9.source_display_primaries)) {
+ ext_block_length = 17;
+ } else {
+ ext_block_length = 1;
+ }
+ break;
+ case 10:
+ if (ANY_CSP(dm->l10.target_display_primaries)) {
+ ext_block_length = 21;
+ } else {
+ ext_block_length = 5;
+ }
+ break;
+ case 11: ext_block_length = 4; break;
+ case 254: ext_block_length = 2; break;
+ default: return;
+ }
+
+ set_ue_golomb(pb, ext_block_length);
+ put_bits(pb, 8, dm->level);
+ start_pos = put_bits_count(pb);
+
+ switch (dm->level) {
+ case 3:
+ put_bits(pb, 12, dm->l3.min_pq_offset);
+ put_bits(pb, 12, dm->l3.max_pq_offset);
+ put_bits(pb, 12, dm->l3.avg_pq_offset);
+ break;
+ case 8:
+ put_bits(pb, 8, dm->l8.target_display_index);
+ put_bits(pb, 12, dm->l8.trim_slope);
+ put_bits(pb, 12, dm->l8.trim_offset);
+ put_bits(pb, 12, dm->l8.trim_power);
+ put_bits(pb, 12, dm->l8.trim_chroma_weight);
+ put_bits(pb, 12, dm->l8.trim_saturation_gain);
+ put_bits(pb, 12, dm->l8.ms_weight + 8192);
+ if (ext_block_length < 12)
+ break;
+ put_bits(pb, 12, dm->l8.target_mid_contrast);
+ if (ext_block_length < 13)
+ break;
+ put_bits(pb, 12, dm->l8.clip_trim);
+ if (ext_block_length < 19)
+ break;
+ for (int i = 0; i < 6; i++)
+ put_bits(pb, 8, dm->l8.saturation_vector_field[i]);
+ if (ext_block_length < 25)
+ break;
+ for (int i = 0; i < 6; i++)
+ put_bits(pb, 8, dm->l8.hue_vector_field[i]);
+ break;
+ case 9:
+ put_bits(pb, 8, dm->l9.source_primary_index);
+ if (ext_block_length < 17)
+ break;
+ put_cie_xy(pb, dm->l9.source_display_primaries.prim.r);
+ put_cie_xy(pb, dm->l9.source_display_primaries.prim.g);
+ put_cie_xy(pb, dm->l9.source_display_primaries.prim.b);
+ put_cie_xy(pb, dm->l9.source_display_primaries.wp);
+ break;
+ case 10:
+ put_bits(pb, 8, dm->l10.target_display_index);
+ put_bits(pb, 12, dm->l10.target_max_pq);
+ put_bits(pb, 12, dm->l10.target_min_pq);
+ put_bits(pb, 8, dm->l10.target_primary_index);
+ if (ext_block_length < 21)
+ break;
+ put_cie_xy(pb, dm->l10.target_display_primaries.prim.r);
+ put_cie_xy(pb, dm->l10.target_display_primaries.prim.g);
+ put_cie_xy(pb, dm->l10.target_display_primaries.prim.b);
+ put_cie_xy(pb, dm->l10.target_display_primaries.wp);
+ break;
+ case 11:
+ put_bits(pb, 8, dm->l11.content_type);
+ put_bits(pb, 4, dm->l11.whitepoint);
+ put_bits(pb, 1, dm->l11.reference_mode_flag);
+ put_bits(pb, 3, 0); /* reserved */
+ put_bits(pb, 2, dm->l11.sharpness);
+ put_bits(pb, 2, dm->l11.noise_reduction);
+ put_bits(pb, 2, dm->l11.mpeg_noise_reduction);
+ put_bits(pb, 2, dm->l11.frame_rate_conversion);
+ put_bits(pb, 2, dm->l11.brightness);
+ put_bits(pb, 2, dm->l11.color);
+ break;
+ case 254:
+ put_bits(pb, 8, dm->l254.dm_mode);
+ put_bits(pb, 8, dm->l254.dm_version_index);
+ break;
+ }
+
+ pad_bits = ext_block_length * 8 - (put_bits_count(pb) - start_pos);
+ av_assert1(pad_bits >= 0);
+ put_bits(pb, pad_bits, 0);
+}
+
+int ff_dovi_rpu_generate(DOVIContext *s, const AVDOVIMetadata *metadata,
+ uint8_t **out_rpu, int *out_size)
+{
+ PutBitContext *pb = &(PutBitContext){0};
+ const AVDOVIRpuDataHeader *hdr;
+ const AVDOVIDataMapping *mapping;
+ const AVDOVIColorMetadata *color;
+ int vdr_dm_metadata_changed, vdr_rpu_id, use_prev_vdr_rpu, profile,
+ buffer_size, rpu_size, pad, zero_run;
+ int num_ext_blocks_v1, num_ext_blocks_v2;
+ uint32_t crc;
+ uint8_t *dst;
+ if (!metadata) {
+ *out_rpu = NULL;
+ *out_size = 0;
+ return 0;
+ }
+
+ hdr = av_dovi_get_header(metadata);
+ mapping = av_dovi_get_mapping(metadata);
+ color = av_dovi_get_color(metadata);
+ av_assert0(s->cfg.dv_profile);
+
+ if (hdr->rpu_type != 2) {
+ av_log(s->logctx, AV_LOG_ERROR, "Unhandled RPU type %"PRIu8"\n",
+ hdr->rpu_type);
+ return AVERROR_INVALIDDATA;
+ }
+
+ vdr_rpu_id = -1;
+ for (int i = 0; i <= DOVI_MAX_DM_ID; i++) {
+ if (s->vdr[i] && !memcmp(&s->vdr[i]->mapping, mapping, sizeof(*mapping))) {
+ vdr_rpu_id = i;
+ break;
+ } else if (vdr_rpu_id < 0 && (!s->vdr[i] || i == DOVI_MAX_DM_ID)) {
+ vdr_rpu_id = i;
+ }
+ }
+
+ if (!s->vdr[vdr_rpu_id]) {
+ s->vdr[vdr_rpu_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
+ if (!s->vdr[vdr_rpu_id])
+ return AVERROR(ENOMEM);
+ }
+
+ if (!s->vdr[color->dm_metadata_id]) {
+ s->vdr[color->dm_metadata_id] = ff_refstruct_allocz(sizeof(DOVIVdr));
+ if (!s->vdr[color->dm_metadata_id])
+ return AVERROR(ENOMEM);
+ }
+
+ num_ext_blocks_v1 = num_ext_blocks_v2 = 0;
+ for (int i = 0; i < metadata->num_ext_blocks; i++) {
+ const AVDOVIDmData *dm = av_dovi_get_ext(metadata, i);
+ switch (dm->level) {
+ case 1:
+ case 2:
+ case 4:
+ case 5:
+ case 6:
+ case 255:
+ num_ext_blocks_v1++;
+ break;
+ case 3:
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ case 254:
+ num_ext_blocks_v2++;
+ break;
+ default:
+ av_log(s->logctx, AV_LOG_ERROR, "Invalid ext block level %d\n",
+ dm->level);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ vdr_dm_metadata_changed = !s->color || memcmp(s->color, color, sizeof(*color));
+ use_prev_vdr_rpu = !memcmp(&s->vdr[vdr_rpu_id]->mapping, mapping, sizeof(*mapping));
+
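+ /* Rough worst-case estimate (in bytes) of the serialized RPU size */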
+ buffer_size = 12 /* vdr seq info */ + 5 /* CRC32 + terminator */;
+ buffer_size += num_ext_blocks_v1 * 13;
+ buffer_size += num_ext_blocks_v2 * 28;
+ if (!use_prev_vdr_rpu) {
+ buffer_size += 160;
+ for (int c = 0; c < 3; c++) {
+ for (int i = 0; i < mapping->curves[c].num_pivots - 1; i++) {
+ switch (mapping->curves[c].mapping_idc[i]) {
+ case AV_DOVI_MAPPING_POLYNOMIAL: buffer_size += 26; break;
+ case AV_DOVI_MAPPING_MMR: buffer_size += 177; break;
+ }
+ }
+ }
+ }
+ if (vdr_dm_metadata_changed)
+ buffer_size += 67;
+
+ av_fast_padded_malloc(&s->rpu_buf, &s->rpu_buf_sz, buffer_size);
+ if (!s->rpu_buf)
+ return AVERROR(ENOMEM);
+ init_put_bits(pb, s->rpu_buf, s->rpu_buf_sz);
+
+ /* RPU header */
+ put_bits(pb, 6, hdr->rpu_type);
+ put_bits(pb, 11, hdr->rpu_format);
+ put_bits(pb, 4, hdr->vdr_rpu_profile);
+ put_bits(pb, 4, hdr->vdr_rpu_level);
+ put_bits(pb, 1, 1); /* vdr_seq_info_present */
+ put_bits(pb, 1, hdr->chroma_resampling_explicit_filter_flag);
+ put_bits(pb, 2, hdr->coef_data_type);
+ if (hdr->coef_data_type == RPU_COEFF_FIXED)
+ set_ue_golomb(pb, hdr->coef_log2_denom);
+ put_bits(pb, 2, hdr->vdr_rpu_normalized_idc);
+ put_bits(pb, 1, hdr->bl_video_full_range_flag);
+ if ((hdr->rpu_format & 0x700) == 0) {
+ set_ue_golomb(pb, hdr->bl_bit_depth - 8);
+ set_ue_golomb(pb, hdr->el_bit_depth - 8);
+ set_ue_golomb(pb, hdr->vdr_bit_depth - 8);
+ put_bits(pb, 1, hdr->spatial_resampling_filter_flag);
+ put_bits(pb, 3, 0); /* reserved_zero_3bits */
+ put_bits(pb, 1, hdr->el_spatial_resampling_filter_flag);
+ put_bits(pb, 1, hdr->disable_residual_flag);
+ }
+ s->header = *hdr;
+
+ put_bits(pb, 1, vdr_dm_metadata_changed);
+ put_bits(pb, 1, use_prev_vdr_rpu);
+ set_ue_golomb(pb, vdr_rpu_id);
+ s->mapping = &s->vdr[vdr_rpu_id]->mapping;
+
+ profile = s->cfg.dv_profile ? s->cfg.dv_profile : ff_dovi_guess_profile_hevc(hdr);
+
+ if (!use_prev_vdr_rpu) {
+ set_ue_golomb(pb, mapping->mapping_color_space);
+ set_ue_golomb(pb, mapping->mapping_chroma_format_idc);
+ for (int c = 0; c < 3; c++) {
+ const AVDOVIReshapingCurve *curve = &mapping->curves[c];
+ int prev = 0;
+ set_ue_golomb(pb, curve->num_pivots - 2);
+ for (int i = 0; i < curve->num_pivots; i++) {
+ put_bits(pb, hdr->bl_bit_depth, curve->pivots[i] - prev);
+ prev = curve->pivots[i];
+ }
+ }
+
+ if (mapping->nlq_method_idc != AV_DOVI_NLQ_NONE) {
+ put_bits(pb, 3, mapping->nlq_method_idc);
+ put_bits(pb, hdr->bl_bit_depth, mapping->nlq_pivots[0]);
+ put_bits(pb, hdr->bl_bit_depth, mapping->nlq_pivots[1] - mapping->nlq_pivots[0]);
+ }
+
+ set_ue_golomb(pb, mapping->num_x_partitions - 1);
+ set_ue_golomb(pb, mapping->num_y_partitions - 1);
+
+ for (int c = 0; c < 3; c++) {
+ const AVDOVIReshapingCurve *curve = &mapping->curves[c];
+ for (int i = 0; i < curve->num_pivots - 1; i++) {
+ set_ue_golomb(pb, curve->mapping_idc[i]);
+ switch (curve->mapping_idc[i]) {
+ case AV_DOVI_MAPPING_POLYNOMIAL: {
+ set_ue_golomb(pb, curve->poly_order[i] - 1);
+ if (curve->poly_order[i] == 1)
+ put_bits(pb, 1, 0); /* linear_interp_flag */
+ for (int k = 0; k <= curve->poly_order[i]; k++)
+ put_se_coef(pb, hdr, curve->poly_coef[i][k]);
+ break;
+ }
+ case AV_DOVI_MAPPING_MMR: {
+ put_bits(pb, 2, curve->mmr_order[i] - 1);
+ put_se_coef(pb, hdr, curve->mmr_constant[i]);
+ for (int j = 0; j < curve->mmr_order[i]; j++) {
+ for (int k = 0; k < 7; k++)
+ put_se_coef(pb, hdr, curve->mmr_coef[i][j][k]);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if (mapping->nlq_method_idc != AV_DOVI_NLQ_NONE) {
+ for (int c = 0; c < 3; c++) {
+ const AVDOVINLQParams *nlq = &mapping->nlq[c];
+ put_bits(pb, hdr->el_bit_depth, nlq->nlq_offset);
+ put_ue_coef(pb, hdr, nlq->vdr_in_max);
+ switch (mapping->nlq_method_idc) {
+ case AV_DOVI_NLQ_LINEAR_DZ:
+ put_ue_coef(pb, hdr, nlq->linear_deadzone_slope);
+ put_ue_coef(pb, hdr, nlq->linear_deadzone_threshold);
+ break;
+ }
+ }
+ }
+
+ memcpy(&s->vdr[vdr_rpu_id]->mapping, mapping, sizeof(*mapping));
+ }
+
+ if (vdr_dm_metadata_changed) {
+ const int denom = profile == 4 ? (1 << 30) : (1 << 28);
+ set_ue_golomb(pb, color->dm_metadata_id); /* affected_dm_id */
+ set_ue_golomb(pb, color->dm_metadata_id); /* current_dm_id */
+ set_ue_golomb(pb, color->scene_refresh_flag);
+ for (int i = 0; i < 9; i++)
+ put_sbits(pb, 16, av_q2den(color->ycc_to_rgb_matrix[i], 1 << 13));
+ for (int i = 0; i < 3; i++)
+ put_bits32(pb, av_q2den(color->ycc_to_rgb_offset[i], denom));
+ for (int i = 0; i < 9; i++)
+ put_sbits(pb, 16, av_q2den(color->rgb_to_lms_matrix[i], 1 << 14));
+ put_bits(pb, 16, color->signal_eotf);
+ put_bits(pb, 16, color->signal_eotf_param0);
+ put_bits(pb, 16, color->signal_eotf_param1);
+ put_bits32(pb, color->signal_eotf_param2);
+ put_bits(pb, 5, color->signal_bit_depth);
+ put_bits(pb, 2, color->signal_color_space);
+ put_bits(pb, 2, color->signal_chroma_format);
+ put_bits(pb, 2, color->signal_full_range_flag);
+ put_bits(pb, 12, color->source_min_pq);
+ put_bits(pb, 12, color->source_max_pq);
+ put_bits(pb, 10, color->source_diagonal);
+
+ memcpy(&s->vdr[color->dm_metadata_id]->color, color, sizeof(*color));
+ s->color = &s->vdr[color->dm_metadata_id]->color;
+ }
+
+ set_ue_golomb(pb, num_ext_blocks_v1);
+ align_put_bits(pb);
+ for (int i = 0; i < metadata->num_ext_blocks; i++)
+ generate_ext_v1(pb, av_dovi_get_ext(metadata, i));
+
+ if (num_ext_blocks_v2) {
+ set_ue_golomb(pb, num_ext_blocks_v2);
+ align_put_bits(pb);
+ for (int i = 0; i < metadata->num_ext_blocks; i++)
+ generate_ext_v2(pb, av_dovi_get_ext(metadata, i));
+ }
+
+ flush_put_bits(pb);
+ crc = av_bswap32(av_crc(av_crc_get_table(AV_CRC_32_IEEE), -1,
+ s->rpu_buf, put_bytes_output(pb)));
+ put_bits32(pb, crc);
+ put_bits(pb, 8, 0x80); /* terminator */
+ flush_put_bits(pb);
+
+ rpu_size = put_bytes_output(pb);
+ switch (s->cfg.dv_profile) {
+ case 10:
+ /* AV1 uses T.35 OBU with EMDF header */
+ *out_rpu = av_malloc(rpu_size + 15);
+ if (!*out_rpu)
+ return AVERROR(ENOMEM);
+ init_put_bits(pb, *out_rpu, rpu_size + 15);
+ put_bits(pb, 8, ITU_T_T35_COUNTRY_CODE_US);
+ put_bits(pb, 16, ITU_T_T35_PROVIDER_CODE_DOLBY);
+ put_bits32(pb, 0x800); /* provider_oriented_code */
+ put_bits(pb, 27, 0x01be6841u); /* fixed EMDF header, see above */
+ if (rpu_size > 0xFF) {
+ av_assert2(rpu_size <= 0x10000);
+ put_bits(pb, 8, (rpu_size >> 8) - 1);
+ put_bits(pb, 1, 1); /* read_more */
+ put_bits(pb, 8, rpu_size & 0xFF);
+ put_bits(pb, 1, 0);
+ } else {
+ put_bits(pb, 8, rpu_size);
+ put_bits(pb, 1, 0);
+ }
+ ff_copy_bits(pb, s->rpu_buf, rpu_size * 8);
+ put_bits(pb, 17, 0x400); /* emdf payload id + emdf_protection */
+
+ pad = pb->bit_left & 7;
+ put_bits(pb, pad, (1 << pad) - 1); /* pad to next byte with 1 bits */
+ flush_put_bits(pb);
+ *out_size = put_bytes_output(pb);
+ return 0;
+
+ case 5:
+ case 8:
+ *out_rpu = dst = av_malloc(1 + rpu_size * 3 / 2); /* worst case */
+ if (!*out_rpu)
+ return AVERROR(ENOMEM);
+ *dst++ = 25; /* NAL prefix */
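+ /* Copy the payload with start code emulation prevention: after two
+ * consecutive zero bytes, escape any byte <= 0x03 with a 0x03 byte */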
+ zero_run = 0;
+ for (int i = 0; i < rpu_size; i++) {
+ if (zero_run < 2) {
+ if (s->rpu_buf[i] == 0) {
+ zero_run++;
+ } else {
+ zero_run = 0;
+ }
+ } else {
+ if ((s->rpu_buf[i] & ~3) == 0) {
+ /* emulation prevention */
+ *dst++ = 3;
+ }
+ zero_run = s->rpu_buf[i] == 0;
+ }
+ *dst++ = s->rpu_buf[i];
+ }
+ *out_size = dst - *out_rpu;
+ return 0;
+
+ default:
+ /* Should be unreachable */
+ av_assert0(0);
+ return AVERROR_BUG;
+ }
+}
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index eb49978ad8..194d982562 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -166,7 +166,7 @@ static inline void dv_calc_mb_coordinates(const AVDVProfile *d, int chan,
}
}
-int ff_dv_init_dynamic_tables(DVwork_chunk *work_chunks, const AVDVProfile *d)
+void ff_dv_init_dynamic_tables(DVwork_chunk *work_chunks, const AVDVProfile *d)
{
int j, i, c, s, p;
@@ -185,6 +185,4 @@ int ff_dv_init_dynamic_tables(DVwork_chunk *work_chunks, const AVDVProfile *d)
}
}
}
-
- return 0;
}
diff --git a/libavcodec/dv_internal.h b/libavcodec/dv_internal.h
index 4b4151c88d..05e26a8138 100644
--- a/libavcodec/dv_internal.h
+++ b/libavcodec/dv_internal.h
@@ -32,7 +32,7 @@ typedef struct DVwork_chunk {
uint16_t mb_coordinates[5];
} DVwork_chunk;
-int ff_dv_init_dynamic_tables(DVwork_chunk *work_chunks, const AVDVProfile *d);
+void ff_dv_init_dynamic_tables(DVwork_chunk *work_chunks, const AVDVProfile *d);
static inline int dv_work_pool_size(const AVDVProfile *d)
{
diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c
index a06e4807e7..8297b6d2f3 100644
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -637,18 +637,12 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, AVFrame *frame,
}
if (sys != s->sys) {
- ret = ff_dv_init_dynamic_tables(s->work_chunks, sys);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Error initializing the work tables.\n");
- return ret;
- }
+ ff_dv_init_dynamic_tables(s->work_chunks, sys);
dv_init_weight_tables(s, sys);
s->sys = sys;
}
s->frame = frame;
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
avctx->pix_fmt = s->sys->pix_fmt;
avctx->framerate = av_inv_q(s->sys->time_base);
avctx->bit_rate = av_rescale_q(s->sys->frame_size,
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index ce21247081..3afeedbb87 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -93,11 +93,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
return AVERROR(EINVAL);
}
- ret = ff_dv_init_dynamic_tables(s->work_chunks, s->sys);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "Error initializing work tables.\n");
- return ret;
- }
+ ff_dv_init_dynamic_tables(s->work_chunks, s->sys);
memset(&fdsp,0, sizeof(fdsp));
memset(&mecc,0, sizeof(mecc));
diff --git a/libavcodec/dxtory.c b/libavcodec/dxtory.c
index f36420cdd9..a9b5d835db 100644
--- a/libavcodec/dxtory.c
+++ b/libavcodec/dxtory.c
@@ -863,8 +863,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *pic,
if (ret)
return ret;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index f130db3a4e..7c873a3e92 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -1048,8 +1048,6 @@ static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
}
/* Frame is ready to be output. */
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/dxva2_av1.c b/libavcodec/dxva2_av1.c
index 184a922fd8..1b55510659 100644
--- a/libavcodec/dxva2_av1.c
+++ b/libavcodec/dxva2_av1.c
@@ -101,7 +101,7 @@ int ff_dxva2_av1_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
pp->coding.dual_filter = seq->enable_dual_filter;
pp->coding.jnt_comp = seq->enable_jnt_comp;
pp->coding.screen_content_tools = frame_header->allow_screen_content_tools;
- pp->coding.integer_mv = frame_header->force_integer_mv || !(frame_header->frame_type & 1);
+ pp->coding.integer_mv = h->cur_frame.force_integer_mv;
pp->coding.cdef = seq->enable_cdef;
pp->coding.restoration = seq->enable_restoration;
pp->coding.film_grain = seq->film_grain_params_present && !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN);
@@ -138,9 +138,9 @@ int ff_dxva2_av1_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
int8_t ref_idx = frame_header->ref_frame_idx[i];
AVFrame *ref_frame = h->ref[ref_idx].f;
- pp->frame_refs[i].width = ref_frame->width;
- pp->frame_refs[i].height = ref_frame->height;
- pp->frame_refs[i].Index = ref_frame->buf[0] ? ref_idx : 0xFF;
+ pp->frame_refs[i].width = ref_frame ? ref_frame->width : 0;
+ pp->frame_refs[i].height = ref_frame ? ref_frame->height : 0;
+ pp->frame_refs[i].Index = ref_frame ? ref_idx : 0xFF;
/* Global Motion */
pp->frame_refs[i].wminvalid = h->cur_frame.gm_invalid[AV1_REF_FRAME_LAST + i];
@@ -151,7 +151,7 @@ int ff_dxva2_av1_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
}
for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
AVFrame *ref_frame = h->ref[i].f;
- if (ref_frame->buf[0])
+ if (ref_frame)
pp->RefFrameMapTextureIndex[i] = ff_dxva2_get_surface_index(avctx, ctx, ref_frame, 0);
}
diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c
index 1498deb3c8..ca8b3b136d 100644
--- a/libavcodec/dxva2_vp9.c
+++ b/libavcodec/dxva2_vp9.c
@@ -79,7 +79,7 @@ int ff_dxva2_vp9_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
pp->Reserved8Bits = 0;
for (i = 0; i < 8; i++) {
- if (h->refs[i].f->buf[0]) {
+ if (h->refs[i].f) {
fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f, 0), 0);
pp->ref_frame_coded_width[i] = h->refs[i].f->width;
pp->ref_frame_coded_height[i] = h->refs[i].f->height;
@@ -89,7 +89,7 @@ int ff_dxva2_vp9_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
for (i = 0; i < 3; i++) {
uint8_t refidx = h->h.refidx[i];
- if (h->refs[refidx].f->buf[0])
+ if (h->refs[refidx].f)
fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f, 0), 0);
else
pp->frame_refs[i].bPicEntry = 0xFF;
diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c
index 5c71751a0c..2b3bffda6e 100644
--- a/libavcodec/eac3dec.c
+++ b/libavcodec/eac3dec.c
@@ -39,8 +39,8 @@
#include "avcodec.h"
-#include "aac_ac3_parser.h"
#include "ac3.h"
+#include "ac3_parser_internal.h"
#include "ac3dec.h"
#include "ac3dec_data.h"
#include "eac3_data.h"
@@ -300,7 +300,7 @@ static int ff_eac3_parse_header(AC3DecodeContext *s)
dependent streams which are used to add or replace channels. */
if (s->frame_type == EAC3_FRAME_TYPE_RESERVED) {
av_log(s->avctx, AV_LOG_ERROR, "Reserved frame type\n");
- return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
+ return AC3_PARSE_ERROR_FRAME_TYPE;
}
/* The substream id indicates which substream this frame belongs to. each
@@ -312,7 +312,7 @@ static int ff_eac3_parse_header(AC3DecodeContext *s)
s->eac3_subsbtreamid_found = 1;
avpriv_request_sample(s->avctx, "Additional substreams");
}
- return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
+ return AC3_PARSE_ERROR_FRAME_TYPE;
}
if (s->bit_alloc_params.sr_code == EAC3_SR_CODE_REDUCED) {
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
index 527f77e33a..8ef3e7e773 100644
--- a/libavcodec/eac3enc.c
+++ b/libavcodec/eac3enc.c
@@ -27,10 +27,12 @@
#define AC3ENC_FLOAT 1
#include "libavutil/attributes.h"
+#include "libavutil/thread.h"
#include "ac3enc.h"
#include "codec_internal.h"
#include "eac3enc.h"
#include "eac3_data.h"
+#include "put_bits.h"
static const AVClass eac3enc_class = {
@@ -47,7 +49,10 @@ static const AVClass eac3enc_class = {
static int8_t eac3_frame_expstr_index_tab[3][4][4][4][4][4];
-av_cold void ff_eac3_exponent_init(void)
+/**
+ * Initialize E-AC-3 exponent tables.
+ */
+static av_cold void eac3_exponent_init(void)
{
int i;
@@ -122,132 +127,141 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
}
}
-
-void ff_eac3_output_frame_header(AC3EncodeContext *s)
+/**
+ * Write the E-AC-3 frame header to the output bitstream.
+ */
+static void eac3_output_frame_header(AC3EncodeContext *s, PutBitContext *pb)
{
int blk, ch;
AC3EncOptions *opt = &s->options;
- put_bits(&s->pb, 16, 0x0b77); /* sync word */
+ put_bits(pb, 16, 0x0b77); /* sync word */
/* BSI header */
- put_bits(&s->pb, 2, 0); /* stream type = independent */
- put_bits(&s->pb, 3, 0); /* substream id = 0 */
- put_bits(&s->pb, 11, (s->frame_size / 2) - 1); /* frame size */
- if (s->bit_alloc.sr_shift) {
- put_bits(&s->pb, 2, 0x3); /* fscod2 */
- put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
- } else {
- put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
- put_bits(&s->pb, 2, s->num_blks_code); /* number of blocks */
- }
- put_bits(&s->pb, 3, s->channel_mode); /* audio coding mode */
- put_bits(&s->pb, 1, s->lfe_on); /* LFE channel indicator */
- put_bits(&s->pb, 5, s->bitstream_id); /* bitstream id (EAC3=16) */
- put_bits(&s->pb, 5, -opt->dialogue_level); /* dialogue normalization level */
- put_bits(&s->pb, 1, 0); /* no compression gain */
+ put_bits(pb, 2, 0); /* stream type = independent */
+ put_bits(pb, 3, 0); /* substream id = 0 */
+ put_bits(pb, 11, (s->frame_size / 2) - 1); /* frame size */
+ put_bits(pb, 2, s->bit_alloc.sr_code); /* sample rate code */
+ put_bits(pb, 2, s->num_blks_code); /* number of blocks */
+ put_bits(pb, 3, s->channel_mode); /* audio coding mode */
+ put_bits(pb, 1, s->lfe_on); /* LFE channel indicator */
+ put_bits(pb, 5, s->bitstream_id); /* bitstream id (EAC3=16) */
+ put_bits(pb, 5, -opt->dialogue_level); /* dialogue normalization level */
+ put_bits(pb, 1, 0); /* no compression gain */
/* mixing metadata*/
- put_bits(&s->pb, 1, opt->eac3_mixing_metadata);
+ put_bits(pb, 1, opt->eac3_mixing_metadata);
if (opt->eac3_mixing_metadata) {
if (s->channel_mode > AC3_CHMODE_STEREO)
- put_bits(&s->pb, 2, opt->preferred_stereo_downmix);
+ put_bits(pb, 2, opt->preferred_stereo_downmix);
if (s->has_center) {
- put_bits(&s->pb, 3, s->ltrt_center_mix_level);
- put_bits(&s->pb, 3, s->loro_center_mix_level);
+ put_bits(pb, 3, s->ltrt_center_mix_level);
+ put_bits(pb, 3, s->loro_center_mix_level);
}
if (s->has_surround) {
- put_bits(&s->pb, 3, s->ltrt_surround_mix_level);
- put_bits(&s->pb, 3, s->loro_surround_mix_level);
+ put_bits(pb, 3, s->ltrt_surround_mix_level);
+ put_bits(pb, 3, s->loro_surround_mix_level);
}
if (s->lfe_on)
- put_bits(&s->pb, 1, 0);
- put_bits(&s->pb, 1, 0); /* no program scale */
- put_bits(&s->pb, 1, 0); /* no ext program scale */
- put_bits(&s->pb, 2, 0); /* no mixing parameters */
+ put_bits(pb, 1, 0);
+ put_bits(pb, 1, 0); /* no program scale */
+ put_bits(pb, 1, 0); /* no ext program scale */
+ put_bits(pb, 2, 0); /* no mixing parameters */
if (s->channel_mode < AC3_CHMODE_STEREO)
- put_bits(&s->pb, 1, 0); /* no pan info */
- put_bits(&s->pb, 1, 0); /* no frame mix config info */
+ put_bits(pb, 1, 0); /* no pan info */
+ put_bits(pb, 1, 0); /* no frame mix config info */
}
/* info metadata*/
- put_bits(&s->pb, 1, opt->eac3_info_metadata);
+ put_bits(pb, 1, opt->eac3_info_metadata);
if (opt->eac3_info_metadata) {
- put_bits(&s->pb, 3, s->bitstream_mode);
- put_bits(&s->pb, 1, opt->copyright);
- put_bits(&s->pb, 1, opt->original);
+ put_bits(pb, 3, s->bitstream_mode);
+ put_bits(pb, 1, opt->copyright);
+ put_bits(pb, 1, opt->original);
if (s->channel_mode == AC3_CHMODE_STEREO) {
- put_bits(&s->pb, 2, opt->dolby_surround_mode);
- put_bits(&s->pb, 2, opt->dolby_headphone_mode);
+ put_bits(pb, 2, opt->dolby_surround_mode);
+ put_bits(pb, 2, opt->dolby_headphone_mode);
}
if (s->channel_mode >= AC3_CHMODE_2F2R)
- put_bits(&s->pb, 2, opt->dolby_surround_ex_mode);
- put_bits(&s->pb, 1, opt->audio_production_info);
+ put_bits(pb, 2, opt->dolby_surround_ex_mode);
+ put_bits(pb, 1, opt->audio_production_info);
if (opt->audio_production_info) {
- put_bits(&s->pb, 5, opt->mixing_level - 80);
- put_bits(&s->pb, 2, opt->room_type);
- put_bits(&s->pb, 1, opt->ad_converter_type);
+ put_bits(pb, 5, opt->mixing_level - 80);
+ put_bits(pb, 2, opt->room_type);
+ put_bits(pb, 1, opt->ad_converter_type);
}
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
}
if (s->num_blocks != 6)
- put_bits(&s->pb, 1, !(s->avctx->frame_num % 6)); /* converter sync flag */
- put_bits(&s->pb, 1, 0); /* no additional bit stream info */
+ put_bits(pb, 1, !(s->avctx->frame_num % 6)); /* converter sync flag */
+ put_bits(pb, 1, 0); /* no additional bit stream info */
/* frame header */
if (s->num_blocks == 6) {
- put_bits(&s->pb, 1, !s->use_frame_exp_strategy); /* exponent strategy syntax */
- put_bits(&s->pb, 1, 0); /* aht enabled = no */
+ put_bits(pb, 1, !s->use_frame_exp_strategy); /* exponent strategy syntax */
+ put_bits(pb, 1, 0); /* aht enabled = no */
}
- put_bits(&s->pb, 2, 0); /* snr offset strategy = 1 */
- put_bits(&s->pb, 1, 0); /* transient pre-noise processing enabled = no */
- put_bits(&s->pb, 1, 0); /* block switch syntax enabled = no */
- put_bits(&s->pb, 1, 0); /* dither flag syntax enabled = no */
- put_bits(&s->pb, 1, 0); /* bit allocation model syntax enabled = no */
- put_bits(&s->pb, 1, 0); /* fast gain codes enabled = no */
- put_bits(&s->pb, 1, 0); /* dba syntax enabled = no */
- put_bits(&s->pb, 1, 0); /* skip field syntax enabled = no */
- put_bits(&s->pb, 1, 0); /* spx enabled = no */
+ put_bits(pb, 2, 0); /* snr offset strategy = 1 */
+ put_bits(pb, 1, 0); /* transient pre-noise processing enabled = no */
+ put_bits(pb, 1, 0); /* block switch syntax enabled = no */
+ put_bits(pb, 1, 0); /* dither flag syntax enabled = no */
+ put_bits(pb, 1, 0); /* bit allocation model syntax enabled = no */
+ put_bits(pb, 1, 0); /* fast gain codes enabled = no */
+ put_bits(pb, 1, 0); /* dba syntax enabled = no */
+ put_bits(pb, 1, 0); /* skip field syntax enabled = no */
+ put_bits(pb, 1, 0); /* spx enabled = no */
/* coupling strategy use flags */
if (s->channel_mode > AC3_CHMODE_MONO) {
- put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
+ put_bits(pb, 1, s->blocks[0].cpl_in_use);
for (blk = 1; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
- put_bits(&s->pb, 1, block->new_cpl_strategy);
+ put_bits(pb, 1, block->new_cpl_strategy);
if (block->new_cpl_strategy)
- put_bits(&s->pb, 1, block->cpl_in_use);
+ put_bits(pb, 1, block->cpl_in_use);
}
}
/* exponent strategy */
if (s->use_frame_exp_strategy) {
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++)
- put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
+ put_bits(pb, 5, s->frame_exp_strategy[ch]);
} else {
for (blk = 0; blk < s->num_blocks; blk++)
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
- put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
+ put_bits(pb, 2, s->exp_strategy[ch][blk]);
}
if (s->lfe_on) {
for (blk = 0; blk < s->num_blocks; blk++)
- put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
+ put_bits(pb, 1, s->exp_strategy[s->lfe_channel][blk]);
}
/* E-AC-3 to AC-3 converter exponent strategy (not optional when num blocks == 6) */
if (s->num_blocks != 6) {
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
} else {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->use_frame_exp_strategy)
- put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
+ put_bits(pb, 5, s->frame_exp_strategy[ch]);
else
- put_bits(&s->pb, 5, 0);
+ put_bits(pb, 5, 0);
}
}
/* snr offsets */
- put_bits(&s->pb, 6, s->coarse_snr_offset);
- put_bits(&s->pb, 4, s->fine_snr_offset[1]);
+ put_bits(pb, 6, s->coarse_snr_offset);
+ put_bits(pb, 4, s->fine_snr_offset[1]);
/* block start info */
if (s->num_blocks > 1)
- put_bits(&s->pb, 1, 0);
+ put_bits(pb, 1, 0);
}
+static av_cold int eac3_encode_init(AVCodecContext *avctx)
+{
+ static AVOnce init_static_once = AV_ONCE_INIT;
+ AC3EncodeContext *s = avctx->priv_data;
+
+ s->eac3 = 1;
+ s->output_frame_header = eac3_output_frame_header;
+
+ ff_thread_once(&init_static_once, eac3_exponent_init);
+
+ return ff_ac3_float_encode_init(avctx);
+}
const FFCodec ff_eac3_encoder = {
.p.name = "eac3",
@@ -256,8 +270,8 @@ const FFCodec ff_eac3_encoder = {
.p.id = AV_CODEC_ID_EAC3,
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
.priv_data_size = sizeof(AC3EncodeContext),
- .init = ff_ac3_float_encode_init,
- FF_CODEC_ENCODE_CB(ff_ac3_float_encode_frame),
+ .init = eac3_encode_init,
+ FF_CODEC_ENCODE_CB(ff_ac3_encode_frame),
.close = ff_ac3_encode_close,
.p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/eac3enc.h b/libavcodec/eac3enc.h
index 7d6155975d..0523de411b 100644
--- a/libavcodec/eac3enc.h
+++ b/libavcodec/eac3enc.h
@@ -30,11 +30,6 @@
#include "ac3enc.h"
/**
- * Initialize E-AC-3 exponent tables.
- */
-void ff_eac3_exponent_init(void);
-
-/**
* Determine frame exponent strategy use and indices.
*/
void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s);
@@ -46,9 +41,4 @@ void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s);
*/
void ff_eac3_set_cpl_states(AC3EncodeContext *s);
-/**
- * Write the E-AC-3 frame header to the output bitstream.
- */
-void ff_eac3_output_frame_header(AC3EncodeContext *s);
-
#endif /* AVCODEC_EAC3ENC_H */
diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index 0f0ed3585f..d326c05390 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -237,8 +237,6 @@ static int tgq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
for (y = 0; y < FFALIGN(avctx->height, 16) >> 4; y++)
for (x = 0; x < FFALIGN(avctx->width, 16) >> 4; x++)
diff --git a/libavcodec/evc.h b/libavcodec/evc.h
index e493455a42..d68dc74997 100644
--- a/libavcodec/evc.h
+++ b/libavcodec/evc.h
@@ -106,15 +106,15 @@ enum EVCSliceType {
};
enum {
- // 7.4.3.2: aps_video_parameter_set_id is u(4).
- EVC_MAX_APS_COUNT = 32,
-
// 7.4.3.1: sps_seq_parameter_set_id is in [0, 15].
EVC_MAX_SPS_COUNT = 16,
// 7.4.3.2: pps_pic_parameter_set_id is in [0, 63].
EVC_MAX_PPS_COUNT = 64,
+ // 7.4.3.3: adaptation_parameter_set_id is in [0, 31].
+ EVC_MAX_APS_COUNT = 32,
+
// 7.4.5: slice header slice_pic_parameter_set_id in [0, 63]
EVC_MAX_SH_COUNT = 64,
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 8bd39f78a4..4bac0be89b 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -1943,7 +1943,7 @@ static int decode_header(EXRContext *s, AVFrame *frame)
"preview", 16)) >= 0) {
uint32_t pw = bytestream2_get_le32(gb);
uint32_t ph = bytestream2_get_le32(gb);
- uint64_t psize = pw * ph;
+ uint64_t psize = pw * (uint64_t)ph;
if (psize > INT64_MAX / 4) {
ret = AVERROR_INVALIDDATA;
goto fail;
diff --git a/libavcodec/fdctdsp.c b/libavcodec/fdctdsp.c
index f8ba17426c..d20558ce88 100644
--- a/libavcodec/fdctdsp.c
+++ b/libavcodec/fdctdsp.c
@@ -42,7 +42,9 @@ av_cold void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx)
c->fdct248 = ff_fdct248_islow_8;
}
-#if ARCH_PPC
+#if ARCH_AARCH64
+ ff_fdctdsp_init_aarch64(c, avctx, high_bit_depth);
+#elif ARCH_PPC
ff_fdctdsp_init_ppc(c, avctx, high_bit_depth);
#elif ARCH_X86
ff_fdctdsp_init_x86(c, avctx, high_bit_depth);
diff --git a/libavcodec/fdctdsp.h b/libavcodec/fdctdsp.h
index 7378eab870..cad99ed7ca 100644
--- a/libavcodec/fdctdsp.h
+++ b/libavcodec/fdctdsp.h
@@ -32,6 +32,8 @@ typedef struct FDCTDSPContext {
FF_VISIBILITY_PUSH_HIDDEN
void ff_fdctdsp_init(FDCTDSPContext *c, struct AVCodecContext *avctx);
+void ff_fdctdsp_init_aarch64(FDCTDSPContext *c, struct AVCodecContext *avctx,
+ unsigned high_bit_depth);
void ff_fdctdsp_init_ppc(FDCTDSPContext *c, struct AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_fdctdsp_init_x86(FDCTDSPContext *c, struct AVCodecContext *avctx,
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 04869da5c9..acec22e83e 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -28,13 +28,12 @@
* FF Video Codec 1 (a lossless codec)
*/
-#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "get_bits.h"
#include "mathops.h"
+#include "progressframe.h"
#include "put_bits.h"
#include "rangecoder.h"
-#include "threadframe.h"
#ifdef __INTEL_COMPILER
#undef av_flatten
@@ -87,7 +86,7 @@ typedef struct FFV1Context {
int flags;
int64_t picture_number;
int key_frame;
- ThreadFrame picture, last_picture;
+ ProgressFrame picture, last_picture;
struct FFV1Context *fsrc;
AVFrame *cur;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index ba535e800d..7a0d1909aa 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -37,8 +37,8 @@
#include "golomb.h"
#include "mathops.h"
#include "ffv1.h"
+#include "progressframe.h"
#include "thread.h"
-#include "threadframe.h"
static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state,
int is_signed)
@@ -264,8 +264,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
for( si=0; fs != f->slice_context[si]; si ++)
;
- if(f->fsrc && !(p->flags & AV_FRAME_FLAG_KEY))
- ff_thread_await_progress(&f->last_picture, si, 0);
+ if (f->fsrc && !(p->flags & AV_FRAME_FLAG_KEY) && f->last_picture.f)
+ ff_progress_frame_await(&f->last_picture, si);
if(f->fsrc && !(p->flags & AV_FRAME_FLAG_KEY)) {
FFV1Context *fssrc = f->fsrc->slice_context[si];
@@ -370,7 +370,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
}
}
- ff_thread_report_progress(&f->picture, si, 0);
+ ff_progress_frame_report(&f->picture, si);
return 0;
}
@@ -858,11 +858,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
if ((ret = ff_ffv1_common_init(avctx)) < 0)
return ret;
- f->picture.f = av_frame_alloc();
- f->last_picture.f = av_frame_alloc();
- if (!f->picture.f || !f->last_picture.f)
- return AVERROR(ENOMEM);
-
if (avctx->extradata_size > 0 && (ret = read_extra_header(f)) < 0)
return ret;
@@ -879,31 +874,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
int buf_size = avpkt->size;
FFV1Context *f = avctx->priv_data;
RangeCoder *const c = &f->slice_context[0]->c;
- int i, ret;
+ int i, ret, key_frame;
uint8_t keystate = 128;
uint8_t *buf_p;
AVFrame *p;
- if (f->last_picture.f)
- ff_thread_release_ext_buffer(&f->last_picture);
- FFSWAP(ThreadFrame, f->picture, f->last_picture);
-
- f->cur = p = f->picture.f;
+ ff_progress_frame_unref(&f->last_picture);
+ FFSWAP(ProgressFrame, f->picture, f->last_picture);
- if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) {
- /* we have interlaced material flagged in container */
- p->flags |= AV_FRAME_FLAG_INTERLACED;
- if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
- p->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST;
- }
f->avctx = avctx;
ff_init_range_decoder(c, buf, buf_size);
ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
- p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
if (get_rac(c, &keystate)) {
- p->flags |= AV_FRAME_FLAG_KEY;
+ key_frame = AV_FRAME_FLAG_KEY;
f->key_frame_ok = 0;
if ((ret = read_header(f)) < 0)
return ret;
@@ -914,7 +899,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
"Cannot decode non-keyframe without valid keyframe\n");
return AVERROR_INVALIDDATA;
}
- p->flags &= ~AV_FRAME_FLAG_KEY;
+ key_frame = 0;
}
if (f->ac != AC_GOLOMB_RICE) {
@@ -932,10 +917,23 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
return AVERROR_INVALIDDATA;
}
- ret = ff_thread_get_ext_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF);
+ ret = ff_progress_frame_get_buffer(avctx, &f->picture,
+ AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return ret;
+ f->cur = p = f->picture.f;
+
+ p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
+ p->flags = (p->flags & ~AV_FRAME_FLAG_KEY) | key_frame;
+
+ if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) {
+ /* we have interlaced material flagged in container */
+ p->flags |= AV_FRAME_FLAG_INTERLACED;
+ if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
+ p->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST;
+ }
+
if (avctx->debug & FF_DEBUG_PICT_INFO)
av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
@@ -954,7 +952,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
} else v = buf_p - c->bytestream_start;
if (buf_p - c->bytestream_start < v) {
av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n");
- ff_thread_report_progress(&f->picture, INT_MAX, 0);
+ ff_progress_frame_report(&f->picture, INT_MAX);
return AVERROR_INVALIDDATA;
}
buf_p -= v;
@@ -996,11 +994,11 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
for (i = f->slice_count - 1; i >= 0; i--) {
FFV1Context *fs = f->slice_context[i];
int j;
- if (fs->slice_damaged && f->last_picture.f->data[0]) {
+ if (fs->slice_damaged && f->last_picture.f) {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
const uint8_t *src[4];
uint8_t *dst[4];
- ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
+ ff_progress_frame_await(&f->last_picture, INT_MAX);
for (j = 0; j < desc->nb_components; j++) {
int pixshift = desc->comp[j].depth > 8;
int sh = (j == 1 || j == 2) ? f->chroma_h_shift : 0;
@@ -1022,10 +1020,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
fs->slice_height);
}
}
- ff_thread_report_progress(&f->picture, INT_MAX, 0);
+ ff_progress_frame_report(&f->picture, INT_MAX);
- if (f->last_picture.f)
- ff_thread_release_ext_buffer(&f->last_picture);
+ ff_progress_frame_unref(&f->last_picture);
if ((ret = av_frame_ref(rframe, f->picture.f)) < 0)
return ret;
@@ -1067,7 +1064,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
FFV1Context *fsrc = src->priv_data;
FFV1Context *fdst = dst->priv_data;
- int i, ret;
+ int i;
if (dst == src)
return 0;
@@ -1088,12 +1085,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
av_assert1(fdst->max_slice_count == fsrc->max_slice_count);
-
- ff_thread_release_ext_buffer(&fdst->picture);
- if (fsrc->picture.f->data[0]) {
- if ((ret = ff_thread_ref_frame(&fdst->picture, &fsrc->picture)) < 0)
- return ret;
- }
+ ff_progress_frame_replace(&fdst->picture, &fsrc->picture);
fdst->fsrc = fsrc;
@@ -1105,15 +1097,9 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
{
FFV1Context *const s = avctx->priv_data;
- if (s->picture.f) {
- ff_thread_release_ext_buffer(&s->picture);
- av_frame_free(&s->picture.f);
- }
+ ff_progress_frame_unref(&s->picture);
+ ff_progress_frame_unref(&s->last_picture);
- if (s->last_picture.f) {
- ff_thread_release_ext_buffer(&s->last_picture);
- av_frame_free(&s->last_picture.f);
- }
return ff_ffv1_close(avctx);
}
@@ -1130,5 +1116,5 @@ const FFCodec ff_ffv1_decoder = {
.p.capabilities = AV_CODEC_CAP_DR1 |
AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
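
The ffv1dec.c hunks above move the decoder from the ThreadFrame helpers to the ProgressFrame API. As a quick orientation, this is a minimal, flattened sketch of the lifecycle those calls implement, using only the functions visible in this patch (progressframe.h); MyContext, slice_index and the "decode one slice" step are placeholders, not FFmpeg code, and in the real decoder the await/report pair runs inside the per-slice workers.

    /* Sketch only: ProgressFrame lifecycle as used in the hunks above. */
    static int sketch_decode_frame(AVCodecContext *avctx, MyContext *s,
                                   int slice_index)
    {
        int ret;

        /* drop the previous reference and rotate the two frames */
        ff_progress_frame_unref(&s->last_picture);
        FFSWAP(ProgressFrame, s->picture, s->last_picture);

        /* allocate the new frame; s->picture.f is only valid afterwards */
        ret = ff_progress_frame_get_buffer(avctx, &s->picture,
                                           AV_GET_BUFFER_FLAG_REF);
        if (ret < 0)
            return ret;

        /* a consuming thread blocks until the producer has reported this far */
        if (s->last_picture.f)
            ff_progress_frame_await(&s->last_picture, slice_index);

        /* ... decode one slice ... */
        ff_progress_frame_report(&s->picture, slice_index);

        /* on completion (or error) unblock every waiter */
        ff_progress_frame_report(&s->picture, INT_MAX);
        return 0;
    }

Note that the decoder no longer owns AVFrames directly (no av_frame_alloc()/av_frame_free() in init/close), which is why those paths shrink and the codec cap changes from FF_CODEC_CAP_ALLOCATE_PROGRESS to FF_CODEC_CAP_USES_PROGRESSFRAMES.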
diff --git a/libavcodec/fitsdec.c b/libavcodec/fitsdec.c
index 284e945ba5..56df3e4d6b 100644
--- a/libavcodec/fitsdec.c
+++ b/libavcodec/fitsdec.c
@@ -301,9 +301,6 @@ static int fits_decode_frame(AVCodecContext *avctx, AVFrame *p,
}
}
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 91bbdc657d..6e6a2896b4 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -602,13 +602,9 @@ static inline int decode_subframe(FLACContext *s, int channel)
if (wasted) {
if (wasted+bps == 33) {
- int i;
- for (i = 0; i < s->blocksize; i++)
- s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
+ s->dsp.wasted33(s->decoded_33bps, decoded, wasted, s->blocksize);
} else if (wasted < 32) {
- int i;
- for (i = 0; i < s->blocksize; i++)
- decoded[i] = (unsigned)decoded[i] << wasted;
+ s->dsp.wasted32(decoded, wasted, s->blocksize);
}
}
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index 71b4ac44aa..27d3e923ee 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -84,11 +84,27 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
}
+static void flac_wasted_32_c(int32_t *decoded, int wasted, int len)
+{
+ for (int i = 0; i < len; i++)
+ decoded[i] = (unsigned)decoded[i] << wasted;
+}
+
+static void flac_wasted_33_c(int64_t *decoded, const int32_t *residual,
+ int wasted, int len)
+{
+ for (int i = 0; i < len; i++)
+ decoded[i] = (uint64_t)residual[i] << wasted;
+}
+
av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
{
c->lpc16 = flac_lpc_16_c;
c->lpc32 = flac_lpc_32_c;
+ c->wasted32 = flac_wasted_32_c;
+ c->wasted33 = flac_wasted_33_c;
+
switch (fmt) {
case AV_SAMPLE_FMT_S32:
c->decorrelate[0] = flac_decorrelate_indep_c_32;
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 15149c026e..5a59c0c864 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -30,6 +30,9 @@ typedef struct FLACDSPContext {
int qlevel, int len);
void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+ void (*wasted32)(int32_t *decoded, int wasted, int len);
+ void (*wasted33)(int64_t *decoded, const int32_t *residual,
+ int wasted, int len);
void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
const int32_t coefs[32], int shift);
void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
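
The new wasted32/wasted33 hooks replace the inline loops that decode_subframe() previously ran for wasted-bits expansion. A minimal usage sketch, assuming only the signatures added above; the buffers, bps and blocksize values are placeholders and mirror the flacdec.c call sites:

    /* Usage sketch for the new FLACDSP hooks; not a complete decoder. */
    static void apply_wasted_bits(FLACDSPContext *dsp, int32_t *decoded,
                                  int64_t *decoded_33bps, int bps,
                                  int wasted, int blocksize)
    {
        if (wasted + bps == 33)        /* 33-bit path: widen into int64_t */
            dsp->wasted33(decoded_33bps, decoded, wasted, blocksize);
        else if (wasted < 32)          /* common path: shift in place */
            dsp->wasted32(decoded, wasted, blocksize);
    }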
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index e29be5822b..3a9578f5cd 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -525,11 +525,10 @@ static void copy_samples(FlacEncodeContext *s, const void *samples)
{
int i, j, ch;
FlacFrame *frame;
- int shift = av_get_bytes_per_sample(s->avctx->sample_fmt) * 8 -
- s->avctx->bits_per_raw_sample;
-#define COPY_SAMPLES(bits) do { \
+#define COPY_SAMPLES(bits, shift0) do { \
const int ## bits ## _t *samples0 = samples; \
+ const int shift = shift0; \
frame = &s->frame; \
for (i = 0, j = 0; i < frame->blocksize; i++) \
for (ch = 0; ch < s->channels; ch++, j++) \
@@ -537,9 +536,9 @@ static void copy_samples(FlacEncodeContext *s, const void *samples)
} while (0)
if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S16)
- COPY_SAMPLES(16);
+ COPY_SAMPLES(16, 0);
else
- COPY_SAMPLES(32);
+ COPY_SAMPLES(32, 32 - s->avctx->bits_per_raw_sample);
}
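
For reference, the shift values the new COPY_SAMPLES call sites pass: the S16 path uses 0, while 24-bit audio carried in AV_SAMPLE_FMT_S32 uses 32 - 24 = 8, the same value the deleted bytes-per-sample * 8 - bits_per_raw_sample expression produced; the shift is now a compile-time constant inside each macro expansion.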
diff --git a/libavcodec/fmvc.c b/libavcodec/fmvc.c
index 30f7aaf6bc..b51b18a9ee 100644
--- a/libavcodec/fmvc.c
+++ b/libavcodec/fmvc.c
@@ -101,7 +101,6 @@ static int decode_type2(GetByteContext *gb, PutByteContext *pb)
continue;
}
}
- repeat = 0;
}
repeat = 1;
}
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index 4e45efeba8..d7b713c7c4 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -215,9 +215,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *f,
}
}
- f->pict_type = AV_PICTURE_TYPE_I;
- f->flags |= AV_FRAME_FLAG_KEY;
-
avctx->pix_fmt = version & 1 ? is_pal ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_BGR24 : AV_PIX_FMT_YUVJ420P;
avctx->color_range = version & 1 ? AVCOL_RANGE_UNSPECIFIED
: AVCOL_RANGE_JPEG;
diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c
index 70bc136765..3b52b968fe 100644
--- a/libavcodec/frwu.c
+++ b/libavcodec/frwu.c
@@ -62,9 +62,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
-
for (field = 0; field < 2; field++) {
int i;
int field_h = (avctx->height + !field) >> 1;
diff --git a/libavcodec/ftr_parser.c b/libavcodec/ftr_parser.c
index 05e6cfed98..656fd289f6 100644
--- a/libavcodec/ftr_parser.c
+++ b/libavcodec/ftr_parser.c
@@ -25,7 +25,6 @@
*/
#include "parser.h"
-#include "get_bits.h"
#include "adts_header.h"
#include "adts_parser.h"
#include "mpeg4audio.h"
@@ -45,7 +44,6 @@ static int ftr_parse(AVCodecParserContext *s, AVCodecContext *avctx,
FTRParseContext *ftr = s->priv_data;
uint64_t state = ftr->pc.state64;
int next = END_NOT_FOUND;
- GetBitContext bits;
AACADTSHeaderInfo hdr;
int size;
@@ -71,10 +69,9 @@ static int ftr_parse(AVCodecParserContext *s, AVCodecContext *avctx,
state = (state << 8) | buf[i];
AV_WB64(tmp, state);
- init_get_bits(&bits, tmp + 8 - AV_AAC_ADTS_HEADER_SIZE,
- AV_AAC_ADTS_HEADER_SIZE * 8);
+ size = ff_adts_header_parse_buf(tmp + 8 - AV_AAC_ADTS_HEADER_SIZE, &hdr);
- if ((size = ff_adts_header_parse(&bits, &hdr)) > 0) {
+ if (size > 0) {
ftr->skip = size - 6;
ftr->frame_index += ff_mpeg4audio_channels[hdr.chan_config];
if (ftr->frame_index >= avctx->ch_layout.nb_channels) {
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 910df7585f..48bd467f30 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -42,7 +42,6 @@
#include "mpeg4video.h"
#include "mpeg4videodec.h"
#include "mpeg4videodefs.h"
-#include "mpegutils.h"
#include "mpegvideo.h"
#include "mpegvideodec.h"
#include "msmpeg4dec.h"
diff --git a/libavcodec/h263dec.h b/libavcodec/h263dec.h
index 89c5fcf58f..a01acc0834 100644
--- a/libavcodec/h263dec.h
+++ b/libavcodec/h263dec.h
@@ -23,6 +23,11 @@
#include "mpegvideo.h"
#include "vlc.h"
+/**
+ * Return value for header parsers if frame is not coded.
+ * */
+#define FRAME_SKIPPED 100
+
// The defines below define the number of bits that are read at once for
// reading vlc values. Changing these may improve speed and data cache needs
// be aware though that decreasing them may need the number of stages that is
diff --git a/libavcodec/h2645_sei.c b/libavcodec/h2645_sei.c
index f0f107f73b..1deb76c765 100644
--- a/libavcodec/h2645_sei.c
+++ b/libavcodec/h2645_sei.c
@@ -529,6 +529,142 @@ static int is_frame_packing_type_valid(SEIFpaType type, enum AVCodecID codec_id)
type >= SEI_FPA_TYPE_SIDE_BY_SIDE;
}
+static int h2645_sei_to_side_data(AVCodecContext *avctx, H2645SEI *sei,
+ AVFrameSideData ***sd, int *nb_sd)
+{
+ int ret;
+
+ for (unsigned i = 0; i < sei->unregistered.nb_buf_ref; i++) {
+ H2645SEIUnregistered *unreg = &sei->unregistered;
+
+ if (unreg->buf_ref[i]) {
+ AVFrameSideData *entry =
+ av_frame_side_data_add(sd, nb_sd, AV_FRAME_DATA_SEI_UNREGISTERED,
+ &unreg->buf_ref[i], 0);
+ if (!entry)
+ av_buffer_unref(&unreg->buf_ref[i]);
+ }
+ }
+ sei->unregistered.nb_buf_ref = 0;
+
+ if (sei->ambient_viewing_environment.present) {
+ H2645SEIAmbientViewingEnvironment *env = &sei->ambient_viewing_environment;
+ AVBufferRef *buf;
+ size_t size;
+
+ AVAmbientViewingEnvironment *dst_env =
+ av_ambient_viewing_environment_alloc(&size);
+ if (!dst_env)
+ return AVERROR(ENOMEM);
+
+ buf = av_buffer_create((uint8_t *)dst_env, size, NULL, NULL, 0);
+ if (!buf) {
+ av_free(dst_env);
+ return AVERROR(ENOMEM);
+ }
+
+ ret = ff_frame_new_side_data_from_buf_ext(avctx, sd, nb_sd,
+ AV_FRAME_DATA_AMBIENT_VIEWING_ENVIRONMENT, &buf);
+
+ if (ret < 0)
+ return ret;
+
+ dst_env->ambient_illuminance = av_make_q(env->ambient_illuminance, 10000);
+ dst_env->ambient_light_x = av_make_q(env->ambient_light_x, 50000);
+ dst_env->ambient_light_y = av_make_q(env->ambient_light_y, 50000);
+ }
+
+ if (sei->mastering_display.present) {
+ // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
+ const int mapping[3] = {2, 0, 1};
+ const int chroma_den = 50000;
+ const int luma_den = 10000;
+ int i;
+ AVMasteringDisplayMetadata *metadata;
+
+ ret = ff_decode_mastering_display_new_ext(avctx, sd, nb_sd, &metadata);
+ if (ret < 0)
+ return ret;
+
+ if (metadata) {
+ metadata->has_luminance = 1;
+ metadata->has_primaries = 1;
+
+ for (i = 0; i < 3; i++) {
+ const int j = mapping[i];
+ metadata->display_primaries[i][0].num = sei->mastering_display.display_primaries[j][0];
+ metadata->display_primaries[i][0].den = chroma_den;
+ metadata->has_primaries &= sei->mastering_display.display_primaries[j][0] >= 5 &&
+ sei->mastering_display.display_primaries[j][0] <= 37000;
+
+ metadata->display_primaries[i][1].num = sei->mastering_display.display_primaries[j][1];
+ metadata->display_primaries[i][1].den = chroma_den;
+ metadata->has_primaries &= sei->mastering_display.display_primaries[j][1] >= 5 &&
+ sei->mastering_display.display_primaries[j][1] <= 42000;
+ }
+ metadata->white_point[0].num = sei->mastering_display.white_point[0];
+ metadata->white_point[0].den = chroma_den;
+ metadata->has_primaries &= sei->mastering_display.white_point[0] >= 5 &&
+ sei->mastering_display.white_point[0] <= 37000;
+
+ metadata->white_point[1].num = sei->mastering_display.white_point[1];
+ metadata->white_point[1].den = chroma_den;
+ metadata->has_primaries &= sei->mastering_display.white_point[1] >= 5 &&
+ sei->mastering_display.white_point[1] <= 42000;
+
+ metadata->max_luminance.num = sei->mastering_display.max_luminance;
+ metadata->max_luminance.den = luma_den;
+ metadata->has_luminance &= sei->mastering_display.max_luminance >= 50000 &&
+ sei->mastering_display.max_luminance <= 100000000;
+
+ metadata->min_luminance.num = sei->mastering_display.min_luminance;
+ metadata->min_luminance.den = luma_den;
+ metadata->has_luminance &= sei->mastering_display.min_luminance >= 1 &&
+ sei->mastering_display.min_luminance <= 50000 &&
+ sei->mastering_display.min_luminance <
+ sei->mastering_display.max_luminance;
+
+ if (metadata->has_luminance || metadata->has_primaries)
+ av_log(avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
+ if (metadata->has_primaries) {
+ av_log(avctx, AV_LOG_DEBUG,
+ "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
+ av_q2d(metadata->display_primaries[0][0]),
+ av_q2d(metadata->display_primaries[0][1]),
+ av_q2d(metadata->display_primaries[1][0]),
+ av_q2d(metadata->display_primaries[1][1]),
+ av_q2d(metadata->display_primaries[2][0]),
+ av_q2d(metadata->display_primaries[2][1]),
+ av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
+ }
+ if (metadata->has_luminance) {
+ av_log(avctx, AV_LOG_DEBUG,
+ "min_luminance=%f, max_luminance=%f\n",
+ av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
+ }
+ }
+ }
+
+ if (sei->content_light.present) {
+ AVContentLightMetadata *metadata;
+
+ ret = ff_decode_content_light_new_ext(avctx, sd, nb_sd, &metadata);
+ if (ret < 0)
+ return ret;
+
+ if (metadata) {
+ metadata->MaxCLL = sei->content_light.max_content_light_level;
+ metadata->MaxFALL = sei->content_light.max_pic_average_light_level;
+
+ av_log(avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
+ av_log(avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
+ metadata->MaxCLL, metadata->MaxFALL);
+ }
+ }
+
+ return 0;
+}
+
int ff_h2645_sei_to_frame(AVFrame *frame, H2645SEI *sei,
enum AVCodecID codec_id,
AVCodecContext *avctx, const H2645VUI *vui,
@@ -621,23 +757,12 @@ int ff_h2645_sei_to_frame(AVFrame *frame, H2645SEI *sei,
if (!sd)
av_buffer_unref(&a53->buf_ref);
a53->buf_ref = NULL;
- if (avctx)
- avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+ avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
}
- for (unsigned i = 0; i < sei->unregistered.nb_buf_ref; i++) {
- H2645SEIUnregistered *unreg = &sei->unregistered;
-
- if (unreg->buf_ref[i]) {
- AVFrameSideData *sd = av_frame_new_side_data_from_buf(frame,
- AV_FRAME_DATA_SEI_UNREGISTERED,
- unreg->buf_ref[i]);
- if (!sd)
- av_buffer_unref(&unreg->buf_ref[i]);
- unreg->buf_ref[i] = NULL;
- }
- }
- sei->unregistered.nb_buf_ref = 0;
+ ret = h2645_sei_to_side_data(avctx, sei, &frame->side_data, &frame->nb_side_data);
+ if (ret < 0)
+ return ret;
if (sei->afd.present) {
AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_AFD,
@@ -718,8 +843,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
else
fgc->present = fgc->persistence_flag;
- if (avctx)
- avctx->properties |= FF_CODEC_PROPERTY_FILM_GRAIN;
+ avctx->properties |= FF_CODEC_PROPERTY_FILM_GRAIN;
}
#if CONFIG_HEVC_SEI
@@ -728,88 +852,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
return ret;
#endif
- if (sei->ambient_viewing_environment.present) {
- H2645SEIAmbientViewingEnvironment *env =
- &sei->ambient_viewing_environment;
-
- AVAmbientViewingEnvironment *dst_env =
- av_ambient_viewing_environment_create_side_data(frame);
- if (!dst_env)
- return AVERROR(ENOMEM);
-
- dst_env->ambient_illuminance = av_make_q(env->ambient_illuminance, 10000);
- dst_env->ambient_light_x = av_make_q(env->ambient_light_x, 50000);
- dst_env->ambient_light_y = av_make_q(env->ambient_light_y, 50000);
- }
-
- if (sei->mastering_display.present) {
- // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
- const int mapping[3] = {2, 0, 1};
- const int chroma_den = 50000;
- const int luma_den = 10000;
- int i;
- AVMasteringDisplayMetadata *metadata;
-
- ret = ff_decode_mastering_display_new(avctx, frame, &metadata);
- if (ret < 0)
- return ret;
-
- if (metadata) {
- for (i = 0; i < 3; i++) {
- const int j = mapping[i];
- metadata->display_primaries[i][0].num = sei->mastering_display.display_primaries[j][0];
- metadata->display_primaries[i][0].den = chroma_den;
- metadata->display_primaries[i][1].num = sei->mastering_display.display_primaries[j][1];
- metadata->display_primaries[i][1].den = chroma_den;
- }
- metadata->white_point[0].num = sei->mastering_display.white_point[0];
- metadata->white_point[0].den = chroma_den;
- metadata->white_point[1].num = sei->mastering_display.white_point[1];
- metadata->white_point[1].den = chroma_den;
-
- metadata->max_luminance.num = sei->mastering_display.max_luminance;
- metadata->max_luminance.den = luma_den;
- metadata->min_luminance.num = sei->mastering_display.min_luminance;
- metadata->min_luminance.den = luma_den;
- metadata->has_luminance = 1;
- metadata->has_primaries = 1;
-
- av_log(avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
- av_log(avctx, AV_LOG_DEBUG,
- "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
- av_q2d(metadata->display_primaries[0][0]),
- av_q2d(metadata->display_primaries[0][1]),
- av_q2d(metadata->display_primaries[1][0]),
- av_q2d(metadata->display_primaries[1][1]),
- av_q2d(metadata->display_primaries[2][0]),
- av_q2d(metadata->display_primaries[2][1]),
- av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
- av_log(avctx, AV_LOG_DEBUG,
- "min_luminance=%f, max_luminance=%f\n",
- av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
- }
- }
-
- if (sei->content_light.present) {
- AVContentLightMetadata *metadata;
-
- ret = ff_decode_content_light_new(avctx, frame, &metadata);
- if (ret < 0)
- return ret;
-
- if (metadata) {
- metadata->MaxCLL = sei->content_light.max_content_light_level;
- metadata->MaxFALL = sei->content_light.max_pic_average_light_level;
-
- av_log(avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
- av_log(avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
- metadata->MaxCLL, metadata->MaxFALL);
- }
- }
-
return 0;
}
+int ff_h2645_sei_to_context(AVCodecContext *avctx, H2645SEI *sei)
+{
+ return h2645_sei_to_side_data(avctx, sei, &avctx->decoded_side_data,
+ &avctx->nb_decoded_side_data);
+}
+
void ff_h2645_sei_reset(H2645SEI *s)
{
av_buffer_unref(&s->a53_caption.buf_ref);
diff --git a/libavcodec/h2645_sei.h b/libavcodec/h2645_sei.h
index b9a6c7587b..488dbcad7e 100644
--- a/libavcodec/h2645_sei.h
+++ b/libavcodec/h2645_sei.h
@@ -168,4 +168,6 @@ int ff_h2645_sei_to_frame(AVFrame *frame, H2645SEI *sei,
unsigned bit_depth_luma, unsigned bit_depth_chroma,
int seed);
+int ff_h2645_sei_to_context(AVCodecContext *avctx, H2645SEI *sei);
+
#endif /* AVCODEC_H2645_SEI_H */
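
The new entry point exports already-parsed SEI into decoder-level side data instead of attaching it to a frame. A minimal sketch of the intended call pattern, mirroring the hevcdec.c hunk further down (s and ret are that decoder's locals, used here only for illustration):

    /* After SEI from extradata has been parsed into s->sei, publish it on
     * the context so it is available before the first decoded frame. */
    ret = ff_h2645_sei_to_context(avctx, &s->sei.common);
    if (ret < 0)
        return ret;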
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 4b01c54147..ce2c4caca1 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -388,7 +388,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
h->width_from_caller = h1->width_from_caller;
h->height_from_caller = h1->height_from_caller;
- h->coded_picture_number = h1->coded_picture_number;
h->first_field = h1->first_field;
h->picture_structure = h1->picture_structure;
h->mb_aff_frame = h1->mb_aff_frame;
@@ -483,7 +482,7 @@ static int h264_frame_start(H264Context *h)
if (!ff_thread_can_start_frame(h->avctx)) {
av_log(h->avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n");
- return -1;
+ return AVERROR_BUG;
}
release_unused_pictures(h, 1);
@@ -1397,7 +1396,7 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
sps = h->ps.sps;
- if (sps && sps->bitstream_restriction_flag &&
+ if (sps->bitstream_restriction_flag &&
h->avctx->has_b_frames < sps->num_reorder_frames) {
h->avctx->has_b_frames = sps->num_reorder_frames;
}
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 727dc1a662..fd23e367b4 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -994,30 +994,38 @@ static int send_next_delayed_frame(H264Context *h, AVFrame *dst_frame,
int *got_frame, int buf_index)
{
int ret, i, out_idx;
- H264Picture *out = h->delayed_pic[0];
+ H264Picture *out;
h->cur_pic_ptr = NULL;
h->first_field = 0;
- out_idx = 0;
- for (i = 1;
- h->delayed_pic[i] &&
- !(h->delayed_pic[i]->f->flags & AV_FRAME_FLAG_KEY) &&
- !h->delayed_pic[i]->mmco_reset;
- i++)
- if (h->delayed_pic[i]->poc < out->poc) {
- out = h->delayed_pic[i];
- out_idx = i;
- }
+ while (h->delayed_pic[0]) {
+ out = h->delayed_pic[0];
+ out_idx = 0;
+ for (i = 1;
+ h->delayed_pic[i] &&
+ !(h->delayed_pic[i]->f->flags & AV_FRAME_FLAG_KEY) &&
+ !h->delayed_pic[i]->mmco_reset;
+ i++)
+ if (h->delayed_pic[i]->poc < out->poc) {
+ out = h->delayed_pic[i];
+ out_idx = i;
+ }
- for (i = out_idx; h->delayed_pic[i]; i++)
- h->delayed_pic[i] = h->delayed_pic[i + 1];
+ for (i = out_idx; h->delayed_pic[i]; i++)
+ h->delayed_pic[i] = h->delayed_pic[i + 1];
- if (out) {
- out->reference &= ~DELAYED_PIC_REF;
- ret = finalize_frame(h, dst_frame, out, got_frame);
- if (ret < 0)
- return ret;
+ if (out) {
+ h->frame_recovered |= out->recovered;
+ out->recovered |= h->frame_recovered & FRAME_RECOVERED_SEI;
+
+ out->reference &= ~DELAYED_PIC_REF;
+ ret = finalize_frame(h, dst_frame, out, got_frame);
+ if (ret < 0)
+ return ret;
+ if (*got_frame)
+ break;
+ }
}
return buf_index;
@@ -1156,7 +1164,7 @@ const FFCodec ff_h264_decoder = {
NULL
},
.caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING |
- FF_CODEC_CAP_ALLOCATE_PROGRESS | FF_CODEC_CAP_INIT_CLEANUP,
+ FF_CODEC_CAP_INIT_CLEANUP,
.flush = h264_decode_flush,
UPDATE_THREAD_CONTEXT(ff_h264_update_thread_context),
UPDATE_THREAD_CONTEXT_FOR_USER(ff_h264_update_thread_context_for_user),
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 447c2499d9..fc50df90f2 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -356,7 +356,6 @@ typedef struct H264Context {
int chroma_x_shift, chroma_y_shift;
int droppable;
- int coded_picture_number;
int context_initialized;
int flags;
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 4d2ee10bab..1ba936be1c 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -158,6 +158,8 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
#elif ARCH_PPC
ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc);
+#elif ARCH_RISCV
+ ff_h264dsp_init_riscv(c, bit_depth, chroma_format_idc);
#elif ARCH_X86
ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc);
#elif ARCH_MIPS
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index e0880c4d88..4a9cb1568d 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -125,6 +125,8 @@ void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc);
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc);
+void ff_h264dsp_init_riscv(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc);
void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
diff --git a/libavcodec/hapdec.c b/libavcodec/hapdec.c
index 22b7c281d1..918eff7876 100644
--- a/libavcodec/hapdec.c
+++ b/libavcodec/hapdec.c
@@ -330,8 +330,6 @@ static int hap_decode(AVCodecContext *avctx, AVFrame *frame,
}
/* Frame is ready to be output */
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/hdrdec.c b/libavcodec/hdrdec.c
index 9b6395bb6d..b7ade48e32 100644
--- a/libavcodec/hdrdec.c
+++ b/libavcodec/hdrdec.c
@@ -212,9 +212,6 @@ convert:
}
}
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index 70d3ca588a..6bc3019147 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -26,7 +26,7 @@
#include "libavutil/internal.h"
#include "hevcdec.h"
-#include "threadframe.h"
+#include "progressframe.h"
#define LUMA 0
#define CB 1
@@ -874,15 +874,15 @@ void ff_hevc_hls_filter(HEVCLocalContext *lc, int x, int y, int ctb_size)
if (y && x_end) {
sao_filter_CTB(lc, s, x, y - ctb_size);
if (s->threads_type & FF_THREAD_FRAME )
- ff_thread_report_progress(&s->ref->tf, y, 0);
+ ff_progress_frame_report(&s->ref->tf, y);
}
if (x_end && y_end) {
sao_filter_CTB(lc, s, x , y);
if (s->threads_type & FF_THREAD_FRAME )
- ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0);
+ ff_progress_frame_report(&s->ref->tf, y + ctb_size);
}
} else if (s->threads_type & FF_THREAD_FRAME && x_end)
- ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0);
+ ff_progress_frame_report(&s->ref->tf, y + ctb_size - 4);
}
void ff_hevc_hls_filters(HEVCLocalContext *lc, int x_ctb, int y_ctb, int ctb_size)
diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c
index 0a8cc2c43d..5591919e2e 100644
--- a/libavcodec/hevc_mvs.c
+++ b/libavcodec/hevc_mvs.c
@@ -23,7 +23,7 @@
#include "hevc.h"
#include "hevcdec.h"
-#include "threadframe.h"
+#include "progressframe.h"
static const uint8_t l0_l1_cand_idx[12][2] = {
{ 0, 1, },
@@ -248,7 +248,7 @@ static int temporal_luma_motion_vector(const HEVCContext *s, int x0, int y0,
x &= ~15;
y &= ~15;
if (s->threads_type == FF_THREAD_FRAME)
- ff_thread_await_progress(&ref->tf, y, 0);
+ ff_progress_frame_await(&ref->tf, y);
x_pu = x >> s->ps.sps->log2_min_pu_size;
y_pu = y >> s->ps.sps->log2_min_pu_size;
temp_col = TAB_MVF(x_pu, y_pu);
@@ -262,7 +262,7 @@ static int temporal_luma_motion_vector(const HEVCContext *s, int x0, int y0,
x &= ~15;
y &= ~15;
if (s->threads_type == FF_THREAD_FRAME)
- ff_thread_await_progress(&ref->tf, y, 0);
+ ff_progress_frame_await(&ref->tf, y);
x_pu = x >> s->ps.sps->log2_min_pu_size;
y_pu = y >> s->ps.sps->log2_min_pu_size;
temp_col = TAB_MVF(x_pu, y_pu);
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index d90f172c46..7b486ce0af 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -107,6 +107,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
int k = 0;
int i;
+ rps->used = 0;
rps->rps_predict = 0;
if (rps != sps->st_rps && sps->nb_st_rps)
@@ -114,6 +115,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
if (rps->rps_predict) {
const ShortTermRPS *rps_ridx;
+ uint8_t used[32] = { 0 };
int delta_rps;
if (is_slice_header) {
@@ -139,13 +141,13 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps;
for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
- int used = rps->used[k] = get_bits1(gb);
+ used[k] = get_bits1(gb);
- rps->use_delta_flag = 0;
- if (!used)
- rps->use_delta_flag = get_bits1(gb);
+ rps->use_delta = 0;
+ if (!used[k])
+ rps->use_delta = get_bits1(gb);
- if (used || rps->use_delta_flag) {
+ if (used[k] || rps->use_delta) {
if (i < rps_ridx->num_delta_pocs)
delta_poc = delta_rps + rps_ridx->delta_poc[i];
else
@@ -157,7 +159,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
}
- if (k >= FF_ARRAY_ELEMS(rps->used)) {
+ if (k >= FF_ARRAY_ELEMS(used)) {
av_log(avctx, AV_LOG_ERROR,
"Invalid num_delta_pocs: %d\n", k);
return AVERROR_INVALIDDATA;
@@ -167,35 +169,38 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
rps->num_negative_pics = k0;
// sort in increasing order (smallest first)
if (rps->num_delta_pocs != 0) {
- int used, tmp;
+ int u, tmp;
for (i = 1; i < rps->num_delta_pocs; i++) {
delta_poc = rps->delta_poc[i];
- used = rps->used[i];
+ u = used[i];
for (k = i - 1; k >= 0; k--) {
tmp = rps->delta_poc[k];
if (delta_poc < tmp) {
rps->delta_poc[k + 1] = tmp;
- rps->used[k + 1] = rps->used[k];
+ used[k + 1] = used[k];
rps->delta_poc[k] = delta_poc;
- rps->used[k] = used;
+ used[k] = u;
}
}
}
}
if ((rps->num_negative_pics >> 1) != 0) {
- int used;
+ int u;
k = rps->num_negative_pics - 1;
// flip the negative values to largest first
for (i = 0; i < rps->num_negative_pics >> 1; i++) {
delta_poc = rps->delta_poc[i];
- used = rps->used[i];
+ u = used[i];
rps->delta_poc[i] = rps->delta_poc[k];
- rps->used[i] = rps->used[k];
+ used[i] = used[k];
rps->delta_poc[k] = delta_poc;
- rps->used[k] = used;
+ used[k] = u;
k--;
}
}
+
+ for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++)
+ rps->used |= used[i] * (1 << i);
} else {
unsigned int nb_positive_pics;
@@ -213,7 +218,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
int prev = 0;
for (i = 0; i < rps->num_negative_pics; i++) {
- delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1;
+ delta_poc = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
@@ -222,11 +227,11 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
prev -= delta_poc;
rps->delta_poc[i] = prev;
- rps->used[i] = get_bits1(gb);
+ rps->used |= get_bits1(gb) * (1 << i);
}
prev = 0;
for (i = 0; i < nb_positive_pics; i++) {
- delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1;
+ delta_poc = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
@@ -235,7 +240,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
prev += delta_poc;
rps->delta_poc[rps->num_negative_pics + i] = prev;
- rps->used[rps->num_negative_pics + i] = get_bits1(gb);
+ rps->used |= get_bits1(gb) * (1 << (rps->num_negative_pics + i));
}
}
}
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 9801b4347f..d06d7cf1d4 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -70,18 +70,19 @@ typedef struct HEVCHdrParams {
} HEVCHdrParams;
typedef struct ShortTermRPS {
- uint8_t rps_predict;
- unsigned int delta_idx;
- uint8_t use_delta_flag;
- uint8_t delta_rps_sign;
- unsigned int abs_delta_rps;
- unsigned int num_negative_pics;
- int num_delta_pocs;
- int rps_idx_num_delta_pocs;
- int32_t delta_poc_s0[32];
- int32_t delta_poc_s1[32];
int32_t delta_poc[32];
- uint8_t used[32];
+ uint32_t used;
+
+ uint8_t delta_idx;
+ uint8_t num_negative_pics;
+ uint8_t num_delta_pocs;
+ uint8_t rps_idx_num_delta_pocs;
+
+ uint16_t abs_delta_rps;
+ unsigned delta_rps_sign:1;
+
+ unsigned rps_predict:1;
+ unsigned use_delta:1;
} ShortTermRPS;
typedef struct HEVCWindow {
@@ -240,38 +241,38 @@ typedef struct HEVCSPS {
int max_transform_hierarchy_depth_inter;
int max_transform_hierarchy_depth_intra;
- unsigned separate_colour_plane:1;
- unsigned conformance_window:1;
- unsigned pcm_enabled:1;
- unsigned pcm_loop_filter_disabled:1;
- unsigned sublayer_ordering_info:1;
- unsigned temporal_id_nesting:1;
- unsigned extension_present:1;
- unsigned scaling_list_enabled:1;
- unsigned amp_enabled:1;
- unsigned sao_enabled:1;
- unsigned long_term_ref_pics_present:1;
- unsigned temporal_mvp_enabled:1;
- unsigned strong_intra_smoothing_enabled:1;
- unsigned range_extension:1;
- unsigned transform_skip_rotation_enabled:1;
- unsigned transform_skip_context_enabled:1;
- unsigned implicit_rdpcm_enabled:1;
- unsigned explicit_rdpcm_enabled:1;
- unsigned extended_precision_processing:1;
- unsigned intra_smoothing_disabled:1;
- unsigned high_precision_offsets_enabled:1;
- unsigned persistent_rice_adaptation_enabled:1;
- unsigned cabac_bypass_alignment_enabled:1;
-
- unsigned multilayer_extension:1;
- unsigned sps_3d_extension:1;
-
- unsigned scc_extension:1;
- unsigned curr_pic_ref_enabled:1;
- unsigned palette_mode_enabled:1;
- unsigned palette_predictor_initializers_present:1;
- unsigned intra_boundary_filtering_disabled:1;
+ uint8_t separate_colour_plane;
+ uint8_t conformance_window;
+ uint8_t pcm_enabled;
+ uint8_t pcm_loop_filter_disabled;
+ uint8_t sublayer_ordering_info;
+ uint8_t temporal_id_nesting;
+ uint8_t extension_present;
+ uint8_t scaling_list_enabled;
+ uint8_t amp_enabled;
+ uint8_t sao_enabled;
+ uint8_t long_term_ref_pics_present;
+ uint8_t temporal_mvp_enabled;
+ uint8_t strong_intra_smoothing_enabled;
+ uint8_t range_extension;
+ uint8_t transform_skip_rotation_enabled;
+ uint8_t transform_skip_context_enabled;
+ uint8_t implicit_rdpcm_enabled;
+ uint8_t explicit_rdpcm_enabled;
+ uint8_t extended_precision_processing;
+ uint8_t intra_smoothing_disabled;
+ uint8_t high_precision_offsets_enabled;
+ uint8_t persistent_rice_adaptation_enabled;
+ uint8_t cabac_bypass_alignment_enabled;
+
+ uint8_t multilayer_extension;
+ uint8_t sps_3d_extension;
+
+ uint8_t scc_extension;
+ uint8_t curr_pic_ref_enabled;
+ uint8_t palette_mode_enabled;
+ uint8_t palette_predictor_initializers_present;
+ uint8_t intra_boundary_filtering_disabled;
int palette_max_size;
int delta_palette_max_predictor_size;
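
With used[] collapsed into a 32-bit mask, call sites switch from array indexing to bit tests (see the hevc_refs.c hunk below). The patch open-codes those accesses; purely as an illustration of the convention, equivalent helpers would be:

    /* Illustrative helpers for the new bitmask (not part of the patch):
     * one bit per delta POC, i < 32. */
    static inline void st_rps_set_used(ShortTermRPS *rps, unsigned i, int used)
    {
        rps->used |= (uint32_t)!!used << i;
    }

    static inline int st_rps_is_used(const ShortTermRPS *rps, unsigned i)
    {
        return (rps->used >> i) & 1;
    }

The rewrite also drops the delta_poc_s0/delta_poc_s1 copies and narrows the remaining counters, shrinking ShortTermRPS considerably.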
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index aed649933d..d6dc2f9e0a 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -26,18 +26,14 @@
#include "decode.h"
#include "hevc.h"
#include "hevcdec.h"
+#include "progressframe.h"
#include "refstruct.h"
-#include "threadframe.h"
void ff_hevc_unref_frame(HEVCFrame *frame, int flags)
{
- /* frame->frame can be NULL if context init failed */
- if (!frame->frame || !frame->frame->buf[0])
- return;
-
frame->flags &= ~flags;
if (!frame->flags) {
- ff_thread_release_ext_buffer(&frame->tf);
+ ff_progress_frame_unref(&frame->tf);
av_frame_unref(frame->frame_grain);
frame->needs_fg = 0;
@@ -83,11 +79,11 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
int i, j, ret;
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
HEVCFrame *frame = &s->DPB[i];
- if (frame->frame->buf[0])
+ if (frame->frame)
continue;
- ret = ff_thread_get_ext_buffer(s->avctx, &frame->tf,
- AV_GET_BUFFER_FLAG_REF);
+ ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
+ AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return NULL;
@@ -135,7 +131,7 @@ int ff_hevc_set_new_ref(HEVCContext *s, AVFrame **frame, int poc)
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
HEVCFrame *frame = &s->DPB[i];
- if (frame->frame->buf[0] && frame->sequence == s->seq_decode &&
+ if (frame->frame && frame->sequence == s->seq_decode &&
frame->poc == poc) {
av_log(s->avctx, AV_LOG_ERROR, "Duplicate POC in a sequence: %d.\n",
poc);
@@ -394,7 +390,7 @@ static HEVCFrame *find_ref_idx(HEVCContext *s, int poc, uint8_t use_msb)
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
HEVCFrame *ref = &s->DPB[i];
- if (ref->frame->buf[0] && ref->sequence == s->seq_decode) {
+ if (ref->frame && ref->sequence == s->seq_decode) {
if ((ref->poc & mask) == poc && (use_msb || ref->poc != s->poc))
return ref;
}
@@ -441,7 +437,7 @@ static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc)
frame->flags = 0;
if (s->threads_type == FF_THREAD_FRAME)
- ff_thread_report_progress(&frame->tf, INT_MAX, 0);
+ ff_progress_frame_report(&frame->tf, INT_MAX);
return frame;
}
@@ -501,7 +497,7 @@ int ff_hevc_frame_rps(HEVCContext *s)
int poc = s->poc + short_rps->delta_poc[i];
int list;
- if (!short_rps->used[i])
+ if (!(short_rps->used & (1 << i)))
list = ST_FOLL;
else if (i < short_rps->num_negative_pics)
list = ST_CURR_BEF;
@@ -540,9 +536,9 @@ int ff_hevc_frame_nb_refs(const HEVCContext *s)
if (rps) {
for (i = 0; i < rps->num_negative_pics; i++)
- ret += !!rps->used[i];
+ ret += !!(rps->used & (1 << i));
for (; i < rps->num_delta_pocs; i++)
- ret += !!rps->used[i];
+ ret += !!(rps->used & (1 << i));
}
if (long_rps) {
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 60c1553843..ff9a418926 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -49,9 +49,9 @@
#include "hwconfig.h"
#include "internal.h"
#include "profiles.h"
+#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
-#include "threadframe.h"
static const uint8_t hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
@@ -1867,7 +1867,7 @@ static void hevc_await_progress(const HEVCContext *s, const HEVCFrame *ref,
if (s->threads_type == FF_THREAD_FRAME ) {
int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
- ff_thread_await_progress(&ref->tf, y, 0);
+ ff_progress_frame_await(&ref->tf, y);
}
}
@@ -1969,13 +1969,13 @@ static void hls_prediction_unit(HEVCLocalContext *lc, int x0, int y0,
if (current_mv.pred_flag & PF_L0) {
ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
- if (!ref0 || !ref0->frame->data[0])
+ if (!ref0 || !ref0->frame)
return;
hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
}
if (current_mv.pred_flag & PF_L1) {
ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
- if (!ref1 || !ref1->frame->data[0])
+ if (!ref1 || !ref1->frame)
return;
hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
}
@@ -2899,10 +2899,15 @@ static int hevc_frame_start(HEVCContext *s)
!(s->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) &&
!s->avctx->hwaccel;
+ ret = set_side_data(s);
+ if (ret < 0)
+ goto fail;
+
if (s->ref->needs_fg &&
- s->sei.common.film_grain_characteristics.present &&
- !ff_h274_film_grain_params_supported(s->sei.common.film_grain_characteristics.model_id,
- s->ref->frame->format)) {
+ (s->sei.common.film_grain_characteristics.present &&
+ !ff_h274_film_grain_params_supported(s->sei.common.film_grain_characteristics.model_id,
+ s->ref->frame->format)
+ || !av_film_grain_params_select(s->ref->frame))) {
av_log_once(s->avctx, AV_LOG_WARNING, AV_LOG_DEBUG, &s->film_grain_warning_shown,
"Unsupported film grain parameters. Ignoring film grain.\n");
s->ref->needs_fg = 0;
@@ -2916,10 +2921,6 @@ static int hevc_frame_start(HEVCContext *s)
goto fail;
}
- ret = set_side_data(s);
- if (ret < 0)
- goto fail;
-
s->frame->pict_type = 3 - s->sh.slice_type;
if (!IS_IRAP(s))
@@ -3244,7 +3245,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
fail:
if (s->ref && s->threads_type == FF_THREAD_FRAME)
- ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
+ ff_progress_frame_report(&s->ref->tf, INT_MAX);
return ret;
}
@@ -3370,14 +3371,13 @@ static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
}
sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &sd_size);
- if (sd && sd_size > 0) {
- int old = s->dovi_ctx.dv_profile;
-
- ff_dovi_update_cfg(&s->dovi_ctx, (AVDOVIDecoderConfigurationRecord *) sd);
+ if (sd && sd_size >= sizeof(s->dovi_ctx.cfg)) {
+ int old = s->dovi_ctx.cfg.dv_profile;
+ s->dovi_ctx.cfg = *(AVDOVIDecoderConfigurationRecord *) sd;
if (old)
av_log(avctx, AV_LOG_DEBUG,
"New DOVI configuration record from input packet (profile %d -> %u).\n",
- old, s->dovi_ctx.dv_profile);
+ old, s->dovi_ctx.cfg.dv_profile);
}
s->ref = s->collocated_ref = NULL;
@@ -3422,14 +3422,14 @@ static int hevc_ref_frame(HEVCFrame *dst, HEVCFrame *src)
{
int ret;
- ret = ff_thread_ref_frame(&dst->tf, &src->tf);
- if (ret < 0)
- return ret;
+ ff_progress_frame_ref(&dst->tf, &src->tf);
if (src->needs_fg) {
ret = av_frame_ref(dst->frame_grain, src->frame_grain);
- if (ret < 0)
+ if (ret < 0) {
+ ff_hevc_unref_frame(dst, ~0);
return ret;
+ }
dst->needs_fg = 1;
}
@@ -3469,7 +3469,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
ff_hevc_unref_frame(&s->DPB[i], ~0);
- av_frame_free(&s->DPB[i].frame);
av_frame_free(&s->DPB[i].frame_grain);
}
@@ -3511,11 +3510,6 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
return AVERROR(ENOMEM);
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
- s->DPB[i].frame = av_frame_alloc();
- if (!s->DPB[i].frame)
- return AVERROR(ENOMEM);
- s->DPB[i].tf.f = s->DPB[i].frame;
-
s->DPB[i].frame_grain = av_frame_alloc();
if (!s->DPB[i].frame_grain)
return AVERROR(ENOMEM);
@@ -3547,7 +3541,7 @@ static int hevc_update_thread_context(AVCodecContext *dst,
for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
ff_hevc_unref_frame(&s->DPB[i], ~0);
- if (s0->DPB[i].frame->buf[0]) {
+ if (s0->DPB[i].frame) {
ret = hevc_ref_frame(&s->DPB[i], &s0->DPB[i]);
if (ret < 0)
return ret;
@@ -3660,11 +3654,15 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
if (ret < 0) {
return ret;
}
+
+ ret = ff_h2645_sei_to_context(avctx, &s->sei.common);
+ if (ret < 0)
+ return ret;
}
sd = ff_get_coded_side_data(avctx, AV_PKT_DATA_DOVI_CONF);
- if (sd && sd->size > 0)
- ff_dovi_update_cfg(&s->dovi_ctx, (AVDOVIDecoderConfigurationRecord *) sd->data);
+ if (sd && sd->size >= sizeof(s->dovi_ctx.cfg))
+ s->dovi_ctx.cfg = *(AVDOVIDecoderConfigurationRecord *) sd->data;
}
return 0;
@@ -3717,7 +3715,8 @@ const FFCodec ff_hevc_decoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
.caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING |
- FF_CODEC_CAP_ALLOCATE_PROGRESS | FF_CODEC_CAP_INIT_CLEANUP,
+ FF_CODEC_CAP_USES_PROGRESSFRAMES |
+ FF_CODEC_CAP_INIT_CLEANUP,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
.hw_configs = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_HEVC_DXVA2_HWACCEL
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 5a4ed270e8..5aa3d40450 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -40,7 +40,7 @@
#include "hevc_sei.h"
#include "hevcdsp.h"
#include "h274.h"
-#include "threadframe.h"
+#include "progressframe.h"
#include "videodsp.h"
#define SHIFT_CTB_WPP 2
@@ -352,9 +352,13 @@ typedef struct DBParams {
#define HEVC_SEQUENCE_COUNTER_INVALID (HEVC_SEQUENCE_COUNTER_MASK + 1)
typedef struct HEVCFrame {
- AVFrame *frame;
+ union {
+ struct {
+ AVFrame *frame;
+ };
+ ProgressFrame tf;
+ };
AVFrame *frame_grain;
- ThreadFrame tf;
int needs_fg; /* 1 if grain needs to be applied by the decoder */
MvField *tab_mvf; ///< RefStruct reference
RefPicList *refPicList;
@@ -435,6 +439,10 @@ typedef struct HEVCLocalContext {
/* properties of the boundary of the current CTB for the purposes
* of the deblocking filter */
int boundary_flags;
+
+ // an array of these structs is used for per-thread state - pad its size
+ // to avoid false sharing
+ char padding[128];
} HEVCLocalContext;
typedef struct HEVCContext {
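
The anonymous union above makes HEVCFrame.frame an alias for the ProgressFrame's AVFrame pointer. That only holds if ProgressFrame begins with that pointer, which is an assumption about progressframe.h not shown in this diff; a hypothetical compile-time check would make the dependency explicit:

    /* Hypothetical check (not in the patch): the union relies on the
     * AVFrame pointer being the first member of ProgressFrame. */
    _Static_assert(offsetof(ProgressFrame, f) == 0,
                   "HEVCFrame.frame must alias ProgressFrame.f");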
diff --git a/libavcodec/hq_hqa.c b/libavcodec/hq_hqa.c
index 096fb65dc7..738ed9868d 100644
--- a/libavcodec/hq_hqa.c
+++ b/libavcodec/hq_hqa.c
@@ -366,9 +366,6 @@ static int hq_hqa_decode_frame(AVCodecContext *avctx, AVFrame *pic,
return ret;
}
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/hqx.c b/libavcodec/hqx.c
index 51099aa684..ee6c5a6622 100644
--- a/libavcodec/hqx.c
+++ b/libavcodec/hqx.c
@@ -504,9 +504,6 @@ static int hqx_decode_frame(AVCodecContext *avctx, AVFrame *frame,
avctx->execute2(avctx, decode_slice_thread, NULL, NULL, 16);
- ctx->pic->flags |= AV_FRAME_FLAG_KEY;
- ctx->pic->pict_type = AV_PICTURE_TYPE_I;
-
*got_picture_ptr = 1;
return avpkt->size;
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index aaba313bf1..f22c5ebc59 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -28,12 +28,12 @@
* huffyuv codec for libavcodec.
*/
+#include <stddef.h>
#include <stdint.h>
-#include "libavutil/attributes.h"
#include "libavutil/error.h"
#include "libavutil/log.h"
-#include "libavutil/mem.h"
+#include "libavutil/macros.h"
#include "huffyuv.h"
@@ -49,7 +49,7 @@ int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int
for (int i = FF_ARRAY_ELEMS(lens) - 1; i > 0; i--) {
if ((lens[i] + codes[i]) & 1) {
av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n");
- return -1;
+ return AVERROR_INVALIDDATA;
}
codes[i - 1] = (lens[i] + codes[i]) >> 1;
}
@@ -59,26 +59,3 @@ int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int
}
return 0;
}
-
-av_cold int ff_huffyuv_alloc_temp(uint8_t *temp[3], uint16_t *temp16[3], int width)
-{
- int i;
-
- for (i=0; i<3; i++) {
- temp[i] = av_malloc(4 * width + 16);
- if (!temp[i])
- return AVERROR(ENOMEM);
- temp16[i] = (uint16_t*)temp[i];
- }
- return 0;
-}
-
-av_cold void ff_huffyuv_common_end(uint8_t *temp[3], uint16_t *temp16[3])
-{
- int i;
-
- for(i = 0; i < 3; i++) {
- av_freep(&temp[i]);
- temp16[i] = NULL;
- }
-}
diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h
index 22a766611e..62866b7a48 100644
--- a/libavcodec/huffyuv.h
+++ b/libavcodec/huffyuv.h
@@ -55,8 +55,6 @@ typedef enum Predictor {
MEDIAN,
} Predictor;
-void ff_huffyuv_common_end(uint8_t *temp[3], uint16_t *temp16[3]);
-int ff_huffyuv_alloc_temp(uint8_t *temp[3], uint16_t *temp16[3], int width);
int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n);
#endif /* AVCODEC_HUFFYUV_H */
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 3bed27be21..a8ccb724f5 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -36,6 +36,7 @@
#include "avcodec.h"
#include "bswapdsp.h"
+#include "bytestream.h"
#include "codec_internal.h"
#include "get_bits.h"
#include "huffyuv.h"
@@ -70,8 +71,10 @@ typedef struct HYuvDecContext {
int context;
int last_slice_end;
- uint8_t *temp[3];
- uint16_t *temp16[3]; ///< identical to temp but 16bit type
+ union {
+ uint8_t *temp[3];
+ uint16_t *temp16[3];
+ };
uint8_t len[4][MAX_VLC_N];
uint32_t bits[4][MAX_VLC_N];
uint32_t pix_bgr_map[1<<VLC_BITS];
@@ -84,21 +87,17 @@ typedef struct HYuvDecContext {
} HYuvDecContext;
-#define classic_shift_luma_table_size 42
-static const unsigned char classic_shift_luma[classic_shift_luma_table_size + AV_INPUT_BUFFER_PADDING_SIZE] = {
+static const uint8_t classic_shift_luma[] = {
34, 36, 35, 69, 135, 232, 9, 16, 10, 24, 11, 23, 12, 16, 13, 10,
14, 8, 15, 8, 16, 8, 17, 20, 16, 10, 207, 206, 205, 236, 11, 8,
- 10, 21, 9, 23, 8, 8, 199, 70, 69, 68, 0,
- 0,0,0,0,0,0,0,0,
+ 10, 21, 9, 23, 8, 8, 199, 70, 69, 68,
};
-#define classic_shift_chroma_table_size 59
-static const unsigned char classic_shift_chroma[classic_shift_chroma_table_size + AV_INPUT_BUFFER_PADDING_SIZE] = {
+static const uint8_t classic_shift_chroma[] = {
66, 36, 37, 38, 39, 40, 41, 75, 76, 77, 110, 239, 144, 81, 82, 83,
84, 85, 118, 183, 56, 57, 88, 89, 56, 89, 154, 57, 58, 57, 26, 141,
57, 56, 58, 57, 58, 57, 184, 119, 214, 245, 116, 83, 82, 49, 80, 79,
- 78, 77, 44, 75, 41, 40, 39, 38, 37, 36, 34, 0,
- 0,0,0,0,0,0,0,0,
+ 78, 77, 44, 75, 41, 40, 39, 38, 37, 36, 34,
};
static const unsigned char classic_add_luma[256] = {
@@ -139,23 +138,30 @@ static const unsigned char classic_add_chroma[256] = {
6, 12, 8, 10, 7, 9, 6, 4, 6, 2, 2, 3, 3, 3, 3, 2,
};
-static int read_len_table(uint8_t *dst, GetBitContext *gb, int n)
+static int read_len_table(uint8_t *dst, GetByteContext *gb, int n)
{
int i, val, repeat;
for (i = 0; i < n;) {
- repeat = get_bits(gb, 3);
- val = get_bits(gb, 5);
- if (repeat == 0)
- repeat = get_bits(gb, 8);
- if (i + repeat > n || get_bits_left(gb) < 0) {
- av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n");
- return AVERROR_INVALIDDATA;
+ if (bytestream2_get_bytes_left(gb) <= 0)
+ goto error;
+ repeat = bytestream2_peek_byteu(gb) >> 5;
+ val = bytestream2_get_byteu(gb) & 0x1F;
+ if (repeat == 0) {
+ if (bytestream2_get_bytes_left(gb) <= 0)
+ goto error;
+ repeat = bytestream2_get_byteu(gb);
}
+ if (i + repeat > n)
+ goto error;
while (repeat--)
dst[i++] = val;
}
return 0;
+
+error:
+ av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n");
+ return AVERROR_INVALIDDATA;
}
static int generate_joint_tables(HYuvDecContext *s)
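
read_len_table() now consumes whole bytes through GetByteContext instead of a bit reader, but the table layout is unchanged: the top 3 bits of each byte are a repeat count, the low 5 bits a code length, and a repeat field of 0 pulls an 8-bit repeat count from the next byte. A worked example with illustrative values:

    /* Worked example of the byte layout consumed above:
     *   0x69      -> repeat = 0x69 >> 5 = 3,  val = 0x69 & 0x1F = 9
     *   0x10 0x28 -> repeat field is 0, so the next byte (0x28 = 40) gives
     *                the repeat count; val = 0x10 & 0x1F = 16
     * i.e. three code lengths of 9, then forty code lengths of 16. */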
@@ -251,12 +257,11 @@ out:
static int read_huffman_tables(HYuvDecContext *s, const uint8_t *src, int length)
{
- GetBitContext gb;
+ GetByteContext gb;
int i, ret;
int count = 3;
- if ((ret = init_get_bits(&gb, src, length * 8)) < 0)
- return ret;
+ bytestream2_init(&gb, src, length);
if (s->version > 2)
count = 1 + s->alpha + 2*s->chroma;
@@ -275,23 +280,23 @@ static int read_huffman_tables(HYuvDecContext *s, const uint8_t *src, int length
if ((ret = generate_joint_tables(s)) < 0)
return ret;
- return (get_bits_count(&gb) + 7) / 8;
+ return bytestream2_tell(&gb);
}
static int read_old_huffman_tables(HYuvDecContext *s)
{
- GetBitContext gb;
+ GetByteContext gb;
int i, ret;
- init_get_bits(&gb, classic_shift_luma,
- classic_shift_luma_table_size * 8);
- if ((ret = read_len_table(s->len[0], &gb, 256)) < 0)
- return ret;
+ bytestream2_init(&gb, classic_shift_luma,
+ sizeof(classic_shift_luma));
+ ret = read_len_table(s->len[0], &gb, 256);
+ av_assert1(ret >= 0);
- init_get_bits(&gb, classic_shift_chroma,
- classic_shift_chroma_table_size * 8);
- if ((ret = read_len_table(s->len[1], &gb, 256)) < 0)
- return ret;
+ bytestream2_init(&gb, classic_shift_chroma,
+ sizeof(classic_shift_chroma));
+ ret = read_len_table(s->len[1], &gb, 256);
+ av_assert1(ret >= 0);
for (i = 0; i < 256; i++)
s->bits[0][i] = classic_add_luma[i];
@@ -323,7 +328,9 @@ static av_cold int decode_end(AVCodecContext *avctx)
HYuvDecContext *s = avctx->priv_data;
int i;
- ff_huffyuv_common_end(s->temp, s->temp16);
+ for (int i = 0; i < 3; i++)
+ av_freep(&s->temp[i]);
+
av_freep(&s->bitstream_buffer);
for (i = 0; i < 8; i++)
@@ -346,7 +353,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
ff_bswapdsp_init(&s->bdsp);
ff_huffyuvdsp_init(&s->hdsp, avctx->pix_fmt);
ff_llviddsp_init(&s->llviddsp);
- memset(s->vlc, 0, 4 * sizeof(VLC));
s->interlaced = avctx->height > 288;
s->bgr32 = 1;
@@ -600,8 +606,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
- if ((ret = ff_huffyuv_alloc_temp(s->temp, s->temp16, avctx->width)) < 0)
- return ret;
+ for (int i = 0; i < 3; i++) {
+ s->temp[i] = av_malloc(4 * avctx->width + 16);
+ if (!s->temp[i])
+ return AVERROR(ENOMEM);
+ }
return 0;
}
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 0222565245..294d6ad41c 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -65,8 +65,10 @@ typedef struct HYuvEncContext {
int context;
int picture_number;
- uint8_t *temp[3];
- uint16_t *temp16[3]; ///< identical to temp but 16bit type
+ union {
+ uint8_t *temp[3];
+ uint16_t *temp16[3];
+ };
uint64_t stats[4][MAX_VLC_N];
uint8_t len[4][MAX_VLC_N];
uint32_t bits[4][MAX_VLC_N];
@@ -230,9 +232,9 @@ static int store_huffman_tables(HYuvEncContext *s, uint8_t *buf)
if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], s->vlc_n, 0)) < 0)
return ret;
- if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->vlc_n) < 0) {
- return -1;
- }
+ ret = ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->vlc_n);
+ if (ret < 0)
+ return ret;
size += store_table(s, s->len[i], buf + size);
}
@@ -430,12 +432,14 @@ static av_cold int encode_init(AVCodecContext *avctx)
s->stats[i][j]= 0;
}
- ret = ff_huffyuv_alloc_temp(s->temp, s->temp16, avctx->width);
- if (ret < 0)
- return ret;
-
s->picture_number=0;
+ for (int i = 0; i < 3; i++) {
+ s->temp[i] = av_malloc(4 * avctx->width + 16);
+ if (!s->temp[i])
+ return AVERROR(ENOMEM);
+ }
+
return 0;
}
static int encode_422_bitstream(HYuvEncContext *s, int offset, int count)
@@ -495,7 +499,7 @@ static int encode_422_bitstream(HYuvEncContext *s, int offset, int count)
static int encode_plane_bitstream(HYuvEncContext *s, int width, int plane)
{
- int i, count = width/2;
+ int count = width/2;
if (put_bytes_left(&s->pb, 0) < count * s->bps / 2) {
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
@@ -542,112 +546,52 @@ static int encode_plane_bitstream(HYuvEncContext *s, int width, int plane)
put_bits(&s->pb, s->len[plane][y1>>2], s->bits[plane][y1>>2]);\
put_bits(&s->pb, 2, y1&3);
- if (s->bps <= 8) {
- if (s->flags & AV_CODEC_FLAG_PASS1) {
- for (i = 0; i < count; i++) {
- LOAD2;
- STAT2;
- }
- if (width&1) {
- LOADEND;
- STATEND;
- }
- }
- if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
- return 0;
+#define ENCODE_PLANE(LOAD, LOADEND, WRITE, WRITEEND, STAT, STATEND) \
+do { \
+ if (s->flags & AV_CODEC_FLAG_PASS1) { \
+ for (int i = 0; i < count; i++) { \
+ LOAD; \
+ STAT; \
+ } \
+ if (width & 1) { \
+ LOADEND; \
+ STATEND; \
+ } \
+ } \
+ if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) \
+ return 0; \
+ \
+ if (s->context) { \
+ for (int i = 0; i < count; i++) { \
+ LOAD; \
+ STAT; \
+ WRITE; \
+ } \
+ if (width & 1) { \
+ LOADEND; \
+ STATEND; \
+ WRITEEND; \
+ } \
+ } else { \
+ for (int i = 0; i < count; i++) { \
+ LOAD; \
+ WRITE; \
+ } \
+ if (width & 1) { \
+ LOADEND; \
+ WRITEEND; \
+ } \
+ } \
+} while (0)
- if (s->context) {
- for (i = 0; i < count; i++) {
- LOAD2;
- STAT2;
- WRITE2;
- }
- if (width&1) {
- LOADEND;
- STATEND;
- WRITEEND;
- }
- } else {
- for (i = 0; i < count; i++) {
- LOAD2;
- WRITE2;
- }
- if (width&1) {
- LOADEND;
- WRITEEND;
- }
- }
+ if (s->bps <= 8) {
+ ENCODE_PLANE(LOAD2, LOADEND, WRITE2, WRITEEND, STAT2, STATEND);
} else if (s->bps <= 14) {
int mask = s->n - 1;
- if (s->flags & AV_CODEC_FLAG_PASS1) {
- for (i = 0; i < count; i++) {
- LOAD2_14;
- STAT2;
- }
- if (width&1) {
- LOADEND_14;
- STATEND;
- }
- }
- if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
- return 0;
-
- if (s->context) {
- for (i = 0; i < count; i++) {
- LOAD2_14;
- STAT2;
- WRITE2;
- }
- if (width&1) {
- LOADEND_14;
- STATEND;
- WRITEEND;
- }
- } else {
- for (i = 0; i < count; i++) {
- LOAD2_14;
- WRITE2;
- }
- if (width&1) {
- LOADEND_14;
- WRITEEND;
- }
- }
+
+ ENCODE_PLANE(LOAD2_14, LOADEND_14, WRITE2, WRITEEND, STAT2, STATEND);
} else {
- if (s->flags & AV_CODEC_FLAG_PASS1) {
- for (i = 0; i < count; i++) {
- LOAD2_16;
- STAT2_16;
- }
- if (width&1) {
- LOADEND_16;
- STATEND_16;
- }
- }
- if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
- return 0;
-
- if (s->context) {
- for (i = 0; i < count; i++) {
- LOAD2_16;
- STAT2_16;
- WRITE2_16;
- }
- if (width&1) {
- LOADEND_16;
- STATEND_16;
- WRITEEND_16;
- }
- } else {
- for (i = 0; i < count; i++) {
- LOAD2_16;
- WRITE2_16;
- }
- if (width&1) {
- LOADEND_16;
- WRITEEND_16;
- }
- }
+ ENCODE_PLANE(LOAD2_16, LOADEND_16, WRITE2_16, WRITEEND_16, STAT2_16, STATEND_16);
}
#undef LOAD2
#undef STAT2
@@ -751,16 +695,15 @@ static inline int encode_bgra_bitstream(HYuvEncContext *s, int count, int planes
}
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
- const AVFrame *pict, int *got_packet)
+ const AVFrame *p, int *got_packet)
{
HYuvEncContext *s = avctx->priv_data;
const int width = avctx->width;
const int width2 = avctx->width >> 1;
const int height = avctx->height;
- const int fake_ystride = s->interlaced ? pict->linesize[0]*2 : pict->linesize[0];
- const int fake_ustride = s->interlaced ? pict->linesize[1]*2 : pict->linesize[1];
- const int fake_vstride = s->interlaced ? pict->linesize[2]*2 : pict->linesize[2];
- const AVFrame * const p = pict;
+ const int fake_ystride = (1 + s->interlaced) * p->linesize[0];
+ const int fake_ustride = (1 + s->interlaced) * p->linesize[1];
+ const int fake_vstride = (1 + s->interlaced) * p->linesize[2];
int i, j, size = 0, ret;
if ((ret = ff_alloc_packet(avctx, pkt, width * height * 3 * 4 + FF_INPUT_BUFFER_MIN_SIZE)) < 0)
@@ -1035,47 +978,35 @@ static av_cold int encode_end(AVCodecContext *avctx)
{
HYuvEncContext *s = avctx->priv_data;
- ff_huffyuv_common_end(s->temp, s->temp16);
-
av_freep(&avctx->stats_out);
+ for (int i = 0; i < 3; i++)
+ av_freep(&s->temp[i]);
+
return 0;
}
#define OFFSET(x) offsetof(HYuvEncContext, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
-#define COMMON_OPTIONS \
- { "non_deterministic", "Allow multithreading for e.g. context=1 at the expense of determinism", \
- OFFSET(non_determ), AV_OPT_TYPE_BOOL, { .i64 = 0 }, \
- 0, 1, VE }, \
- { "pred", "Prediction method", OFFSET(predictor), AV_OPT_TYPE_INT, { .i64 = LEFT }, LEFT, MEDIAN, VE, .unit = "pred" }, \
- { "left", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LEFT }, INT_MIN, INT_MAX, VE, .unit = "pred" }, \
- { "plane", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PLANE }, INT_MIN, INT_MAX, VE, .unit = "pred" }, \
- { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MEDIAN }, INT_MIN, INT_MAX, VE, .unit = "pred" }, \
-
-static const AVOption normal_options[] = {
- COMMON_OPTIONS
- { NULL },
-};
-
-static const AVOption ff_options[] = {
- COMMON_OPTIONS
+static const AVOption options[] = {
+ /* ffvhuff-only options */
{ "context", "Set per-frame huffman tables", OFFSET(context), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+ /* Common options */
+ { "non_deterministic", "Allow multithreading for e.g. context=1 at the expense of determinism",
+ OFFSET(non_determ), AV_OPT_TYPE_BOOL, { .i64 = 0 },
+ 0, 1, VE },
+ { "pred", "Prediction method", OFFSET(predictor), AV_OPT_TYPE_INT, { .i64 = LEFT }, LEFT, MEDIAN, VE, .unit = "pred" },
+ { "left", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LEFT }, INT_MIN, INT_MAX, VE, .unit = "pred" },
+ { "plane", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PLANE }, INT_MIN, INT_MAX, VE, .unit = "pred" },
+ { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MEDIAN }, INT_MIN, INT_MAX, VE, .unit = "pred" },
{ NULL },
};
static const AVClass normal_class = {
.class_name = "huffyuv",
.item_name = av_default_item_name,
- .option = normal_options,
- .version = LIBAVUTIL_VERSION_INT,
-};
-
-static const AVClass ff_class = {
- .class_name = "ffvhuff",
- .item_name = av_default_item_name,
- .option = ff_options,
+ .option = options + 1,
.version = LIBAVUTIL_VERSION_INT,
};
@@ -1099,6 +1030,13 @@ const FFCodec ff_huffyuv_encoder = {
};
#if CONFIG_FFVHUFF_ENCODER
+static const AVClass ff_class = {
+ .class_name = "ffvhuff",
+ .item_name = av_default_item_name,
+ .option = options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
const FFCodec ff_ffvhuff_encoder = {
.p.name = "ffvhuff",
CODEC_LONG_NAME("Huffyuv FFmpeg variant"),
diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c
index f8eeb6b44e..5d34892ef7 100644
--- a/libavcodec/intelh263dec.c
+++ b/libavcodec/intelh263dec.c
@@ -19,12 +19,10 @@
*/
#include "codec_internal.h"
-#include "mpegutils.h"
#include "mpegvideo.h"
#include "mpegvideodec.h"
#include "h263data.h"
#include "h263dec.h"
-#include "mpegvideodata.h"
/* don't understand why they choose a different header ! */
int ff_intel_h263_decode_picture_header(MpegEncContext *s)
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index b67c57afca..bc20a797ae 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -54,13 +54,6 @@ typedef struct AVCodecInternal {
int is_copy;
/**
- * This field is set to 1 when frame threading is being used and the parent
- * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
- * one of those actually doing the decoding), 0 otherwise.
- */
- int is_frame_mt;
-
- /**
* Audio encoders can set this flag during init to indicate that they
* want the small last frame to be padded to a multiple of pad_samples.
*/
@@ -68,6 +61,8 @@ typedef struct AVCodecInternal {
struct FramePool *pool;
+ struct FFRefStructPool *progress_frame_pool;
+
void *thread_ctx;
/**
@@ -128,11 +123,7 @@ typedef struct AVCodecInternal {
void *hwaccel_priv_data;
/**
- * decoding: AVERROR_EOF has been returned from ff_decode_get_packet(); must
- * not be used by decoders that use the decode() callback, as they
- * do not call ff_decode_get_packet() directly.
- *
- * encoding: a flush frame has been submitted to avcodec_send_frame().
+ * checks API usage: after codec draining, flush is required to resume operation
*/
int draining;
diff --git a/libavcodec/jni.c b/libavcodec/jni.c
index 1193c608c3..fcb4837413 100644
--- a/libavcodec/jni.c
+++ b/libavcodec/jni.c
@@ -84,11 +84,13 @@ void *av_jni_get_java_vm(void *log_ctx)
int av_jni_set_android_app_ctx(void *app_ctx, void *log_ctx)
{
#if CONFIG_JNI
+ jobjectRefType type;
+
JNIEnv *env = ff_jni_get_env(log_ctx);
if (!env)
return AVERROR(EINVAL);
- jobjectRefType type = (*env)->GetObjectRefType(env, app_ctx);
+ type = (*env)->GetObjectRefType(env, app_ctx);
if (type != JNIGlobalRefType) {
av_log(log_ctx, AV_LOG_ERROR, "Application context must be passed as a global reference");
return AVERROR(EINVAL);
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 28bf6be2fe..d15502a527 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -835,9 +835,6 @@ static int get_tlm(Jpeg2000DecoderContext *s, int n)
case 2:
bytestream2_get_be16(&s->g);
break;
- case 3:
- bytestream2_get_be32(&s->g);
- break;
}
if (SP == 0) {
bytestream2_get_be16(&s->g);
@@ -2506,8 +2503,6 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
/* get picture buffer */
if ((ret = ff_thread_get_buffer(avctx, picture, 0)) < 0)
goto end;
- picture->pict_type = AV_PICTURE_TYPE_I;
- picture->flags |= AV_FRAME_FLAG_KEY;
if (ret = jpeg2000_read_bitstream_packets(s))
goto end;
diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c
index 13ede9068a..2b7c9f0d29 100644
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -215,8 +215,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
}
if (video_size) {
- s->frame->flags |= AV_FRAME_FLAG_KEY;
- s->frame->pict_type = AV_PICTURE_TYPE_I;
#if FF_API_PALETTE_HAS_CHANGED
FF_DISABLE_DEPRECATION_WARNINGS
s->frame->palette_has_changed = s->palette_has_changed;
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 75b557e518..0969448eda 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -570,9 +570,6 @@ static int lag_decode_frame(AVCodecContext *avctx, AVFrame *p,
int i, j, planes = 3;
int ret = 0;
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
frametype = buf[0];
offset_gu = AV_RL32(buf + 1);
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index b4304618e4..b439dbe25e 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -481,9 +481,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
return AVERROR_INVALIDDATA;
}
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
/* always report that the buffer was completely consumed */
diff --git a/libavcodec/leaddec.c b/libavcodec/leaddec.c
index 1406cb0014..947c7275be 100644
--- a/libavcodec/leaddec.c
+++ b/libavcodec/leaddec.c
@@ -182,9 +182,6 @@ static int lead_decode_frame(AVCodecContext *avctx, AVFrame * frame,
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
-
av_fast_padded_malloc(&s->bitstream_buf, &s->bitstream_buf_size, avpkt->size - 8);
if (!s->bitstream_buf)
return AVERROR(ENOMEM);
diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
index d660afab4e..dec74ebecd 100644
--- a/libavcodec/libaomenc.c
+++ b/libavcodec/libaomenc.c
@@ -43,6 +43,7 @@
#include "avcodec.h"
#include "bsf.h"
#include "codec_internal.h"
+#include "dovi_rpu.h"
#include "encode.h"
#include "internal.h"
#include "libaom.h"
@@ -70,6 +71,7 @@ struct FrameListData {
typedef struct AOMEncoderContext {
AVClass *class;
AVBSFContext *bsf;
+ DOVIContext dovi;
struct aom_codec_ctx encoder;
struct aom_image rawimg;
struct aom_fixed_buf twopass_stats;
@@ -155,27 +157,14 @@ static const char *const ctlidstr[] = {
[AV1E_SET_TILE_COLUMNS] = "AV1E_SET_TILE_COLUMNS",
[AV1E_SET_TILE_ROWS] = "AV1E_SET_TILE_ROWS",
[AV1E_SET_ENABLE_RESTORATION] = "AV1E_SET_ENABLE_RESTORATION",
-#ifdef AOM_CTRL_AV1E_SET_ROW_MT
[AV1E_SET_ROW_MT] = "AV1E_SET_ROW_MT",
-#endif
-#ifdef AOM_CTRL_AV1E_SET_DENOISE_NOISE_LEVEL
[AV1E_SET_DENOISE_NOISE_LEVEL] = "AV1E_SET_DENOISE_NOISE_LEVEL",
-#endif
-#ifdef AOM_CTRL_AV1E_SET_DENOISE_BLOCK_SIZE
[AV1E_SET_DENOISE_BLOCK_SIZE] = "AV1E_SET_DENOISE_BLOCK_SIZE",
-#endif
-#ifdef AOM_CTRL_AV1E_SET_MAX_REFERENCE_FRAMES
[AV1E_SET_MAX_REFERENCE_FRAMES] = "AV1E_SET_MAX_REFERENCE_FRAMES",
-#endif
-#ifdef AOM_CTRL_AV1E_SET_ENABLE_GLOBAL_MOTION
[AV1E_SET_ENABLE_GLOBAL_MOTION] = "AV1E_SET_ENABLE_GLOBAL_MOTION",
-#endif
-#ifdef AOM_CTRL_AV1E_SET_ENABLE_INTRABC
[AV1E_SET_ENABLE_INTRABC] = "AV1E_SET_ENABLE_INTRABC",
-#endif
[AV1E_SET_ENABLE_CDEF] = "AV1E_SET_ENABLE_CDEF",
[AOME_SET_TUNING] = "AOME_SET_TUNING",
-#if AOM_ENCODER_ABI_VERSION >= 22
[AV1E_SET_ENABLE_1TO4_PARTITIONS] = "AV1E_SET_ENABLE_1TO4_PARTITIONS",
[AV1E_SET_ENABLE_AB_PARTITIONS] = "AV1E_SET_ENABLE_AB_PARTITIONS",
[AV1E_SET_ENABLE_RECT_PARTITIONS] = "AV1E_SET_ENABLE_RECT_PARTITIONS",
@@ -204,13 +193,10 @@ static const char *const ctlidstr[] = {
[AV1E_SET_REDUCED_REFERENCE_SET] = "AV1E_SET_REDUCED_REFERENCE_SET",
[AV1E_SET_ENABLE_SMOOTH_INTERINTRA] = "AV1E_SET_ENABLE_SMOOTH_INTERINTRA",
[AV1E_SET_ENABLE_REF_FRAME_MVS] = "AV1E_SET_ENABLE_REF_FRAME_MVS",
-#endif
#ifdef AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS
[AV1E_GET_NUM_OPERATING_POINTS] = "AV1E_GET_NUM_OPERATING_POINTS",
#endif
-#ifdef AOM_CTRL_AV1E_GET_SEQ_LEVEL_IDX
[AV1E_GET_SEQ_LEVEL_IDX] = "AV1E_GET_SEQ_LEVEL_IDX",
-#endif
#ifdef AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX
[AV1E_GET_TARGET_SEQ_LEVEL_IDX] = "AV1E_GET_TARGET_SEQ_LEVEL_IDX",
#endif
@@ -433,10 +419,12 @@ static av_cold int aom_free(AVCodecContext *avctx)
#endif
aom_codec_destroy(&ctx->encoder);
+ aom_img_remove_metadata(&ctx->rawimg);
av_freep(&ctx->twopass_stats.buf);
av_freep(&avctx->stats_out);
free_frame_list(ctx->coded_frame_list);
av_bsf_free(&ctx->bsf);
+ ff_dovi_ctx_unref(&ctx->dovi);
return 0;
}
@@ -691,12 +679,8 @@ static av_cold int aom_init(AVCodecContext *avctx,
AOMContext *ctx = avctx->priv_data;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
struct aom_codec_enc_cfg enccfg = { 0 };
-#ifdef AOM_FRAME_IS_INTRAONLY
aom_codec_flags_t flags =
(avctx->flags & AV_CODEC_FLAG_PSNR) ? AOM_CODEC_USE_PSNR : 0;
-#else
- aom_codec_flags_t flags = 0;
-#endif
AVCPBProperties *cpb_props;
int res;
aom_img_fmt_t img_fmt;
@@ -886,7 +870,6 @@ static av_cold int aom_init(AVCodecContext *avctx,
codecctl_int(avctx, AV1E_SET_ENABLE_CDEF, ctx->enable_cdef);
if (ctx->enable_restoration >= 0)
codecctl_int(avctx, AV1E_SET_ENABLE_RESTORATION, ctx->enable_restoration);
-#if AOM_ENCODER_ABI_VERSION >= 22
if (ctx->enable_rect_partitions >= 0)
codecctl_int(avctx, AV1E_SET_ENABLE_RECT_PARTITIONS, ctx->enable_rect_partitions);
if (ctx->enable_1to4_partitions >= 0)
@@ -943,7 +926,6 @@ static av_cold int aom_init(AVCodecContext *avctx,
codecctl_int(avctx, AV1E_SET_ENABLE_ONESIDED_COMP, ctx->enable_onesided_comp);
if (ctx->enable_smooth_interintra >= 0)
codecctl_int(avctx, AV1E_SET_ENABLE_SMOOTH_INTERINTRA, ctx->enable_smooth_interintra);
-#endif
codecctl_int(avctx, AOME_SET_STATIC_THRESHOLD, ctx->static_thresh);
if (ctx->crf >= 0)
@@ -972,37 +954,25 @@ static av_cold int aom_init(AVCodecContext *avctx,
codecctl_int(avctx, AV1E_SET_TILE_ROWS, ctx->tile_rows_log2);
}
-#ifdef AOM_CTRL_AV1E_SET_DENOISE_NOISE_LEVEL
if (ctx->denoise_noise_level >= 0)
codecctl_int(avctx, AV1E_SET_DENOISE_NOISE_LEVEL, ctx->denoise_noise_level);
-#endif
-#ifdef AOM_CTRL_AV1E_SET_DENOISE_BLOCK_SIZE
if (ctx->denoise_block_size >= 0)
codecctl_int(avctx, AV1E_SET_DENOISE_BLOCK_SIZE, ctx->denoise_block_size);
-#endif
-#ifdef AOM_CTRL_AV1E_SET_ENABLE_GLOBAL_MOTION
if (ctx->enable_global_motion >= 0)
codecctl_int(avctx, AV1E_SET_ENABLE_GLOBAL_MOTION, ctx->enable_global_motion);
-#endif
-#ifdef AOM_CTRL_AV1E_SET_MAX_REFERENCE_FRAMES
if (avctx->refs >= 3) {
codecctl_int(avctx, AV1E_SET_MAX_REFERENCE_FRAMES, avctx->refs);
}
-#endif
-#ifdef AOM_CTRL_AV1E_SET_ROW_MT
if (ctx->row_mt >= 0)
codecctl_int(avctx, AV1E_SET_ROW_MT, ctx->row_mt);
-#endif
-#ifdef AOM_CTRL_AV1E_SET_ENABLE_INTRABC
if (ctx->enable_intrabc >= 0)
codecctl_int(avctx, AV1E_SET_ENABLE_INTRABC, ctx->enable_intrabc);
-#endif
#if AOM_ENCODER_ABI_VERSION >= 23
{
- AVDictionaryEntry *en = NULL;
+ const AVDictionaryEntry *en = NULL;
- while ((en = av_dict_get(ctx->aom_params, "", en, AV_DICT_IGNORE_SUFFIX))) {
+ while ((en = av_dict_iterate(ctx->aom_params, en))) {
int ret = aom_codec_set_option(&ctx->encoder, en->key, en->value);
if (ret != AOM_CODEC_OK) {
log_encoder_error(avctx, en->key);
@@ -1023,6 +993,10 @@ static av_cold int aom_init(AVCodecContext *avctx,
if (!cpb_props)
return AVERROR(ENOMEM);
+ ctx->dovi.logctx = avctx;
+ if ((res = ff_dovi_configure(&ctx->dovi, avctx)) < 0)
+ return res;
+
if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
const AVBitStreamFilter *filter = av_bsf_get_by_name("extract_extradata");
int ret;
@@ -1065,7 +1039,6 @@ static inline void cx_pktcpy(AOMContext *ctx,
dst->flags = src->data.frame.flags;
dst->sz = src->data.frame.sz;
dst->buf = src->data.frame.buf;
-#ifdef AOM_FRAME_IS_INTRAONLY
dst->frame_number = ++ctx->frame_number;
dst->have_sse = ctx->have_sse;
if (ctx->have_sse) {
@@ -1074,7 +1047,6 @@ static inline void cx_pktcpy(AOMContext *ctx,
memcpy(dst->sse, ctx->sse, sizeof(dst->sse));
ctx->have_sse = 0;
}
-#endif
}
/**
@@ -1101,7 +1073,6 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
if (!!(cx_frame->flags & AOM_FRAME_IS_KEY)) {
pkt->flags |= AV_PKT_FLAG_KEY;
-#ifdef AOM_FRAME_IS_INTRAONLY
pict_type = AV_PICTURE_TYPE_I;
} else if (cx_frame->flags & AOM_FRAME_IS_INTRAONLY) {
pict_type = AV_PICTURE_TYPE_I;
@@ -1118,7 +1089,6 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
avctx->error[i] += cx_frame->sse[i + 1];
}
cx_frame->have_sse = 0;
-#endif
}
if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
@@ -1221,7 +1191,6 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
stats->sz += pkt->data.twopass_stats.sz;
break;
}
-#ifdef AOM_FRAME_IS_INTRAONLY
case AOM_CODEC_PSNR_PKT:
{
av_assert0(!ctx->have_sse);
@@ -1232,7 +1201,6 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
ctx->have_sse = 1;
break;
}
-#endif
case AOM_CODEC_CUSTOM_PKT:
// ignore unsupported/unrecognized packet types
break;
@@ -1282,6 +1250,7 @@ static int aom_encode(AVCodecContext *avctx, AVPacket *pkt,
unsigned long duration = 0;
int res, coded_size;
aom_enc_frame_flags_t flags = 0;
+ AVFrameSideData *sd;
if (frame) {
rawimg = &ctx->rawimg;
@@ -1319,6 +1288,25 @@ FF_ENABLE_DEPRECATION_WARNINGS
break;
}
+ aom_img_remove_metadata(rawimg);
+ sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA);
+ if (ctx->dovi.cfg.dv_profile && sd) {
+ const AVDOVIMetadata *metadata = (const AVDOVIMetadata *)sd->data;
+ uint8_t *t35;
+ int size;
+ if ((res = ff_dovi_rpu_generate(&ctx->dovi, metadata, &t35, &size)) < 0)
+ return res;
+ res = aom_img_add_metadata(rawimg, OBU_METADATA_TYPE_ITUT_T35,
+ t35, size, AOM_MIF_ANY_FRAME);
+ av_free(t35);
+ if (res != AOM_CODEC_OK)
+ return AVERROR(ENOMEM);
+ } else if (ctx->dovi.cfg.dv_profile) {
+ av_log(avctx, AV_LOG_ERROR, "Dolby Vision enabled, but received frame "
+ "without AV_FRAME_DATA_DOVI_METADATA\n");
+ return AVERROR_INVALIDDATA;
+ }
+
if (frame->pict_type == AV_PICTURE_TYPE_I)
flags |= AOM_EFLAG_FORCE_KF;
}
@@ -1448,9 +1436,6 @@ static av_cold void av1_init_static(FFCodec *codec)
else
codec->p.pix_fmts = supports_monochrome ? av1_pix_fmts_with_gray :
av1_pix_fmts;
-
- if (aom_codec_version_major() < 2)
- codec->p.capabilities |= AV_CODEC_CAP_EXPERIMENTAL;
}
static av_cold int av1_init(AVCodecContext *avctx)
@@ -1502,6 +1487,8 @@ static const AVOption options[] = {
{ "ssim", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AOM_TUNE_SSIM}, 0, 0, VE, .unit = "tune"},
FF_AV1_PROFILE_OPTS
{ "still-picture", "Encode in single frame mode (typically used for still AVIF images).", OFFSET(still_picture), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, VE },
+ { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
+ { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, .flags = VE, .unit = "dovi" },
{ "enable-rect-partitions", "Enable rectangular partitions", OFFSET(enable_rect_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
{ "enable-1to4-partitions", "Enable 1:4/4:1 partitions", OFFSET(enable_1to4_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
{ "enable-ab-partitions", "Enable ab shape partitions", OFFSET(enable_ab_partitions), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE},
diff --git a/libavcodec/libdav1d.c b/libavcodec/libdav1d.c
index f022a4ad05..09fe767fb8 100644
--- a/libavcodec/libdav1d.c
+++ b/libavcodec/libdav1d.c
@@ -290,10 +290,10 @@ static av_cold int libdav1d_init(AVCodecContext *c)
#endif
dav1d->dovi.logctx = c;
- dav1d->dovi.dv_profile = 10; // default for AV1
+ dav1d->dovi.cfg.dv_profile = 10; // default for AV1
sd = ff_get_coded_side_data(c, AV_PKT_DATA_DOVI_CONF);
- if (sd && sd->size > 0)
- ff_dovi_update_cfg(&dav1d->dovi, (AVDOVIDecoderConfigurationRecord *) sd->data);
+ if (sd && sd->size >= sizeof(dav1d->dovi.cfg))
+ dav1d->dovi.cfg = *(AVDOVIDecoderConfigurationRecord *) sd->data;
return 0;
}
@@ -385,7 +385,7 @@ static int libdav1d_receive_frame(AVCodecContext *c, AVFrame *frame)
{
Libdav1dContext *dav1d = c->priv_data;
Dav1dPicture pic = { 0 }, *p = &pic;
- AVPacket *pkt;
+ const AVPacket *pkt;
#if FF_DAV1D_VERSION_AT_LEAST(5,1)
enum Dav1dEventFlags event_flags = 0;
#endif
@@ -439,7 +439,7 @@ static int libdav1d_receive_frame(AVCodecContext *c, AVFrame *frame)
INT_MAX);
ff_set_sar(c, frame->sample_aspect_ratio);
- pkt = (AVPacket *)p->m.user_data.data;
+ pkt = (const AVPacket *)p->m.user_data.data;
// match timestamps and packet size
res = ff_decode_frame_props_from_pkt(c, frame, pkt);
diff --git a/libavcodec/libkvazaar.c b/libavcodec/libkvazaar.c
index 0711d9ab38..cd731ae9d0 100644
--- a/libavcodec/libkvazaar.c
+++ b/libavcodec/libkvazaar.c
@@ -111,8 +111,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
if (ctx->kvz_params) {
AVDictionary *dict = NULL;
if (!av_dict_parse_string(&dict, ctx->kvz_params, "=", ",", 0)) {
- AVDictionaryEntry *entry = NULL;
- while ((entry = av_dict_get(dict, "", entry, AV_DICT_IGNORE_SUFFIX))) {
+ const AVDictionaryEntry *entry = NULL;
+ while ((entry = av_dict_iterate(dict, entry))) {
if (!api->config_parse(cfg, entry->key, entry->value)) {
av_log(avctx, AV_LOG_WARNING, "Invalid option: %s=%s.\n",
entry->key, entry->value);
diff --git a/libavcodec/liblc3dec.c b/libavcodec/liblc3dec.c
index 90da28679b..d250ace38a 100644
--- a/libavcodec/liblc3dec.c
+++ b/libavcodec/liblc3dec.c
@@ -34,7 +34,6 @@ typedef struct LibLC3DecContext {
int frame_us, srate_hz, hr_mode;
void *decoder_mem;
lc3_decoder_t decoder[DECODER_MAX_CHANNELS];
- int64_t length;
} LibLC3DecContext;
static av_cold int liblc3_decode_init(AVCodecContext *avctx)
@@ -44,12 +43,12 @@ static av_cold int liblc3_decode_init(AVCodecContext *avctx)
int ep_mode;
unsigned decoder_size;
- if (avctx->extradata_size < 10)
+ if (avctx->extradata_size < 6)
return AVERROR_INVALIDDATA;
if (channels < 0 || channels > DECODER_MAX_CHANNELS) {
av_log(avctx, AV_LOG_ERROR,
"Invalid number of channels %d. Max %d channels are accepted\n",
- channels, DECODER_MAX_CHANNES);
+ channels, DECODER_MAX_CHANNELS);
return AVERROR(EINVAL);
}
@@ -57,7 +56,6 @@ static av_cold int liblc3_decode_init(AVCodecContext *avctx)
liblc3->srate_hz = avctx->sample_rate;
ep_mode = AV_RL16(avctx->extradata + 2);
liblc3->hr_mode = AV_RL16(avctx->extradata + 4);
- liblc3->length = AV_RL32(avctx->extradata + 6);
if (ep_mode != 0) {
av_log(avctx, AV_LOG_ERROR,
"Error protection mode is not supported.\n");
@@ -126,11 +124,7 @@ static int liblc3_decode(AVCodecContext *avctx, AVFrame *frame,
in += nbytes;
}
- if (liblc3->length > 0) {
- int64_t end_pts = liblc3->length + avctx->delay;
- frame->nb_samples = FFMIN(frame->nb_samples,
- FFMAX(end_pts - frame->pts, 0));
- }
+ frame->nb_samples = FFMIN(frame->nb_samples, avpkt->duration);
*got_frame_ptr = 1;
diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
index 105c3369c0..2fef8c8971 100644
--- a/libavcodec/libsvtav1.c
+++ b/libavcodec/libsvtav1.c
@@ -23,6 +23,7 @@
#include <stdint.h>
#include <EbSvtAv1ErrorCodes.h>
#include <EbSvtAv1Enc.h>
+#include <EbSvtAv1Metadata.h>
#include "libavutil/common.h"
#include "libavutil/frame.h"
@@ -35,6 +36,7 @@
#include "libavutil/avassert.h"
#include "codec_internal.h"
+#include "dovi_rpu.h"
#include "encode.h"
#include "packet_internal.h"
#include "avcodec.h"
@@ -62,6 +64,8 @@ typedef struct SvtContext {
EOS_STATUS eos_flag;
+ DOVIContext dovi;
+
// User options.
AVDictionary *svtav1_opts;
int enc_mode;
@@ -206,7 +210,7 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
{
SvtContext *svt_enc = avctx->priv_data;
const AVPixFmtDescriptor *desc;
- AVDictionaryEntry *en = NULL;
+ const AVDictionaryEntry av_unused *en = NULL;
// Update param from options
if (svt_enc->enc_mode >= -1)
@@ -322,7 +326,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
handle_side_data(avctx, param);
#if SVT_AV1_CHECK_VERSION(0, 9, 1)
- while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
+ while ((en = av_dict_iterate(svt_enc->svtav1_opts, en))) {
EbErrorType ret = svt_av1_enc_parse_parameter(param, en->key, en->value);
if (ret != EB_ErrorNone) {
int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
@@ -332,7 +336,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
}
#else
- if ((en = av_dict_get(svt_enc->svtav1_opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ if (av_dict_count(svt_enc->svtav1_opts)) {
int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
av_log(avctx, level, "svt-params needs libavcodec to be compiled with SVT-AV1 "
"headers >= 0.9.1.\n");
@@ -418,6 +422,7 @@ static int read_in_data(EbSvtAv1EncConfiguration *param, const AVFrame *frame,
in_data->cr_stride = AV_CEIL_RSHIFT(frame->linesize[2], bytes_shift);
header_ptr->n_filled_len = frame_size;
+ svt_metadata_array_free(&header_ptr->metadata);
return 0;
}
@@ -451,6 +456,11 @@ static av_cold int eb_enc_init(AVCodecContext *avctx)
return svt_print_error(avctx, svt_ret, "Error initializing encoder");
}
+ svt_enc->dovi.logctx = avctx;
+ ret = ff_dovi_configure(&svt_enc->dovi, avctx);
+ if (ret < 0)
+ return ret;
+
if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
EbBufferHeaderType *headerPtr = NULL;
@@ -486,6 +496,8 @@ static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
SvtContext *svt_enc = avctx->priv_data;
EbBufferHeaderType *headerPtr = svt_enc->in_buf;
+ AVFrameSideData *sd;
+ EbErrorType svt_ret;
int ret;
if (!frame) {
@@ -524,7 +536,27 @@ static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
if (avctx->gop_size == 1)
headerPtr->pic_type = EB_AV1_KEY_PICTURE;
- svt_av1_enc_send_picture(svt_enc->svt_handle, headerPtr);
+ sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA);
+ if (svt_enc->dovi.cfg.dv_profile && sd) {
+ const AVDOVIMetadata *metadata = (const AVDOVIMetadata *)sd->data;
+ uint8_t *t35;
+ int size;
+ if ((ret = ff_dovi_rpu_generate(&svt_enc->dovi, metadata, &t35, &size)) < 0)
+ return ret;
+ ret = svt_add_metadata(headerPtr, EB_AV1_METADATA_TYPE_ITUT_T35, t35, size);
+ av_free(t35);
+ if (ret < 0)
+ return AVERROR(ENOMEM);
+ } else if (svt_enc->dovi.cfg.dv_profile) {
+ av_log(avctx, AV_LOG_ERROR, "Dolby Vision enabled, but received frame "
+ "without AV_FRAME_DATA_DOVI_METADATA\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+
+ svt_ret = svt_av1_enc_send_picture(svt_enc->svt_handle, headerPtr);
+ if (svt_ret != EB_ErrorNone)
+ return svt_print_error(avctx, svt_ret, "Error sending a frame to encoder");
return 0;
}
@@ -579,6 +611,8 @@ static int eb_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
svt_ret = svt_av1_enc_get_packet(svt_enc->svt_handle, &headerPtr, svt_enc->eos_flag);
if (svt_ret == EB_NoErrorEmptyQueue)
return AVERROR(EAGAIN);
+ else if (svt_ret != EB_ErrorNone)
+ return svt_print_error(avctx, svt_ret, "Error getting an output packet from encoder");
#if SVT_AV1_CHECK_VERSION(2, 0, 0)
if (headerPtr->flags & EB_BUFFERFLAG_EOS) {
@@ -644,11 +678,13 @@ static av_cold int eb_enc_close(AVCodecContext *avctx)
}
if (svt_enc->in_buf) {
av_free(svt_enc->in_buf->p_buffer);
+ svt_metadata_array_free(&svt_enc->in_buf->metadata);
av_freep(&svt_enc->in_buf);
}
av_buffer_pool_uninit(&svt_enc->pool);
av_frame_free(&svt_enc->frame);
+ ff_dovi_ctx_unref(&svt_enc->dovi);
return 0;
}
@@ -695,6 +731,9 @@ static const AVOption options[] = {
AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
{ "svtav1-params", "Set the SVT-AV1 configuration using a :-separated list of key=value parameters", OFFSET(svtav1_opts), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
+ { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
+ { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, .flags = VE, .unit = "dovi" },
+
{NULL},
};
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index bcbdc4981e..5c7b6e9de7 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -684,7 +684,7 @@ static int vpx_ts_param_parse(VPxContext *ctx, struct vpx_codec_enc_cfg *enccfg,
vp8_ts_parse_int_array(enccfg->ts_layer_id, value, value_len, VPX_TS_MAX_PERIODICITY);
} else if (!strcmp(key, "ts_layering_mode")) {
/* option for pre-defined temporal structures in function set_temporal_layer_pattern. */
- ts_layering_mode = strtoul(value, &value, 4);
+ ts_layering_mode = strtoul(value, &value, 10);
}
#if (VPX_ENCODER_ABI_VERSION >= 12) && CONFIG_LIBVPX_VP9_ENCODER
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index eadb20d2b3..29d1a7ccbc 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -270,11 +270,9 @@ static void reconfig_encoder(AVCodecContext *ctx, const AVFrame *frame)
case AV_STEREO3D_FRAMESEQUENCE:
fpa_type = 5;
break;
-#if X264_BUILD >= 145
case AV_STEREO3D_2D:
fpa_type = 6;
break;
-#endif
default:
fpa_type = -1;
break;
@@ -394,14 +392,14 @@ static int setup_mb_info(AVCodecContext *ctx, x264_picture_t *pic,
return 0;
}
-static int setup_roi(AVCodecContext *ctx, x264_picture_t *pic, int bit_depth,
+static int setup_roi(AVCodecContext *ctx, x264_picture_t *pic,
const AVFrame *frame, const uint8_t *data, size_t size)
{
X264Context *x4 = ctx->priv_data;
int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
- int qp_range = 51 + 6 * (bit_depth - 8);
+ int qp_range = 51 + 6 * (x4->params.i_bitdepth - 8);
int nb_rois;
const AVRegionOfInterest *roi;
uint32_t roi_size;
@@ -476,7 +474,7 @@ static int setup_frame(AVCodecContext *ctx, const AVFrame *frame,
x264_sei_t *sei = &pic->extra_sei;
unsigned int sei_data_size = 0;
int64_t wallclock = 0;
- int bit_depth, ret;
+ int ret;
AVFrameSideData *sd;
AVFrameSideData *mbinfo_sd;
@@ -486,12 +484,7 @@ static int setup_frame(AVCodecContext *ctx, const AVFrame *frame,
x264_picture_init(pic);
pic->img.i_csp = x4->params.i_csp;
-#if X264_BUILD >= 153
- bit_depth = x4->params.i_bitdepth;
-#else
- bit_depth = x264_bit_depth;
-#endif
- if (bit_depth > 8)
+ if (x4->params.i_bitdepth > 8)
pic->img.i_csp |= X264_CSP_HIGH_DEPTH;
pic->img.i_plane = av_pix_fmt_count_planes(ctx->pix_fmt);
@@ -564,7 +557,7 @@ static int setup_frame(AVCodecContext *ctx, const AVFrame *frame,
sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
if (sd) {
- ret = setup_roi(ctx, pic, bit_depth, frame, sd->data, sd->size);
+ ret = setup_roi(ctx, pic, frame, sd->data, sd->size);
if (ret < 0)
goto fail;
}
@@ -1109,9 +1102,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
x4->params.p_log_private = avctx;
x4->params.i_log_level = X264_LOG_DEBUG;
x4->params.i_csp = convert_pix_fmt(avctx->pix_fmt);
-#if X264_BUILD >= 153
x4->params.i_bitdepth = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth;
-#endif
PARSE_X264_OPT("weightp", wpredp);
@@ -1180,11 +1171,10 @@ static av_cold int X264_init(AVCodecContext *avctx)
else if (x4->params.i_level_idc > 0) {
int i;
int mbn = AV_CEIL_RSHIFT(avctx->width, 4) * AV_CEIL_RSHIFT(avctx->height, 4);
- int scale = X264_BUILD < 129 ? 384 : 1;
for (i = 0; i<x264_levels[i].level_idc; i++)
if (x264_levels[i].level_idc == x4->params.i_level_idc)
- x4->params.i_frame_reference = av_clip(x264_levels[i].dpb / mbn / scale, 1, x4->params.i_frame_reference);
+ x4->params.i_frame_reference = av_clip(x264_levels[i].dpb / mbn, 1, x4->params.i_frame_reference);
}
if (avctx->trellis >= 0)
@@ -1228,12 +1218,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
x4->params.b_vfr_input = 0;
}
if (x4->avcintra_class >= 0)
-#if X264_BUILD >= 142
x4->params.i_avcintra_class = x4->avcintra_class;
-#else
- av_log(avctx, AV_LOG_ERROR,
- "x264 too old for AVC Intra, at least version 142 needed\n");
-#endif
if (x4->avcintra_class > 200) {
#if X264_BUILD < 164
@@ -1395,15 +1380,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
}
-#if X264_BUILD >= 142
/* Separate headers not supported in AVC-Intra mode */
if (x4->avcintra_class >= 0)
x4->params.b_repeat_headers = 1;
-#endif
{
- AVDictionaryEntry *en = NULL;
- while (en = av_dict_get(x4->x264_params, "", en, AV_DICT_IGNORE_SUFFIX)) {
+ const AVDictionaryEntry *en = NULL;
+ while (en = av_dict_iterate(x4->x264_params, en)) {
if ((ret = x264_param_parse(&x4->params, en->key, en->value)) < 0) {
av_log(avctx, AV_LOG_WARNING,
"Error parsing option '%s = %s'.\n",
@@ -1513,18 +1496,6 @@ static const enum AVPixelFormat pix_fmts_8bit_rgb[] = {
};
#endif
-#if X264_BUILD < 153
-static av_cold void X264_init_static(FFCodec *codec)
-{
- if (x264_bit_depth == 8)
- codec->p.pix_fmts = pix_fmts_8bit;
- else if (x264_bit_depth == 9)
- codec->p.pix_fmts = pix_fmts_9bit;
- else if (x264_bit_depth == 10)
- codec->p.pix_fmts = pix_fmts_10bit;
-}
-#endif
-
#define OFFSET(x) offsetof(X264Context, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
@@ -1544,9 +1515,7 @@ static const AVOption options[] = {
{ "none", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_AQ_NONE}, INT_MIN, INT_MAX, VE, .unit = "aq_mode" },
{ "variance", "Variance AQ (complexity mask)", 0, AV_OPT_TYPE_CONST, {.i64 = X264_AQ_VARIANCE}, INT_MIN, INT_MAX, VE, .unit = "aq_mode" },
{ "autovariance", "Auto-variance AQ", 0, AV_OPT_TYPE_CONST, {.i64 = X264_AQ_AUTOVARIANCE}, INT_MIN, INT_MAX, VE, .unit = "aq_mode" },
-#if X264_BUILD >= 144
{ "autovariance-biased", "Auto-variance AQ with bias to dark scenes", 0, AV_OPT_TYPE_CONST, {.i64 = X264_AQ_AUTOVARIANCE_BIASED}, INT_MIN, INT_MAX, VE, .unit = "aq_mode" },
-#endif
{ "aq-strength", "AQ strength. Reduces blocking and blurring in flat and textured areas.", OFFSET(aq_strength), AV_OPT_TYPE_FLOAT, {.dbl = -1}, -1, FLT_MAX, VE},
{ "psy", "Use psychovisual optimizations.", OFFSET(psy), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
{ "psy-rd", "Strength of psychovisual optimization, in <psy-rd>:<psy-trellis> format.", OFFSET(psy_rd), AV_OPT_TYPE_STRING, {0 }, 0, 0, VE},
@@ -1644,10 +1613,7 @@ static const AVClass x264_class = {
.version = LIBAVUTIL_VERSION_INT,
};
-#if X264_BUILD >= 153
-const
-#endif
-FFCodec ff_libx264_encoder = {
+const FFCodec ff_libx264_encoder = {
.p.name = "libx264",
CODEC_LONG_NAME("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
.p.type = AVMEDIA_TYPE_VIDEO,
@@ -1665,11 +1631,7 @@ FFCodec ff_libx264_encoder = {
.flush = X264_flush,
.close = X264_close,
.defaults = x264_defaults,
-#if X264_BUILD < 153
- .init_static_data = X264_init_static,
-#else
.p.pix_fmts = pix_fmts_all,
-#endif
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_AUTO_THREADS
#if X264_BUILD < 158
| FF_CODEC_CAP_NOT_INIT_THREADSAFE
diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index 0645cd2045..ac1dbc4f97 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c
@@ -36,6 +36,7 @@
#include "libavutil/pixdesc.h"
#include "avcodec.h"
#include "codec_internal.h"
+#include "dovi_rpu.h"
#include "encode.h"
#include "packet_internal.h"
#include "atsc_a53.h"
@@ -78,6 +79,8 @@ typedef struct libx265Context {
* encounter a frame with ROI side data.
*/
int roi_warned;
+
+ DOVIContext dovi;
} libx265Context;
static int is_keyframe(NalUnitType naltype)
@@ -143,6 +146,8 @@ static av_cold int libx265_encode_close(AVCodecContext *avctx)
if (ctx->encoder)
ctx->api->encoder_close(ctx->encoder);
+ ff_dovi_ctx_unref(&ctx->dovi);
+
return 0;
}
@@ -490,8 +495,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
{
- AVDictionaryEntry *en = NULL;
- while ((en = av_dict_get(ctx->x265_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
+ const AVDictionaryEntry *en = NULL;
+ while ((en = av_dict_iterate(ctx->x265_opts, en))) {
int parse_ret = ctx->api->param_parse(ctx->params, en->key, en->value);
switch (parse_ret) {
@@ -526,6 +531,14 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
}
+#if X265_BUILD >= 167
+ ctx->dovi.logctx = avctx;
+ if ((ret = ff_dovi_configure(&ctx->dovi, avctx)) < 0)
+ return ret;
+ ctx->params->dolbyProfile = ctx->dovi.cfg.dv_profile * 10 +
+ ctx->dovi.cfg.dv_bl_signal_compatibility_id;
+#endif
+
ctx->encoder = ctx->api->encoder_open(ctx->params);
if (!ctx->encoder) {
av_log(avctx, AV_LOG_ERROR, "Cannot open libx265 encoder.\n");
@@ -629,6 +642,10 @@ static void free_picture(libx265Context *ctx, x265_picture *pic)
for (int i = 0; i < sei->numPayloads; i++)
av_free(sei->payloads[i].payload);
+#if X265_BUILD >= 167
+ av_free(pic->rpu.payload);
+#endif
+
if (pic->userData) {
int idx = (int)(intptr_t)pic->userData - 1;
rd_release(ctx, idx);
@@ -660,6 +677,7 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
sei->numPayloads = 0;
if (pic) {
+ AVFrameSideData *sd;
ReorderedData *rd;
int rd_idx;
@@ -760,6 +778,24 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
sei->numPayloads++;
}
}
+
+#if X265_BUILD >= 167
+ sd = av_frame_get_side_data(pic, AV_FRAME_DATA_DOVI_METADATA);
+ if (ctx->dovi.cfg.dv_profile && sd) {
+ const AVDOVIMetadata *metadata = (const AVDOVIMetadata *)sd->data;
+ ret = ff_dovi_rpu_generate(&ctx->dovi, metadata, &x265pic.rpu.payload,
+ &x265pic.rpu.payloadSize);
+ if (ret < 0) {
+ free_picture(ctx, &x265pic);
+ return ret;
+ }
+ } else if (ctx->dovi.cfg.dv_profile) {
+ av_log(avctx, AV_LOG_ERROR, "Dolby Vision enabled, but received frame "
+ "without AV_FRAME_DATA_DOVI_METADATA");
+ free_picture(ctx, &x265pic);
+ return AVERROR_INVALIDDATA;
+ }
+#endif
}
ret = ctx->api->encoder_encode(ctx->encoder, &nal, &nnal,
@@ -914,6 +950,10 @@ static const AVOption options[] = {
{ "udu_sei", "Use user data unregistered SEI if available", OFFSET(udu_sei), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
{ "x265-params", "set the x265 configuration using a :-separated list of key=value parameters", OFFSET(x265_opts), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
+#if X265_BUILD >= 167
+ { "dolbyvision", "Enable Dolby Vision RPU coding", OFFSET(dovi.enable), AV_OPT_TYPE_BOOL, {.i64 = FF_DOVI_AUTOMATIC }, -1, 1, VE, .unit = "dovi" },
+ { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DOVI_AUTOMATIC}, .flags = VE, .unit = "dovi" },
+#endif
{ NULL }
};
diff --git a/libavcodec/libxevd.c b/libavcodec/libxevd.c
index c6c7327e65..520fdab7d8 100644
--- a/libavcodec/libxevd.c
+++ b/libavcodec/libxevd.c
@@ -170,14 +170,12 @@ static int export_stream_params(const XevdContext *xectx, AVCodecContext *avctx)
}
// the function returns sps->num_reorder_pics
- ret = xevd_config(xectx->id, XEVD_CFG_GET_MAX_CODING_DELAY, &avctx->max_b_frames, &size);
+ ret = xevd_config(xectx->id, XEVD_CFG_GET_MAX_CODING_DELAY, &avctx->has_b_frames, &size);
if (XEVD_FAILED(ret)) {
av_log(avctx, AV_LOG_ERROR, "Failed to get max_coding_delay\n");
return AVERROR_EXTERNAL;
}
- avctx->has_b_frames = (avctx->max_b_frames) ? 1 : 0;
-
return 0;
}
@@ -415,6 +413,10 @@ static int libxevd_receive_frame(AVCodecContext *avctx, AVFrame *frame)
return AVERROR(EAGAIN);
}
} else {
+ if (stat.stype == XEVD_ST_I) {
+ frame->pict_type = AV_PICTURE_TYPE_I;
+ frame->flags |= AV_FRAME_FLAG_KEY;
+ }
return libxevd_return_frame(avctx, frame, imgb, &pkt_au);
}
}
diff --git a/libavcodec/loco.c b/libavcodec/loco.c
index 3d11823284..b1294a9798 100644
--- a/libavcodec/loco.c
+++ b/libavcodec/loco.c
@@ -206,7 +206,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->flags |= AV_FRAME_FLAG_KEY;
#define ADVANCE_BY_DECODED do { \
if (decoded < 0 || decoded >= buf_size) goto buf_too_small; \
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 8305cc0596..dfd6114690 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -282,8 +282,10 @@ int ff_lpc_calc_coefs(LPCContext *s,
double av_uninit(weight);
memset(var, 0, FFALIGN(MAX_LPC_ORDER+1,4)*sizeof(*var));
- for(j=0; j<max_order; j++)
- m[0].coeff[max_order-1][j] = -lpc[max_order-1][j];
+ /* Avoids initializing with an unused value when lpc_passes == 1 */
+ if (lpc_passes > 1)
+ for(j=0; j<max_order; j++)
+ m[0].coeff[max_order-1][j] = -lpc[max_order-1][j];
for(; pass<lpc_passes; pass++){
avpriv_init_lls(&m[pass&1], max_order);
diff --git a/libavcodec/m101.c b/libavcodec/m101.c
index 43a3c7bbe5..3ed1cab281 100644
--- a/libavcodec/m101.c
+++ b/libavcodec/m101.c
@@ -66,8 +66,6 @@ static int m101_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
if ((avctx->extradata[3*4] & 3) != 3) {
frame->flags |= AV_FRAME_FLAG_INTERLACED;
if (avctx->extradata[3*4] & 1)
diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
index 06fad8f3f7..b85505c428 100644
--- a/libavcodec/magicyuv.c
+++ b/libavcodec/magicyuv.c
@@ -652,9 +652,6 @@ static int magy_decode_frame(AVCodecContext *avctx, AVFrame *p,
if (ret < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
if ((ret = ff_thread_get_buffer(avctx, p, 0)) < 0)
return ret;
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 31c09de5bc..5ab40719ac 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -177,8 +177,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
return ret;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
av_fast_padded_malloc(&a->bitstream_buffer, &a->bitstream_buffer_size, buf_size);
if (!a->bitstream_buffer)
diff --git a/libavcodec/mediacodec_wrapper.c b/libavcodec/mediacodec_wrapper.c
index 306359071e..96c886666a 100644
--- a/libavcodec/mediacodec_wrapper.c
+++ b/libavcodec/mediacodec_wrapper.c
@@ -365,6 +365,7 @@ int ff_AMediaCodecProfile_getProfileFromAVCodecContext(AVCodecContext *avctx)
(void)VP9Profile3HDR;
(void)VP9Profile2HDR10Plus;
(void)VP9Profile3HDR10Plus;
+ (void)MPEG4ProfileSimpleFace;
(void)AV1ProfileMain10;
(void)AV1ProfileMain10HDR10;
(void)AV1ProfileMain10HDR10Plus;
@@ -1828,25 +1829,8 @@ typedef struct FFAMediaFormatNdk {
void *libmedia;
AMediaFormat *impl;
- AMediaFormat *(*new)(void);
- media_status_t (*delete)(AMediaFormat*);
-
- const char* (*toString)(AMediaFormat*);
-
- bool (*getInt32)(AMediaFormat*, const char *name, int32_t *out);
- bool (*getInt64)(AMediaFormat*, const char *name, int64_t *out);
- bool (*getFloat)(AMediaFormat*, const char *name, float *out);
- bool (*getSize)(AMediaFormat*, const char *name, size_t *out);
- bool (*getBuffer)(AMediaFormat*, const char *name, void** data, size_t *size);
- bool (*getString)(AMediaFormat*, const char *name, const char **out);
bool (*getRect)(AMediaFormat *, const char *name,
int32_t *left, int32_t *top, int32_t *right, int32_t *bottom);
-
- void (*setInt32)(AMediaFormat*, const char* name, int32_t value);
- void (*setInt64)(AMediaFormat*, const char* name, int64_t value);
- void (*setFloat)(AMediaFormat*, const char* name, float value);
- void (*setString)(AMediaFormat*, const char* name, const char* value);
- void (*setBuffer)(AMediaFormat*, const char* name, const void* data, size_t size);
void (*setRect)(AMediaFormat *, const char *name,
int32_t left, int32_t top, int32_t right, int32_t bottom);
} FFAMediaFormatNdk;
@@ -1858,34 +1842,6 @@ typedef struct FFAMediaCodecNdk {
AMediaCodec *impl;
ANativeWindow *window;
- AMediaCodec* (*createCodecByName)(const char *name);
- AMediaCodec* (*createDecoderByType)(const char *mime_type);
- AMediaCodec* (*createEncoderByType)(const char *mime_type);
- media_status_t (*delete)(AMediaCodec*);
-
- media_status_t (*configure)(AMediaCodec *,
- const AMediaFormat *format,
- ANativeWindow *surface,
- AMediaCrypto *crypto,
- uint32_t flags);
- media_status_t (*start)(AMediaCodec*);
- media_status_t (*stop)(AMediaCodec*);
- media_status_t (*flush)(AMediaCodec*);
-
- uint8_t* (*getInputBuffer)(AMediaCodec*, size_t idx, size_t *out_size);
- uint8_t* (*getOutputBuffer)(AMediaCodec*, size_t idx, size_t *out_size);
-
- ssize_t (*dequeueInputBuffer)(AMediaCodec*, int64_t timeoutUs);
- media_status_t (*queueInputBuffer)(AMediaCodec*, size_t idx,
- long offset, size_t size,
- uint64_t time, uint32_t flags);
-
- ssize_t (*dequeueOutputBuffer)(AMediaCodec*, AMediaCodecBufferInfo *info, int64_t timeoutUs);
- AMediaFormat* (*getOutputFormat)(AMediaCodec*);
-
- media_status_t (*releaseOutputBuffer)(AMediaCodec*, size_t idx, bool render);
- media_status_t (*releaseOutputBufferAtTime)(AMediaCodec *mData, size_t idx, int64_t timestampNs);
-
// Available since API level 28.
media_status_t (*getName)(AMediaCodec*, char** out_name);
void (*releaseName)(AMediaCodec*, char* name);
@@ -1925,38 +1881,15 @@ static FFAMediaFormat *mediaformat_ndk_create(AMediaFormat *impl)
#define GET_OPTIONAL_SYMBOL(sym) \
format->sym = dlsym(format->libmedia, "AMediaFormat_" #sym);
-#define GET_SYMBOL(sym) \
- GET_OPTIONAL_SYMBOL(sym) \
- if (!format->sym) \
- goto error;
-
- GET_SYMBOL(new)
- GET_SYMBOL(delete)
-
- GET_SYMBOL(toString)
-
- GET_SYMBOL(getInt32)
- GET_SYMBOL(getInt64)
- GET_SYMBOL(getFloat)
- GET_SYMBOL(getSize)
- GET_SYMBOL(getBuffer)
- GET_SYMBOL(getString)
GET_OPTIONAL_SYMBOL(getRect)
-
- GET_SYMBOL(setInt32)
- GET_SYMBOL(setInt64)
- GET_SYMBOL(setFloat)
- GET_SYMBOL(setString)
- GET_SYMBOL(setBuffer)
GET_OPTIONAL_SYMBOL(setRect)
-#undef GET_SYMBOL
#undef GET_OPTIONAL_SYMBOL
if (impl) {
format->impl = impl;
} else {
- format->impl = format->new();
+ format->impl = AMediaFormat_new();
if (!format->impl)
goto error;
}
@@ -1984,7 +1917,7 @@ static int mediaformat_ndk_delete(FFAMediaFormat* ctx)
av_assert0(format->api.class == &amediaformat_ndk_class);
- if (format->impl && (format->delete(format->impl) != AMEDIA_OK))
+ if (format->impl && (AMediaFormat_delete(format->impl) != AMEDIA_OK))
ret = AVERROR_EXTERNAL;
if (format->libmedia)
dlclose(format->libmedia);
@@ -1996,39 +1929,39 @@ static int mediaformat_ndk_delete(FFAMediaFormat* ctx)
static char* mediaformat_ndk_toString(FFAMediaFormat* ctx)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- const char *str = format->toString(format->impl);
+ const char *str = AMediaFormat_toString(format->impl);
return av_strdup(str);
}
static int mediaformat_ndk_getInt32(FFAMediaFormat* ctx, const char *name, int32_t *out)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- return format->getInt32(format->impl, name, out);
+ return AMediaFormat_getInt32(format->impl, name, out);
}
static int mediaformat_ndk_getInt64(FFAMediaFormat* ctx, const char *name, int64_t *out)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- return format->getInt64(format->impl, name, out);
+ return AMediaFormat_getInt64(format->impl, name, out);
}
static int mediaformat_ndk_getFloat(FFAMediaFormat* ctx, const char *name, float *out)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- return format->getFloat(format->impl, name, out);
+ return AMediaFormat_getFloat(format->impl, name, out);
}
static int mediaformat_ndk_getBuffer(FFAMediaFormat* ctx, const char *name, void** data, size_t *size)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- return format->getBuffer(format->impl, name, data, size);
+ return AMediaFormat_getBuffer(format->impl, name, data, size);
}
static int mediaformat_ndk_getString(FFAMediaFormat* ctx, const char *name, const char **out)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
const char *tmp = NULL;
- int ret = format->getString(format->impl, name, &tmp);
+ int ret = AMediaFormat_getString(format->impl, name, &tmp);
if (tmp)
*out = av_strdup(tmp);
@@ -2047,31 +1980,31 @@ static int mediaformat_ndk_getRect(FFAMediaFormat *ctx, const char *name,
static void mediaformat_ndk_setInt32(FFAMediaFormat* ctx, const char* name, int32_t value)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- format->setInt32(format->impl, name, value);
+ AMediaFormat_setInt32(format->impl, name, value);
}
static void mediaformat_ndk_setInt64(FFAMediaFormat* ctx, const char* name, int64_t value)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- format->setInt64(format->impl, name, value);
+ AMediaFormat_setInt64(format->impl, name, value);
}
static void mediaformat_ndk_setFloat(FFAMediaFormat* ctx, const char* name, float value)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- format->setFloat(format->impl, name, value);
+ AMediaFormat_setFloat(format->impl, name, value);
}
static void mediaformat_ndk_setString(FFAMediaFormat* ctx, const char* name, const char* value)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- format->setString(format->impl, name, value);
+ AMediaFormat_setString(format->impl, name, value);
}
static void mediaformat_ndk_setBuffer(FFAMediaFormat* ctx, const char* name, void* data, size_t size)
{
FFAMediaFormatNdk *format = (FFAMediaFormatNdk *)ctx;
- format->setBuffer(format->impl, name, data, size);
+ AMediaFormat_setBuffer(format->impl, name, data, size);
}
static void mediaformat_ndk_setRect(FFAMediaFormat *ctx, const char *name,
@@ -2117,54 +2050,28 @@ static inline FFAMediaCodec *ndk_codec_create(int method, const char *arg) {
if (!codec->libmedia)
goto error;
-#define GET_SYMBOL(sym, required) \
+#define GET_SYMBOL(sym) \
codec->sym = dlsym(codec->libmedia, "AMediaCodec_" #sym); \
- if (!codec->sym) { \
- av_log(codec, required ? AV_LOG_ERROR : AV_LOG_INFO, \
- #sym "() unavailable from %s\n", lib_name); \
- if (required) \
- goto error; \
- }
-
- GET_SYMBOL(createCodecByName, 1)
- GET_SYMBOL(createDecoderByType, 1)
- GET_SYMBOL(createEncoderByType, 1)
- GET_SYMBOL(delete, 1)
-
- GET_SYMBOL(configure, 1)
- GET_SYMBOL(start, 1)
- GET_SYMBOL(stop, 1)
- GET_SYMBOL(flush, 1)
-
- GET_SYMBOL(getInputBuffer, 1)
- GET_SYMBOL(getOutputBuffer, 1)
-
- GET_SYMBOL(dequeueInputBuffer, 1)
- GET_SYMBOL(queueInputBuffer, 1)
-
- GET_SYMBOL(dequeueOutputBuffer, 1)
- GET_SYMBOL(getOutputFormat, 1)
-
- GET_SYMBOL(releaseOutputBuffer, 1)
- GET_SYMBOL(releaseOutputBufferAtTime, 1)
+ if (!codec->sym) \
+ av_log(codec, AV_LOG_INFO, #sym "() unavailable from %s\n", lib_name);
- GET_SYMBOL(getName, 0)
- GET_SYMBOL(releaseName, 0)
+ GET_SYMBOL(getName)
+ GET_SYMBOL(releaseName)
- GET_SYMBOL(setInputSurface, 0)
- GET_SYMBOL(signalEndOfInputStream, 0)
+ GET_SYMBOL(setInputSurface)
+ GET_SYMBOL(signalEndOfInputStream)
#undef GET_SYMBOL
switch (method) {
case CREATE_CODEC_BY_NAME:
- codec->impl = codec->createCodecByName(arg);
+ codec->impl = AMediaCodec_createCodecByName(arg);
break;
case CREATE_DECODER_BY_TYPE:
- codec->impl = codec->createDecoderByType(arg);
+ codec->impl = AMediaCodec_createDecoderByType(arg);
break;
case CREATE_ENCODER_BY_TYPE:
- codec->impl = codec->createEncoderByType(arg);
+ codec->impl = AMediaCodec_createEncoderByType(arg);
break;
default:
av_assert0(0);
@@ -2201,7 +2108,7 @@ static int mediacodec_ndk_delete(FFAMediaCodec* ctx)
av_assert0(codec->api.class == &amediacodec_ndk_class);
- if (codec->impl && (codec->delete(codec->impl) != AMEDIA_OK))
+ if (codec->impl && (AMediaCodec_delete(codec->impl) != AMEDIA_OK))
ret = AVERROR_EXTERNAL;
if (codec->window)
ANativeWindow_release(codec->window);
@@ -2246,7 +2153,7 @@ static int mediacodec_ndk_configure(FFAMediaCodec* ctx,
return AVERROR_EXTERNAL;
}
- status = codec->configure(codec->impl, format->impl, NULL, NULL, flags);
+ status = AMediaCodec_configure(codec->impl, format->impl, NULL, NULL, flags);
if (status != AMEDIA_OK) {
av_log(codec, AV_LOG_ERROR, "Encoder configure failed, %d\n", status);
return AVERROR_EXTERNAL;
@@ -2261,7 +2168,7 @@ static int mediacodec_ndk_configure(FFAMediaCodec* ctx,
return AVERROR_EXTERNAL;
}
} else {
- status = codec->configure(codec->impl, format->impl, native_window, NULL, flags);
+ status = AMediaCodec_configure(codec->impl, format->impl, native_window, NULL, flags);
if (status != AMEDIA_OK) {
av_log(codec, AV_LOG_ERROR, "Decoder configure failed, %d\n", status);
return AVERROR_EXTERNAL;
@@ -2275,7 +2182,7 @@ static int mediacodec_ndk_configure(FFAMediaCodec* ctx,
static int mediacodec_ndk_ ## method(FFAMediaCodec* ctx) \
{ \
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx; \
- media_status_t status = codec->method(codec->impl); \
+ media_status_t status = AMediaCodec_ ## method (codec->impl); \
\
if (status != AMEDIA_OK) { \
av_log(codec, AV_LOG_ERROR, #method " failed, %d\n", status); \
@@ -2292,19 +2199,19 @@ MEDIACODEC_NDK_WRAPPER(flush)
static uint8_t* mediacodec_ndk_getInputBuffer(FFAMediaCodec* ctx, size_t idx, size_t *out_size)
{
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
- return codec->getInputBuffer(codec->impl, idx, out_size);
+ return AMediaCodec_getInputBuffer(codec->impl, idx, out_size);
}
static uint8_t* mediacodec_ndk_getOutputBuffer(FFAMediaCodec* ctx, size_t idx, size_t *out_size)
{
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
- return codec->getOutputBuffer(codec->impl, idx, out_size);
+ return AMediaCodec_getOutputBuffer(codec->impl, idx, out_size);
}
static ssize_t mediacodec_ndk_dequeueInputBuffer(FFAMediaCodec* ctx, int64_t timeoutUs)
{
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
- return codec->dequeueInputBuffer(codec->impl, timeoutUs);
+ return AMediaCodec_dequeueInputBuffer(codec->impl, timeoutUs);
}
static int mediacodec_ndk_queueInputBuffer(FFAMediaCodec *ctx, size_t idx,
@@ -2312,7 +2219,7 @@ static int mediacodec_ndk_queueInputBuffer(FFAMediaCodec *ctx, size_t idx,
uint64_t time, uint32_t flags)
{
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
- return codec->queueInputBuffer(codec->impl, idx, offset, size, time, flags);
+ return AMediaCodec_queueInputBuffer(codec->impl, idx, offset, size, time, flags);
}
static ssize_t mediacodec_ndk_dequeueOutputBuffer(FFAMediaCodec* ctx, FFAMediaCodecBufferInfo *info, int64_t timeoutUs)
@@ -2321,7 +2228,7 @@ static ssize_t mediacodec_ndk_dequeueOutputBuffer(FFAMediaCodec* ctx, FFAMediaCo
AMediaCodecBufferInfo buf_info = {0};
ssize_t ret;
- ret = codec->dequeueOutputBuffer(codec->impl, &buf_info, timeoutUs);
+ ret = AMediaCodec_dequeueOutputBuffer(codec->impl, &buf_info, timeoutUs);
info->offset = buf_info.offset;
info->size = buf_info.size;
info->presentationTimeUs = buf_info.presentationTimeUs;
@@ -2333,7 +2240,7 @@ static ssize_t mediacodec_ndk_dequeueOutputBuffer(FFAMediaCodec* ctx, FFAMediaCo
static FFAMediaFormat* mediacodec_ndk_getOutputFormat(FFAMediaCodec* ctx)
{
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
- AMediaFormat *format = codec->getOutputFormat(codec->impl);
+ AMediaFormat *format = AMediaCodec_getOutputFormat(codec->impl);
if (!format)
return NULL;
@@ -2345,7 +2252,7 @@ static int mediacodec_ndk_releaseOutputBuffer(FFAMediaCodec* ctx, size_t idx, in
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
media_status_t status;
- status = codec->releaseOutputBuffer(codec->impl, idx, render);
+ status = AMediaCodec_releaseOutputBuffer(codec->impl, idx, render);
if (status != AMEDIA_OK) {
av_log(codec, AV_LOG_ERROR, "release output buffer failed, %d\n", status);
return AVERROR_EXTERNAL;
@@ -2359,7 +2266,7 @@ static int mediacodec_ndk_releaseOutputBufferAtTime(FFAMediaCodec *ctx, size_t i
FFAMediaCodecNdk *codec = (FFAMediaCodecNdk *)ctx;
media_status_t status;
- status = codec->releaseOutputBufferAtTime(codec->impl, idx, timestampNs);
+ status = AMediaCodec_releaseOutputBufferAtTime(codec->impl, idx, timestampNs);
if (status != AMEDIA_OK) {
av_log(codec, AV_LOG_ERROR, "releaseOutputBufferAtTime failed, %d\n", status);
return AVERROR_EXTERNAL;
diff --git a/libavcodec/mediacodecenc.c b/libavcodec/mediacodecenc.c
index b59de75b9b..bbf570e7be 100644
--- a/libavcodec/mediacodecenc.c
+++ b/libavcodec/mediacodecenc.c
@@ -23,6 +23,7 @@
#include "config_components.h"
#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
#include "libavutil/hwcontext_mediacodec.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
@@ -74,6 +75,7 @@ typedef struct MediaCodecEncContext {
int bitrate_mode;
int level;
int pts_as_dts;
+ int extract_extradata;
} MediaCodecEncContext;
enum {
@@ -112,6 +114,23 @@ static void mediacodec_output_format(AVCodecContext *avctx)
ff_AMediaFormat_delete(out_format);
}
+static int extract_extradata_support(AVCodecContext *avctx)
+{
+ const AVBitStreamFilter *bsf = av_bsf_get_by_name("extract_extradata");
+
+ if (!bsf) {
+ av_log(avctx, AV_LOG_WARNING, "extract_extradata bsf not found\n");
+ return 0;
+ }
+
+ for (int i = 0; bsf->codec_ids[i] != AV_CODEC_ID_NONE; i++) {
+ if (bsf->codec_ids[i] == avctx->codec_id)
+ return 1;
+ }
+
+ return 0;
+}
+
static int mediacodec_init_bsf(AVCodecContext *avctx)
{
MediaCodecEncContext *s = avctx->priv_data;
@@ -120,20 +139,32 @@ static int mediacodec_init_bsf(AVCodecContext *avctx)
int crop_right = s->width - avctx->width;
int crop_bottom = s->height - avctx->height;
- if (!crop_right && !crop_bottom)
+ /* Nothing can be done for this format now */
+ if (avctx->pix_fmt == AV_PIX_FMT_MEDIACODEC)
return 0;
- if (avctx->codec_id == AV_CODEC_ID_H264)
- ret = snprintf(str, sizeof(str), "h264_metadata=crop_right=%d:crop_bottom=%d",
- crop_right, crop_bottom);
- else if (avctx->codec_id == AV_CODEC_ID_HEVC)
- ret = snprintf(str, sizeof(str), "hevc_metadata=crop_right=%d:crop_bottom=%d",
- crop_right, crop_bottom);
- else
+ s->extract_extradata = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) &&
+ extract_extradata_support(avctx);
+ if (!crop_right && !crop_bottom && !s->extract_extradata)
return 0;
- if (ret >= sizeof(str))
- return AVERROR_BUFFER_TOO_SMALL;
+ ret = 0;
+ if (crop_right || crop_bottom) {
+ if (avctx->codec_id == AV_CODEC_ID_H264)
+ ret = snprintf(str, sizeof(str), "h264_metadata=crop_right=%d:crop_bottom=%d",
+ crop_right, crop_bottom);
+ else if (avctx->codec_id == AV_CODEC_ID_HEVC)
+ ret = snprintf(str, sizeof(str), "hevc_metadata=crop_right=%d:crop_bottom=%d",
+ crop_right, crop_bottom);
+ if (ret >= sizeof(str))
+ return AVERROR_BUFFER_TOO_SMALL;
+ }
+
+ if (s->extract_extradata) {
+ ret = av_strlcatf(str, sizeof(str), "%sextract_extradata", ret ? "," : "");
+ if (ret >= sizeof(str))
+ return AVERROR_BUFFER_TOO_SMALL;
+ }
ret = av_bsf_list_parse_str(str, &s->bsf);
if (ret < 0)
@@ -148,6 +179,8 @@ static int mediacodec_init_bsf(AVCodecContext *avctx)
return ret;
}
+static int mediacodec_generate_extradata(AVCodecContext *avctx);
+
static av_cold int mediacodec_init(AVCodecContext *avctx)
{
const char *codec_mime = NULL;
@@ -337,14 +370,14 @@ static av_cold int mediacodec_init(AVCodecContext *avctx)
goto bailout;
mediacodec_output_format(avctx);
- if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
- av_log(avctx, AV_LOG_WARNING,
- "Mediacodec encoder doesn't support AV_CODEC_FLAG_GLOBAL_HEADER. "
- "Use extract_extradata bsf when necessary.\n");
s->frame = av_frame_alloc();
- if (!s->frame)
+ if (!s->frame) {
ret = AVERROR(ENOMEM);
+ goto bailout;
+ }
+
+ ret = mediacodec_generate_extradata(avctx);
bailout:
if (format)
@@ -352,9 +385,7 @@ bailout:
return ret;
}
-static int mediacodec_receive(AVCodecContext *avctx,
- AVPacket *pkt,
- int *got_packet)
+static int mediacodec_receive(AVCodecContext *avctx, AVPacket *pkt)
{
MediaCodecEncContext *s = avctx->priv_data;
FFAMediaCodec *codec = s->codec;
@@ -400,7 +431,7 @@ static int mediacodec_receive(AVCodecContext *avctx,
memcpy(s->extradata, out_buf + out_info.offset, out_info.size);
ff_AMediaCodec_releaseOutputBuffer(codec, index, false);
// try immediately
- return mediacodec_receive(avctx, pkt, got_packet);
+ return mediacodec_receive(avctx, pkt);
}
ret = ff_get_encode_buffer(avctx, pkt, out_info.size + s->extradata_size, 0);
@@ -419,7 +450,6 @@ static int mediacodec_receive(AVCodecContext *avctx,
if (out_info.flags & ff_AMediaCodec_getBufferFlagKeyFrame(codec))
pkt->flags |= AV_PKT_FLAG_KEY;
ret = 0;
- *got_packet = 1;
av_log(avctx, AV_LOG_TRACE, "receive packet pts %" PRId64 " dts %" PRId64
" flags %d extradata %d\n",
@@ -510,7 +540,6 @@ static int mediacodec_encode(AVCodecContext *avctx, AVPacket *pkt)
{
MediaCodecEncContext *s = avctx->priv_data;
int ret;
- int got_packet = 0;
// Return on three case:
// 1. Serious error
@@ -525,7 +554,7 @@ static int mediacodec_encode(AVCodecContext *avctx, AVPacket *pkt)
return ret;
}
- ret = mediacodec_receive(avctx, pkt, &got_packet);
+ ret = mediacodec_receive(avctx, pkt);
if (s->bsf) {
if (!ret || ret == AVERROR_EOF)
ret = av_bsf_send_packet(s->bsf, pkt);
@@ -534,7 +563,7 @@ static int mediacodec_encode(AVCodecContext *avctx, AVPacket *pkt)
return 0;
}
- if (ret != AVERROR(EAGAIN))
+ if (ret < 0 && ret != AVERROR(EAGAIN))
return ret;
if (!s->frame->buf[0]) {
@@ -553,6 +582,110 @@ static int mediacodec_encode(AVCodecContext *avctx, AVPacket *pkt)
return 0;
}
+static int mediacodec_send_dummy_frame(AVCodecContext *avctx)
+{
+ MediaCodecEncContext *s = avctx->priv_data;
+ int ret;
+
+ s->frame->width = avctx->width;
+ s->frame->height = avctx->height;
+ s->frame->format = avctx->pix_fmt;
+ s->frame->pts = 0;
+
+ ret = av_frame_get_buffer(s->frame, 0);
+ if (ret < 0)
+ return ret;
+
+ do {
+ ret = mediacodec_send(avctx, s->frame);
+ } while (ret == AVERROR(EAGAIN));
+ av_frame_unref(s->frame);
+
+ if (ret < 0)
+ return ret;
+
+ ret = mediacodec_send(avctx, NULL);
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Flush failed: %s\n", av_err2str(ret));
+ return ret;
+ }
+
+ return 0;
+}
+
+static int mediacodec_receive_dummy_pkt(AVCodecContext *avctx, AVPacket *pkt)
+{
+ MediaCodecEncContext *s = avctx->priv_data;
+ int ret;
+
+ do {
+ ret = mediacodec_receive(avctx, pkt);
+ } while (ret == AVERROR(EAGAIN));
+
+ if (ret < 0)
+ return ret;
+
+ do {
+ ret = av_bsf_send_packet(s->bsf, pkt);
+ if (ret < 0)
+ return ret;
+ ret = av_bsf_receive_packet(s->bsf, pkt);
+ } while (ret == AVERROR(EAGAIN));
+
+ return ret;
+}
+
+static int mediacodec_generate_extradata(AVCodecContext *avctx)
+{
+ MediaCodecEncContext *s = avctx->priv_data;
+ AVPacket *pkt = NULL;
+ int ret;
+ size_t side_size;
+ uint8_t *side;
+
+ if (!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER))
+ return 0;
+
+ if (!s->extract_extradata) {
+ av_log(avctx, AV_LOG_WARNING,
+ "Mediacodec encoder doesn't support AV_CODEC_FLAG_GLOBAL_HEADER. "
+ "Use extract_extradata bsf when necessary.\n");
+ return 0;
+ }
+
+ pkt = av_packet_alloc();
+ if (!pkt)
+ return AVERROR(ENOMEM);
+
+ ret = mediacodec_send_dummy_frame(avctx);
+ if (ret < 0)
+ goto bailout;
+ ret = mediacodec_receive_dummy_pkt(avctx, pkt);
+ if (ret < 0)
+ goto bailout;
+
+ side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
+ if (side && side_size > 0) {
+ avctx->extradata = av_mallocz(side_size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!avctx->extradata) {
+ ret = AVERROR(ENOMEM);
+ goto bailout;
+ }
+
+ memcpy(avctx->extradata, side, side_size);
+ avctx->extradata_size = side_size;
+ }
+
+bailout:
+ if (s->eof_sent) {
+ s->eof_sent = 0;
+ ff_AMediaCodec_flush(s->codec);
+ }
+ av_bsf_flush(s->bsf);
+ av_packet_free(&pkt);
+ return ret;
+}
+
static av_cold int mediacodec_close(AVCodecContext *avctx)
{
MediaCodecEncContext *s = avctx->priv_data;
@@ -573,6 +706,15 @@ static av_cold int mediacodec_close(AVCodecContext *avctx)
return 0;
}
+static av_cold void mediacodec_flush(AVCodecContext *avctx)
+{
+ MediaCodecEncContext *s = avctx->priv_data;
+ if (s->bsf)
+ av_bsf_flush(s->bsf);
+ av_frame_unref(s->frame);
+ ff_AMediaCodec_flush(s->codec);
+}
+
static const AVCodecHWConfigInternal *const mediacodec_hw_configs[] = {
&(const AVCodecHWConfigInternal) {
.public = {
@@ -623,13 +765,15 @@ const FFCodec ff_ ## short_name ## _mediacodec_encoder = { \
CODEC_LONG_NAME(long_name " Android MediaCodec encoder"), \
.p.type = AVMEDIA_TYPE_VIDEO, \
.p.id = codec_id, \
- .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY \
- | AV_CODEC_CAP_HARDWARE, \
+ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | \
+ AV_CODEC_CAP_HARDWARE | \
+ AV_CODEC_CAP_ENCODER_FLUSH, \
.priv_data_size = sizeof(MediaCodecEncContext), \
.p.pix_fmts = avc_pix_fmts, \
.init = mediacodec_init, \
FF_CODEC_RECEIVE_PACKET_CB(mediacodec_encode), \
.close = mediacodec_close, \
+ .flush = mediacodec_flush, \
.p.priv_class = &short_name ## _mediacodec_class, \
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP, \
.p.wrapper_name = "mediacodec", \
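
The mediacodecenc.c changes above implement AV_CODEC_FLAG_GLOBAL_HEADER by chaining the h264_metadata/hevc_metadata and extract_extradata bitstream filters, encoding a dummy frame at init time, and copying the resulting AV_PKT_DATA_NEW_EXTRADATA side data into avctx->extradata. A minimal caller-side sketch of what this enables follows; it is not part of the patch, and the encoder name and pixel format are assumptions that depend on the Android device.

    /* Stand-alone sketch (not part of the patch): open the MediaCodec H.264
     * encoder with global headers requested, so that avctx->extradata is
     * filled at init time by the dummy-frame path added above.
     * "h264_mediacodec" availability and NV12 support are assumptions. */
    #include <libavcodec/avcodec.h>
    #include <libavutil/error.h>

    static int open_with_global_header(AVCodecContext **pctx)
    {
        const AVCodec *codec = avcodec_find_encoder_by_name("h264_mediacodec");
        AVCodecContext *ctx;
        int ret;

        if (!codec)
            return AVERROR_ENCODER_NOT_FOUND;
        ctx = avcodec_alloc_context3(codec);
        if (!ctx)
            return AVERROR(ENOMEM);

        ctx->width     = 1280;
        ctx->height    = 720;
        ctx->pix_fmt   = AV_PIX_FMT_NV12;
        ctx->time_base = (AVRational){ 1, 30 };
        ctx->flags    |= AV_CODEC_FLAG_GLOBAL_HEADER; /* triggers the new init path */

        ret = avcodec_open2(ctx, codec, NULL);
        if (ret < 0) {
            avcodec_free_context(&ctx);
            return ret;
        }
        /* ctx->extradata / ctx->extradata_size now hold the parameter sets,
         * as required by muxers such as MP4/MOV. */
        *pctx = ctx;
        return 0;
    }
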
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 8928f24022..2925aa50f7 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -34,8 +34,8 @@
#include "bswapdsp.h"
#include "hpeldsp.h"
#include "idctdsp.h"
+#include "progressframe.h"
#include "thread.h"
-#include "threadframe.h"
#define MIMIC_HEADER_SIZE 20
#define MIMIC_VLC_BITS 11
@@ -52,7 +52,7 @@ typedef struct MimicContext {
int cur_index;
int prev_index;
- ThreadFrame frames [16];
+ ProgressFrame frames[16];
DECLARE_ALIGNED(32, int16_t, dct_block)[64];
@@ -105,16 +105,12 @@ static const uint8_t col_zag[64] = {
static av_cold int mimic_decode_end(AVCodecContext *avctx)
{
MimicContext *ctx = avctx->priv_data;
- int i;
av_freep(&ctx->swap_buf);
ctx->swap_buf_size = 0;
- for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) {
- if (ctx->frames[i].f)
- ff_thread_release_ext_buffer(&ctx->frames[i]);
- av_frame_free(&ctx->frames[i].f);
- }
+ for (int i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++)
+ ff_progress_frame_unref(&ctx->frames[i]);
return 0;
}
@@ -130,7 +126,6 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
{
static AVOnce init_static_once = AV_ONCE_INIT;
MimicContext *ctx = avctx->priv_data;
- int i;
ctx->prev_index = 0;
ctx->cur_index = 15;
@@ -141,12 +136,6 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
ff_idctdsp_init(&ctx->idsp, avctx);
ff_permute_scantable(ctx->permutated_scantable, col_zag, ctx->idsp.idct_permutation);
- for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) {
- ctx->frames[i].f = av_frame_alloc();
- if (!ctx->frames[i].f)
- return AVERROR(ENOMEM);
- }
-
ff_thread_once(&init_static_once, mimic_init_static);
return 0;
@@ -156,7 +145,6 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCodecContext *avctx_from)
{
MimicContext *dst = avctx->priv_data, *src = avctx_from->priv_data;
- int i, ret;
if (avctx == avctx_from)
return 0;
@@ -164,13 +152,10 @@ static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCod
dst->cur_index = src->next_cur_index;
dst->prev_index = src->next_prev_index;
- for (i = 0; i < FF_ARRAY_ELEMS(dst->frames); i++) {
- ff_thread_release_ext_buffer(&dst->frames[i]);
- if (i != src->next_cur_index && src->frames[i].f->data[0]) {
- ret = ff_thread_ref_frame(&dst->frames[i], &src->frames[i]);
- if (ret < 0)
- return ret;
- }
+ for (int i = 0; i < FF_ARRAY_ELEMS(dst->frames); i++) {
+ ff_progress_frame_unref(&dst->frames[i]);
+ if (i != src->next_cur_index && src->frames[i].f)
+ ff_progress_frame_ref(&dst->frames[i], &src->frames[i]);
}
return 0;
@@ -293,11 +278,10 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
} else {
unsigned int backref = get_bits(&ctx->gb, 4);
int index = (ctx->cur_index + backref) & 15;
- uint8_t *p = ctx->frames[index].f->data[0];
- if (index != ctx->cur_index && p) {
- ff_thread_await_progress(&ctx->frames[index],
- cur_row, 0);
+ if (index != ctx->cur_index && ctx->frames[index].f) {
+ const uint8_t *p = ctx->frames[index].f->data[0];
+ ff_progress_frame_await(&ctx->frames[index], cur_row);
p += src -
ctx->frames[ctx->prev_index].f->data[plane];
ctx->hdsp.put_pixels_tab[1][0](dst, p, stride, 8);
@@ -307,8 +291,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
}
}
} else {
- ff_thread_await_progress(&ctx->frames[ctx->prev_index],
- cur_row, 0);
+ ff_progress_frame_await(&ctx->frames[ctx->prev_index], cur_row);
ctx->hdsp.put_pixels_tab[1][0](dst, src, stride, 8);
}
src += 8;
@@ -317,8 +300,7 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
src += (stride - ctx->num_hblocks[plane]) << 3;
dst += (stride - ctx->num_hblocks[plane]) << 3;
- ff_thread_report_progress(&ctx->frames[ctx->cur_index],
- cur_row++, 0);
+ ff_progress_frame_report(&ctx->frames[ctx->cur_index], cur_row++);
}
}
@@ -392,17 +374,18 @@ static int mimic_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
return AVERROR_PATCHWELCOME;
}
- if (is_pframe && !ctx->frames[ctx->prev_index].f->data[0]) {
+ if (is_pframe && !ctx->frames[ctx->prev_index].f) {
av_log(avctx, AV_LOG_ERROR, "decoding must start with keyframe\n");
return AVERROR_INVALIDDATA;
}
- ff_thread_release_ext_buffer(&ctx->frames[ctx->cur_index]);
+ ff_progress_frame_unref(&ctx->frames[ctx->cur_index]);
+ res = ff_progress_frame_get_buffer(avctx, &ctx->frames[ctx->cur_index],
+ AV_GET_BUFFER_FLAG_REF);
+ if (res < 0)
+ return res;
ctx->frames[ctx->cur_index].f->pict_type = is_pframe ? AV_PICTURE_TYPE_P :
AV_PICTURE_TYPE_I;
- if ((res = ff_thread_get_ext_buffer(avctx, &ctx->frames[ctx->cur_index],
- AV_GET_BUFFER_FLAG_REF)) < 0)
- return res;
ctx->next_prev_index = ctx->cur_index;
ctx->next_cur_index = (ctx->cur_index - 1) & 15;
@@ -419,10 +402,10 @@ static int mimic_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3);
res = decode(ctx, quality, num_coeffs, !is_pframe);
- ff_thread_report_progress(&ctx->frames[ctx->cur_index], INT_MAX, 0);
+ ff_progress_frame_report(&ctx->frames[ctx->cur_index], INT_MAX);
if (res < 0) {
if (!(avctx->active_thread_type & FF_THREAD_FRAME))
- ff_thread_release_ext_buffer(&ctx->frames[ctx->cur_index]);
+ ff_progress_frame_unref(&ctx->frames[ctx->cur_index]);
return res;
}
@@ -449,6 +432,6 @@ const FFCodec ff_mimic_decoder = {
FF_CODEC_DECODE_CB(mimic_decode_frame),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
UPDATE_THREAD_CONTEXT(mimic_decode_update_thread_context),
- .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS |
+ .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES |
FF_CODEC_CAP_INIT_CLEANUP,
};
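
The mimic.c hunks above port the decoder from the ThreadFrame calls (ff_thread_get_ext_buffer / ff_thread_report_progress / ff_thread_await_progress) to the ProgressFrame helpers. The underlying handshake is unchanged: the thread that owns a frame publishes how many rows it has finished, and other frame threads block until the row they reference is ready. The stand-alone illustration below rebuilds that handshake with plain pthreads; it is a conceptual model of the protocol, not FFmpeg's implementation.

    /* Conceptual model (not FFmpeg code) of the row-progress handshake that
     * ff_progress_frame_report()/ff_progress_frame_await() provide, rebuilt
     * with pthreads so it can be compiled and run on its own. */
    #include <limits.h>
    #include <pthread.h>
    #include <stdio.h>

    typedef struct Progress {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int progress;               /* last completed row, -1 = nothing yet */
    } Progress;

    static void progress_report(Progress *p, int row)
    {
        pthread_mutex_lock(&p->lock);
        p->progress = row;
        pthread_cond_broadcast(&p->cond);
        pthread_mutex_unlock(&p->lock);
    }

    static void progress_await(Progress *p, int row)
    {
        pthread_mutex_lock(&p->lock);
        while (p->progress < row)
            pthread_cond_wait(&p->cond, &p->lock);
        pthread_mutex_unlock(&p->lock);
    }

    static Progress ref = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, -1 };

    static void *producer(void *arg)
    {
        for (int row = 0; row < 16; row++)
            progress_report(&ref, row);    /* rows become visible one by one */
        progress_report(&ref, INT_MAX);    /* frame fully decoded */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, producer, NULL);
        progress_await(&ref, 7);           /* consumer needs row 7 of the reference */
        printf("row 7 of the reference frame is ready\n");
        pthread_join(&t, NULL);
        return 0;
    }
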
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 50fe38a50e..0647c0142a 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -1,5 +1,4 @@
-ARCH_HEADERS = aacsbr_mips.h aacpsy_mips.h \
- cabac.h compute_antialias_fixed.h \
+ARCH_HEADERS = cabac.h compute_antialias_fixed.h \
compute_antialias_float.h \
MIPSFPU-OBJS-$(CONFIG_AMRNB_DECODER) += mips/acelp_filters_mips.o \
@@ -15,10 +14,6 @@ MIPSFPU-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_float.o
MIPSDSP-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_fixed.o
MIPSFPU-OBJS-$(CONFIG_FMTCONVERT) += mips/fmtconvert_mips.o
OBJS-$(CONFIG_AC3DSP) += mips/ac3dsp_mips.o
-OBJS-$(CONFIG_AAC_DECODER) += mips/aacdec_mips.o \
- mips/aacsbr_mips.o \
- mips/sbrdsp_mips.o \
- mips/aacpsdsp_mips.o
MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER) += mips/iirfilter_mips.o
OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_init_mips.o \
mips/hevcpred_init_mips.o
diff --git a/libavcodec/mips/aacdec_mips.c b/libavcodec/mips/aacdec_mips.c
deleted file mode 100644
index 456e270915..0000000000
--- a/libavcodec/mips/aacdec_mips.c
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Darko Laus (darko@mips.com)
- * Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacdec.c
- */
-
-#include "libavutil/attributes.h"
-#include "libavcodec/aacdec.h"
-#include "aacdec_mips.h"
-#include "libavcodec/aactab.h"
-#include "libavcodec/sinewin.h"
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
-static av_always_inline void float_copy(float *dst, const float *src, int count)
-{
- // Copy 'count' floats from src to dst
- const float *loop_end = src + count;
- int temp[8];
-
- // count must be a multiple of 8
- av_assert2(count % 8 == 0);
-
- // loop unrolled 8 times
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "lw %[temp4], 16(%[src]) \n\t"
- "lw %[temp5], 20(%[src]) \n\t"
- "lw %[temp6], 24(%[src]) \n\t"
- "lw %[temp7], 28(%[src]) \n\t"
- PTR_ADDIU "%[src], %[src], 32 \n\t"
- "sw %[temp0], 0(%[dst]) \n\t"
- "sw %[temp1], 4(%[dst]) \n\t"
- "sw %[temp2], 8(%[dst]) \n\t"
- "sw %[temp3], 12(%[dst]) \n\t"
- "sw %[temp4], 16(%[dst]) \n\t"
- "sw %[temp5], 20(%[dst]) \n\t"
- "sw %[temp6], 24(%[dst]) \n\t"
- "sw %[temp7], 28(%[dst]) \n\t"
- "bne %[src], %[loop_end], 1b \n\t"
- PTR_ADDIU "%[dst], %[dst], 32 \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&r"(temp[0]), [temp1]"=&r"(temp[1]),
- [temp2]"=&r"(temp[2]), [temp3]"=&r"(temp[3]),
- [temp4]"=&r"(temp[4]), [temp5]"=&r"(temp[5]),
- [temp6]"=&r"(temp[6]), [temp7]"=&r"(temp[7]),
- [src]"+r"(src), [dst]"+r"(dst)
- : [loop_end]"r"(loop_end)
- : "memory"
- );
-}
-
-static av_always_inline int lcg_random(unsigned previous_val)
-{
- union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
- return v.s;
-}
-
-static void imdct_and_windowing_mips(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- float *in = sce->coeffs;
- float *out = sce->ret;
- float *saved = sce->saved;
- const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
- const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
- const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
- float *buf = ac->buf_mdct;
- int i;
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- for (i = 0; i < 1024; i += 128)
- ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(float));
- } else
- ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(float));
-
- /* window overlapping
- * NOTE: To simplify the overlapping code, all 'meaningless' short to long
- * and long to short transitions are considered to be short to short
- * transitions. This leaves just two cases (long to long and short to short)
- * with a little special sauce for EIGHT_SHORT_SEQUENCE.
- */
- if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
- (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
- ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
- } else {
- float_copy(out, saved, 448);
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- {
- float wi;
- float wj;
- int i;
- float temp0, temp1, temp2, temp3;
- float *dst0 = out + 448 + 0*128;
- float *dst1 = dst0 + 64 + 63;
- float *dst2 = saved + 63;
- float *win0 = (float*)swindow;
- float *win1 = win0 + 64 + 63;
- float *win0_prev = (float*)swindow_prev;
- float *win1_prev = win0_prev + 64 + 63;
- float *src0_prev = saved + 448;
- float *src1_prev = buf + 0*128 + 63;
- float *src0 = buf + 0*128 + 64;
- float *src1 = buf + 1*128 + 63;
-
- for(i = 0; i < 64; i++)
- {
- temp0 = src0_prev[0];
- temp1 = src1_prev[0];
- wi = *win0_prev;
- wj = *win1_prev;
- temp2 = src0[0];
- temp3 = src1[0];
- dst0[0] = temp0 * wj - temp1 * wi;
- dst1[0] = temp0 * wi + temp1 * wj;
-
- wi = *win0;
- wj = *win1;
-
- temp0 = src0[128];
- temp1 = src1[128];
- dst0[128] = temp2 * wj - temp3 * wi;
- dst1[128] = temp2 * wi + temp3 * wj;
-
- temp2 = src0[256];
- temp3 = src1[256];
- dst0[256] = temp0 * wj - temp1 * wi;
- dst1[256] = temp0 * wi + temp1 * wj;
- dst0[384] = temp2 * wj - temp3 * wi;
- dst1[384] = temp2 * wi + temp3 * wj;
-
- temp0 = src0[384];
- temp1 = src1[384];
- dst0[512] = temp0 * wj - temp1 * wi;
- dst2[0] = temp0 * wi + temp1 * wj;
-
- src0++;
- src1--;
- src0_prev++;
- src1_prev--;
- win0++;
- win1--;
- win0_prev++;
- win1_prev--;
- dst0++;
- dst1--;
- dst2--;
- }
- }
- } else {
- ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
- float_copy(out + 576, buf + 64, 448);
- }
- }
-
- // buffer update
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
- ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
- ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
- float_copy(saved + 448, buf + 7*128 + 64, 64);
- } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- float_copy(saved, buf + 512, 448);
- float_copy(saved + 448, buf + 7*128 + 64, 64);
- } else { // LONG_STOP or ONLY_LONG
- float_copy(saved, buf + 512, 512);
- }
-}
-
-static void apply_ltp_mips(AACDecContext *ac, SingleChannelElement *sce)
-{
- const LongTermPrediction *ltp = &sce->ics.ltp;
- const uint16_t *offsets = sce->ics.swb_offset;
- int i, sfb;
- int j, k;
-
- if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
- float *predTime = sce->ret;
- float *predFreq = ac->buf_mdct;
- float *p_predTime;
- int16_t num_samples = 2048;
-
- if (ltp->lag < 1024)
- num_samples = ltp->lag + 1024;
- j = (2048 - num_samples) >> 2;
- k = (2048 - num_samples) & 3;
- p_predTime = &predTime[num_samples];
-
- for (i = 0; i < num_samples; i++)
- predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
- for (i = 0; i < j; i++) {
-
- /* loop unrolled 4 times */
- __asm__ volatile (
- "sw $0, 0(%[p_predTime]) \n\t"
- "sw $0, 4(%[p_predTime]) \n\t"
- "sw $0, 8(%[p_predTime]) \n\t"
- "sw $0, 12(%[p_predTime]) \n\t"
- PTR_ADDIU "%[p_predTime], %[p_predTime], 16 \n\t"
-
- : [p_predTime]"+r"(p_predTime)
- :
- : "memory"
- );
- }
- for (i = 0; i < k; i++) {
-
- __asm__ volatile (
- "sw $0, 0(%[p_predTime]) \n\t"
- PTR_ADDIU "%[p_predTime], %[p_predTime], 4 \n\t"
-
- : [p_predTime]"+r"(p_predTime)
- :
- : "memory"
- );
- }
-
- ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
-
- if (sce->tns.present)
- ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
-
- for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
- if (ltp->used[sfb])
- for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
- sce->coeffs[i] += predFreq[i];
- }
-}
-
-static av_always_inline void fmul_and_reverse(float *dst, const float *src0, const float *src1, int count)
-{
- /* Multiply 'count' floats in src0 by src1 and store the results in dst in reverse */
- /* This should be equivalent to a normal fmul, followed by reversing dst */
-
- // count must be a multiple of 4
- av_assert2(count % 4 == 0);
-
- // move src0 and src1 to the last element of their arrays
- src0 += count - 1;
- src1 += count - 1;
-
- for (; count > 0; count -= 4){
- float temp[12];
-
- /* loop unrolled 4 times */
- __asm__ volatile (
- "lwc1 %[temp0], 0(%[ptr2]) \n\t"
- "lwc1 %[temp1], -4(%[ptr2]) \n\t"
- "lwc1 %[temp2], -8(%[ptr2]) \n\t"
- "lwc1 %[temp3], -12(%[ptr2]) \n\t"
- "lwc1 %[temp4], 0(%[ptr3]) \n\t"
- "lwc1 %[temp5], -4(%[ptr3]) \n\t"
- "lwc1 %[temp6], -8(%[ptr3]) \n\t"
- "lwc1 %[temp7], -12(%[ptr3]) \n\t"
- "mul.s %[temp8], %[temp0], %[temp4] \n\t"
- "mul.s %[temp9], %[temp1], %[temp5] \n\t"
- "mul.s %[temp10], %[temp2], %[temp6] \n\t"
- "mul.s %[temp11], %[temp3], %[temp7] \n\t"
- "swc1 %[temp8], 0(%[ptr1]) \n\t"
- "swc1 %[temp9], 4(%[ptr1]) \n\t"
- "swc1 %[temp10], 8(%[ptr1]) \n\t"
- "swc1 %[temp11], 12(%[ptr1]) \n\t"
- PTR_ADDIU "%[ptr1], %[ptr1], 16 \n\t"
- PTR_ADDIU "%[ptr2], %[ptr2], -16 \n\t"
- PTR_ADDIU "%[ptr3], %[ptr3], -16 \n\t"
-
- : [temp0]"=&f"(temp[0]), [temp1]"=&f"(temp[1]),
- [temp2]"=&f"(temp[2]), [temp3]"=&f"(temp[3]),
- [temp4]"=&f"(temp[4]), [temp5]"=&f"(temp[5]),
- [temp6]"=&f"(temp[6]), [temp7]"=&f"(temp[7]),
- [temp8]"=&f"(temp[8]), [temp9]"=&f"(temp[9]),
- [temp10]"=&f"(temp[10]), [temp11]"=&f"(temp[11]),
- [ptr1]"+r"(dst), [ptr2]"+r"(src0), [ptr3]"+r"(src1)
- :
- : "memory"
- );
- }
-}
-
-static void update_ltp_mips(AACDecContext *ac, SingleChannelElement *sce)
-{
- IndividualChannelStream *ics = &sce->ics;
- float *saved = sce->saved;
- float *saved_ltp = sce->coeffs;
- const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
- const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
- uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-
- if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
- float *p_saved_ltp = saved_ltp + 576;
- float *loop_end1 = p_saved_ltp + 448;
-
- float_copy(saved_ltp, saved, 512);
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "1: \n\t"
- "sw $0, 0(%[p_saved_ltp]) \n\t"
- "sw $0, 4(%[p_saved_ltp]) \n\t"
- "sw $0, 8(%[p_saved_ltp]) \n\t"
- "sw $0, 12(%[p_saved_ltp]) \n\t"
- "sw $0, 16(%[p_saved_ltp]) \n\t"
- "sw $0, 20(%[p_saved_ltp]) \n\t"
- "sw $0, 24(%[p_saved_ltp]) \n\t"
- "sw $0, 28(%[p_saved_ltp]) \n\t"
- PTR_ADDIU "%[p_saved_ltp],%[p_saved_ltp], 32 \n\t"
- "bne %[p_saved_ltp], %[loop_end1], 1b \n\t"
-
- : [p_saved_ltp]"+r"(p_saved_ltp)
- : [loop_end1]"r"(loop_end1)
- : "memory"
- );
-
- ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
- fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 960, swindow, 64);
- } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
- float *buff0 = saved;
- float *buff1 = saved_ltp;
- float *loop_end = saved + 448;
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "1: \n\t"
- "lw %[temp0], 0(%[src]) \n\t"
- "lw %[temp1], 4(%[src]) \n\t"
- "lw %[temp2], 8(%[src]) \n\t"
- "lw %[temp3], 12(%[src]) \n\t"
- "lw %[temp4], 16(%[src]) \n\t"
- "lw %[temp5], 20(%[src]) \n\t"
- "lw %[temp6], 24(%[src]) \n\t"
- "lw %[temp7], 28(%[src]) \n\t"
- PTR_ADDIU "%[src], %[src], 32 \n\t"
- "sw %[temp0], 0(%[dst]) \n\t"
- "sw %[temp1], 4(%[dst]) \n\t"
- "sw %[temp2], 8(%[dst]) \n\t"
- "sw %[temp3], 12(%[dst]) \n\t"
- "sw %[temp4], 16(%[dst]) \n\t"
- "sw %[temp5], 20(%[dst]) \n\t"
- "sw %[temp6], 24(%[dst]) \n\t"
- "sw %[temp7], 28(%[dst]) \n\t"
- "sw $0, 2304(%[dst]) \n\t"
- "sw $0, 2308(%[dst]) \n\t"
- "sw $0, 2312(%[dst]) \n\t"
- "sw $0, 2316(%[dst]) \n\t"
- "sw $0, 2320(%[dst]) \n\t"
- "sw $0, 2324(%[dst]) \n\t"
- "sw $0, 2328(%[dst]) \n\t"
- "sw $0, 2332(%[dst]) \n\t"
- "bne %[src], %[loop_end], 1b \n\t"
- PTR_ADDIU "%[dst], %[dst], 32 \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
- [src]"+r"(buff0), [dst]"+r"(buff1)
- : [loop_end]"r"(loop_end)
- : "memory"
- );
- ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64);
- fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 960, swindow, 64);
- } else { // LONG_STOP or ONLY_LONG
- ac->fdsp->vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512);
- fmul_and_reverse(saved_ltp + 512, ac->buf_mdct + 512, lwindow, 512);
- }
-
- float_copy(sce->ltp_state, sce->ltp_state + 1024, 1024);
- float_copy(sce->ltp_state + 1024, sce->ret, 1024);
- float_copy(sce->ltp_state + 2048, saved_ltp, 1024);
-}
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-
-void ff_aacdec_init_mips(AACDecContext *c)
-{
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
- c->imdct_and_windowing = imdct_and_windowing_mips;
- c->apply_ltp = apply_ltp_mips;
- c->update_ltp = update_ltp_mips;
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-}
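
The removed aacdec_mips.c mostly hand-unrolls memory copies and the IMDCT overlap-add, but one of its helpers is easier to follow in scalar form. fmul_and_reverse(), per its own comment, is an element-wise multiply whose output is written in reverse order; the plain-C sketch below is inferred from that comment and the assembly, not copied from the tree.

    /* Scalar form of the deleted fmul_and_reverse(): equivalent to an
     * element-wise multiply of src0 and src1 followed by reversing dst. */
    static void fmul_and_reverse_c(float *dst, const float *src0,
                                   const float *src1, int count)
    {
        for (int i = 0; i < count; i++)
            dst[i] = src0[count - 1 - i] * src1[count - 1 - i];
    }
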
diff --git a/libavcodec/mips/aacdec_mips.h b/libavcodec/mips/aacdec_mips.h
deleted file mode 100644
index 71581986dc..0000000000
--- a/libavcodec/mips/aacdec_mips.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Darko Laus (darko@mips.com)
- * Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * AAC Spectral Band Replication decoding functions optimized for MIPS
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacdec.c
- */
-
-#ifndef AVCODEC_MIPS_AACDEC_MIPS_H
-#define AVCODEC_MIPS_AACDEC_MIPS_H
-
-#include "libavcodec/aacdec.h"
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM && HAVE_MIPSFPU
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
- const float *scale)
-{
- float temp0, temp1, temp2;
- int temp3, temp4;
- float *ret;
-
- __asm__ volatile(
- "andi %[temp3], %[idx], 0x0F \n\t"
- "andi %[temp4], %[idx], 0xF0 \n\t"
- "sll %[temp3], %[temp3], 2 \n\t"
- "srl %[temp4], %[temp4], 2 \n\t"
- "lwc1 %[temp2], 0(%[scale]) \n\t"
- "lwxc1 %[temp0], %[temp3](%[v]) \n\t"
- "lwxc1 %[temp1], %[temp4](%[v]) \n\t"
- "mul.s %[temp0], %[temp0], %[temp2] \n\t"
- "mul.s %[temp1], %[temp1], %[temp2] \n\t"
- PTR_ADDIU "%[ret], %[dst], 8 \n\t"
- "swc1 %[temp0], 0(%[dst]) \n\t"
- "swc1 %[temp1], 4(%[dst]) \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
- [temp2]"=&f"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [ret]"=&r"(ret)
- : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
- [dst]"r"(dst)
- : "memory"
- );
- return ret;
-}
-
-static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx,
- const float *scale)
-{
- int temp0, temp1, temp2, temp3;
- float temp4, temp5, temp6, temp7, temp8;
- float *ret;
-
- __asm__ volatile(
- "andi %[temp0], %[idx], 0x03 \n\t"
- "andi %[temp1], %[idx], 0x0C \n\t"
- "andi %[temp2], %[idx], 0x30 \n\t"
- "andi %[temp3], %[idx], 0xC0 \n\t"
- "sll %[temp0], %[temp0], 2 \n\t"
- "srl %[temp2], %[temp2], 2 \n\t"
- "srl %[temp3], %[temp3], 4 \n\t"
- "lwc1 %[temp4], 0(%[scale]) \n\t"
- "lwxc1 %[temp5], %[temp0](%[v]) \n\t"
- "lwxc1 %[temp6], %[temp1](%[v]) \n\t"
- "lwxc1 %[temp7], %[temp2](%[v]) \n\t"
- "lwxc1 %[temp8], %[temp3](%[v]) \n\t"
- "mul.s %[temp5], %[temp5], %[temp4] \n\t"
- "mul.s %[temp6], %[temp6], %[temp4] \n\t"
- "mul.s %[temp7], %[temp7], %[temp4] \n\t"
- "mul.s %[temp8], %[temp8], %[temp4] \n\t"
- PTR_ADDIU "%[ret], %[dst], 16 \n\t"
- "swc1 %[temp5], 0(%[dst]) \n\t"
- "swc1 %[temp6], 4(%[dst]) \n\t"
- "swc1 %[temp7], 8(%[dst]) \n\t"
- "swc1 %[temp8], 12(%[dst]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
- [temp8]"=&f"(temp8), [ret]"=&r"(ret)
- : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
- [dst]"r"(dst)
- : "memory"
- );
- return ret;
-}
-
-static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx,
- unsigned sign, const float *scale)
-{
- int temp0, temp1, temp2, temp3, temp4, temp5;
- float temp6, temp7, temp8, temp9;
- float *ret;
-
- __asm__ volatile(
- "andi %[temp0], %[idx], 0x0F \n\t"
- "andi %[temp1], %[idx], 0xF0 \n\t"
- "lw %[temp4], 0(%[scale]) \n\t"
- "srl %[temp2], %[sign], 1 \n\t"
- "sll %[temp3], %[sign], 31 \n\t"
- "sll %[temp2], %[temp2], 31 \n\t"
- "sll %[temp0], %[temp0], 2 \n\t"
- "srl %[temp1], %[temp1], 2 \n\t"
- "lwxc1 %[temp8], %[temp0](%[v]) \n\t"
- "lwxc1 %[temp9], %[temp1](%[v]) \n\t"
- "xor %[temp5], %[temp4], %[temp2] \n\t"
- "xor %[temp4], %[temp4], %[temp3] \n\t"
- "mtc1 %[temp5], %[temp6] \n\t"
- "mtc1 %[temp4], %[temp7] \n\t"
- "mul.s %[temp8], %[temp8], %[temp6] \n\t"
- "mul.s %[temp9], %[temp9], %[temp7] \n\t"
- PTR_ADDIU "%[ret], %[dst], 8 \n\t"
- "swc1 %[temp8], 0(%[dst]) \n\t"
- "swc1 %[temp9], 4(%[dst]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
- [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
- [ret]"=&r"(ret)
- : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
- [dst]"r"(dst), [sign]"r"(sign)
- : "memory"
- );
- return ret;
-}
-
-static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
- unsigned sign, const float *scale)
-{
- int temp0, temp1, temp2, temp3, temp4;
- float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
- float *ret;
- unsigned int mask = 1U << 31;
-
- __asm__ volatile(
- "lw %[temp0], 0(%[scale]) \n\t"
- "andi %[temp1], %[idx], 0x03 \n\t"
- "andi %[temp2], %[idx], 0x0C \n\t"
- "andi %[temp3], %[idx], 0x30 \n\t"
- "andi %[temp4], %[idx], 0xC0 \n\t"
- "sll %[temp1], %[temp1], 2 \n\t"
- "srl %[temp3], %[temp3], 2 \n\t"
- "srl %[temp4], %[temp4], 4 \n\t"
- "lwxc1 %[temp10], %[temp1](%[v]) \n\t"
- "lwxc1 %[temp11], %[temp2](%[v]) \n\t"
- "lwxc1 %[temp12], %[temp3](%[v]) \n\t"
- "lwxc1 %[temp13], %[temp4](%[v]) \n\t"
- "and %[temp1], %[sign], %[mask] \n\t"
- "srl %[temp2], %[idx], 12 \n\t"
- "srl %[temp3], %[idx], 13 \n\t"
- "srl %[temp4], %[idx], 14 \n\t"
- "andi %[temp2], %[temp2], 1 \n\t"
- "andi %[temp3], %[temp3], 1 \n\t"
- "andi %[temp4], %[temp4], 1 \n\t"
- "sllv %[sign], %[sign], %[temp2] \n\t"
- "xor %[temp1], %[temp0], %[temp1] \n\t"
- "and %[temp2], %[sign], %[mask] \n\t"
- "mtc1 %[temp1], %[temp14] \n\t"
- "xor %[temp2], %[temp0], %[temp2] \n\t"
- "sllv %[sign], %[sign], %[temp3] \n\t"
- "mtc1 %[temp2], %[temp15] \n\t"
- "and %[temp3], %[sign], %[mask] \n\t"
- "sllv %[sign], %[sign], %[temp4] \n\t"
- "xor %[temp3], %[temp0], %[temp3] \n\t"
- "and %[temp4], %[sign], %[mask] \n\t"
- "mtc1 %[temp3], %[temp16] \n\t"
- "xor %[temp4], %[temp0], %[temp4] \n\t"
- "mtc1 %[temp4], %[temp17] \n\t"
- "mul.s %[temp10], %[temp10], %[temp14] \n\t"
- "mul.s %[temp11], %[temp11], %[temp15] \n\t"
- "mul.s %[temp12], %[temp12], %[temp16] \n\t"
- "mul.s %[temp13], %[temp13], %[temp17] \n\t"
- PTR_ADDIU "%[ret], %[dst], 16 \n\t"
- "swc1 %[temp10], 0(%[dst]) \n\t"
- "swc1 %[temp11], 4(%[dst]) \n\t"
- "swc1 %[temp12], 8(%[dst]) \n\t"
- "swc1 %[temp13], 12(%[dst]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp10]"=&f"(temp10),
- [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
- [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
- [temp15]"=&f"(temp15), [temp16]"=&f"(temp16),
- [temp17]"=&f"(temp17), [ret]"=&r"(ret),
- [sign]"+r"(sign)
- : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
- [dst]"r"(dst), [mask]"r"(mask)
- : "memory"
- );
- return ret;
-}
-
-#define VMUL2 VMUL2_mips
-#define VMUL4 VMUL4_mips
-#define VMUL2S VMUL2S_mips
-#define VMUL4S VMUL4S_mips
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
-
-#endif /* AVCODEC_MIPS_AACDEC_MIPS_H */
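
The deleted aacdec_mips.h provided MIPS replacements for the VMUL2/VMUL4 dequantization helpers used by the AAC spectral decoder. Reading the bit manipulation out of the assembly, each call unpacks two (or four) codebook indices from idx, scales the looked-up values, and advances the output pointer; a scalar sketch, inferred from the assembly rather than taken from the tree:

    /* Scalar reading of the deleted VMUL2_mips/VMUL4_mips helpers:
     * unpack packed codebook indices from idx, scale by *scale,
     * and advance the output pointer. */
    static inline float *vmul2_c(float *dst, const float *v, unsigned idx,
                                 const float *scale)
    {
        *dst++ = v[idx        & 15] * scale[0];
        *dst++ = v[(idx >> 4) & 15] * scale[0];
        return dst;
    }

    static inline float *vmul4_c(float *dst, const float *v, unsigned idx,
                                 const float *scale)
    {
        *dst++ = v[idx        & 3] * scale[0];
        *dst++ = v[(idx >> 2) & 3] * scale[0];
        *dst++ = v[(idx >> 4) & 3] * scale[0];
        *dst++ = v[(idx >> 6) & 3] * scale[0];
        return dst;
    }
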
diff --git a/libavcodec/mips/aacpsdsp_mips.c b/libavcodec/mips/aacpsdsp_mips.c
deleted file mode 100644
index f63541330d..0000000000
--- a/libavcodec/mips/aacpsdsp_mips.c
+++ /dev/null
@@ -1,465 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Darko Laus (darko@mips.com)
- * Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacpsdsp.c
- */
-
-#include "config.h"
-#include "libavcodec/aacpsdsp.h"
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
-static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
- int i, int len)
-{
- int temp0, temp1, temp2, temp3;
- int temp4, temp5, temp6, temp7;
- float *out1=&out[i][0][0];
- float *L1=&L[0][0][i];
- float *j=out1+ len*2;
-
- for (; i < 64; i++) {
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "1: \n\t"
- "lw %[temp0], 0(%[L1]) \n\t"
- "lw %[temp1], 9728(%[L1]) \n\t"
- "lw %[temp2], 256(%[L1]) \n\t"
- "lw %[temp3], 9984(%[L1]) \n\t"
- "lw %[temp4], 512(%[L1]) \n\t"
- "lw %[temp5], 10240(%[L1]) \n\t"
- "lw %[temp6], 768(%[L1]) \n\t"
- "lw %[temp7], 10496(%[L1]) \n\t"
- "sw %[temp0], 0(%[out1]) \n\t"
- "sw %[temp1], 4(%[out1]) \n\t"
- "sw %[temp2], 8(%[out1]) \n\t"
- "sw %[temp3], 12(%[out1]) \n\t"
- "sw %[temp4], 16(%[out1]) \n\t"
- "sw %[temp5], 20(%[out1]) \n\t"
- "sw %[temp6], 24(%[out1]) \n\t"
- "sw %[temp7], 28(%[out1]) \n\t"
- PTR_ADDIU "%[out1], %[out1], 32 \n\t"
- PTR_ADDIU "%[L1], %[L1], 1024 \n\t"
- "bne %[out1], %[j], 1b \n\t"
-
- : [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
- : [len]"r"(len)
- : "memory"
- );
- out1-=(len<<1)-64;
- L1-=(len<<6)-1;
- j+=len*2;
- }
-}
-
-static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
- float (*in)[32][2],
- int i, int len)
-{
- int n;
- int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- float *out1 = (float*)out + i;
- float *out2 = (float*)out + 2432 + i;
- float *in1 = (float*)in + 64 * i;
- float *in2 = (float*)in + 64 * i + 1;
-
- for (; i < 64; i++) {
- for (n = 0; n < 7; n++) {
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "lw %[temp0], 0(%[in1]) \n\t"
- "lw %[temp1], 0(%[in2]) \n\t"
- "lw %[temp2], 8(%[in1]) \n\t"
- "lw %[temp3], 8(%[in2]) \n\t"
- "lw %[temp4], 16(%[in1]) \n\t"
- "lw %[temp5], 16(%[in2]) \n\t"
- "lw %[temp6], 24(%[in1]) \n\t"
- "lw %[temp7], 24(%[in2]) \n\t"
- PTR_ADDIU "%[out1], %[out1], 1024 \n\t"
- PTR_ADDIU "%[out2], %[out2], 1024 \n\t"
- PTR_ADDIU "%[in1], %[in1], 32 \n\t"
- PTR_ADDIU "%[in2], %[in2], 32 \n\t"
- "sw %[temp0], -1024(%[out1]) \n\t"
- "sw %[temp1], -1024(%[out2]) \n\t"
- "sw %[temp2], -768(%[out1]) \n\t"
- "sw %[temp3], -768(%[out2]) \n\t"
- "sw %[temp4], -512(%[out1]) \n\t"
- "sw %[temp5], -512(%[out2]) \n\t"
- "sw %[temp6], -256(%[out1]) \n\t"
- "sw %[temp7], -256(%[out2]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
- [out1]"+r"(out1), [out2]"+r"(out2),
- [in1]"+r"(in1), [in2]"+r"(in2)
- :
- : "memory"
- );
- }
- /* loop unrolled 8 times */
- __asm__ volatile (
- "lw %[temp0], 0(%[in1]) \n\t"
- "lw %[temp1], 0(%[in2]) \n\t"
- "lw %[temp2], 8(%[in1]) \n\t"
- "lw %[temp3], 8(%[in2]) \n\t"
- "lw %[temp4], 16(%[in1]) \n\t"
- "lw %[temp5], 16(%[in2]) \n\t"
- "lw %[temp6], 24(%[in1]) \n\t"
- "lw %[temp7], 24(%[in2]) \n\t"
- PTR_ADDIU "%[out1], %[out1], -7164 \n\t"
- PTR_ADDIU "%[out2], %[out2], -7164 \n\t"
- PTR_ADDIU "%[in1], %[in1], 32 \n\t"
- PTR_ADDIU "%[in2], %[in2], 32 \n\t"
- "sw %[temp0], 7164(%[out1]) \n\t"
- "sw %[temp1], 7164(%[out2]) \n\t"
- "sw %[temp2], 7420(%[out1]) \n\t"
- "sw %[temp3], 7420(%[out2]) \n\t"
- "sw %[temp4], 7676(%[out1]) \n\t"
- "sw %[temp5], 7676(%[out2]) \n\t"
- "sw %[temp6], 7932(%[out1]) \n\t"
- "sw %[temp7], 7932(%[out2]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
- [out1]"+r"(out1), [out2]"+r"(out2),
- [in1]"+r"(in1), [in2]"+r"(in2)
- :
- : "memory"
- );
- }
-}
-
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
-{
- int i;
- float temp0, temp1, temp2, temp3, temp4, temp5;
- float temp6, temp7, temp8, temp9, temp10, temp11;
- float *src0 = (float*)&src[0][0];
- float *dst0 = &dst[0];
-
- for (i = 0; i < 8; i++) {
- /* loop unrolled 4 times */
- __asm__ volatile (
- "lwc1 %[temp0], 0(%[src0]) \n\t"
- "lwc1 %[temp1], 4(%[src0]) \n\t"
- "lwc1 %[temp2], 8(%[src0]) \n\t"
- "lwc1 %[temp3], 12(%[src0]) \n\t"
- "lwc1 %[temp4], 16(%[src0]) \n\t"
- "lwc1 %[temp5], 20(%[src0]) \n\t"
- "lwc1 %[temp6], 24(%[src0]) \n\t"
- "lwc1 %[temp7], 28(%[src0]) \n\t"
- "lwc1 %[temp8], 0(%[dst0]) \n\t"
- "lwc1 %[temp9], 4(%[dst0]) \n\t"
- "lwc1 %[temp10], 8(%[dst0]) \n\t"
- "lwc1 %[temp11], 12(%[dst0]) \n\t"
- "mul.s %[temp1], %[temp1], %[temp1] \n\t"
- "mul.s %[temp3], %[temp3], %[temp3] \n\t"
- "mul.s %[temp5], %[temp5], %[temp5] \n\t"
- "mul.s %[temp7], %[temp7], %[temp7] \n\t"
- "madd.s %[temp0], %[temp1], %[temp0], %[temp0] \n\t"
- "madd.s %[temp2], %[temp3], %[temp2], %[temp2] \n\t"
- "madd.s %[temp4], %[temp5], %[temp4], %[temp4] \n\t"
- "madd.s %[temp6], %[temp7], %[temp6], %[temp6] \n\t"
- "add.s %[temp0], %[temp8], %[temp0] \n\t"
- "add.s %[temp2], %[temp9], %[temp2] \n\t"
- "add.s %[temp4], %[temp10], %[temp4] \n\t"
- "add.s %[temp6], %[temp11], %[temp6] \n\t"
- "swc1 %[temp0], 0(%[dst0]) \n\t"
- "swc1 %[temp2], 4(%[dst0]) \n\t"
- "swc1 %[temp4], 8(%[dst0]) \n\t"
- "swc1 %[temp6], 12(%[dst0]) \n\t"
- PTR_ADDIU "%[dst0], %[dst0], 16 \n\t"
- PTR_ADDIU "%[src0], %[src0], 32 \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
- [temp9]"=&f"(temp9), [dst0]"+r"(dst0), [src0]"+r"(src0),
- [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
- :
- : "memory"
- );
- }
-}
-
-static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *src1,
- int n)
-{
- float temp0, temp1, temp2;
- float *p_d, *p_s0, *p_s1, *end;
- p_d = &dst[0][0];
- p_s0 = &src0[0][0];
- p_s1 = &src1[0];
- end = p_s1 + n;
-
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- "1: \n\t"
- "lwc1 %[temp2], 0(%[p_s1]) \n\t"
- "lwc1 %[temp0], 0(%[p_s0]) \n\t"
- "lwc1 %[temp1], 4(%[p_s0]) \n\t"
- PTR_ADDIU "%[p_d], %[p_d], 8 \n\t"
- "mul.s %[temp0], %[temp0], %[temp2] \n\t"
- "mul.s %[temp1], %[temp1], %[temp2] \n\t"
- PTR_ADDIU "%[p_s0], %[p_s0], 8 \n\t"
- "swc1 %[temp0], -8(%[p_d]) \n\t"
- "swc1 %[temp1], -4(%[p_d]) \n\t"
- "bne %[p_s1], %[end], 1b \n\t"
- PTR_ADDIU "%[p_s1], %[p_s1], 4 \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
- [temp2]"=&f"(temp2), [p_d]"+r"(p_d),
- [p_s0]"+r"(p_s0), [p_s1]"+r"(p_s1)
- : [end]"r"(end)
- : "memory"
- );
-}
-
-static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
- float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
- const float phi_fract[2], const float (*Q_fract)[2],
- const float *transient_gain,
- float g_decay_slope,
- int len)
-{
- float *p_delay = &delay[0][0];
- float *p_out = &out[0][0];
- float *p_ap_delay = &ap_delay[0][0][0];
- const float *p_t_gain = transient_gain;
- const float *p_Q_fract = &Q_fract[0][0];
- float ag0, ag1, ag2;
- float phi_fract0 = phi_fract[0];
- float phi_fract1 = phi_fract[1];
- float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
- float f1, f2, f3;
-
- float *p_delay_end = (p_delay + (len << 1));
-
- /* merged 2 loops */
- f1 = 0.65143905753106;
- f2 = 0.56471812200776;
- f3 = 0.48954165955695;
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- "mul.s %[ag0], %[ag0], %[g_decay_slope] \n\t"
- "mul.s %[ag1], %[ag1], %[g_decay_slope] \n\t"
- "mul.s %[ag2], %[ag2], %[g_decay_slope] \n\t"
- "1: \n\t"
- "lwc1 %[temp0], 0(%[p_delay]) \n\t"
- "lwc1 %[temp1], 4(%[p_delay]) \n\t"
- "lwc1 %[temp4], 16(%[p_ap_delay]) \n\t"
- "lwc1 %[temp5], 20(%[p_ap_delay]) \n\t"
- "mul.s %[temp3], %[temp0], %[phi_fract1] \n\t"
- "lwc1 %[temp6], 0(%[p_Q_fract]) \n\t"
- "mul.s %[temp2], %[temp1], %[phi_fract1] \n\t"
- "lwc1 %[temp7], 4(%[p_Q_fract]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp1], %[phi_fract0] \n\t"
- "msub.s %[temp2], %[temp2], %[temp0], %[phi_fract0] \n\t"
- "mul.s %[temp8], %[temp5], %[temp7] \n\t"
- "mul.s %[temp9], %[temp4], %[temp7] \n\t"
- "lwc1 %[temp7], 12(%[p_Q_fract]) \n\t"
- "mul.s %[temp0], %[ag0], %[temp2] \n\t"
- "mul.s %[temp1], %[ag0], %[temp3] \n\t"
- "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
- "lwc1 %[temp4], 304(%[p_ap_delay]) \n\t"
- "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
- "lwc1 %[temp5], 308(%[p_ap_delay]) \n\t"
- "sub.s %[temp0], %[temp8], %[temp0] \n\t"
- "sub.s %[temp1], %[temp9], %[temp1] \n\t"
- "madd.s %[temp2], %[temp2], %[ag0], %[temp0] \n\t"
- "lwc1 %[temp6], 8(%[p_Q_fract]) \n\t"
- "madd.s %[temp3], %[temp3], %[ag0], %[temp1] \n\t"
- "mul.s %[temp8], %[temp5], %[temp7] \n\t"
- "mul.s %[temp9], %[temp4], %[temp7] \n\t"
- "lwc1 %[temp7], 20(%[p_Q_fract]) \n\t"
- "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
- "swc1 %[temp2], 40(%[p_ap_delay]) \n\t"
- "mul.s %[temp2], %[ag1], %[temp0] \n\t"
- "swc1 %[temp3], 44(%[p_ap_delay]) \n\t"
- "mul.s %[temp3], %[ag1], %[temp1] \n\t"
- "lwc1 %[temp4], 592(%[p_ap_delay]) \n\t"
- "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
- "lwc1 %[temp5], 596(%[p_ap_delay]) \n\t"
- "sub.s %[temp2], %[temp8], %[temp2] \n\t"
- "sub.s %[temp3], %[temp9], %[temp3] \n\t"
- "lwc1 %[temp6], 16(%[p_Q_fract]) \n\t"
- "madd.s %[temp0], %[temp0], %[ag1], %[temp2] \n\t"
- "madd.s %[temp1], %[temp1], %[ag1], %[temp3] \n\t"
- "mul.s %[temp8], %[temp5], %[temp7] \n\t"
- "mul.s %[temp9], %[temp4], %[temp7] \n\t"
- "msub.s %[temp8], %[temp8], %[temp4], %[temp6] \n\t"
- "madd.s %[temp9], %[temp9], %[temp5], %[temp6] \n\t"
- "swc1 %[temp0], 336(%[p_ap_delay]) \n\t"
- "mul.s %[temp0], %[ag2], %[temp2] \n\t"
- "swc1 %[temp1], 340(%[p_ap_delay]) \n\t"
- "mul.s %[temp1], %[ag2], %[temp3] \n\t"
- "lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
- "sub.s %[temp0], %[temp8], %[temp0] \n\t"
- PTR_ADDIU "%[p_ap_delay], %[p_ap_delay], 8 \n\t"
- "sub.s %[temp1], %[temp9], %[temp1] \n\t"
- PTR_ADDIU "%[p_t_gain], %[p_t_gain], 4 \n\t"
- "madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
- PTR_ADDIU "%[p_delay], %[p_delay], 8 \n\t"
- "madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
- PTR_ADDIU "%[p_out], %[p_out], 8 \n\t"
- "mul.s %[temp5], %[temp4], %[temp0] \n\t"
- "mul.s %[temp6], %[temp4], %[temp1] \n\t"
- "swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
- "swc1 %[temp3], 628(%[p_ap_delay]) \n\t"
- "swc1 %[temp5], -8(%[p_out]) \n\t"
- "swc1 %[temp6], -4(%[p_out]) \n\t"
- "bne %[p_delay], %[p_delay_end],1b \n\t"
- " swc1 %[temp6], -4(%[p_out]) \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
- [temp9]"=&f"(temp9), [p_delay]"+r"(p_delay), [p_ap_delay]"+r"(p_ap_delay),
- [p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out)
- : [phi_fract0]"f"(phi_fract0), [phi_fract1]"f"(phi_fract1),
- [p_delay_end]"r"(p_delay_end), [g_decay_slope]"f"(g_decay_slope),
- [ag0]"f"(f1), [ag1]"f"(f2), [ag2]"f"(f3)
- : "memory"
- );
-}
-
-static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
- float h[2][4], float h_step[2][4],
- int len)
-{
- float h0 = h[0][0];
- float h1 = h[0][1];
- float h2 = h[0][2];
- float h3 = h[0][3];
- float hs0 = h_step[0][0];
- float hs1 = h_step[0][1];
- float hs2 = h_step[0][2];
- float hs3 = h_step[0][3];
- float temp0, temp1, temp2, temp3;
- float l_re, l_im, r_re, r_im;
-
- float *l_end = ((float *)l + (len << 1));
-
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- "1: \n\t"
- "add.s %[h0], %[h0], %[hs0] \n\t"
- "lwc1 %[l_re], 0(%[l]) \n\t"
- "add.s %[h1], %[h1], %[hs1] \n\t"
- "lwc1 %[r_re], 0(%[r]) \n\t"
- "add.s %[h2], %[h2], %[hs2] \n\t"
- "lwc1 %[l_im], 4(%[l]) \n\t"
- "add.s %[h3], %[h3], %[hs3] \n\t"
- "lwc1 %[r_im], 4(%[r]) \n\t"
- "mul.s %[temp0], %[h0], %[l_re] \n\t"
- PTR_ADDIU "%[l], %[l], 8 \n\t"
- "mul.s %[temp2], %[h1], %[l_re] \n\t"
- PTR_ADDIU "%[r], %[r], 8 \n\t"
- "madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
- "madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
- "mul.s %[temp1], %[h0], %[l_im] \n\t"
- "mul.s %[temp3], %[h1], %[l_im] \n\t"
- "madd.s %[temp1], %[temp1], %[h2], %[r_im] \n\t"
- "madd.s %[temp3], %[temp3], %[h3], %[r_im] \n\t"
- "swc1 %[temp0], -8(%[l]) \n\t"
- "swc1 %[temp2], -8(%[r]) \n\t"
- "swc1 %[temp1], -4(%[l]) \n\t"
- "bne %[l], %[l_end], 1b \n\t"
- " swc1 %[temp3], -4(%[r]) \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
- [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
- [h0]"+f"(h0), [h1]"+f"(h1), [h2]"+f"(h2),
- [h3]"+f"(h3), [l]"+r"(l), [r]"+r"(r),
- [l_re]"=&f"(l_re), [l_im]"=&f"(l_im),
- [r_re]"=&f"(r_re), [r_im]"=&f"(r_im)
- : [hs0]"f"(hs0), [hs1]"f"(hs1), [hs2]"f"(hs2),
- [hs3]"f"(hs3), [l_end]"r"(l_end)
- : "memory"
- );
-}
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-
-void ff_psdsp_init_mips(PSDSPContext *s)
-{
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
- s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_mips;
- s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_mips;
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
- s->add_squares = ps_add_squares_mips;
- s->mul_pair_single = ps_mul_pair_single_mips;
- s->decorrelate = ps_decorrelate_mips;
- s->stereo_interpolate[0] = ps_stereo_interpolate_mips;
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-}
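
The deleted aacpsdsp_mips.c optimized the parametric-stereo DSP kernels. Two of them reduce to very small scalar loops, which makes the unrolled assembly above easier to check; the sketch below is inferred from the assembly (and mirrors what the generic C kernels compute), not taken from the tree.

    /* Scalar reading of two deleted PS DSP kernels:
     * ps_add_squares accumulates |z|^2 for each complex sample,
     * ps_mul_pair_single scales each complex pair by a real gain. */
    static void ps_add_squares_c(float *dst, const float (*src)[2], int n)
    {
        for (int i = 0; i < n; i++)
            dst[i] += src[i][0] * src[i][0] + src[i][1] * src[i][1];
    }

    static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2],
                                     float *src1, int n)
    {
        for (int i = 0; i < n; i++) {
            dst[i][0] = src0[i][0] * src1[i];
            dst[i][1] = src0[i][1] * src1[i];
        }
    }
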
diff --git a/libavcodec/mips/aacpsy_mips.h b/libavcodec/mips/aacpsy_mips.h
deleted file mode 100644
index 7d27d32f18..0000000000
--- a/libavcodec/mips/aacpsy_mips.h
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author: Bojan Zivkovic (bojan@mips.com)
- *
- * AAC encoder psychoacoustic model routines optimized
- * for MIPS floating-point architecture
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacpsy.c
- */
-
-#ifndef AVCODEC_MIPS_AACPSY_MIPS_H
-#define AVCODEC_MIPS_AACPSY_MIPS_H
-
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
- AacPsyChannel *pch, const uint8_t *band_sizes,
- const float *coefs, const int cutoff)
-{
- int i, w, g;
- int start = 0, wstart = 0;
- for (w = 0; w < wi->num_windows*16; w += 16) {
- wstart = 0;
- for (g = 0; g < num_bands; g++) {
- AacPsyBand *band = &pch->band[w+g];
-
- float form_factor = 0.0f;
- float Temp;
- band->energy = 0.0f;
- if (wstart < cutoff) {
- for (i = 0; i < band_sizes[g]; i+=4) {
- float a, b, c, d;
- float ax, bx, cx, dx;
- float *cf = (float *)&coefs[start+i];
-
- __asm__ volatile (
- "lwc1 %[a], 0(%[cf]) \n\t"
- "lwc1 %[b], 4(%[cf]) \n\t"
- "lwc1 %[c], 8(%[cf]) \n\t"
- "lwc1 %[d], 12(%[cf]) \n\t"
- "abs.s %[a], %[a] \n\t"
- "abs.s %[b], %[b] \n\t"
- "abs.s %[c], %[c] \n\t"
- "abs.s %[d], %[d] \n\t"
- "sqrt.s %[ax], %[a] \n\t"
- "sqrt.s %[bx], %[b] \n\t"
- "sqrt.s %[cx], %[c] \n\t"
- "sqrt.s %[dx], %[d] \n\t"
- "madd.s %[e], %[e], %[a], %[a] \n\t"
- "madd.s %[e], %[e], %[b], %[b] \n\t"
- "madd.s %[e], %[e], %[c], %[c] \n\t"
- "madd.s %[e], %[e], %[d], %[d] \n\t"
- "add.s %[f], %[f], %[ax] \n\t"
- "add.s %[f], %[f], %[bx] \n\t"
- "add.s %[f], %[f], %[cx] \n\t"
- "add.s %[f], %[f], %[dx] \n\t"
-
- : [a]"=&f"(a), [b]"=&f"(b),
- [c]"=&f"(c), [d]"=&f"(d),
- [e]"+f"(band->energy), [f]"+f"(form_factor),
- [ax]"=&f"(ax), [bx]"=&f"(bx),
- [cx]"=&f"(cx), [dx]"=&f"(dx)
- : [cf]"r"(cf)
- : "memory"
- );
- }
- }
-
- Temp = sqrtf((float)band_sizes[g] / band->energy);
- band->thr = band->energy * 0.001258925f;
- band->nz_lines = form_factor * sqrtf(Temp);
- start += band_sizes[g];
- wstart += band_sizes[g];
- }
- }
-}
-
-static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float * psy_fir_coeffs)
-{
- float sum1, sum2, sum3, sum4;
- float *fb = (float*)firbuf;
- float *fb_end = fb + AAC_BLOCK_SIZE_LONG;
- float *hp = hpfsmpl;
-
- float coeff0 = psy_fir_coeffs[1];
- float coeff1 = psy_fir_coeffs[3];
- float coeff2 = psy_fir_coeffs[5];
- float coeff3 = psy_fir_coeffs[7];
- float coeff4 = psy_fir_coeffs[9];
-
- float f1 = 32768.0;
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
-
- "1: \n\t"
- "lwc1 $f0, 40(%[fb]) \n\t"
- "lwc1 $f1, 4(%[fb]) \n\t"
- "lwc1 $f2, 80(%[fb]) \n\t"
- "lwc1 $f3, 44(%[fb]) \n\t"
- "lwc1 $f4, 8(%[fb]) \n\t"
- "madd.s %[sum1], $f0, $f1, %[coeff0] \n\t"
- "lwc1 $f5, 84(%[fb]) \n\t"
- "lwc1 $f6, 48(%[fb]) \n\t"
- "madd.s %[sum2], $f3, $f4, %[coeff0] \n\t"
- "lwc1 $f7, 12(%[fb]) \n\t"
- "madd.s %[sum1], %[sum1], $f2, %[coeff0] \n\t"
- "lwc1 $f8, 88(%[fb]) \n\t"
- "lwc1 $f9, 52(%[fb]) \n\t"
- "madd.s %[sum2], %[sum2], $f5, %[coeff0] \n\t"
- "madd.s %[sum3], $f6, $f7, %[coeff0] \n\t"
- "lwc1 $f10, 16(%[fb]) \n\t"
- "lwc1 $f11, 92(%[fb]) \n\t"
- "madd.s %[sum1], %[sum1], $f7, %[coeff1] \n\t"
- "lwc1 $f1, 72(%[fb]) \n\t"
- "madd.s %[sum3], %[sum3], $f8, %[coeff0] \n\t"
- "madd.s %[sum4], $f9, $f10, %[coeff0] \n\t"
- "madd.s %[sum2], %[sum2], $f10, %[coeff1] \n\t"
- "madd.s %[sum1], %[sum1], $f1, %[coeff1] \n\t"
- "lwc1 $f4, 76(%[fb]) \n\t"
- "lwc1 $f8, 20(%[fb]) \n\t"
- "madd.s %[sum4], %[sum4], $f11, %[coeff0] \n\t"
- "lwc1 $f11, 24(%[fb]) \n\t"
- "madd.s %[sum2], %[sum2], $f4, %[coeff1] \n\t"
- "madd.s %[sum1], %[sum1], $f8, %[coeff2] \n\t"
- "madd.s %[sum3], %[sum3], $f8, %[coeff1] \n\t"
- "madd.s %[sum4], %[sum4], $f11, %[coeff1] \n\t"
- "lwc1 $f7, 64(%[fb]) \n\t"
- "madd.s %[sum2], %[sum2], $f11, %[coeff2] \n\t"
- "lwc1 $f10, 68(%[fb]) \n\t"
- "madd.s %[sum3], %[sum3], $f2, %[coeff1] \n\t"
- "madd.s %[sum4], %[sum4], $f5, %[coeff1] \n\t"
- "madd.s %[sum1], %[sum1], $f7, %[coeff2] \n\t"
- "madd.s %[sum2], %[sum2], $f10, %[coeff2] \n\t"
- "lwc1 $f2, 28(%[fb]) \n\t"
- "lwc1 $f5, 32(%[fb]) \n\t"
- "lwc1 $f8, 56(%[fb]) \n\t"
- "lwc1 $f11, 60(%[fb]) \n\t"
- "madd.s %[sum3], %[sum3], $f2, %[coeff2] \n\t"
- "madd.s %[sum4], %[sum4], $f5, %[coeff2] \n\t"
- "madd.s %[sum1], %[sum1], $f2, %[coeff3] \n\t"
- "madd.s %[sum2], %[sum2], $f5, %[coeff3] \n\t"
- "madd.s %[sum3], %[sum3], $f1, %[coeff2] \n\t"
- "madd.s %[sum4], %[sum4], $f4, %[coeff2] \n\t"
- "madd.s %[sum1], %[sum1], $f8, %[coeff3] \n\t"
- "madd.s %[sum2], %[sum2], $f11, %[coeff3] \n\t"
- "lwc1 $f1, 36(%[fb]) \n\t"
- PTR_ADDIU "%[fb], %[fb], 16 \n\t"
- "madd.s %[sum4], %[sum4], $f0, %[coeff3] \n\t"
- "madd.s %[sum3], %[sum3], $f1, %[coeff3] \n\t"
- "madd.s %[sum1], %[sum1], $f1, %[coeff4] \n\t"
- "madd.s %[sum2], %[sum2], $f0, %[coeff4] \n\t"
- "madd.s %[sum4], %[sum4], $f10, %[coeff3] \n\t"
- "madd.s %[sum3], %[sum3], $f7, %[coeff3] \n\t"
- "madd.s %[sum1], %[sum1], $f6, %[coeff4] \n\t"
- "madd.s %[sum2], %[sum2], $f9, %[coeff4] \n\t"
- "madd.s %[sum4], %[sum4], $f6, %[coeff4] \n\t"
- "madd.s %[sum3], %[sum3], $f3, %[coeff4] \n\t"
- "mul.s %[sum1], %[sum1], %[f1] \n\t"
- "mul.s %[sum2], %[sum2], %[f1] \n\t"
- "madd.s %[sum4], %[sum4], $f11, %[coeff4] \n\t"
- "madd.s %[sum3], %[sum3], $f8, %[coeff4] \n\t"
- "swc1 %[sum1], 0(%[hp]) \n\t"
- "swc1 %[sum2], 4(%[hp]) \n\t"
- "mul.s %[sum4], %[sum4], %[f1] \n\t"
- "mul.s %[sum3], %[sum3], %[f1] \n\t"
- "swc1 %[sum4], 12(%[hp]) \n\t"
- "swc1 %[sum3], 8(%[hp]) \n\t"
- "bne %[fb], %[fb_end], 1b \n\t"
- PTR_ADDIU "%[hp], %[hp], 16 \n\t"
-
- ".set pop \n\t"
-
- : [sum1]"=&f"(sum1), [sum2]"=&f"(sum2),
- [sum3]"=&f"(sum3), [sum4]"=&f"(sum4),
- [fb]"+r"(fb), [hp]"+r"(hp)
- : [coeff0]"f"(coeff0), [coeff1]"f"(coeff1),
- [coeff2]"f"(coeff2), [coeff3]"f"(coeff3),
- [coeff4]"f"(coeff4), [fb_end]"r"(fb_end), [f1]"f"(f1)
- : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6",
- "$f7", "$f8", "$f9", "$f10", "$f11",
- "memory"
- );
-}
-
-#define calc_thr_3gpp calc_thr_3gpp_mips
-#define psy_hp_filter psy_hp_filter_mips
-
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
-#endif /* AVCODEC_MIPS_AACPSY_MIPS_H */
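For reference, the arithmetic performed by the removed psy_hp_filter_mips above reduces to a symmetric 21-tap high-pass FIR that uses only the odd-indexed coefficients, with the result scaled by 32768. A minimal scalar sketch, with the tap indices read off the assembly (illustrative only, not the generic C path in aacpsy.c):

/* Hypothetical scalar sketch of the filter computed by psy_hp_filter_mips.
 * Tap positions and the 32768 scale factor are taken from the removed
 * assembly; this is an illustration, not FFmpeg's generic implementation. */
static void psy_hp_filter_scalar(const float *firbuf, float *hpfsmpl,
                                 const float *psy_fir_coeffs)
{
    for (int i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
        float sum = firbuf[i + 10];                  /* centre tap            */
        for (int j = 0; j < 5; j++)                  /* symmetric odd taps    */
            sum += psy_fir_coeffs[2 * j + 1] *
                   (firbuf[i + 2 * j + 1] + firbuf[i + 20 - 2 * j]);
        hpfsmpl[i] = sum * 32768.0f;                 /* same scaling as asm   */
    }
}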
diff --git a/libavcodec/mips/aacsbr_mips.c b/libavcodec/mips/aacsbr_mips.c
deleted file mode 100644
index e0715491e6..0000000000
--- a/libavcodec/mips/aacsbr_mips.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacsbr.c
- */
-
-#include "libavcodec/aacdec.h"
-#include "libavcodec/aacsbr.h"
-#include "libavutil/mem_internal.h"
-#include "libavutil/mips/asmdefs.h"
-
-#define ENVELOPE_ADJUSTMENT_OFFSET 2
-
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
-static int sbr_lf_gen_mips(SpectralBandReplication *sbr,
- float X_low[32][40][2], const float W[2][32][32][2],
- int buf_idx)
-{
- int i, k;
- int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
- float *p_x_low = &X_low[0][8][0];
- float *p_w = (float*)&W[buf_idx][0][0][0];
- float *p_x1_low = &X_low[0][0][0];
- float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
-
- float *loop_end=p_x1_low + 2560;
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "1: \n\t"
- "sw $0, 0(%[p_x1_low]) \n\t"
- "sw $0, 4(%[p_x1_low]) \n\t"
- "sw $0, 8(%[p_x1_low]) \n\t"
- "sw $0, 12(%[p_x1_low]) \n\t"
- "sw $0, 16(%[p_x1_low]) \n\t"
- "sw $0, 20(%[p_x1_low]) \n\t"
- "sw $0, 24(%[p_x1_low]) \n\t"
- "sw $0, 28(%[p_x1_low]) \n\t"
- PTR_ADDIU "%[p_x1_low],%[p_x1_low], 32 \n\t"
- "bne %[p_x1_low], %[loop_end], 1b \n\t"
- PTR_ADDIU "%[p_x1_low],%[p_x1_low], -10240 \n\t"
-
- : [p_x1_low]"+r"(p_x1_low)
- : [loop_end]"r"(loop_end)
- : "memory"
- );
-
- for (k = 0; k < sbr->kx[1]; k++) {
- for (i = 0; i < 32; i+=4) {
- /* loop unrolled 4 times */
- __asm__ volatile (
- "lw %[temp0], 0(%[p_w]) \n\t"
- "lw %[temp1], 4(%[p_w]) \n\t"
- "lw %[temp2], 256(%[p_w]) \n\t"
- "lw %[temp3], 260(%[p_w]) \n\t"
- "lw %[temp4], 512(%[p_w]) \n\t"
- "lw %[temp5], 516(%[p_w]) \n\t"
- "lw %[temp6], 768(%[p_w]) \n\t"
- "lw %[temp7], 772(%[p_w]) \n\t"
- "sw %[temp0], 0(%[p_x_low]) \n\t"
- "sw %[temp1], 4(%[p_x_low]) \n\t"
- "sw %[temp2], 8(%[p_x_low]) \n\t"
- "sw %[temp3], 12(%[p_x_low]) \n\t"
- "sw %[temp4], 16(%[p_x_low]) \n\t"
- "sw %[temp5], 20(%[p_x_low]) \n\t"
- "sw %[temp6], 24(%[p_x_low]) \n\t"
- "sw %[temp7], 28(%[p_x_low]) \n\t"
- PTR_ADDIU "%[p_x_low], %[p_x_low], 32 \n\t"
- PTR_ADDIU "%[p_w], %[p_w], 1024 \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
- [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
- :
- : "memory"
- );
- }
- p_x_low += 16;
- p_w -= 2046;
- }
-
- for (k = 0; k < sbr->kx[0]; k++) {
- for (i = 0; i < 2; i++) {
-
- /* loop unrolled 4 times */
- __asm__ volatile (
- "lw %[temp0], 0(%[p_w1]) \n\t"
- "lw %[temp1], 4(%[p_w1]) \n\t"
- "lw %[temp2], 256(%[p_w1]) \n\t"
- "lw %[temp3], 260(%[p_w1]) \n\t"
- "lw %[temp4], 512(%[p_w1]) \n\t"
- "lw %[temp5], 516(%[p_w1]) \n\t"
- "lw %[temp6], 768(%[p_w1]) \n\t"
- "lw %[temp7], 772(%[p_w1]) \n\t"
- "sw %[temp0], 0(%[p_x1_low]) \n\t"
- "sw %[temp1], 4(%[p_x1_low]) \n\t"
- "sw %[temp2], 8(%[p_x1_low]) \n\t"
- "sw %[temp3], 12(%[p_x1_low]) \n\t"
- "sw %[temp4], 16(%[p_x1_low]) \n\t"
- "sw %[temp5], 20(%[p_x1_low]) \n\t"
- "sw %[temp6], 24(%[p_x1_low]) \n\t"
- "sw %[temp7], 28(%[p_x1_low]) \n\t"
- PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32 \n\t"
- PTR_ADDIU "%[p_w1], %[p_w1], 1024 \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
- [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
- :
- : "memory"
- );
- }
- p_x1_low += 64;
- p_w1 -= 510;
- }
- return 0;
-}
-
-static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
- const float Y0[38][64][2], const float Y1[38][64][2],
- const float X_low[32][40][2], int ch)
-{
- int k, i;
- const int i_f = 32;
- int temp0, temp1, temp2, temp3;
- const float *X_low1, *Y01, *Y11;
- float *x1=&X[0][0][0];
- float *j=x1+4864;
- const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "1: \n\t"
- "sw $0, 0(%[x1]) \n\t"
- "sw $0, 4(%[x1]) \n\t"
- "sw $0, 8(%[x1]) \n\t"
- "sw $0, 12(%[x1]) \n\t"
- "sw $0, 16(%[x1]) \n\t"
- "sw $0, 20(%[x1]) \n\t"
- "sw $0, 24(%[x1]) \n\t"
- "sw $0, 28(%[x1]) \n\t"
- PTR_ADDIU "%[x1],%[x1], 32 \n\t"
- "bne %[x1], %[j], 1b \n\t"
- PTR_ADDIU "%[x1],%[x1], -19456 \n\t"
-
- : [x1]"+r"(x1)
- : [j]"r"(j)
- : "memory"
- );
-
- if (i_Temp != 0) {
-
- X_low1=&X_low[0][2][0];
-
- for (k = 0; k < sbr->kx[0]; k++) {
-
- __asm__ volatile (
- "move %[i], $zero \n\t"
- "2: \n\t"
- "lw %[temp0], 0(%[X_low1]) \n\t"
- "lw %[temp1], 4(%[X_low1]) \n\t"
- "sw %[temp0], 0(%[x1]) \n\t"
- "sw %[temp1], 9728(%[x1]) \n\t"
- PTR_ADDIU "%[x1], %[x1], 256 \n\t"
- PTR_ADDIU "%[X_low1], %[X_low1], 8 \n\t"
- "addiu %[i], %[i], 1 \n\t"
- "bne %[i], %[i_Temp], 2b \n\t"
-
- : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
- : [i_Temp]"r"(i_Temp)
- : "memory"
- );
- x1-=(i_Temp<<6)-1;
- X_low1-=(i_Temp<<1)-80;
- }
-
- x1=&X[0][0][k];
- Y01=(float*)&Y0[32][k][0];
-
- for (; k < sbr->kx[0] + sbr->m[0]; k++) {
- __asm__ volatile (
- "move %[i], $zero \n\t"
- "3: \n\t"
- "lw %[temp0], 0(%[Y01]) \n\t"
- "lw %[temp1], 4(%[Y01]) \n\t"
- "sw %[temp0], 0(%[x1]) \n\t"
- "sw %[temp1], 9728(%[x1]) \n\t"
- PTR_ADDIU "%[x1], %[x1], 256 \n\t"
- PTR_ADDIU "%[Y01], %[Y01], 512 \n\t"
- "addiu %[i], %[i], 1 \n\t"
- "bne %[i], %[i_Temp], 3b \n\t"
-
- : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
- : [i_Temp]"r"(i_Temp)
- : "memory"
- );
- x1 -=(i_Temp<<6)-1;
- Y01 -=(i_Temp<<7)-2;
- }
- }
-
- x1=&X[0][i_Temp][0];
- X_low1=&X_low[0][i_Temp+2][0];
- temp3=38;
-
- for (k = 0; k < sbr->kx[1]; k++) {
-
- __asm__ volatile (
- "move %[i], %[i_Temp] \n\t"
- "4: \n\t"
- "lw %[temp0], 0(%[X_low1]) \n\t"
- "lw %[temp1], 4(%[X_low1]) \n\t"
- "sw %[temp0], 0(%[x1]) \n\t"
- "sw %[temp1], 9728(%[x1]) \n\t"
- PTR_ADDIU "%[x1], %[x1], 256 \n\t"
- PTR_ADDIU "%[X_low1],%[X_low1], 8 \n\t"
- "addiu %[i], %[i], 1 \n\t"
- "bne %[i], %[temp3], 4b \n\t"
-
- : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2)
- : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
- : "memory"
- );
- x1 -= ((38-i_Temp)<<6)-1;
- X_low1 -= ((38-i_Temp)<<1)- 80;
- }
-
- x1=&X[0][i_Temp][k];
- Y11=&Y1[i_Temp][k][0];
- temp2=32;
-
- for (; k < sbr->kx[1] + sbr->m[1]; k++) {
-
- __asm__ volatile (
- "move %[i], %[i_Temp] \n\t"
- "5: \n\t"
- "lw %[temp0], 0(%[Y11]) \n\t"
- "lw %[temp1], 4(%[Y11]) \n\t"
- "sw %[temp0], 0(%[x1]) \n\t"
- "sw %[temp1], 9728(%[x1]) \n\t"
- PTR_ADDIU "%[x1], %[x1], 256 \n\t"
- PTR_ADDIU "%[Y11], %[Y11], 512 \n\t"
- "addiu %[i], %[i], 1 \n\t"
- "bne %[i], %[temp2], 5b \n\t"
-
- : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
- : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
- [temp2]"r"(temp2)
- : "memory"
- );
-
- x1 -= ((32-i_Temp)<<6)-1;
- Y11 -= ((32-i_Temp)<<7)-2;
- }
- return 0;
-}
-
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static void sbr_hf_assemble_mips(float Y1[38][64][2],
- const float X_high[64][40][2],
- SpectralBandReplication *sbr, SBRData *ch_data,
- const int e_a[2])
-{
- int e, i, j, m;
- const int h_SL = 4 * !sbr->bs_smoothing_mode;
- const int kx = sbr->kx[1];
- const int m_max = sbr->m[1];
- static const float h_smooth[5] = {
- 0.33333333333333,
- 0.30150283239582,
- 0.21816949906249,
- 0.11516383427084,
- 0.03183050093751,
- };
-
- float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
- int indexnoise = ch_data->f_indexnoise;
- int indexsine = ch_data->f_indexsine;
- float *g_temp1, *q_temp1, *pok, *pok1;
- uint32_t temp1, temp2, temp3, temp4;
- int size = m_max;
-
- if (sbr->reset) {
- for (i = 0; i < h_SL; i++) {
- memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
- memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0]));
- }
- } else if (h_SL) {
- memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
- memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
- }
-
- for (e = 0; e < ch_data->bs_num_env; e++) {
- for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
- g_temp1 = g_temp[h_SL + i];
- pok = sbr->gain[e];
- q_temp1 = q_temp[h_SL + i];
- pok1 = sbr->q_m[e];
-
- /* loop unrolled 4 times */
- for (j=0; j<(size>>2); j++) {
- __asm__ volatile (
- "lw %[temp1], 0(%[pok]) \n\t"
- "lw %[temp2], 4(%[pok]) \n\t"
- "lw %[temp3], 8(%[pok]) \n\t"
- "lw %[temp4], 12(%[pok]) \n\t"
- "sw %[temp1], 0(%[g_temp1]) \n\t"
- "sw %[temp2], 4(%[g_temp1]) \n\t"
- "sw %[temp3], 8(%[g_temp1]) \n\t"
- "sw %[temp4], 12(%[g_temp1]) \n\t"
- "lw %[temp1], 0(%[pok1]) \n\t"
- "lw %[temp2], 4(%[pok1]) \n\t"
- "lw %[temp3], 8(%[pok1]) \n\t"
- "lw %[temp4], 12(%[pok1]) \n\t"
- "sw %[temp1], 0(%[q_temp1]) \n\t"
- "sw %[temp2], 4(%[q_temp1]) \n\t"
- "sw %[temp3], 8(%[q_temp1]) \n\t"
- "sw %[temp4], 12(%[q_temp1]) \n\t"
- PTR_ADDIU "%[pok], %[pok], 16 \n\t"
- PTR_ADDIU "%[g_temp1], %[g_temp1], 16 \n\t"
- PTR_ADDIU "%[pok1], %[pok1], 16 \n\t"
- PTR_ADDIU "%[q_temp1], %[q_temp1], 16 \n\t"
-
- : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
- [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
- [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
- :
- : "memory"
- );
- }
-
- for (j=0; j<(size&3); j++) {
- __asm__ volatile (
- "lw %[temp1], 0(%[pok]) \n\t"
- "lw %[temp2], 0(%[pok1]) \n\t"
- "sw %[temp1], 0(%[g_temp1]) \n\t"
- "sw %[temp2], 0(%[q_temp1]) \n\t"
- PTR_ADDIU "%[pok], %[pok], 4 \n\t"
- PTR_ADDIU "%[g_temp1], %[g_temp1], 4 \n\t"
- PTR_ADDIU "%[pok1], %[pok1], 4 \n\t"
- PTR_ADDIU "%[q_temp1], %[q_temp1], 4 \n\t"
-
- : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
- [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
- [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
- :
- : "memory"
- );
- }
- }
- }
-
- for (e = 0; e < ch_data->bs_num_env; e++) {
- for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
- LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
- LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
- float *g_filt, *q_filt;
-
- if (h_SL && e != e_a[0] && e != e_a[1]) {
- g_filt = g_filt_tab;
- q_filt = q_filt_tab;
-
- for (m = 0; m < m_max; m++) {
- const int idx1 = i + h_SL;
- g_filt[m] = 0.0f;
- q_filt[m] = 0.0f;
-
- for (j = 0; j <= h_SL; j++) {
- g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
- q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
- }
- }
- } else {
- g_filt = g_temp[i + h_SL];
- q_filt = q_temp[i];
- }
-
- sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
- i + ENVELOPE_ADJUSTMENT_OFFSET);
-
- if (e != e_a[0] && e != e_a[1]) {
- sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
- q_filt, indexnoise,
- kx, m_max);
- } else {
- int idx = indexsine&1;
- int A = (1-((indexsine+(kx & 1))&2));
- int B = (A^(-idx)) + idx;
- float *out = &Y1[i][kx][idx];
- float *in = sbr->s_m[e];
- float temp0, temp1, temp2, temp3, temp4, temp5;
- float A_f = (float)A;
- float B_f = (float)B;
-
- for (m = 0; m+1 < m_max; m+=2) {
-
- temp2 = out[0];
- temp3 = out[2];
-
- __asm__ volatile(
- "lwc1 %[temp0], 0(%[in]) \n\t"
- "lwc1 %[temp1], 4(%[in]) \n\t"
- "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t"
- "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t"
- "swc1 %[temp4], 0(%[out]) \n\t"
- "swc1 %[temp5], 8(%[out]) \n\t"
- PTR_ADDIU "%[in], %[in], 8 \n\t"
- PTR_ADDIU "%[out], %[out], 16 \n\t"
-
- : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
- [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
- [in]"+r"(in), [out]"+r"(out)
- : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
- [temp3]"f"(temp3)
- : "memory"
- );
- }
- if(m_max&1)
- out[2*m ] += in[m ] * A;
- }
- indexnoise = (indexnoise + m_max) & 0x1ff;
- indexsine = (indexsine + 1) & 3;
- }
- }
- ch_data->f_indexnoise = indexnoise;
- ch_data->f_indexsine = indexsine;
-}
-
-static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
- float (*alpha0)[2], float (*alpha1)[2],
- const float X_low[32][40][2], int k0)
-{
- int k;
- float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
- float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
-
- c = 1.000001f;
-
- for (k = 0; k < k0; k++) {
- LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
- float dk;
- phi1 = &phi[0][0][0];
- alpha_1 = &alpha1[k][0];
- alpha_0 = &alpha0[k][0];
- dsp->autocorrelate(X_low[k], phi);
-
- __asm__ volatile (
- "lwc1 %[temp0], 40(%[phi1]) \n\t"
- "lwc1 %[temp1], 16(%[phi1]) \n\t"
- "lwc1 %[temp2], 24(%[phi1]) \n\t"
- "lwc1 %[temp3], 28(%[phi1]) \n\t"
- "mul.s %[dk], %[temp0], %[temp1] \n\t"
- "lwc1 %[temp4], 0(%[phi1]) \n\t"
- "mul.s %[res2], %[temp2], %[temp2] \n\t"
- "lwc1 %[temp5], 4(%[phi1]) \n\t"
- "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t"
- "lwc1 %[temp6], 8(%[phi1]) \n\t"
- "div.s %[res2], %[res2], %[c] \n\t"
- "lwc1 %[temp0], 12(%[phi1]) \n\t"
- "sub.s %[dk], %[dk], %[res2] \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
- : [phi1]"r"(phi1), [c]"f"(c)
- : "memory"
- );
-
- if (!dk) {
- alpha_1[0] = 0;
- alpha_1[1] = 0;
- } else {
- __asm__ volatile (
- "mul.s %[temp_real], %[temp4], %[temp2] \n\t"
- "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t"
- "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t"
- "mul.s %[temp_im], %[temp4], %[temp3] \n\t"
- "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t"
- "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t"
- "div.s %[temp_real], %[temp_real], %[dk] \n\t"
- "div.s %[temp_im], %[temp_im], %[dk] \n\t"
- "swc1 %[temp_real], 0(%[alpha_1]) \n\t"
- "swc1 %[temp_im], 4(%[alpha_1]) \n\t"
-
- : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
- : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
- [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
- [temp5]"f"(temp5), [temp6]"f"(temp6),
- [alpha_1]"r"(alpha_1), [dk]"f"(dk)
- : "memory"
- );
- }
-
- if (!phi1[4]) {
- alpha_0[0] = 0;
- alpha_0[1] = 0;
- } else {
- __asm__ volatile (
- "lwc1 %[temp6], 0(%[alpha_1]) \n\t"
- "lwc1 %[temp7], 4(%[alpha_1]) \n\t"
- "mul.s %[temp_real], %[temp6], %[temp2] \n\t"
- "add.s %[temp_real], %[temp_real], %[temp4] \n\t"
- "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t"
- "mul.s %[temp_im], %[temp7], %[temp2] \n\t"
- "add.s %[temp_im], %[temp_im], %[temp5] \n\t"
- "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t"
- "div.s %[temp_real], %[temp_real], %[temp1] \n\t"
- "div.s %[temp_im], %[temp_im], %[temp1] \n\t"
- "neg.s %[temp_real], %[temp_real] \n\t"
- "neg.s %[temp_im], %[temp_im] \n\t"
- "swc1 %[temp_real], 0(%[alpha_0]) \n\t"
- "swc1 %[temp_im], 4(%[alpha_0]) \n\t"
-
- : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
- [res1]"=&f"(res1), [res2]"=&f"(res2)
- : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
- [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
- [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
- : "memory"
- );
- }
-
- __asm__ volatile (
- "lwc1 %[temp1], 0(%[alpha_1]) \n\t"
- "lwc1 %[temp2], 4(%[alpha_1]) \n\t"
- "lwc1 %[temp_real], 0(%[alpha_0]) \n\t"
- "lwc1 %[temp_im], 4(%[alpha_0]) \n\t"
- "mul.s %[res1], %[temp1], %[temp1] \n\t"
- "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t"
- "mul.s %[res2], %[temp_real], %[temp_real] \n\t"
- "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t"
-
- : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
- [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [res1]"=&f"(res1), [res2]"=&f"(res2)
- : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
- : "memory"
- );
-
- if (res1 >= 16.0f || res2 >= 16.0f) {
- alpha_1[0] = 0;
- alpha_1[1] = 0;
- alpha_0[0] = 0;
- alpha_0[1] = 0;
- }
- }
-}
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-
-void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
-{
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
- c->sbr_lf_gen = sbr_lf_gen_mips;
- c->sbr_x_gen = sbr_x_gen_mips;
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
- c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
- c->sbr_hf_assemble = sbr_hf_assemble_mips;
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-}
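Apart from the unrolled copy loops, the removed sbr_lf_gen_mips above amounts to zero-filling X_low and then copying two blocks of QMF samples out of the W ring buffer. A scalar sketch of that data movement, with indices deduced from the pointer arithmetic in the assembly (illustration only; it reuses FFmpeg's SpectralBandReplication type):

#include <string.h>

/* Hypothetical scalar sketch of sbr_lf_gen_mips: 8 overlap slots from the
 * previous frame plus 32 new slots from the current one. Indices are
 * inferred from the assembly above, not taken from the generic template. */
static int sbr_lf_gen_scalar(SpectralBandReplication *sbr,
                             float X_low[32][40][2],
                             const float W[2][32][32][2], int buf_idx)
{
    memset(X_low, 0, 32 * sizeof(*X_low));
    for (int k = 0; k < sbr->kx[1]; k++)         /* current frame: slots 8..39 */
        for (int i = 0; i < 32; i++) {
            X_low[k][i + 8][0] = W[buf_idx][i][k][0];
            X_low[k][i + 8][1] = W[buf_idx][i][k][1];
        }
    for (int k = 0; k < sbr->kx[0]; k++)         /* overlap: slots 0..7        */
        for (int i = 0; i < 8; i++) {
            X_low[k][i][0] = W[1 - buf_idx][i + 24][k][0];
            X_low[k][i][1] = W[1 - buf_idx][i + 24][k][1];
        }
    return 0;
}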
diff --git a/libavcodec/mips/aacsbr_mips.h b/libavcodec/mips/aacsbr_mips.h
deleted file mode 100644
index 447393164a..0000000000
--- a/libavcodec/mips/aacsbr_mips.h
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/aacsbr.c
- */
-
-#ifndef AVCODEC_MIPS_AACSBR_MIPS_H
-#define AVCODEC_MIPS_AACSBR_MIPS_H
-
-#include "libavcodec/aacdec.h"
-#include "libavcodec/sbr.h"
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM
-static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, AVTXContext *mdct, av_tx_fn mdct_fn,
- SBRDSPContext *sbrdsp, const float *in, float *x,
- float z[320], float W[2][32][32][2], int buf_idx)
-{
- int i;
- float *w0;
- float *w1;
- int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
-
- w0 = x;
- w1 = x + 1024;
- for(i = 0; i < 36; i++)
- {
- /* loop unrolled 8 times */
- __asm__ volatile(
- "lw %[temp0], 0(%[w1]) \n\t"
- "lw %[temp1], 4(%[w1]) \n\t"
- "lw %[temp2], 8(%[w1]) \n\t"
- "lw %[temp3], 12(%[w1]) \n\t"
- "lw %[temp4], 16(%[w1]) \n\t"
- "lw %[temp5], 20(%[w1]) \n\t"
- "lw %[temp6], 24(%[w1]) \n\t"
- "lw %[temp7], 28(%[w1]) \n\t"
- "sw %[temp0], 0(%[w0]) \n\t"
- "sw %[temp1], 4(%[w0]) \n\t"
- "sw %[temp2], 8(%[w0]) \n\t"
- "sw %[temp3], 12(%[w0]) \n\t"
- "sw %[temp4], 16(%[w0]) \n\t"
- "sw %[temp5], 20(%[w0]) \n\t"
- "sw %[temp6], 24(%[w0]) \n\t"
- "sw %[temp7], 28(%[w0]) \n\t"
- PTR_ADDIU " %[w0], %[w0], 32 \n\t"
- PTR_ADDIU " %[w1], %[w1], 32 \n\t"
-
- : [w0]"+r"(w0), [w1]"+r"(w1),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
- :
- : "memory"
- );
- }
-
- w0 = x + 288;
- w1 = (float*)in;
- for(i = 0; i < 128; i++)
- {
- /* loop unrolled 8 times */
- __asm__ volatile(
- "lw %[temp0], 0(%[w1]) \n\t"
- "lw %[temp1], 4(%[w1]) \n\t"
- "lw %[temp2], 8(%[w1]) \n\t"
- "lw %[temp3], 12(%[w1]) \n\t"
- "lw %[temp4], 16(%[w1]) \n\t"
- "lw %[temp5], 20(%[w1]) \n\t"
- "lw %[temp6], 24(%[w1]) \n\t"
- "lw %[temp7], 28(%[w1]) \n\t"
- "sw %[temp0], 0(%[w0]) \n\t"
- "sw %[temp1], 4(%[w0]) \n\t"
- "sw %[temp2], 8(%[w0]) \n\t"
- "sw %[temp3], 12(%[w0]) \n\t"
- "sw %[temp4], 16(%[w0]) \n\t"
- "sw %[temp5], 20(%[w0]) \n\t"
- "sw %[temp6], 24(%[w0]) \n\t"
- "sw %[temp7], 28(%[w0]) \n\t"
- PTR_ADDIU " %[w0], %[w0], 32 \n\t"
- PTR_ADDIU " %[w1], %[w1], 32 \n\t"
-
- : [w0]"+r"(w0), [w1]"+r"(w1),
- [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
- [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
- [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
- [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
- :
- : "memory"
- );
- }
-
- for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
- // are not supported
- fdsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
- sbrdsp->sum64x5(z);
- sbrdsp->qmf_pre_shuffle(z);
- mdct_fn(mdct, z, z+64, sizeof(float));
- sbrdsp->qmf_post_shuffle(W[buf_idx][i], z);
- x += 32;
- }
-}
-
-#if HAVE_MIPSFPU
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static void sbr_qmf_synthesis_mips(AVTXContext *mdct, av_tx_fn mdct_fn,
- SBRDSPContext *sbrdsp, AVFloatDSPContext *fdsp,
- float *out, float X[2][38][64],
- float mdct_buf[2][64],
- float *v0, int *v_off, const unsigned int div)
-{
- int i, n;
- const float *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
- const int step = 128 >> div;
- float *v;
- float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13;
- float temp14, temp15, temp16, temp17, temp18, temp19;
- float *vv0, *s0, *dst;
- dst = out;
-
- for (i = 0; i < 32; i++) {
- if (*v_off < step) {
- int saved_samples = (1280 - 128) >> div;
- memcpy(&v0[SBR_SYNTHESIS_BUF_SIZE - saved_samples], v0, saved_samples * sizeof(float));
- *v_off = SBR_SYNTHESIS_BUF_SIZE - saved_samples - step;
- } else {
- *v_off -= step;
- }
- v = v0 + *v_off;
- if (div) {
- for (n = 0; n < 32; n++) {
- X[0][i][ n] = -X[0][i][n];
- X[0][i][32+n] = X[1][i][31-n];
- }
- mdct_fn(mdct, mdct_buf[0], X[0][i], sizeof(float));
- sbrdsp->qmf_deint_neg(v, mdct_buf[0]);
- } else {
- sbrdsp->neg_odd_64(X[1][i]);
- mdct_fn(mdct, mdct_buf[0], X[0][i], sizeof(float));
- mdct_fn(mdct, mdct_buf[1], X[1][i], sizeof(float));
- sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]);
- }
-
- if(div == 0)
- {
- float *v0_end;
- vv0 = v;
- v0_end = v + 60;
- s0 = (float*)sbr_qmf_window;
-
- /* 10 calls of function vector_fmul_add merged into one loop
- and loop unrolled 4 times */
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- "lwc1 %[temp4], 0(%[v0]) \n\t"
- "lwc1 %[temp5], 0(%[s0]) \n\t"
- "lwc1 %[temp6], 4(%[v0]) \n\t"
- "lwc1 %[temp7], 4(%[s0]) \n\t"
- "lwc1 %[temp8], 8(%[v0]) \n\t"
- "lwc1 %[temp9], 8(%[s0]) \n\t"
- "lwc1 %[temp10], 12(%[v0]) \n\t"
- "lwc1 %[temp11], 12(%[s0]) \n\t"
- "lwc1 %[temp12], 768(%[v0]) \n\t"
- "lwc1 %[temp13], 256(%[s0]) \n\t"
- "lwc1 %[temp14], 772(%[v0]) \n\t"
- "lwc1 %[temp15], 260(%[s0]) \n\t"
- "lwc1 %[temp16], 776(%[v0]) \n\t"
- "lwc1 %[temp17], 264(%[s0]) \n\t"
- "lwc1 %[temp18], 780(%[v0]) \n\t"
- "lwc1 %[temp19], 268(%[s0]) \n\t"
- "1: \n\t"
- "mul.s %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 1024(%[v0]) \n\t"
- "mul.s %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 512(%[s0]) \n\t"
- "mul.s %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 1028(%[v0]) \n\t"
- "mul.s %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 516(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 1032(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 520(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 1036(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 524(%[s0]) \n\t"
- "lwc1 %[temp12], 1792(%[v0]) \n\t"
- "lwc1 %[temp13], 768(%[s0]) \n\t"
- "lwc1 %[temp14], 1796(%[v0]) \n\t"
- "lwc1 %[temp15], 772(%[s0]) \n\t"
- "lwc1 %[temp16], 1800(%[v0]) \n\t"
- "lwc1 %[temp17], 776(%[s0]) \n\t"
- "lwc1 %[temp18], 1804(%[v0]) \n\t"
- "lwc1 %[temp19], 780(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 2048(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 1024(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 2052(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 1028(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 2056(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 1032(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 2060(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 1036(%[s0]) \n\t"
- "lwc1 %[temp12], 2816(%[v0]) \n\t"
- "lwc1 %[temp13], 1280(%[s0]) \n\t"
- "lwc1 %[temp14], 2820(%[v0]) \n\t"
- "lwc1 %[temp15], 1284(%[s0]) \n\t"
- "lwc1 %[temp16], 2824(%[v0]) \n\t"
- "lwc1 %[temp17], 1288(%[s0]) \n\t"
- "lwc1 %[temp18], 2828(%[v0]) \n\t"
- "lwc1 %[temp19], 1292(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 3072(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 1536(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 3076(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 1540(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 3080(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 1544(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 3084(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 1548(%[s0]) \n\t"
- "lwc1 %[temp12], 3840(%[v0]) \n\t"
- "lwc1 %[temp13], 1792(%[s0]) \n\t"
- "lwc1 %[temp14], 3844(%[v0]) \n\t"
- "lwc1 %[temp15], 1796(%[s0]) \n\t"
- "lwc1 %[temp16], 3848(%[v0]) \n\t"
- "lwc1 %[temp17], 1800(%[s0]) \n\t"
- "lwc1 %[temp18], 3852(%[v0]) \n\t"
- "lwc1 %[temp19], 1804(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 4096(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 2048(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 4100(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 2052(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 4104(%[v0]) \n\t"
- PTR_ADDIU "%[dst], %[dst], 16 \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 2056(%[s0]) \n\t"
- PTR_ADDIU " %[s0], %[s0], 16 \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 4108(%[v0]) \n\t"
- PTR_ADDIU " %[v0], %[v0], 16 \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 2044(%[s0]) \n\t"
- "lwc1 %[temp12], 4848(%[v0]) \n\t"
- "lwc1 %[temp13], 2288(%[s0]) \n\t"
- "lwc1 %[temp14], 4852(%[v0]) \n\t"
- "lwc1 %[temp15], 2292(%[s0]) \n\t"
- "lwc1 %[temp16], 4856(%[v0]) \n\t"
- "lwc1 %[temp17], 2296(%[s0]) \n\t"
- "lwc1 %[temp18], 4860(%[v0]) \n\t"
- "lwc1 %[temp19], 2300(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 0(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 0(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 4(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 4(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 8(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 8(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 12(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 12(%[s0]) \n\t"
- "lwc1 %[temp12], 768(%[v0]) \n\t"
- "lwc1 %[temp13], 256(%[s0]) \n\t"
- "lwc1 %[temp14], 772(%[v0]) \n\t"
- "lwc1 %[temp15], 260(%[s0]) \n\t"
- "lwc1 %[temp16], 776(%[v0]) \n\t"
- "lwc1 %[temp17], 264(%[s0]) \n\t"
- "lwc1 %[temp18], 780(%[v0]) \n\t"
- "lwc1 %[temp19], 268(%[s0]) \n\t"
- "swc1 %[temp0], -16(%[dst]) \n\t"
- "swc1 %[temp1], -12(%[dst]) \n\t"
- "swc1 %[temp2], -8(%[dst]) \n\t"
- "bne %[v0], %[v0_end], 1b \n\t"
- " swc1 %[temp3], -4(%[dst]) \n\t"
- "mul.s %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 1024(%[v0]) \n\t"
- "mul.s %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 512(%[s0]) \n\t"
- "mul.s %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 1028(%[v0]) \n\t"
- "mul.s %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 516(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 1032(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 520(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 1036(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 524(%[s0]) \n\t"
- "lwc1 %[temp12], 1792(%[v0]) \n\t"
- "lwc1 %[temp13], 768(%[s0]) \n\t"
- "lwc1 %[temp14], 1796(%[v0]) \n\t"
- "lwc1 %[temp15], 772(%[s0]) \n\t"
- "lwc1 %[temp16], 1800(%[v0]) \n\t"
- "lwc1 %[temp17], 776(%[s0]) \n\t"
- "lwc1 %[temp18], 1804(%[v0]) \n\t"
- "lwc1 %[temp19], 780(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 2048(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 1024(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 2052(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 1028(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 2056(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 1032(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 2060(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 1036(%[s0]) \n\t"
- "lwc1 %[temp12], 2816(%[v0]) \n\t"
- "lwc1 %[temp13], 1280(%[s0]) \n\t"
- "lwc1 %[temp14], 2820(%[v0]) \n\t"
- "lwc1 %[temp15], 1284(%[s0]) \n\t"
- "lwc1 %[temp16], 2824(%[v0]) \n\t"
- "lwc1 %[temp17], 1288(%[s0]) \n\t"
- "lwc1 %[temp18], 2828(%[v0]) \n\t"
- "lwc1 %[temp19], 1292(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 3072(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 1536(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 3076(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 1540(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 3080(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 1544(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 3084(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 1548(%[s0]) \n\t"
- "lwc1 %[temp12], 3840(%[v0]) \n\t"
- "lwc1 %[temp13], 1792(%[s0]) \n\t"
- "lwc1 %[temp14], 3844(%[v0]) \n\t"
- "lwc1 %[temp15], 1796(%[s0]) \n\t"
- "lwc1 %[temp16], 3848(%[v0]) \n\t"
- "lwc1 %[temp17], 1800(%[s0]) \n\t"
- "lwc1 %[temp18], 3852(%[v0]) \n\t"
- "lwc1 %[temp19], 1804(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp4], 4096(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp5], 2048(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp6], 4100(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp7], 2052(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- "lwc1 %[temp8], 4104(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "lwc1 %[temp9], 2056(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "lwc1 %[temp10], 4108(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "lwc1 %[temp11], 2060(%[s0]) \n\t"
- "lwc1 %[temp12], 4864(%[v0]) \n\t"
- "lwc1 %[temp13], 2304(%[s0]) \n\t"
- "lwc1 %[temp14], 4868(%[v0]) \n\t"
- "lwc1 %[temp15], 2308(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp4], %[temp5] \n\t"
- "lwc1 %[temp16], 4872(%[v0]) \n\t"
- "madd.s %[temp1], %[temp1], %[temp6], %[temp7] \n\t"
- "lwc1 %[temp17], 2312(%[s0]) \n\t"
- "madd.s %[temp2], %[temp2], %[temp8], %[temp9] \n\t"
- "lwc1 %[temp18], 4876(%[v0]) \n\t"
- "madd.s %[temp3], %[temp3], %[temp10], %[temp11] \n\t"
- "lwc1 %[temp19], 2316(%[s0]) \n\t"
- "madd.s %[temp0], %[temp0], %[temp12], %[temp13] \n\t"
- PTR_ADDIU "%[dst], %[dst], 16 \n\t"
- "madd.s %[temp1], %[temp1], %[temp14], %[temp15] \n\t"
- "madd.s %[temp2], %[temp2], %[temp16], %[temp17] \n\t"
- "madd.s %[temp3], %[temp3], %[temp18], %[temp19] \n\t"
- "swc1 %[temp0], -16(%[dst]) \n\t"
- "swc1 %[temp1], -12(%[dst]) \n\t"
- "swc1 %[temp2], -8(%[dst]) \n\t"
- "swc1 %[temp3], -4(%[dst]) \n\t"
- ".set pop \n\t"
-
- : [dst]"+r"(dst), [v0]"+r"(vv0), [s0]"+r"(s0),
- [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
- [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
- [temp12]"=&f"(temp12), [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
- [temp15]"=&f"(temp15), [temp16]"=&f"(temp16), [temp17]"=&f"(temp17),
- [temp18]"=&f"(temp18), [temp19]"=&f"(temp19)
- : [v0_end]"r"(v0_end)
- : "memory"
- );
- }
- else
- {
- fdsp->vector_fmul (out, v , sbr_qmf_window , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 256 >> div), sbr_qmf_window + (128 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 448 >> div), sbr_qmf_window + (192 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 512 >> div), sbr_qmf_window + (256 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 704 >> div), sbr_qmf_window + (320 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 768 >> div), sbr_qmf_window + (384 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out , 64 >> div);
- fdsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out , 64 >> div);
- out += 64 >> div;
- }
- }
-}
-
-#define sbr_qmf_analysis sbr_qmf_analysis_mips
-#define sbr_qmf_synthesis sbr_qmf_synthesis_mips
-
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-
-#endif /* AVCODEC_MIPS_AACSBR_MIPS_H */
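The large unrolled loop in sbr_qmf_synthesis_mips above merges ten windowed accumulations into a single pass; per output sample it computes the same sum as the fdsp->vector_fmul / vector_fmul_add chain kept in the div != 0 branch. A scalar sketch of that accumulation for div == 0, using the buffer and window offsets visible in that branch (illustration only):

/* Hypothetical scalar form of the ten merged vector_fmul_add calls (div == 0):
 * each of the 64 output samples is a 10-term dot product between strided
 * slices of the synthesis buffer v and the QMF window. */
static void sbr_synthesis_window_scalar(float *out, const float *v,
                                        const float *sbr_qmf_window)
{
    static const int v_off[10] = {
        0, 192, 256, 448, 512, 704, 768, 960, 1024, 1216
    };
    for (int n = 0; n < 64; n++) {
        float acc = 0.0f;
        for (int j = 0; j < 10; j++)
            acc += v[v_off[j] + n] * sbr_qmf_window[64 * j + n];
        out[n] = acc;
    }
}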
diff --git a/libavcodec/mips/sbrdsp_mips.c b/libavcodec/mips/sbrdsp_mips.c
deleted file mode 100644
index 1c87c99251..0000000000
--- a/libavcodec/mips/sbrdsp_mips.c
+++ /dev/null
@@ -1,912 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Authors: Darko Laus (darko@mips.com)
- * Djordje Pesut (djordje@mips.com)
- * Mirjana Vulin (mvulin@mips.com)
- *
- * AAC Spectral Band Replication decoding functions optimized for MIPS
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Reference: libavcodec/sbrdsp.c
- */
-
-#include "config.h"
-#include "libavcodec/sbrdsp.h"
-#include "libavutil/mips/asmdefs.h"
-
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
-static void sbr_qmf_pre_shuffle_mips(float *z)
-{
- int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6;
- float *z1 = &z[66];
- float *z2 = &z[59];
- float *z3 = &z[2];
- float *z4 = z1 + 60;
-
- /* loop unrolled 5 times */
- __asm__ volatile (
- "lui %[Temp6], 0x8000 \n\t"
- "1: \n\t"
- "lw %[Temp1], 0(%[z2]) \n\t"
- "lw %[Temp2], 4(%[z2]) \n\t"
- "lw %[Temp3], 8(%[z2]) \n\t"
- "lw %[Temp4], 12(%[z2]) \n\t"
- "lw %[Temp5], 16(%[z2]) \n\t"
- "xor %[Temp1], %[Temp1], %[Temp6] \n\t"
- "xor %[Temp2], %[Temp2], %[Temp6] \n\t"
- "xor %[Temp3], %[Temp3], %[Temp6] \n\t"
- "xor %[Temp4], %[Temp4], %[Temp6] \n\t"
- "xor %[Temp5], %[Temp5], %[Temp6] \n\t"
- PTR_ADDIU "%[z2], %[z2], -20 \n\t"
- "sw %[Temp1], 32(%[z1]) \n\t"
- "sw %[Temp2], 24(%[z1]) \n\t"
- "sw %[Temp3], 16(%[z1]) \n\t"
- "sw %[Temp4], 8(%[z1]) \n\t"
- "sw %[Temp5], 0(%[z1]) \n\t"
- "lw %[Temp1], 0(%[z3]) \n\t"
- "lw %[Temp2], 4(%[z3]) \n\t"
- "lw %[Temp3], 8(%[z3]) \n\t"
- "lw %[Temp4], 12(%[z3]) \n\t"
- "lw %[Temp5], 16(%[z3]) \n\t"
- "sw %[Temp1], 4(%[z1]) \n\t"
- "sw %[Temp2], 12(%[z1]) \n\t"
- "sw %[Temp3], 20(%[z1]) \n\t"
- "sw %[Temp4], 28(%[z1]) \n\t"
- "sw %[Temp5], 36(%[z1]) \n\t"
- PTR_ADDIU "%[z3], %[z3], 20 \n\t"
- PTR_ADDIU "%[z1], %[z1], 40 \n\t"
- "bne %[z1], %[z4], 1b \n\t"
- "lw %[Temp1], 132(%[z]) \n\t"
- "lw %[Temp2], 128(%[z]) \n\t"
- "lw %[Temp3], 0(%[z]) \n\t"
- "lw %[Temp4], 4(%[z]) \n\t"
- "xor %[Temp1], %[Temp1], %[Temp6] \n\t"
- "sw %[Temp1], 504(%[z]) \n\t"
- "sw %[Temp2], 508(%[z]) \n\t"
- "sw %[Temp3], 256(%[z]) \n\t"
- "sw %[Temp4], 260(%[z]) \n\t"
-
- : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
- [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
- [Temp5]"=&r"(Temp5), [Temp6]"=&r"(Temp6),
- [z1]"+r"(z1), [z2]"+r"(z2), [z3]"+r"(z3)
- : [z4]"r"(z4), [z]"r"(z)
- : "memory"
- );
-}
-
-static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
-{
- int Temp1, Temp2, Temp3, Temp4, Temp5;
- float *W_ptr = (float *)W;
- float *z1 = (float *)z;
- float *z2 = (float *)&z[60];
- float *z_end = z1 + 32;
-
- /* loop unrolled 4 times */
- __asm__ volatile (
- "lui %[Temp5], 0x8000 \n\t"
- "1: \n\t"
- "lw %[Temp1], 0(%[z2]) \n\t"
- "lw %[Temp2], 4(%[z2]) \n\t"
- "lw %[Temp3], 8(%[z2]) \n\t"
- "lw %[Temp4], 12(%[z2]) \n\t"
- "xor %[Temp1], %[Temp1], %[Temp5] \n\t"
- "xor %[Temp2], %[Temp2], %[Temp5] \n\t"
- "xor %[Temp3], %[Temp3], %[Temp5] \n\t"
- "xor %[Temp4], %[Temp4], %[Temp5] \n\t"
- PTR_ADDIU "%[z2], %[z2], -16 \n\t"
- "sw %[Temp1], 24(%[W_ptr]) \n\t"
- "sw %[Temp2], 16(%[W_ptr]) \n\t"
- "sw %[Temp3], 8(%[W_ptr]) \n\t"
- "sw %[Temp4], 0(%[W_ptr]) \n\t"
- "lw %[Temp1], 0(%[z1]) \n\t"
- "lw %[Temp2], 4(%[z1]) \n\t"
- "lw %[Temp3], 8(%[z1]) \n\t"
- "lw %[Temp4], 12(%[z1]) \n\t"
- "sw %[Temp1], 4(%[W_ptr]) \n\t"
- "sw %[Temp2], 12(%[W_ptr]) \n\t"
- "sw %[Temp3], 20(%[W_ptr]) \n\t"
- "sw %[Temp4], 28(%[W_ptr]) \n\t"
- PTR_ADDIU "%[z1], %[z1], 16 \n\t"
- PTR_ADDIU "%[W_ptr],%[W_ptr], 32 \n\t"
- "bne %[z1], %[z_end], 1b \n\t"
-
- : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
- [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
- [Temp5]"=&r"(Temp5), [z1]"+r"(z1),
- [z2]"+r"(z2), [W_ptr]"+r"(W_ptr)
- : [z_end]"r"(z_end)
- : "memory"
- );
-}
-
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
-static void sbr_sum64x5_mips(float *z)
-{
- int k;
- float *z1;
- float f1, f2, f3, f4, f5, f6, f7, f8;
- for (k = 0; k < 64; k += 8) {
-
- z1 = &z[k];
-
- /* loop unrolled 8 times */
- __asm__ volatile (
- "lwc1 $f0, 0(%[z1]) \n\t"
- "lwc1 $f1, 256(%[z1]) \n\t"
- "lwc1 $f2, 4(%[z1]) \n\t"
- "lwc1 $f3, 260(%[z1]) \n\t"
- "lwc1 $f4, 8(%[z1]) \n\t"
- "add.s %[f1], $f0, $f1 \n\t"
- "lwc1 $f5, 264(%[z1]) \n\t"
- "add.s %[f2], $f2, $f3 \n\t"
- "lwc1 $f6, 12(%[z1]) \n\t"
- "lwc1 $f7, 268(%[z1]) \n\t"
- "add.s %[f3], $f4, $f5 \n\t"
- "lwc1 $f8, 16(%[z1]) \n\t"
- "lwc1 $f9, 272(%[z1]) \n\t"
- "add.s %[f4], $f6, $f7 \n\t"
- "lwc1 $f10, 20(%[z1]) \n\t"
- "lwc1 $f11, 276(%[z1]) \n\t"
- "add.s %[f5], $f8, $f9 \n\t"
- "lwc1 $f12, 24(%[z1]) \n\t"
- "lwc1 $f13, 280(%[z1]) \n\t"
- "add.s %[f6], $f10, $f11 \n\t"
- "lwc1 $f14, 28(%[z1]) \n\t"
- "lwc1 $f15, 284(%[z1]) \n\t"
- "add.s %[f7], $f12, $f13 \n\t"
- "lwc1 $f0, 512(%[z1]) \n\t"
- "lwc1 $f1, 516(%[z1]) \n\t"
- "add.s %[f8], $f14, $f15 \n\t"
- "lwc1 $f2, 520(%[z1]) \n\t"
- "add.s %[f1], %[f1], $f0 \n\t"
- "add.s %[f2], %[f2], $f1 \n\t"
- "lwc1 $f3, 524(%[z1]) \n\t"
- "add.s %[f3], %[f3], $f2 \n\t"
- "lwc1 $f4, 528(%[z1]) \n\t"
- "lwc1 $f5, 532(%[z1]) \n\t"
- "add.s %[f4], %[f4], $f3 \n\t"
- "lwc1 $f6, 536(%[z1]) \n\t"
- "add.s %[f5], %[f5], $f4 \n\t"
- "add.s %[f6], %[f6], $f5 \n\t"
- "lwc1 $f7, 540(%[z1]) \n\t"
- "add.s %[f7], %[f7], $f6 \n\t"
- "lwc1 $f0, 768(%[z1]) \n\t"
- "lwc1 $f1, 772(%[z1]) \n\t"
- "add.s %[f8], %[f8], $f7 \n\t"
- "lwc1 $f2, 776(%[z1]) \n\t"
- "add.s %[f1], %[f1], $f0 \n\t"
- "add.s %[f2], %[f2], $f1 \n\t"
- "lwc1 $f3, 780(%[z1]) \n\t"
- "add.s %[f3], %[f3], $f2 \n\t"
- "lwc1 $f4, 784(%[z1]) \n\t"
- "lwc1 $f5, 788(%[z1]) \n\t"
- "add.s %[f4], %[f4], $f3 \n\t"
- "lwc1 $f6, 792(%[z1]) \n\t"
- "add.s %[f5], %[f5], $f4 \n\t"
- "add.s %[f6], %[f6], $f5 \n\t"
- "lwc1 $f7, 796(%[z1]) \n\t"
- "add.s %[f7], %[f7], $f6 \n\t"
- "lwc1 $f0, 1024(%[z1]) \n\t"
- "lwc1 $f1, 1028(%[z1]) \n\t"
- "add.s %[f8], %[f8], $f7 \n\t"
- "lwc1 $f2, 1032(%[z1]) \n\t"
- "add.s %[f1], %[f1], $f0 \n\t"
- "add.s %[f2], %[f2], $f1 \n\t"
- "lwc1 $f3, 1036(%[z1]) \n\t"
- "add.s %[f3], %[f3], $f2 \n\t"
- "lwc1 $f4, 1040(%[z1]) \n\t"
- "lwc1 $f5, 1044(%[z1]) \n\t"
- "add.s %[f4], %[f4], $f3 \n\t"
- "lwc1 $f6, 1048(%[z1]) \n\t"
- "add.s %[f5], %[f5], $f4 \n\t"
- "add.s %[f6], %[f6], $f5 \n\t"
- "lwc1 $f7, 1052(%[z1]) \n\t"
- "add.s %[f7], %[f7], $f6 \n\t"
- "swc1 %[f1], 0(%[z1]) \n\t"
- "swc1 %[f2], 4(%[z1]) \n\t"
- "add.s %[f8], %[f8], $f7 \n\t"
- "swc1 %[f3], 8(%[z1]) \n\t"
- "swc1 %[f4], 12(%[z1]) \n\t"
- "swc1 %[f5], 16(%[z1]) \n\t"
- "swc1 %[f6], 20(%[z1]) \n\t"
- "swc1 %[f7], 24(%[z1]) \n\t"
- "swc1 %[f8], 28(%[z1]) \n\t"
-
- : [f1]"=&f"(f1), [f2]"=&f"(f2), [f3]"=&f"(f3),
- [f4]"=&f"(f4), [f5]"=&f"(f5), [f6]"=&f"(f6),
- [f7]"=&f"(f7), [f8]"=&f"(f8)
- : [z1]"r"(z1)
- : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
- "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
- "$f12", "$f13", "$f14", "$f15",
- "memory"
- );
- }
-}
-
-static float sbr_sum_square_mips(float (*x)[2], int n)
-{
- float sum0 = 0.0f, sum1 = 0.0f;
- float *p_x;
- float temp0, temp1, temp2, temp3;
- float *loop_end;
- p_x = &x[0][0];
- loop_end = p_x + (n >> 1)*4 - 4;
-
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- "lwc1 %[temp0], 0(%[p_x]) \n\t"
- "lwc1 %[temp1], 4(%[p_x]) \n\t"
- "lwc1 %[temp2], 8(%[p_x]) \n\t"
- "lwc1 %[temp3], 12(%[p_x]) \n\t"
- "1: \n\t"
- PTR_ADDIU "%[p_x], %[p_x], 16 \n\t"
- "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t"
- "lwc1 %[temp0], 0(%[p_x]) \n\t"
- "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t"
- "lwc1 %[temp1], 4(%[p_x]) \n\t"
- "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t"
- "lwc1 %[temp2], 8(%[p_x]) \n\t"
- "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t"
- "bne %[p_x], %[loop_end], 1b \n\t"
- " lwc1 %[temp3], 12(%[p_x]) \n\t"
- "madd.s %[sum0], %[sum0], %[temp0], %[temp0] \n\t"
- "madd.s %[sum1], %[sum1], %[temp1], %[temp1] \n\t"
- "madd.s %[sum0], %[sum0], %[temp2], %[temp2] \n\t"
- "madd.s %[sum1], %[sum1], %[temp3], %[temp3] \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [sum0]"+f"(sum0), [sum1]"+f"(sum1),
- [p_x]"+r"(p_x)
- : [loop_end]"r"(loop_end)
- : "memory"
- );
- return sum0 + sum1;
-}
-
-static void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *src1)
-{
- int i;
- float temp0, temp1, temp2, temp3, temp4, temp5;
- float temp6, temp7, temp8, temp9, temp10, temp11;
- float *v0 = v;
- float *v1 = &v[127];
- float *psrc0 = (float*)src0;
- float *psrc1 = (float*)&src1[63];
-
- for (i = 0; i < 4; i++) {
-
- /* loop unrolled 16 times */
- __asm__ volatile(
- "lwc1 %[temp0], 0(%[src0]) \n\t"
- "lwc1 %[temp1], 0(%[src1]) \n\t"
- "lwc1 %[temp3], 4(%[src0]) \n\t"
- "lwc1 %[temp4], -4(%[src1]) \n\t"
- "lwc1 %[temp6], 8(%[src0]) \n\t"
- "lwc1 %[temp7], -8(%[src1]) \n\t"
- "lwc1 %[temp9], 12(%[src0]) \n\t"
- "lwc1 %[temp10], -12(%[src1]) \n\t"
- "add.s %[temp2], %[temp0], %[temp1] \n\t"
- "add.s %[temp5], %[temp3], %[temp4] \n\t"
- "add.s %[temp8], %[temp6], %[temp7] \n\t"
- "add.s %[temp11], %[temp9], %[temp10] \n\t"
- "sub.s %[temp0], %[temp0], %[temp1] \n\t"
- "sub.s %[temp3], %[temp3], %[temp4] \n\t"
- "sub.s %[temp6], %[temp6], %[temp7] \n\t"
- "sub.s %[temp9], %[temp9], %[temp10] \n\t"
- "swc1 %[temp2], 0(%[v1]) \n\t"
- "swc1 %[temp0], 0(%[v0]) \n\t"
- "swc1 %[temp5], -4(%[v1]) \n\t"
- "swc1 %[temp3], 4(%[v0]) \n\t"
- "swc1 %[temp8], -8(%[v1]) \n\t"
- "swc1 %[temp6], 8(%[v0]) \n\t"
- "swc1 %[temp11], -12(%[v1]) \n\t"
- "swc1 %[temp9], 12(%[v0]) \n\t"
- "lwc1 %[temp0], 16(%[src0]) \n\t"
- "lwc1 %[temp1], -16(%[src1]) \n\t"
- "lwc1 %[temp3], 20(%[src0]) \n\t"
- "lwc1 %[temp4], -20(%[src1]) \n\t"
- "lwc1 %[temp6], 24(%[src0]) \n\t"
- "lwc1 %[temp7], -24(%[src1]) \n\t"
- "lwc1 %[temp9], 28(%[src0]) \n\t"
- "lwc1 %[temp10], -28(%[src1]) \n\t"
- "add.s %[temp2], %[temp0], %[temp1] \n\t"
- "add.s %[temp5], %[temp3], %[temp4] \n\t"
- "add.s %[temp8], %[temp6], %[temp7] \n\t"
- "add.s %[temp11], %[temp9], %[temp10] \n\t"
- "sub.s %[temp0], %[temp0], %[temp1] \n\t"
- "sub.s %[temp3], %[temp3], %[temp4] \n\t"
- "sub.s %[temp6], %[temp6], %[temp7] \n\t"
- "sub.s %[temp9], %[temp9], %[temp10] \n\t"
- "swc1 %[temp2], -16(%[v1]) \n\t"
- "swc1 %[temp0], 16(%[v0]) \n\t"
- "swc1 %[temp5], -20(%[v1]) \n\t"
- "swc1 %[temp3], 20(%[v0]) \n\t"
- "swc1 %[temp8], -24(%[v1]) \n\t"
- "swc1 %[temp6], 24(%[v0]) \n\t"
- "swc1 %[temp11], -28(%[v1]) \n\t"
- "swc1 %[temp9], 28(%[v0]) \n\t"
- "lwc1 %[temp0], 32(%[src0]) \n\t"
- "lwc1 %[temp1], -32(%[src1]) \n\t"
- "lwc1 %[temp3], 36(%[src0]) \n\t"
- "lwc1 %[temp4], -36(%[src1]) \n\t"
- "lwc1 %[temp6], 40(%[src0]) \n\t"
- "lwc1 %[temp7], -40(%[src1]) \n\t"
- "lwc1 %[temp9], 44(%[src0]) \n\t"
- "lwc1 %[temp10], -44(%[src1]) \n\t"
- "add.s %[temp2], %[temp0], %[temp1] \n\t"
- "add.s %[temp5], %[temp3], %[temp4] \n\t"
- "add.s %[temp8], %[temp6], %[temp7] \n\t"
- "add.s %[temp11], %[temp9], %[temp10] \n\t"
- "sub.s %[temp0], %[temp0], %[temp1] \n\t"
- "sub.s %[temp3], %[temp3], %[temp4] \n\t"
- "sub.s %[temp6], %[temp6], %[temp7] \n\t"
- "sub.s %[temp9], %[temp9], %[temp10] \n\t"
- "swc1 %[temp2], -32(%[v1]) \n\t"
- "swc1 %[temp0], 32(%[v0]) \n\t"
- "swc1 %[temp5], -36(%[v1]) \n\t"
- "swc1 %[temp3], 36(%[v0]) \n\t"
- "swc1 %[temp8], -40(%[v1]) \n\t"
- "swc1 %[temp6], 40(%[v0]) \n\t"
- "swc1 %[temp11], -44(%[v1]) \n\t"
- "swc1 %[temp9], 44(%[v0]) \n\t"
- "lwc1 %[temp0], 48(%[src0]) \n\t"
- "lwc1 %[temp1], -48(%[src1]) \n\t"
- "lwc1 %[temp3], 52(%[src0]) \n\t"
- "lwc1 %[temp4], -52(%[src1]) \n\t"
- "lwc1 %[temp6], 56(%[src0]) \n\t"
- "lwc1 %[temp7], -56(%[src1]) \n\t"
- "lwc1 %[temp9], 60(%[src0]) \n\t"
- "lwc1 %[temp10], -60(%[src1]) \n\t"
- "add.s %[temp2], %[temp0], %[temp1] \n\t"
- "add.s %[temp5], %[temp3], %[temp4] \n\t"
- "add.s %[temp8], %[temp6], %[temp7] \n\t"
- "add.s %[temp11], %[temp9], %[temp10] \n\t"
- "sub.s %[temp0], %[temp0], %[temp1] \n\t"
- "sub.s %[temp3], %[temp3], %[temp4] \n\t"
- "sub.s %[temp6], %[temp6], %[temp7] \n\t"
- "sub.s %[temp9], %[temp9], %[temp10] \n\t"
- "swc1 %[temp2], -48(%[v1]) \n\t"
- "swc1 %[temp0], 48(%[v0]) \n\t"
- "swc1 %[temp5], -52(%[v1]) \n\t"
- "swc1 %[temp3], 52(%[v0]) \n\t"
- "swc1 %[temp8], -56(%[v1]) \n\t"
- "swc1 %[temp6], 56(%[v0]) \n\t"
- "swc1 %[temp11], -60(%[v1]) \n\t"
- "swc1 %[temp9], 60(%[v0]) \n\t"
- PTR_ADDIU " %[src0], %[src0], 64 \n\t"
- PTR_ADDIU " %[src1], %[src1], -64 \n\t"
- PTR_ADDIU " %[v0], %[v0], 64 \n\t"
- PTR_ADDIU " %[v1], %[v1], -64 \n\t"
-
- : [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1),
- [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
- [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
- :
- :"memory"
- );
- }
-}
-
-static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
-{
- int i;
- float real_sum_0 = 0.0f;
- float real_sum_1 = 0.0f;
- float real_sum_2 = 0.0f;
- float imag_sum_1 = 0.0f;
- float imag_sum_2 = 0.0f;
- float *p_x, *p_phi;
- float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
- float temp7, temp_r, temp_r1, temp_r2, temp_r3, temp_r4;
- p_x = (float*)&x[0][0];
- p_phi = &phi[0][0][0];
-
- __asm__ volatile (
- "lwc1 %[temp0], 8(%[p_x]) \n\t"
- "lwc1 %[temp1], 12(%[p_x]) \n\t"
- "lwc1 %[temp2], 16(%[p_x]) \n\t"
- "lwc1 %[temp3], 20(%[p_x]) \n\t"
- "lwc1 %[temp4], 24(%[p_x]) \n\t"
- "lwc1 %[temp5], 28(%[p_x]) \n\t"
- "mul.s %[temp_r], %[temp1], %[temp1] \n\t"
- "mul.s %[temp_r1], %[temp1], %[temp3] \n\t"
- "mul.s %[temp_r2], %[temp1], %[temp2] \n\t"
- "mul.s %[temp_r3], %[temp1], %[temp5] \n\t"
- "mul.s %[temp_r4], %[temp1], %[temp4] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t"
- "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t"
- "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t"
- "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t"
- "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t"
- "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
- "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
- "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
- "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
- "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
- PTR_ADDIU "%[p_x], %[p_x], 8 \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
- [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
- [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), [temp_r2]"=&f"(temp_r2),
- [temp_r3]"=&f"(temp_r3), [temp_r4]"=&f"(temp_r4),
- [p_x]"+r"(p_x), [imag_sum_2]"+f"(imag_sum_2)
- :
- : "memory"
- );
-
- for (i = 0; i < 12; i++) {
- __asm__ volatile (
- "lwc1 %[temp0], 8(%[p_x]) \n\t"
- "lwc1 %[temp1], 12(%[p_x]) \n\t"
- "lwc1 %[temp2], 16(%[p_x]) \n\t"
- "lwc1 %[temp3], 20(%[p_x]) \n\t"
- "lwc1 %[temp4], 24(%[p_x]) \n\t"
- "lwc1 %[temp5], 28(%[p_x]) \n\t"
- "mul.s %[temp_r], %[temp1], %[temp1] \n\t"
- "mul.s %[temp_r1], %[temp1], %[temp3] \n\t"
- "mul.s %[temp_r2], %[temp1], %[temp2] \n\t"
- "mul.s %[temp_r3], %[temp1], %[temp5] \n\t"
- "mul.s %[temp_r4], %[temp1], %[temp4] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp0], %[temp0] \n\t"
- "madd.s %[temp_r1], %[temp_r1], %[temp0], %[temp2] \n\t"
- "msub.s %[temp_r2], %[temp_r2], %[temp0], %[temp3] \n\t"
- "madd.s %[temp_r3], %[temp_r3], %[temp0], %[temp4] \n\t"
- "msub.s %[temp_r4], %[temp_r4], %[temp0], %[temp5] \n\t"
- "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
- "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
- "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
- "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
- "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
- "lwc1 %[temp0], 32(%[p_x]) \n\t"
- "lwc1 %[temp1], 36(%[p_x]) \n\t"
- "mul.s %[temp_r], %[temp3], %[temp3] \n\t"
- "mul.s %[temp_r1], %[temp3], %[temp5] \n\t"
- "mul.s %[temp_r2], %[temp3], %[temp4] \n\t"
- "mul.s %[temp_r3], %[temp3], %[temp1] \n\t"
- "mul.s %[temp_r4], %[temp3], %[temp0] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp2], %[temp2] \n\t"
- "madd.s %[temp_r1], %[temp_r1], %[temp2], %[temp4] \n\t"
- "msub.s %[temp_r2], %[temp_r2], %[temp2], %[temp5] \n\t"
- "madd.s %[temp_r3], %[temp_r3], %[temp2], %[temp0] \n\t"
- "msub.s %[temp_r4], %[temp_r4], %[temp2], %[temp1] \n\t"
- "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
- "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
- "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
- "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
- "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
- "lwc1 %[temp2], 40(%[p_x]) \n\t"
- "lwc1 %[temp3], 44(%[p_x]) \n\t"
- "mul.s %[temp_r], %[temp5], %[temp5] \n\t"
- "mul.s %[temp_r1], %[temp5], %[temp1] \n\t"
- "mul.s %[temp_r2], %[temp5], %[temp0] \n\t"
- "mul.s %[temp_r3], %[temp5], %[temp3] \n\t"
- "mul.s %[temp_r4], %[temp5], %[temp2] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp4], %[temp4] \n\t"
- "madd.s %[temp_r1], %[temp_r1], %[temp4], %[temp0] \n\t"
- "msub.s %[temp_r2], %[temp_r2], %[temp4], %[temp1] \n\t"
- "madd.s %[temp_r3], %[temp_r3], %[temp4], %[temp2] \n\t"
- "msub.s %[temp_r4], %[temp_r4], %[temp4], %[temp3] \n\t"
- "add.s %[real_sum_0], %[real_sum_0], %[temp_r] \n\t"
- "add.s %[real_sum_1], %[real_sum_1], %[temp_r1] \n\t"
- "add.s %[imag_sum_1], %[imag_sum_1], %[temp_r2] \n\t"
- "add.s %[real_sum_2], %[real_sum_2], %[temp_r3] \n\t"
- "add.s %[imag_sum_2], %[imag_sum_2], %[temp_r4] \n\t"
- PTR_ADDIU "%[p_x], %[p_x], 24 \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
- [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
- [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1),
- [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
- [temp_r4]"=&f"(temp_r4), [p_x]"+r"(p_x),
- [imag_sum_2]"+f"(imag_sum_2)
- :
- : "memory"
- );
- }
- __asm__ volatile (
- "lwc1 %[temp0], -296(%[p_x]) \n\t"
- "lwc1 %[temp1], -292(%[p_x]) \n\t"
- "lwc1 %[temp2], 8(%[p_x]) \n\t"
- "lwc1 %[temp3], 12(%[p_x]) \n\t"
- "lwc1 %[temp4], -288(%[p_x]) \n\t"
- "lwc1 %[temp5], -284(%[p_x]) \n\t"
- "lwc1 %[temp6], -280(%[p_x]) \n\t"
- "lwc1 %[temp7], -276(%[p_x]) \n\t"
- "madd.s %[temp_r], %[real_sum_0], %[temp0], %[temp0] \n\t"
- "madd.s %[temp_r1], %[real_sum_0], %[temp2], %[temp2] \n\t"
- "madd.s %[temp_r2], %[real_sum_1], %[temp0], %[temp4] \n\t"
- "madd.s %[temp_r3], %[imag_sum_1], %[temp0], %[temp5] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp1], %[temp1] \n\t"
- "madd.s %[temp_r1], %[temp_r1], %[temp3], %[temp3] \n\t"
- "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp5] \n\t"
- "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp4] \n\t"
- "lwc1 %[temp4], 16(%[p_x]) \n\t"
- "lwc1 %[temp5], 20(%[p_x]) \n\t"
- "swc1 %[temp_r], 40(%[p_phi]) \n\t"
- "swc1 %[temp_r1], 16(%[p_phi]) \n\t"
- "swc1 %[temp_r2], 24(%[p_phi]) \n\t"
- "swc1 %[temp_r3], 28(%[p_phi]) \n\t"
- "madd.s %[temp_r], %[real_sum_1], %[temp2], %[temp4] \n\t"
- "madd.s %[temp_r1], %[imag_sum_1], %[temp2], %[temp5] \n\t"
- "madd.s %[temp_r2], %[real_sum_2], %[temp0], %[temp6] \n\t"
- "madd.s %[temp_r3], %[imag_sum_2], %[temp0], %[temp7] \n\t"
- "madd.s %[temp_r], %[temp_r], %[temp3], %[temp5] \n\t"
- "nmsub.s %[temp_r1], %[temp_r1], %[temp3], %[temp4] \n\t"
- "madd.s %[temp_r2], %[temp_r2], %[temp1], %[temp7] \n\t"
- "nmsub.s %[temp_r3], %[temp_r3], %[temp1], %[temp6] \n\t"
- "swc1 %[temp_r], 0(%[p_phi]) \n\t"
- "swc1 %[temp_r1], 4(%[p_phi]) \n\t"
- "swc1 %[temp_r2], 8(%[p_phi]) \n\t"
- "swc1 %[temp_r3], 12(%[p_phi]) \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp_r]"=&f"(temp_r),
- [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
- [real_sum_2]"+f"(real_sum_2), [imag_sum_1]"+f"(imag_sum_1),
- [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
- [temp_r1]"=&f"(temp_r1), [p_phi]"+r"(p_phi),
- [imag_sum_2]"+f"(imag_sum_2)
- : [p_x]"r"(p_x)
- : "memory"
- );
-}
-
-static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
- const float alpha0[2], const float alpha1[2],
- float bw, int start, int end)
-{
- float alpha[4];
- int i;
- float *p_x_low = (float*)&X_low[0][0] + 2*start;
- float *p_x_high = &X_high[0][0] + 2*start;
- float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
- float temp7, temp8, temp9, temp10, temp11, temp12;
-
- alpha[0] = alpha1[0] * bw * bw;
- alpha[1] = alpha1[1] * bw * bw;
- alpha[2] = alpha0[0] * bw;
- alpha[3] = alpha0[1] * bw;
-
- for (i = start; i < end; i++) {
- __asm__ volatile (
- "lwc1 %[temp0], -16(%[p_x_low]) \n\t"
- "lwc1 %[temp1], -12(%[p_x_low]) \n\t"
- "lwc1 %[temp2], -8(%[p_x_low]) \n\t"
- "lwc1 %[temp3], -4(%[p_x_low]) \n\t"
- "lwc1 %[temp5], 0(%[p_x_low]) \n\t"
- "lwc1 %[temp6], 4(%[p_x_low]) \n\t"
- "lwc1 %[temp7], 0(%[alpha]) \n\t"
- "lwc1 %[temp8], 4(%[alpha]) \n\t"
- "lwc1 %[temp9], 8(%[alpha]) \n\t"
- "lwc1 %[temp10], 12(%[alpha]) \n\t"
- PTR_ADDIU "%[p_x_high], %[p_x_high], 8 \n\t"
- PTR_ADDIU "%[p_x_low], %[p_x_low], 8 \n\t"
- "mul.s %[temp11], %[temp1], %[temp8] \n\t"
- "msub.s %[temp11], %[temp11], %[temp0], %[temp7] \n\t"
- "madd.s %[temp11], %[temp11], %[temp2], %[temp9] \n\t"
- "nmsub.s %[temp11], %[temp11], %[temp3], %[temp10] \n\t"
- "add.s %[temp11], %[temp11], %[temp5] \n\t"
- "swc1 %[temp11], -8(%[p_x_high]) \n\t"
- "mul.s %[temp12], %[temp1], %[temp7] \n\t"
- "madd.s %[temp12], %[temp12], %[temp0], %[temp8] \n\t"
- "madd.s %[temp12], %[temp12], %[temp3], %[temp9] \n\t"
- "madd.s %[temp12], %[temp12], %[temp2], %[temp10] \n\t"
- "add.s %[temp12], %[temp12], %[temp6] \n\t"
- "swc1 %[temp12], -4(%[p_x_high]) \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
- [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
- [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
- [temp12]"=&f"(temp12), [p_x_high]"+r"(p_x_high),
- [p_x_low]"+r"(p_x_low)
- : [alpha]"r"(alpha)
- : "memory"
- );
- }
-}
-
-static void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2],
- const float *g_filt, int m_max, intptr_t ixh)
-{
- const float *p_x, *p_g, *loop_end;
- float *p_y;
- float temp0, temp1, temp2;
-
- p_g = &g_filt[0];
- p_y = &Y[0][0];
- p_x = &X_high[0][ixh][0];
- loop_end = p_g + m_max;
-
- __asm__ volatile(
- ".set push \n\t"
- ".set noreorder \n\t"
- "1: \n\t"
- "lwc1 %[temp0], 0(%[p_g]) \n\t"
- "lwc1 %[temp1], 0(%[p_x]) \n\t"
- "lwc1 %[temp2], 4(%[p_x]) \n\t"
- "mul.s %[temp1], %[temp1], %[temp0] \n\t"
- "mul.s %[temp2], %[temp2], %[temp0] \n\t"
- PTR_ADDIU "%[p_g], %[p_g], 4 \n\t"
- PTR_ADDIU "%[p_x], %[p_x], 320 \n\t"
- "swc1 %[temp1], 0(%[p_y]) \n\t"
- "swc1 %[temp2], 4(%[p_y]) \n\t"
- "bne %[p_g], %[loop_end], 1b \n\t"
- PTR_ADDIU "%[p_y], %[p_y], 8 \n\t"
- ".set pop \n\t"
-
- : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
- [temp2]"=&f"(temp2), [p_x]"+r"(p_x),
- [p_y]"+r"(p_y), [p_g]"+r"(p_g)
- : [loop_end]"r"(loop_end)
- : "memory"
- );
-}
-
-static void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m,
- const float *q_filt, int noise,
- int kx, int m_max)
-{
- int m;
-
- for (m = 0; m < m_max; m++){
-
- float *Y1=&Y[m][0];
- float *ff_table;
- float y0,y1, temp1, temp2, temp4, temp5;
- int temp0, temp3;
- const float *s_m1=&s_m[m];
- const float *q_filt1= &q_filt[m];
-
- __asm__ volatile(
- "lwc1 %[y0], 0(%[Y1]) \n\t"
- "lwc1 %[temp1], 0(%[s_m1]) \n\t"
- "addiu %[noise], %[noise], 1 \n\t"
- "andi %[noise], %[noise], 0x1ff \n\t"
- "sll %[temp0], %[noise], 3 \n\t"
- PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
- "add.s %[y0], %[y0], %[temp1] \n\t"
- "mfc1 %[temp3], %[temp1] \n\t"
- "bne %[temp3], $0, 1f \n\t"
- "lwc1 %[y1], 4(%[Y1]) \n\t"
- "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
- "lwc1 %[temp4], 0(%[ff_table]) \n\t"
- "lwc1 %[temp5], 4(%[ff_table]) \n\t"
- "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
- "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
- "swc1 %[y1], 4(%[Y1]) \n\t"
- "1: \n\t"
- "swc1 %[y0], 0(%[Y1]) \n\t"
-
- : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
- [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
- : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
- [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
- : "memory"
- );
- }
-}
-
-static void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m,
- const float *q_filt, int noise,
- int kx, int m_max)
-{
- float y0,y1,temp1, temp2, temp4, temp5;
- int temp0, temp3, m;
- float phi_sign = 1 - 2 * (kx & 1);
-
- for (m = 0; m < m_max; m++) {
-
- float *ff_table;
- float *Y1=&Y[m][0];
- const float *s_m1=&s_m[m];
- const float *q_filt1= &q_filt[m];
-
- __asm__ volatile(
- "lwc1 %[y1], 4(%[Y1]) \n\t"
- "lwc1 %[temp1], 0(%[s_m1]) \n\t"
- "lw %[temp3], 0(%[s_m1]) \n\t"
- "addiu %[noise], %[noise], 1 \n\t"
- "andi %[noise], %[noise], 0x1ff \n\t"
- "sll %[temp0], %[noise], 3 \n\t"
- PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
- "madd.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
- "bne %[temp3], $0, 1f \n\t"
- "lwc1 %[y0], 0(%[Y1]) \n\t"
- "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
- "lwc1 %[temp4], 0(%[ff_table]) \n\t"
- "lwc1 %[temp5], 4(%[ff_table]) \n\t"
- "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
- "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
- "swc1 %[y0], 0(%[Y1]) \n\t"
- "1: \n\t"
- "swc1 %[y1], 4(%[Y1]) \n\t"
-
- : [ff_table] "=&r" (ff_table), [y0] "=&f" (y0), [y1] "=&f" (y1),
- [temp0] "=&r" (temp0), [temp1] "=&f" (temp1), [temp2] "=&f" (temp2),
- [temp3] "=&r" (temp3), [temp4] "=&f" (temp4), [temp5] "=&f" (temp5)
- : [ff_sbr_noise_table] "r" (ff_sbr_noise_table), [noise] "r" (noise),
- [Y1] "r" (Y1), [s_m1] "r" (s_m1), [q_filt1] "r" (q_filt1),
- [phi_sign] "f" (phi_sign)
- : "memory"
- );
- phi_sign = -phi_sign;
- }
-}
-
-static void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m,
- const float *q_filt, int noise,
- int kx, int m_max)
-{
- int m, temp0, temp1;
- float *ff_table;
- float y0, y1, temp2, temp3, temp4, temp5;
-
- for (m = 0; m < m_max; m++) {
-
- float *Y1=&Y[m][0];
- const float *s_m1=&s_m[m];
- const float *q_filt1= &q_filt[m];
-
- __asm__ volatile(
- "lwc1 %[y0], 0(%[Y1]) \n\t"
- "lwc1 %[temp3], 0(%[s_m1]) \n\t"
- "addiu %[noise], %[noise], 1 \n\t"
- "andi %[noise], %[noise], 0x1ff \n\t"
- "sll %[temp0], %[noise], 3 \n\t"
- PTR_ADDU "%[ff_table],%[ff_sbr_noise_table],%[temp0] \n\t"
- "sub.s %[y0], %[y0], %[temp3] \n\t"
- "mfc1 %[temp1], %[temp3] \n\t"
- "bne %[temp1], $0, 1f \n\t"
- "lwc1 %[y1], 4(%[Y1]) \n\t"
- "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
- "lwc1 %[temp4], 0(%[ff_table]) \n\t"
- "lwc1 %[temp5], 4(%[ff_table]) \n\t"
- "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
- "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
- "swc1 %[y1], 4(%[Y1]) \n\t"
- "1: \n\t"
- "swc1 %[y0], 0(%[Y1]) \n\t"
-
- : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [y0]"=&f"(y0),
- [y1]"=&f"(y1), [ff_table]"=&r"(ff_table),
- [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
- [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
- : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
- [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
- : "memory"
- );
- }
-}
-
-static void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
- const float *q_filt, int noise,
- int kx, int m_max)
-{
- float phi_sign = 1 - 2 * (kx & 1);
- int m;
-
- for (m = 0; m < m_max; m++) {
-
- float *Y1=&Y[m][0];
- float *ff_table;
- float y0,y1, temp1, temp2, temp4, temp5;
- int temp0, temp3;
- const float *s_m1=&s_m[m];
- const float *q_filt1= &q_filt[m];
-
- __asm__ volatile(
- "lwc1 %[y1], 4(%[Y1]) \n\t"
- "lwc1 %[temp1], 0(%[s_m1]) \n\t"
- "addiu %[noise], %[noise], 1 \n\t"
- "andi %[noise], %[noise], 0x1ff \n\t"
- "sll %[temp0], %[noise], 3 \n\t"
- PTR_ADDU "%[ff_table],%[ff_sbr_noise_table], %[temp0] \n\t"
- "nmsub.s %[y1], %[y1], %[temp1], %[phi_sign] \n\t"
- "mfc1 %[temp3], %[temp1] \n\t"
- "bne %[temp3], $0, 1f \n\t"
- "lwc1 %[y0], 0(%[Y1]) \n\t"
- "lwc1 %[temp2], 0(%[q_filt1]) \n\t"
- "lwc1 %[temp4], 0(%[ff_table]) \n\t"
- "lwc1 %[temp5], 4(%[ff_table]) \n\t"
- "madd.s %[y0], %[y0], %[temp2], %[temp4] \n\t"
- "madd.s %[y1], %[y1], %[temp2], %[temp5] \n\t"
- "swc1 %[y0], 0(%[Y1]) \n\t"
- "1: \n\t"
- "swc1 %[y1], 4(%[Y1]) \n\t"
-
- : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
- [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
- [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5)
- : [ff_sbr_noise_table]"r"(ff_sbr_noise_table), [noise]"r"(noise),
- [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1),
- [phi_sign]"f"(phi_sign)
- : "memory"
- );
- phi_sign = -phi_sign;
- }
-}
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-
-void ff_sbrdsp_init_mips(SBRDSPContext *s)
-{
-#if HAVE_INLINE_ASM
-#if HAVE_MIPSFPU
- s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_mips;
- s->qmf_post_shuffle = sbr_qmf_post_shuffle_mips;
-#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
- s->sum64x5 = sbr_sum64x5_mips;
- s->sum_square = sbr_sum_square_mips;
- s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips;
- s->autocorrelate = sbr_autocorrelate_mips;
- s->hf_gen = sbr_hf_gen_mips;
- s->hf_g_filt = sbr_hf_g_filt_mips;
-
- s->hf_apply_noise[0] = sbr_hf_apply_noise_0_mips;
- s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips;
- s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips;
- s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips;
-#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
-#endif /* HAVE_MIPSFPU */
-#endif /* HAVE_INLINE_ASM */
-}
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 305c5d2b36..e85dac36a7 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -1212,6 +1212,7 @@ static int read_access_unit(AVCodecContext *avctx, AVFrame *frame,
goto error;
m->is_major_sync_unit = 1;
header_size += m->major_sync_header_size;
+ frame->flags |= AV_FRAME_FLAG_KEY;
}
if (!m->params_valid) {
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index df9d1befa8..fb569ede8a 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -1127,9 +1127,6 @@ static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
const uint8_t * const mv_penalty = c->mv_penalty[f_code] + MAX_DMV;
int mv_scale;
- c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
- c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
- c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
c->current_mv_penalty= mv_penalty;
get_limits(s, 16*mb_x, 16*mb_y);
@@ -1495,7 +1492,6 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
MotionEstContext * const c= &s->me;
- const int penalty_factor= c->mb_penalty_factor;
int fmin, bmin, dmin, fbmin, bimin, fimin;
int type=0;
const int xy = mb_y*s->mb_stride + mb_x;
@@ -1517,22 +1513,27 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
return;
}
+ c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+ c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+ c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+
if (s->codec_id == AV_CODEC_ID_MPEG4)
dmin= direct_search(s, mb_x, mb_y);
else
dmin= INT_MAX;
+
// FIXME penalty stuff for non-MPEG-4
c->skip=0;
fmin = estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) +
- 3 * penalty_factor;
+ 3 * c->mb_penalty_factor;
c->skip=0;
bmin = estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) +
- 2 * penalty_factor;
+ 2 * c->mb_penalty_factor;
ff_dlog(s, " %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
c->skip=0;
- fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
+ fbmin= bidir_refine(s, mb_x, mb_y) + c->mb_penalty_factor;
ff_dlog(s, "%d %d %d %d\n", dmin, fmin, bmin, fbmin);
if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 8d88820c46..62d7fd1814 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -180,8 +180,6 @@ int ff_mpeg1_decode_block_intra(GetBitContext *gb,
component = index <= 3 ? 0 : index - 4 + 1;
diff = decode_dc(gb, component);
- if (diff >= 0xffff)
- return AVERROR_INVALIDDATA;
dc = last_dc[component];
dc += diff;
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 4ad1eb6572..9fd765f030 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -71,7 +71,6 @@ enum Mpeg2ClosedCaptionsFormat {
typedef struct Mpeg1Context {
MpegEncContext mpeg_enc_ctx;
- int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
int repeat_field; /* true if we must repeat the field */
AVPanScan pan_scan; /* some temporary storage for the panscan */
AVStereo3D stereo3d;
@@ -803,7 +802,6 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx)
ff_mpeg12_init_vlcs();
s2->chroma_format = 1;
- s->mpeg_enc_ctx_allocated = 0;
s->repeat_field = 0;
avctx->color_range = AVCOL_RANGE_MPEG;
return 0;
@@ -817,16 +815,14 @@ static int mpeg_decode_update_thread_context(AVCodecContext *avctx,
MpegEncContext *s = &ctx->mpeg_enc_ctx, *s1 = &ctx_from->mpeg_enc_ctx;
int err;
- if (avctx == avctx_from ||
- !ctx_from->mpeg_enc_ctx_allocated ||
- !s1->context_initialized)
+ if (avctx == avctx_from || !s1->context_initialized)
return 0;
err = ff_mpeg_update_thread_context(avctx, avctx_from);
if (err)
return err;
- if (!ctx->mpeg_enc_ctx_allocated)
+ if (!s->context_initialized)
memcpy(s + 1, s1 + 1, sizeof(Mpeg1Context) - sizeof(MpegEncContext));
return 0;
@@ -961,7 +957,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
}
- if ((s1->mpeg_enc_ctx_allocated == 0) ||
+ if (!s->context_initialized ||
avctx->coded_width != s->width ||
avctx->coded_height != s->height ||
s1->save_width != s->width ||
@@ -969,10 +965,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
av_cmp_q(s1->save_aspect, s->avctx->sample_aspect_ratio) ||
(s1->save_progressive_seq != s->progressive_sequence && FFALIGN(s->height, 16) != FFALIGN(s->height, 32)) ||
0) {
- if (s1->mpeg_enc_ctx_allocated) {
+ if (s->context_initialized)
ff_mpv_common_end(s);
- s1->mpeg_enc_ctx_allocated = 0;
- }
ret = ff_set_dimensions(avctx, s->width, s->height);
if (ret < 0)
@@ -1029,8 +1023,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
if ((ret = ff_mpv_common_init(s)) < 0)
return ret;
-
- s1->mpeg_enc_ctx_allocated = 1;
}
return 0;
}
@@ -1233,7 +1225,7 @@ static int mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1];
s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0];
s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1];
- if (!s->pict_type && s1->mpeg_enc_ctx_allocated) {
+ if (!s->pict_type && s->context_initialized) {
av_log(s->avctx, AV_LOG_ERROR, "Missing picture start code\n");
if (s->avctx->err_recognition & AV_EF_EXPLODE)
return AVERROR_INVALIDDATA;
@@ -1299,6 +1291,21 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
if ((ret = ff_mpv_frame_start(s, avctx)) < 0)
return ret;
+ if (s->picture_structure != PICT_FRAME) {
+ s->current_picture_ptr->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST *
+ (s->picture_structure == PICT_TOP_FIELD);
+
+ for (int i = 0; i < 3; i++) {
+ if (s->picture_structure == PICT_BOTTOM_FIELD) {
+ s->current_picture.f->data[i] = FF_PTR_ADD(s->current_picture.f->data[i],
+ s->current_picture.f->linesize[i]);
+ }
+ s->current_picture.f->linesize[i] *= 2;
+ s->last_picture.f->linesize[i] *= 2;
+ s->next_picture.f->linesize[i] *= 2;
+ }
+ }
+
ff_mpeg_er_frame_start(s);
/* first check if we must repeat the frame */
@@ -1353,8 +1360,6 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME))
ff_thread_finish_setup(avctx);
} else { // second field
- int i;
-
if (!s->current_picture_ptr) {
av_log(s->avctx, AV_LOG_ERROR, "first field missing\n");
return AVERROR_INVALIDDATA;
@@ -1368,7 +1373,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
}
}
- for (i = 0; i < 4; i++) {
+ for (int i = 0; i < 3; i++) {
s->current_picture.f->data[i] = s->current_picture_ptr->f->data[i];
if (s->picture_structure == PICT_BOTTOM_FIELD)
s->current_picture.f->data[i] +=
@@ -1727,7 +1732,7 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
Mpeg1Context *s1 = avctx->priv_data;
MpegEncContext *s = &s1->mpeg_enc_ctx;
- if (!s1->mpeg_enc_ctx_allocated || !s->current_picture_ptr)
+ if (!s->context_initialized || !s->current_picture_ptr)
return 0;
if (s->avctx->hwaccel) {
@@ -1868,10 +1873,9 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
/* start new MPEG-1 context decoding */
s->out_format = FMT_MPEG1;
- if (s1->mpeg_enc_ctx_allocated) {
+ if (s->context_initialized)
ff_mpv_common_end(s);
- s1->mpeg_enc_ctx_allocated = 0;
- }
+
s->width = avctx->coded_width;
s->height = avctx->coded_height;
avctx->has_b_frames = 0; // true?
@@ -1881,7 +1885,6 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
if ((ret = ff_mpv_common_init(s)) < 0)
return ret;
- s1->mpeg_enc_ctx_allocated = 1;
for (i = 0; i < 64; i++) {
int j = s->idsp.idct_permutation[i];
@@ -2435,7 +2438,7 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
break;
}
- if (!s->mpeg_enc_ctx_allocated)
+ if (!s2->context_initialized)
break;
if (s2->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
@@ -2533,9 +2536,8 @@ static int mpeg_decode_frame(AVCodecContext *avctx, AVFrame *picture,
return buf_size;
}
- if (s->mpeg_enc_ctx_allocated == 0 && ( s2->codec_tag == AV_RL32("VCR2")
- || s2->codec_tag == AV_RL32("BW10")
- ))
+ if (!s2->context_initialized &&
+ (s2->codec_tag == AV_RL32("VCR2") || s2->codec_tag == AV_RL32("BW10")))
vcr2_init_sequence(avctx);
s->slice_count = 0;
@@ -2593,8 +2595,7 @@ static av_cold int mpeg_decode_end(AVCodecContext *avctx)
{
Mpeg1Context *s = avctx->priv_data;
- if (s->mpeg_enc_ctx_allocated)
- ff_mpv_common_end(&s->mpeg_enc_ctx);
+ ff_mpv_common_end(&s->mpeg_enc_ctx);
av_buffer_unref(&s->a53_buf_ref);
return 0;
}
@@ -2733,7 +2734,7 @@ static int ipu_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int ret;
// Check for minimal intra MB size (considering mb header, luma & chroma dc VLC, ac EOB VLC)
- if (avpkt->size*8LL < (avctx->width+15)/16 * ((avctx->height+15)/16) * (2 + 3*4 + 2*2 + 2*6))
+ if (avpkt->size*8LL < (avctx->width+15)/16 * ((avctx->height+15)/16) * (2LL + 3*4 + 2*2 + 2*6))
return AVERROR_INVALIDDATA;
ret = ff_get_buffer(avctx, frame, 0);
@@ -2821,8 +2822,6 @@ static int ipu_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (get_bits_left(gb) != 32)
return AVERROR_INVALIDDATA;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
@@ -2855,15 +2854,6 @@ static av_cold int ipu_decode_init(AVCodecContext *avctx)
return 0;
}
-static av_cold int ipu_decode_end(AVCodecContext *avctx)
-{
- IPUContext *s = avctx->priv_data;
-
- ff_mpv_common_end(&s->m);
-
- return 0;
-}
-
const FFCodec ff_ipu_decoder = {
.p.name = "ipu",
CODEC_LONG_NAME("IPU Video"),
@@ -2872,7 +2862,5 @@ const FFCodec ff_ipu_decoder = {
.priv_data_size = sizeof(IPUContext),
.init = ipu_decode_init,
FF_CODEC_DECODE_CB(ipu_decode_frame),
- .close = ipu_decode_end,
.p.capabilities = AV_CODEC_CAP_DR1,
- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 07de5d6d91..df1e22207d 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -597,6 +597,8 @@ static int mpeg4_decode_sprite_trajectory(Mpeg4DecContext *ctx, GetBitContext *g
ctx->sprite_shift[0] = alpha + beta + rho - min_ab;
ctx->sprite_shift[1] = alpha + beta + rho - min_ab + 2;
break;
+ default:
+ av_assert0(0);
}
/* try to simplify the situation */
if (sprite_delta[0][0] == a << ctx->sprite_shift[0] &&
@@ -3861,8 +3863,7 @@ const FFCodec ff_mpeg4_decoder = {
FF_CODEC_DECODE_CB(ff_h263_decode_frame),
.p.capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
AV_CODEC_CAP_DELAY | AV_CODEC_CAP_FRAME_THREADS,
- .caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ .caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
.flush = ff_mpeg_flush,
.p.max_lowres = 3,
.p.profiles = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index f806ad8a74..71dda802e2 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -26,7 +26,6 @@
#include "libavutil/opt.h"
#include "libavutil/thread.h"
#include "codec_internal.h"
-#include "mpegutils.h"
#include "mpegvideo.h"
#include "h263.h"
#include "h263enc.h"
diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
index 5bf85bb7fe..06b6daa01a 100644
--- a/libavcodec/mpegpicture.c
+++ b/libavcodec/mpegpicture.c
@@ -29,7 +29,7 @@
#include "avcodec.h"
#include "motion_est.h"
#include "mpegpicture.h"
-#include "mpegutils.h"
+#include "mpegvideo.h"
#include "refstruct.h"
#include "threadframe.h"
diff --git a/libavcodec/mpegutils.h b/libavcodec/mpegutils.h
index 386110bb8c..3da1e7ed38 100644
--- a/libavcodec/mpegutils.h
+++ b/libavcodec/mpegutils.h
@@ -27,11 +27,6 @@
#include "avcodec.h"
-/**
- * Return value for header parsers if frame is not coded.
- * */
-#define FRAME_SKIPPED 100
-
/* picture type */
#define PICT_TOP_FIELD 1
#define PICT_BOTTOM_FIELD 2
@@ -93,35 +88,6 @@
#define HAS_CBP(a) ((a) & MB_TYPE_CBP)
-/* MB types for encoding */
-#define CANDIDATE_MB_TYPE_INTRA (1 << 0)
-#define CANDIDATE_MB_TYPE_INTER (1 << 1)
-#define CANDIDATE_MB_TYPE_INTER4V (1 << 2)
-#define CANDIDATE_MB_TYPE_SKIPPED (1 << 3)
-
-#define CANDIDATE_MB_TYPE_DIRECT (1 << 4)
-#define CANDIDATE_MB_TYPE_FORWARD (1 << 5)
-#define CANDIDATE_MB_TYPE_BACKWARD (1 << 6)
-#define CANDIDATE_MB_TYPE_BIDIR (1 << 7)
-
-#define CANDIDATE_MB_TYPE_INTER_I (1 << 8)
-#define CANDIDATE_MB_TYPE_FORWARD_I (1 << 9)
-#define CANDIDATE_MB_TYPE_BACKWARD_I (1 << 10)
-#define CANDIDATE_MB_TYPE_BIDIR_I (1 << 11)
-
-#define CANDIDATE_MB_TYPE_DIRECT0 (1 << 12)
-
-#define INPLACE_OFFSET 16
-
-enum OutputFormat {
- FMT_MPEG1,
- FMT_H261,
- FMT_H263,
- FMT_MJPEG,
- FMT_SPEEDHQ,
-};
-
-
/**
* Draw a horizontal band if supported.
*
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 8a733afdb8..7af823b8bd 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -789,9 +789,6 @@ void ff_mpv_common_end(MpegEncContext *s)
av_freep(&s->bitstream_buffer);
s->allocated_bitstream_buffer_size = 0;
- if (!s->avctx)
- return;
-
if (s->picture) {
for (int i = 0; i < MAX_PICTURE_COUNT; i++)
ff_mpv_picture_free(&s->picture[i]);
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index d7c2f57682..215df0fd5b 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -28,7 +28,6 @@
#ifndef AVCODEC_MPEGVIDEO_H
#define AVCODEC_MPEGVIDEO_H
-#include "avcodec.h"
#include "blockdsp.h"
#include "error_resilience.h"
#include "fdctdsp.h"
@@ -44,7 +43,6 @@
#include "pixblockdsp.h"
#include "put_bits.h"
#include "ratecontrol.h"
-#include "mpegutils.h"
#include "qpeldsp.h"
#include "videodsp.h"
@@ -61,6 +59,14 @@ typedef struct ScanTable {
uint8_t raster_end[64];
} ScanTable;
+enum OutputFormat {
+ FMT_MPEG1,
+ FMT_H261,
+ FMT_H263,
+ FMT_MJPEG,
+ FMT_SPEEDHQ,
+};
+
/**
* MpegEncContext.
*/
@@ -283,7 +289,7 @@ typedef struct MpegEncContext {
int mb_x, mb_y;
int mb_skip_run;
int mb_intra;
- uint16_t *mb_type; ///< Table for candidate MB types for encoding (defines in mpegutils.h)
+ uint16_t *mb_type; ///< Table for candidate MB types for encoding (defines in mpegvideoenc.h)
int block_index[6]; ///< index to current MB in block based arrays with edges
int block_wrap[6];
diff --git a/libavcodec/mpegvideo_dec.c b/libavcodec/mpegvideo_dec.c
index 88facfc39d..4353f1fd68 100644
--- a/libavcodec/mpegvideo_dec.c
+++ b/libavcodec/mpegvideo_dec.c
@@ -235,12 +235,20 @@ int ff_mpv_common_frame_size_change(MpegEncContext *s)
return err;
}
-static int alloc_picture(MpegEncContext *s, Picture *pic)
+static int alloc_picture(MpegEncContext *s, Picture **picp, int reference)
{
AVCodecContext *avctx = s->avctx;
+ int idx = ff_find_unused_picture(s->avctx, s->picture, 0);
+ Picture *pic;
int ret;
+ if (idx < 0)
+ return idx;
+
+ pic = &s->picture[idx];
+
pic->tf.f = pic->f;
+ pic->reference = reference;
/* WM Image / Screen codecs allocate internal buffers with different
* dimensions / colorspaces; ignore user-defined callbacks for these. */
@@ -248,7 +256,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic)
avctx->codec_id != AV_CODEC_ID_VC1IMAGE &&
avctx->codec_id != AV_CODEC_ID_MSS2) {
ret = ff_thread_get_ext_buffer(avctx, &pic->tf,
- pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
+ reference ? AV_GET_BUFFER_FLAG_REF : 0);
} else {
pic->f->width = avctx->width;
pic->f->height = avctx->height;
@@ -262,14 +270,34 @@ static int alloc_picture(MpegEncContext *s, Picture *pic)
if (ret < 0)
goto fail;
- return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, 0, s->out_format,
- s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
- &s->linesize, &s->uvlinesize);
+ ret = ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, 0, s->out_format,
+ s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
+ &s->linesize, &s->uvlinesize);
+ if (ret < 0)
+ goto fail;
+ *picp = pic;
+
+ return 0;
fail:
ff_mpeg_unref_picture(pic);
return ret;
}
+static int av_cold alloc_dummy_frame(MpegEncContext *s, Picture **picp)
+{
+ Picture *pic;
+ int ret = alloc_picture(s, picp, 1);
+ if (ret < 0)
+ return ret;
+
+ pic = *picp;
+
+ ff_thread_report_progress(&pic->tf, INT_MAX, 0);
+ ff_thread_report_progress(&pic->tf, INT_MAX, 1);
+
+ return 0;
+}
+
static void color_frame(AVFrame *frame, int luma)
{
int h_chroma_shift, v_chroma_shift;
@@ -294,14 +322,13 @@ static void color_frame(AVFrame *frame, int luma)
*/
int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
- Picture *pic;
- int idx, ret;
+ int ret;
s->mb_skipped = 0;
if (!ff_thread_can_start_frame(avctx)) {
av_log(avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n");
- return -1;
+ return AVERROR_BUG;
}
/* mark & release old frames */
@@ -325,37 +352,12 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
ff_mpeg_unref_picture(&s->last_picture);
ff_mpeg_unref_picture(&s->next_picture);
- if (s->current_picture_ptr && !s->current_picture_ptr->f->buf[0]) {
- // we already have an unused image
- // (maybe it was set before reading the header)
- pic = s->current_picture_ptr;
- } else {
- idx = ff_find_unused_picture(s->avctx, s->picture, 0);
- if (idx < 0) {
- av_log(s->avctx, AV_LOG_ERROR, "no frame buffer available\n");
- return idx;
- }
- pic = &s->picture[idx];
- }
-
- pic->reference = 0;
- if (!s->droppable) {
- if (s->pict_type != AV_PICTURE_TYPE_B)
- pic->reference = 3;
- }
-
- if (alloc_picture(s, pic) < 0)
- return -1;
+ ret = alloc_picture(s, &s->current_picture_ptr,
+ s->pict_type != AV_PICTURE_TYPE_B && !s->droppable);
+ if (ret < 0)
+ return ret;
- s->current_picture_ptr = pic;
- // FIXME use only the vars from current_pic
s->current_picture_ptr->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST * !!s->top_field_first;
- if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
- s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
- if (s->picture_structure != PICT_FRAME)
- s->current_picture_ptr->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST *
- ((s->picture_structure == PICT_TOP_FIELD) == s->first_field);
- }
s->current_picture_ptr->f->flags |= AV_FRAME_FLAG_INTERLACED * (!s->progressive_frame &&
!s->progressive_sequence);
s->current_picture_ptr->field_picture = s->picture_structure != PICT_FRAME;
@@ -387,55 +389,27 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
if (s->pict_type == AV_PICTURE_TYPE_B && s->next_picture_ptr && s->next_picture_ptr->f->buf[0])
av_log(avctx, AV_LOG_DEBUG,
"allocating dummy last picture for B frame\n");
- else if (s->pict_type != AV_PICTURE_TYPE_I)
+ else if (s->codec_id != AV_CODEC_ID_H261)
av_log(avctx, AV_LOG_ERROR,
"warning: first frame is no keyframe\n");
/* Allocate a dummy frame */
- idx = ff_find_unused_picture(s->avctx, s->picture, 0);
- if (idx < 0) {
- av_log(s->avctx, AV_LOG_ERROR, "no frame buffer available\n");
- return idx;
- }
- s->last_picture_ptr = &s->picture[idx];
-
- s->last_picture_ptr->reference = 3;
- s->last_picture_ptr->f->flags &= ~AV_FRAME_FLAG_KEY;
- s->last_picture_ptr->f->pict_type = AV_PICTURE_TYPE_P;
-
- if (alloc_picture(s, s->last_picture_ptr) < 0) {
- s->last_picture_ptr = NULL;
- return -1;
- }
+ ret = alloc_dummy_frame(s, &s->last_picture_ptr);
+ if (ret < 0)
+ return ret;
if (!avctx->hwaccel) {
int luma_val = s->codec_id == AV_CODEC_ID_FLV1 || s->codec_id == AV_CODEC_ID_H263 ? 16 : 0x80;
color_frame(s->last_picture_ptr->f, luma_val);
}
- ff_thread_report_progress(&s->last_picture_ptr->tf, INT_MAX, 0);
- ff_thread_report_progress(&s->last_picture_ptr->tf, INT_MAX, 1);
}
if ((!s->next_picture_ptr || !s->next_picture_ptr->f->buf[0]) &&
s->pict_type == AV_PICTURE_TYPE_B) {
/* Allocate a dummy frame */
- idx = ff_find_unused_picture(s->avctx, s->picture, 0);
- if (idx < 0) {
- av_log(s->avctx, AV_LOG_ERROR, "no frame buffer available\n");
- return idx;
- }
- s->next_picture_ptr = &s->picture[idx];
-
- s->next_picture_ptr->reference = 3;
- s->next_picture_ptr->f->flags &= ~AV_FRAME_FLAG_KEY;
- s->next_picture_ptr->f->pict_type = AV_PICTURE_TYPE_P;
-
- if (alloc_picture(s, s->next_picture_ptr) < 0) {
- s->next_picture_ptr = NULL;
- return -1;
- }
- ff_thread_report_progress(&s->next_picture_ptr->tf, INT_MAX, 0);
- ff_thread_report_progress(&s->next_picture_ptr->tf, INT_MAX, 1);
+ ret = alloc_dummy_frame(s, &s->next_picture_ptr);
+ if (ret < 0)
+ return ret;
}
if (s->last_picture_ptr) {
@@ -454,18 +428,6 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
av_assert0(s->pict_type == AV_PICTURE_TYPE_I || (s->last_picture_ptr &&
s->last_picture_ptr->f->buf[0]));
- if (s->picture_structure != PICT_FRAME) {
- for (int i = 0; i < 4; i++) {
- if (s->picture_structure == PICT_BOTTOM_FIELD) {
- s->current_picture.f->data[i] = FF_PTR_ADD(s->current_picture.f->data[i],
- s->current_picture.f->linesize[i]);
- }
- s->current_picture.f->linesize[i] *= 2;
- s->last_picture.f->linesize[i] *= 2;
- s->next_picture.f->linesize[i] *= 2;
- }
- }
-
/* set dequantizer, we can't do it during init as
* it might change for MPEG-4 and we can't do it in the header
* decode as init is not called for MPEG-4 there yet */
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index d1b1917824..73a9082265 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -251,7 +251,6 @@ static void update_duplicate_context_after_me(MpegEncContext *dst,
{
#define COPY(a) dst->a= src->a
COPY(pict_type);
- COPY(current_picture);
COPY(f_code);
COPY(b_code);
COPY(qscale);
@@ -1199,8 +1198,8 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
ptrdiff_t dst_stride = i ? s->uvlinesize : s->linesize;
int h_shift = i ? s->chroma_x_shift : 0;
int v_shift = i ? s->chroma_y_shift : 0;
- int w = s->width >> h_shift;
- int h = s->height >> v_shift;
+ int w = AV_CEIL_RSHIFT(s->width , h_shift);
+ int h = AV_CEIL_RSHIFT(s->height, v_shift);
const uint8_t *src = pic_arg->data[i];
uint8_t *dst = pic->f->data[i];
int vpad = 16;
@@ -1434,7 +1433,7 @@ static int estimate_best_b_count(MpegEncContext *s)
goto fail;
}
- rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+ rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
}
/* get the delayed frames */
@@ -1443,7 +1442,7 @@ static int estimate_best_b_count(MpegEncContext *s)
ret = out_size;
goto fail;
}
- rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+ rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
rd += c->error[0] + c->error[1] + c->error[2];
@@ -3624,6 +3623,9 @@ static int encode_picture(MpegEncContext *s)
s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
}
+ if(ff_init_me(s)<0)
+ return -1;
+
s->mb_intra=0; //for the rate distortion & bit compare functions
for(i=1; i<context_count; i++){
ret = ff_update_duplicate_context(s->thread_context[i], s);
@@ -3631,9 +3633,6 @@ static int encode_picture(MpegEncContext *s)
return ret;
}
- if(ff_init_me(s)<0)
- return -1;
-
/* Estimate motion for every MB */
if(s->pict_type != AV_PICTURE_TYPE_I){
s->lambda = (s->lambda * s->me_penalty_compensation + 128) >> 8;
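Editorial note on the (uint64_t) casts in estimate_best_b_count() above: out_size and lambda2 are plain ints, so their product can exceed 32 bits before the right shift; the cast widens the multiplication itself. A minimal self-contained sketch with hypothetical values (not FFmpeg code):

    /* Illustrative sketch: why lambda2 is widened to uint64_t before the
     * multiply. With plausible values the 32-bit product would overflow,
     * so the promotion must happen before the multiplication, not after. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int out_size = 200000;   /* hypothetical packet size in bits */
        int lambda2  = 50000;    /* hypothetical lambda2 value */
        int shift    = 4;        /* stands in for FF_LAMBDA_SHIFT - 3 */

        uint64_t rd = (out_size * (uint64_t)lambda2) >> shift; /* 64-bit multiply */
        printf("rd contribution: %" PRIu64 "\n", rd);          /* 625000000 */
        return 0;
    }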
diff --git a/libavcodec/mpegvideoenc.h b/libavcodec/mpegvideoenc.h
index 1e29782660..c20ea500eb 100644
--- a/libavcodec/mpegvideoenc.h
+++ b/libavcodec/mpegvideoenc.h
@@ -34,6 +34,25 @@
#include "mpegvideo.h"
#define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
+#define INPLACE_OFFSET 16
+
+/* MB types for encoding */
+#define CANDIDATE_MB_TYPE_INTRA (1 << 0)
+#define CANDIDATE_MB_TYPE_INTER (1 << 1)
+#define CANDIDATE_MB_TYPE_INTER4V (1 << 2)
+#define CANDIDATE_MB_TYPE_SKIPPED (1 << 3)
+
+#define CANDIDATE_MB_TYPE_DIRECT (1 << 4)
+#define CANDIDATE_MB_TYPE_FORWARD (1 << 5)
+#define CANDIDATE_MB_TYPE_BACKWARD (1 << 6)
+#define CANDIDATE_MB_TYPE_BIDIR (1 << 7)
+
+#define CANDIDATE_MB_TYPE_INTER_I (1 << 8)
+#define CANDIDATE_MB_TYPE_FORWARD_I (1 << 9)
+#define CANDIDATE_MB_TYPE_BACKWARD_I (1 << 10)
+#define CANDIDATE_MB_TYPE_BIDIR_I (1 << 11)
+
+#define CANDIDATE_MB_TYPE_DIRECT0 (1 << 12)
/* mpegvideo_enc common options */
#define FF_MPV_FLAG_SKIP_RD 0x0001
diff --git a/libavcodec/mscc.c b/libavcodec/mscc.c
index 39bfad0b98..2d6f6265bf 100644
--- a/libavcodec/mscc.c
+++ b/libavcodec/mscc.c
@@ -54,6 +54,9 @@ static int rle_uncompress(AVCodecContext *avctx, GetByteContext *gb, PutByteCont
unsigned run = bytestream2_get_byte(gb);
if (run) {
+ if (bytestream2_get_bytes_left_p(pb) < run * s->bpp)
+ return AVERROR_INVALIDDATA;
+
switch (avctx->bits_per_coded_sample) {
case 8:
fill = bytestream2_get_byte(gb);
@@ -102,6 +105,9 @@ static int rle_uncompress(AVCodecContext *avctx, GetByteContext *gb, PutByteCont
bytestream2_seek_p(pb, y * avctx->width * s->bpp + x * s->bpp, SEEK_SET);
} else {
+ if (bytestream2_get_bytes_left_p(pb) < copy * s->bpp)
+ return AVERROR_INVALIDDATA;
+
for (j = 0; j < copy; j++) {
switch (avctx->bits_per_coded_sample) {
case 8:
@@ -205,9 +211,6 @@ inflate_error:
s->uncomp_buf + s->bpp * j * avctx->width, s->bpp * avctx->width);
}
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/msp2dec.c b/libavcodec/msp2dec.c
index 30a2825e47..e1daeeb144 100644
--- a/libavcodec/msp2dec.c
+++ b/libavcodec/msp2dec.c
@@ -46,9 +46,6 @@ static int msp2_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
bytestream2_init(&idx, buf, 2 * avctx->height);
buf += 2 * avctx->height;
buf_size -= 2 * avctx->height;
diff --git a/libavcodec/mvcdec.c b/libavcodec/mvcdec.c
index 6c971f709e..44cab1889b 100644
--- a/libavcodec/mvcdec.c
+++ b/libavcodec/mvcdec.c
@@ -246,9 +246,6 @@ static int mvc_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (ret < 0)
return ret;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/mvha.c b/libavcodec/mvha.c
index 356cebc64e..24dd88e854 100644
--- a/libavcodec/mvha.c
+++ b/libavcodec/mvha.c
@@ -271,8 +271,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
}
}
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/mwsc.c b/libavcodec/mwsc.c
index 06a151a72a..0d4ee9791a 100644
--- a/libavcodec/mwsc.c
+++ b/libavcodec/mwsc.c
@@ -51,6 +51,10 @@ static int rle_uncompress(GetByteContext *gb, PutByteContext *pb, GetByteContext
if (run == 0) {
run = bytestream2_get_le32(gb);
+
+ if (bytestream2_tell_p(pb) + width - w < run)
+ return AVERROR_INVALIDDATA;
+
for (int j = 0; j < run; j++, w++) {
if (w == width) {
w = 0;
@@ -62,6 +66,10 @@ static int rle_uncompress(GetByteContext *gb, PutByteContext *pb, GetByteContext
int pos = bytestream2_tell_p(pb);
bytestream2_seek(gbp, pos, SEEK_SET);
+
+ if (pos + width - w < fill)
+ return AVERROR_INVALIDDATA;
+
for (int j = 0; j < fill; j++, w++) {
if (w == width) {
w = 0;
@@ -73,6 +81,9 @@ static int rle_uncompress(GetByteContext *gb, PutByteContext *pb, GetByteContext
intra = 0;
} else {
+ if (bytestream2_tell_p(pb) + width - w < run)
+ return AVERROR_INVALIDDATA;
+
for (int j = 0; j < run; j++, w++) {
if (w == width) {
w = 0;
diff --git a/libavcodec/notchlc.c b/libavcodec/notchlc.c
index 6dd3f88440..39c646d790 100644
--- a/libavcodec/notchlc.c
+++ b/libavcodec/notchlc.c
@@ -515,9 +515,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if (ret < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/nvdec_av1.c b/libavcodec/nvdec_av1.c
index b0b013846e..6b408edb87 100644
--- a/libavcodec/nvdec_av1.c
+++ b/libavcodec/nvdec_av1.c
@@ -106,9 +106,7 @@ static int nvdec_av1_start_frame(AVCodecContext *avctx, const uint8_t *buffer, u
.show_frame = frame_header->show_frame,
.disable_cdf_update = frame_header->disable_cdf_update,
.allow_screen_content_tools = frame_header->allow_screen_content_tools,
- .force_integer_mv = frame_header->force_integer_mv ||
- frame_header->frame_type == AV1_FRAME_INTRA_ONLY ||
- frame_header->frame_type == AV1_FRAME_KEY,
+ .force_integer_mv = s->cur_frame.force_integer_mv,
.coded_denom = frame_header->coded_denom,
.allow_intrabc = frame_header->allow_intrabc,
.allow_high_precision_mv = frame_header->allow_high_precision_mv,
@@ -251,8 +249,8 @@ static int nvdec_av1_start_frame(AVCodecContext *avctx, const uint8_t *buffer, u
AVFrame *ref_frame = s->ref[ref_idx].f;
ppc->ref_frame[i].index = ppc->ref_frame_map[ref_idx];
- ppc->ref_frame[i].width = ref_frame->width;
- ppc->ref_frame[i].height = ref_frame->height;
+ ppc->ref_frame[i].width = ref_frame ? ref_frame->width : 0;
+ ppc->ref_frame[i].height = ref_frame ? ref_frame->height : 0;
/* Global Motion */
ppc->global_motion[i].invalid = !frame_header->is_global[AV1_REF_FRAME_LAST + i];
diff --git a/libavcodec/nvdec_mpeg12.c b/libavcodec/nvdec_mpeg12.c
index a4603c7ea7..139f287617 100644
--- a/libavcodec/nvdec_mpeg12.c
+++ b/libavcodec/nvdec_mpeg12.c
@@ -25,6 +25,7 @@
#include "avcodec.h"
#include "hwaccel_internal.h"
#include "internal.h"
+#include "mpegutils.h"
#include "mpegvideo.h"
#include "nvdec.h"
#include "decode.h"
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index b6c5ed3e6b..e48224347d 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -514,7 +514,7 @@ static int nvenc_check_capabilities(AVCodecContext *avctx)
}
ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
- if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
+ if ((IS_10BIT(ctx->data_pix_fmt) || ctx->highbitdepth) && ret <= 0) {
av_log(avctx, AV_LOG_WARNING, "10 bit encode not supported\n");
return AVERROR(ENOSYS);
}
@@ -1420,8 +1420,8 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
break;
}
- // force setting profile as main10 if input is 10 bit
- if (IS_10BIT(ctx->data_pix_fmt)) {
+ // force setting profile as main10 if input is 10 bit or if it should be encoded as 10 bit
+ if (IS_10BIT(ctx->data_pix_fmt) || ctx->highbitdepth) {
cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
avctx->profile = AV_PROFILE_HEVC_MAIN_10;
}
@@ -1435,8 +1435,8 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
#ifdef NVENC_HAVE_NEW_BIT_DEPTH_API
- hevc->inputBitDepth = hevc->outputBitDepth =
- IS_10BIT(ctx->data_pix_fmt) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8;
+ hevc->inputBitDepth = IS_10BIT(ctx->data_pix_fmt) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8;
+ hevc->outputBitDepth = (IS_10BIT(ctx->data_pix_fmt) || ctx->highbitdepth) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8;
#else
hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
#endif
@@ -1696,6 +1696,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
if (ctx->weighted_pred == 1)
ctx->init_encode_params.enableWeightedPrediction = 1;
+#ifdef NVENC_HAVE_SPLIT_FRAME_ENCODING
+ ctx->init_encode_params.splitEncodeMode = ctx->split_encode_mode;
+
+ if (ctx->split_encode_mode != NV_ENC_SPLIT_DISABLE_MODE) {
+ if (avctx->codec->id == AV_CODEC_ID_HEVC && ctx->weighted_pred == 1)
+ av_log(avctx, AV_LOG_WARNING, "Split encoding not supported with weighted prediction enabled.\n");
+ }
+#endif
+
if (ctx->bluray_compat) {
ctx->aud = 1;
ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6);
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 85ecaf1b5f..09de00badc 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -81,6 +81,7 @@ typedef void ID3D11Device;
// SDK 12.1 compile time feature checks
#if NVENCAPI_CHECK_VERSION(12, 1)
#define NVENC_NO_DEPRECATED_RC
+#define NVENC_HAVE_SPLIT_FRAME_ENCODING
#endif
// SDK 12.2 compile time feature checks
@@ -280,6 +281,7 @@ typedef struct NvencContext
int tf_level;
int lookahead_level;
int unidir_b;
+ int split_encode_mode;
} NvencContext;
int ff_nvenc_encode_init(AVCodecContext *avctx);
diff --git a/libavcodec/nvenc_av1.c b/libavcodec/nvenc_av1.c
index d37ee07bff..a9e065e3b9 100644
--- a/libavcodec/nvenc_av1.c
+++ b/libavcodec/nvenc_av1.c
@@ -158,6 +158,14 @@ static const AVOption options[] = {
{ "2", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LOOKAHEAD_LEVEL_2 }, 0, 0, VE, .unit = "lookahead_level" },
{ "3", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LOOKAHEAD_LEVEL_3 }, 0, 0, VE, .unit = "lookahead_level" },
#endif
+#ifdef NVENC_HAVE_SPLIT_FRAME_ENCODING
+ { "split_encode_mode", "Specifies the split encoding mode", OFFSET(split_encode_mode), AV_OPT_TYPE_INT, { .i64 = NV_ENC_SPLIT_AUTO_MODE }, 0, NV_ENC_SPLIT_DISABLE_MODE, VE, .unit = "split_encode_mode" },
+ { "disabled", "Disabled for all configurations", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_DISABLE_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "auto", "Enabled or disabled depending on the preset and tuning info", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_AUTO_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "forced", "Enabled with number of horizontal strips selected by the driver", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_AUTO_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "2", "Enabled with number of horizontal strips forced to 2 when number of NVENCs > 1", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_TWO_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "3", "Enabled with number of horizontal strips forced to 3 when number of NVENCs > 2", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_THREE_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+#endif
{ NULL }
};
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index bd8b6153f3..d54e5f2512 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -183,6 +183,9 @@ static const AVOption options[] = {
{ "fullres", "Two Pass encoding is enabled where first Pass is full resolution",
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_TWO_PASS_FULL_RESOLUTION }, 0, 0, VE, .unit = "multipass" },
#endif
+#ifdef NVENC_HAVE_NEW_BIT_DEPTH_API
+ { "highbitdepth", "Enable 10 bit encode for 8 bit input",OFFSET(highbitdepth),AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+#endif
#ifdef NVENC_HAVE_LDKFS
{ "ldkfs", "Low delay key frame scale; Specifies the Scene Change frame size increase allowed in case of single frame VBV and CBR",
OFFSET(ldkfs), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UCHAR_MAX, VE },
@@ -217,6 +220,14 @@ static const AVOption options[] = {
#ifdef NVENC_HAVE_UNIDIR_B
{ "unidir_b", "Enable use of unidirectional B-Frames.", OFFSET(unidir_b), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
#endif
+#ifdef NVENC_HAVE_SPLIT_FRAME_ENCODING
+ { "split_encode_mode", "Specifies the split encoding mode", OFFSET(split_encode_mode), AV_OPT_TYPE_INT, { .i64 = NV_ENC_SPLIT_AUTO_MODE }, 0, NV_ENC_SPLIT_DISABLE_MODE, VE, .unit = "split_encode_mode" },
+ { "disabled", "Disabled for all configurations", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_DISABLE_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "auto", "Enabled or disabled depending on the preset and tuning info", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_AUTO_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "forced", "Enabled with number of horizontal strips selected by the driver", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_AUTO_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "2", "Enabled with number of horizontal strips forced to 2 when number of NVENCs > 1", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_TWO_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+ { "3", "Enabled with number of horizontal strips forced to 3 when number of NVENCs > 2", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_SPLIT_THREE_FORCED_MODE }, 0, 0, VE, .unit = "split_encode_mode" },
+#endif
{ NULL }
};
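Editorial note: the split_encode_mode and highbitdepth private options added above are reachable through the usual AVOption path. A minimal sketch, assuming an FFmpeg build against an NVENC SDK new enough (>= 12.1 for split-frame encoding, with the new bit-depth API) for both options to exist:

    /* Minimal sketch: setting the new hevc_nvenc options via the
     * libavcodec option API. Error handling reduced to asserts; the
     * encoder is not actually opened here. */
    #include <libavcodec/avcodec.h>
    #include <libavutil/opt.h>
    #include <assert.h>

    int main(void)
    {
        const AVCodec *codec = avcodec_find_encoder_by_name("hevc_nvenc");
        assert(codec);

        AVCodecContext *enc = avcodec_alloc_context3(codec);
        assert(enc);

        /* Force two horizontal strips and 10-bit output for 8-bit input. */
        av_opt_set(enc->priv_data, "split_encode_mode", "2", 0);
        av_opt_set_int(enc->priv_data, "highbitdepth", 1, 0);

        avcodec_free_context(&enc);
        return 0;
    }

On the command line the equivalent would be -c:v hevc_nvenc -split_encode_mode 2 -highbitdepth 1.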
diff --git a/libavcodec/options.c b/libavcodec/options.c
index 0c3b40a186..f60c41bdc3 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -177,8 +177,6 @@ void avcodec_free_context(AVCodecContext **pavctx)
av_freep(&avctx->inter_matrix);
av_freep(&avctx->rc_override);
av_channel_layout_uninit(&avctx->ch_layout);
- av_frame_side_data_free(
- &avctx->decoded_side_data, &avctx->nb_decoded_side_data);
av_freep(pavctx);
}
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 7a70fa7b6c..33f1bce887 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -158,6 +158,7 @@ static const AVOption avcodec_options[] = {
{"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
+{"neon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_NEON }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
diff --git a/libavcodec/opusdec_celt.c b/libavcodec/opusdec_celt.c
index fd8e9929e9..b19342337d 100644
--- a/libavcodec/opusdec_celt.c
+++ b/libavcodec/opusdec_celt.c
@@ -460,7 +460,9 @@ int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
/* deemphasis */
block->emph_coeff = f->opusdsp.deemphasis(output[i],
&block->buf[1024 - frame_size],
- block->emph_coeff, frame_size);
+ block->emph_coeff,
+ ff_opus_deemph_weights,
+ frame_size);
}
if (channels == 1)
@@ -516,7 +518,7 @@ void ff_celt_flush(CeltFrame *f)
* a lesser discontinuity when seeking.
* The deemphasis functions differ from libopus in that they require
* an initial state divided by the coefficient. */
- block->emph_coeff = 0.0f / CELT_EMPH_COEFF;
+ block->emph_coeff = 0.0f / ff_opus_deemph_weights[0];
}
f->seed = 0;
diff --git a/libavcodec/opusdsp.c b/libavcodec/opusdsp.c
index 0764d712e4..e61cc36098 100644
--- a/libavcodec/opusdsp.c
+++ b/libavcodec/opusdsp.c
@@ -18,6 +18,7 @@
#include "config.h"
#include "libavutil/attributes.h"
+#include "libavutil/mem_internal.h"
#include "opusdsp.h"
static void postfilter_c(float *data, int period, float *gains, int len)
@@ -43,10 +44,11 @@ static void postfilter_c(float *data, int period, float *gains, int len)
}
}
-static float deemphasis_c(float *y, float *x, float coeff, int len)
+static float deemphasis_c(float *y, float *x, float coeff, const float *weights, int len)
{
+ const float c = weights[0];
for (int i = 0; i < len; i++)
- coeff = y[i] = x[i] + coeff*CELT_EMPH_COEFF;
+ coeff = y[i] = x[i] + coeff*c;
return coeff;
}
diff --git a/libavcodec/opusdsp.h b/libavcodec/opusdsp.h
index c2a301e832..9d93336cfe 100644
--- a/libavcodec/opusdsp.h
+++ b/libavcodec/opusdsp.h
@@ -19,11 +19,9 @@
#ifndef AVCODEC_OPUSDSP_H
#define AVCODEC_OPUSDSP_H
-#define CELT_EMPH_COEFF 0.8500061035f
-
typedef struct OpusDSP {
void (*postfilter)(float *data, int period, float *gains, int len);
- float (*deemphasis)(float *out, float *in, float coeff, int len);
+ float (*deemphasis)(float *out, float *in, float coeff, const float *weights, int len);
} OpusDSP;
void ff_opus_dsp_init(OpusDSP *ctx);
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index e0c1732227..0e4c3752a5 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -164,6 +164,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
{
const int subframesize = s->avctx->frame_size;
const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
+ const float c = ff_opus_deemph_weights[0];
/* Filter overlap */
for (int ch = 0; ch < f->channels; ch++) {
@@ -172,7 +173,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
for (int i = 0; i < CELT_OVERLAP; i++) {
float sample = b->overlap[i];
b->overlap[i] = sample - m;
- m = sample * CELT_EMPH_COEFF;
+ m = sample * c;
}
b->emph_coeff = m;
}
@@ -185,7 +186,7 @@ static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
for (int i = 0; i < subframesize; i++) {
float sample = b->samples[sf*subframesize + i];
b->samples[sf*subframesize + i] = sample - m;
- m = sample * CELT_EMPH_COEFF;
+ m = sample * c;
}
if (sf != (subframes - 1))
b->emph_coeff = m;
diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c
index 2a57511177..917375253e 100644
--- a/libavcodec/opustab.c
+++ b/libavcodec/opustab.c
@@ -1159,3 +1159,31 @@ const uint32_t * const ff_celt_pvq_u_row[15] = {
celt_pvq_u + 1207, celt_pvq_u + 1226, celt_pvq_u + 1240,
celt_pvq_u + 1248, celt_pvq_u + 1254, celt_pvq_u + 1257
};
+
+/* Deemphasis constant (alpha_p), as specified in RFC6716 as 0.8500061035.
+ * libopus uses a slightly rounded constant, set to 0.85 exactly,
+ * to simplify its fixed-point version; the difference is not significant
+ * enough to affect compliance. */
+#define CELT_EMPH_COEFF 0.8500061035
+
+DECLARE_ALIGNED(16, const float, ff_opus_deemph_weights)[] = {
+ CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF*CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+
+ 0,
+ CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+
+ 0,
+ 0,
+ CELT_EMPH_COEFF,
+ CELT_EMPH_COEFF*CELT_EMPH_COEFF,
+
+ 0,
+ 0,
+ 0,
+ CELT_EMPH_COEFF,
+};
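Editorial note: unrolling the recurrence used by deemphasis_c() shows that each successive output needs one more power of the coefficient, which is presumably what the staggered rows above precompute for vectorized implementations (the scalar deemphasis_c() only reads weights[0]). A small self-contained check with hypothetical values:

    /* Worked check of the deemphasis recurrence y[n] = x[n] + c*y[n-1]:
     * the unrolled form shows where successive powers of c appear. */
    #include <stdio.h>

    int main(void)
    {
        const double c = 0.8500061035;          /* CELT_EMPH_COEFF */
        double x[4] = { 0.1, -0.2, 0.3, -0.4 }; /* hypothetical samples */
        double m = 0.05;                        /* state from previous block */

        /* Reference: the scalar recurrence used by deemphasis_c(). */
        double y[4], coeff = m;
        for (int i = 0; i < 4; i++)
            coeff = y[i] = x[i] + coeff * c;

        /* Unrolled: y[3] = x[3] + c*x[2] + c^2*x[1] + c^3*x[0] + c^4*m. */
        double y3 = x[3] + c*x[2] + c*c*x[1] + c*c*c*x[0] + c*c*c*c*m;

        printf("recurrence y[3]=%f  unrolled y[3]=%f\n", y[3], y3);
        return 0;
    }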
diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h
index 9c9f1b9d98..57e546aef5 100644
--- a/libavcodec/opustab.h
+++ b/libavcodec/opustab.h
@@ -161,6 +161,8 @@ extern const float ff_celt_window2[120];
extern const float ff_celt_window_padded[];
static const float *const ff_celt_window = &ff_celt_window_padded[8];
+extern const float ff_opus_deemph_weights[];
+
extern const uint32_t * const ff_celt_pvq_u_row[15];
FF_VISIBILITY_POP_HIDDEN
diff --git a/libavcodec/pgxdec.c b/libavcodec/pgxdec.c
index cc7cdb8c9a..bffd57eddc 100644
--- a/libavcodec/pgxdec.c
+++ b/libavcodec/pgxdec.c
@@ -139,8 +139,6 @@ static int pgx_decode_frame(AVCodecContext *avctx, AVFrame *p,
return AVERROR_INVALIDDATA;
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
avctx->bits_per_raw_sample = depth;
if (bpp == 8)
write_frame_8(p, &g, width, height, sign, depth);
diff --git a/libavcodec/photocd.c b/libavcodec/photocd.c
index 07e8d460bd..75948d1b42 100644
--- a/libavcodec/photocd.c
+++ b/libavcodec/photocd.c
@@ -331,9 +331,6 @@ static int photocd_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_thread_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
bytestream2_init(gb, avpkt->data, avpkt->size);
if (s->resolution < 3) {
diff --git a/libavcodec/pixlet.c b/libavcodec/pixlet.c
index f432b15d71..b3baaf6260 100644
--- a/libavcodec/pixlet.c
+++ b/libavcodec/pixlet.c
@@ -667,8 +667,6 @@ static int pixlet_decode_frame(AVCodecContext *avctx, AVFrame *p,
bytestream2_skip(&ctx->gb, 8);
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
p->color_range = AVCOL_RANGE_JPEG;
ret = ff_thread_get_buffer(avctx, p, 0);
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 16e35a8cc6..8934a95a7f 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -42,8 +42,8 @@
#include "apng.h"
#include "png.h"
#include "pngdsp.h"
+#include "progressframe.h"
#include "thread.h"
-#include "threadframe.h"
#include "zlib_wrapper.h"
#include <zlib.h>
@@ -63,8 +63,8 @@ typedef struct PNGDecContext {
AVCodecContext *avctx;
GetByteContext gb;
- ThreadFrame last_picture;
- ThreadFrame picture;
+ ProgressFrame last_picture;
+ ProgressFrame picture;
AVDictionary *frame_metadata;
@@ -874,7 +874,7 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
s->bpp += byte_depth;
}
- ff_thread_release_ext_buffer(&s->picture);
+ ff_progress_frame_unref(&s->picture);
if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
/* We only need a buffer for the current picture. */
ret = ff_thread_get_buffer(avctx, p, 0);
@@ -883,8 +883,8 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
} else if (s->dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
/* We need a buffer for the current picture as well as
* a buffer for the reference to retain. */
- ret = ff_thread_get_ext_buffer(avctx, &s->picture,
- AV_GET_BUFFER_FLAG_REF);
+ ret = ff_progress_frame_get_buffer(avctx, &s->picture,
+ AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return ret;
ret = ff_thread_get_buffer(avctx, p, 0);
@@ -892,8 +892,9 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
return ret;
} else {
/* The picture output this time and the reference to retain coincide. */
- if ((ret = ff_thread_get_ext_buffer(avctx, &s->picture,
- AV_GET_BUFFER_FLAG_REF)) < 0)
+ ret = ff_progress_frame_get_buffer(avctx, &s->picture,
+ AV_GET_BUFFER_FLAG_REF);
+ if (ret < 0)
return ret;
ret = av_frame_ref(p, s->picture.f);
if (ret < 0)
@@ -1217,7 +1218,7 @@ static int decode_fctl_chunk(AVCodecContext *avctx, PNGDecContext *s,
return AVERROR_INVALIDDATA;
}
- if ((sequence_number == 0 || !s->last_picture.f->data[0]) &&
+ if ((sequence_number == 0 || !s->last_picture.f) &&
dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
// No previous frame to revert to for the first frame
// Spec says to just treat it as a APNG_DISPOSE_OP_BACKGROUND
@@ -1254,7 +1255,7 @@ static void handle_p_frame_png(PNGDecContext *s, AVFrame *p)
ls = FFMIN(ls, s->width * s->bpp);
- ff_thread_await_progress(&s->last_picture, INT_MAX, 0);
+ ff_progress_frame_await(&s->last_picture, INT_MAX);
for (j = 0; j < s->height; j++) {
for (i = 0; i < ls; i++)
pd[i] += pd_last[i];
@@ -1286,7 +1287,7 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
return AVERROR_PATCHWELCOME;
}
- ff_thread_await_progress(&s->last_picture, INT_MAX, 0);
+ ff_progress_frame_await(&s->last_picture, INT_MAX);
// copy unchanged rectangles from the last frame
for (y = 0; y < s->y_offset; y++)
@@ -1674,7 +1675,7 @@ exit_loop:
}
/* handle P-frames only if a predecessor frame is available */
- if (s->last_picture.f->data[0]) {
+ if (s->last_picture.f) {
if ( !(avpkt->flags & AV_PKT_FLAG_KEY) && avctx->codec_tag != AV_RL32("MPNG")
&& s->last_picture.f->width == p->width
&& s->last_picture.f->height== p->height
@@ -1691,12 +1692,11 @@ exit_loop:
if (CONFIG_APNG_DECODER && s->dispose_op == APNG_DISPOSE_OP_BACKGROUND)
apng_reset_background(s, p);
- ff_thread_report_progress(&s->picture, INT_MAX, 0);
-
- return 0;
-
+ ret = 0;
fail:
- ff_thread_report_progress(&s->picture, INT_MAX, 0);
+ if (s->picture.f)
+ ff_progress_frame_report(&s->picture, INT_MAX);
+
return ret;
}
@@ -1783,8 +1783,8 @@ static int decode_frame_png(AVCodecContext *avctx, AVFrame *p,
goto the_end;
if (!(avctx->active_thread_type & FF_THREAD_FRAME)) {
- ff_thread_release_ext_buffer(&s->last_picture);
- FFSWAP(ThreadFrame, s->picture, s->last_picture);
+ ff_progress_frame_unref(&s->last_picture);
+ FFSWAP(ProgressFrame, s->picture, s->last_picture);
}
*got_frame = 1;
@@ -1835,12 +1835,9 @@ static int decode_frame_apng(AVCodecContext *avctx, AVFrame *p,
return ret;
if (!(avctx->active_thread_type & FF_THREAD_FRAME)) {
- if (s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
- ff_thread_release_ext_buffer(&s->picture);
- } else {
- ff_thread_release_ext_buffer(&s->last_picture);
- FFSWAP(ThreadFrame, s->picture, s->last_picture);
- }
+ if (s->dispose_op != APNG_DISPOSE_OP_PREVIOUS)
+ FFSWAP(ProgressFrame, s->picture, s->last_picture);
+ ff_progress_frame_unref(&s->picture);
}
*got_frame = 1;
@@ -1853,8 +1850,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
PNGDecContext *psrc = src->priv_data;
PNGDecContext *pdst = dst->priv_data;
- ThreadFrame *src_frame = NULL;
- int ret;
+ const ProgressFrame *src_frame;
if (dst == src)
return 0;
@@ -1879,12 +1875,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
src_frame = psrc->dispose_op == APNG_DISPOSE_OP_PREVIOUS ?
&psrc->last_picture : &psrc->picture;
- ff_thread_release_ext_buffer(&pdst->last_picture);
- if (src_frame && src_frame->f->data[0]) {
- ret = ff_thread_ref_frame(&pdst->last_picture, src_frame);
- if (ret < 0)
- return ret;
- }
+ ff_progress_frame_replace(&pdst->last_picture, src_frame);
return 0;
}
@@ -1895,10 +1886,6 @@ static av_cold int png_dec_init(AVCodecContext *avctx)
PNGDecContext *s = avctx->priv_data;
s->avctx = avctx;
- s->last_picture.f = av_frame_alloc();
- s->picture.f = av_frame_alloc();
- if (!s->last_picture.f || !s->picture.f)
- return AVERROR(ENOMEM);
ff_pngdsp_init(&s->dsp);
@@ -1909,10 +1896,8 @@ static av_cold int png_dec_end(AVCodecContext *avctx)
{
PNGDecContext *s = avctx->priv_data;
- ff_thread_release_ext_buffer(&s->last_picture);
- av_frame_free(&s->last_picture.f);
- ff_thread_release_ext_buffer(&s->picture);
- av_frame_free(&s->picture.f);
+ ff_progress_frame_unref(&s->last_picture);
+ ff_progress_frame_unref(&s->picture);
av_freep(&s->buffer);
s->buffer_size = 0;
av_freep(&s->last_row);
@@ -1940,7 +1925,7 @@ const FFCodec ff_apng_decoder = {
UPDATE_THREAD_CONTEXT(update_thread_context),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_ALLOCATE_PROGRESS |
+ FF_CODEC_CAP_USES_PROGRESSFRAMES |
FF_CODEC_CAP_ICC_PROFILES,
};
#endif
@@ -1958,7 +1943,8 @@ const FFCodec ff_png_decoder = {
UPDATE_THREAD_CONTEXT(update_thread_context),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
.caps_internal = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM |
- FF_CODEC_CAP_ALLOCATE_PROGRESS | FF_CODEC_CAP_INIT_CLEANUP |
+ FF_CODEC_CAP_INIT_CLEANUP |
+ FF_CODEC_CAP_USES_PROGRESSFRAMES |
FF_CODEC_CAP_ICC_PROFILES,
};
#endif
diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c
index acd77ea810..a6945549bd 100644
--- a/libavcodec/pnmdec.c
+++ b/libavcodec/pnmdec.c
@@ -64,8 +64,6 @@ static int pnm_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
avctx->bits_per_raw_sample = av_log2(s->maxval) + 1;
switch (avctx->pix_fmt) {
diff --git a/libavcodec/progressframe.h b/libavcodec/progressframe.h
new file mode 100644
index 0000000000..428a461659
--- /dev/null
+++ b/libavcodec/progressframe.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PROGRESSFRAME_H
+#define AVCODEC_PROGRESSFRAME_H
+
+/**
+ * ProgressFrame is an API to easily share frames without an underlying
+ * av_frame_ref(). Its main use case is in frame-threading scenarios,
+ * yet it could also be used for purely single-threaded decoders that
+ * want to keep multiple references to the same frame.
+ *
+ * The underlying principle behind the API is that all that is needed
+ * to share a frame is a reference count and a contract between all parties.
+ * The ProgressFrame provides the reference count and the frame is unreferenced
+ * via ff_thread_release_buffer() when the reference count reaches zero.
+ *
+ * To make this API usable for frame-threaded decoders as well, it also
+ * provides a way of exchanging simple information about the state of
+ * decoding the frame via ff_progress_frame_report() and
+ * ff_progress_frame_await().
+ *
+ * The typical contract for frame-threaded decoders is as follows:
+ * Thread A initializes a ProgressFrame via ff_progress_frame_get_buffer()
+ * (which already allocates the AVFrame's data buffers), calls
+ * ff_thread_finish_setup() and starts decoding the frame. Later threads
+ * receive a reference to this frame, which means they get a pointer
+ * to the AVFrame and the internal reference count gets incremented.
+ * Later threads whose frames use A's frame as reference as well as
+ * the thread that will eventually output A's frame will wait for
+ * progress on said frame reported by A. As soon as A has reported
+ * that it has finished decoding its frame, it must no longer modify it
+ * (neither its data nor its properties).
+ *
+ * Because creating a reference with this API does not involve reads
+ * from the actual AVFrame, the decoding thread may modify the properties
+ * (i.e. non-data fields) until it has indicated that it is done with this
+ * frame. This is important for e.g. propagating decode_error_flags;
+ * it also allows adding side data late.
+ */
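+
+/*
+ * A minimal sketch (hypothetical decoder, illustrative field names) of the
+ * contract described above, assuming one retained reference frame:
+ *
+ *     // start of decoding a new frame
+ *     ff_progress_frame_replace(&s->last_frame, &s->cur_frame);
+ *     ff_progress_frame_unref(&s->cur_frame);
+ *     ret = ff_progress_frame_get_buffer(avctx, &s->cur_frame,
+ *                                        AV_GET_BUFFER_FLAG_REF);
+ *     if (ret < 0)
+ *         return ret;
+ *     ff_thread_finish_setup(avctx);
+ *     // ... decode; consumers of s->last_frame wait before reading it:
+ *     ff_progress_frame_await(&s->last_frame, INT_MAX);
+ *     // ... once the current frame is fully decoded, publish it:
+ *     ff_progress_frame_report(&s->cur_frame, INT_MAX);
+ */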
+
+struct AVCodecContext;
+
+/**
+ * The ProgressFrame structure.
+ * Hint: It is guaranteed that the AVFrame pointer is at the start
+ * of ProgressFrame. This makes it possible to use an unnamed
+ * union {
+ * struct {
+ * AVFrame *f;
+ * };
+ * ProgressFrame pf;
+ * };
+ * to simplify accessing the embedded AVFrame.
+ */
+typedef struct ProgressFrame {
+ struct AVFrame *f;
+ struct ProgressInternal *progress;
+} ProgressFrame;
+
+/**
+ * Notify later decoding threads when part of their reference frame is ready.
+ * Call this when some part of the frame is finished decoding.
+ * Later calls with lower values of progress have no effect.
+ *
+ * @param f The frame being decoded.
+ * @param progress Value, in arbitrary units, of how much of the frame has been decoded.
+ *
+ * @warning Calling this on a blank ProgressFrame causes undefined behaviour
+ */
+void ff_progress_frame_report(ProgressFrame *f, int progress);
+
+/**
+ * Wait for earlier decoding threads to finish reference frames.
+ * Call this before accessing some part of a frame, with a given
+ * value for progress, and it will return after the responsible decoding
+ * thread calls ff_progress_frame_report() with the same or
+ * higher value for progress.
+ *
+ * @param f The frame being referenced.
+ * @param progress Value, in arbitrary units, to wait for.
+ *
+ * @warning Calling this on a blank ProgressFrame causes undefined behaviour
+ */
+void ff_progress_frame_await(const ProgressFrame *f, int progress);
+
+/**
+ * This function sets up the ProgressFrame, i.e. gets ProgressFrame.f
+ * and also calls ff_thread_get_buffer() on the frame.
+ *
+ * @note: This must only be called by codecs with the
+ * FF_CODEC_CAP_USES_PROGRESSFRAMES internal cap.
+ */
+int ff_progress_frame_get_buffer(struct AVCodecContext *avctx,
+ ProgressFrame *f, int flags);
+
+/**
+ * Give up a reference to the underlying frame contained in a ProgressFrame
+ * and reset the ProgressFrame, setting all pointers to NULL.
+ *
+ * @note: This implies that, when using this API, whether a frame exists
+ * is determined by checking ProgressFrame.f and not
+ * ProgressFrame.f->data[0] or ProgressFrame.f->buf[0].
+ */
+void ff_progress_frame_unref(ProgressFrame *f);
+
+/**
+ * Set dst->f to src->f and make dst a co-owner of src->f.
+ * dst can then be used to wait on progress of the underlying frame.
+ *
+ * @note: There is no underlying av_frame_ref() here. dst->f and src->f
+ * really point to the same AVFrame. Typically this means that
+ * the decoding thread is allowed to set all the properties of
+ * the AVFrame until it has indicated that it has finished decoding.
+ * Afterwards later threads may read all of these fields.
+ * Access to the frame's data is governed by
+ * ff_progress_frame_report/await().
+ */
+void ff_progress_frame_ref(ProgressFrame *dst, const ProgressFrame *src);
+
+/**
+ * Do nothing if dst and src already refer to the same AVFrame;
+ * otherwise unreference dst and, if src is not blank, put a reference
+ * to src's AVFrame in its place.
+ */
+void ff_progress_frame_replace(ProgressFrame *dst, const ProgressFrame *src);
+
+#endif /* AVCODEC_PROGRESSFRAME_H */
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 73fbd3458e..ec1d0bd0be 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -787,8 +787,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
}
ctx->frame = frame;
- ctx->frame->pict_type = AV_PICTURE_TYPE_I;
- ctx->frame->flags |= AV_FRAME_FLAG_KEY;
ctx->first_field = 1;
buf += 8;
diff --git a/libavcodec/prosumer.c b/libavcodec/prosumer.c
index a1ed6a9e53..1930e3e3e6 100644
--- a/libavcodec/prosumer.c
+++ b/libavcodec/prosumer.c
@@ -194,8 +194,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
}
}
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index d32e56de0d..ca84b81391 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -32,7 +32,6 @@
#include "libavutil/thread.h"
#include "avcodec.h"
-#include "avcodec_internal.h"
#include "codec_internal.h"
#include "pthread_internal.h"
#include "thread.h"
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 6e6d9d67df..982e4a64c5 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -22,19 +22,16 @@
* @see doc/multithreading.txt
*/
-#include "config.h"
-
#include <stdatomic.h>
-#include <stdint.h>
#include "avcodec.h"
#include "avcodec_internal.h"
+#include "codec_desc.h"
#include "codec_internal.h"
#include "decode.h"
#include "hwaccel_internal.h"
#include "hwconfig.h"
#include "internal.h"
-#include "packet_internal.h"
#include "pthread_internal.h"
#include "refstruct.h"
#include "thread.h"
@@ -67,12 +64,6 @@ enum {
INITIALIZED, ///< Thread has been properly set up
};
-typedef struct DecodedFrames {
- AVFrame **f;
- size_t nb_f;
- size_t nb_f_allocated;
-} DecodedFrames;
-
typedef struct ThreadFrameProgress {
atomic_int progress[2];
} ThreadFrameProgress;
@@ -97,10 +88,8 @@ typedef struct PerThreadContext {
AVPacket *avpkt; ///< Input packet (for decoding) or output (for encoding).
- /**
- * Decoded frames from a single decode iteration.
- */
- DecodedFrames df;
+ AVFrame *frame; ///< Output frame (for decoding) or input (for encoding).
+ int got_frame; ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
int result; ///< The result of the last codec decode/encode() call.
atomic_int state;
@@ -117,6 +106,10 @@ typedef struct PerThreadContext {
int hwaccel_threadsafe;
atomic_int debug_threads; ///< Set if the FF_DEBUG_THREADS option is set.
+
+ /// The following two fields have the same semantics as the corresponding DecodeContext fields
+ int intra_only_flag;
+ enum AVPictureType initial_pict_type;
} PerThreadContext;
/**
@@ -137,17 +130,14 @@ typedef struct FrameThreadContext {
pthread_cond_t async_cond;
int async_lock;
- DecodedFrames df;
- int result;
-
- /**
- * Packet to be submitted to the next thread for decoding.
- */
- AVPacket *next_pkt;
-
int next_decoding; ///< The next context to submit a packet to.
int next_finished; ///< The next context to return output from.
+ int delaying; /**<
+ * Set for the first N packets, where N is the number of threads.
+ * While it is set, ff_thread_en/decode_frame won't return any results.
+ */
+
/* hwaccel state for thread-unsafe hwaccels is temporarily stored here in
* order to transfer its ownership to the next decoding thread without the
* need for extra synchronization */
@@ -190,52 +180,6 @@ static void thread_set_name(PerThreadContext *p)
ff_thread_setname(name);
}
-// get a free frame to decode into
-static AVFrame *decoded_frames_get_free(DecodedFrames *df)
-{
- if (df->nb_f == df->nb_f_allocated) {
- AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
- sizeof(*df->f));
- if (!tmp)
- return NULL;
- df->f = tmp;
-
- df->f[df->nb_f] = av_frame_alloc();
- if (!df->f[df->nb_f])
- return NULL;
-
- df->nb_f_allocated++;
- }
-
- av_assert0(!df->f[df->nb_f]->buf[0]);
-
- return df->f[df->nb_f];
-}
-
-static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
-{
- AVFrame *tmp_frame = df->f[0];
- av_frame_move_ref(dst, tmp_frame);
- memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
- df->f[--df->nb_f] = tmp_frame;
-}
-
-static void decoded_frames_flush(DecodedFrames *df)
-{
- for (size_t i = 0; i < df->nb_f; i++)
- av_frame_unref(df->f[i]);
- df->nb_f = 0;
-}
-
-static void decoded_frames_free(DecodedFrames *df)
-{
- for (size_t i = 0; i < df->nb_f_allocated; i++)
- av_frame_free(&df->f[i]);
- av_freep(&df->f);
- df->nb_f = 0;
- df->nb_f_allocated = 0;
-}
-
/**
* Codec worker thread.
*
@@ -253,8 +197,6 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
pthread_mutex_lock(&p->mutex);
while (1) {
- int ret;
-
while (atomic_load(&p->state) == STATE_INPUT_READY && !p->die)
pthread_cond_wait(&p->input_cond, &p->mutex);
@@ -278,31 +220,18 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
p->hwaccel_serializing = 1;
}
- ret = 0;
- while (ret >= 0) {
- AVFrame *frame;
-
- /* get the frame which will store the output */
- frame = decoded_frames_get_free(&p->df);
- if (!frame) {
- p->result = AVERROR(ENOMEM);
- goto alloc_fail;
- }
+ av_frame_unref(p->frame);
+ p->got_frame = 0;
+ p->frame->pict_type = p->initial_pict_type;
+ p->frame->flags |= p->intra_only_flag;
+ p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
- /* do the actual decoding */
- ret = ff_decode_receive_frame_internal(avctx, frame);
- if (ret == 0)
- p->df.nb_f++;
- else if (ret < 0 && frame->buf[0])
- av_frame_unref(frame);
-
- p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
- }
+ if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
+ av_frame_unref(p->frame);
if (atomic_load(&p->state) == STATE_SETTING_UP)
ff_thread_finish_setup(avctx);
-alloc_fail:
if (p->hwaccel_serializing) {
/* wipe hwaccel state for thread-unsafe hwaccels to avoid stale
* pointers lying around;
@@ -497,20 +426,17 @@ static int update_context_from_user(AVCodecContext *dst, const AVCodecContext *s
}
static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
- AVPacket *in_pkt)
+ AVPacket *avpkt)
{
FrameThreadContext *fctx = p->parent;
PerThreadContext *prev_thread = fctx->prev_thread;
const AVCodec *codec = p->avctx->codec;
int ret;
- pthread_mutex_lock(&p->mutex);
-
- av_packet_unref(p->avpkt);
- av_packet_move_ref(p->avpkt, in_pkt);
+ if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
+ return 0;
- if (AVPACKET_IS_EMPTY(p->avpkt))
- p->avctx->internal->draining = 1;
+ pthread_mutex_lock(&p->mutex);
ret = update_context_from_user(p->avctx, user_avctx);
if (ret) {
@@ -522,6 +448,7 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
memory_order_relaxed);
if (prev_thread) {
+ int err;
if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
pthread_mutex_lock(&prev_thread->progress_mutex);
while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -529,16 +456,10 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
pthread_mutex_unlock(&prev_thread->progress_mutex);
}
- /* codecs without delay might not be prepared to be called repeatedly here during
- * flushing (vp3/theora), and also don't need to be, since from this point on, they
- * will always return EOF anyway */
- if (!p->avctx->internal->draining ||
- (codec->capabilities & AV_CODEC_CAP_DELAY)) {
- ret = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
- if (ret) {
- pthread_mutex_unlock(&p->mutex);
- return ret;
- }
+ err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+ if (err) {
+ pthread_mutex_unlock(&p->mutex);
+ return err;
}
}
@@ -550,47 +471,70 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
}
+ av_packet_unref(p->avpkt);
+ ret = av_packet_ref(p->avpkt, avpkt);
+ if (ret < 0) {
+ pthread_mutex_unlock(&p->mutex);
+ av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
+ return ret;
+ }
+
atomic_store(&p->state, STATE_SETTING_UP);
pthread_cond_signal(&p->input_cond);
pthread_mutex_unlock(&p->mutex);
fctx->prev_thread = p;
- fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
+ fctx->next_decoding++;
return 0;
}
-int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
+int ff_thread_decode_frame(AVCodecContext *avctx,
+ AVFrame *picture, int *got_picture_ptr,
+ AVPacket *avpkt)
{
FrameThreadContext *fctx = avctx->internal->thread_ctx;
- int ret = 0;
+ int finished = fctx->next_finished;
+ PerThreadContext *p;
+ int err;
/* release the async lock, permitting blocked hwaccel threads to
* go forward while we are in this function */
async_unlock(fctx);
- /* submit packets to threads while there are no buffered results to return */
- while (!fctx->df.nb_f && !fctx->result) {
- PerThreadContext *p;
+ /*
+ * Submit a packet to the next decoding thread.
+ */
- /* get a packet to be submitted to the next thread */
- av_packet_unref(fctx->next_pkt);
- ret = ff_decode_get_packet(avctx, fctx->next_pkt);
- if (ret < 0 && ret != AVERROR_EOF)
- goto finish;
+ p = &fctx->threads[fctx->next_decoding];
+ err = submit_packet(p, avctx, avpkt);
+ if (err)
+ goto finish;
- ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
- fctx->next_pkt);
- if (ret < 0)
- goto finish;
+ /*
+ * If we're still receiving the initial packets, don't return a frame.
+ */
- /* do not return any frames until all threads have something to do */
- if (fctx->next_decoding != fctx->next_finished &&
- !avctx->internal->draining)
- continue;
+ if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
+ fctx->delaying = 0;
- p = &fctx->threads[fctx->next_finished];
- fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
+ if (fctx->delaying) {
+ *got_picture_ptr=0;
+ if (avpkt->size) {
+ err = avpkt->size;
+ goto finish;
+ }
+ }
+
+ /*
+ * Return the next available frame from the oldest thread.
+ * If we're at the end of the stream, then we have to skip threads that
+ * didn't output a frame/error, because we don't want to accidentally signal
+ * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
+ */
+
+ do {
+ p = &fctx->threads[finished++];
if (atomic_load(&p->state) != STATE_INPUT_READY) {
pthread_mutex_lock(&p->progress_mutex);
@@ -599,26 +543,35 @@ int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
pthread_mutex_unlock(&p->progress_mutex);
}
- update_context_from_thread(avctx, p->avctx, 1);
- fctx->result = p->result;
- p->result = 0;
- if (p->df.nb_f)
- FFSWAP(DecodedFrames, fctx->df, p->df);
- }
+ av_frame_move_ref(picture, p->frame);
+ *got_picture_ptr = p->got_frame;
+ picture->pkt_dts = p->avpkt->dts;
+ err = p->result;
+
+ /*
+ * A later call with avpkt->size == 0 may loop over all threads,
+ * including this one, searching for a frame/error to return before being
+ * stopped by the "finished != fctx->next_finished" condition.
+ * Make sure we don't mistakenly return the same frame/error again.
+ */
+ p->got_frame = 0;
+ p->result = 0;
- /* a thread may return multiple frames AND an error
- * we first return all the frames, then the error */
- if (fctx->df.nb_f) {
- decoded_frames_pop(&fctx->df, frame);
- ret = 0;
- } else {
- ret = fctx->result;
- fctx->result = 0;
- }
+ if (finished >= avctx->thread_count) finished = 0;
+ } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+
+ update_context_from_thread(avctx, p->avctx, 1);
+
+ if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+
+ fctx->next_finished = finished;
+ /* return the size of the consumed packet if no error occurred */
+ if (err >= 0)
+ err = avpkt->size;
finish:
async_lock(fctx);
- return ret;
+ return err;
}
void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,6 +679,7 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
pthread_cond_wait(&p->output_cond, &p->progress_mutex);
pthread_mutex_unlock(&p->progress_mutex);
}
+ p->got_frame = 0;
}
async_lock(fctx);
@@ -778,13 +732,14 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
}
ff_refstruct_unref(&ctx->internal->pool);
- av_packet_free(&ctx->internal->in_pkt);
av_packet_free(&ctx->internal->last_pkt_props);
av_freep(&ctx->internal);
av_buffer_unref(&ctx->hw_frames_ctx);
+ av_frame_side_data_free(&ctx->decoded_side_data,
+ &ctx->nb_decoded_side_data);
}
- decoded_frames_free(&p->df);
+ av_frame_free(&p->frame);
ff_pthread_free(p, per_thread_offsets);
av_packet_free(&p->avpkt);
@@ -792,9 +747,6 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
av_freep(&p->avctx);
}
- decoded_frames_free(&fctx->df);
- av_packet_free(&fctx->next_pkt);
-
av_freep(&fctx->threads);
ff_pthread_free(fctx, thread_ctx_offsets);
@@ -815,12 +767,21 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
AVCodecContext *copy;
int err;
+ p->initial_pict_type = AV_PICTURE_TYPE_NONE;
+ if (avctx->codec_descriptor->props & AV_CODEC_PROP_INTRA_ONLY) {
+ p->intra_only_flag = AV_FRAME_FLAG_KEY;
+ if (avctx->codec_type == AVMEDIA_TYPE_VIDEO)
+ p->initial_pict_type = AV_PICTURE_TYPE_I;
+ }
+
atomic_init(&p->state, STATE_INPUT_READY);
copy = av_memdup(avctx, sizeof(*avctx));
if (!copy)
return AVERROR(ENOMEM);
copy->priv_data = NULL;
+ copy->decoded_side_data = NULL;
+ copy->nb_decoded_side_data = 0;
/* From now on, this PerThreadContext will be cleaned up by
* ff_frame_thread_free in case of errors. */
@@ -833,6 +794,7 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
if (!copy->internal)
return AVERROR(ENOMEM);
copy->internal->thread_ctx = p;
+ copy->internal->progress_frame_pool = avctx->internal->progress_frame_pool;
copy->delay = avctx->delay;
@@ -853,17 +815,13 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
if (err < 0)
return err;
- if (!(p->avpkt = av_packet_alloc()))
+ if (!(p->frame = av_frame_alloc()) ||
+ !(p->avpkt = av_packet_alloc()))
return AVERROR(ENOMEM);
- copy->internal->is_frame_mt = 1;
if (!first)
copy->internal->is_copy = 1;
- copy->internal->in_pkt = av_packet_alloc();
- if (!copy->internal->in_pkt)
- return AVERROR(ENOMEM);
-
copy->internal->last_pkt_props = av_packet_alloc();
if (!copy->internal->last_pkt_props)
return AVERROR(ENOMEM);
@@ -878,9 +836,19 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
}
p->thread_init = NEEDS_CLOSE;
- if (first)
+ if (first) {
update_context_from_thread(avctx, copy, 1);
+ av_frame_side_data_free(&avctx->decoded_side_data, &avctx->nb_decoded_side_data);
+ for (int i = 0; i < copy->nb_decoded_side_data; i++) {
+ err = av_frame_side_data_clone(&avctx->decoded_side_data,
+ &avctx->nb_decoded_side_data,
+ copy->decoded_side_data[i], 0);
+ if (err < 0)
+ return err;
+ }
+ }
+
atomic_init(&p->debug_threads, (copy->debug & FF_DEBUG_THREADS) != 0);
err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p));
@@ -923,11 +891,8 @@ int ff_frame_thread_init(AVCodecContext *avctx)
return err;
}
- fctx->next_pkt = av_packet_alloc();
- if (!fctx->next_pkt)
- return AVERROR(ENOMEM);
-
fctx->async_lock = 1;
+ fctx->delaying = 1;
if (codec->p.type == AVMEDIA_TYPE_VIDEO)
avctx->delay = avctx->thread_count - 1;
@@ -968,18 +933,17 @@ void ff_thread_flush(AVCodecContext *avctx)
}
fctx->next_decoding = fctx->next_finished = 0;
+ fctx->delaying = 1;
fctx->prev_thread = NULL;
-
- decoded_frames_flush(&fctx->df);
- fctx->result = 0;
-
for (i = 0; i < avctx->thread_count; i++) {
PerThreadContext *p = &fctx->threads[i];
-
- decoded_frames_flush(&p->df);
+ // Make sure decode flush calls with size=0 won't return old frames
+ p->got_frame = 0;
+ av_frame_unref(p->frame);
p->result = 0;
- avcodec_flush_buffers(p->avctx);
+ if (ffcodec(avctx->codec)->flush)
+ ffcodec(avctx->codec)->flush(p->avctx);
}
}
@@ -1035,19 +999,17 @@ int ff_thread_get_ext_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
/* Hint: It is possible for this function to be called with codecs
* that don't support frame threading at all, namely in case
* a frame-threaded decoder shares code with codecs that are not.
- * This currently affects non-MPEG-4 mpegvideo codecs and and VP7.
+ * This currently affects non-MPEG-4 mpegvideo codecs.
* The following check will always be true for them. */
if (!(avctx->active_thread_type & FF_THREAD_FRAME))
return ff_get_buffer(avctx, f->f, flags);
- if (ffcodec(avctx->codec)->caps_internal & FF_CODEC_CAP_ALLOCATE_PROGRESS) {
- f->progress = ff_refstruct_allocz(sizeof(*f->progress));
- if (!f->progress)
- return AVERROR(ENOMEM);
+ f->progress = ff_refstruct_allocz(sizeof(*f->progress));
+ if (!f->progress)
+ return AVERROR(ENOMEM);
- atomic_init(&f->progress->progress[0], -1);
- atomic_init(&f->progress->progress[1], -1);
- }
+ atomic_init(&f->progress->progress[0], -1);
+ atomic_init(&f->progress->progress[1], -1);
ret = ff_thread_get_buffer(avctx, f->f, flags);
if (ret)
@@ -1063,14 +1025,22 @@ void ff_thread_release_ext_buffer(ThreadFrame *f)
av_frame_unref(f->f);
}
-int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+enum ThreadingStatus ff_thread_sync_ref(AVCodecContext *avctx, size_t offset)
{
- PerThreadContext *p = avctx->internal->thread_ctx;
+ PerThreadContext *p;
+ const void *ref;
- if (!AVPACKET_IS_EMPTY(p->avpkt)) {
- av_packet_move_ref(pkt, p->avpkt);
- return 0;
- }
+ if (!avctx->internal->is_copy)
+ return avctx->active_thread_type & FF_THREAD_FRAME ?
+ FF_THREAD_IS_FIRST_THREAD : FF_THREAD_NO_FRAME_THREADING;
+
+ p = avctx->internal->thread_ctx;
+
+ av_assert1(memcpy(&ref, (char*)avctx->priv_data + offset, sizeof(ref)) && ref == NULL);
+
+ memcpy(&ref, (const char*)p->parent->threads[0].avctx->priv_data + offset, sizeof(ref));
+ av_assert1(ref);
+ ff_refstruct_replace((char*)avctx->priv_data + offset, ref);
- return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+ return FF_THREAD_IS_COPY;
}
diff --git a/libavcodec/qdrw.c b/libavcodec/qdrw.c
index 21a53b8e72..ca38f48bd9 100644
--- a/libavcodec/qdrw.c
+++ b/libavcodec/qdrw.c
@@ -506,9 +506,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
}
if (*got_frame) {
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
return avpkt->size;
} else {
av_log(avctx, AV_LOG_ERROR, "Frame contained no usable data\n");
diff --git a/libavcodec/qoidec.c b/libavcodec/qoidec.c
index 37bc2084c0..bb48fa23cf 100644
--- a/libavcodec/qoidec.c
+++ b/libavcodec/qoidec.c
@@ -106,9 +106,6 @@ static int qoi_decode_frame(AVCodecContext *avctx, AVFrame *p,
memcpy(&dst[off_x * channels], px, channels);
}
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
index d9c81b7158..6bbfe2a5a9 100644
--- a/libavcodec/qsv.c
+++ b/libavcodec/qsv.c
@@ -35,6 +35,7 @@
#include "avcodec.h"
#include "qsv_internal.h"
+#include "refstruct.h"
#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl))
#define QSV_HAVE_USER_PLUGIN !QSV_ONEVPL
@@ -740,20 +741,19 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
return 0;
}
-static void mids_buf_free(void *opaque, uint8_t *data)
+static void mids_buf_free(FFRefStructOpaque opaque, void *obj)
{
- AVBufferRef *hw_frames_ref = opaque;
+ AVBufferRef *hw_frames_ref = opaque.nc;
av_buffer_unref(&hw_frames_ref);
- av_freep(&data);
}
-static AVBufferRef *qsv_create_mids(AVBufferRef *hw_frames_ref)
+static QSVMid *qsv_create_mids(AVBufferRef *hw_frames_ref)
{
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ref->data;
AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
int nb_surfaces = frames_hwctx->nb_surfaces;
- AVBufferRef *mids_buf, *hw_frames_ref1;
+ AVBufferRef *hw_frames_ref1;
QSVMid *mids;
int i;
@@ -761,35 +761,27 @@ static AVBufferRef *qsv_create_mids(AVBufferRef *hw_frames_ref)
if (!hw_frames_ref1)
return NULL;
- mids = av_calloc(nb_surfaces, sizeof(*mids));
+ mids = ff_refstruct_alloc_ext(nb_surfaces * sizeof(*mids), 0,
+ hw_frames_ref1, mids_buf_free);
if (!mids) {
av_buffer_unref(&hw_frames_ref1);
return NULL;
}
- mids_buf = av_buffer_create((uint8_t*)mids, nb_surfaces * sizeof(*mids),
- mids_buf_free, hw_frames_ref1, 0);
- if (!mids_buf) {
- av_buffer_unref(&hw_frames_ref1);
- av_freep(&mids);
- return NULL;
- }
-
for (i = 0; i < nb_surfaces; i++) {
QSVMid *mid = &mids[i];
mid->handle_pair = (mfxHDLPair*)frames_hwctx->surfaces[i].Data.MemId;
mid->hw_frames_ref = hw_frames_ref1;
}
- return mids_buf;
+ return mids;
}
static int qsv_setup_mids(mfxFrameAllocResponse *resp, AVBufferRef *hw_frames_ref,
- AVBufferRef *mids_buf)
+ QSVMid *mids)
{
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ref->data;
AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
- QSVMid *mids = (QSVMid*)mids_buf->data;
int nb_surfaces = frames_hwctx->nb_surfaces;
int i;
@@ -810,12 +802,7 @@ static int qsv_setup_mids(mfxFrameAllocResponse *resp, AVBufferRef *hw_frames_re
return AVERROR(ENOMEM);
}
- resp->mids[resp->NumFrameActual + 1] = av_buffer_ref(mids_buf);
- if (!resp->mids[resp->NumFrameActual + 1]) {
- av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual]);
- av_freep(&resp->mids);
- return AVERROR(ENOMEM);
- }
+ resp->mids[resp->NumFrameActual + 1] = ff_refstruct_ref(mids);
return 0;
}
@@ -838,8 +825,16 @@ static mfxStatus qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
mfxFrameInfo *i = &req->Info;
- mfxFrameInfo *i1 = &frames_hwctx->surfaces[0].Info;
+ mfxFrameInfo *i1;
+ if (!frames_hwctx->nb_surfaces) {
+ av_log(ctx->logctx, AV_LOG_DEBUG,
+ "Dynamic frame pools, no frame is pre-allocated\n");
+
+ return MFX_ERR_NONE;
+ }
+
+ i1 = &frames_hwctx->surfaces[0].Info;
if (i->Width > i1->Width || i->Height > i1->Height ||
i->FourCC != i1->FourCC || i->ChromaFormat != i1->ChromaFormat) {
av_log(ctx->logctx, AV_LOG_ERROR, "Mismatching surface properties in an "
@@ -849,7 +844,7 @@ static mfxStatus qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
return MFX_ERR_UNSUPPORTED;
}
- ret = qsv_setup_mids(resp, ctx->hw_frames_ctx, ctx->mids_buf);
+ ret = qsv_setup_mids(resp, ctx->hw_frames_ctx, ctx->mids);
if (ret < 0) {
av_log(ctx->logctx, AV_LOG_ERROR,
"Error filling an external frame allocation request\n");
@@ -858,12 +853,17 @@ static mfxStatus qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
} else if (req->Type & MFX_MEMTYPE_INTERNAL_FRAME) {
/* internal frames -- allocate a new hw frames context */
AVHWFramesContext *ext_frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
+ AVQSVFramesContext *ext_frames_hwctx = ext_frames_ctx->hwctx;
mfxFrameInfo *i = &req->Info;
- AVBufferRef *frames_ref, *mids_buf;
+ AVBufferRef *frames_ref;
+ QSVMid *mids;
AVHWFramesContext *frames_ctx;
AVQSVFramesContext *frames_hwctx;
+ if (!ext_frames_hwctx->nb_surfaces)
+ return MFX_ERR_UNSUPPORTED;
+
frames_ref = av_hwframe_ctx_alloc(ext_frames_ctx->device_ref);
if (!frames_ref)
return MFX_ERR_MEMORY_ALLOC;
@@ -888,14 +888,14 @@ static mfxStatus qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
return MFX_ERR_MEMORY_ALLOC;
}
- mids_buf = qsv_create_mids(frames_ref);
- if (!mids_buf) {
+ mids = qsv_create_mids(frames_ref);
+ if (!mids) {
av_buffer_unref(&frames_ref);
return MFX_ERR_MEMORY_ALLOC;
}
- ret = qsv_setup_mids(resp, frames_ref, mids_buf);
- av_buffer_unref(&mids_buf);
+ ret = qsv_setup_mids(resp, frames_ref, mids);
+ ff_refstruct_unref(&mids);
av_buffer_unref(&frames_ref);
if (ret < 0) {
av_log(ctx->logctx, AV_LOG_ERROR,
@@ -911,19 +911,31 @@ static mfxStatus qsv_frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
static mfxStatus qsv_frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp)
{
+ if (!resp->mids)
+ return MFX_ERR_NONE;
+
av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual]);
- av_buffer_unref((AVBufferRef**)&resp->mids[resp->NumFrameActual + 1]);
+ ff_refstruct_unref(&resp->mids[resp->NumFrameActual + 1]);
av_freep(&resp->mids);
return MFX_ERR_NONE;
}
static mfxStatus qsv_frame_lock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
{
- QSVMid *qsv_mid = mid;
- AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)qsv_mid->hw_frames_ref->data;
- AVQSVFramesContext *hw_frames_hwctx = hw_frames_ctx->hwctx;
+ QSVFramesContext *ctx = (QSVFramesContext *)pthis;
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
+ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
+ QSVMid *qsv_mid;
+ AVHWFramesContext *hw_frames_ctx;
+ AVQSVFramesContext *hw_frames_hwctx;
int ret;
+ if (!frames_hwctx->nb_surfaces)
+ return MFX_ERR_UNSUPPORTED;
+
+ qsv_mid = mid;
+ hw_frames_ctx = (AVHWFramesContext*)qsv_mid->hw_frames_ref->data;
+ hw_frames_hwctx = hw_frames_ctx->hwctx;
if (qsv_mid->locked_frame)
return MFX_ERR_UNDEFINED_BEHAVIOR;
@@ -976,8 +988,15 @@ fail:
static mfxStatus qsv_frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
{
- QSVMid *qsv_mid = mid;
+ QSVFramesContext *ctx = (QSVFramesContext *)pthis;
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
+ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
+ QSVMid *qsv_mid;
+
+ if (!frames_hwctx->nb_surfaces)
+ return MFX_ERR_UNSUPPORTED;
+ qsv_mid = mid;
av_frame_free(&qsv_mid->locked_frame);
av_frame_free(&qsv_mid->hw_frame);
@@ -986,9 +1005,18 @@ static mfxStatus qsv_frame_unlock(mfxHDL pthis, mfxMemId mid, mfxFrameData *ptr)
static mfxStatus qsv_frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl)
{
- QSVMid *qsv_mid = (QSVMid*)mid;
+ QSVFramesContext *ctx = (QSVFramesContext *)pthis;
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)ctx->hw_frames_ctx->data;
+ AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
mfxHDLPair *pair_dst = (mfxHDLPair*)hdl;
- mfxHDLPair *pair_src = (mfxHDLPair*)qsv_mid->handle_pair;
+ mfxHDLPair *pair_src;
+
+ if (frames_hwctx->nb_surfaces) {
+ QSVMid *qsv_mid = (QSVMid*)mid;
+ pair_src = (mfxHDLPair*)qsv_mid->handle_pair;
+ } else {
+ pair_src = (mfxHDLPair*)mid;
+ }
pair_dst->first = pair_src->first;
@@ -1102,14 +1130,17 @@ int ff_qsv_init_session_frames(AVCodecContext *avctx, mfxSession *psession,
if (!opaque) {
qsv_frames_ctx->logctx = avctx;
+ qsv_frames_ctx->mids = NULL;
+ qsv_frames_ctx->nb_mids = 0;
/* allocate the memory ids for the external frames */
- av_buffer_unref(&qsv_frames_ctx->mids_buf);
- qsv_frames_ctx->mids_buf = qsv_create_mids(qsv_frames_ctx->hw_frames_ctx);
- if (!qsv_frames_ctx->mids_buf)
- return AVERROR(ENOMEM);
- qsv_frames_ctx->mids = (QSVMid*)qsv_frames_ctx->mids_buf->data;
- qsv_frames_ctx->nb_mids = frames_hwctx->nb_surfaces;
+ if (frames_hwctx->nb_surfaces) {
+ ff_refstruct_unref(&qsv_frames_ctx->mids);
+ qsv_frames_ctx->mids = qsv_create_mids(qsv_frames_ctx->hw_frames_ctx);
+ if (!qsv_frames_ctx->mids)
+ return AVERROR(ENOMEM);
+ qsv_frames_ctx->nb_mids = frames_hwctx->nb_surfaces;
+ }
err = MFXVideoCORE_SetFrameAllocator(session, &frame_allocator);
if (err != MFX_ERR_NONE)
diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
index c2d301b4a2..d970cd20f0 100644
--- a/libavcodec/qsv_internal.h
+++ b/libavcodec/qsv_internal.h
@@ -115,11 +115,12 @@ typedef struct QSVFramesContext {
AVBufferRef *hw_frames_ctx;
void *logctx;
- /* The memory ids for the external frames.
- * Refcounted, since we need one reference owned by the QSVFramesContext
- * (i.e. by the encoder/decoder) and another one given to the MFX session
- * from the frame allocator. */
- AVBufferRef *mids_buf;
+ /**
+ * The memory ids for the external frames.
+ * Refcounted (via the RefStruct API), since we need one reference
+ * owned by the QSVFramesContext (i.e. by the encoder/decoder) and
+ * another one given to the MFX session from the frame allocator.
+ */
QSVMid *mids;
int nb_mids;
} QSVFramesContext;
diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index fd9267c6f4..df0d49bc10 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -42,6 +42,7 @@
#include "libavutil/imgutils.h"
#include "libavutil/film_grain_params.h"
#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/avassert.h"
#include "avcodec.h"
#include "codec_internal.h"
@@ -50,6 +51,7 @@
#include "hwconfig.h"
#include "qsv.h"
#include "qsv_internal.h"
+#include "refstruct.h"
#if QSV_ONEVPL
#include <mfxdispatcher.h>
@@ -67,6 +69,8 @@ static const AVRational mfx_tb = { 1, 90000 };
AV_NOPTS_VALUE : pts_tb.num ? \
av_rescale_q(mfx_pts, mfx_tb, pts_tb) : mfx_pts)
+#define MFX_IMPL_VIA_MASK(impl) (0x0f00 & (impl))
+
typedef struct QSVAsyncFrame {
mfxSyncPoint *sync;
QSVFrame *frame;
@@ -76,6 +80,7 @@ typedef struct QSVContext {
// the session used for decoding
mfxSession session;
mfxVersion ver;
+ mfxHandleType handle_type;
// the session we allocated internally, in case the caller did not provide
// one
@@ -182,6 +187,7 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
AVBufferRef *hw_frames_ref, AVBufferRef *hw_device_ref)
{
int ret;
+ mfxIMPL impl;
if (q->gpu_copy == MFX_GPUCOPY_ON &&
!(q->iopattern & MFX_IOPATTERN_OUT_SYSTEM_MEMORY)) {
@@ -239,27 +245,52 @@ static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
q->session = q->internal_qs.session;
}
- if (MFXQueryVersion(q->session, &q->ver) != MFX_ERR_NONE) {
- av_log(avctx, AV_LOG_ERROR, "Error querying the session version. \n");
- q->session = NULL;
+ if (MFXQueryIMPL(q->session, &impl) == MFX_ERR_NONE) {
+ switch (MFX_IMPL_VIA_MASK(impl)) {
+ case MFX_IMPL_VIA_VAAPI:
+ q->handle_type = MFX_HANDLE_VA_DISPLAY;
+ break;
- if (q->internal_qs.session) {
- MFXClose(q->internal_qs.session);
- q->internal_qs.session = NULL;
- }
+ case MFX_IMPL_VIA_D3D11:
+ q->handle_type = MFX_HANDLE_D3D11_DEVICE;
+ break;
- if (q->internal_qs.loader) {
- MFXUnload(q->internal_qs.loader);
- q->internal_qs.loader = NULL;
+ case MFX_IMPL_VIA_D3D9:
+ q->handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER;
+ break;
+
+ default:
+ av_assert0(!"should not reach here");
}
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Error querying the implementation. \n");
+ goto fail;
+ }
- return AVERROR_EXTERNAL;
+ if (MFXQueryVersion(q->session, &q->ver) != MFX_ERR_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "Error querying the session version. \n");
+ goto fail;
}
/* make sure the decoder is uninitialized */
MFXVideoDECODE_Close(q->session);
return 0;
+
+fail:
+ q->session = NULL;
+
+ if (q->internal_qs.session) {
+ MFXClose(q->internal_qs.session);
+ q->internal_qs.session = NULL;
+ }
+
+ if (q->internal_qs.loader) {
+ MFXUnload(q->internal_qs.loader);
+ q->internal_qs.loader = NULL;
+ }
+
+ return AVERROR_EXTERNAL;
}
static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat pix_fmt, mfxVideoParam *param)
@@ -309,7 +340,10 @@ static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixel
hwframes_ctx->height = FFALIGN(avctx->coded_height, 32);
hwframes_ctx->format = AV_PIX_FMT_QSV;
hwframes_ctx->sw_format = avctx->sw_pix_fmt;
- hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames;
+ if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 2, 9) && q->handle_type != MFX_HANDLE_D3D9_DEVICE_MANAGER)
+ hwframes_ctx->initial_pool_size = 0;
+ else
+ hwframes_ctx->initial_pool_size = q->suggest_pool_size + 16 + avctx->extra_hw_frames;
frames_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
ret = av_hwframe_ctx_init(avctx->hw_frames_ctx);
@@ -378,9 +412,12 @@ static int qsv_decode_init_context(AVCodecContext *avctx, QSVContext *q, mfxVide
q->frame_info = param->mfx.FrameInfo;
- if (!avctx->hw_frames_ctx)
- q->pool = av_buffer_pool_init(av_image_get_buffer_size(avctx->pix_fmt,
- FFALIGN(avctx->width, 128), FFALIGN(avctx->height, 64), 1), av_buffer_allocz);
+ if (!avctx->hw_frames_ctx) {
+ ret = av_image_get_buffer_size(avctx->pix_fmt, FFALIGN(avctx->width, 128), FFALIGN(avctx->height, 64), 1);
+ if (ret < 0)
+ return ret;
+ q->pool = av_buffer_pool_init(ret, av_buffer_allocz);
+ }
return 0;
}
@@ -440,6 +477,11 @@ static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q,
param->ExtParam = q->ext_buffers;
param->NumExtParam = q->nb_ext_buffers;
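+ /* If the stream signals no frame rate, fall back to 25/1. */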
+ if (param->mfx.FrameInfo.FrameRateExtN == 0 || param->mfx.FrameInfo.FrameRateExtD == 0) {
+ param->mfx.FrameInfo.FrameRateExtN = 25;
+ param->mfx.FrameInfo.FrameRateExtD = 1;
+ }
+
#if QSV_VERSION_ATLEAST(1, 34)
if (QSV_RUNTIME_VERSION_ATLEAST(q->ver, 1, 34) && avctx->codec_id == AV_CODEC_ID_AV1)
param->mfx.FilmGrain = (avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) ? 0 : param->mfx.FilmGrain;
@@ -885,7 +927,7 @@ static void qsv_decode_close_qsvcontext(QSVContext *q)
ff_qsv_close_internal_session(&q->internal_qs);
av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
- av_buffer_unref(&q->frames_ctx.mids_buf);
+ ff_refstruct_unref(&q->frames_ctx.mids);
av_buffer_pool_uninit(&q->pool);
}
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 3a8607fca6..3df355ce78 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -31,6 +31,7 @@
#include "libavutil/hwcontext_qsv.h"
#include "libavutil/mem.h"
#include "libavutil/log.h"
+#include "libavutil/dict.h"
#include "libavutil/time.h"
#include "libavutil/imgutils.h"
@@ -41,6 +42,7 @@
#include "qsv.h"
#include "qsv_internal.h"
#include "qsvenc.h"
+#include "refstruct.h"
struct profile_names {
mfxU16 profile;
@@ -743,8 +745,9 @@ static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
if (avctx->hw_frames_ctx) {
AVHWFramesContext *frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
- q->param.mfx.FrameInfo.Width = frames_hwctx->surfaces[0].Info.Width;
- q->param.mfx.FrameInfo.Height = frames_hwctx->surfaces[0].Info.Height;
+ mfxFrameInfo *info = frames_hwctx->nb_surfaces ? &frames_hwctx->surfaces[0].Info : frames_hwctx->info;
+ q->param.mfx.FrameInfo.Width = info->Width;
+ q->param.mfx.FrameInfo.Height = info->Height;
}
if (avctx->framerate.den > 0 && avctx->framerate.num > 0) {
@@ -867,8 +870,9 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
if (avctx->hw_frames_ctx) {
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
- q->param.mfx.FrameInfo.Width = frames_hwctx->surfaces[0].Info.Width;
- q->param.mfx.FrameInfo.Height = frames_hwctx->surfaces[0].Info.Height;
+ mfxFrameInfo *info = frames_hwctx->nb_surfaces ? &frames_hwctx->surfaces[0].Info : frames_hwctx->info;
+ q->param.mfx.FrameInfo.Width = info->Width;
+ q->param.mfx.FrameInfo.Height = info->Height;
}
if (avctx->framerate.den > 0 && avctx->framerate.num > 0) {
@@ -1633,6 +1637,12 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
int iopattern = 0;
int opaque_alloc = 0;
int ret;
+ void *tmp;
+#if HAVE_STRUCT_MFXCONFIGINTERFACE
+ mfxExtBuffer ext_buf;
+ mfxConfigInterface *iface = NULL;
+ const AVDictionaryEntry *param = NULL;
+#endif
q->param.AsyncDepth = q->async_depth;
@@ -1693,35 +1703,92 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
if (ret < 0)
return ret;
+ tmp = av_realloc_array(q->extparam, q->nb_extparam_internal, sizeof(*q->extparam));
+ if (!tmp)
+ return AVERROR(ENOMEM);
+
+ q->extparam = tmp;
+ q->nb_extparam = q->nb_extparam_internal;
+ memcpy(q->extparam, q->extparam_internal, q->nb_extparam * sizeof(*q->extparam));
+
if (avctx->hwaccel_context) {
AVQSVContext *qsv = avctx->hwaccel_context;
int i, j;
- q->extparam = av_calloc(qsv->nb_ext_buffers + q->nb_extparam_internal,
- sizeof(*q->extparam));
- if (!q->extparam)
- return AVERROR(ENOMEM);
-
- q->param.ExtParam = q->extparam;
- for (i = 0; i < qsv->nb_ext_buffers; i++)
- q->param.ExtParam[i] = qsv->ext_buffers[i];
- q->param.NumExtParam = qsv->nb_ext_buffers;
-
- for (i = 0; i < q->nb_extparam_internal; i++) {
- for (j = 0; j < qsv->nb_ext_buffers; j++) {
- if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId)
+ for (i = 0; i < qsv->nb_ext_buffers; i++) {
+ for (j = 0; j < q->nb_extparam_internal; j++) {
+ if (qsv->ext_buffers[i]->BufferId == q->extparam_internal[j]->BufferId) {
+ q->extparam[j] = qsv->ext_buffers[i];
break;
+ }
}
- if (j < qsv->nb_ext_buffers)
- continue;
- q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i];
+ if (j == q->nb_extparam_internal) {
+ tmp = av_realloc_array(q->extparam, q->nb_extparam + 1, sizeof(*q->extparam));
+ if (!tmp)
+ return AVERROR(ENOMEM);
+
+ q->extparam = tmp;
+ q->extparam[q->nb_extparam++] = qsv->ext_buffers[i];
+ }
}
- } else {
- q->param.ExtParam = q->extparam_internal;
- q->param.NumExtParam = q->nb_extparam_internal;
}
+ q->param.ExtParam = q->extparam;
+ q->param.NumExtParam = q->nb_extparam;
+
+#if HAVE_STRUCT_MFXCONFIGINTERFACE
+ ret = MFXVideoCORE_GetHandle(q->session, MFX_HANDLE_CONFIG_INTERFACE, (mfxHDL *)(&iface));
+ if (ret < 0)
+ return ff_qsv_print_error(avctx, ret,
+ "Error getting mfx config interface handle");
+
+ while ((param = av_dict_get(q->qsv_params, "", param, AV_DICT_IGNORE_SUFFIX))) {
+ const char *param_key = param->key;
+ const char *param_value = param->value;
+ mfxExtBuffer *new_ext_buf;
+ void *tmp;
+
+ av_log(avctx, AV_LOG_VERBOSE, "Parameter key: %s, value: %s\n", param_key, param_value);
+
+ // Set encoding parameters via the mfxConfigInterface SetParameter string API
+ for (int i = 0; i < 2; i++) {
+ ret = iface->SetParameter(iface, (mfxU8*)param_key, (mfxU8*)param_value, MFX_STRUCTURE_TYPE_VIDEO_PARAM, &q->param, &ext_buf);
+ if (ret == MFX_ERR_NONE) {
+ break;
+ } else if (i == 0 && ret == MFX_ERR_MORE_EXTBUFFER) {
+ tmp = av_realloc_array(q->extparam_str, q->nb_extparam_str + 1, sizeof(*q->extparam_str));
+ if (!tmp)
+ return AVERROR(ENOMEM);
+ q->extparam_str = tmp;
+
+ tmp = av_realloc_array(q->extparam, q->nb_extparam + 1, sizeof(*q->extparam));
+ if (!tmp)
+ return AVERROR(ENOMEM);
+ q->extparam = tmp;
+
+ new_ext_buf = (mfxExtBuffer*)av_mallocz(ext_buf.BufferSz);
+ if (!new_ext_buf)
+ return AVERROR(ENOMEM);
+
+ new_ext_buf->BufferId = ext_buf.BufferId;
+ new_ext_buf->BufferSz = ext_buf.BufferSz;
+ q->extparam_str[q->nb_extparam_str++] = new_ext_buf;
+ q->extparam[q->nb_extparam++] = new_ext_buf;
+ q->param.ExtParam = q->extparam;
+ q->param.NumExtParam = q->nb_extparam;
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set parameter: %s\n", param_key);
+ return AVERROR_UNKNOWN;
+ }
+ }
+ }
+#else
+ if (q->qsv_params) {
+ av_log(avctx, AV_LOG_WARNING, "MFX string API is not supported, ignoring the qsv_params option\n");
+ }
+#endif
+
ret = MFXVideoENCODE_Query(q->session, &q->param, &q->param);
if (ret == MFX_WRN_PARTIAL_ACCELERATION) {
av_log(avctx, AV_LOG_WARNING, "Encoder will work with partial HW acceleration\n");
@@ -2649,7 +2716,7 @@ int ff_qsv_enc_close(AVCodecContext *avctx, QSVEncContext *q)
ff_qsv_close_internal_session(&q->internal_qs);
av_buffer_unref(&q->frames_ctx.hw_frames_ctx);
- av_buffer_unref(&q->frames_ctx.mids_buf);
+ ff_refstruct_unref(&q->frames_ctx.mids);
cur = q->work_frames;
while (cur) {
@@ -2681,6 +2748,10 @@ int ff_qsv_enc_close(AVCodecContext *avctx, QSVEncContext *q)
av_buffer_unref(&q->opaque_alloc_buf);
#endif
+ for (int i = 0; i < q->nb_extparam_str; i++)
+ av_free(q->extparam_str[i]);
+
+ av_freep(&q->extparam_str);
av_freep(&q->extparam);
return 0;
diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index c71bf2ed50..e3eb083746 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h
@@ -64,7 +64,8 @@
{ "slower", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2 }, INT_MIN, INT_MAX, VE, .unit = "preset" }, \
{ "veryslow", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY }, INT_MIN, INT_MAX, VE, .unit = "preset" }, \
{ "forced_idr", "Forcing I frames as IDR frames", OFFSET(qsv.forced_idr), AV_OPT_TYPE_BOOL,{ .i64 = 0 }, 0, 1, VE }, \
-{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},
+{ "low_power", "enable low power mode(experimental: many limitations by mfx version, BRC modes, etc.)", OFFSET(qsv.low_power), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE},\
+{ "qsv_params", "Set QSV encoder parameters as key1=value1:key2=value2:...", OFFSET(qsv.qsv_params), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
#if QSV_HAVE_HE
#define QSV_HE_OPTIONS \
@@ -195,7 +196,11 @@ typedef struct QSVEncContext {
mfxExtBuffer *extparam_internal[5 + (QSV_HAVE_MF * 2) + (QSV_HAVE_EXT_AV1_PARAM * 2) + QSV_HAVE_HE];
int nb_extparam_internal;
+ mfxExtBuffer **extparam_str;
+ int nb_extparam_str;
+
mfxExtBuffer **extparam;
+ int nb_extparam;
AVFifo *async_fifo;
@@ -314,6 +319,8 @@ typedef struct QSVEncContext {
int skip_frame;
// This is used for Hyper Encode
int dual_gfx;
+
+ AVDictionary *qsv_params;
} QSVEncContext;
int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q);
diff --git a/libavcodec/r210dec.c b/libavcodec/r210dec.c
index fe6a025988..7405da68e8 100644
--- a/libavcodec/r210dec.c
+++ b/libavcodec/r210dec.c
@@ -56,8 +56,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
g_line = pic->data[0];
b_line = pic->data[1];
r_line = pic->data[2];
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 1ff209c00b..9ee08ecb88 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -32,7 +32,6 @@
#include "avcodec.h"
#include "ratecontrol.h"
-#include "mpegutils.h"
#include "mpegvideoenc.h"
#include "libavutil/eval.h"
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 8de90e0477..b02edac371 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -225,9 +225,6 @@ static int raw_decode(AVCodecContext *avctx, AVFrame *frame,
need_copy = !avpkt->buf || context->is_1_2_4_8_bpp || context->is_yuv2 || context->is_lt_16bpp;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
res = ff_decode_frame_props(avctx, frame);
if (res < 0)
return res;
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 6c2ce3001a..67e198d754 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -5,6 +5,7 @@ RVV-OBJS-$(CONFIG_AAC_ENCODER) += riscv/aacencdsp_rvv.o
OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
RV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvb.o
RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
+RVVB-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvvb.o
OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o
@@ -27,6 +28,7 @@ OBJS-$(CONFIG_JPEG2000_DECODER) += riscv/jpeg2000dsp_init.o
RVV-OBJS-$(CONFIG_JPEG2000_DECODER) += riscv/jpeg2000dsp_rvv.o
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
+OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
@@ -48,6 +50,10 @@ RV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvi.o
RVV-OBJS-$(CONFIG_PIXBLOCKDSP) += riscv/pixblockdsp_rvv.o
OBJS-$(CONFIG_RV34DSP) += riscv/rv34dsp_init.o
RVV-OBJS-$(CONFIG_RV34DSP) += riscv/rv34dsp_rvv.o
+OBJS-$(CONFIG_RV40_DECODER) += riscv/rv40dsp_init.o
+RVV-OBJS-$(CONFIG_RV40_DECODER) += riscv/rv40dsp_rvv.o
+RV-OBJS-$(CONFIG_STARTCODE) += riscv/startcode_rvb.o
+RVV-OBJS-$(CONFIG_STARTCODE) += riscv/startcode_rvv.o
OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_init.o
RVV-OBJS-$(CONFIG_SVQ1_ENCODER) += riscv/svqenc_rvv.o
OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_init.o
@@ -55,8 +61,15 @@ RVV-OBJS-$(CONFIG_TAK_DECODER) += riscv/takdsp_rvv.o
OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_init.o
RVV-OBJS-$(CONFIG_UTVIDEO_DECODER) += riscv/utvideodsp_rvv.o
OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
+RV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvi.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
+RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
+ riscv/vp9_mc_rvi.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
+ riscv/vp9_mc_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index a79d7d7818..2d6858688a 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -207,7 +207,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x
ret
endfunc
-func ff_ps_stereo_interpolate_rvv, zve32f
+func ff_ps_stereo_interpolate_rvv, zve32f, zbb
vsetvli t0, zero, e32, m2, ta, ma
vid.v v24
flw ft0, (a2)
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index b9e14d56ca..f66b6cac57 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -26,19 +26,41 @@
#include "libavutil/cpu.h"
#include "libavcodec/ac3dsp.h"
+void ff_ac3_exponent_min_rvb(uint8_t *exp, int, int);
+void ff_ac3_exponent_min_rvv(uint8_t *exp, int, int);
void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_extract_exponents_rvvb(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
+void ff_sum_square_butterfly_int32_rvv(int64_t *, const int32_t *,
+ const int32_t *, int);
+void ff_sum_square_butterfly_float_rvv(float *, const float *,
+ const float *, int);
av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
{
#if HAVE_RV
int flags = av_get_cpu_flags();
+ if (flags & AV_CPU_FLAG_RVB_BASIC)
+ c->ac3_exponent_min = ff_ac3_exponent_min_rvb;
+ if (flags & AV_CPU_FLAG_RVV_I32)
+ c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
+
if (flags & AV_CPU_FLAG_RVB_ADDR) {
if (flags & AV_CPU_FLAG_RVB_BASIC)
c->extract_exponents = ff_extract_exponents_rvb;
- if (flags & AV_CPU_FLAG_RVV_F32)
+# if HAVE_RV_ZVBB
+ if (flags & AV_CPU_FLAG_RV_ZVBB)
+ c->extract_exponents = ff_extract_exponents_rvvb;
+# endif
+ if (flags & AV_CPU_FLAG_RVV_F32) {
c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+ c->sum_square_butterfly_float = ff_sum_square_butterfly_float_rvv;
+ }
+# if __riscv_xlen >= 64
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ c->sum_square_butterfly_int32 = ff_sum_square_butterfly_int32_rvv;
+# endif
}
#endif
}
diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S
index 48f8bb101e..0ca56466e1 100644
--- a/libavcodec/riscv/ac3dsp_rvb.S
+++ b/libavcodec/riscv/ac3dsp_rvb.S
@@ -21,6 +21,27 @@
#include "config.h"
#include "libavutil/riscv/asm.S"
+func ff_ac3_exponent_min_rvb, zbb
+ beqz a1, 3f
+1:
+ addi a2, a2, -1
+ lb t3, (a0)
+ addi t0, a0, 256
+ mv t1, a1
+2:
+ lb t4, (t0)
+ addi t1, t1, -1
+ addi t0, t0, 256
+ minu t3, t3, t4
+ bnez t1, 2b
+
+ sb t3, (a0)
+ addi a0, a0, 1
+ bnez a2, 1b
+3:
+ ret
+endfunc
+
func ff_extract_exponents_rvb, zbb
1:
lw t0, (a1)
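A minimal scalar C sketch of what ff_ac3_exponent_min_rvb (and the RVV variant below) computes, inferred from the assembly above: each coefficient's exponent is reduced to the minimum over the following reuse blocks, which are laid out 256 bytes apart.

#include <stdint.h>

/* Scalar model of ff_ac3_exponent_min_*: for each of nb_coefs positions,
 * take the minimum exponent over the current block and the num_reuse_blocks
 * following blocks, each stored 256 bytes further on. */
static void ac3_exponent_min_ref(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
{
    if (!num_reuse_blocks)
        return;
    for (int i = 0; i < nb_coefs; i++) {
        uint8_t min_exp = exp[i];
        for (int blk = 1; blk <= num_reuse_blocks; blk++) {
            uint8_t e = exp[i + 256 * blk];
            if (e < min_exp)
                min_exp = e;
        }
        exp[i] = min_exp;
    }
}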
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
index b8d32c4677..1b5f67a9ec 100644
--- a/libavcodec/riscv/ac3dsp_rvv.S
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -21,6 +21,28 @@
#include "config.h"
#include "libavutil/riscv/asm.S"
+func ff_ac3_exponent_min_rvv, zve32x
+ beqz a1, 3f
+1:
+ vsetvli t2, a2, e8, m8, ta, ma
+ vle8.v v8, (a0)
+ addi t0, a0, 256
+ sub a2, a2, t2
+ mv t1, a1
+2:
+ vle8.v v16, (t0)
+ addi t1, t1, -1
+ vminu.vv v8, v8, v16
+ addi t0, t0, 256
+ bnez t1, 2b
+
+ vse8.v v8, (a0)
+ add a0, a0, t2
+ bnez a2, 1b
+3:
+ ret
+endfunc
+
func ff_float_to_fixed24_rvv, zve32f
li t1, 1 << 24
fcvt.s.w f0, t1
@@ -37,3 +59,83 @@ func ff_float_to_fixed24_rvv, zve32f
ret
endfunc
+
+#if __riscv_xlen >= 64
+func ff_sum_square_butterfly_int32_rvv, zve64x
+ vsetvli t0, zero, e64, m8, ta, ma
+ vmv.v.x v0, zero
+ vmv.v.x v8, zero
+1:
+ vsetvli t0, a3, e32, m2, tu, ma
+ vle32.v v16, (a1)
+ sub a3, a3, t0
+ vle32.v v20, (a2)
+ sh2add a1, t0, a1
+ vadd.vv v24, v16, v20
+ sh2add a2, t0, a2
+ vsub.vv v28, v16, v20
+ vwmacc.vv v0, v16, v16
+ vwmacc.vv v4, v20, v20
+ vwmacc.vv v8, v24, v24
+ vwmacc.vv v12, v28, v28
+ bnez a3, 1b
+
+ vsetvli t0, zero, e64, m4, ta, ma
+ vmv.s.x v16, zero
+ vmv.s.x v17, zero
+ vredsum.vs v16, v0, v16
+ vmv.s.x v18, zero
+ vredsum.vs v17, v4, v17
+ vmv.s.x v19, zero
+ vredsum.vs v18, v8, v18
+ vmv.x.s t0, v16
+ vredsum.vs v19, v12, v19
+ vmv.x.s t1, v17
+ sd t0, (a0)
+ vmv.x.s t2, v18
+ sd t1, 8(a0)
+ vmv.x.s t3, v19
+ sd t2, 16(a0)
+ sd t3, 24(a0)
+ ret
+endfunc
+#endif
+
+func ff_sum_square_butterfly_float_rvv, zve32f
+ vsetvli t0, zero, e32, m8, ta, ma
+ vmv.v.x v0, zero
+ vmv.v.x v8, zero
+1:
+ vsetvli t0, a3, e32, m4, tu, ma
+ vle32.v v16, (a1)
+ sub a3, a3, t0
+ vle32.v v20, (a2)
+ sh2add a1, t0, a1
+ vfadd.vv v24, v16, v20
+ sh2add a2, t0, a2
+ vfsub.vv v28, v16, v20
+ vfmacc.vv v0, v16, v16
+ vfmacc.vv v4, v20, v20
+ vfmacc.vv v8, v24, v24
+ vfmacc.vv v12, v28, v28
+ bnez a3, 1b
+
+ vsetvli t0, zero, e32, m4, ta, ma
+ vmv.s.x v16, zero
+ vmv.s.x v17, zero
+ vfredsum.vs v16, v0, v16
+ vmv.s.x v18, zero
+ vfredsum.vs v17, v4, v17
+ vmv.s.x v19, zero
+ vfredsum.vs v18, v8, v18
+ vfmv.f.s ft0, v16
+ vfredsum.vs v19, v12, v19
+ vfmv.f.s ft1, v17
+ fsw ft0, (a0)
+ vfmv.f.s ft2, v18
+ fsw ft1, 4(a0)
+ vfmv.f.s ft3, v19
+ fsw ft2, 8(a0)
+ fsw ft3, 12(a0)
+ ret
+endfunc
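A minimal scalar C sketch of ff_sum_square_butterfly_float_rvv, inferred from the assembly above (the int32 variant is identical except that it accumulates into 64-bit sums).

/* Scalar model: accumulate the energy of the left, right, mid (l+r) and
 * side (l-r) signals over len samples, writing the four totals to sum[]. */
static void sum_square_butterfly_float_ref(float sum[4], const float *lt,
                                           const float *rt, int len)
{
    float l2 = 0, r2 = 0, m2 = 0, s2 = 0;
    for (int i = 0; i < len; i++) {
        float l = lt[i], r = rt[i];
        l2 += l * l;
        r2 += r * r;
        m2 += (l + r) * (l + r);
        s2 += (l - r) * (l - r);
    }
    sum[0] = l2;
    sum[1] = r2;
    sum[2] = m2;
    sum[3] = s2;
}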
diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S
new file mode 100644
index 0000000000..64766b56be
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvvb.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2023 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_extract_exponents_rvvb, zve32x, zvbb
+1:
+ vsetvli t0, a2, e32, m8, ta, ma
+ vle32.v v8, (a1)
+ sub a2, a2, t0
+ vneg.v v16, v8
+ sh2add a1, t0, a1
+ vmax.vv v8, v8, v16
+ vclz.v v8, v8
+ vsetvli zero, zero, e16, m4, ta, ma
+ vncvt.x.x.w v4, v8
+ vsetvli zero, zero, e8, m2, ta, ma
+ vncvt.x.x.w v2, v4
+ vadd.vi v2, v2, 24 - 32
+ vse8.v v2, (a0)
+ add a0, a0, t0
+ bnez a2, 1b
+
+ ret
+endfunc
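A minimal scalar C sketch of ff_extract_exponents_rvvb, inferred from the vector code above: the exponent is the leading-zero count of the coefficient's magnitude minus 8, i.e. 24 for a zero coefficient, assuming coefficients fit in 24 bits as the AC-3 fixed-point path produces.

#include <stdint.h>

/* Scalar model of ff_extract_exponents_rvvb (clz of |coef| minus 8). */
static void extract_exponents_ref(uint8_t *exp, const int32_t *coef, int nb_coefs)
{
    for (int i = 0; i < nb_coefs; i++) {
        uint32_t v = coef[i] < 0 ? 0u - (uint32_t)coef[i] : (uint32_t)coef[i];
        int clz = 0;
        while (clz < 32 && !(v & (0x80000000u >> clz)))
            clz++;
        exp[i] = (uint8_t)(clz - 8);   /* assumes |coef| < 2^24 */
    }
}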
diff --git a/libavcodec/riscv/blockdsp_init.c b/libavcodec/riscv/blockdsp_init.c
index 59b2f9d47b..adde0b890b 100644
--- a/libavcodec/riscv/blockdsp_init.c
+++ b/libavcodec/riscv/blockdsp_init.c
@@ -27,15 +27,21 @@
void ff_clear_block_rvv(int16_t *block);
void ff_clear_blocks_rvv(int16_t *block);
+void ff_fill_block16_rvv(uint8_t *block, uint8_t value, ptrdiff_t line_size,
+ int h);
+void ff_fill_block8_rvv(uint8_t *block, uint8_t value, ptrdiff_t line_size,
+ int h);
av_cold void ff_blockdsp_init_riscv(BlockDSPContext *c)
{
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+ if (flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
c->clear_block = ff_clear_block_rvv;
c->clear_blocks = ff_clear_blocks_rvv;
+ c->fill_block_tab[0] = ff_fill_block16_rvv;
+ c->fill_block_tab[1] = ff_fill_block8_rvv;
}
#endif
}
diff --git a/libavcodec/riscv/blockdsp_rvv.S b/libavcodec/riscv/blockdsp_rvv.S
index 8bb00bb467..18ab17da00 100644
--- a/libavcodec/riscv/blockdsp_rvv.S
+++ b/libavcodec/riscv/blockdsp_rvv.S
@@ -40,3 +40,24 @@ func ff_clear_blocks_rvv, zve64x
ret
endfunc
+
+func ff_fill_block16_rvv, zve32x
+ vsetivli t0, 16, e8, m1, ta, ma
+ vmv.v.x v8, a1
+1:
+ addi a3, a3, -1
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ bnez a3, 1b
+
+ ret
+endfunc
+
+func ff_fill_block8_rvv, zve64x
+ vsetvli t0, zero, e8, m4, ta, ma
+ vmv.v.x v8, a1
+ vsetvli t0, a3, e64, m4, ta, ma
+ vsse64.v v8, (a0), a2
+
+ ret
+endfunc
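A minimal scalar C sketch of the two new fill_block kernels, inferred from the assembly above: fill_block_tab[0] fills 16-byte rows and fill_block_tab[1] fills 8-byte rows, h rows in total, line_size bytes apart.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Scalar model of ff_fill_block16_rvv / ff_fill_block8_rvv. */
static void fill_block_ref(uint8_t *block, uint8_t value,
                           ptrdiff_t line_size, int h, int n)
{
    for (int i = 0; i < h; i++) {
        memset(block, value, n);   /* n = 16 for tab[0], 8 for tab[1] */
        block += line_size;
    }
}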
diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 6cfb50ead8..830ae36534 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -31,6 +31,8 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
int pred_order, int qlevel, int len);
void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
int pred_order, int qlevel, int len);
+void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
+void ff_flac_wasted33_rvv(int64_t *, const int32_t *, int shift, int len);
void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
@@ -69,15 +71,25 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
int vlenb = ff_get_rv_vlenb();
- if (vlenb >= 16) {
+ if ((flags & AV_CPU_FLAG_RVB_BASIC) && vlenb >= 16) {
c->lpc16 = ff_flac_lpc16_rvv;
+
# if (__riscv_xlen >= 64)
- if (vlenb > 16)
- c->lpc32 = ff_flac_lpc32_rvv_simple;
- else
- c->lpc32 = ff_flac_lpc32_rvv;
+ if (flags & AV_CPU_FLAG_RVV_I64) {
+ if (vlenb > 16)
+ c->lpc32 = ff_flac_lpc32_rvv_simple;
+ else
+ c->lpc32 = ff_flac_lpc32_rvv;
+ }
+# endif
}
+ c->wasted32 = ff_flac_wasted32_rvv;
+
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ c->wasted33 = ff_flac_wasted33_rvv;
+
+# if (__riscv_xlen >= 64)
switch (fmt) {
case AV_SAMPLE_FMT_S16:
switch (channels) {
@@ -117,8 +129,8 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
c->decorrelate[3] = ff_flac_decorrelate_ms_32_rvv;
break;
-# endif
}
+# endif
}
#endif
}
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 2a0b50f7a9..2941928465 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -20,8 +20,9 @@
#include "libavutil/riscv/asm.S"
-func ff_flac_lpc16_rvv, zve32x
- vsetvli zero, a2, e32, m8, ta, ma
+func ff_flac_lpc16_rvv, zve32x, zbb
+ vtype_vli t0, a2, t2, e32, ta, ma
+ vsetvl zero, a2, t0
vle32.v v8, (a1)
sub a4, a4, a2
vle32.v v16, (a0)
@@ -44,7 +45,7 @@ func ff_flac_lpc16_rvv, zve32x
endfunc
#if (__riscv_xlen == 64)
-func ff_flac_lpc32_rvv, zve32x
+func ff_flac_lpc32_rvv, zve64x
addi t2, a2, -16
ble t2, zero, ff_flac_lpc32_rvv_simple
vsetivli zero, 1, e64, m1, ta, ma
@@ -75,22 +76,24 @@ func ff_flac_lpc32_rvv, zve32x
ret
endfunc
-func ff_flac_lpc32_rvv_simple, zve32x
- vsetivli zero, 1, e64, m1, ta, ma
+func ff_flac_lpc32_rvv_simple, zve64x, zbb
+ vtype_vli t3, a2, t1, e64, ta, ma
+ vntypei t2, t3
+ vsetvl zero, a2, t3 // e64
vmv.s.x v0, zero
- vsetvli zero, a2, e32, m4, ta, ma
+ vsetvl zero, zero, t2 // e32
vle32.v v8, (a1)
sub a4, a4, a2
vle32.v v16, (a0)
sh2add a0, a2, a0
1:
vwmul.vv v24, v8, v16
- vsetvli zero, zero, e64, m8, ta, ma
+ vsetvl zero, zero, t3 // e64
vredsum.vs v24, v24, v0
lw t0, (a0)
addi a4, a4, -1
vmv.x.s t1, v24
- vsetvli zero, zero, e32, m4, ta, ma
+ vsetvl zero, zero, t2 // e32
sra t1, t1, a3
add t0, t0, t1
vslide1down.vx v16, v16, t0
@@ -100,7 +103,54 @@ func ff_flac_lpc32_rvv_simple, zve32x
ret
endfunc
+#endif
+
+func ff_flac_wasted32_rvv, zve32x
+1:
+ vsetvli t0, a2, e32, m8, ta, ma
+ vle32.v v8, (a0)
+ sub a2, a2, t0
+ vsll.vx v8, v8, a1
+ vse32.v v8, (a0)
+ sh2add a0, t0, a0
+ bnez a2, 1b
+
+ ret
+endfunc
+func ff_flac_wasted33_rvv, zve64x
+ srli t0, a2, 5
+ li t1, 1
+ bnez t0, 2f
+ sll a2, t1, a2
+1:
+ vsetvli t0, a3, e32, m4, ta, ma
+ vle32.v v8, (a1)
+ sub a3, a3, t0
+ vwmulsu.vx v16, v8, a2
+ sh2add a1, t0, a1
+ vse64.v v16, (a0)
+ sh3add a0, t0, a0
+ bnez a3, 1b
+
+ ret
+
+2: // Pessimistic case: wasted >= 32
+ vsetvli t0, a3, e32, m4, ta, ma
+ vle32.v v8, (a1)
+ sub a3, a3, t0
+ vwcvtu.x.x.v v16, v8
+ sh2add a1, t0, a1
+ vsetvli zero, zero, e64, m8, ta, ma
+ vsll.vx v16, v16, a2
+ vse64.v v16, (a0)
+ sh3add a0, t0, a0
+ bnez a3, 2b
+
+ ret
+endfunc
+
+#if (__riscv_xlen == 64)
func ff_flac_decorrelate_indep2_16_rvv, zve32x
ld a0, (a0)
ld a2, 8(a1)
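A minimal scalar C sketch of the two new wasted-bits helpers, inferred from the assembly above and the prototypes added in flacdsp_init.c: both restore the "wasted" low bits that FLAC strips from a subframe, with the 33-bit case widening to 64-bit output.

#include <stdint.h>

/* Scalar model of ff_flac_wasted32_rvv: shift decoded samples left in place. */
static void flac_wasted32_ref(int32_t *decoded, int wasted, int len)
{
    for (int i = 0; i < len; i++)
        decoded[i] = (uint32_t)decoded[i] << wasted;
}

/* Scalar model of ff_flac_wasted33_rvv: widen to 64 bits, then shift. */
static void flac_wasted33_ref(int64_t *decoded, const int32_t *residual,
                              int wasted, int len)
{
    for (int i = 0; i < len; i++)
        decoded[i] = (uint64_t)residual[i] << wasted;
}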
diff --git a/libavcodec/riscv/g722dsp_init.c b/libavcodec/riscv/g722dsp_init.c
index 77e29bfb56..0c7e7919e2 100644
--- a/libavcodec/riscv/g722dsp_init.c
+++ b/libavcodec/riscv/g722dsp_init.c
@@ -34,7 +34,7 @@ av_cold void ff_g722dsp_init_riscv(G722DSPContext *dsp)
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if ((flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16)
+ if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128))
dsp->apply_qmf = ff_g722_apply_qmf_rvv;
#endif
}
diff --git a/libavcodec/riscv/h264_chroma_init_riscv.c b/libavcodec/riscv/h264_chroma_init_riscv.c
index e6fe5f6ed6..9802fa517f 100644
--- a/libavcodec/riscv/h264_chroma_init_riscv.c
+++ b/libavcodec/riscv/h264_chroma_init_riscv.c
@@ -38,7 +38,7 @@ av_cold void ff_h264chroma_init_riscv(H264ChromaContext *c, int bit_depth)
int flags = av_get_cpu_flags();
if (bit_depth == 8 && (flags & AV_CPU_FLAG_RVV_I32) &&
- (flags & AV_CPU_FLAG_RVB_ADDR) && ff_get_rv_vlenb() >= 16) {
+ (flags & AV_CPU_FLAG_RVB_ADDR) && ff_rv_vlen_least(128)) {
c->put_h264_chroma_pixels_tab[0] = h264_put_chroma_mc8_rvv;
c->avg_h264_chroma_pixels_tab[0] = h264_avg_chroma_mc8_rvv;
c->put_h264_chroma_pixels_tab[1] = h264_put_chroma_mc4_rvv;
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
new file mode 100644
index 0000000000..dbbf3db400
--- /dev/null
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/h264dsp.h"
+
+extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
+extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
+
+av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
+ const int chroma_format_idc)
+{
+#if HAVE_RV
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_RVB_BASIC)
+ dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
+# if HAVE_RVV
+ if (flags & AV_CPU_FLAG_RVV_I32)
+ dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
+# endif
+#endif
+}
diff --git a/libavcodec/riscv/huffyuvdsp_init.c b/libavcodec/riscv/huffyuvdsp_init.c
index b49b3dc097..79e93e213f 100644
--- a/libavcodec/riscv/huffyuvdsp_init.c
+++ b/libavcodec/riscv/huffyuvdsp_init.c
@@ -35,7 +35,8 @@ av_cold void ff_huffyuvdsp_init_riscv(HuffYUVDSPContext *c,
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
c->add_int16 = ff_add_int16_rvv;
- c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_rvv;
+ if (flags & AV_CPU_FLAG_RVB_BASIC)
+ c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_rvv;
}
#endif
}
diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S
index 9c4434907d..d334f5c6d0 100644
--- a/libavcodec/riscv/huffyuvdsp_rvv.S
+++ b/libavcodec/riscv/huffyuvdsp_rvv.S
@@ -36,8 +36,10 @@ func ff_add_int16_rvv, zve32x
ret
endfunc
-func ff_add_hfyu_left_pred_bgr32_rvv, zve32x
- vsetivli zero, 4, e8, m1, ta, ma
+func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, zbb
+ vtype_ivli t1, 4, e8, ta, ma
+ li t0, 4
+ vsetvl zero, t0, t1
vle8.v v8, (a3)
sh2add a2, a2, a1
1:
diff --git a/libavcodec/riscv/idctdsp_init.c b/libavcodec/riscv/idctdsp_init.c
index 4106d90c55..0decc15955 100644
--- a/libavcodec/riscv/idctdsp_init.c
+++ b/libavcodec/riscv/idctdsp_init.c
@@ -39,7 +39,7 @@ av_cold void ff_idctdsp_init_riscv(IDCTDSPContext *c, AVCodecContext *avctx,
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if ((flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
+ if ((flags & AV_CPU_FLAG_RVV_I64) && ff_rv_vlen_least(128)) {
c->put_pixels_clamped = ff_put_pixels_clamped_rvv;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_rvv;
c->add_pixels_clamped = ff_add_pixels_clamped_rvv;
diff --git a/libavcodec/riscv/me_cmp_init.c b/libavcodec/riscv/me_cmp_init.c
index 858e2ccdb8..f246e55cb1 100644
--- a/libavcodec/riscv/me_cmp_init.c
+++ b/libavcodec/riscv/me_cmp_init.c
@@ -82,7 +82,7 @@ av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
c->pix_abs[0][0] = ff_pix_abs16_rvv;
c->sad[0] = ff_pix_abs16_rvv;
c->pix_abs[1][0] = ff_pix_abs8_rvv;
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 9a8914c78d..42d845a370 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -20,7 +20,7 @@
#include "libavutil/riscv/asm.S"
-func ff_opus_postfilter_rvv, zve32f
+func ff_opus_postfilter_rvv, zve32f, zbb
flw fa0, 0(a2) // g0
slli t1, a1, 2
flw fa1, 4(a2) // g1
diff --git a/libavcodec/riscv/pixblockdsp_init.c b/libavcodec/riscv/pixblockdsp_init.c
index 0584100cfc..b205841101 100644
--- a/libavcodec/riscv/pixblockdsp_init.c
+++ b/libavcodec/riscv/pixblockdsp_init.c
@@ -56,7 +56,7 @@ av_cold void ff_pixblockdsp_init_riscv(PixblockDSPContext *c,
}
#if HAVE_RVV
- if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
+ if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
c->diff_pixels = ff_diff_pixels_unaligned_rvv;
c->diff_pixels_unaligned = ff_diff_pixels_unaligned_rvv;
}
diff --git a/libavcodec/riscv/rv34dsp_init.c b/libavcodec/riscv/rv34dsp_init.c
index 7dcadc7e43..051dc75653 100644
--- a/libavcodec/riscv/rv34dsp_init.c
+++ b/libavcodec/riscv/rv34dsp_init.c
@@ -33,7 +33,7 @@ av_cold void ff_rv34dsp_init_riscv(RV34DSPContext *c)
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
c->rv34_inv_transform_dc = ff_rv34_inv_transform_dc_rvv;
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_rvv;
}
diff --git a/libavcodec/riscv/rv40dsp_init.c b/libavcodec/riscv/rv40dsp_init.c
new file mode 100644
index 0000000000..47df0e98c5
--- /dev/null
+++ b/libavcodec/riscv/rv40dsp_init.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/rv34dsp.h"
+
+void ff_put_rv40_chroma_mc8_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+void ff_put_rv40_chroma_mc4_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+
+void ff_avg_rv40_chroma_mc8_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+
+av_cold void ff_rv40dsp_init_riscv(RV34DSPContext *c)
+{
+#if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128) &&
+ (flags & AV_CPU_FLAG_RVB_ADDR)) {
+ c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_rvv;
+ c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_rvv;
+ c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_rvv;
+ c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_rvv;
+ }
+#endif
+}
diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S
new file mode 100644
index 0000000000..e49345ef70
--- /dev/null
+++ b/libavcodec/riscv/rv40dsp_rvv.S
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro manual_avg dst src1 src2
+ vadd.vv \dst, \src1, \src2
+ vadd.vi \dst, \dst, 1
+ vsrl.vi \dst, \dst, 1
+.endm
+
+.macro do_chroma_mc type unroll
+ csrwi vxrm, 2
+ slli t2, a5, 3
+ mul t1, a5, a4
+ sh3add a5, a4, t2
+ slli a4, a4, 3
+ sub a5, t1, a5
+ sub a7, a4, t1
+ addi a6, a5, 64
+ sub t0, t2, t1
+ vsetvli t3, t6, e8, m1, ta, mu
+ beqz t1, 2f
+ blez a3, 8f
+ li t4, 0
+ li t2, 0
+ li t5, 1
+ addi a5, t3, 1
+ slli t3, a2, (1 + \unroll)
+1: # if (xy != 0)
+ add a4, a1, t4
+ vsetvli zero, a5, e8, m1, ta, ma
+ .ifc \unroll,1
+ addi t2, t2, 4
+ .else
+ addi t2, t2, 2
+ .endif
+ vle8.v v10, (a4)
+ add a4, a4, a2
+ vslide1down.vx v11, v10, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v8, v10, a6
+ vwmaccu.vx v8, a7, v11
+ vsetvli zero, a5, e8, m1, ta, ma
+ vle8.v v12, (a4)
+ vsetvli zero, t6, e8, m1, ta, ma
+ add a4, a4, a2
+ vwmaccu.vx v8, t0, v12
+ vsetvli zero, a5, e8, m1, ta, ma
+ vslide1down.vx v13, v12, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v10, v12, a6
+ vwmaccu.vx v8, t1, v13
+ vwmaccu.vx v10, a7, v13
+ vsetvli zero, a5, e8, m1, ta, ma
+ vle8.v v14, (a4)
+ vsetvli zero, t6, e8, m1, ta, ma
+ add a4, a4, a2
+ vwmaccu.vx v10, t0, v14
+ vsetvli zero, a5, e8, m1, ta, ma
+ vslide1down.vx v15, v14, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v12, v14, a6
+ vwmaccu.vx v10, t1, v15
+ vwmaccu.vx v12, a7, v15
+ vnclipu.wi v15, v8, 6
+ .ifc \type,avg
+ vle8.v v9, (a0)
+ manual_avg v15, v15, v9
+ .endif
+ vse8.v v15, (a0)
+ add a0, a0, a2
+ vnclipu.wi v8, v10, 6
+ .ifc \type,avg
+ vle8.v v9, (a0)
+ manual_avg v8, v8, v9
+ .endif
+ add t4, t4, t3
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ .ifc \unroll,1
+ vsetvli zero, a5, e8, m1, ta, ma
+ vle8.v v14, (a4)
+ vsetvli zero, t6, e8, m1, ta, ma
+ add a4, a4, a2
+ vwmaccu.vx v12, t0, v14
+ vsetvli zero, a5, e8, m1, ta, ma
+ vslide1down.vx v15, v14, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v16, v14, a6
+ vwmaccu.vx v12, t1, v15
+ vwmaccu.vx v16, a7, v15
+ vsetvli zero, a5, e8, m1, ta, ma
+ vle8.v v14, (a4)
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmaccu.vx v16, t0, v14
+ vsetvli zero, a5, e8, m1, ta, ma
+ vslide1down.vx v14, v14, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmaccu.vx v16, t1, v14
+ vnclipu.wi v8, v12, 6
+ .ifc \type,avg
+ vle8.v v9, (a0)
+ manual_avg v8, v8, v9
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ vnclipu.wi v8, v16, 6
+ .ifc \type,avg
+ vle8.v v9, (a0)
+ manual_avg v8, v8, v9
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ .endif
+ blt t2, a3, 1b
+ j 8f
+2:
+ bnez a4, 4f
+ beqz t2, 4f
+ blez a3, 8f
+ li a4, 0
+ li t1, 0
+ slli a7, a2, (1 + \unroll)
+3: # if ((x8 - xy) == 0 && (y8 - xy) != 0)
+ add a5, a1, a4
+ vsetvli zero, zero, e8, m1, ta, ma
+ .ifc \unroll,1
+ addi t1, t1, 4
+ .else
+ addi t1, t1, 2
+ .endif
+ vle8.v v8, (a5)
+ add a5, a5, a2
+ add t2, a5, a2
+ vwmulu.vx v10, v8, a6
+ vle8.v v8, (a5)
+ vwmulu.vx v12, v8, a6
+ vle8.v v9, (t2)
+ add t2, t2, a2
+ add a5, t2, a2
+ vwmaccu.vx v10, t0, v8
+ add a4, a4, a7
+ vwmaccu.vx v12, t0, v9
+ vnclipu.wi v15, v10, 6
+ vwmulu.vx v10, v9, a6
+ vnclipu.wi v9, v12, 6
+ .ifc \type,avg
+ vle8.v v16, (a0)
+ manual_avg v15, v15, v16
+ .endif
+ vse8.v v15, (a0)
+ add a0, a0, a2
+ .ifc \type,avg
+ vle8.v v16, (a0)
+ manual_avg v9, v9, v16
+ .endif
+ vse8.v v9, (a0)
+ add a0, a0, a2
+ .ifc \unroll,1
+ vle8.v v8, (t2)
+ vle8.v v14, (a5)
+ vwmaccu.vx v10, t0, v8
+ vwmulu.vx v12, v8, a6
+ vnclipu.wi v8, v10, 6
+ vwmaccu.vx v12, t0, v14
+ .ifc \type,avg
+ vle8.v v16, (a0)
+ manual_avg v8, v8, v16
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ vnclipu.wi v8, v12, 6
+ .ifc \type,avg
+ vle8.v v16, (a0)
+ manual_avg v8, v8, v16
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ .endif
+ blt t1, a3, 3b
+ j 8f
+4:
+ beqz a4, 6f
+ bnez t2, 6f
+ blez a3, 8f
+ li a4, 0
+ li t2, 0
+ addi t0, t3, 1
+ slli t1, a2, (1 + \unroll)
+5: # if ((x8 - xy) != 0 && (y8 - xy) == 0)
+ add a5, a1, a4
+ vsetvli zero, t0, e8, m1, ta, ma
+ .ifc \unroll,1
+ addi t2, t2, 4
+ .else
+ addi t2, t2, 2
+ .endif
+ vle8.v v8, (a5)
+ add a5, a5, a2
+ vslide1down.vx v9, v8, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v10, v8, a6
+ vwmaccu.vx v10, a7, v9
+ vsetvli zero, t0, e8, m1, ta, ma
+ vle8.v v8, (a5)
+ add a5, a5, a2
+ vslide1down.vx v9, v8, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v12, v8, a6
+ vwmaccu.vx v12, a7, v9
+ vnclipu.wi v16, v10, 6
+ .ifc \type,avg
+ vle8.v v18, (a0)
+ manual_avg v16, v16, v18
+ .endif
+ vse8.v v16, (a0)
+ add a0, a0, a2
+ vnclipu.wi v10, v12, 6
+ .ifc \type,avg
+ vle8.v v18, (a0)
+ manual_avg v10, v10, v18
+ .endif
+ add a4, a4, t1
+ vse8.v v10, (a0)
+ add a0, a0, a2
+ .ifc \unroll,1
+ vsetvli zero, t0, e8, m1, ta, ma
+ vle8.v v8, (a5)
+ add a5, a5, a2
+ vslide1down.vx v9, v8, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v14, v8, a6
+ vwmaccu.vx v14, a7, v9
+ vsetvli zero, t0, e8, m1, ta, ma
+ vle8.v v8, (a5)
+ vslide1down.vx v9, v8, t5
+ vsetvli zero, t6, e8, m1, ta, ma
+ vwmulu.vx v12, v8, a6
+ vnclipu.wi v8, v14, 6
+ vwmaccu.vx v12, a7, v9
+ .ifc \type,avg
+ vle8.v v18, (a0)
+ manual_avg v8, v8, v18
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ vnclipu.wi v8, v12, 6
+ .ifc \type,avg
+ vle8.v v18, (a0)
+ manual_avg v8, v8, v18
+ .endif
+ vse8.v v8, (a0)
+ add a0, a0, a2
+ .endif
+ blt t2, a3, 5b
+ j 8f
+6:
+ blez a3, 8f
+ li a4, 0
+ li t2, 0
+ slli a7, a2, (1 + \unroll)
+7: # the final else, none of the above conditions are met
+ add t0, a1, a4
+ vsetvli zero, zero, e8, m1, ta, ma
+ add a5, a0, a4
+ add a4, a4, a7
+ .ifc \unroll,1
+ addi t2, t2, 4
+ .else
+ addi t2, t2, 2
+ .endif
+ vle8.v v8, (t0)
+ add t0, t0, a2
+ add t1, t0, a2
+ vwmulu.vx v10, v8, a6
+ vle8.v v8, (t0)
+ add t0, t1, a2
+ vnclipu.wi v13, v10, 6
+ vwmulu.vx v10, v8, a6
+ .ifc \type,avg
+ vle8.v v18, (a5)
+ manual_avg v13, v13, v18
+ .endif
+ vse8.v v13, (a5)
+ add a5, a5, a2
+ vnclipu.wi v8, v10, 6
+ .ifc \type,avg
+ vle8.v v18, (a5)
+ manual_avg v8, v8, v18
+ .endif
+ vse8.v v8, (a5)
+ add a5, a5, a2
+ .ifc \unroll,1
+ vle8.v v9, (t1)
+ vle8.v v12, (t0)
+ vwmulu.vx v10, v9, a6
+ vnclipu.wi v8, v10, 6
+ vwmulu.vx v10, v12, a6
+ .ifc \type,avg
+ vle8.v v18, (a5)
+ manual_avg v8, v8, v18
+ .endif
+ vse8.v v8, (a5)
+ add a5, a5, a2
+ vnclipu.wi v8, v10, 6
+ .ifc \type,avg
+ vle8.v v18, (a5)
+ manual_avg v8, v8, v18
+ .endif
+ vse8.v v8, (a5)
+ .endif
+ blt t2, a3, 7b
+8:
+ ret
+.endm
+
+func ff_put_rv40_chroma_mc_rvv, zve32x
+11:
+ li a7, 3
+ blt a3, a7, 12f
+ do_chroma_mc put 1
+12:
+ do_chroma_mc put 0
+endfunc
+
+func ff_avg_rv40_chroma_mc_rvv, zve32x
+21:
+ li a7, 3
+ blt a3, a7, 22f
+ do_chroma_mc avg 1
+22:
+ do_chroma_mc avg 0
+endfunc
+
+func ff_put_rv40_chroma_mc8_rvv, zve32x
+ li t6, 8
+ j 11b
+endfunc
+
+func ff_put_rv40_chroma_mc4_rvv, zve32x
+ li t6, 4
+ j 11b
+endfunc
+
+func ff_avg_rv40_chroma_mc8_rvv, zve32x
+ li t6, 8
+ j 21b
+endfunc
+
+func ff_avg_rv40_chroma_mc4_rvv, zve32x
+ li t6, 4
+ j 21b
+endfunc
diff --git a/libavcodec/riscv/sbrdsp_init.c b/libavcodec/riscv/sbrdsp_init.c
index 2ed46153ea..d3bafa961e 100644
--- a/libavcodec/riscv/sbrdsp_init.c
+++ b/libavcodec/riscv/sbrdsp_init.c
@@ -26,7 +26,6 @@
void ff_sbr_sum64x5_rvv(float *z);
float ff_sbr_sum_square_rvv(float (*x)[2], int n);
-void ff_sbr_neg_odd_64_rvv(float *x);
void ff_sbr_autocorrelate_rvv(const float x[40][2], float phi[3][2][2]);
void ff_sbr_hf_gen_rvv(float (*X_high)[2], const float (*X_low)[2],
const float alpha0[2], const float alpha1[2],
@@ -53,7 +52,7 @@ av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
c->sum_square = ff_sbr_sum_square_rvv;
c->hf_gen = ff_sbr_hf_gen_rvv;
c->hf_g_filt = ff_sbr_hf_g_filt_rvv;
- if (ff_get_rv_vlenb() <= 16) {
+ if (ff_rv_vlen_least(128)) {
c->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_rvv;
c->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_rvv;
if (flags & AV_CPU_FLAG_RVB_BASIC) {
@@ -64,9 +63,5 @@ av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
}
c->autocorrelate = ff_sbr_autocorrelate_rvv;
}
-#if __riscv_xlen >= 64
- if ((flags & AV_CPU_FLAG_RVV_I64) && (flags & AV_CPU_FLAG_RVB_ADDR))
- c->neg_odd_64 = ff_sbr_neg_odd_64_rvv;
-#endif
#endif
}
diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S
index 02feb6451e..aba9a28108 100644
--- a/libavcodec/riscv/sbrdsp_rvv.S
+++ b/libavcodec/riscv/sbrdsp_rvv.S
@@ -68,23 +68,6 @@ NOHWF fmv.x.w a0, fa0
ret
endfunc
-#if __riscv_xlen >= 64
-func ff_sbr_neg_odd_64_rvv, zve64x
- li a1, 32
- li t1, 1 << 63
-1:
- vsetvli t0, a1, e64, m8, ta, ma
- vle64.v v8, (a0)
- sub a1, a1, t0
- vxor.vx v8, v8, t1
- vse64.v v8, (a0)
- sh3add a0, t0, a0
- bnez t0, 1b
-
- ret
-endfunc
-#endif
-
func ff_sbr_autocorrelate_rvv, zve32f
vsetvli t0, zero, e32, m4, ta, ma
vmv.v.x v0, zero
@@ -290,16 +273,16 @@ endfunc
ret
.endm
-func ff_sbr_hf_apply_noise_0_rvv, zve32f
+func ff_sbr_hf_apply_noise_0_rvv, zve32f, zbb
hf_apply_noise 0
endfunc
-func ff_sbr_hf_apply_noise_3_rvv, zve32f
+func ff_sbr_hf_apply_noise_3_rvv, zve32f, zbb
not a4, a4 // invert parity of kx
// fall through
endfunc
-func ff_sbr_hf_apply_noise_1_rvv, zve32f
+func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb
vsetvli t0, zero, e32, m4, ta, ma
vid.v v4
vxor.vx v4, v4, a4
@@ -307,6 +290,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f
hf_apply_noise 1
endfunc
-func ff_sbr_hf_apply_noise_2_rvv, zve32f
+func ff_sbr_hf_apply_noise_2_rvv, zve32f, zbb
hf_apply_noise 2
endfunc
diff --git a/libavcodec/riscv/startcode_rvb.S b/libavcodec/riscv/startcode_rvb.S
new file mode 100644
index 0000000000..c043d59809
--- /dev/null
+++ b/libavcodec/riscv/startcode_rvb.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+ .macro lx rd, addr
+#if (__riscv_xlen == 32)
+ lw \rd, \addr
+#elif (__riscv_xlen == 64)
+ ld \rd, \addr
+#else
+ lq \rd, \addr
+#endif
+ .endm
+
+func ff_startcode_find_candidate_rvb, zbb
+ add a1, a0, a1
+
+ // Potentially unaligned head
+ andi t0, a0, -(__riscv_xlen / 8)
+ beq a0, a1, 2f
+
+ andi t1, a0, (__riscv_xlen / 8) - 1
+ lx t2, (t0)
+ li t3, __riscv_xlen
+ orc.b t2, t2
+ slli t1, t1, 3
+ not t2, t2
+ sub t3, t3, t1
+ srl t2, t2, t1
+ addi t0, t0, __riscv_xlen / 8
+ sll t2, t2, t1
+ bnez t2, 4f
+
+ // Main loop (including potentially short tail)
+ bge t0, a1, 2f
+ li t3, -1
+1:
+ lx t2, (t0)
+ addi t0, t0, __riscv_xlen / 8
+ orc.b t2, t2
+ bne t2, t3, 3f // t2 != -1 iff (at least one) zero byte
+ blt t0, a1, 1b
+
+2: // No zero byte found
+ sub a0, a1, a0
+ ret
+
+3: // Zero byte found in main loop
+ not t2, t2
+4: // Zero byte found in head
+ ctz t2, t2
+ addi t0, t0, -(__riscv_xlen / 8) // back-track
+ srl t2, t2, 3
+ add t0, t0, t2
+ // Uncomment the following line for exact POSIX C strnlen() semantics.
+ //minu t0, t0, a1 // ignore zero byte in tail
+ sub a0, t0, a0
+ ret
+endfunc
diff --git a/libavcodec/riscv/startcode_rvv.S b/libavcodec/riscv/startcode_rvv.S
new file mode 100644
index 0000000000..7c43b1d7f3
--- /dev/null
+++ b/libavcodec/riscv/startcode_rvv.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+func ff_startcode_find_candidate_rvv, zve32x
+ mv t0, a0
+1:
+ vsetvli t1, a1, e8, m8, ta, ma
+ vle8.v v8, (t0)
+ sub a1, a1, t1
+ vmseq.vi v0, v8, 0
+ vfirst.m t2, v0
+ bgez t2, 2f
+ add t0, t0, t1
+ bnez a1, 1b
+2:
+ add t0, t0, t2
+ sub a0, t0, a0
+ ret
+endfunc
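A minimal scalar C sketch of ff_startcode_find_candidate_rvb/_rvv, inferred from the assembly above: both return the offset of the first zero byte (or size if there is none), which callers use to skip quickly to a possible 00 00 01 start code.

#include <stdint.h>

/* Scalar model of ff_startcode_find_candidate_*. */
static int startcode_find_candidate_ref(const uint8_t *buf, int size)
{
    int i = 0;
    while (i < size && buf[i])
        i++;
    return i;
}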
diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c
index e47b644f80..f105a3a3c6 100644
--- a/libavcodec/riscv/vc1dsp_init.c
+++ b/libavcodec/riscv/vc1dsp_init.c
@@ -29,19 +29,42 @@ void ff_vc1_inv_trans_8x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block
void ff_vc1_inv_trans_4x8_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
void ff_vc1_inv_trans_8x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
void ff_vc1_inv_trans_4x4_dc_rvv(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_put_pixels16x16_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
+void ff_put_pixels8x8_rvi(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
+void ff_avg_pixels16x16_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
+void ff_avg_pixels8x8_rvv(uint8_t *dst, const uint8_t *src, ptrdiff_t line_size, int rnd);
+int ff_startcode_find_candidate_rvb(const uint8_t *, int);
+int ff_startcode_find_candidate_rvv(const uint8_t *, int);
+int ff_vc1_unescape_buffer_rvv(const uint8_t *, int, uint8_t *);
av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp)
{
-#if HAVE_RVV
+#if HAVE_RV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
- dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
- dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
- if (flags & AV_CPU_FLAG_RVV_I64) {
- dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
- dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
+# if __riscv_xlen >= 64
+ if (flags & AV_CPU_FLAG_RVI) {
+ dsp->put_vc1_mspel_pixels_tab[1][0] = ff_put_pixels8x8_rvi;
+ dsp->put_vc1_mspel_pixels_tab[0][0] = ff_put_pixels16x16_rvi;
+ }
+# endif
+ if (flags & AV_CPU_FLAG_RVB_BASIC)
+ dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb;
+# if HAVE_RVV
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (ff_rv_vlen_least(128)) {
+ dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv;
+ dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv;
+ dsp->avg_vc1_mspel_pixels_tab[0][0] = ff_avg_pixels16x16_rvv;
+ if (flags & AV_CPU_FLAG_RVV_I64) {
+ dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv;
+ dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv;
+ dsp->avg_vc1_mspel_pixels_tab[1][0] = ff_avg_pixels8x8_rvv;
+ }
}
+ dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
+ dsp->vc1_unescape_buffer = ff_vc1_unescape_buffer_rvv;
}
+# endif
#endif
}
diff --git a/libavcodec/riscv/vc1dsp_rvi.S b/libavcodec/riscv/vc1dsp_rvi.S
new file mode 100644
index 0000000000..d4a1b5bf49
--- /dev/null
+++ b/libavcodec/riscv/vc1dsp_rvi.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_put_pixels8x8_rvi
+.rept 8
+ ld t0, (a1)
+ sd t0, (a0)
+ add a1, a1, a2
+ add a0, a0, a2
+.endr
+
+ ret
+endfunc
+
+func ff_put_pixels16x16_rvi
+.rept 16
+ ld t0, (a1)
+ ld t1, 8(a1)
+ sd t0, (a0)
+ sd t1, 8(a0)
+ add a1, a1, a2
+ add a0, a0, a2
+.endr
+
+ ret
+endfunc
+#endif
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index 4a00945ead..1166f35cad 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ * Copyright (c) 2024 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
@@ -111,3 +112,103 @@ func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
vsse32.v v0, (a0), a1
ret
endfunc
+
+.macro mspel_op op pos n1 n2
+ add t1, \pos, a2
+ v\op\()e8.v v\n1, (\pos)
+ sh1add \pos, a2, \pos
+ v\op\()e8.v v\n2, (t1)
+.endm
+
+.macro mspel_op_all op pos a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15 a16
+ mspel_op \op \pos \a1 \a2
+ mspel_op \op \pos \a3 \a4
+ mspel_op \op \pos \a5 \a6
+ mspel_op \op \pos \a7 \a8
+ mspel_op \op \pos \a9 \a10
+ mspel_op \op \pos \a11 \a12
+ mspel_op \op \pos \a13 \a14
+ mspel_op \op \pos \a15 \a16
+.endm
+
+func ff_avg_pixels16x16_rvv, zve32x
+ csrwi vxrm, 0
+ vsetivli zero, 16, e8, m1, ta, ma
+ mspel_op_all l a1 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ mspel_op_all l a0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ vsetvli t0, zero, e8, m8, ta, ma
+ sub a0, a0, a2
+ vaaddu.vv v0, v0, v16
+ neg a2, a2
+ vaaddu.vv v8, v8, v24
+ vsetivli zero, 16, e8, m1, ta, ma
+ mspel_op_all s a0 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+
+ ret
+endfunc
+
+func ff_avg_pixels8x8_rvv, zve64x
+ csrwi vxrm, 0
+ li t0, 64
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vlse64.v v16, (a1), a2
+ vlse64.v v8, (a0), a2
+ vsetvli zero, t0, e8, m4, ta, ma
+ vaaddu.vv v16, v16, v8
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v16, (a0), a2
+
+ ret
+endfunc
+
+func ff_vc1_unescape_buffer_rvv, zve32x
+ vsetivli zero, 2, e8, m1, ta, ma
+ vmv.v.i v8, -1
+ li t4, 1
+ vmv.v.i v12, -1
+ li t3, -1
+ mv t5, a2
+ blez a1, 3f
+1:
+ vsetvli t0, a1, e8, m4, ta, ma
+ vle8.v v16, (a0)
+ vslideup.vi v8, v16, 2
+ addi t0, t0, -1 # we cannot fully process the last element
+ vslideup.vi v12, v16, 1
+ vslide1down.vx v20, v16, t3
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmseq.vi v0, v8, 0
+ vmseq.vi v1, v12, 0
+ vmseq.vi v2, v16, 3
+ vmand.mm v0, v0, v1
+ vmsltu.vi v3, v20, 4
+ vmand.mm v0, v0, v2
+ vmand.mm v0, v0, v3
+ vfirst.m t2, v0
+ bgez t2, 4f # found an escape byte?
+
+ vse8.v v16, (a2)
+ addi t2, t0, -2
+ add a2, a2, t0
+2:
+ vslidedown.vx v8, v16, t2
+ sub a1, a1, t0
+ vslidedown.vi v12, v8, 1
+ add a0, a0, t0
+ bgtu a1, t4, 1b // size > 1
+
+ lb t0, (a0)
+ sb t0, (a2) # copy last byte (cannot be escaped)
+ addi a2, a2, 1
+3:
+ sub a0, a2, t5
+ ret
+4:
+ vsetvli zero, t2, e8, m4, ta, ma
+ vse8.v v16, (a2)
+ addi t0, t2, 1
+ add a2, a2, t2
+ addi t2, t2, -1
+ vsetvli zero, t0, e8, m4, ta, ma
+ j 2b
+endfunc
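A minimal scalar C sketch of ff_vc1_unescape_buffer_rvv, inferred from the mask computation above: a 0x03 byte that follows two zero bytes and is followed by a byte smaller than 4 is a start-code emulation-prevention byte and is dropped from the output.

#include <stdint.h>

/* Scalar model of ff_vc1_unescape_buffer_rvv; returns bytes written. */
static int vc1_unescape_buffer_ref(const uint8_t *src, int size, uint8_t *dst)
{
    int d = 0;
    for (int i = 0; i < size; i++) {
        if (i >= 2 && i + 1 < size &&
            src[i - 2] == 0 && src[i - 1] == 0 &&
            src[i] == 3 && src[i + 1] < 4)
            continue;               /* drop the escape byte */
        dst[d++] = src[i];
    }
    return d;
}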
diff --git a/libavcodec/riscv/vp8dsp.h b/libavcodec/riscv/vp8dsp.h
new file mode 100644
index 0000000000..971c5c0a96
--- /dev/null
+++ b/libavcodec/riscv/vp8dsp.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP8DSP_H
+#define AVCODEC_RISCV_VP8DSP_H
+
+#include "libavcodec/vp8dsp.h"
+
+#define VP8_LF_Y(hv, inner, opt) \
+ void ff_vp8_##hv##_loop_filter16##inner##_##opt(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim_E, int flim_I, \
+ int hev_thresh)
+
+#define VP8_LF_UV(hv, inner, opt) \
+ void ff_vp8_##hv##_loop_filter8uv##inner##_##opt(uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t stride, \
+ int flim_E, int flim_I, \
+ int hev_thresh)
+
+#define VP8_LF_SIMPLE(hv, opt) \
+ void ff_vp8_##hv##_loop_filter16_simple_##opt(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim)
+
+#define VP8_LF_HV(inner, opt) \
+ VP8_LF_Y(h, inner, opt); \
+ VP8_LF_Y(v, inner, opt); \
+ VP8_LF_UV(h, inner, opt); \
+ VP8_LF_UV(v, inner, opt)
+
+#define VP8_LF(opt) \
+ VP8_LF_HV(, opt); \
+ VP8_LF_HV(_inner, opt); \
+ VP8_LF_SIMPLE(h, opt); \
+ VP8_LF_SIMPLE(v, opt)
+
+#define VP8_MC(n, opt) \
+ void ff_put_vp8_##n##_##opt(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride,\
+ int h, int x, int y)
+
+#define VP8_EPEL(w, opt) \
+ VP8_MC(pixels ## w, opt); \
+ VP8_MC(epel ## w ## _h4, opt); \
+ VP8_MC(epel ## w ## _h6, opt); \
+ VP8_MC(epel ## w ## _v4, opt); \
+ VP8_MC(epel ## w ## _h4v4, opt); \
+ VP8_MC(epel ## w ## _h6v4, opt); \
+ VP8_MC(epel ## w ## _v6, opt); \
+ VP8_MC(epel ## w ## _h4v6, opt); \
+ VP8_MC(epel ## w ## _h6v6, opt)
+
+#define VP8_BILIN(w, opt) \
+ VP8_MC(bilin ## w ## _h, opt); \
+ VP8_MC(bilin ## w ## _v, opt); \
+ VP8_MC(bilin ## w ## _hv, opt)
+
+#endif /* AVCODEC_RISCV_VP8DSP_H */
diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index af57aabb71..31e8227fa4 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -24,17 +24,90 @@
#include "libavutil/cpu.h"
#include "libavutil/riscv/cpu.h"
#include "libavcodec/vp8dsp.h"
+#include "vp8dsp.h"
void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
+VP8_EPEL(16, rvi);
+VP8_EPEL(8, rvi);
+VP8_EPEL(4, rvi);
+VP8_EPEL(16, rvv);
+VP8_EPEL(8, rvv);
+VP8_EPEL(4, rvv);
+
+VP8_BILIN(16, rvv);
+VP8_BILIN(8, rvv);
+VP8_BILIN(4, rvv);
+
+av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
+{
+#if HAVE_RV
+ int flags = av_get_cpu_flags();
+ if (flags & AV_CPU_FLAG_RV_MISALIGNED) {
+#if __riscv_xlen >= 64
+ c->put_vp8_epel_pixels_tab[0][0][0] = ff_put_vp8_pixels16_rvi;
+ c->put_vp8_epel_pixels_tab[1][0][0] = ff_put_vp8_pixels8_rvi;
+ c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_rvi;
+ c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_rvi;
+#endif
+ c->put_vp8_epel_pixels_tab[2][0][0] = ff_put_vp8_pixels4_rvi;
+ c->put_vp8_bilinear_pixels_tab[2][0][0] = ff_put_vp8_pixels4_rvi;
+ }
+#if HAVE_RVV
+ if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
+ c->put_vp8_bilinear_pixels_tab[0][0][1] = ff_put_vp8_bilin16_h_rvv;
+ c->put_vp8_bilinear_pixels_tab[0][0][2] = ff_put_vp8_bilin16_h_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][0][1] = ff_put_vp8_bilin8_h_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][0][2] = ff_put_vp8_bilin8_h_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][0][1] = ff_put_vp8_bilin4_h_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][0][2] = ff_put_vp8_bilin4_h_rvv;
+
+ c->put_vp8_bilinear_pixels_tab[0][1][0] = ff_put_vp8_bilin16_v_rvv;
+ c->put_vp8_bilinear_pixels_tab[0][2][0] = ff_put_vp8_bilin16_v_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][1][0] = ff_put_vp8_bilin8_v_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][2][0] = ff_put_vp8_bilin8_v_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][1][0] = ff_put_vp8_bilin4_v_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][2][0] = ff_put_vp8_bilin4_v_rvv;
+
+ c->put_vp8_bilinear_pixels_tab[0][1][1] = ff_put_vp8_bilin16_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[0][1][2] = ff_put_vp8_bilin16_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[0][2][1] = ff_put_vp8_bilin16_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[0][2][2] = ff_put_vp8_bilin16_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][1][1] = ff_put_vp8_bilin8_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][1][2] = ff_put_vp8_bilin8_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][2][1] = ff_put_vp8_bilin8_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[1][2][2] = ff_put_vp8_bilin8_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][1][1] = ff_put_vp8_bilin4_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][1][2] = ff_put_vp8_bilin4_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_rvv;
+ c->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_rvv;
+
+ c->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_rvv;
+ c->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_rvv;
+ c->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_rvv;
+ c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
+ c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
+ c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+
+ c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+ c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+ c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+ c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+ c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+ c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
+ }
+#endif
+#endif
+}
+
av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
{
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ if (flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
if (flags & AV_CPU_FLAG_RVB_ADDR) {
diff --git a/libavcodec/riscv/vp8dsp_rvi.S b/libavcodec/riscv/vp8dsp_rvi.S
new file mode 100644
index 0000000000..50ba4f293f
--- /dev/null
+++ b/libavcodec/riscv/vp8dsp_rvi.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_put_vp8_pixels16_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ ld t1, 8(a2)
+ sd t0, (a0)
+ sd t1, 8(a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+
+func ff_put_vp8_pixels8_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ sd t0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+#endif
+
+func ff_put_vp8_pixels4_rvi
+1:
+ addi a4, a4, -1
+ lw t0, (a2)
+ sw t0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 8a0773f964..0ba9fa443d 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -20,6 +20,28 @@
#include "libavutil/riscv/asm.S"
+.macro vsetvlstatic8 len
+.if \len <= 4
+ vsetivli zero, \len, e8, mf4, ta, ma
+.elseif \len <= 8
+ vsetivli zero, \len, e8, mf2, ta, ma
+.elseif \len <= 16
+ vsetivli zero, \len, e8, m1, ta, ma
+.elseif \len <= 31
+ vsetivli zero, \len, e8, m2, ta, ma
+.endif
+.endm
+
+.macro vsetvlstatic16 len
+.if \len <= 4
+ vsetivli zero, \len, e16, mf2, ta, ma
+.elseif \len <= 8
+ vsetivli zero, \len, e16, m1, ta, ma
+.elseif \len <= 16
+ vsetivli zero, \len, e16, m2, ta, ma
+.endif
+.endm
+
.macro vp8_idct_dc_add
vlse32.v v0, (a0), a2
lh a5, 0(a1)
@@ -71,3 +93,155 @@ func ff_vp8_idct_dc_add4uv_rvv, zve32x
ret
endfunc
+
+.macro bilin_load dst len type mn
+.ifc \type,v
+ add t5, a2, a3
+.else
+ addi t5, a2, 1
+.endif
+ vle8.v \dst, (a2)
+ vle8.v v2, (t5)
+ vwmulu.vx v28, \dst, t1
+ vwmaccu.vx v28, \mn, v2
+ vwaddu.wx v24, v28, t4
+ vnsra.wi \dst, v24, 3
+.endm
+
+.macro put_vp8_bilin_h_v len type mn
+func ff_put_vp8_bilin\len\()_\type\()_rvv, zve32x
+ vsetvlstatic8 \len
+ li t1, 8
+ li t4, 4
+ sub t1, t1, \mn
+1:
+ addi a4, a4, -1
+ bilin_load v0, \len, \type, \mn
+ vse8.v v0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+.endm
+
+.macro put_vp8_bilin_hv len
+func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
+ vsetvlstatic8 \len
+ li t3, 8
+ sub t1, t3, a5
+ sub t2, t3, a6
+ li t4, 4
+ bilin_load v4, \len, h, a5
+ add a2, a2, a3
+1:
+ addi a4, a4, -1
+ vwmulu.vx v20, v4, t2
+ bilin_load v4, \len, h, a5
+ vwmaccu.vx v20, a6, v4
+ vwaddu.wx v24, v20, t4
+ vnsra.wi v0, v24, 3
+ vse8.v v0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+.endm
+
+const subpel_filters
+ .byte 0, -6, 123, 12, -1, 0
+ .byte 2, -11, 108, 36, -8, 1
+ .byte 0, -9, 93, 50, -6, 0
+ .byte 3, -16, 77, 77, -16, 3
+ .byte 0, -6, 50, 93, -9, 0
+ .byte 1, -8, 36, 108, -11, 2
+ .byte 0, -1, 12, 123, -6, 0
+endconst
+
+.macro epel_filter size type
+ lla t2, subpel_filters
+.ifc \type,v
+ addi t0, a6, -1
+.else
+ addi t0, a5, -1
+.endif
+ li t1, 6
+ mul t0, t0, t1
+ add t0, t0, t2
+ .irp n,1,2,3,4
+ lb t\n, \n(t0)
+ .endr
+.ifc \size,6
+ lb t5, 5(t0)
+ lb t0, (t0)
+.endif
+.endm
+
+.macro epel_load dst len size type
+.ifc \type,v
+ mv a5, a3
+.else
+ li a5, 1
+.endif
+ sub t6, a2, a5
+ add a7, a2, a5
+
+ vle8.v v24, (a2)
+ vle8.v v22, (t6)
+ vle8.v v26, (a7)
+ add a7, a7, a5
+ vle8.v v28, (a7)
+ vwmulu.vx v16, v24, t2
+ vwmulu.vx v20, v26, t3
+.ifc \size,6
+ sub t6, t6, a5
+ add a7, a7, a5
+ vle8.v v24, (t6)
+ vle8.v v26, (a7)
+ vwmaccu.vx v16, t0, v24
+ vwmaccu.vx v16, t5, v26
+.endif
+ li t6, 64
+ vwmaccsu.vx v16, t1, v22
+ vwmaccsu.vx v16, t4, v28
+ vwadd.wx v16, v16, t6
+ vsetvlstatic16 \len
+ vwadd.vv v24, v16, v20
+ vnsra.wi v24, v24, 7
+ vmax.vx v24, v24, zero
+ vsetvlstatic8 \len
+ vnclipu.wi \dst, v24, 0
+.endm
+
+.macro epel_load_inc dst len size type
+ epel_load \dst \len \size \type
+ add a2, a2, a3
+.endm
+
+.macro epel len size type
+func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+ epel_filter \size \type
+ vsetvlstatic8 \len
+1:
+ addi a4, a4, -1
+ epel_load_inc v30 \len \size \type
+ vse8.v v30, (a0)
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+.endm
+
+.irp len,16,8,4
+put_vp8_bilin_h_v \len h a5
+put_vp8_bilin_h_v \len v a6
+put_vp8_bilin_hv \len
+epel \len 6 h
+epel \len 4 h
+epel \len 6 v
+epel \len 4 v
+.endr
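A minimal scalar C sketch of the bilinear kernels generated by put_vp8_bilin_h_v above (horizontal case shown; the vertical case uses the pixel one source line below instead of the next pixel, and the hv case chains the two passes).

#include <stdint.h>
#include <stddef.h>

/* Scalar model of the 1/8-pel VP8 bilinear tap: (a*(8-f) + b*f + 4) >> 3. */
static void put_vp8_bilin_h_ref(uint8_t *dst, ptrdiff_t dststride,
                                const uint8_t *src, ptrdiff_t srcstride,
                                int w, int h, int mx)
{
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++)
            dst[x] = (src[x] * (8 - mx) + src[x + 1] * mx + 4) >> 3;
        dst += dststride;
        src += srcstride;
    }
}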
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
new file mode 100644
index 0000000000..16b6bdb25a
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvi.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_v_32x32_rvi
+ ld t0, (a3)
+ ld t1, 8(a3)
+ ld t2, 16(a3)
+ ld t3, 24(a3)
+ .rept 16
+ add a7, a0, a1
+ sd t0, (a0)
+ sd t1, 8(a0)
+ sd t2, 16(a0)
+ sd t3, 24(a0)
+ sh1add a0, a1, a0
+ sd t0, (a7)
+ sd t1, 8(a7)
+ sd t2, 16(a7)
+ sd t3, 24(a7)
+ .endr
+
+ ret
+endfunc
+
+func ff_v_16x16_rvi
+ ld t0, (a3)
+ ld t1, 8(a3)
+ .rept 8
+ add a7, a0, a1
+ sd t0, (a0)
+ sd t1, 8(a0)
+ sh1add a0, a1, a0
+ sd t0, (a7)
+ sd t1, 8(a7)
+ .endr
+
+ ret
+endfunc
+
+func ff_v_8x8_rvi
+ ld t0, (a3)
+ .rept 4
+ add a7, a0, a1
+ sd t0, (a0)
+ sh1add a0, a1, a0
+ sd t0, (a7)
+ .endr
+
+ ret
+endfunc
+#endif
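
For orientation, the RVI vertical predictors above just replicate the row of
samples above the block, storing two rows per iteration (sh1add advances the
destination by two strides). A scalar sketch of the equivalent operation,
using the (dst, stride, left, top) argument order declared in vp9dsp.h below
(names are illustrative):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Vertical intra prediction: every row is a copy of the samples above. */
    static void v_pred(uint8_t *dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *top, int size)
    {
        for (int y = 0; y < size; y++)
            memcpy(dst + y * stride, top, size);
    }
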
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..beeb1ff88c
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ addi t1, t1, 1 << (\size - 1)
+ srai t1, t1, \size
+.endm
+
+.macro getdc type size
+.ifc \type,top
+ vmv.v.x v16, zero
+ vle8.v v8, (a3)
+ avgdc \size
+.else
+.ifc \type,left
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ avgdc \size
+.else
+.ifc \type,dc
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vle8.v v8, (a3)
+ avgdc \size
+.else
+ li t1, \type
+.endif
+.endif
+.endif
+.endm
+
+.macro dc_e32 type size n restore
+.ifc \size,32
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+.else
+ vsetivli zero, 16, e8, m1, ta, ma
+.endif
+ getdc \type \n
+
+.if \restore == 1 && \size == 32
+ vsetvli zero, t0, e8, m2, ta, ma
+.elseif \restore == 1 && \size == 16
+ vsetivli zero, 16, e8, m1, ta, ma
+.endif
+ vmv.v.x v0, t1
+
+ .rept \size
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+
+ ret
+.endm
+
+.macro dc_e64 type size n restore
+ vsetivli zero, 8, e8, mf2, ta, ma
+ getdc \type \n
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmv.v.x v0, t1
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v0, (a0), a1
+
+ ret
+.endm
+
+.macro func_dc name size type n restore ext
+func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
+.if \size == 8
+ dc_e64 \type \size \n \restore
+.else
+ dc_e32 \type \size \n \restore
+.endif
+endfunc
+.endm
+
+func_dc dc_127 32 127 0 0 zve32x
+func_dc dc_127 16 127 0 0 zve32x
+func_dc dc_127 8 127 0 0 zve64x
+func_dc dc_128 32 128 0 0 zve32x
+func_dc dc_128 16 128 0 0 zve32x
+func_dc dc_128 8 128 0 0 zve64x
+func_dc dc_129 32 129 0 0 zve32x
+func_dc dc_129 16 129 0 0 zve32x
+func_dc dc_129 8 129 0 0 zve64x
+func_dc dc 32 dc 6 1 zve32x
+func_dc dc 16 dc 5 1 zve32x
+func_dc dc 8 dc 4 0 zve64x
+func_dc dc_left 32 left 5 1 zve32x
+func_dc dc_left 16 left 4 1 zve32x
+func_dc dc_left 8 left 3 0 zve64x
+func_dc dc_top 32 top 5 1 zve32x
+func_dc dc_top 16 top 4 1 zve32x
+func_dc dc_top 8 top 3 0 zve64x
+
+func ff_h_32x32_rvv, zve32x
+ li t0, 32
+ addi a2, a2, 31
+ vsetvli zero, t0, e8, m2, ta, ma
+
+ .rept 2
+ .irp n, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+ lbu t1, (a2)
+ addi a2, a2, -1
+ vmv.v.x v\n, t1
+ .endr
+ .irp n, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ .endr
+
+ ret
+endfunc
+
+func ff_h_16x16_rvv, zve32x
+ addi a2, a2, 15
+ vsetivli zero, 16, e8, m1, ta, ma
+
+ .irp n, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
+ lbu t1, (a2)
+ addi a2, a2, -1
+ vmv.v.x v\n, t1
+ .endr
+ .irp n, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v23, (a0)
+
+ ret
+endfunc
+
+func ff_h_8x8_rvv, zve32x
+ addi a2, a2, 7
+ vsetivli zero, 8, e8, mf2, ta, ma
+
+ .irp n, 8, 9, 10, 11, 12, 13, 14, 15
+ lbu t1, (a2)
+ addi a2, a2, -1
+ vmv.v.x v\n, t1
+ .endr
+ .irp n, 8, 9, 10, 11, 12, 13, 14
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v15, (a0)
+
+ ret
+endfunc
+
+.macro tm_sum4 dst1, dst2, dst3, dst4, top, n1
+ lbu t1, \n1(a2)
+ lbu t2, (\n1-1)(a2)
+ lbu t3, (\n1-2)(a2)
+ lbu t4, (\n1-3)(a2)
+ sub t1, t1, a4
+ sub t2, t2, a4
+ sub t3, t3, a4
+ sub t4, t4, a4
+ vadd.vx \dst1, \top, t1
+ vadd.vx \dst2, \top, t2
+ vadd.vx \dst3, \top, t3
+ vadd.vx \dst4, \top, t4
+.endm
+
+func ff_tm_32x32_rvv, zve32x
+ lbu a4, -1(a3)
+ li t5, 32
+
+ .irp offset, 31, 23, 15, 7
+ vsetvli zero, t5, e16, m4, ta, ma
+ vle8.v v8, (a3)
+ vzext.vf2 v28, v8
+
+ tm_sum4 v0, v4, v8, v12, v28, \offset
+ tm_sum4 v16, v20, v24, v28, v28, (\offset-4)
+
+ .irp n, 0, 4, 8, 12, 16, 20, 24, 28
+ vmax.vx v\n, v\n, zero
+ .endr
+
+ vsetvli zero, zero, e8, m2, ta, ma
+ .irp n, 0, 4, 8, 12, 16, 20, 24, 28
+ vnclipu.wi v\n, v\n, 0
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ .endr
+
+ ret
+endfunc
+
+func ff_tm_16x16_rvv, zve32x
+ vsetivli zero, 16, e16, m2, ta, ma
+ vle8.v v8, (a3)
+ vzext.vf2 v30, v8
+ lbu a4, -1(a3)
+
+ tm_sum4 v0, v2, v4, v6, v30, 15
+ tm_sum4 v8, v10, v12, v14, v30, 11
+ tm_sum4 v16, v18, v20, v22, v30, 7
+ tm_sum4 v24, v26, v28, v30, v30, 3
+
+ .irp n, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+ vmax.vx v\n, v\n, zero
+ .endr
+
+ vsetvli zero, zero, e8, m1, ta, ma
+ .irp n, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28
+ vnclipu.wi v\n, v\n, 0
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ vnclipu.wi v30, v30, 0
+ vse8.v v30, (a0)
+
+ ret
+endfunc
+
+func ff_tm_8x8_rvv, zve32x
+ vsetivli zero, 8, e16, m1, ta, ma
+ vle8.v v8, (a3)
+ vzext.vf2 v28, v8
+ lbu a4, -1(a3)
+
+ tm_sum4 v16, v17, v18, v19, v28, 7
+ tm_sum4 v20, v21, v22, v23, v28, 3
+
+ .irp n, 16, 17, 18, 19, 20, 21, 22, 23
+ vmax.vx v\n, v\n, zero
+ .endr
+
+ vsetvli zero, zero, e8, mf2, ta, ma
+ .irp n, 16, 17, 18, 19, 20, 21, 22
+ vnclipu.wi v\n, v\n, 0
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ vnclipu.wi v24, v23, 0
+ vse8.v v24, (a0)
+
+ ret
+endfunc
+
+func ff_tm_4x4_rvv, zve32x
+ vsetivli zero, 4, e16, mf2, ta, ma
+ vle8.v v8, (a3)
+ vzext.vf2 v28, v8
+ lbu a4, -1(a3)
+
+ tm_sum4 v16, v17, v18, v19, v28, 3
+
+ .irp n, 16, 17, 18, 19
+ vmax.vx v\n, v\n, zero
+ .endr
+
+ vsetvli zero, zero, e8, mf4, ta, ma
+ .irp n, 16, 17, 18
+ vnclipu.wi v\n, v\n, 0
+ vse8.v v\n, (a0)
+ add a0, a0, a1
+ .endr
+ vnclipu.wi v24, v19, 0
+ vse8.v v24, (a0)
+
+ ret
+endfunc
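
For reference, the DC and TM predictors above follow the usual VP9
definitions. The scalar sketch below (illustrative names) matches the
assembly, including the fact that this DSP interface passes the left edge
bottom to top, which is why tm_sum4 and the h predictors walk the left array
with descending offsets:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void dc_pred(uint8_t *dst, ptrdiff_t stride,
                        const uint8_t *left, const uint8_t *top, int size)
    {
        unsigned sum = size;               /* rounding term, (2*size)/2 */
        for (int i = 0; i < size; i++)
            sum += left[i] + top[i];
        for (int y = 0; y < size; y++)
            memset(dst + y * stride, sum / (2 * size), size);
    }

    static void tm_pred(uint8_t *dst, ptrdiff_t stride,
                        const uint8_t *left, const uint8_t *top, int size)
    {
        int tl = top[-1];                  /* top-left sample */
        for (int y = 0; y < size; y++)
            for (int x = 0; x < size; x++) {
                int v = left[size - 1 - y] + top[x] - tl;
                dst[y * stride + x] = v < 0 ? 0 : v > 255 ? 255 : v;
            }
    }
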
diff --git a/libavcodec/riscv/vp9_mc_rvi.S b/libavcodec/riscv/vp9_mc_rvi.S
new file mode 100644
index 0000000000..0db14e83c7
--- /dev/null
+++ b/libavcodec/riscv/vp9_mc_rvi.S
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_copy64_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ ld t1, 8(a2)
+ ld t2, 16(a2)
+ ld t3, 24(a2)
+ ld t4, 32(a2)
+ ld t5, 40(a2)
+ ld t6, 48(a2)
+ ld a7, 56(a2)
+ sd t0, (a0)
+ sd t1, 8(a0)
+ sd t2, 16(a0)
+ sd t3, 24(a0)
+ sd t4, 32(a0)
+ sd t5, 40(a0)
+ sd t6, 48(a0)
+ sd a7, 56(a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+
+func ff_copy32_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ ld t1, 8(a2)
+ ld t2, 16(a2)
+ ld t3, 24(a2)
+ sd t0, (a0)
+ sd t1, 8(a0)
+ sd t2, 16(a0)
+ sd t3, 24(a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+
+func ff_copy16_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ ld t1, 8(a2)
+ sd t0, (a0)
+ sd t1, 8(a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+
+func ff_copy8_rvi
+1:
+ addi a4, a4, -1
+ ld t0, (a2)
+ sd t0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
+#endif
+
+func ff_copy4_rvi
+1:
+ addi a4, a4, -1
+ lw t0, (a2)
+ sw t0, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+
+ ret
+endfunc
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
new file mode 100644
index 0000000000..7811cd9928
--- /dev/null
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro vsetvlstatic8 len an maxlen mn=m4
+.if \len == 4
+ vsetivli zero, \len, e8, mf4, ta, ma
+.elseif \len == 8
+ vsetivli zero, \len, e8, mf2, ta, ma
+.elseif \len == 16
+ vsetivli zero, \len, e8, m1, ta, ma
+.elseif \len == 32
+ li \an, \len
+ vsetvli zero, \an, e8, m2, ta, ma
+.elseif \len == 64
+ li \an, \maxlen
+ vsetvli zero, \an, e8, \mn, ta, ma
+.endif
+.endm
+
+.macro copy_avg len
+func ff_avg\len\()_rvv, zve32x
+ csrwi vxrm, 0
+ vsetvlstatic8 \len t0 64
+1:
+ vle8.v v8, (a2)
+ vle8.v v16, (a0)
+ vaaddu.vv v8, v8, v16
+ addi a4, a4, -1
+ vse8.v v8, (a0)
+ add a2, a2, a3
+ add a0, a0, a1
+ bnez a4, 1b
+ ret
+endfunc
+.endm
+
+.irp len, 64, 32, 16, 8, 4
+ copy_avg \len
+.endr
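
The copy_avg macro relies on vaaddu.vv with vxrm set to round-to-nearest-up,
i.e. a rounded byte average of the source with the existing destination.
A scalar sketch (illustrative names):

    #include <stddef.h>
    #include <stdint.h>

    static void avg(uint8_t *dst, ptrdiff_t dststride,
                    const uint8_t *src, ptrdiff_t srcstride, int w, int h)
    {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++)
                dst[x] = (dst[x] + src[x] + 1) >> 1;  /* rounded average */
            dst += dststride;
            src += srcstride;
        }
    }
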
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..79330b4968
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_H
+#define AVCODEC_RISCV_VP9DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_tm_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_tm_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_tm_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_tm_4x4_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) \
+void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE) \
+void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE) \
+void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+VP9_BILINEAR_RISCV_RVV_FUNC(64);
+VP9_BILINEAR_RISCV_RVV_FUNC(32);
+VP9_BILINEAR_RISCV_RVV_FUNC(16);
+VP9_BILINEAR_RISCV_RVV_FUNC(8);
+VP9_BILINEAR_RISCV_RVV_FUNC(4);
+
+VP9_COPY_AVG_RISCV_RVV_FUNC(64);
+VP9_COPY_AVG_RISCV_RVV_FUNC(32);
+VP9_COPY_AVG_RISCV_RVV_FUNC(16);
+VP9_COPY_AVG_RISCV_RVV_FUNC(8);
+VP9_COPY_AVG_RISCV_RVV_FUNC(4);
+
+#define VP9_COPY_RISCV_RVI_FUNC(SIZE) \
+void ff_copy##SIZE##_rvi(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_COPY_RISCV_RVI_FUNC(64);
+VP9_COPY_RISCV_RVI_FUNC(32);
+VP9_COPY_RISCV_RVI_FUNC(16);
+VP9_COPY_RISCV_RVI_FUNC(8);
+VP9_COPY_RISCV_RVI_FUNC(4);
+
+#undef VP9_8TAP_RISCV_RVV_FUNC
+#undef VP9_BILINEAR_RISCV_RVV_FUNC
+#undef VP9_COPY_AVG_RISCV_RVV_FUNC
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..6bfe23563a
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
+{
+#if HAVE_RV
+ int flags = av_get_cpu_flags();
+
+# if __riscv_xlen >= 64
+ if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED)) {
+
+#define init_fpel(idx1, sz) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][0][0][0] = ff_copy##sz##_rvi; \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][0][0][0] = ff_copy##sz##_rvi; \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][0][0][0] = ff_copy##sz##_rvi; \
+ dsp->mc[idx1][FILTER_BILINEAR ][0][0][0] = ff_copy##sz##_rvi
+
+ init_fpel(0, 64);
+ init_fpel(1, 32);
+ init_fpel(2, 16);
+ init_fpel(3, 8);
+ init_fpel(4, 4);
+
+#undef init_fpel
+ }
+# endif
+
+#if HAVE_RVV
+ if (bpp == 8 && (flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
+
+#define init_fpel(idx1, sz) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][1][0][0] = ff_avg##sz##_rvv; \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][1][0][0] = ff_avg##sz##_rvv; \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][1][0][0] = ff_avg##sz##_rvv; \
+ dsp->mc[idx1][FILTER_BILINEAR ][1][0][0] = ff_avg##sz##_rvv
+
+ init_fpel(0, 64);
+ init_fpel(1, 32);
+ init_fpel(2, 16);
+ init_fpel(3, 8);
+ init_fpel(4, 4);
+
+#undef init_fpel
+ }
+#endif
+#endif
+}
+
+static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
+{
+#if HAVE_RV
+ int flags = av_get_cpu_flags();
+
+# if __riscv_xlen >= 64
+ if (bpp == 8 && (flags & AV_CPU_FLAG_RVB_ADDR)) {
+ dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
+ dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
+ dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
+ }
+# endif
+#if HAVE_RVV
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ dsp->intra_pred[TX_32X32][HOR_PRED] = ff_h_32x32_rvv;
+ dsp->intra_pred[TX_16X16][HOR_PRED] = ff_h_16x16_rvv;
+ dsp->intra_pred[TX_8X8][HOR_PRED] = ff_h_8x8_rvv;
+ dsp->intra_pred[TX_32X32][TM_VP8_PRED] = ff_tm_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TM_VP8_PRED] = ff_tm_16x16_rvv;
+ dsp->intra_pred[TX_8X8][TM_VP8_PRED] = ff_tm_8x8_rvv;
+ dsp->intra_pred[TX_4X4][TM_VP8_PRED] = ff_tm_4x4_rvv;
+ }
+#endif
+#endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ vp9dsp_intrapred_init_riscv(dsp, bpp);
+ vp9dsp_mc_init_riscv(dsp, bpp);
+}
diff --git a/libavcodec/rkmppdec.c b/libavcodec/rkmppdec.c
index 7665098c6a..47b076dbd8 100644
--- a/libavcodec/rkmppdec.c
+++ b/libavcodec/rkmppdec.c
@@ -30,6 +30,7 @@
#include "codec_internal.h"
#include "decode.h"
#include "hwconfig.h"
+#include "refstruct.h"
#include "libavutil/buffer.h"
#include "libavutil/common.h"
#include "libavutil/frame.h"
@@ -57,12 +58,12 @@ typedef struct {
typedef struct {
AVClass *av_class;
- AVBufferRef *decoder_ref;
+ RKMPPDecoder *decoder; ///< RefStruct reference
} RKMPPDecodeContext;
typedef struct {
MppFrame frame;
- AVBufferRef *decoder_ref;
+ const RKMPPDecoder *decoder_ref; ///< RefStruct reference
} RKMPPFrameContext;
static MppCodingType rkmpp_get_codingtype(AVCodecContext *avctx)
@@ -90,7 +91,7 @@ static uint32_t rkmpp_get_frameformat(MppFrameFormat mppformat)
static int rkmpp_write_data(AVCodecContext *avctx, uint8_t *buffer, int size, int64_t pts)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
+ RKMPPDecoder *decoder = rk_context->decoder;
int ret;
MppPacket packet;
@@ -125,13 +126,13 @@ static int rkmpp_write_data(AVCodecContext *avctx, uint8_t *buffer, int size, in
static int rkmpp_close_decoder(AVCodecContext *avctx)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- av_buffer_unref(&rk_context->decoder_ref);
+ ff_refstruct_unref(&rk_context->decoder);
return 0;
}
-static void rkmpp_release_decoder(void *opaque, uint8_t *data)
+static void rkmpp_release_decoder(FFRefStructOpaque unused, void *obj)
{
- RKMPPDecoder *decoder = (RKMPPDecoder *)data;
+ RKMPPDecoder *decoder = obj;
if (decoder->mpi) {
decoder->mpi->reset(decoder->ctx);
@@ -146,8 +147,6 @@ static void rkmpp_release_decoder(void *opaque, uint8_t *data)
av_buffer_unref(&decoder->frames_ref);
av_buffer_unref(&decoder->device_ref);
-
- av_free(decoder);
}
static int rkmpp_init_decoder(AVCodecContext *avctx)
@@ -162,19 +161,13 @@ static int rkmpp_init_decoder(AVCodecContext *avctx)
avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
// create a decoder and a ref to it
- decoder = av_mallocz(sizeof(RKMPPDecoder));
+ decoder = ff_refstruct_alloc_ext(sizeof(*decoder), 0,
+ NULL, rkmpp_release_decoder);
if (!decoder) {
ret = AVERROR(ENOMEM);
goto fail;
}
-
- rk_context->decoder_ref = av_buffer_create((uint8_t *)decoder, sizeof(*decoder), rkmpp_release_decoder,
- NULL, AV_BUFFER_FLAG_READONLY);
- if (!rk_context->decoder_ref) {
- av_free(decoder);
- ret = AVERROR(ENOMEM);
- goto fail;
- }
+ rk_context->decoder = decoder;
av_log(avctx, AV_LOG_DEBUG, "Initializing RKMPP decoder.\n");
@@ -270,7 +263,7 @@ fail:
static int rkmpp_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
+ RKMPPDecoder *decoder = rk_context->decoder;
int ret;
// handle EOF
@@ -308,12 +301,10 @@ static int rkmpp_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
static void rkmpp_release_frame(void *opaque, uint8_t *data)
{
AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)data;
- AVBufferRef *framecontextref = (AVBufferRef *)opaque;
- RKMPPFrameContext *framecontext = (RKMPPFrameContext *)framecontextref->data;
+ RKMPPFrameContext *framecontext = opaque;
mpp_frame_deinit(&framecontext->frame);
- av_buffer_unref(&framecontext->decoder_ref);
- av_buffer_unref(&framecontextref);
+ ff_refstruct_unref(&framecontext->decoder_ref);
av_free(desc);
}
@@ -321,13 +312,10 @@ static void rkmpp_release_frame(void *opaque, uint8_t *data)
static int rkmpp_retrieve_frame(AVCodecContext *avctx, AVFrame *frame)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
- RKMPPFrameContext *framecontext = NULL;
- AVBufferRef *framecontextref = NULL;
+ RKMPPDecoder *decoder = rk_context->decoder;
int ret;
MppFrame mppframe = NULL;
MppBuffer buffer = NULL;
- AVDRMFrameDescriptor *desc = NULL;
AVDRMLayerDescriptor *layer = NULL;
int mode;
MppFrameFormat mppformat;
@@ -416,11 +404,21 @@ static int rkmpp_retrieve_frame(AVCodecContext *avctx, AVFrame *frame)
// now setup the frame buffer info
buffer = mpp_frame_get_buffer(mppframe);
if (buffer) {
- desc = av_mallocz(sizeof(AVDRMFrameDescriptor));
- if (!desc) {
+ RKMPPFrameContext *framecontext;
+ AVDRMFrameDescriptor *desc;
+ // We allocate the descriptor in buf[0] jointly with a structure
+ // that holds the additional information needed to properly release
+ // the MPP frame and the decoder.
+ struct {
+ AVDRMFrameDescriptor desc;
+ RKMPPFrameContext framecontext;
+ } *combined_desc = av_mallocz(sizeof(*combined_desc));
+ if (!combined_desc) {
ret = AVERROR(ENOMEM);
goto fail;
}
+ desc = &combined_desc->desc;
+ framecontext = &combined_desc->framecontext;
desc->nb_objects = 1;
desc->objects[0].fd = mpp_buffer_get_fd(buffer);
@@ -439,32 +437,24 @@ static int rkmpp_retrieve_frame(AVCodecContext *avctx, AVFrame *frame)
layer->planes[1].offset = layer->planes[0].pitch * mpp_frame_get_ver_stride(mppframe);
layer->planes[1].pitch = layer->planes[0].pitch;
- // we also allocate a struct in buf[0] that will allow to hold additionnal information
- // for releasing properly MPP frames and decoder
- framecontextref = av_buffer_allocz(sizeof(*framecontext));
- if (!framecontextref) {
- ret = AVERROR(ENOMEM);
- goto fail;
- }
-
// MPP decoder needs to be closed only when all frames have been released.
- framecontext = (RKMPPFrameContext *)framecontextref->data;
- framecontext->decoder_ref = av_buffer_ref(rk_context->decoder_ref);
framecontext->frame = mppframe;
frame->data[0] = (uint8_t *)desc;
frame->buf[0] = av_buffer_create((uint8_t *)desc, sizeof(*desc), rkmpp_release_frame,
- framecontextref, AV_BUFFER_FLAG_READONLY);
+ framecontext, AV_BUFFER_FLAG_READONLY);
if (!frame->buf[0]) {
+ av_free(combined_desc);
ret = AVERROR(ENOMEM);
goto fail;
}
+ framecontext->decoder_ref = ff_refstruct_ref(rk_context->decoder);
frame->hw_frames_ctx = av_buffer_ref(decoder->frames_ref);
if (!frame->hw_frames_ctx) {
- ret = AVERROR(ENOMEM);
- goto fail;
+ av_frame_unref(frame);
+ return AVERROR(ENOMEM);
}
return 0;
@@ -484,22 +474,13 @@ fail:
if (mppframe)
mpp_frame_deinit(&mppframe);
- if (framecontext)
- av_buffer_unref(&framecontext->decoder_ref);
-
- if (framecontextref)
- av_buffer_unref(&framecontextref);
-
- if (desc)
- av_free(desc);
-
return ret;
}
static int rkmpp_receive_frame(AVCodecContext *avctx, AVFrame *frame)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
+ RKMPPDecoder *decoder = rk_context->decoder;
int ret = MPP_NOK;
AVPacket pkt = {0};
RK_S32 usedslots, freeslots;
@@ -539,7 +520,7 @@ static int rkmpp_receive_frame(AVCodecContext *avctx, AVFrame *frame)
static void rkmpp_flush(AVCodecContext *avctx)
{
RKMPPDecodeContext *rk_context = avctx->priv_data;
- RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
+ RKMPPDecoder *decoder = rk_context->decoder;
int ret = MPP_NOK;
av_log(avctx, AV_LOG_DEBUG, "Flush.\n");
diff --git a/libavcodec/rtv1.c b/libavcodec/rtv1.c
index 807c8a3466..c40f9ada56 100644
--- a/libavcodec/rtv1.c
+++ b/libavcodec/rtv1.c
@@ -132,9 +132,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if (ret < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index d32faa628b..df487b24a9 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -38,7 +38,6 @@
#include "h263data.h"
#include "h263dec.h"
#include "mpeg_er.h"
-#include "mpegutils.h"
#include "mpegvideo.h"
#include "mpegvideodec.h"
#include "mpeg4video.h"
diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index 9e13e71805..316962fbbb 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -304,5 +304,4 @@ const FFCodec ff_rv30_decoder = {
AV_CODEC_CAP_FRAME_THREADS,
.flush = ff_mpeg_flush,
UPDATE_THREAD_CONTEXT(ff_rv34_decode_update_thread_context),
- .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index b15424d4ae..d59b3c2732 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -83,6 +83,7 @@ void ff_rv34dsp_init_riscv(RV34DSPContext *c);
void ff_rv34dsp_init_x86(RV34DSPContext *c);
void ff_rv40dsp_init_aarch64(RV34DSPContext *c);
+void ff_rv40dsp_init_riscv(RV34DSPContext *c);
void ff_rv40dsp_init_x86(RV34DSPContext *c);
void ff_rv40dsp_init_arm(RV34DSPContext *c);
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index e48aa1f684..19d4e742df 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -582,5 +582,4 @@ const FFCodec ff_rv40_decoder = {
AV_CODEC_CAP_FRAME_THREADS,
.flush = ff_mpeg_flush,
UPDATE_THREAD_CONTEXT(ff_rv34_decode_update_thread_context),
- .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index f0208b16ea..970faec5de 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -709,6 +709,8 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c)
ff_rv40dsp_init_aarch64(c);
#elif ARCH_ARM
ff_rv40dsp_init_arm(c);
+#elif ARCH_RISCV
+ ff_rv40dsp_init_riscv(c);
#elif ARCH_X86
ff_rv40dsp_init_x86(c);
#endif
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index 49782202a7..09b2cbfc10 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -50,6 +50,5 @@ void ff_sbrdsp_init_arm(SBRDSPContext *s);
void ff_sbrdsp_init_aarch64(SBRDSPContext *s);
void ff_sbrdsp_init_riscv(SBRDSPContext *s);
void ff_sbrdsp_init_x86(SBRDSPContext *s);
-void ff_sbrdsp_init_mips(SBRDSPContext *s);
#endif /* AVCODEC_SBRDSP_H */
diff --git a/libavcodec/sbrdsp_template.c b/libavcodec/sbrdsp_template.c
index c1e583ea56..80d535cbfa 100644
--- a/libavcodec/sbrdsp_template.c
+++ b/libavcodec/sbrdsp_template.c
@@ -104,8 +104,6 @@ av_cold void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s)
ff_sbrdsp_init_riscv(s);
#elif ARCH_X86
ff_sbrdsp_init_x86(s);
-#elif ARCH_MIPS
- ff_sbrdsp_init_mips(s);
#endif
#endif /* !USE_FIXED */
}
diff --git a/libavcodec/sgidec.c b/libavcodec/sgidec.c
index 04a347c51e..40186dcdb7 100644
--- a/libavcodec/sgidec.c
+++ b/libavcodec/sgidec.c
@@ -248,8 +248,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
MAP(2, 1);
break;
}
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
/* Skip header. */
bytestream2_seek(&g, SGI_HEADER_SIZE, SEEK_SET);
diff --git a/libavcodec/sgirledec.c b/libavcodec/sgirledec.c
index 18bf8081fc..3ce72eccb8 100644
--- a/libavcodec/sgirledec.c
+++ b/libavcodec/sgirledec.c
@@ -123,9 +123,6 @@ static int sgirle_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (ret < 0)
return ret;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/sheervideo.c b/libavcodec/sheervideo.c
index 660e2661a4..ef20633e1f 100644
--- a/libavcodec/sheervideo.c
+++ b/libavcodec/sheervideo.c
@@ -1972,9 +1972,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
return AVERROR_INVALIDDATA;
}
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
-
if ((ret = ff_thread_get_buffer(avctx, p, 0)) < 0)
return ret;
diff --git a/libavcodec/speedhqdec.c b/libavcodec/speedhqdec.c
index d3605b0649..829a91261e 100644
--- a/libavcodec/speedhqdec.c
+++ b/libavcodec/speedhqdec.c
@@ -40,6 +40,7 @@
#include "mpeg12data.h"
#include "mpeg12vlc.h"
#include "speedhq.h"
+#include "thread.h"
#define MAX_INDEX (64 - 1)
@@ -423,6 +424,9 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
return AVERROR_INVALIDDATA;
}
+ if (avctx->skip_frame >= AVDISCARD_ALL)
+ return avpkt->size;
+
compute_quant_matrix(s->quant_matrix, 100 - quality);
second_field_offset = AV_RL24(buf + 1);
@@ -433,10 +437,9 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
avctx->coded_width = FFALIGN(avctx->width, 16);
avctx->coded_height = FFALIGN(avctx->height, 16);
- if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
+ if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0) {
return ret;
}
- frame->flags |= AV_FRAME_FLAG_KEY;
if (second_field_offset == 4 || second_field_offset == (buf_size-4)) {
/*
@@ -649,5 +652,5 @@ const FFCodec ff_speedhq_decoder = {
.priv_data_size = sizeof(SHQContext),
.init = speedhq_decode_init,
FF_CODEC_DECODE_CB(speedhq_decode_frame),
- .p.capabilities = AV_CODEC_CAP_DR1,
+ .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
};
diff --git a/libavcodec/targa_y216dec.c b/libavcodec/targa_y216dec.c
index 2874a51aae..08f85391d8 100644
--- a/libavcodec/targa_y216dec.c
+++ b/libavcodec/targa_y216dec.c
@@ -47,9 +47,6 @@ static int y216_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
y = (uint16_t *)pic->data[0];
u = (uint16_t *)pic->data[1];
v = (uint16_t *)pic->data[2];
diff --git a/libavcodec/tests/aarch64/dct.c b/libavcodec/tests/aarch64/dct.c
index 9e477328d5..e98a887cd5 100644
--- a/libavcodec/tests/aarch64/dct.c
+++ b/libavcodec/tests/aarch64/dct.c
@@ -19,9 +19,11 @@
#include "config.h"
#include "libavutil/cpu.h"
+#include "libavcodec/aarch64/fdct.h"
#include "libavcodec/aarch64/idct.h"
static const struct algo fdct_tab_arch[] = {
+ { "neon", ff_fdct_neon, FF_IDCT_PERM_NONE, AV_CPU_FLAG_NEON },
{ 0 }
};
diff --git a/libavcodec/tests/avcodec.c b/libavcodec/tests/avcodec.c
index 08ca507bf0..cd949f6385 100644
--- a/libavcodec/tests/avcodec.c
+++ b/libavcodec/tests/avcodec.c
@@ -141,7 +141,7 @@ int main(void){
ret = 1;
}
}
- if (codec2->caps_internal & (FF_CODEC_CAP_ALLOCATE_PROGRESS |
+ if (codec2->caps_internal & (FF_CODEC_CAP_USES_PROGRESSFRAMES |
FF_CODEC_CAP_SETS_PKT_DTS |
FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM |
FF_CODEC_CAP_EXPORTS_CROPPING |
@@ -171,10 +171,6 @@ int main(void){
AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE |
AV_CODEC_CAP_ENCODER_FLUSH))
ERR("Decoder %s has encoder-only capabilities\n");
- if (codec2->caps_internal & FF_CODEC_CAP_ALLOCATE_PROGRESS &&
- !(codec->capabilities & AV_CODEC_CAP_FRAME_THREADS))
- ERR("Decoder %s wants allocated progress without supporting"
- "frame threads\n");
if (codec2->cb_type != FF_CODEC_CB_TYPE_DECODE &&
codec2->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS)
ERR("Decoder %s is marked as setting pkt_dts when it doesn't have"
diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index ef0662ae37..7800abc7f7 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -26,7 +26,7 @@
#include "libavcodec/x86/xvididct.h"
#include "libavcodec/x86/simple_idct.h"
-#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
+#if CONFIG_PRORES_DECODER && ARCH_X86_64 && HAVE_X86ASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
int16_t *block, int16_t *qmat);
@@ -70,7 +70,7 @@ static const struct algo idct_tab_arch[] = {
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
#endif
#endif /* CONFIG_MPEG4_DECODER && HAVE_X86ASM */
-#if (CONFIG_PRORES_DECODER || CONFIG_PRORES_LGPL_DECODER) && ARCH_X86_64 && HAVE_X86ASM
+#if CONFIG_PRORES_DECODER && ARCH_X86_64 && HAVE_X86ASM
{ "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
# if HAVE_AVX_EXTERNAL
{ "PR-AVX", ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index 4272fd87d4..5ab12848b4 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -20,7 +20,7 @@
/**
* @file
- * Multithreading API for decoders
+ * Multithreading support functions
* @author Alexander Strange <astrange@ithinksw.com>
*/
@@ -31,6 +31,27 @@
#include "avcodec.h"
+/**
+ * Wait for decoding threads to finish and reset internal state.
+ * Called by avcodec_flush_buffers().
+ *
+ * @param avctx The context.
+ */
+void ff_thread_flush(AVCodecContext *avctx);
+
+/**
+ * Submit a new packet to a decoding thread.
+ * Returns the next available decoded frame in picture; *got_picture_ptr
+ * will be 0 if none is available.
+ * The return value on success is the size of the consumed packet for
+ * compatibility with FFCodec.decode. This means the decoder
+ * has to consume the full packet.
+ *
+ * Parameters are the same as FFCodec.decode.
+ */
+int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
+ int *got_picture_ptr, AVPacket *avpkt);
+
int ff_thread_can_start_frame(AVCodecContext *avctx);
/**
@@ -53,12 +74,44 @@ void ff_thread_finish_setup(AVCodecContext *avctx);
*/
int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
+int ff_thread_init(AVCodecContext *s);
int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
int (*main_func)(AVCodecContext *c), void *arg, int *ret, int job_count);
+void ff_thread_free(AVCodecContext *s);
int ff_slice_thread_allocz_entries(AVCodecContext *avctx, int count);
int ff_slice_thread_init_progress(AVCodecContext *avctx);
void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n);
void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift);
+enum ThreadingStatus {
+ FF_THREAD_IS_COPY,
+ FF_THREAD_IS_FIRST_THREAD,
+ FF_THREAD_NO_FRAME_THREADING,
+};
+
+/**
+ * Allows synchronizing objects whose lifetime spans the whole decoding
+ * process among all frame threads.
+ *
+ * When called from a non-copy thread, do nothing.
+ * When called from a copy thread, place a new RefStruct reference
+ * at the given offset in the calling thread's private data from
+ * the RefStruct reference in the private data of the first decoding thread.
+ * The first thread must have a valid RefStruct reference at the given
+ * offset in its private data; the calling thread must not have
+ * a reference at this offset in its private data (must be NULL).
+ *
+ * @param avctx an AVCodecContext
+ * @param offset offset of the RefStruct reference in avctx's private data
+ *
+ * @retval FF_THREAD_IS_COPY if frame-threading is in use and the
+ * calling thread is a copy; in this case, the RefStruct reference
+ * will be set.
+ * @retval FF_THREAD_IS_FIRST_THREAD if frame-threading is in use
+ * and the calling thread is the first decoding thread.
+ * @retval FF_THREAD_NO_FRAME_THREADING if frame-threading is not in use.
+ */
+enum ThreadingStatus ff_thread_sync_ref(AVCodecContext *avctx, size_t offset);
+
#endif /* AVCODEC_THREAD_H */
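
A typical call pattern for ff_thread_sync_ref() in a decoder init function
could look like the sketch below. MyDecContext, MySharedTables and
my_decode_init are hypothetical names, and allocating the shared object with
ff_refstruct_allocz() is just one way to obtain a RefStruct reference:

    #include <stddef.h>

    #include "avcodec.h"
    #include "refstruct.h"
    #include "thread.h"

    typedef struct MySharedTables { int dummy; } MySharedTables;

    typedef struct MyDecContext {
        /* RefStruct reference shared by all frame threads */
        MySharedTables *tables;
    } MyDecContext;

    static int my_decode_init(AVCodecContext *avctx)
    {
        MyDecContext *s = avctx->priv_data;

        if (ff_thread_sync_ref(avctx, offsetof(MyDecContext, tables)) ==
            FF_THREAD_IS_COPY)
            return 0; /* reference was propagated from the first thread */

        /* first thread, or no frame threading: create the object once */
        s->tables = ff_refstruct_allocz(sizeof(*s->tables));
        if (!s->tables)
            return AVERROR(ENOMEM);
        return 0;
    }
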
diff --git a/libavcodec/threadprogress.c b/libavcodec/threadprogress.c
new file mode 100644
index 0000000000..62c4fd898b
--- /dev/null
+++ b/libavcodec/threadprogress.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <stdatomic.h>
+
+#include "pthread_internal.h"
+#include "threadprogress.h"
+#include "libavutil/attributes.h"
+#include "libavutil/thread.h"
+
+DEFINE_OFFSET_ARRAY(ThreadProgress, thread_progress, init,
+ (offsetof(ThreadProgress, progress_mutex)),
+ (offsetof(ThreadProgress, progress_cond)));
+
+av_cold int ff_thread_progress_init(ThreadProgress *pro, int init_mode)
+{
+ atomic_init(&pro->progress, init_mode ? -1 : INT_MAX);
+#if HAVE_THREADS
+ if (init_mode)
+ return ff_pthread_init(pro, thread_progress_offsets);
+#endif
+ pro->init = init_mode;
+ return 0;
+}
+
+av_cold void ff_thread_progress_destroy(ThreadProgress *pro)
+{
+#if HAVE_THREADS
+ ff_pthread_free(pro, thread_progress_offsets);
+#else
+ pro->init = 0;
+#endif
+}
+
+void ff_thread_progress_report(ThreadProgress *pro, int n)
+{
+ if (atomic_load_explicit(&pro->progress, memory_order_relaxed) >= n)
+ return;
+
+ atomic_store_explicit(&pro->progress, n, memory_order_release);
+
+ ff_mutex_lock(&pro->progress_mutex);
+ ff_cond_broadcast(&pro->progress_cond);
+ ff_mutex_unlock(&pro->progress_mutex);
+}
+
+void ff_thread_progress_await(const ThreadProgress *pro_c, int n)
+{
+ /* Casting const away here is safe, because we only read from progress
+ * and leave pro_c in the same state on return as it was in on entry. */
+ ThreadProgress *pro = (ThreadProgress*)pro_c;
+
+ if (atomic_load_explicit(&pro->progress, memory_order_acquire) >= n)
+ return;
+
+ ff_mutex_lock(&pro->progress_mutex);
+ while (atomic_load_explicit(&pro->progress, memory_order_relaxed) < n)
+ ff_cond_wait(&pro->progress_cond, &pro->progress_mutex);
+ ff_mutex_unlock(&pro->progress_mutex);
+}
diff --git a/libavcodec/threadprogress.h b/libavcodec/threadprogress.h
new file mode 100644
index 0000000000..cc3414c2ce
--- /dev/null
+++ b/libavcodec/threadprogress.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_THREADPROGRESS_H
+#define AVCODEC_THREADPROGRESS_H
+
+/**
+ * ThreadProgress is an API to easily notify other threads about progress
+ * of any kind as long as it can be packaged into an int and is consistent
+ * with the natural ordering of integers.
+ *
+ * Each initialized ThreadProgress can be in one of two modes: no-op mode
+ * or ordinary mode. In the former mode, ff_thread_progress_report() and
+ * ff_thread_progress_await() are no-ops, so that use cases such as
+ * non-frame-threaded decoding need no special casing. Only in the latter
+ * mode do these functions actually do what their names imply.
+ */
+
+#include <limits.h>
+#include <stdatomic.h>
+#include "libavutil/thread.h"
+
+/**
+ * This struct should be treated as opaque by users.
+ */
+typedef struct ThreadProgress {
+ atomic_int progress;
+ unsigned init;
+ AVMutex progress_mutex;
+ AVCond progress_cond;
+} ThreadProgress;
+
+/**
+ * Initialize a ThreadProgress.
+ *
+ * @param init_mode If zero, the ThreadProgress will be initialized
+ * to be in no-op mode as described above. Otherwise
+ * it is initialized to be in ordinary mode.
+ */
+int ff_thread_progress_init(ThreadProgress *pro, int init_mode);
+
+/**
+ * Destroy a ThreadProgress. Can be called on a ThreadProgress that
+ * has never been initialized provided that the ThreadProgress struct
+ * has been initially zeroed. Must be called even if ff_thread_progress_init()
+ * failed.
+ */
+void ff_thread_progress_destroy(ThreadProgress *pro);
+
+/**
+ * Reset the ::ThreadProgress.progress counter; must only be called
+ * if the ThreadProgress is not in use in any way (e.g. no thread
+ * may wait on it via ff_thread_progress_await()).
+ */
+static inline void ff_thread_progress_reset(ThreadProgress *pro)
+{
+ atomic_init(&pro->progress, pro->init ? -1 : INT_MAX);
+}
+
+/**
+ * This function is a no-op in no-op mode; otherwise it notifies
+ * other threads that a certain level of progress has been reached.
+ * Later calls with lower values of progress have no effect.
+ */
+void ff_thread_progress_report(ThreadProgress *pro, int progress);
+
+/**
+ * This function is a no-op in no-op mode; otherwise it waits
+ * until other threads have reached a certain level of progress:
+ * This function will return after another thread has called
+ * ff_thread_progress_report() with the same or higher value for progress.
+ */
+void ff_thread_progress_await(const ThreadProgress *pro, int progress);
+
+#endif /* AVCODEC_THREADPROGRESS_H */
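
A minimal usage sketch of the API above (MyRefFrame and the helper names are
hypothetical): the thread that reconstructs a reference frame reports how many
rows are finished, while a thread consuming that frame waits for the rows it
needs before reading them.

    #include "threadprogress.h"

    typedef struct MyRefFrame {
        ThreadProgress progress;
        /* frame data ... */
    } MyRefFrame;

    static int init_ref(MyRefFrame *ref, int frame_threading)
    {
        /* ordinary mode with frame threading, no-op mode otherwise */
        return ff_thread_progress_init(&ref->progress, frame_threading);
    }

    static void decode_rows(MyRefFrame *ref, int nb_rows)
    {
        for (int row = 0; row < nb_rows; row++) {
            /* ... reconstruct row ... */
            ff_thread_progress_report(&ref->progress, row);
        }
    }

    static void read_ref(const MyRefFrame *ref, int last_row_needed)
    {
        ff_thread_progress_await(&ref->progress, last_row_needed);
        /* rows 0..last_row_needed are now safe to read */
    }
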
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 19301d9e49..05ab0e70b0 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -2267,8 +2267,10 @@ again:
group_size = s->width * channels;
tmpbuf = av_malloc(ssize);
- if (!tmpbuf)
+ if (!tmpbuf) {
+ av_free(five_planes);
return AVERROR(ENOMEM);
+ }
if (s->avctx->pix_fmt == AV_PIX_FMT_RGBF32LE ||
s->avctx->pix_fmt == AV_PIX_FMT_RGBAF32LE) {
@@ -2381,7 +2383,6 @@ again:
}
}
- p->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/tmv.c b/libavcodec/tmv.c
index 2a7e1a105f..2ff1424bd8 100644
--- a/libavcodec/tmv.c
+++ b/libavcodec/tmv.c
@@ -56,8 +56,6 @@ static int tmv_decode_frame(AVCodecContext *avctx, AVFrame *frame,
return AVERROR_INVALIDDATA;
}
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
dst = frame->data[0];
#if FF_API_PALETTE_HAS_CHANGED
diff --git a/libavcodec/truemotion2rt.c b/libavcodec/truemotion2rt.c
index 4f8590fc82..438f96f6b4 100644
--- a/libavcodec/truemotion2rt.c
+++ b/libavcodec/truemotion2rt.c
@@ -201,8 +201,6 @@ static int truemotion2rt_decode_frame(AVCodecContext *avctx, AVFrame *p,
dst += p->linesize[2];
}
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index b17fc3c7e2..337c00e789 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -806,6 +806,14 @@ int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
return FFMAX(0, duration);
}
+#if !HAVE_THREADS
+int ff_thread_init(AVCodecContext *s)
+{
+ return -1;
+}
+
+#endif
+
unsigned int av_xiphlacing(unsigned char *s, unsigned int v)
{
unsigned int n = 0;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 5377926fa6..4c0fa2ca67 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -890,8 +890,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
break;
}
- frame->flags |= AV_FRAME_FLAG_KEY;
- frame->pict_type = AV_PICTURE_TYPE_I;
if (c->interlaced)
frame->flags |= AV_FRAME_FLAG_INTERLACED;
diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index 5cead150d3..8b370e5659 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -187,9 +187,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_thread_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
-
if (stride) {
td.stride = stride;
td.buf = psrc;
diff --git a/libavcodec/v210x.c b/libavcodec/v210x.c
index 55630fa2fb..a4b43883ed 100644
--- a/libavcodec/v210x.c
+++ b/libavcodec/v210x.c
@@ -61,8 +61,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *pic,
udst = (uint16_t *)pic->data[1];
vdst = (uint16_t *)pic->data[2];
yend = ydst + width;
- pic->pict_type = AV_PICTURE_TYPE_I;
- pic->flags |= AV_FRAME_FLAG_KEY;
for (;;) {
uint32_t v = av_be2ne32(*src++);
diff --git a/libavcodec/v308dec.c b/libavcodec/v308dec.c
index 4bc4ea4e21..b591a79dd2 100644
--- a/libavcodec/v308dec.c
+++ b/libavcodec/v308dec.c
@@ -48,9 +48,6 @@ static int v308_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
y = pic->data[0];
u = pic->data[1];
v = pic->data[2];
diff --git a/libavcodec/v408dec.c b/libavcodec/v408dec.c
index 4d8bccd650..2433c6de14 100644
--- a/libavcodec/v408dec.c
+++ b/libavcodec/v408dec.c
@@ -19,8 +19,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config_components.h"
-
#include "avcodec.h"
#include "codec_internal.h"
#include "decode.h"
@@ -47,9 +45,6 @@ static int v408_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
y = pic->data[0];
u = pic->data[1];
v = pic->data[2];
@@ -74,7 +69,6 @@ static int v408_decode_frame(AVCodecContext *avctx, AVFrame *pic,
return avpkt->size;
}
-#if CONFIG_V408_DECODER
const FFCodec ff_v408_decoder = {
.p.name = "v408",
CODEC_LONG_NAME("Uncompressed packed QT 4:4:4:4"),
@@ -84,4 +78,3 @@ const FFCodec ff_v408_decoder = {
FF_CODEC_DECODE_CB(v408_decode_frame),
.p.capabilities = AV_CODEC_CAP_DR1,
};
-#endif
diff --git a/libavcodec/v408enc.c b/libavcodec/v408enc.c
index c1bf0f6158..c173f650ef 100644
--- a/libavcodec/v408enc.c
+++ b/libavcodec/v408enc.c
@@ -20,9 +20,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config_components.h"
-
-#include "libavutil/intreadwrite.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "encode.h"
@@ -72,7 +69,6 @@ static int v408_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
static const enum AVPixelFormat pix_fmt[] = { AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
-#if CONFIG_V408_ENCODER
const FFCodec ff_v408_encoder = {
.p.name = "v408",
CODEC_LONG_NAME("Uncompressed packed QT 4:4:4:4"),
@@ -83,4 +79,3 @@ const FFCodec ff_v408_encoder = {
FF_CODEC_ENCODE_CB(v408_encode_frame),
.p.pix_fmts = pix_fmt,
};
-#endif
diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
index 35e4a8ae03..04be830ad2 100644
--- a/libavcodec/v410dec.c
+++ b/libavcodec/v410dec.c
@@ -102,9 +102,6 @@ static int v410_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_thread_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
td.buf = src;
td.frame = pic;
avctx->execute2(avctx, v410_decode_slice, &td, NULL, thread_count);
diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
index 2277135699..23474ee143 100644
--- a/libavcodec/v4l2_buffers.c
+++ b/libavcodec/v4l2_buffers.c
@@ -29,6 +29,7 @@
#include <poll.h>
#include "libavcodec/avcodec.h"
#include "libavutil/pixdesc.h"
+#include "refstruct.h"
#include "v4l2_context.h"
#include "v4l2_buffers.h"
#include "v4l2_m2m.h"
@@ -229,7 +230,7 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused)
ff_v4l2_buffer_enqueue(avbuf);
}
- av_buffer_unref(&avbuf->context_ref);
+ ff_refstruct_unref(&avbuf->context_ref);
}
}
@@ -240,9 +241,7 @@ static int v4l2_buf_increase_ref(V4L2Buffer *in)
if (in->context_ref)
atomic_fetch_add(&in->context_refcount, 1);
else {
- in->context_ref = av_buffer_ref(s->self_ref);
- if (!in->context_ref)
- return AVERROR(ENOMEM);
+ in->context_ref = ff_refstruct_ref(s->self_ref);
in->context_refcount = 1;
}
diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
index 3d2ff1b9a5..e35b161309 100644
--- a/libavcodec/v4l2_buffers.h
+++ b/libavcodec/v4l2_buffers.h
@@ -28,7 +28,6 @@
#include <stddef.h>
#include <linux/videodev2.h>
-#include "libavutil/buffer.h"
#include "libavutil/frame.h"
#include "packet.h"
@@ -46,8 +45,9 @@ typedef struct V4L2Buffer {
struct V4L2Context *context;
/* This object is refcounted per-plane, so we need to keep track
- * of how many context-refs we are holding. */
- AVBufferRef *context_ref;
+ * of how many context-refs we are holding.
+ * This pointer is a RefStruct reference. */
+ const struct V4L2m2mContext *context_ref;
atomic_uint context_refcount;
/* keep track of the mmap address and mmap length */
diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
index ac086a7913..15415cfc4e 100644
--- a/libavcodec/v4l2_m2m.c
+++ b/libavcodec/v4l2_m2m.c
@@ -32,6 +32,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/imgutils.h"
#include "libavutil/pixfmt.h"
+#include "refstruct.h"
#include "v4l2_context.h"
#include "v4l2_fmt.h"
#include "v4l2_m2m.h"
@@ -247,9 +248,9 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
return 0;
}
-static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
+static void v4l2_m2m_destroy_context(FFRefStructOpaque unused, void *context)
{
- V4L2m2mContext *s = (V4L2m2mContext*)context;
+ V4L2m2mContext *s = context;
ff_v4l2_context_release(&s->capture);
sem_destroy(&s->refsync);
@@ -258,8 +259,6 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
close(s->fd);
av_frame_free(&s->frame);
av_packet_unref(&s->buf_pkt);
-
- av_free(s);
}
int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
@@ -283,7 +282,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
ff_v4l2_context_release(&s->output);
s->self_ref = NULL;
- av_buffer_unref(&priv->context_ref);
+ ff_refstruct_unref(&priv->context);
return 0;
}
@@ -328,17 +327,11 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv)
int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
{
- *s = av_mallocz(sizeof(V4L2m2mContext));
+ *s = ff_refstruct_alloc_ext(sizeof(**s), 0, NULL,
+ &v4l2_m2m_destroy_context);
if (!*s)
return AVERROR(ENOMEM);
- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
- &v4l2_m2m_destroy_context, NULL, 0);
- if (!priv->context_ref) {
- av_freep(s);
- return AVERROR(ENOMEM);
- }
-
/* assign the context */
priv->context = *s;
(*s)->priv = priv;
@@ -346,13 +339,13 @@ int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
/* populate it */
priv->context->capture.num_buffers = priv->num_capture_buffers;
priv->context->output.num_buffers = priv->num_output_buffers;
- priv->context->self_ref = priv->context_ref;
+ priv->context->self_ref = priv->context;
priv->context->fd = -1;
priv->context->frame = av_frame_alloc();
if (!priv->context->frame) {
- av_buffer_unref(&priv->context_ref);
- *s = NULL; /* freed when unreferencing context_ref */
+ ff_refstruct_unref(&priv->context);
+ *s = NULL; /* freed when unreferencing context */
return AVERROR(ENOMEM);
}
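
The three v4l2 hunks above replace the hand-rolled AVBufferRef wrapper around V4L2m2mContext with a RefStruct allocation: the destructor is attached at allocation time and the struct is freed automatically when the last reference goes away, which is why the ENOMEM handling around av_buffer_ref()/av_buffer_create() disappears. A minimal sketch of the pattern, assuming libavcodec's internal refstruct.h API exactly as it appears in the diff (MyContext and the helper names are illustrative):

    /* Sketch only: context owned by a RefStruct allocation; the destructor
     * replaces the old av_buffer_create() free callback and the memory
     * itself is released by RefStruct when the last reference is dropped. */
    #include <unistd.h>
    #include "libavutil/error.h"
    #include "libavcodec/refstruct.h"

    typedef struct MyContext {
        int fd;
    } MyContext;

    static void my_context_destroy(FFRefStructOpaque unused, void *obj)
    {
        MyContext *ctx = obj;
        if (ctx->fd >= 0)
            close(ctx->fd);      /* free resources only, not the struct */
    }

    static int my_context_create(MyContext **out)
    {
        *out = ff_refstruct_alloc_ext(sizeof(**out), 0, NULL, my_context_destroy);
        if (!*out)
            return AVERROR(ENOMEM);
        (*out)->fd = -1;
        return 0;
    }

    /* Other owners take plain references; unlike av_buffer_ref() this
     * cannot fail, which removes the error paths seen in the diff above:
     *     buf->ctx = ff_refstruct_ref(ctx);
     *     ...
     *     ff_refstruct_unref(&buf->ctx);
     */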
diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
index 04d86d7b92..4ba33dc335 100644
--- a/libavcodec/v4l2_m2m.h
+++ b/libavcodec/v4l2_m2m.h
@@ -62,7 +62,7 @@ typedef struct V4L2m2mContext {
AVFrame *frame;
/* Reference to self; only valid while codec is active. */
- AVBufferRef *self_ref;
+ struct V4L2m2mContext *self_ref;
/* reference back to V4L2m2mPriv */
void *priv;
@@ -71,8 +71,7 @@ typedef struct V4L2m2mContext {
typedef struct V4L2m2mPriv {
AVClass *class;
- V4L2m2mContext *context;
- AVBufferRef *context_ref;
+ V4L2m2mContext *context; ///< RefStruct reference
int num_output_buffers;
int num_capture_buffers;
diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c
index 1f9a6071ba..f61bf63098 100644
--- a/libavcodec/vaapi_av1.c
+++ b/libavcodec/vaapi_av1.c
@@ -46,7 +46,7 @@ typedef struct VAAPIAV1DecContext {
static VASurfaceID vaapi_av1_surface_id(AV1Frame *vf)
{
- if (vf)
+ if (vf->f)
return ff_vaapi_get_surface_id(vf->f);
else
return VA_INVALID_SURFACE;
@@ -132,7 +132,7 @@ static int vaapi_av1_start_frame(AVCodecContext *avctx,
goto fail;
pic->output_surface = ff_vaapi_get_surface_id(ctx->tmp_frame);
} else {
- pic->output_surface = vaapi_av1_surface_id(&s->cur_frame);
+ pic->output_surface = ff_vaapi_get_surface_id(s->cur_frame.f);
}
memset(&pic_param, 0, sizeof(VADecPictureParameterBufferAV1));
@@ -142,7 +142,7 @@ static int vaapi_av1_start_frame(AVCodecContext *avctx,
.bit_depth_idx = bit_depth_idx,
.matrix_coefficients = seq->color_config.matrix_coefficients,
.current_frame = pic->output_surface,
- .current_display_picture = vaapi_av1_surface_id(&s->cur_frame),
+ .current_display_picture = ff_vaapi_get_surface_id(s->cur_frame.f),
.frame_width_minus1 = frame_header->frame_width_minus_1,
.frame_height_minus1 = frame_header->frame_height_minus_1,
.primary_ref_frame = frame_header->primary_ref_frame,
@@ -220,7 +220,7 @@ static int vaapi_av1_start_frame(AVCodecContext *avctx,
.error_resilient_mode = frame_header->error_resilient_mode,
.disable_cdf_update = frame_header->disable_cdf_update,
.allow_screen_content_tools = frame_header->allow_screen_content_tools,
- .force_integer_mv = frame_header->force_integer_mv,
+ .force_integer_mv = s->cur_frame.force_integer_mv,
.allow_intrabc = frame_header->allow_intrabc,
.use_superres = frame_header->use_superres,
.allow_high_precision_mv = frame_header->allow_high_precision_mv,
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index 5665639dd7..7f2fe032db 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -156,6 +156,11 @@ int ff_vaapi_decode_issue(AVCodecContext *avctx,
VAStatus vas;
int err;
+ if (pic->nb_slices <= 0) {
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
av_log(avctx, AV_LOG_DEBUG, "Decode to surface %#x.\n",
pic->output_surface);
@@ -599,22 +604,26 @@ static int vaapi_decode_make_config(AVCodecContext *avctx,
if (err < 0)
goto fail;
- frames->initial_pool_size = 1;
- // Add per-codec number of surfaces used for storing reference frames.
- switch (avctx->codec_id) {
- case AV_CODEC_ID_H264:
- case AV_CODEC_ID_HEVC:
- case AV_CODEC_ID_AV1:
- frames->initial_pool_size += 16;
- break;
- case AV_CODEC_ID_VP9:
- frames->initial_pool_size += 8;
- break;
- case AV_CODEC_ID_VP8:
- frames->initial_pool_size += 3;
- break;
- default:
- frames->initial_pool_size += 2;
+ if (CONFIG_VAAPI_1)
+ frames->initial_pool_size = 0;
+ else {
+ frames->initial_pool_size = 1;
+ // Add per-codec number of surfaces used for storing reference frames.
+ switch (avctx->codec_id) {
+ case AV_CODEC_ID_H264:
+ case AV_CODEC_ID_HEVC:
+ case AV_CODEC_ID_AV1:
+ frames->initial_pool_size += 16;
+ break;
+ case AV_CODEC_ID_VP9:
+ frames->initial_pool_size += 8;
+ break;
+ case AV_CODEC_ID_VP8:
+ frames->initial_pool_size += 3;
+ break;
+ default:
+ frames->initial_pool_size += 2;
+ }
}
}
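
The vaapi_decode.c hunk keeps the fixed-pool sizing only for pre-1.0 libva: the CONFIG_VAAPI_1 branch requests an initial pool size of 0, presumably because newer libva can allocate surfaces on demand, while older drivers still need one surface for the current output plus a per-codec worst case of reference surfaces. A small helper expressing that sizing rule, purely illustrative and mirroring the switch above:

    /* Sketch: worst-case surface count for a fixed-size pool
     * (pre-VAAPI-1 drivers), following the switch in the diff. */
    #include "libavcodec/codec_id.h"

    static int vaapi_initial_pool_size(enum AVCodecID codec_id)
    {
        int n = 1;              /* the surface currently being decoded into */
        switch (codec_id) {
        case AV_CODEC_ID_H264:
        case AV_CODEC_ID_HEVC:
        case AV_CODEC_ID_AV1:
            n += 16;            /* up to 16 reference surfaces */
            break;
        case AV_CODEC_ID_VP9:
            n += 8;             /* 8 reference slots */
            break;
        case AV_CODEC_ID_VP8:
            n += 3;             /* last / golden / altref */
            break;
        default:
            n += 2;
        }
        return n;
    }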
diff --git a/libavcodec/vaapi_encode_av1.c b/libavcodec/vaapi_encode_av1.c
index 02a31b894d..b868f5b66a 100644
--- a/libavcodec/vaapi_encode_av1.c
+++ b/libavcodec/vaapi_encode_av1.c
@@ -23,6 +23,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
+#include "libavutil/mastering_display_metadata.h"
#include "cbs_av1.h"
#include "put_bits.h"
@@ -41,6 +42,8 @@ typedef struct VAAPIEncodeAV1Context {
VAAPIEncodeContext common;
AV1RawOBU sh; /**< sequence header.*/
AV1RawOBU fh; /**< frame header.*/
+ AV1RawOBU mh[4]; /**< metadata header.*/
+ int nb_mh;
CodedBitstreamContext *cbc;
CodedBitstreamFragment current_obu;
VAConfigAttribValEncAV1 attr;
@@ -659,6 +662,68 @@ static int vaapi_encode_av1_init_picture_params(AVCodecContext *avctx,
2 : 1));
}
+ priv->nb_mh = 0;
+
+ if (pic->type == PICTURE_TYPE_IDR) {
+ AVFrameSideData *sd =
+ av_frame_get_side_data(pic->input_image,
+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+ if (sd) {
+ AVMasteringDisplayMetadata *mdm =
+ (AVMasteringDisplayMetadata *)sd->data;
+ if (mdm->has_primaries && mdm->has_luminance) {
+ AV1RawOBU *obu = &priv->mh[priv->nb_mh++];
+ AV1RawMetadata *md = &obu->obu.metadata;
+ AV1RawMetadataHDRMDCV *mdcv = &md->metadata.hdr_mdcv;
+ const int chroma_den = 1 << 16;
+ const int max_luma_den = 1 << 8;
+ const int min_luma_den = 1 << 14;
+
+ memset(obu, 0, sizeof(*obu));
+ obu->header.obu_type = AV1_OBU_METADATA;
+ md->metadata_type = AV1_METADATA_TYPE_HDR_MDCV;
+
+ for (i = 0; i < 3; i++) {
+ mdcv->primary_chromaticity_x[i] =
+ av_rescale(mdm->display_primaries[i][0].num, chroma_den,
+ mdm->display_primaries[i][0].den);
+ mdcv->primary_chromaticity_y[i] =
+ av_rescale(mdm->display_primaries[i][1].num, chroma_den,
+ mdm->display_primaries[i][1].den);
+ }
+
+ mdcv->white_point_chromaticity_x =
+ av_rescale(mdm->white_point[0].num, chroma_den,
+ mdm->white_point[0].den);
+ mdcv->white_point_chromaticity_y =
+ av_rescale(mdm->white_point[1].num, chroma_den,
+ mdm->white_point[1].den);
+
+ mdcv->luminance_max =
+ av_rescale(mdm->max_luminance.num, max_luma_den,
+ mdm->max_luminance.den);
+ mdcv->luminance_min =
+ av_rescale(mdm->min_luminance.num, min_luma_den,
+ mdm->min_luminance.den);
+ }
+ }
+
+ sd = av_frame_get_side_data(pic->input_image,
+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+ if (sd) {
+ AVContentLightMetadata *cllm = (AVContentLightMetadata *)sd->data;
+ AV1RawOBU *obu = &priv->mh[priv->nb_mh++];
+ AV1RawMetadata *md = &obu->obu.metadata;
+ AV1RawMetadataHDRCLL *cll = &md->metadata.hdr_cll;
+
+ memset(obu, 0, sizeof(*obu));
+ obu->header.obu_type = AV1_OBU_METADATA;
+ md->metadata_type = AV1_METADATA_TYPE_HDR_CLL;
+ cll->max_cll = cllm->MaxCLL;
+ cll->max_fall = cllm->MaxFALL;
+ }
+ }
+
end:
ff_cbs_fragment_reset(obu);
return ret;
@@ -735,6 +800,39 @@ end:
return ret;
}
+static int vaapi_encode_av1_write_extra_header(AVCodecContext *avctx,
+ VAAPIEncodePicture *pic,
+ int index, int *type,
+ char *data, size_t *data_len)
+{
+ VAAPIEncodeAV1Context *priv = avctx->priv_data;
+ CodedBitstreamFragment *obu = &priv->current_obu;
+ AV1RawOBU *mh_obu;
+ char mh_data[MAX_PARAM_BUFFER_SIZE];
+ size_t mh_data_len;
+ int ret = 0;
+
+ if (index >= priv->nb_mh)
+ return AVERROR_EOF;
+
+ mh_obu = &priv->mh[index];
+ ret = vaapi_encode_av1_add_obu(avctx, obu, AV1_OBU_METADATA, mh_obu);
+ if (ret < 0)
+ goto end;
+
+ ret = vaapi_encode_av1_write_obu(avctx, mh_data, &mh_data_len, obu);
+ if (ret < 0)
+ goto end;
+
+ memcpy(data, mh_data, MAX_PARAM_BUFFER_SIZE * sizeof(char));
+ *data_len = mh_data_len;
+ *type = VAEncPackedHeaderRawData;
+
+end:
+ ff_cbs_fragment_reset(obu);
+ return ret;
+}
+
static const VAAPIEncodeProfile vaapi_encode_av1_profiles[] = {
{ AV_PROFILE_AV1_MAIN, 8, 3, 1, 1, VAProfileAV1Profile0 },
{ AV_PROFILE_AV1_MAIN, 10, 3, 1, 1, VAProfileAV1Profile0 },
@@ -762,6 +860,8 @@ static const VAAPIEncodeType vaapi_encode_type_av1 = {
.slice_params_size = sizeof(VAEncTileGroupBufferAV1),
.init_slice_params = &vaapi_encode_av1_init_slice_params,
+
+ .write_extra_header = &vaapi_encode_av1_write_extra_header,
};
static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
@@ -776,7 +876,8 @@ static av_cold int vaapi_encode_av1_init(AVCodecContext *avctx)
ctx->desired_packed_headers =
VA_ENC_PACKED_HEADER_SEQUENCE |
- VA_ENC_PACKED_HEADER_PICTURE;
+ VA_ENC_PACKED_HEADER_PICTURE |
+ VA_ENC_PACKED_HEADER_MISC; // Metadata
if (avctx->profile == AV_PROFILE_UNKNOWN)
avctx->profile = priv->profile;
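
The encoder now emits AV1 metadata OBUs for HDR side data on IDR frames: mastering-display primaries and white point go into HDR_MDCV as 0.16 fixed point, maximum luminance as 24.8 and minimum luminance as 18.14, and content light level goes into HDR_CLL; the OBUs are written through the new write_extra_header callback as VAEncPackedHeaderRawData. A sketch of the rational-to-fixed-point conversion, assuming libavutil's av_rescale() and the same denominators as the constants above:

    /* Sketch: convert AVRational side-data values to the fixed-point
     * fields of the AV1 HDR_MDCV metadata, as in the diff above. */
    #include <stdint.h>
    #include "libavutil/rational.h"
    #include "libavutil/mathematics.h"

    static inline uint16_t to_0_16(AVRational v)   /* chromaticities */
    {
        return av_rescale(v.num, 1 << 16, v.den);
    }

    static inline uint32_t to_24_8(AVRational v)   /* luminance_max */
    {
        return av_rescale(v.num, 1 << 8, v.den);
    }

    static inline uint32_t to_18_14(AVRational v)  /* luminance_min */
    {
        return av_rescale(v.num, 1 << 14, v.den);
    }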
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index bf51df0f51..d656b1020f 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -758,7 +758,7 @@ static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
vpic->frame_num = hpic->frame_num;
vpic->pic_fields.bits.idr_pic_flag = (pic->type == PICTURE_TYPE_IDR);
- vpic->pic_fields.bits.reference_pic_flag = (pic->type != PICTURE_TYPE_B);
+ vpic->pic_fields.bits.reference_pic_flag = pic->is_reference;
return 0;
}
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 2777632fb6..2f59161346 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -944,26 +944,23 @@ static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
vpic->nal_unit_type = hpic->slice_nal_unit;
+ vpic->pic_fields.bits.reference_pic_flag = pic->is_reference;
switch (pic->type) {
case PICTURE_TYPE_IDR:
vpic->pic_fields.bits.idr_pic_flag = 1;
vpic->pic_fields.bits.coding_type = 1;
- vpic->pic_fields.bits.reference_pic_flag = 1;
break;
case PICTURE_TYPE_I:
vpic->pic_fields.bits.idr_pic_flag = 0;
vpic->pic_fields.bits.coding_type = 1;
- vpic->pic_fields.bits.reference_pic_flag = 1;
break;
case PICTURE_TYPE_P:
vpic->pic_fields.bits.idr_pic_flag = 0;
vpic->pic_fields.bits.coding_type = 2;
- vpic->pic_fields.bits.reference_pic_flag = 1;
break;
case PICTURE_TYPE_B:
vpic->pic_fields.bits.idr_pic_flag = 0;
vpic->pic_fields.bits.coding_type = 3;
- vpic->pic_fields.bits.reference_pic_flag = 0;
break;
default:
av_assert0(0 && "invalid picture type");
diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
index 9dc7d5e72b..b8e760c807 100644
--- a/libavcodec/vaapi_vp9.c
+++ b/libavcodec/vaapi_vp9.c
@@ -100,7 +100,7 @@ static int vaapi_vp9_start_frame(AVCodecContext *avctx,
}
for (i = 0; i < 8; i++) {
- if (h->refs[i].f->buf[0])
+ if (h->refs[i].f)
pic_param.reference_frames[i] = ff_vaapi_get_surface_id(h->refs[i].f);
else
pic_param.reference_frames[i] = VA_INVALID_ID;
diff --git a/libavcodec/vble.c b/libavcodec/vble.c
index 32157913c7..4511433a6c 100644
--- a/libavcodec/vble.c
+++ b/libavcodec/vble.c
@@ -134,10 +134,6 @@ static int vble_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_thread_get_buffer(avctx, pic, 0)) < 0)
return ret;
- /* Set flags */
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
/* Version should always be 1 */
version = AV_RL32(src);
@@ -191,6 +187,9 @@ static av_cold int vble_decode_init(AVCodecContext *avctx)
ctx->size = av_image_get_buffer_size(avctx->pix_fmt,
avctx->width, avctx->height, 1);
+ if (ctx->size < 0)
+ return ctx->size;
+
ctx->val = av_malloc_array(ctx->size, sizeof(*ctx->val));
if (!ctx->val) {
diff --git a/libavcodec/vbndec.c b/libavcodec/vbndec.c
index 4a38b02168..a96843f212 100644
--- a/libavcodec/vbndec.c
+++ b/libavcodec/vbndec.c
@@ -151,9 +151,6 @@ static int vbn_decode_frame(AVCodecContext *avctx,
if (ret < 0)
goto out;
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
if (format == VBN_FORMAT_RAW) {
uint8_t *flipped = frame->data[0] + frame->linesize[0] * (frame->height - 1);
av_image_copy_plane(flipped, -frame->linesize[0], image_buf ? image_buf : gb->buffer, linesize, linesize, frame->height);
diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c
index ec284dca00..a536a7bcf8 100644
--- a/libavcodec/vc1_parser.c
+++ b/libavcodec/vc1_parser.c
@@ -26,6 +26,7 @@
*/
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "parser.h"
#include "vc1.h"
#include "get_bits.h"
@@ -66,7 +67,9 @@ static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
GetBitContext gb;
int ret;
vpc->v.s.avctx = avctx;
- init_get_bits8(&gb, buf, buf_size);
+ ret = init_get_bits8(&gb, buf, buf_size);
+ av_assert1(ret >= 0); // buf_size is bounded by UNESCAPED_THRESHOLD
+
switch (vpc->prev_start_code) {
case VC1_CODE_SEQHDR & 0xFF:
ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 2c413e0bf1..3b5b016cf9 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -36,6 +36,7 @@
#include "hwaccel_internal.h"
#include "hwconfig.h"
#include "mpeg_er.h"
+#include "mpegutils.h"
#include "mpegvideo.h"
#include "mpegvideodec.h"
#include "msmpeg4_vc1_data.h"
diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c
index 771337e262..c9ed62266a 100644
--- a/libavcodec/vcr1.c
+++ b/libavcodec/vcr1.c
@@ -62,8 +62,6 @@ static int vcr1_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
for (i = 0; i < 16; i++) {
a->delta[i] = *bytestream++;
diff --git a/libavcodec/vdpau_av1.c b/libavcodec/vdpau_av1.c
index 80923092b9..a1aff79bb7 100644
--- a/libavcodec/vdpau_av1.c
+++ b/libavcodec/vdpau_av1.c
@@ -91,9 +91,7 @@ static int vdpau_av1_start_frame(AVCodecContext *avctx,
info->show_frame = frame_header->show_frame;
info->disable_cdf_update = frame_header->disable_cdf_update;
info->allow_screen_content_tools = frame_header->allow_screen_content_tools;
- info->force_integer_mv = frame_header->force_integer_mv ||
- frame_header->frame_type == AV1_FRAME_INTRA_ONLY ||
- frame_header->frame_type == AV1_FRAME_KEY;
+ info->force_integer_mv = s->cur_frame.force_integer_mv;
info->coded_denom = frame_header->coded_denom;
info->allow_intrabc = frame_header->allow_intrabc;
info->allow_high_precision_mv = frame_header->allow_high_precision_mv;
@@ -219,7 +217,8 @@ static int vdpau_av1_start_frame(AVCodecContext *avctx,
info->loop_filter_ref_deltas[i] = frame_header->loop_filter_ref_deltas[i];
/* Reference Frames */
- info->ref_frame_map[i] = ff_vdpau_get_surface_id(s->ref[i].f) ? ff_vdpau_get_surface_id(s->ref[i].f) : VDP_INVALID_HANDLE;
+ info->ref_frame_map[i] = s->ref[i].f && ff_vdpau_get_surface_id(s->ref[i].f) ?
+ ff_vdpau_get_surface_id(s->ref[i].f) : VDP_INVALID_HANDLE;
}
if (frame_header->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
@@ -235,8 +234,8 @@ static int vdpau_av1_start_frame(AVCodecContext *avctx,
AVFrame *ref_frame = s->ref[ref_idx].f;
info->ref_frame[i].index = info->ref_frame_map[ref_idx];
- info->ref_frame[i].width = ref_frame->width;
- info->ref_frame[i].height = ref_frame->height;
+ info->ref_frame[i].width = ref_frame ? ref_frame->width : 0;
+ info->ref_frame[i].height = ref_frame ? ref_frame->height : 0;
/* Global Motion */
info->global_motion[i].invalid = !frame_header->is_global[AV1_REF_FRAME_LAST + i];
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 84a1c02ce4..3d2de546b3 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -30,7 +30,7 @@
#include "version_major.h"
#define LIBAVCODEC_VERSION_MINOR 5
-#define LIBAVCODEC_VERSION_MICRO 101
+#define LIBAVCODEC_VERSION_MICRO 104
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
diff --git a/libavcodec/vmixdec.c b/libavcodec/vmixdec.c
index f61fd02092..46d817c7f3 100644
--- a/libavcodec/vmixdec.c
+++ b/libavcodec/vmixdec.c
@@ -289,9 +289,6 @@ static int decode_frame(AVCodecContext *avctx,
avctx->execute2(avctx, decode_slices, frame, NULL, s->nb_slices);
- frame->pict_type = AV_PICTURE_TYPE_I;
- frame->flags |= AV_FRAME_FLAG_KEY;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 201bab0e32..d03a1c9dbc 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -49,9 +49,9 @@
#include "internal.h"
#include "jpegquanttables.h"
#include "mathops.h"
+#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
-#include "threadframe.h"
#include "videodsp.h"
#include "vp3data.h"
#include "vp4data.h"
@@ -201,9 +201,9 @@ typedef struct Vp3DecodeContext {
int version;
int width, height;
int chroma_x_shift, chroma_y_shift;
- ThreadFrame golden_frame;
- ThreadFrame last_frame;
- ThreadFrame current_frame;
+ ProgressFrame golden_frame;
+ ProgressFrame last_frame;
+ ProgressFrame current_frame;
int keyframe;
uint8_t idct_permutation[64];
uint8_t idct_scantable[64];
@@ -353,12 +353,9 @@ static void vp3_decode_flush(AVCodecContext *avctx)
{
Vp3DecodeContext *s = avctx->priv_data;
- if (s->golden_frame.f)
- ff_thread_release_ext_buffer(&s->golden_frame);
- if (s->last_frame.f)
- ff_thread_release_ext_buffer(&s->last_frame);
- if (s->current_frame.f)
- ff_thread_release_ext_buffer(&s->current_frame);
+ ff_progress_frame_unref(&s->golden_frame);
+ ff_progress_frame_unref(&s->last_frame);
+ ff_progress_frame_unref(&s->current_frame);
}
static av_cold int vp3_decode_end(AVCodecContext *avctx)
@@ -372,9 +369,6 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx)
/* release all frames */
vp3_decode_flush(avctx);
- av_frame_free(&s->current_frame.f);
- av_frame_free(&s->last_frame.f);
- av_frame_free(&s->golden_frame.f);
ff_refstruct_unref(&s->coeff_vlc);
@@ -1908,10 +1902,9 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
/* At the end of the frame, report INT_MAX instead of the height of
* the frame. This makes the other threads' ff_thread_await_progress()
* calls cheaper, because they don't have to clip their values. */
- ff_thread_report_progress(&s->current_frame,
- y_flipped == s->height ? INT_MAX
- : y_flipped - 1,
- 0);
+ ff_progress_frame_report(&s->current_frame,
+ y_flipped == s->height ? INT_MAX
+ : y_flipped - 1);
}
if (!s->avctx->draw_horiz_band)
@@ -1942,7 +1935,7 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
static void await_reference_row(Vp3DecodeContext *s, const Vp3Fragment *fragment,
int motion_y, int y)
{
- const ThreadFrame *ref_frame;
+ const ProgressFrame *ref_frame;
int ref_row;
int border = motion_y & 1;
@@ -1955,7 +1948,7 @@ static void await_reference_row(Vp3DecodeContext *s, const Vp3Fragment *fragment
ref_row = y + (motion_y >> 1);
ref_row = FFMAX(FFABS(ref_row), ref_row + 8 + border);
- ff_thread_await_progress(ref_frame, ref_row, 0);
+ ff_progress_frame_await(ref_frame, ref_row);
}
#if CONFIG_VP4_DECODER
@@ -2008,8 +2001,7 @@ static int vp4_mc_loop_filter(Vp3DecodeContext *s, int plane, int motion_x, int
x_offset = (-(x + 2) & 7) + 2;
y_offset = (-(y + 2) & 7) + 2;
- if (x_offset > 8 + x_subpel && y_offset > 8 + y_subpel)
- return 0;
+ av_assert1(!(x_offset > 8 + x_subpel && y_offset > 8 + y_subpel));
s->vdsp.emulated_edge_mc(loop, motion_source - stride - 1,
loop_stride, stride,
@@ -2066,12 +2058,12 @@ static void render_slice(Vp3DecodeContext *s, int slice)
int16_t *block = s->block;
int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
/* When decoding keyframes, the earlier frames may not be available,
- * so to avoid using undefined pointer arithmetic on them we just
- * use the current frame instead. Nothing is ever read from these
- * frames in case of a keyframe. */
- const AVFrame *last_frame = s->last_frame.f->data[0] ?
+ * so we just use the current frame in this case instead;
+ * it also avoids using undefined pointer arithmetic. Nothing is
+ * ever read from these frames in case of a keyframe. */
+ const AVFrame *last_frame = s->last_frame.f ?
s->last_frame.f : s->current_frame.f;
- const AVFrame *golden_frame = s->golden_frame.f->data[0] ?
+ const AVFrame *golden_frame = s->golden_frame.f ?
s->golden_frame.f : s->current_frame.f;
int motion_halfpel_index;
int first_pixel;
@@ -2353,17 +2345,6 @@ static av_cold int allocate_tables(AVCodecContext *avctx)
return 0;
}
-static av_cold int init_frames(Vp3DecodeContext *s)
-{
- s->current_frame.f = av_frame_alloc();
- s->last_frame.f = av_frame_alloc();
- s->golden_frame.f = av_frame_alloc();
-
- if (!s->current_frame.f || !s->last_frame.f || !s->golden_frame.f)
- return AVERROR(ENOMEM);
-
- return 0;
-}
static av_cold void free_vlc_tables(FFRefStructOpaque unused, void *obj)
{
@@ -2382,10 +2363,6 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
int c_height;
int y_fragment_count, c_fragment_count;
- ret = init_frames(s);
- if (ret < 0)
- return ret;
-
if (avctx->codec_tag == MKTAG('V', 'P', '4', '0')) {
s->version = 3;
#if !CONFIG_VP4_DECODER
@@ -2524,61 +2501,42 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
}
/// Release and shuffle frames after decode finishes
-static int update_frames(AVCodecContext *avctx)
+static void update_frames(AVCodecContext *avctx)
{
Vp3DecodeContext *s = avctx->priv_data;
- int ret = 0;
- if (s->keyframe) {
- ff_thread_release_ext_buffer(&s->golden_frame);
- ret = ff_thread_ref_frame(&s->golden_frame, &s->current_frame);
- }
- /* shuffle frames */
- ff_thread_release_ext_buffer(&s->last_frame);
- FFSWAP(ThreadFrame, s->last_frame, s->current_frame);
+ if (s->keyframe)
+ ff_progress_frame_replace(&s->golden_frame, &s->current_frame);
- return ret;
+ /* shuffle frames */
+ ff_progress_frame_unref(&s->last_frame);
+ FFSWAP(ProgressFrame, s->last_frame, s->current_frame);
}
#if HAVE_THREADS
-static int ref_frame(ThreadFrame *dst, const ThreadFrame *src)
-{
- ff_thread_release_ext_buffer(dst);
- if (src->f->data[0])
- return ff_thread_ref_frame(dst, src);
- return 0;
-}
-
-static int ref_frames(Vp3DecodeContext *dst, const Vp3DecodeContext *src)
+static void ref_frames(Vp3DecodeContext *dst, const Vp3DecodeContext *src)
{
- int ret;
- if ((ret = ref_frame(&dst->current_frame, &src->current_frame)) < 0 ||
- (ret = ref_frame(&dst->golden_frame, &src->golden_frame)) < 0 ||
- (ret = ref_frame(&dst->last_frame, &src->last_frame)) < 0)
- return ret;
- return 0;
+ ff_progress_frame_replace(&dst->current_frame, &src->current_frame);
+ ff_progress_frame_replace(&dst->golden_frame, &src->golden_frame);
+ ff_progress_frame_replace(&dst->last_frame, &src->last_frame);
}
static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
Vp3DecodeContext *s = dst->priv_data;
const Vp3DecodeContext *s1 = src->priv_data;
- int qps_changed = 0, err;
+ int qps_changed = 0;
ff_refstruct_replace(&s->coeff_vlc, s1->coeff_vlc);
- if (!s1->current_frame.f->data[0] ||
+ // copy previous frame data
+ ref_frames(s, s1);
+ if (!s1->current_frame.f ||
s->width != s1->width || s->height != s1->height) {
- if (s != s1)
- ref_frames(s, s1);
return -1;
}
if (s != s1) {
- // copy previous frame data
- if ((err = ref_frames(s, s1)) < 0)
- return err;
-
s->keyframe = s1->keyframe;
// copy qscale data if necessary
@@ -2600,7 +2558,8 @@ static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *
}
}
- return update_frames(dst);
+ update_frames(dst);
+ return 0;
}
#endif
@@ -2691,15 +2650,20 @@ static int vp3_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe)
return buf_size;
+ ff_progress_frame_unref(&s->current_frame);
+ ret = ff_progress_frame_get_buffer(avctx, &s->current_frame,
+ AV_GET_BUFFER_FLAG_REF);
+ if (ret < 0) {
+ // Don't goto error here, as one can't report progress on or
+ // unref a non-existent frame.
+ return ret;
+ }
s->current_frame.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
: AV_PICTURE_TYPE_P;
if (s->keyframe)
s->current_frame.f->flags |= AV_FRAME_FLAG_KEY;
else
s->current_frame.f->flags &= ~AV_FRAME_FLAG_KEY;
- if ((ret = ff_thread_get_ext_buffer(avctx, &s->current_frame,
- AV_GET_BUFFER_FLAG_REF)) < 0)
- goto error;
if (!s->edge_emu_buffer) {
s->edge_emu_buffer = av_malloc(9 * FFABS(s->current_frame.f->linesize[0]));
@@ -2757,19 +2721,16 @@ static int vp3_decode_frame(AVCodecContext *avctx, AVFrame *frame,
#endif
}
} else {
- if (!s->golden_frame.f->data[0]) {
+ if (!s->golden_frame.f) {
av_log(s->avctx, AV_LOG_WARNING,
"vp3: first frame not a keyframe\n");
- s->golden_frame.f->pict_type = AV_PICTURE_TYPE_I;
- if ((ret = ff_thread_get_ext_buffer(avctx, &s->golden_frame,
- AV_GET_BUFFER_FLAG_REF)) < 0)
+ if ((ret = ff_progress_frame_get_buffer(avctx, &s->golden_frame,
+ AV_GET_BUFFER_FLAG_REF)) < 0)
goto error;
- ff_thread_release_ext_buffer(&s->last_frame);
- if ((ret = ff_thread_ref_frame(&s->last_frame,
- &s->golden_frame)) < 0)
- goto error;
- ff_thread_report_progress(&s->last_frame, INT_MAX, 0);
+ s->golden_frame.f->pict_type = AV_PICTURE_TYPE_I;
+ ff_progress_frame_replace(&s->last_frame, &s->golden_frame);
+ ff_progress_frame_report(&s->golden_frame, INT_MAX);
}
}
ff_thread_finish_setup(avctx);
@@ -2847,16 +2808,13 @@ static int vp3_decode_frame(AVCodecContext *avctx, AVFrame *frame,
*got_frame = 1;
- if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_FRAME)) {
- ret = update_frames(avctx);
- if (ret < 0)
- return ret;
- }
+ if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_FRAME))
+ update_frames(avctx);
return buf_size;
error:
- ff_thread_report_progress(&s->current_frame, INT_MAX, 0);
+ ff_progress_frame_report(&s->current_frame, INT_MAX);
if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_FRAME))
av_frame_unref(s->current_frame.f);
@@ -3206,7 +3164,8 @@ const FFCodec ff_theora_decoder = {
.flush = vp3_decode_flush,
UPDATE_THREAD_CONTEXT(vp3_update_thread_context),
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_EXPORTS_CROPPING | FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ FF_CODEC_CAP_EXPORTS_CROPPING |
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
#endif
@@ -3224,7 +3183,7 @@ const FFCodec ff_vp3_decoder = {
.flush = vp3_decode_flush,
UPDATE_THREAD_CONTEXT(vp3_update_thread_context),
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
#if CONFIG_VP4_DECODER
@@ -3242,6 +3201,6 @@ const FFCodec ff_vp4_decoder = {
.flush = vp3_decode_flush,
UPDATE_THREAD_CONTEXT(vp3_update_thread_context),
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
#endif
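
The vp3/Theora decoder is ported from ThreadFrame (separately allocated AVFrames, ff_thread_*_ext_buffer and manual ref error handling) to the ProgressFrame API, where the AVFrame is created by the get_buffer call itself and reference shuffling cannot fail. A condensed sketch of the lifecycle as used above, assuming the in-tree progressframe.h helpers visible in this diff; it is not a complete decoder:

    #include <limits.h>
    #include "avcodec.h"
    #include "progressframe.h"

    /* Sketch: ProgressFrame lifecycle in a frame-threaded decoder.
     * "last" is assumed to hold a previously decoded reference. */
    static int decode_one_frame(AVCodecContext *avctx, ProgressFrame *cur,
                                ProgressFrame *last, int nb_rows)
    {
        int ret = ff_progress_frame_get_buffer(avctx, cur, AV_GET_BUFFER_FLAG_REF);
        if (ret < 0)
            return ret;          /* nothing allocated: do not unref or report */

        for (int row = 0; row < nb_rows; row++) {
            ff_progress_frame_await(last, row);   /* wait for the reference row */
            /* ... decode row using last->f as reference ... */
            ff_progress_frame_report(cur, row);   /* publish it to consumers */
        }
        ff_progress_frame_report(cur, INT_MAX);   /* frame fully decoded */

        ff_progress_frame_replace(last, cur);     /* last := cur, cannot fail */
        ff_progress_frame_unref(cur);
        return 0;
    }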
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 539b5c5395..8e91613068 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -35,9 +35,9 @@
#include "hwaccel_internal.h"
#include "hwconfig.h"
#include "mathops.h"
+#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
-#include "threadframe.h"
#include "vp8.h"
#include "vp89_rac.h"
#include "vp8data.h"
@@ -103,12 +103,15 @@ static void free_buffers(VP8Context *s)
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
- int ret;
- if ((ret = ff_thread_get_ext_buffer(s->avctx, &f->tf,
- ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
+ int ret = ff_progress_frame_get_buffer(s->avctx, &f->tf,
+ ref ? AV_GET_BUFFER_FLAG_REF : 0);
+ if (ret < 0)
return ret;
- if (!(f->seg_map = ff_refstruct_allocz(s->mb_width * s->mb_height)))
+ f->seg_map = ff_refstruct_allocz(s->mb_width * s->mb_height);
+ if (!f->seg_map) {
+ ret = AVERROR(ENOMEM);
goto fail;
+ }
ret = ff_hwaccel_frame_priv_alloc(s->avctx, &f->hwaccel_picture_private);
if (ret < 0)
goto fail;
@@ -117,7 +120,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
fail:
ff_refstruct_unref(&f->seg_map);
- ff_thread_release_ext_buffer(&f->tf);
+ ff_progress_frame_unref(&f->tf);
return ret;
}
@@ -125,27 +128,10 @@ static void vp8_release_frame(VP8Frame *f)
{
ff_refstruct_unref(&f->seg_map);
ff_refstruct_unref(&f->hwaccel_picture_private);
- ff_thread_release_ext_buffer(&f->tf);
-}
-
-#if CONFIG_VP8_DECODER
-static int vp8_ref_frame(VP8Frame *dst, const VP8Frame *src)
-{
- int ret;
-
- vp8_release_frame(dst);
-
- if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
- return ret;
- ff_refstruct_replace(&dst->seg_map, src->seg_map);
- ff_refstruct_replace(&dst->hwaccel_picture_private,
- src->hwaccel_picture_private);
-
- return 0;
+ ff_progress_frame_unref(&f->tf);
}
-#endif /* CONFIG_VP8_DECODER */
-static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
+static av_cold void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
VP8Context *s = avctx->priv_data;
int i;
@@ -161,7 +147,7 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
FF_HW_SIMPLE_CALL(avctx, flush);
}
-static void vp8_decode_flush(AVCodecContext *avctx)
+static av_cold void vp8_decode_flush(AVCodecContext *avctx)
{
vp8_decode_flush_impl(avctx, 0);
}
@@ -184,7 +170,7 @@ static VP8Frame *vp8_find_free_buffer(VP8Context *s)
av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
abort();
}
- if (frame->tf.f->buf[0])
+ if (frame->tf.f)
vp8_release_frame(frame);
return frame;
@@ -355,9 +341,8 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
}
s->coeff_partition_size[i] = buf_size;
- ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
- return 0;
+ return ff_vpx_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
}
static void vp7_get_quants(VP8Context *s)
@@ -1830,7 +1815,7 @@ static const uint8_t subpel_idx[3][8] = {
*/
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
- const ThreadFrame *ref, const VP8mv *mv,
+ const ProgressFrame *ref, const VP8mv *mv,
int x_off, int y_off, int block_w, int block_h,
int width, int height, ptrdiff_t linesize,
vp8_mc_func mc_func[3][3])
@@ -1847,7 +1832,7 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
y_off += mv->y >> 2;
// edge emulation
- ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
+ ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4);
src += y_off * linesize + x_off;
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
@@ -1863,7 +1848,7 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
}
mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
} else {
- ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
+ ff_progress_frame_await(ref, (3 + y_off + block_h) >> 4);
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
linesize, block_h, 0, 0);
}
@@ -1888,7 +1873,7 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
*/
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
- uint8_t *dst2, const ThreadFrame *ref, const VP8mv *mv,
+ uint8_t *dst2, const ProgressFrame *ref, const VP8mv *mv,
int x_off, int y_off, int block_w, int block_h,
int width, int height, ptrdiff_t linesize,
vp8_mc_func mc_func[3][3])
@@ -1905,7 +1890,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
// edge emulation
src1 += y_off * linesize + x_off;
src2 += y_off * linesize + x_off;
- ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
+ ff_progress_frame_await(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3);
if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
@@ -1930,7 +1915,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
}
} else {
- ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
+ ff_progress_frame_await(ref, (3 + y_off + block_h) >> 3);
mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
}
@@ -1938,7 +1923,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
- const ThreadFrame *ref_frame, int x_off, int y_off,
+ const ProgressFrame *ref_frame, int x_off, int y_off,
int bx_off, int by_off, int block_w, int block_h,
int width, int height, const VP8mv *mv)
{
@@ -2003,7 +1988,7 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *const dst[3],
{
int x_off = mb_x << 4, y_off = mb_y << 4;
int width = 16 * s->mb_width, height = 16 * s->mb_height;
- const ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
+ const ProgressFrame *ref = &s->framep[mb->ref_frame]->tf;
const VP8mv *bmv = mb->bmv;
switch (mb->partitioning) {
@@ -2423,7 +2408,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void
// if we re-use the same map.
if (prev_frame && s->segmentation.enabled &&
!s->segmentation.update_map)
- ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
+ ff_progress_frame_await(&prev_frame->tf, mb_y);
mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
@@ -2631,7 +2616,7 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
td->mv_bounds.mv_max.y -= 64 * num_jobs;
if (avctx->active_thread_type == FF_THREAD_FRAME)
- ff_thread_report_progress(&curframe->tf, mb_y, 0);
+ ff_progress_frame_report(&curframe->tf, mb_y);
}
return 0;
@@ -2699,7 +2684,7 @@ int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
// release no longer referenced frames
for (i = 0; i < 5; i++)
- if (s->frames[i].tf.f->buf[0] &&
+ if (s->frames[i].tf.f &&
&s->frames[i] != prev_frame &&
&s->frames[i] != s->framep[VP8_FRAME_PREVIOUS] &&
&s->frames[i] != s->framep[VP8_FRAME_GOLDEN] &&
@@ -2728,14 +2713,14 @@ int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
goto err;
}
+ if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
+ goto err;
if (s->keyframe)
curframe->tf.f->flags |= AV_FRAME_FLAG_KEY;
else
curframe->tf.f->flags &= ~AV_FRAME_FLAG_KEY;
curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
: AV_PICTURE_TYPE_P;
- if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
- goto err;
// check if golden and altref are swapped
if (s->update_altref != VP8_FRAME_NONE)
@@ -2792,7 +2777,7 @@ int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
// if we re-use the same map.
if (prev_frame && s->segmentation.enabled &&
!s->segmentation.update_map)
- ff_thread_await_progress(&prev_frame->tf, 1, 0);
+ ff_progress_frame_await(&prev_frame->tf, 1);
if (is_vp7)
ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
else
@@ -2823,7 +2808,7 @@ int vp78_decode_frame(AVCodecContext *avctx, AVFrame *rframe, int *got_frame,
num_jobs);
}
- ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
+ ff_progress_frame_report(&curframe->tf, INT_MAX);
memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
skip_decode:
@@ -2860,24 +2845,8 @@ static int vp7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
- VP8Context *s = avctx->priv_data;
- int i;
-
vp8_decode_flush_impl(avctx, 1);
- for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
- av_frame_free(&s->frames[i].tf.f);
-
- return 0;
-}
-static av_cold int vp8_init_frames(VP8Context *s)
-{
- int i;
- for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
- s->frames[i].tf.f = av_frame_alloc();
- if (!s->frames[i].tf.f)
- return AVERROR(ENOMEM);
- }
return 0;
}
@@ -2885,7 +2854,6 @@ static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
VP8Context *s = avctx->priv_data;
- int ret;
s->avctx = avctx;
s->pix_fmt = AV_PIX_FMT_NONE;
@@ -2909,11 +2877,6 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
/* does not change for VP8 */
memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
- if ((ret = vp8_init_frames(s)) < 0) {
- ff_vp8_decode_free(avctx);
- return ret;
- }
-
return 0;
}
@@ -2931,13 +2894,20 @@ av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
+static void vp8_replace_frame(VP8Frame *dst, const VP8Frame *src)
+{
+ ff_progress_frame_replace(&dst->tf, &src->tf);
+ ff_refstruct_replace(&dst->seg_map, src->seg_map);
+ ff_refstruct_replace(&dst->hwaccel_picture_private,
+ src->hwaccel_picture_private);
+}
+
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
static int vp8_decode_update_thread_context(AVCodecContext *dst,
const AVCodecContext *src)
{
VP8Context *s = dst->priv_data, *s_src = src->priv_data;
- int i;
if (s->macroblocks_base &&
(s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
@@ -2952,13 +2922,8 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
s->lf_delta = s_src->lf_delta;
memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
- for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
- if (s_src->frames[i].tf.f->buf[0]) {
- int ret = vp8_ref_frame(&s->frames[i], &s_src->frames[i]);
- if (ret < 0)
- return ret;
- }
- }
+ for (int i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++)
+ vp8_replace_frame(&s->frames[i], &s_src->frames[i]);
s->framep[0] = REBASE(s_src->next_framep[0]);
s->framep[1] = REBASE(s_src->next_framep[1]);
@@ -2982,6 +2947,7 @@ const FFCodec ff_vp7_decoder = {
FF_CODEC_DECODE_CB(vp7_decode_frame),
.p.capabilities = AV_CODEC_CAP_DR1,
.flush = vp8_decode_flush,
+ .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
#endif /* CONFIG_VP7_DECODER */
@@ -2997,7 +2963,7 @@ const FFCodec ff_vp8_decoder = {
FF_CODEC_DECODE_CB(ff_vp8_decode_frame),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
AV_CODEC_CAP_SLICE_THREADS,
- .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ .caps_internal = FF_CODEC_CAP_USES_PROGRESSFRAMES,
.flush = vp8_decode_flush,
UPDATE_THREAD_CONTEXT(vp8_decode_update_thread_context),
.hw_configs = (const AVCodecHWConfigInternal *const []) {
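
vp8_mc_luma()/vp8_mc_chroma() now take a ProgressFrame reference and wait on it before reading; the row passed to ff_progress_frame_await() is the last reference macroblock row that the (possibly sub-pixel filtered) block can touch, taken verbatim from the expressions above. A sketch of that bound; the margin constants are simply those used by the decoder and are not re-derived here:

    /* Sketch: last reference macroblock row (luma, 16-pixel rows) that must
     * already be decoded before motion compensation of a block may read it.
     * Constants are taken as-is from the await calls above. */
    static int mc_needed_mb_row(int y_off, int block_h, int subpel_extra)
    {
        return (3 + y_off + block_h + subpel_extra) >> 4;
    }

    /* usage, mirroring vp8_mc_luma():
     *     ff_progress_frame_await(ref, mc_needed_mb_row(y_off, block_h,
     *                                                   subpel_idx[2][my]));
     * the chroma variant shifts by 3 instead of 4 because chroma rows are
     * 8 pixels tall.
     */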
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 798f67b3de..9bdef0aa88 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -31,8 +31,9 @@
#include "libavutil/mem_internal.h"
#include "libavutil/thread.h"
+#include "avcodec.h"
#include "h264pred.h"
-#include "threadframe.h"
+#include "progressframe.h"
#include "videodsp.h"
#include "vp8dsp.h"
#include "vpx_rac.h"
@@ -150,7 +151,7 @@ typedef struct VP8ThreadData {
} VP8ThreadData;
typedef struct VP8Frame {
- ThreadFrame tf;
+ ProgressFrame tf;
uint8_t *seg_map; ///< RefStruct reference
void *hwaccel_picture_private; ///< RefStruct reference
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 72d4ea3793..8624c3ae15 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -681,6 +681,8 @@ av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
ff_vp78dsp_init_arm(dsp);
#elif ARCH_PPC
ff_vp78dsp_init_ppc(dsp);
+#elif ARCH_RISCV
+ ff_vp78dsp_init_riscv(dsp);
#elif ARCH_X86
ff_vp78dsp_init_x86(dsp);
#endif
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index 30dc2c6cc1..3bf12b6b45 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -87,6 +87,7 @@ void ff_vp78dsp_init(VP8DSPContext *c);
void ff_vp78dsp_init_aarch64(VP8DSPContext *c);
void ff_vp78dsp_init_arm(VP8DSPContext *c);
void ff_vp78dsp_init_ppc(VP8DSPContext *c);
+void ff_vp78dsp_init_riscv(VP8DSPContext *c);
void ff_vp78dsp_init_x86(VP8DSPContext *c);
void ff_vp8dsp_init(VP8DSPContext *c);
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 6bcda8bfff..6e2d18bf95 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -30,9 +30,9 @@
#include "hwaccel_internal.h"
#include "hwconfig.h"
#include "profiles.h"
+#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
-#include "threadframe.h"
#include "pthread_internal.h"
#include "videodsp.h"
@@ -55,7 +55,6 @@ DEFINE_OFFSET_ARRAY(VP9Context, vp9_context, pthread_init_cnt,
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
VP9Context *s = avctx->priv_data;
- int i;
if (avctx->active_thread_type & FF_THREAD_SLICE) {
if (s->entries)
@@ -64,9 +63,6 @@ static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
s->entries = av_malloc_array(n, sizeof(atomic_int));
if (!s->entries)
return AVERROR(ENOMEM);
-
- for (i = 0; i < n; i++)
- atomic_init(&s->entries[i], 0);
}
return 0;
}
@@ -100,7 +96,7 @@ static void vp9_tile_data_free(VP9TileData *td)
static void vp9_frame_unref(VP9Frame *f)
{
- ff_thread_release_ext_buffer(&f->tf);
+ ff_progress_frame_unref(&f->tf);
ff_refstruct_unref(&f->extradata);
ff_refstruct_unref(&f->hwaccel_picture_private);
f->segmentation_map = NULL;
@@ -111,7 +107,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
VP9Context *s = avctx->priv_data;
int ret, sz;
- ret = ff_thread_get_ext_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
+ ret = ff_progress_frame_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return ret;
@@ -147,15 +143,11 @@ fail:
return ret;
}
-static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
+static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
{
- int ret;
+ ff_progress_frame_replace(&dst->tf, &src->tf);
- ret = ff_thread_ref_frame(&dst->tf, &src->tf);
- if (ret < 0)
- return ret;
-
- dst->extradata = ff_refstruct_ref(src->extradata);
+ ff_refstruct_replace(&dst->extradata, src->extradata);
dst->segmentation_map = src->segmentation_map;
dst->mv = src->mv;
@@ -163,8 +155,6 @@ static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
ff_refstruct_replace(&dst->hwaccel_picture_private,
src->hwaccel_picture_private);
-
- return 0;
}
static int update_size(AVCodecContext *avctx, int w, int h)
@@ -589,9 +579,9 @@ static int decode_frame_header(AVCodecContext *avctx,
s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
s->s.h.refidx[2] = get_bits(&s->gb, 3);
s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
- if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
- !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
- !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
+ if (!s->s.refs[s->s.h.refidx[0]].f ||
+ !s->s.refs[s->s.h.refidx[1]].f ||
+ !s->s.refs[s->s.h.refidx[2]].f) {
av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
return AVERROR_INVALIDDATA;
}
@@ -611,7 +601,8 @@ static int decode_frame_header(AVCodecContext *avctx,
// Note that in this code, "CUR_FRAME" is actually before we
// have formally allocated a frame, and thus actually represents
// the _last_ frame
- s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
+ s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f &&
+ s->s.frames[CUR_FRAME].tf.f->width == w &&
s->s.frames[CUR_FRAME].tf.f->height == h;
if (get_bits1(&s->gb)) // display size
skip_bits(&s->gb, 32);
@@ -1240,16 +1231,12 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
int i;
- for (i = 0; i < 3; i++) {
+ for (int i = 0; i < 3; i++)
vp9_frame_unref(&s->s.frames[i]);
- av_frame_free(&s->s.frames[i].tf.f);
- }
ff_refstruct_pool_uninit(&s->frame_extradata_pool);
for (i = 0; i < 8; i++) {
- ff_thread_release_ext_buffer(&s->s.refs[i]);
- av_frame_free(&s->s.refs[i].f);
- ff_thread_release_ext_buffer(&s->next_refs[i]);
- av_frame_free(&s->next_refs[i].f);
+ ff_progress_frame_unref(&s->s.refs[i]);
+ ff_progress_frame_unref(&s->next_refs[i]);
}
free_buffers(s);
@@ -1384,7 +1371,7 @@ static int decode_tiles(AVCodecContext *avctx,
// FIXME maybe we can make this more finegrained by running the
// loopfilter per-block instead of after each sbrow
// In fact that would also make intra pred left preparation easier?
- ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
+ ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, row >> 3);
}
}
return 0;
@@ -1561,46 +1548,37 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int ret, i, j, ref;
int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
(!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
+ const VP9Frame *src;
AVFrame *f;
if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
return ret;
} else if (ret == 0) {
- if (!s->s.refs[ref].f->buf[0]) {
+ if (!s->s.refs[ref].f) {
av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
return AVERROR_INVALIDDATA;
}
+ for (int i = 0; i < 8; i++)
+ ff_progress_frame_replace(&s->next_refs[i], &s->s.refs[i]);
+ ff_thread_finish_setup(avctx);
+ ff_progress_frame_await(&s->s.refs[ref], INT_MAX);
+
if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
return ret;
frame->pts = pkt->pts;
frame->pkt_dts = pkt->dts;
- for (i = 0; i < 8; i++) {
- if (s->next_refs[i].f->buf[0])
- ff_thread_release_ext_buffer(&s->next_refs[i]);
- if (s->s.refs[i].f->buf[0] &&
- (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
- return ret;
- }
*got_frame = 1;
return pkt->size;
}
data += ret;
size -= ret;
- if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
- if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
- vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]);
- if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
- (ret = vp9_frame_ref(&s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
- return ret;
- }
- if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
- vp9_frame_unref(&s->s.frames[REF_FRAME_MVPAIR]);
- if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
- (ret = vp9_frame_ref(&s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
- return ret;
- if (s->s.frames[CUR_FRAME].tf.f->buf[0])
- vp9_frame_unref(&s->s.frames[CUR_FRAME]);
+ src = !s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres ?
+ &s->s.frames[CUR_FRAME] : &s->s.frames[BLANK_FRAME];
+ if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly)
+ vp9_frame_replace(&s->s.frames[REF_FRAME_SEGMAP], src);
+ vp9_frame_replace(&s->s.frames[REF_FRAME_MVPAIR], src);
+ vp9_frame_unref(&s->s.frames[CUR_FRAME]);
if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
return ret;
f = s->s.frames[CUR_FRAME].tf.f;
@@ -1610,7 +1588,8 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
f->flags &= ~AV_FRAME_FLAG_KEY;
f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
- if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
+ // Non-existent frames have the implicit dimension 0x0 != CUR_FRAME
+ if (!s->s.frames[REF_FRAME_MVPAIR].tf.f ||
(s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]);
@@ -1618,15 +1597,9 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
// ref frame setup
for (i = 0; i < 8; i++) {
- if (s->next_refs[i].f->buf[0])
- ff_thread_release_ext_buffer(&s->next_refs[i]);
- if (s->s.h.refreshrefmask & (1 << i)) {
- ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
- } else if (s->s.refs[i].f->buf[0]) {
- ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
- }
- if (ret < 0)
- return ret;
+ ff_progress_frame_replace(&s->next_refs[i],
+ s->s.h.refreshrefmask & (1 << i) ?
+ &s->s.frames[CUR_FRAME].tf : &s->s.refs[i]);
}
if (avctx->hwaccel) {
@@ -1684,7 +1657,7 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
#if HAVE_THREADS
if (avctx->active_thread_type & FF_THREAD_SLICE) {
for (i = 0; i < s->sb_rows; i++)
- atomic_store(&s->entries[i], 0);
+ atomic_init(&s->entries[i], 0);
}
#endif
@@ -1735,10 +1708,8 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
#endif
{
ret = decode_tiles(avctx, data, size);
- if (ret < 0) {
- ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
- return ret;
- }
+ if (ret < 0)
+ goto fail;
}
// Sum all counts fields into td[0].counts for tile threading
@@ -1752,28 +1723,24 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
ff_thread_finish_setup(avctx);
}
} while (s->pass++ == 1);
- ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
if (s->td->error_info < 0) {
av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
s->td->error_info = 0;
- return AVERROR_INVALIDDATA;
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
}
if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
if (ret < 0)
- return ret;
+ goto fail;
}
finish:
+ ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
// ref frame setup
- for (i = 0; i < 8; i++) {
- if (s->s.refs[i].f->buf[0])
- ff_thread_release_ext_buffer(&s->s.refs[i]);
- if (s->next_refs[i].f->buf[0] &&
- (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
- return ret;
- }
+ for (int i = 0; i < 8; i++)
+ ff_progress_frame_replace(&s->s.refs[i], &s->next_refs[i]);
if (!s->s.h.invisible) {
if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
@@ -1782,6 +1749,9 @@ finish:
}
return pkt->size;
+fail:
+ ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
+ return ret;
}
static void vp9_decode_flush(AVCodecContext *avctx)
@@ -1792,7 +1762,7 @@ static void vp9_decode_flush(AVCodecContext *avctx)
for (i = 0; i < 3; i++)
vp9_frame_unref(&s->s.frames[i]);
for (i = 0; i < 8; i++)
- ff_thread_release_ext_buffer(&s->s.refs[i]);
+ ff_progress_frame_unref(&s->s.refs[i]);
if (FF_HW_HAS_CB(avctx, flush))
FF_HW_SIMPLE_CALL(avctx, flush);
@@ -1814,42 +1784,18 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
}
#endif
- for (int i = 0; i < 3; i++) {
- s->s.frames[i].tf.f = av_frame_alloc();
- if (!s->s.frames[i].tf.f)
- return AVERROR(ENOMEM);
- }
- for (int i = 0; i < 8; i++) {
- s->s.refs[i].f = av_frame_alloc();
- s->next_refs[i].f = av_frame_alloc();
- if (!s->s.refs[i].f || !s->next_refs[i].f)
- return AVERROR(ENOMEM);
- }
return 0;
}
#if HAVE_THREADS
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
- int i, ret;
VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
- for (i = 0; i < 3; i++) {
- if (s->s.frames[i].tf.f->buf[0])
- vp9_frame_unref(&s->s.frames[i]);
- if (ssrc->s.frames[i].tf.f->buf[0]) {
- if ((ret = vp9_frame_ref(&s->s.frames[i], &ssrc->s.frames[i])) < 0)
- return ret;
- }
- }
- for (i = 0; i < 8; i++) {
- if (s->s.refs[i].f->buf[0])
- ff_thread_release_ext_buffer(&s->s.refs[i]);
- if (ssrc->next_refs[i].f->buf[0]) {
- if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
- return ret;
- }
- }
+ for (int i = 0; i < 3; i++)
+ vp9_frame_replace(&s->s.frames[i], &ssrc->s.frames[i]);
+ for (int i = 0; i < 8; i++)
+ ff_progress_frame_replace(&s->s.refs[i], &ssrc->next_refs[i]);
ff_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
@@ -1889,7 +1835,7 @@ const FFCodec ff_vp9_decoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
.flush = vp9_decode_flush,
UPDATE_THREAD_CONTEXT(vp9_decode_update_thread_context),
.p.profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
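
vp9_decode_frame() now routes its failure paths through a fail: label that still reports INT_MAX on the in-flight ProgressFrame, so frame threads blocked in ff_progress_frame_await() cannot deadlock, and reference rotation becomes a plain ff_progress_frame_replace() loop with no error handling. A minimal sketch of that control flow, with decode_tiles_somehow() standing in for the real tile decoding (hypothetical helper name):

    #include <limits.h>
    #include "progressframe.h"
    #include "vp9dec.h"

    /* Sketch: always report completion on the in-flight frame, even on
     * error, so other threads waiting on it can return. */
    static int decode_frame_body(AVCodecContext *avctx, VP9Context *s)
    {
        int ret = decode_tiles_somehow(avctx, s);   /* hypothetical helper */
        if (ret < 0)
            goto fail;

        ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
        /* rotate references; ff_progress_frame_replace() cannot fail */
        for (int i = 0; i < 8; i++)
            ff_progress_frame_replace(&s->s.refs[i], &s->next_refs[i]);
        return 0;

    fail:
        /* still mark the frame as done so waiters do not deadlock */
        ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
        return ret;
    }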
diff --git a/libavcodec/vp9_mc_template.c b/libavcodec/vp9_mc_template.c
index e654c0e5ed..81e4ed59c7 100644
--- a/libavcodec/vp9_mc_template.c
+++ b/libavcodec/vp9_mc_template.c
@@ -36,7 +36,7 @@ static void FN(inter_pred)(VP9TileData *td)
const VP9Context *s = td->s;
VP9Block *b = td->b;
int row = td->row, col = td->col;
- const ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
+ const ProgressFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
const AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride;
diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c
index 5743f048cc..3a694763ce 100644
--- a/libavcodec/vp9block.c
+++ b/libavcodec/vp9block.c
@@ -22,8 +22,9 @@
*/
#include "libavutil/avassert.h"
+#include "libavutil/frame.h"
-#include "threadframe.h"
+#include "progressframe.h"
#include "vp89_rac.h"
#include "vp9.h"
#include "vp9data.h"
@@ -113,7 +114,7 @@ static void decode_mode(VP9TileData *td)
uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
- ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
+ ff_progress_frame_await(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3);
for (y = 0; y < h4; y++) {
int idx_base = (y + row) * 8 * s->sb_cols + col;
for (x = 0; x < w4; x++)
diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
index 013aac49eb..81dc801052 100644
--- a/libavcodec/vp9dec.h
+++ b/libavcodec/vp9dec.h
@@ -29,8 +29,8 @@
#include <stdatomic.h>
#include "libavutil/mem_internal.h"
+#include "libavutil/pixfmt.h"
#include "libavutil/thread.h"
-#include "libavutil/internal.h"
#include "get_bits.h"
#include "videodsp.h"
@@ -120,7 +120,7 @@ typedef struct VP9Context {
int w, h;
enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
unsigned sb_cols, sb_rows, rows, cols;
- ThreadFrame next_refs[8];
+ ProgressFrame next_refs[8];
struct {
uint8_t lim_lut[64];
@@ -245,7 +245,7 @@ void ff_vp9_decode_block(VP9TileData *td, int row, int col,
VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl, enum BlockPartition bp);
-void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
+void ff_vp9_loopfilter_sb(struct AVCodecContext *avctx, VP9Filter *lflvl,
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff);
void ff_vp9_intra_recon_8bpp(VP9TileData *td,
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
diff --git a/libavcodec/vp9lpf.c b/libavcodec/vp9lpf.c
index 414cede852..afeebebf59 100644
--- a/libavcodec/vp9lpf.c
+++ b/libavcodec/vp9lpf.c
@@ -21,6 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "avcodec.h"
#include "vp9dec.h"
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
diff --git a/libavcodec/vp9mvs.c b/libavcodec/vp9mvs.c
index b93d878d6f..b706d1660f 100644
--- a/libavcodec/vp9mvs.c
+++ b/libavcodec/vp9mvs.c
@@ -21,7 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "threadframe.h"
+#include "progressframe.h"
#include "vp89_rac.h"
#include "vp9data.h"
#include "vp9dec.h"
@@ -175,7 +175,7 @@ static void find_ref_mvs(VP9TileData *td,
VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
- ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
+ ff_progress_frame_await(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3);
if (mv->ref[0] == ref)
RETURN_MV(mv->mv[0]);
else if (mv->ref[1] == ref)
diff --git a/libavcodec/vp9recon.c b/libavcodec/vp9recon.c
index 073c04b47d..ef08ed17c8 100644
--- a/libavcodec/vp9recon.c
+++ b/libavcodec/vp9recon.c
@@ -22,9 +22,10 @@
*/
#include "libavutil/avassert.h"
+#include "libavutil/frame.h"
#include "libavutil/mem_internal.h"
-#include "threadframe.h"
+#include "progressframe.h"
#include "videodsp.h"
#include "vp9data.h"
#include "vp9dec.h"
@@ -298,7 +299,7 @@ void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off
static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
- const ThreadFrame *ref_frame,
+ const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
@@ -314,7 +315,7 @@ static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> 6;
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
@@ -336,7 +337,7 @@ static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_fu
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
- const ThreadFrame *ref_frame,
+ const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
@@ -353,7 +354,7 @@ static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_fu
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
@@ -407,7 +408,7 @@ static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func
const vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
- const ThreadFrame *ref_frame,
+ const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
@@ -444,7 +445,7 @@ static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + refbh_m1 + 4 + 7) >> 6;
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
@@ -467,7 +468,7 @@ static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_fun
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
- const ThreadFrame *ref_frame,
+ const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
@@ -514,7 +515,7 @@ static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_fun
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
diff --git a/libavcodec/vp9shared.h b/libavcodec/vp9shared.h
index b445a2a746..8a450c26a6 100644
--- a/libavcodec/vp9shared.h
+++ b/libavcodec/vp9shared.h
@@ -29,8 +29,8 @@
#include "libavutil/mem_internal.h"
+#include "progressframe.h"
#include "vp9.h"
-#include "threadframe.h"
enum BlockPartition {
PARTITION_NONE, // [ ] <-.
@@ -63,7 +63,7 @@ typedef struct VP9mvrefPair {
} VP9mvrefPair;
typedef struct VP9Frame {
- ThreadFrame tf;
+ ProgressFrame tf;
void *extradata; ///< RefStruct reference
uint8_t *segmentation_map;
VP9mvrefPair *mv;
@@ -164,11 +164,12 @@ typedef struct VP9BitstreamHeader {
typedef struct VP9SharedContext {
VP9BitstreamHeader h;
- ThreadFrame refs[8];
+ ProgressFrame refs[8];
#define CUR_FRAME 0
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2
- VP9Frame frames[3];
+#define BLANK_FRAME 3
+ VP9Frame frames[4];
} VP9SharedContext;
#endif /* AVCODEC_VP9SHARED_H */
diff --git a/libavcodec/vqcdec.c b/libavcodec/vqcdec.c
index 5c6cab3c1a..bb69844327 100644
--- a/libavcodec/vqcdec.c
+++ b/libavcodec/vqcdec.c
@@ -147,10 +147,13 @@ static int decode_vectors(VqcContext * s, const uint8_t * buf, int size, int wid
GetBitContext gb;
uint8_t * vectors = s->vectors;
uint8_t * vectors_end = s->vectors + (width * height * 3) / 2;
+ int ret;
memset(vectors, 0, 3 * width * height / 2);
- init_get_bits8(&gb, buf, size);
+ ret = init_get_bits8(&gb, buf, size);
+ if (ret < 0)
+ return ret;
for (int i = 0; i < 3 * width * height / 2 / 32; i++) {
uint8_t * dst = vectors;
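
The fix above is the standard pattern for GetBitContext initialisation: init_get_bits8() can fail (for example on an oversized buffer), so its return value has to be propagated rather than ignored. A minimal usage sketch with a hypothetical payload parser:

    #include "get_bits.h"

    static int parse_payload(const uint8_t *buf, int size)
    {
        GetBitContext gb;
        int ret = init_get_bits8(&gb, buf, size);
        if (ret < 0)
            return ret;               /* propagate the AVERROR code */

        if (get_bits_left(&gb) < 8)
            return AVERROR_INVALIDDATA;
        return get_bits(&gb, 8);      /* hypothetical: read the first byte */
    }
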
diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index c9e398eaec..a550215e32 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -76,7 +76,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
StdVideoDecodeAV1ReferenceInfo *vkav1_std_ref,
VkVideoDecodeAV1DpbSlotInfoKHR *vkav1_ref, /* Goes in ^ */
const AV1Frame *pic, int is_current, int has_grain,
- int *saved_order_hints)
+ const uint8_t *saved_order_hints)
{
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private;
@@ -97,9 +97,14 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
.RefFrameSignBias = hp->ref_frame_sign_bias_mask,
};
- if (saved_order_hints)
- for (int i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++)
- vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+ if (saved_order_hints) {
+ if (dec->quirk_av1_offset)
+ for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ vkav1_std_ref->SavedOrderHints[i - 1] = saved_order_hints[i];
+ else
+ for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+ }
*vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) {
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR,
@@ -242,7 +247,6 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
const AV1RawFrameHeader *frame_header = s->raw_frame_header;
const AV1RawFilmGrainParams *film_grain = &s->cur_frame.film_grain;
- CodedBitstreamAV1Context *cbs_ctx = (CodedBitstreamAV1Context *)(s->cbc->priv_data);
const int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) &&
film_grain->apply_grain;
@@ -272,7 +276,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
ap->ref_frame_sign_bias_mask = 0x0;
for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
- ap->ref_frame_sign_bias_mask |= cbs_ctx->ref_frame_sign_bias[i] << i;
+ ap->ref_frame_sign_bias_mask |= pic->ref_frame_sign_bias[i] << i;
for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) {
const int idx = pic->raw_frame_header->ref_frame_idx[i];
@@ -280,7 +284,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
int found = 0;
- if (ref_frame->f->pict_type == AV_PICTURE_TYPE_NONE)
+ if (!ref_frame->f)
continue;
for (int j = 0; j < ref_count; j++) {
@@ -294,7 +298,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
err = vk_av1_fill_pict(avctx, &ap->ref_src[ref_count], &vp->ref_slots[ref_count],
&vp->refs[ref_count], &ap->std_refs[ref_count], &ap->vkav1_refs[ref_count],
- ref_frame, 0, 0, cbs_ctx->ref[idx].saved_order_hints);
+ ref_frame, 0, 0, ref_frame->order_hints);
if (err < 0)
return err;
@@ -322,7 +326,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
const AV1Frame *ref_frame = &s->ref[idx];
AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private;
- if (ref_frame->f->pict_type == AV_PICTURE_TYPE_NONE)
+ if (!ref_frame->f)
ap->av1_pic_info.referenceNameSlotIndices[i] = AV1_REF_FRAME_NONE;
else
ap->av1_pic_info.referenceNameSlotIndices[i] = hp->frame_id;
@@ -431,7 +435,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
.render_and_frame_size_different = frame_header->render_and_frame_size_different,
.allow_screen_content_tools = frame_header->allow_screen_content_tools,
.is_filter_switchable = frame_header->is_filter_switchable,
- .force_integer_mv = frame_header->force_integer_mv,
+ .force_integer_mv = pic->force_integer_mv,
.frame_size_override_flag = frame_header->frame_size_override_flag,
.buffer_removal_time_present_flag = frame_header->buffer_removal_time_present_flag,
.allow_intrabc = frame_header->allow_intrabc,
@@ -491,8 +495,20 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
}
}
- for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ if (dec->quirk_av1_offset)
+ for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ ap->std_pic_info.OrderHints[i - 1] = pic->order_hints[i];
+ else
+ for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ ap->std_pic_info.OrderHints[i] = pic->order_hints[i];
+
+ for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) {
ap->loop_filter.loop_filter_ref_deltas[i] = frame_header->loop_filter_ref_deltas[i];
+ ap->global_motion.GmType[i] = s->cur_frame.gm_type[i];
+ for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) {
+ ap->global_motion.gm_params[i][j] = s->cur_frame.gm_params[i][j];
+ }
+ }
for (int i = 0; i < STD_VIDEO_AV1_MAX_CDEF_FILTER_STRENGTHS; i++) {
ap->cdef.cdef_y_pri_strength[i] = frame_header->cdef_y_pri_strength[i];
@@ -501,14 +517,6 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
ap->cdef.cdef_uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i];
}
- for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
- ap->std_pic_info.OrderHints[i] = frame_header->ref_order_hint[i];
- ap->global_motion.GmType[i] = s->cur_frame.gm_type[i];
- for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) {
- ap->global_motion.gm_params[i][j] = s->cur_frame.gm_params[i][j];
- }
- }
-
if (apply_grain) {
for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_Y_POINTS; i++) {
ap->film_grain.point_y_value[i] = film_grain->point_y_value[i];
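
The vk_av1_start_frame() changes above stop reaching into the CodedBitstreamAV1Context and instead take sign bias, order hints and force_integer_mv from the AV1Frame itself, and they skip reference slots whose frame buffer is absent rather than relying on a sentinel pict_type. The OrderHints handling is also made conditional on the new quirk flag described in the next two files.
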
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 9c6c2d4efb..d8c75cd0e6 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1115,6 +1115,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
FFVulkanFunctions *vk;
const VkVideoProfileInfoKHR *profile;
const FFVulkanDecodeDescriptor *vk_desc;
+ const VkPhysicalDeviceDriverProperties *driver_props;
VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
@@ -1276,6 +1277,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
return AVERROR_EXTERNAL;
}
+ driver_props = &dec->shared_ctx->s.driver_props;
+ if (driver_props->driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY &&
+ driver_props->conformanceVersion.major == 1 &&
+ driver_props->conformanceVersion.minor == 3 &&
+ driver_props->conformanceVersion.subminor == 8 &&
+ driver_props->conformanceVersion.patch < 3)
+ dec->quirk_av1_offset = 1;
+
ff_vk_decode_flush(avctx);
av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization successful\n");
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 7ba8b239cb..076af93499 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -72,6 +72,10 @@ typedef struct FFVulkanDecodeContext {
int external_fg; /* Oddity #2 - hardware can't apply film grain */
uint32_t frame_id_alloc_mask; /* For AV1 only */
+ /* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1.
+ * The tests were incorrect as the OrderHints were offset by 1. */
+ int quirk_av1_offset;
+
/* Thread-local state below */
struct HEVCHeaderSet *hevc_headers;
size_t hevc_headers_size;
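
quirk_av1_offset is consumed by the vulkan_av1.c hunks above: when it is set, the OrderHints/SavedOrderHints arrays handed to the driver are shifted down by one slot to match what the affected NVIDIA releases expect; otherwise they are copied one-to-one. A condensed sketch of that copy (the helper name is hypothetical):

    static void copy_order_hints(uint8_t *dst, const uint8_t *src,
                                 int total_refs, int quirk_offset)
    {
        if (quirk_offset) {
            for (int i = 1; i < total_refs; i++)
                dst[i - 1] = src[i];   /* driver expects hints shifted by one */
        } else {
            for (int i = 0; i < total_refs; i++)
                dst[i] = src[i];
        }
    }
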
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index df86049d22..4b7b303e4f 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -351,13 +351,15 @@ static void set_sps(const HEVCSPS *sps, int sps_idx,
pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j];
for (int i = 0; i < sps->nb_st_rps; i++) {
+ const ShortTermRPS *st_rps = &sps->st_rps[i];
+
str[i] = (StdVideoH265ShortTermRefPicSet) {
.flags = (StdVideoH265ShortTermRefPicSetFlags) {
.inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict,
.delta_rps_sign = sps->st_rps[i].delta_rps_sign,
},
.delta_idx_minus1 = sps->st_rps[i].delta_idx - 1,
- .use_delta_flag = sps->st_rps[i].use_delta_flag,
+ .use_delta_flag = sps->st_rps[i].use_delta,
.abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1,
.used_by_curr_pic_flag = 0x0,
.used_by_curr_pic_s0_flag = 0x0,
@@ -371,17 +373,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx,
/* NOTE: This is the predicted, and *reordered* version.
* Probably incorrect, but the spec doesn't say which version to use. */
- for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++)
- str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j;
+ str[i].used_by_curr_pic_flag = st_rps->used;
for (int j = 0; j < str[i].num_negative_pics; j++) {
- str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1;
- str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j;
+ str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1;
+ str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1);
}
for (int j = 0; j < str[i].num_positive_pics; j++) {
- str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1;
- str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j;
+ str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] -
+ (j ? st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1;
+ str[i].used_by_curr_pic_s1_flag |= st_rps->used >> str[i].num_negative_pics;
}
}
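
The set_sps() rewrite above derives delta_poc_sX_minus1[] from the parser's cumulative delta_poc[] array: each Vulkan entry is the difference to the previous cumulative value, minus one, and the used_by_curr_pic flags are now taken from a bitmask rather than a per-entry array. A standalone sketch of the delta conversion (names and types are illustrative):

    static void cumulative_to_minus1(const int32_t *cumulative, int32_t *minus1, int n)
    {
        for (int j = 0; j < n; j++) {
            int32_t prev = j ? cumulative[j - 1] : 0;
            minus1[j] = cumulative[j] - prev - 1;  /* per-step delta, minus one */
        }
    }
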
diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index 53f92ca10f..809510b93d 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -1263,8 +1263,8 @@ static void derive_mmvd(const VVCLocalContext *lc, MvField *mvf, const Mv *mmvd_
const RefPicList *rpl = sc->rpl;
const int poc = lc->fc->ps.ph.poc;
const int diff[] = {
- poc - rpl[0].list[mvf->ref_idx[0]],
- poc - rpl[1].list[mvf->ref_idx[1]]
+ poc - rpl[L0].refs[mvf->ref_idx[L0]].poc,
+ poc - rpl[L1].refs[mvf->ref_idx[L1]].poc
};
const int sign = FFSIGN(diff[0]) != FFSIGN(diff[1]);
@@ -1275,7 +1275,7 @@ static void derive_mmvd(const VVCLocalContext *lc, MvField *mvf, const Mv *mmvd_
const int i = FFABS(diff[0]) < FFABS(diff[1]);
const int o = !i;
mmvd[i] = *mmvd_offset;
- if (!rpl[0].isLongTerm[mvf->ref_idx[0]] && !rpl[1].isLongTerm[mvf->ref_idx[1]]) {
+ if (!rpl[L0].refs[mvf->ref_idx[L0]].is_lt && !rpl[L1].refs[mvf->ref_idx[L1]].is_lt) {
ff_vvc_mv_scale(&mmvd[o], mmvd_offset, diff[i], diff[o]);
}
else {
@@ -1689,25 +1689,25 @@ static void derive_dmvr_bdof_flag(const VVCLocalContext *lc, PredictionUnit *pu)
const VVCPH *ph = &fc->ps.ph;
const VVCSH *sh = &lc->sc->sh;
const int poc = ph->poc;
- const RefPicList *rpl0 = lc->sc->rpl + L0;
- const RefPicList *rpl1 = lc->sc->rpl + L1;
- const int8_t *ref_idx = pu->mi.ref_idx;
const MotionInfo *mi = &pu->mi;
+ const int8_t *ref_idx = mi->ref_idx;
+ const VVCRefPic *rp0 = &lc->sc->rpl[L0].refs[ref_idx[L0]];
+ const VVCRefPic *rp1 = &lc->sc->rpl[L1].refs[ref_idx[L1]];
const CodingUnit *cu = lc->cu;
const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? &fc->ps.ph.pwt : &sh->pwt;
pu->bdof_flag = 0;
if (mi->pred_flag == PF_BI &&
- (poc - rpl0->list[ref_idx[L0]] == rpl1->list[ref_idx[L1]] - poc) &&
- !rpl0->isLongTerm[ref_idx[L0]] && !rpl1->isLongTerm[ref_idx[L1]] &&
+ (poc - rp0->poc == rp1->poc - poc) &&
+ !rp0->is_lt && !rp1->is_lt &&
!cu->ciip_flag &&
!mi->bcw_idx &&
- !w->weight_flag[L0][LUMA][mi->ref_idx[L0]] && !w->weight_flag[L1][LUMA][mi->ref_idx[L1]] &&
- !w->weight_flag[L0][CHROMA][mi->ref_idx[L0]] && !w->weight_flag[L1][CHROMA][mi->ref_idx[L1]] &&
+ !w->weight_flag[L0][LUMA][ref_idx[L0]] && !w->weight_flag[L1][LUMA][ref_idx[L1]] &&
+ !w->weight_flag[L0][CHROMA][ref_idx[L0]] && !w->weight_flag[L1][CHROMA][ref_idx[L1]] &&
cu->cb_width >= 8 && cu->cb_height >= 8 &&
- (cu->cb_width * cu->cb_height >= 128)) {
- // fixme: for RprConstraintsActiveFlag
+ (cu->cb_width * cu->cb_height >= 128) &&
+ !rp0->is_scaled && !rp1->is_scaled) {
if (!ph->r->ph_bdof_disabled_flag &&
mi->motion_model_idc == MOTION_TRANSLATION &&
!pu->merge_subblock_flag &&
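
The derive_dmvr_bdof_flag() changes above fold the reference-side conditions into the new VVCRefPic fields: both references must be short-term, unscaled (RprConstraintsActiveFlag false, replacing the old fixme), and equidistant from the current picture on opposite sides. A condensed, illustrative form of that check:

    static int refs_allow_bdof(int cur_poc,
                               int poc0, int is_lt0, int is_scaled0,
                               int poc1, int is_lt1, int is_scaled1)
    {
        return !is_lt0 && !is_lt1 &&                  /* short-term only */
               !is_scaled0 && !is_scaled1 &&          /* RPR must be inactive */
               (cur_poc - poc0) == (poc1 - cur_poc);  /* symmetric distances */
    }
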
diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index 4e38ecf54a..a987328d81 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -46,7 +46,8 @@
#define MAX_QP 63
#define MAX_PB_SIZE 128
-#define EDGE_EMU_BUFFER_STRIDE (MAX_PB_SIZE + 32)
+#define MAX_SCALING_RATIO 8
+#define EDGE_EMU_BUFFER_STRIDE ((MAX_PB_SIZE + 32) * MAX_SCALING_RATIO)
#define CHROMA_EXTRA_BEFORE 1
#define CHROMA_EXTRA_AFTER 2
@@ -58,6 +59,8 @@
#define BILINEAR_EXTRA_AFTER 1
#define BILINEAR_EXTRA 1
+#define SCALED_INT(pos) ((pos) >> 10)
+
#define MAX_CONTROL_POINTS 3
#define AFFINE_MIN_BLOCK_SIZE 4
@@ -373,15 +376,12 @@ typedef struct VVCLocalContext {
int end_of_tiles_x;
int end_of_tiles_y;
- /* +7 is for subpixel interpolation, *2 for high bit depths */
- DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
- /* The extended size between the new edge emu buffer is abused by SAO */
- DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+ /* *2 for high bit depths */
+ DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[EDGE_EMU_BUFFER_STRIDE * EDGE_EMU_BUFFER_STRIDE * 2];
DECLARE_ALIGNED(32, int16_t, tmp)[MAX_PB_SIZE * MAX_PB_SIZE];
DECLARE_ALIGNED(32, int16_t, tmp1)[MAX_PB_SIZE * MAX_PB_SIZE];
DECLARE_ALIGNED(32, int16_t, tmp2)[MAX_PB_SIZE * MAX_PB_SIZE];
- DECLARE_ALIGNED(32, uint8_t, ciip_tmp1)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
- DECLARE_ALIGNED(32, uint8_t, ciip_tmp2)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
+ DECLARE_ALIGNED(32, uint8_t, ciip_tmp)[MAX_PB_SIZE * MAX_PB_SIZE * 2];
DECLARE_ALIGNED(32, uint8_t, sao_buffer)[(MAX_CTU_SIZE + 2 * SAO_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
DECLARE_ALIGNED(32, uint8_t, alf_buffer_luma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
DECLARE_ALIGNED(32, uint8_t, alf_buffer_chroma)[(MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE) * EDGE_EMU_BUFFER_STRIDE * 2];
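
SCALED_INT() treats the RPR reference positions computed in inter.c as 10-bit fixed point, i.e. 1/1024-pel units, and the EDGE_EMU_BUFFER_STRIDE growth by MAX_SCALING_RATIO reflects that a downscaled block can read several times its own width from the reference. A tiny sanity check of that fixed-point meaning:

    #include <assert.h>

    #define SCALED_INT(pos) ((pos) >> 10)   /* drop the 10 fractional bits */

    int main(void)
    {
        int pos = (11 << 10) | 384;         /* reference pel 11 + 384/1024 pel */
        assert(SCALED_INT(pos) == 11);
        return 0;
    }
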
diff --git a/libavcodec/vvc/data.c b/libavcodec/vvc/data.c
index ace585b663..a91e20754e 100644
--- a/libavcodec/vvc/data.c
+++ b/libavcodec/vvc/data.c
@@ -1732,7 +1732,7 @@ const uint8_t ff_vvc_alf_aps_class_to_filt_map[25] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
};
-const int8_t ff_vvc_inter_luma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS] = {
+const int8_t ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS] = {
{
//1x, hpelIfIdx == 0, Table 27
{ 0, 0, 0, 64, 0, 0, 0, 0 },
@@ -1774,6 +1774,46 @@ const int8_t ff_vvc_inter_luma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_LUMA_FA
},
{
+ //1.5x, Table 28
+ { -1, -5, 17, 42, 17, -5, -1, 0 },
+ { 0, -5, 15, 41, 19, -5, -1, 0 },
+ { 0, -5, 13, 40, 21, -4, -1, 0 },
+ { 0, -5, 11, 39, 24, -4, -2, 1 },
+ { 0, -5, 9, 38, 26, -3, -2, 1 },
+ { 0, -5, 7, 38, 28, -2, -3, 1 },
+ { 1, -5, 5, 36, 30, -1, -3, 1 },
+ { 1, -4, 3, 35, 32, 0, -4, 1 },
+ { 1, -4, 2, 33, 33, 2, -4, 1 },
+ { 1, -4, 0, 32, 35, 3, -4, 1 },
+ { 1, -3, -1, 30, 36, 5, -5, 1 },
+ { 1, -3, -2, 28, 38, 7, -5, 0 },
+ { 1, -2, -3, 26, 38, 9, -5, 0 },
+ { 1, -2, -4, 24, 39, 11, -5, 0 },
+ { 0, -1, -4, 21, 40, 13, -5, 0 },
+ { 0, -1, -5, 19, 41, 15, -5, 0 },
+ },
+
+ {
+ //2x, Table 29
+ { -4, 2, 20, 28, 20, 2, -4, 0 },
+ { -4, 0, 19, 29, 21, 5, -4, -2 },
+ { -4, -1, 18, 29, 22, 6, -4, -2 },
+ { -4, -1, 16, 29, 23, 7, -4, -2 },
+ { -4, -1, 16, 28, 24, 7, -4, -2 },
+ { -4, -1, 14, 28, 25, 8, -4, -2 },
+ { -3, -3, 14, 27, 26, 9, -3, -3 },
+ { -3, -1, 12, 28, 25, 10, -4, -3 },
+ { -3, -3, 11, 27, 27, 11, -3, -3 },
+ { -3, -4, 10, 25, 28, 12, -1, -3 },
+ { -3, -3, 9, 26, 27, 14, -3, -3 },
+ { -2, -4, 8, 25, 28, 14, -1, -4 },
+ { -2, -4, 7, 24, 28, 16, -1, -4 },
+ { -2, -4, 7, 23, 29, 16, -1, -4 },
+ { -2, -4, 6, 22, 29, 18, -1, -4 },
+ { -2, -4, 5, 21, 29, 19, 0, -4 },
+ },
+
+ {
//1x, affine, Table 30
{ 0, 0, 0, 64, 0, 0, 0, 0 },
{ 0, 1, -3, 63, 4, -2, 1, 0 },
@@ -1793,9 +1833,48 @@ const int8_t ff_vvc_inter_luma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_LUMA_FA
{ 0, 1, -2, 4, 63, -3, 1, 0 },
},
+ {
+ //1.5x, affine, Table 31
+ { 0, -6, 17, 42, 17, -5, -1, 0 },
+ { 0, -5, 15, 41, 19, -5, -1, 0 },
+ { 0, -5, 13, 40, 21, -4, -1, 0 },
+ { 0, -5, 11, 39, 24, -4, -1, 0 },
+ { 0, -5, 9, 38, 26, -3, -1, 0 },
+ { 0, -5, 7, 38, 28, -2, -2, 0 },
+ { 0, -4, 5, 36, 30, -1, -2, 0 },
+ { 0, -3, 3, 35, 32, 0, -3, 0 },
+ { 0, -3, 2, 33, 33, 2, -3, 0 },
+ { 0, -3, 0, 32, 35, 3, -3, 0 },
+ { 0, -2, -1, 30, 36, 5, -4, 0 },
+ { 0, -2, -2, 28, 38, 7, -5, 0 },
+ { 0, -1, -3, 26, 38, 9, -5, 0 },
+ { 0, -1, -4, 24, 39, 11, -5, 0 },
+ { 0, -1, -4, 21, 40, 13, -5, 0 },
+ { 0, -1, -5, 19, 41, 15, -5, 0 },
+ },
+
+ {
+ //2x, affine, Table 32
+ { 0, -2, 20, 28, 20, 2, -4, 0 },
+ { 0, -4, 19, 29, 21, 5, -6, 0 },
+ { 0, -5, 18, 29, 22, 6, -6, 0 },
+ { 0, -5, 16, 29, 23, 7, -6, 0 },
+ { 0, -5, 16, 28, 24, 7, -6, 0 },
+ { 0, -5, 14, 28, 25, 8, -6, 0 },
+ { 0, -6, 14, 27, 26, 9, -6, 0 },
+ { 0, -4, 12, 28, 25, 10, -7, 0 },
+ { 0, -6, 11, 27, 27, 11, -6, 0 },
+ { 0, -7, 10, 25, 28, 12, -4, 0 },
+ { 0, -6, 9, 26, 27, 14, -6, 0 },
+ { 0, -6, 8, 25, 28, 14, -5, 0 },
+ { 0, -6, 7, 24, 28, 16, -5, 0 },
+ { 0, -6, 7, 23, 29, 16, -5, 0 },
+ { 0, -6, 6, 22, 29, 18, -5, 0 },
+ { 0, -6, 5, 21, 29, 19, -4, 0 },
+ }
};
-const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS] = {
+const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_CHROMA_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS] = {
{
//1x, Table 33
{ 0, 64, 0, 0 },
diff --git a/libavcodec/vvc/data.h b/libavcodec/vvc/data.h
index e493b9e0e6..a0512e626b 100644
--- a/libavcodec/vvc/data.h
+++ b/libavcodec/vvc/data.h
@@ -43,15 +43,19 @@ extern const int8_t ff_vvc_lfnst_8x8[4][2][16][48];
extern const uint8_t ff_vvc_lfnst_tr_set_index[95];
extern uint8_t ff_vvc_default_scale_m[64 * 64];
-#define VVC_INTER_FILTER_TYPES 3
+#define VVC_INTER_LUMA_FILTER_TYPE_AFFINE 4
+
+#define VVC_INTER_LUMA_FILTER_TYPES 7
+#define VVC_INTER_CHROMA_FILTER_TYPES 3
+
#define VVC_INTER_LUMA_FACTS 16
#define VVC_INTER_LUMA_TAPS 8
#define VVC_INTER_CHROMA_FACTS 32
#define VVC_INTER_CHROMA_TAPS 4
#define VVC_INTER_LUMA_DMVR_FACTS 16
#define VVC_INTER_LUMA_DMVR_TAPS 2
-extern const int8_t ff_vvc_inter_luma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS];
-extern const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS];
+extern const int8_t ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPES][VVC_INTER_LUMA_FACTS][VVC_INTER_LUMA_TAPS];
+extern const int8_t ff_vvc_inter_chroma_filters[VVC_INTER_CHROMA_FILTER_TYPES][VVC_INTER_CHROMA_FACTS][VVC_INTER_CHROMA_TAPS];
extern const int8_t ff_vvc_inter_luma_dmvr_filters[VVC_INTER_LUMA_DMVR_FACTS][VVC_INTER_LUMA_DMVR_TAPS];
#define VVC_INTRA_LUMA_TYPES 2
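
The luma table now holds seven sets: 1x (indices 0 and 1, the latter being the hpelIfIdx variant), 1.5x, 2x, and the three affine counterparts starting at VVC_INTER_LUMA_FILTER_TYPE_AFFINE. inter.c (further below) picks a set from the reference scaling factor via SCALE_THRESHOLD_1/2, which in the 1.14 fixed-point convention correspond to 1.25x and 1.75x. An illustrative condensation of that selection:

    static int scaled_luma_filter_set(int scale /* 1.14 fixed point */, int is_affine)
    {
        int i = (scale > 28672) + (scale > 20480); /* 0: <=1.25x, 1: <=1.75x, 2: larger */

        /* non-affine scaled sets are 0/2/3 (the hpel set at index 1 is skipped),
         * affine sets are 4/5/6 */
        return is_affine ? 4 + i : i + !!i;
    }
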
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index eb447604fe..e53ad4e607 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -403,8 +403,8 @@ static int8_t smvd_find(const VVCFrameContext *fc, const SliceContext *sc, int l
int8_t idx = -1;
int old_diff = -1;
for (int i = 0; i < rsh->num_ref_idx_active[lx]; i++) {
- if (!rpl->isLongTerm[i]) {
- int diff = poc - rpl->list[i];
+ if (!rpl->refs[i].is_lt) {
+ int diff = poc - rpl->refs[i].poc;
if (find(idx, diff, old_diff)) {
idx = i;
old_diff = diff;
@@ -497,9 +497,11 @@ static void ep_init_cabac_decoder(SliceContext *sc, const int index,
skipped++;
}
size = end - start;
+ size = av_clip(size, 0, get_bits_left(gb) / 8);
} else {
size = get_bits_left(gb) / 8;
}
+ av_assert0(gb->buffer + get_bits_count(gb) / 8 + size <= gb->buffer_end);
ff_init_cabac_decoder (&ep->cc, gb->buffer + get_bits_count(gb) / 8, size);
skip_bits(gb, size * 8);
}
@@ -512,6 +514,7 @@ static int slice_init_entry_points(SliceContext *sc,
int nb_eps = sh->r->num_entry_points + 1;
int ctu_addr = 0;
GetBitContext gb;
+ int ret;
if (sc->nb_eps != nb_eps) {
eps_free(sc);
@@ -521,7 +524,9 @@ static int slice_init_entry_points(SliceContext *sc,
sc->nb_eps = nb_eps;
}
- init_get_bits8(&gb, slice->data, slice->data_size);
+ ret = init_get_bits8(&gb, slice->data, slice->data_size);
+ if (ret < 0)
+ return ret;
for (int i = 0; i < sc->nb_eps; i++)
{
EntryPoint *ep = sc->eps + i;
@@ -558,6 +563,9 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src)
if (ret < 0)
return ret;
+ ff_refstruct_replace(&dst->sps, src->sps);
+ ff_refstruct_replace(&dst->pps, src->pps);
+
ff_refstruct_replace(&dst->progress, src->progress);
ff_refstruct_replace(&dst->tab_dmvr_mvf, src->tab_dmvr_mvf);
@@ -568,6 +576,11 @@ static int ref_frame(VVCFrame *dst, const VVCFrame *src)
dst->poc = src->poc;
dst->ctb_count = src->ctb_count;
+
+ dst->scaling_win = src->scaling_win;
+ dst->ref_width = src->ref_width;
+ dst->ref_height = src->ref_height;
+
dst->flags = src->flags;
dst->sequence = src->sequence;
@@ -783,6 +796,12 @@ static int decode_nal_unit(VVCContext *s, VVCFrameContext *fc, const H2645NAL *n
s->temporal_id = nal->temporal_id;
+ if (nal->nuh_layer_id > 0) {
+ avpriv_report_missing_feature(fc->log_ctx,
+ "Decoding of multilayer bitstreams");
+ return AVERROR_PATCHWELCOME;
+ }
+
switch (unit->type) {
case VVC_VPS_NUT:
case VVC_SPS_NUT:
@@ -885,10 +904,16 @@ static int wait_delayed_frame(VVCContext *s, AVFrame *output, int *got_output)
static int submit_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *output, int *got_output)
{
- int ret;
+ int ret = ff_vvc_frame_submit(s, fc);
+
+ if (ret < 0) {
+ ff_vvc_report_frame_finished(fc->ref);
+ return ret;
+ }
+
s->nb_frames++;
s->nb_delayed++;
- ff_vvc_frame_submit(s, fc);
+
if (s->nb_delayed >= s->nb_fcs) {
if ((ret = wait_delayed_frame(s, output, got_output)) < 0)
return ret;
@@ -963,6 +988,8 @@ static av_cold void vvc_decode_flush(AVCodecContext *avctx)
ff_vvc_flush_dpb(last);
}
+ s->ps.sps_id_used = 0;
+
s->eos = 1;
}
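
The ep_init_cabac_decoder() change clamps each entry point's byte count to what is actually left in the bitstream before handing it to the CABAC decoder, so a corrupt entry-point offset can no longer point past the buffer; the added assert documents the invariant. A minimal sketch of the same guard:

    static int clamp_segment_size(int wanted, int bits_left)
    {
        int avail = bits_left / 8;                /* whole bytes still unread */

        if (wanted < 0)
            wanted = 0;
        return wanted < avail ? wanted : avail;   /* never read past the buffer */
    }
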
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index 4dacefc06a..1e0b76f283 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -42,10 +42,18 @@
#define L0 0
#define L1 1
+typedef struct VVCRefPic {
+ struct VVCFrame *ref;
+ int poc;
+ int is_lt; // is long term reference
+
+ // for RPR
+ int is_scaled; ///< RprConstraintsActiveFlag
+ int scale[2]; ///< RefPicScale[]
+} VVCRefPic;
+
typedef struct RefPicList {
- struct VVCFrame *ref[VVC_MAX_REF_ENTRIES];
- int list[VVC_MAX_REF_ENTRIES];
- int isLongTerm[VVC_MAX_REF_ENTRIES];
+ VVCRefPic refs[VVC_MAX_REF_ENTRIES];
int nb_refs;
} RefPicList;
@@ -53,9 +61,18 @@ typedef struct RefPicListTab {
RefPicList refPicList[2];
} RefPicListTab;
+typedef struct VVCWindow {
+ int16_t left_offset;
+ int16_t right_offset;
+ int16_t top_offset;
+ int16_t bottom_offset;
+} VVCWindow;
+
typedef struct VVCFrame {
struct AVFrame *frame;
+ const VVCSPS *sps; ///< RefStruct reference
+ const VVCPPS *pps; ///< RefStruct reference
struct MvField *tab_dmvr_mvf; ///< RefStruct reference
RefPicListTab **rpl_tab; ///< RefStruct reference
RefPicListTab *rpl; ///< RefStruct reference
@@ -65,6 +82,12 @@ typedef struct VVCFrame {
int poc;
+ //for RPR
+ VVCWindow scaling_win; ///< pps_scaling_win_left_offset * SubWidthC, pps_scaling_win_right_offset * SubWidthC,
+ ///< pps_scaling_win_top_offset * SubHeightC, pps_scaling_win_bottom_offset * SubHeightC
+ int ref_width; ///< CurrPicScalWinWidthL
+ int ref_height; ///< CurrPicScalWinHeightL
+
struct VVCFrame *collocated_ref;
struct FrameProgress *progress; ///< RefStruct reference
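
ref_width/ref_height (CurrPicScalWinWidthL/HeightL) are the scaling-window dimensions from which the per-reference scale[] factors (RefPicScale[]) in VVCRefPic are derived. A hedged sketch of how such a factor is typically computed, assuming the 1.14 fixed-point-with-rounding convention used elsewhere in this patch; the helper name is hypothetical:

    #include <stdint.h>

    /* 1 << 14 means "reference and current scaling windows have the same size" */
    static int rpr_scale_factor(int ref_scaled_size, int cur_scaled_size)
    {
        return (int)((((int64_t)ref_scaled_size << 14) + (cur_scaled_size >> 1)) /
                     cur_scaled_size);
    }

With equal window sizes this yields exactly 1 << 14, the identity value against which the SCALE_THRESHOLD cut-offs in inter.c are compared.
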
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index 9810ac314c..1f14096c41 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -57,6 +57,19 @@ typedef struct VVCInterDSPContext {
uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int height,
int denom, int wx, int ox, const int8_t *hf, const int8_t *vf, int width);
+ void (*put_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
+ int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int src_height,
+ int x, int y, int dx, int dy, int height, const int8_t *hf, const int8_t *vf, int width);
+
+ void (*put_uni_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
+ uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int src_height,
+ int x, int y, int dx, int dy, int height, const int8_t *hf, const int8_t *vf, int width);
+
+ void (*put_uni_w_scaled[2 /* luma, chroma */][7 /* log2(width) - 1 */])(
+ uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, int src_height,
+ int x, int y, int dx, int dy, int height, int denom, int wx, int ox, const int8_t *hf, const int8_t *vf,
+ int width);
+
void (*avg)(uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height);
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 8f44255ce4..7844d34eac 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -321,9 +321,9 @@ static int boundary_strength(const VVCLocalContext *lc, const MvField *curr, con
if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) {
// same L0 and L1
- if (rpl[0].list[curr->ref_idx[0]] == neigh_rpl[0].list[neigh->ref_idx[0]] &&
- rpl[0].list[curr->ref_idx[0]] == rpl[1].list[curr->ref_idx[1]] &&
- neigh_rpl[0].list[neigh->ref_idx[0]] == neigh_rpl[1].list[neigh->ref_idx[1]]) {
+ if (rpl[L0].refs[curr->ref_idx[L0]].poc == neigh_rpl[L0].refs[neigh->ref_idx[L0]].poc &&
+ rpl[L0].refs[curr->ref_idx[L0]].poc == rpl[L1].refs[curr->ref_idx[L1]].poc &&
+ neigh_rpl[L0].refs[neigh->ref_idx[L0]].poc == neigh_rpl[L1].refs[neigh->ref_idx[L1]].poc) {
if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 8 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 8 ||
FFABS(neigh->mv[1].x - curr->mv[1].x) >= 8 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 8) &&
(FFABS(neigh->mv[1].x - curr->mv[0].x) >= 8 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 8 ||
@@ -331,15 +331,15 @@ static int boundary_strength(const VVCLocalContext *lc, const MvField *curr, con
return 1;
else
return 0;
- } else if (neigh_rpl[0].list[neigh->ref_idx[0]] == rpl[0].list[curr->ref_idx[0]] &&
- neigh_rpl[1].list[neigh->ref_idx[1]] == rpl[1].list[curr->ref_idx[1]]) {
+ } else if (neigh_rpl[L0].refs[neigh->ref_idx[L0]].poc == rpl[L0].refs[curr->ref_idx[L0]].poc &&
+ neigh_rpl[L1].refs[neigh->ref_idx[L1]].poc == rpl[L1].refs[curr->ref_idx[L1]].poc) {
if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 8 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 8 ||
FFABS(neigh->mv[1].x - curr->mv[1].x) >= 8 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 8)
return 1;
else
return 0;
- } else if (neigh_rpl[1].list[neigh->ref_idx[1]] == rpl[0].list[curr->ref_idx[0]] &&
- neigh_rpl[0].list[neigh->ref_idx[0]] == rpl[1].list[curr->ref_idx[1]]) {
+ } else if (neigh_rpl[L1].refs[neigh->ref_idx[L1]].poc == rpl[L0].refs[curr->ref_idx[L0]].poc &&
+ neigh_rpl[L0].refs[neigh->ref_idx[L0]].poc == rpl[L1].refs[curr->ref_idx[L1]].poc) {
if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 8 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 8 ||
FFABS(neigh->mv[0].x - curr->mv[1].x) >= 8 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 8)
return 1;
@@ -354,18 +354,18 @@ static int boundary_strength(const VVCLocalContext *lc, const MvField *curr, con
if (curr->pred_flag & 1) {
A = curr->mv[0];
- ref_A = rpl[0].list[curr->ref_idx[0]];
+ ref_A = rpl[L0].refs[curr->ref_idx[L0]].poc;
} else {
A = curr->mv[1];
- ref_A = rpl[1].list[curr->ref_idx[1]];
+ ref_A = rpl[L1].refs[curr->ref_idx[L1]].poc;
}
if (neigh->pred_flag & 1) {
B = neigh->mv[0];
- ref_B = neigh_rpl[0].list[neigh->ref_idx[0]];
+ ref_B = neigh_rpl[L0].refs[neigh->ref_idx[L0]].poc;
} else {
B = neigh->mv[1];
- ref_B = neigh_rpl[1].list[neigh->ref_idx[1]];
+ ref_B = neigh_rpl[L1].refs[neigh->ref_idx[L1]].poc;
}
if (ref_A == ref_B) {
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 4a8d1d866a..e1011b4fa1 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -31,33 +31,33 @@
static const int bcw_w_lut[] = {4, 5, 3, 10, -2};
static void subpic_offset(int *x_off, int *y_off,
- const VVCSPS *sps, const VVCPPS *pps, const int subpic_idx, const int is_luma)
+ const VVCSPS *sps, const VVCPPS *pps, const int subpic_idx, const int is_chroma)
{
- *x_off -= pps->subpic_x[subpic_idx] >> sps->hshift[!is_luma];
- *y_off -= pps->subpic_y[subpic_idx] >> sps->vshift[!is_luma];
+ *x_off -= pps->subpic_x[subpic_idx] >> sps->hshift[is_chroma];
+ *y_off -= pps->subpic_y[subpic_idx] >> sps->vshift[is_chroma];
}
static void subpic_width_height(int *pic_width, int *pic_height,
- const VVCSPS *sps, const VVCPPS *pps, const int subpic_idx, const int is_luma)
+ const VVCSPS *sps, const VVCPPS *pps, const int subpic_idx, const int is_chroma)
{
- *pic_width = pps->subpic_width[subpic_idx] >> sps->hshift[!is_luma];
- *pic_height = pps->subpic_height[subpic_idx] >> sps->vshift[!is_luma];
+ *pic_width = pps->subpic_width[subpic_idx] >> sps->hshift[is_chroma];
+ *pic_height = pps->subpic_height[subpic_idx] >> sps->vshift[is_chroma];
}
-static int emulated_edge(const VVCLocalContext *lc, uint8_t *dst, const uint8_t **src, ptrdiff_t *src_stride,
- int x_off, int y_off, const int block_w, const int block_h, const int is_luma)
+static int emulated_edge(const VVCLocalContext *lc, uint8_t *dst, const uint8_t **src, ptrdiff_t *src_stride, const VVCFrame *src_frame,
+ int x_off, int y_off, const int block_w, const int block_h, const int is_chroma)
{
const VVCFrameContext *fc = lc->fc;
- const VVCSPS *sps = fc->ps.sps;
- const VVCPPS *pps = fc->ps.pps;
+ const VVCSPS *sps = src_frame->sps;
+ const VVCPPS *pps = src_frame->pps;
const int subpic_idx = lc->sc->sh.r->curr_subpic_idx;
- const int extra_before = is_luma ? LUMA_EXTRA_BEFORE : CHROMA_EXTRA_BEFORE;
- const int extra_after = is_luma ? LUMA_EXTRA_AFTER : CHROMA_EXTRA_AFTER;
- const int extra = is_luma ? LUMA_EXTRA : CHROMA_EXTRA;
+ const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
+ const int extra_after = is_chroma ? CHROMA_EXTRA_AFTER : LUMA_EXTRA_AFTER;
+ const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
int pic_width, pic_height;
- subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, is_luma);
- subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, is_luma);
+ subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, is_chroma);
+ subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, is_chroma);
if (x_off < extra_before || y_off < extra_before ||
x_off >= pic_width - block_w - extra_after ||
@@ -78,20 +78,20 @@ static int emulated_edge(const VVCLocalContext *lc, uint8_t *dst, const uint8_t
}
static void emulated_edge_dmvr(const VVCLocalContext *lc, uint8_t *dst, const uint8_t **src, ptrdiff_t *src_stride,
- int x_sb, int y_sb, int x_off, int y_off, const int block_w, const int block_h, const int is_luma)
+ int x_sb, int y_sb, int x_off, int y_off, const int block_w, const int block_h, const int is_chroma)
{
const VVCFrameContext *fc = lc->fc;
const VVCSPS *sps = fc->ps.sps;
const VVCPPS *pps = fc->ps.pps;
const int subpic_idx = lc->sc->sh.r->curr_subpic_idx;
- const int extra_before = is_luma ? LUMA_EXTRA_BEFORE : CHROMA_EXTRA_BEFORE;
- const int extra_after = is_luma ? LUMA_EXTRA_AFTER : CHROMA_EXTRA_AFTER;
- const int extra = is_luma ? LUMA_EXTRA : CHROMA_EXTRA;
+ const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
+ const int extra_after = is_chroma ? CHROMA_EXTRA_AFTER : LUMA_EXTRA_AFTER;
+ const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
int pic_width, pic_height;
- subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, is_luma);
- subpic_offset(&x_sb, &y_sb, sps, pps, subpic_idx, is_luma);
- subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, is_luma);
+ subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, is_chroma);
+ subpic_offset(&x_sb, &y_sb, sps, pps, subpic_idx, is_chroma);
+ subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, is_chroma);
if (x_off < extra_before || y_off < extra_before ||
x_off >= pic_width - block_w - extra_after ||
@@ -124,8 +124,8 @@ static void emulated_edge_bilinear(const VVCLocalContext *lc, uint8_t *dst, cons
const int subpic_idx = lc->sc->sh.r->curr_subpic_idx;
int pic_width, pic_height;
- subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, 1);
- subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, 1);
+ subpic_offset(&x_off, &y_off, sps, pps, subpic_idx, 0);
+ subpic_width_height(&pic_width, &pic_height, sps, pps, subpic_idx, 0);
if (x_off < BILINEAR_EXTRA_BEFORE || y_off < BILINEAR_EXTRA_BEFORE ||
x_off >= pic_width - block_w - BILINEAR_EXTRA_AFTER ||
@@ -142,20 +142,13 @@ static void emulated_edge_bilinear(const VVCLocalContext *lc, uint8_t *dst, cons
}
}
+#define MC_EMULATED_EDGE(dst, src, src_stride, x_off, y_off) \
+ emulated_edge(lc, dst, src, src_stride, ref, x_off, y_off, block_w, block_h, is_chroma)
-#define EMULATED_EDGE_LUMA(dst, src, src_stride, x_off, y_off) \
- emulated_edge(lc, dst, src, src_stride, x_off, y_off, block_w, block_h, 1)
-
-#define EMULATED_EDGE_CHROMA(dst, src, src_stride, x_off, y_off) \
- emulated_edge(lc, dst, src, src_stride, x_off, y_off, block_w, block_h, 0)
-
-#define EMULATED_EDGE_DMVR_LUMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
- emulated_edge_dmvr(lc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 1)
-
-#define EMULATED_EDGE_DMVR_CHROMA(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
- emulated_edge_dmvr(lc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, 0)
+#define MC_EMULATED_EDGE_DMVR(dst, src, src_stride, x_sb, y_sb, x_off, y_off) \
+ emulated_edge_dmvr(lc, dst, src, src_stride, x_sb, y_sb, x_off, y_off, block_w, block_h, is_chroma)
-#define EMULATED_EDGE_BILINEAR(dst, src, src_stride, x_off, y_off) \
+#define MC_EMULATED_EDGE_BILINEAR(dst, src, src_stride, x_off, y_off) \
emulated_edge_bilinear(lc, dst, src, src_stride, x_off, y_off, pred_w, pred_h)
// part of 8.5.6.6 Weighted sample prediction process
@@ -209,117 +202,109 @@ static int derive_weight(int *denom, int *w0, int *w1, int *o0, int *o1,
return 1;
}
-static void luma_mc(VVCLocalContext *lc, int16_t *dst, const AVFrame *ref, const Mv *mv,
- int x_off, int y_off, const int block_w, const int block_h)
-{
- const VVCFrameContext *fc = lc->fc;
- const uint8_t *src = ref->data[0];
- ptrdiff_t src_stride = ref->linesize[0];
- const int idx = av_log2(block_w) - 1;
- const int mx = mv->x & 0xf;
- const int my = mv->y & 0xf;
- const int8_t *hf = ff_vvc_inter_luma_filters[0][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[0][my];
-
- x_off += mv->x >> 4;
- y_off += mv->y >> 4;
- src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
-
- EMULATED_EDGE_LUMA(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
-
- fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w);
-}
+#define INTER_FILTER(t, frac) (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac])
-static void chroma_mc(VVCLocalContext *lc, int16_t *dst, const AVFrame *ref, const Mv *mv,
+static void mc(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref, const Mv *mv,
int x_off, int y_off, const int block_w, const int block_h, const int c_idx)
{
const VVCFrameContext *fc = lc->fc;
- const uint8_t *src = ref->data[c_idx];
- ptrdiff_t src_stride = ref->linesize[c_idx];
- int hs = fc->ps.sps->hshift[c_idx];
- int vs = fc->ps.sps->vshift[c_idx];
+ const PredictionUnit *pu = &lc->cu->pu;
+ const uint8_t *src = ref->frame->data[c_idx];
+ ptrdiff_t src_stride = ref->frame->linesize[c_idx];
+ const int is_chroma = !!c_idx;
+ const int hs = fc->ps.sps->hshift[c_idx];
+ const int vs = fc->ps.sps->vshift[c_idx];
const int idx = av_log2(block_w) - 1;
- const intptr_t mx = av_mod_uintp2(mv->x, 4 + hs) << (1 - hs);
- const intptr_t my = av_mod_uintp2(mv->y, 4 + vs) << (1 - vs);
- const int8_t *hf = ff_vvc_inter_chroma_filters[0][mx];
- const int8_t *vf = ff_vvc_inter_chroma_filters[0][my];
+ const intptr_t mx = av_mod_uintp2(mv->x, 4 + hs) << (is_chroma - hs);
+ const intptr_t my = av_mod_uintp2(mv->y, 4 + vs) << (is_chroma - vs);
+ const int hpel_if_idx = (is_chroma || pu->merge_gpm_flag) ? 0 : pu->mi.hpel_if_idx;
+ const int8_t *hf = INTER_FILTER(hpel_if_idx, mx);
+ const int8_t *vf = INTER_FILTER(hpel_if_idx, my);
x_off += mv->x >> (4 + hs);
y_off += mv->y >> (4 + vs);
src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
- EMULATED_EDGE_CHROMA(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
- fc->vvcdsp.inter.put[CHROMA][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w);
+ MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
+ fc->vvcdsp.inter.put[is_chroma][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w);
}
-static void luma_mc_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const AVFrame *ref, const MvField *mvf, int x_off, int y_off, const int block_w, const int block_h,
- const int hf_idx, const int vf_idx)
+static void mc_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
+ const VVCFrame *ref, const MvField *mvf, int x_off, int y_off, const int block_w, const int block_h,
+ const int c_idx)
{
const VVCFrameContext *fc = lc->fc;
+ const PredictionUnit *pu = &lc->cu->pu;
+ const uint8_t *src = ref->frame->data[c_idx];
+ ptrdiff_t src_stride = ref->frame->linesize[c_idx];
const int lx = mvf->pred_flag - PF_L0;
- const Mv *mv = mvf->mv + lx;
- const uint8_t *src = ref->data[0];
- ptrdiff_t src_stride = ref->linesize[0];
+ const int hs = fc->ps.sps->hshift[c_idx];
+ const int vs = fc->ps.sps->vshift[c_idx];
const int idx = av_log2(block_w) - 1;
- const int mx = mv->x & 0xf;
- const int my = mv->y & 0xf;
- const int8_t *hf = ff_vvc_inter_luma_filters[hf_idx][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[vf_idx][my];
+ const Mv *mv = &mvf->mv[lx];
+ const int is_chroma = !!c_idx;
+ const intptr_t mx = av_mod_uintp2(mv->x, 4 + hs) << (is_chroma - hs);
+ const intptr_t my = av_mod_uintp2(mv->y, 4 + vs) << (is_chroma - vs);
+ const int hpel_if_idx = is_chroma ? 0 : pu->mi.hpel_if_idx;
+ const int8_t *hf = INTER_FILTER(hpel_if_idx, mx);
+ const int8_t *vf = INTER_FILTER(hpel_if_idx, my);
int denom, wx, ox;
- x_off += mv->x >> 4;
- y_off += mv->y >> 4;
- src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
-
- EMULATED_EDGE_LUMA(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
+ x_off += mv->x >> (4 + hs);
+ y_off += mv->y >> (4 + vs);
+ src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
- if (derive_weight_uni(&denom, &wx, &ox, lc, mvf, LUMA)) {
- fc->vvcdsp.inter.put_uni_w[LUMA][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
+ MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
+ if (derive_weight_uni(&denom, &wx, &ox, lc, mvf, c_idx)) {
+ fc->vvcdsp.inter.put_uni_w[is_chroma][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
block_h, denom, wx, ox, hf, vf, block_w);
} else {
- fc->vvcdsp.inter.put_uni[LUMA][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
+ fc->vvcdsp.inter.put_uni[is_chroma][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
block_h, hf, vf, block_w);
}
}
-static void luma_mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const AVFrame *ref0, const Mv *mv0, const int x_off, const int y_off, const int block_w, const int block_h,
- const AVFrame *ref1, const Mv *mv1, const MvField *mvf, const int hf_idx, const int vf_idx,
- const MvField *orig_mv, const int sb_bdof_flag)
+static void mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
+ const VVCFrame *ref0, const VVCFrame *ref1, const MvField *mvf, const MvField *orig_mv,
+ const int x_off, const int y_off, const int block_w, const int block_h, const int c_idx,
+ const int sb_bdof_flag)
{
const VVCFrameContext *fc = lc->fc;
const PredictionUnit *pu = &lc->cu->pu;
+ const int hs = fc->ps.sps->hshift[c_idx];
+ const int vs = fc->ps.sps->vshift[c_idx];
const int idx = av_log2(block_w) - 1;
- const AVFrame *ref[] = { ref0, ref1 };
+ const VVCFrame *refs[] = { ref0, ref1 };
int16_t *tmp[] = { lc->tmp + sb_bdof_flag * PROF_TEMP_OFFSET, lc->tmp1 + sb_bdof_flag * PROF_TEMP_OFFSET };
int denom, w0, w1, o0, o1;
- const int weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, LUMA, pu->dmvr_flag);
+ const int weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, pu->dmvr_flag);
+ const int is_chroma = !!c_idx;
+ const int hpel_if_idx = is_chroma ? 0 : pu->mi.hpel_if_idx;
for (int i = L0; i <= L1; i++) {
const Mv *mv = mvf->mv + i;
- const int mx = mv->x & 0xf;
- const int my = mv->y & 0xf;
- const int ox = x_off + (mv->x >> 4);
- const int oy = y_off + (mv->y >> 4);
- ptrdiff_t src_stride = ref[i]->linesize[0];
- const uint8_t *src = ref[i]->data[0] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
- const int8_t *hf = ff_vvc_inter_luma_filters[hf_idx][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[vf_idx][my];
+ const int mx = av_mod_uintp2(mv->x, 4 + hs) << (is_chroma - hs);
+ const int my = av_mod_uintp2(mv->y, 4 + vs) << (is_chroma - vs);
+ const int ox = x_off + (mv->x >> (4 + hs));
+ const int oy = y_off + (mv->y >> (4 + vs));
+ const VVCFrame *ref = refs[i];
+ ptrdiff_t src_stride = ref->frame->linesize[c_idx];
+ const uint8_t *src = ref->frame->data[c_idx] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
+ const int8_t *hf = INTER_FILTER(hpel_if_idx, mx);
+ const int8_t *vf = INTER_FILTER(hpel_if_idx, my);
if (pu->dmvr_flag) {
- const int x_sb = x_off + (orig_mv->mv[i].x >> 4);
- const int y_sb = y_off + (orig_mv->mv[i].y >> 4);
+ const int x_sb = x_off + (orig_mv->mv[i].x >> (4 + hs));
+ const int y_sb = y_off + (orig_mv->mv[i].y >> (4 + vs));
- EMULATED_EDGE_DMVR_LUMA(lc->edge_emu_buffer, &src, &src_stride, x_sb, y_sb, ox, oy);
+ MC_EMULATED_EDGE_DMVR(lc->edge_emu_buffer, &src, &src_stride, x_sb, y_sb, ox, oy);
} else {
- EMULATED_EDGE_LUMA(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
+ MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
}
- fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](tmp[i], src, src_stride, block_h, hf, vf, block_w);
+ fc->vvcdsp.inter.put[is_chroma][idx][!!my][!!mx](tmp[i], src, src_stride, block_h, hf, vf, block_w);
if (sb_bdof_flag)
fc->vvcdsp.inter.bdof_fetch_samples(tmp[i], src, src_stride, mx, my, block_w, block_h);
}
-
if (sb_bdof_flag)
fc->vvcdsp.inter.apply_bdof(dst, dst_stride, tmp[L0], tmp[L1], block_w, block_h);
else if (weight_flag)
@@ -328,70 +313,134 @@ static void luma_mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_st
fc->vvcdsp.inter.avg(dst, dst_stride, tmp[L0], tmp[L1], block_w, block_h);
}
-static void chroma_mc_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const uint8_t *src, ptrdiff_t src_stride, int x_off, int y_off,
- const int block_w, const int block_h, const MvField *mvf, const int c_idx,
- const int hf_idx, const int vf_idx)
+static const int8_t* inter_filter_scaled(const int scale, const int is_chroma, const int is_affine)
{
- const VVCFrameContext *fc = lc->fc;
- const int lx = mvf->pred_flag - PF_L0;
- const int hs = fc->ps.sps->hshift[1];
- const int vs = fc->ps.sps->vshift[1];
- const int idx = av_log2(block_w) - 1;
- const Mv *mv = &mvf->mv[lx];
- const intptr_t mx = av_mod_uintp2(mv->x, 4 + hs) << (1 - hs);
- const intptr_t my = av_mod_uintp2(mv->y, 4 + vs) << (1 - vs);
- const int8_t *hf = ff_vvc_inter_chroma_filters[hf_idx][mx];
- const int8_t *vf = ff_vvc_inter_chroma_filters[vf_idx][my];
- int denom, wx, ox;
+#define SCALE_THRESHOLD_1 20480
+#define SCALE_THRESHOLD_2 28672
- x_off += mv->x >> (4 + hs);
- y_off += mv->y >> (4 + vs);
- src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
+ const int i = (scale > SCALE_THRESHOLD_2) + (scale > SCALE_THRESHOLD_1);
+
+ if (!is_chroma) {
+ if (!is_affine)
+ return &ff_vvc_inter_luma_filters[i + !!i][0][0]; //hpel 1 is not needed for scaled
+ return &ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE + i][0][0];
+ }
+
+ return &ff_vvc_inter_chroma_filters[i][0][0];
+}
+#define INTER_FILTER_SCALED(scale) inter_filter_scaled(scale, is_chroma, is_affine)
+
+#define SCALED_CHROMA_ADDIN(scale, collocated_flag) (is_chroma ? (collocated_flag ? 0 : 8 * (scale - (1 << 14))) : 0)
+#define SCALED_REF_SB(off, scaling_off, ref_mv, scale, add, shift) ((((off - (scaling_off << shift)) << (4 + shift)) + ref_mv) * scale + add)
+#define SCALED_REF(ref_sb, offset, shift) (FFSIGN(ref_sb) * ((FFABS(ref_sb) + (128 << is_chroma)) >> (8 + is_chroma)) + (offset << (10 - shift)) + (32 >> is_chroma))
+#define SCALED_STEP(scale) ((scale + 8) >> 4)
+
+static void scaled_ref_pos_and_step(const VVCLocalContext *lc, const VVCRefPic *refp, const Mv *mv, const int x_off, const int y_off, const int c_idx,
+ int *x, int *y, int *dx, int *dy)
+{
+ const VVCFrameContext *fc = lc->fc;
+ const VVCSPS *sps = fc->ps.sps;
+ const int is_chroma = !!c_idx;
+ const int hs = sps->hshift[c_idx];
+ const int vs = sps->vshift[c_idx];
+ const int left_offset = fc->ref->scaling_win.left_offset;
+ const int top_offset = fc->ref->scaling_win.top_offset;
+ const int addx = SCALED_CHROMA_ADDIN(refp->scale[0], sps->r->sps_chroma_horizontal_collocated_flag);
+ const int addy = SCALED_CHROMA_ADDIN(refp->scale[1], sps->r->sps_chroma_vertical_collocated_flag);
+ const int refx_sb = SCALED_REF_SB(x_off, left_offset, mv->x, refp->scale[0], addx, hs);
+ const int refy_sb = SCALED_REF_SB(y_off, top_offset, mv->y, refp->scale[1], addy, vs);
+
+ *x = SCALED_REF(refx_sb, left_offset, hs);
+ *y = SCALED_REF(refy_sb, top_offset, vs);
+ *dx = SCALED_STEP(refp->scale[0]);
+ *dy = SCALED_STEP(refp->scale[1]);
+}
+
+static void emulated_edge_scaled(VVCLocalContext *lc, const uint8_t **src, ptrdiff_t *src_stride, int *src_height,
+ const VVCFrame *ref, const int x, const int y, const int dx, const int dy,
+ const int block_w, const int block_h, const int is_chroma)
+{
+ const VVCFrameContext *fc = lc->fc;
+ const int x0 = SCALED_INT(x);
+ const int y0 = SCALED_INT(y);
+ const int x_end = SCALED_INT(x + block_w * dx);
+ const int y_end = SCALED_INT(y + block_h * dy);
+ const int x_last = SCALED_INT(x + (block_w - 1) * dx);
+ const int y_last = SCALED_INT(y + (block_h - 1) * dy);
+ const int src_width = x_end - x0 + (x_end == x_last);
+
+ *src_height = y_end - y0 + (y_end == y_last);
+
+ *src += y0 * *src_stride + (x0 * (1 << fc->ps.sps->pixel_shift));
+
+ emulated_edge(lc, lc->edge_emu_buffer, src, src_stride, ref, x0, y0, src_width, *src_height, is_chroma);
+}
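
emulated_edge_scaled() above works out how many reference samples the scaled block actually touches before running the ordinary edge emulation on that span. A small worked example for a hypothetical 2:1 downscale, reusing the SCALED_INT/SCALED_STEP definitions from ctu.h and inter.c:

    #include <stdio.h>

    #define SCALED_INT(pos) ((pos) >> 10)
    #define SCALED_STEP(s)  (((s) + 8) >> 4)

    int main(void)
    {
        const int scale   = 2 << 14;            /* 2.0 in 1.14 fixed point */
        const int dx      = SCALED_STEP(scale); /* 1/1024-pel step per output sample */
        const int x       = 7 << 10;            /* block starts at reference pel 7 */
        const int block_w = 16;

        const int x0     = SCALED_INT(x);
        const int x_end  = SCALED_INT(x + block_w * dx);
        const int x_last = SCALED_INT(x + (block_w - 1) * dx);
        const int src_w  = x_end - x0 + (x_end == x_last);

        /* prints dx=2048 src_w=32: a 16-wide output block reads 32 reference pels,
         * which is what the enlarged EDGE_EMU_BUFFER_STRIDE has to cover */
        printf("dx=%d src_w=%d\n", dx, src_w);
        return 0;
    }
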
+
+static void mc_scaled(VVCLocalContext *lc, int16_t *dst, const VVCRefPic *refp, const Mv *mv,
+ int x_off, int y_off, const int block_w, const int block_h, const int c_idx)
+{
+ const VVCFrameContext *fc = lc->fc;
+ const PredictionUnit *pu = &lc->cu->pu;
+ const uint8_t *src = refp->ref->frame->data[c_idx];
+ ptrdiff_t src_stride = refp->ref->frame->linesize[c_idx];
+ const int is_affine = pu->inter_affine_flag;
+ const int is_chroma = !!c_idx;
+ const int idx = av_log2(block_w) - 1;
+ const int8_t *hf = INTER_FILTER_SCALED(refp->scale[0]);
+ const int8_t *vf = INTER_FILTER_SCALED(refp->scale[1]);
+ int x, y, dx, dy, src_height;
+
+ scaled_ref_pos_and_step(lc, refp, mv, x_off, y_off, c_idx, &x, &y, &dx, &dy);
+ emulated_edge_scaled(lc, &src, &src_stride, &src_height, refp->ref, x, y, dx, dy, block_w, block_h, is_chroma);
+ fc->vvcdsp.inter.put_scaled[is_chroma][idx](dst, src, src_stride, src_height, x, y, dx, dy, block_h, hf, vf, block_w);
+}
+static void mc_uni_scaled(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride, const VVCRefPic *refp,
+ const MvField *mvf, const int x_off, const int y_off, const int block_w, const int block_h, const int c_idx)
+{
+ const VVCFrameContext *fc = lc->fc;
+ const PredictionUnit *pu = &lc->cu->pu;
+ const uint8_t *src = refp->ref->frame->data[c_idx];
+ ptrdiff_t src_stride = refp->ref->frame->linesize[c_idx];
+ const int lx = mvf->pred_flag - PF_L0;
+ const Mv *mv = &mvf->mv[lx];
+ const int is_affine = pu->inter_affine_flag;
+ const int is_chroma = !!c_idx;
+ const int idx = av_log2(block_w) - 1;
+ const int8_t *hf = INTER_FILTER_SCALED(refp->scale[0]);
+ const int8_t *vf = INTER_FILTER_SCALED(refp->scale[1]);
+ int denom, wx, ox, x, y, dx, dy, src_height;
+
+ scaled_ref_pos_and_step(lc, refp, mv, x_off, y_off, c_idx, &x, &y, &dx, &dy);
+ emulated_edge_scaled(lc, &src, &src_stride, &src_height, refp->ref, x, y, dx, dy, block_w, block_h, is_chroma);
- EMULATED_EDGE_CHROMA(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
if (derive_weight_uni(&denom, &wx, &ox, lc, mvf, c_idx)) {
- fc->vvcdsp.inter.put_uni_w[CHROMA][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
- block_h, denom, wx, ox, hf, vf, block_w);
+ fc->vvcdsp.inter.put_uni_w_scaled[is_chroma][idx](dst, dst_stride, src, src_stride, src_height,
+ x, y, dx, dy, block_h, denom, wx, ox, hf, vf, block_w);
} else {
- fc->vvcdsp.inter.put_uni[CHROMA][idx][!!my][!!mx](dst, dst_stride, src, src_stride,
- block_h, hf, vf, block_w);
+ fc->vvcdsp.inter.put_uni_scaled[is_chroma][idx](dst, dst_stride, src, src_stride, src_height,
+ x, y, dx, dy, block_h, hf, vf, block_w);
}
}
-static void chroma_mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const AVFrame *ref0, const AVFrame *ref1, const int x_off, const int y_off,
- const int block_w, const int block_h, const MvField *mvf, const int c_idx,
- const int hf_idx, const int vf_idx, const MvField *orig_mv, const int dmvr_flag, const int ciip_flag)
+static void mc_bi_scaled(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
+ const VVCRefPic *refp0, const VVCRefPic *refp1, const MvField *mvf,
+ const int x_off, const int y_off, const int block_w, const int block_h, const int c_idx)
{
- const VVCFrameContext *fc = lc->fc;
- const int hs = fc->ps.sps->hshift[1];
- const int vs = fc->ps.sps->vshift[1];
- const int idx = av_log2(block_w) - 1;
- const AVFrame *ref[] = { ref0, ref1 };
- int16_t *tmp[] = { lc->tmp, lc->tmp1 };
int denom, w0, w1, o0, o1;
- const int weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, dmvr_flag);
+ const VVCFrameContext *fc = lc->fc;
+ const int weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, c_idx, lc->cu->pu.dmvr_flag);
+ const VVCRefPic *refps[] = { refp0, refp1 };
+ int16_t *tmp[] = { lc->tmp, lc->tmp1 };
for (int i = L0; i <= L1; i++) {
- const Mv *mv = mvf->mv + i;
- const int mx = av_mod_uintp2(mv->x, 4 + hs) << (1 - hs);
- const int my = av_mod_uintp2(mv->y, 4 + vs) << (1 - vs);
- const int ox = x_off + (mv->x >> (4 + hs));
- const int oy = y_off + (mv->y >> (4 + vs));
- ptrdiff_t src_stride = ref[i]->linesize[c_idx];
- const uint8_t *src = ref[i]->data[c_idx] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
- const int8_t *hf = ff_vvc_inter_chroma_filters[hf_idx][mx];
- const int8_t *vf = ff_vvc_inter_chroma_filters[vf_idx][my];
- if (dmvr_flag) {
- const int x_sb = x_off + (orig_mv->mv[i].x >> (4 + hs));
- const int y_sb = y_off + (orig_mv->mv[i].y >> (4 + vs));
- EMULATED_EDGE_DMVR_CHROMA(lc->edge_emu_buffer, &src, &src_stride, x_sb, y_sb, ox, oy);
- } else {
- EMULATED_EDGE_CHROMA(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
- }
- fc->vvcdsp.inter.put[CHROMA][idx][!!my][!!mx](tmp[i], src, src_stride, block_h, hf, vf, block_w);
+ const Mv *mv = mvf->mv + i;
+ const VVCRefPic *refp = refps[i];
+
+ if (refp->is_scaled)
+ mc_scaled(lc, tmp[i], refp, mv, x_off, y_off, block_w, block_h, c_idx);
+ else
+ mc(lc, tmp[i], refp->ref, mv, x_off, y_off, block_w, block_h, c_idx);
}
if (weight_flag)
fc->vvcdsp.inter.w_avg(dst, dst_stride, tmp[L0], tmp[L1], block_w, block_h, denom, w0, w1, o0, o1);
@@ -400,28 +449,29 @@ static void chroma_mc_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_
}
static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const AVFrame *ref, const MvField *mvf, int x_off, int y_off, const int block_w, const int block_h,
+ const VVCFrame *ref, const MvField *mvf, int x_off, int y_off, const int block_w, const int block_h,
const int cb_prof_flag, const int16_t *diff_mv_x, const int16_t *diff_mv_y)
{
const VVCFrameContext *fc = lc->fc;
- const uint8_t *src = ref->data[0];
- ptrdiff_t src_stride = ref->linesize[0];
+ const uint8_t *src = ref->frame->data[LUMA];
+ ptrdiff_t src_stride = ref->frame->linesize[LUMA];
uint16_t *prof_tmp = lc->tmp + PROF_TEMP_OFFSET;
const int idx = av_log2(block_w) - 1;
const int lx = mvf->pred_flag - PF_L0;
const Mv *mv = mvf->mv + lx;
const int mx = mv->x & 0xf;
const int my = mv->y & 0xf;
- const int8_t *hf = ff_vvc_inter_luma_filters[2][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[2][my];
+ const int8_t *hf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][mx];
+ const int8_t *vf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][my];
int denom, wx, ox;
const int weight_flag = derive_weight_uni(&denom, &wx, &ox, lc, mvf, LUMA);
+ const int is_chroma = 0;
x_off += mv->x >> 4;
y_off += mv->y >> 4;
src += y_off * src_stride + (x_off * (1 << fc->ps.sps->pixel_shift));
- EMULATED_EDGE_LUMA(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
+ MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, x_off, y_off);
if (cb_prof_flag) {
fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](prof_tmp, src, src_stride, AFFINE_MIN_BLOCK_SIZE, hf, vf, AFFINE_MIN_BLOCK_SIZE);
fc->vvcdsp.inter.fetch_samples(prof_tmp, src, src_stride, mx, my);
@@ -437,38 +487,51 @@ static void luma_prof_uni(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst
}
}
+static void luma_prof(VVCLocalContext *lc, int16_t *dst, const VVCFrame *ref,
+ const Mv *mv, const int x_off, const int y_off, const int block_w, const int block_h, const int lx)
+{
+ const VVCFrameContext *fc = lc->fc;
+ const PredictionUnit *pu = &lc->cu->pu;
+ const int mx = mv->x & 0xf;
+ const int my = mv->y & 0xf;
+ const int ox = x_off + (mv->x >> 4);
+ const int oy = y_off + (mv->y >> 4);
+ const int idx = av_log2(block_w) - 1;
+ const int is_chroma = 0;
+ uint16_t *prof_tmp = lc->tmp2 + PROF_TEMP_OFFSET;
+ ptrdiff_t src_stride = ref->frame->linesize[0];
+ const uint8_t *src = ref->frame->data[0] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
+ const int8_t *hf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][mx];
+ const int8_t *vf = ff_vvc_inter_luma_filters[VVC_INTER_LUMA_FILTER_TYPE_AFFINE][my];
+
+ MC_EMULATED_EDGE(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
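+ // without PROF, do a plain affine sub-block MC; with PROF, filter the minimal
+ // affine block and refine it per sample from the optical-flow motion deltas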
+ if (!pu->cb_prof_flag[lx]) {
+ fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](dst, src, src_stride, block_h, hf, vf, block_w);
+ } else {
+ fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](prof_tmp, src, src_stride, AFFINE_MIN_BLOCK_SIZE, hf, vf, AFFINE_MIN_BLOCK_SIZE);
+ fc->vvcdsp.inter.fetch_samples(prof_tmp, src, src_stride, mx, my);
+ fc->vvcdsp.inter.apply_prof(dst, prof_tmp, pu->diff_mv_x[lx], pu->diff_mv_y[lx]);
+ }
+}
+
static void luma_prof_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_stride,
- const AVFrame *ref0, const AVFrame *ref1, const MvField *mvf, const int x_off, const int y_off,
+ const VVCRefPic *ref0, const VVCRefPic *ref1, const MvField *mvf, const int x_off, const int y_off,
const int block_w, const int block_h)
{
const VVCFrameContext *fc = lc->fc;
- const PredictionUnit *pu = &lc->cu->pu;
- const AVFrame *ref[] = { ref0, ref1 };
+ const VVCRefPic *refps[] = { ref0, ref1 };
int16_t *tmp[] = { lc->tmp, lc->tmp1 };
- uint16_t *prof_tmp = lc->tmp2 + PROF_TEMP_OFFSET;
- const int idx = av_log2(block_w) - 1;
int denom, w0, w1, o0, o1;
const int weight_flag = derive_weight(&denom, &w0, &w1, &o0, &o1, lc, mvf, LUMA, 0);
for (int i = L0; i <= L1; i++) {
- const Mv *mv = mvf->mv + i;
- const int mx = mv->x & 0xf;
- const int my = mv->y & 0xf;
- const int ox = x_off + (mv->x >> 4);
- const int oy = y_off + (mv->y >> 4);
- ptrdiff_t src_stride = ref[i]->linesize[0];
- const uint8_t *src = ref[i]->data[0] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
- const int8_t *hf = ff_vvc_inter_luma_filters[2][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[2][my];
-
- EMULATED_EDGE_LUMA(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
- if (!pu->cb_prof_flag[i]) {
- fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](tmp[i], src, src_stride, block_h, hf, vf, block_w);
- } else {
- fc->vvcdsp.inter.put[LUMA][idx][!!my][!!mx](prof_tmp, src, src_stride, AFFINE_MIN_BLOCK_SIZE, hf, vf, AFFINE_MIN_BLOCK_SIZE);
- fc->vvcdsp.inter.fetch_samples(prof_tmp, src, src_stride, mx, my);
- fc->vvcdsp.inter.apply_prof(tmp[i], prof_tmp, pu->diff_mv_x[i], pu->diff_mv_y[i]);
- }
+ const VVCRefPic *refp = refps[i];
+ const Mv *mv = mvf->mv + i;
+
+ if (refp->is_scaled)
+ mc_scaled(lc, tmp[i], refp, mv, x_off, y_off, block_w, block_h, LUMA);
+ else
+ luma_prof(lc, tmp[i], refp->ref, mv, x_off, y_off, block_w, block_h, i);
}
if (weight_flag)
@@ -477,15 +540,15 @@ static void luma_prof_bi(VVCLocalContext *lc, uint8_t *dst, const ptrdiff_t dst_
fc->vvcdsp.inter.avg(dst, dst_stride, tmp[L0], tmp[L1], block_w, block_h);
}
-static int pred_get_refs(const VVCLocalContext *lc, VVCFrame *ref[2], const MvField *mv)
+static int pred_get_refs(const VVCLocalContext *lc, VVCRefPic *refp[2], const MvField *mv)
{
- const RefPicList *rpl = lc->sc->rpl;
+ RefPicList *rpl = lc->sc->rpl;
for (int mask = PF_L0; mask <= PF_L1; mask++) {
if (mv->pred_flag & mask) {
const int lx = mask - PF_L0;
- ref[lx] = rpl[lx].ref[mv->ref_idx[lx]];
- if (!ref[lx])
+ refp[lx] = rpl[lx].refs + mv->ref_idx[lx];
+ if (!refp[lx]->ref)
return AVERROR_INVALIDDATA;
}
}
@@ -540,13 +603,13 @@ static void pred_gpm_blk(VVCLocalContext *lc)
for (int i = 0; i < 2; i++) {
const MvField *mv = pu->gpm_mv + i;
const int lx = mv->pred_flag - PF_L0;
- VVCFrame *ref = lc->sc->rpl[lx].ref[mv->ref_idx[lx]];
- if (!ref)
+ VVCRefPic *refp = lc->sc->rpl[lx].refs + mv->ref_idx[lx];
+ if (!refp->ref)
return;
- if (c_idx)
- chroma_mc(lc, tmp[i], ref->frame, mv->mv + lx, x, y, width, height, c_idx);
+ if (refp->is_scaled)
+ mc_scaled(lc, tmp[i], refp, mv->mv + lx, x, y, width, height, c_idx);
else
- luma_mc(lc, tmp[i], ref->frame, mv->mv + lx, x, y, width, height);
+ mc(lc, tmp[i], refp->ref, mv->mv + lx, x, y, width, height, c_idx);
}
fc->vvcdsp.inter.put_gpm(dst, dst_stride, width, height, tmp[0], tmp[1], weights, step_x, step_y);
}
@@ -575,99 +638,57 @@ static int ciip_derive_intra_weight(const VVCLocalContext *lc, const int x0, con
return w;
}
-static void pred_regular_luma(VVCLocalContext *lc, const int hf_idx, const int vf_idx, const MvField *mv,
- const int x0, const int y0, const int sbw, const int sbh, const MvField *orig_mv, const int sb_bdof_flag)
+static void pred_regular(VVCLocalContext *lc, const MvField *mvf, const MvField *orig_mvf,
+ const int x0, const int y0, const int sbw, const int sbh, const int sb_bdof_flag, const int c_start)
{
- const SliceContext *sc = lc->sc;
- const VVCFrameContext *fc = lc->fc;
- const int ciip_flag = lc->cu->ciip_flag;
- uint8_t *dst = POS(0, x0, y0);
- const ptrdiff_t dst_stride = fc->frame->linesize[0];
- uint8_t *inter = ciip_flag ? (uint8_t *)lc->ciip_tmp1 : dst;
- const ptrdiff_t inter_stride = ciip_flag ? (MAX_PB_SIZE * sizeof(uint16_t)) : dst_stride;
- VVCFrame *ref[2];
-
- if (pred_get_refs(lc, ref, mv) < 0)
- return;
-
- if (mv->pred_flag != PF_BI) {
- const int lx = mv->pred_flag - PF_L0;
- luma_mc_uni(lc, inter, inter_stride, ref[lx]->frame,
- mv, x0, y0, sbw, sbh, hf_idx, vf_idx);
- } else {
- luma_mc_bi(lc, inter, inter_stride, ref[0]->frame,
- &mv->mv[0], x0, y0, sbw, sbh, ref[1]->frame, &mv->mv[1], mv,
- hf_idx, vf_idx, orig_mv, sb_bdof_flag);
- }
-
- if (ciip_flag) {
- const int intra_weight = ciip_derive_intra_weight(lc, x0, y0, sbw, sbh);
- fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 0);
- if (sc->sh.r->sh_lmcs_used_flag)
- fc->vvcdsp.lmcs.filter(inter, inter_stride, sbw, sbh, &fc->ps.lmcs.fwd_lut);
- fc->vvcdsp.inter.put_ciip(dst, dst_stride, sbw, sbh, inter, inter_stride, intra_weight);
-
- }
-}
+ const VVCFrameContext *fc = lc->fc;
+ const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? CR : LUMA;
+ VVCRefPic *refp[2];
-static void pred_regular_chroma(VVCLocalContext *lc, const MvField *mv,
- const int x0, const int y0, const int sbw, const int sbh, const MvField *orig_mv, const int dmvr_flag)
-{
- const VVCFrameContext *fc = lc->fc;
- const int hs = fc->ps.sps->hshift[1];
- const int vs = fc->ps.sps->vshift[1];
- const int x0_c = x0 >> hs;
- const int y0_c = y0 >> vs;
- const int w_c = sbw >> hs;
- const int h_c = sbh >> vs;
- const int do_ciip = lc->cu->ciip_flag && (w_c > 2);
-
- uint8_t* dst1 = POS(1, x0, y0);
- uint8_t* dst2 = POS(2, x0, y0);
- const ptrdiff_t dst1_stride = fc->frame->linesize[1];
- const ptrdiff_t dst2_stride = fc->frame->linesize[2];
-
- uint8_t *inter1 = do_ciip ? (uint8_t *)lc->ciip_tmp1 : dst1;
- const ptrdiff_t inter1_stride = do_ciip ? (MAX_PB_SIZE * sizeof(uint16_t)) : dst1_stride;
-
- uint8_t *inter2 = do_ciip ? (uint8_t *)lc->ciip_tmp2 : dst2;
- const ptrdiff_t inter2_stride = do_ciip ? (MAX_PB_SIZE * sizeof(uint16_t)) : dst2_stride;
-
- //fix me
- const int hf_idx = 0;
- const int vf_idx = 0;
- VVCFrame *ref[2];
-
- if (pred_get_refs(lc, ref, mv) < 0)
+ if (pred_get_refs(lc, refp, mvf) < 0)
return;
- if (mv->pred_flag != PF_BI) {
- const int lx = mv->pred_flag - PF_L0;
- if (!ref[lx])
- return;
-
- chroma_mc_uni(lc, inter1, inter1_stride, ref[lx]->frame->data[1], ref[lx]->frame->linesize[1],
- x0_c, y0_c, w_c, h_c, mv, CB, hf_idx, vf_idx);
- chroma_mc_uni(lc, inter2, inter2_stride, ref[lx]->frame->data[2], ref[lx]->frame->linesize[2],
- x0_c, y0_c, w_c, h_c, mv, CR, hf_idx, vf_idx);
- } else {
- if (!ref[0] || !ref[1])
- return;
-
- chroma_mc_bi(lc, inter1, inter1_stride, ref[0]->frame, ref[1]->frame,
- x0_c, y0_c, w_c, h_c, mv, CB, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->cu->ciip_flag);
-
- chroma_mc_bi(lc, inter2, inter2_stride, ref[0]->frame, ref[1]->frame,
- x0_c, y0_c, w_c, h_c, mv, CR, hf_idx, vf_idx, orig_mv, dmvr_flag, lc->cu->ciip_flag);
-
- }
- if (do_ciip) {
- const int intra_weight = ciip_derive_intra_weight(lc, x0, y0, sbw, sbh);
- fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 1);
- fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, 2);
- fc->vvcdsp.inter.put_ciip(dst1, dst1_stride, w_c, h_c, inter1, inter1_stride, intra_weight);
- fc->vvcdsp.inter.put_ciip(dst2, dst2_stride, w_c, h_c, inter2, inter2_stride, intra_weight);
-
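+ // handle all required colour planes in one loop, choosing the scaled or
+ // regular MC path per reference picture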
+ for (int c_idx = c_start; c_idx <= c_end; c_idx++) {
+ uint8_t *dst = POS(c_idx, x0, y0);
+ const ptrdiff_t dst_stride = fc->frame->linesize[c_idx];
+ const int hs = fc->ps.sps->hshift[c_idx];
+ const int vs = fc->ps.sps->vshift[c_idx];
+ const int x = x0 >> hs;
+ const int y = y0 >> vs;
+ const int w = sbw >> hs;
+ const int h = sbh >> vs;
+ const int is_luma = !c_idx;
+ const int do_ciip = lc->cu->ciip_flag && (is_luma || (w > 2));
+ uint8_t *inter = do_ciip ? (uint8_t *)lc->ciip_tmp : dst;
+ const ptrdiff_t inter_stride = do_ciip ? (MAX_PB_SIZE * sizeof(uint16_t)) : dst_stride;
+ const int do_bdof = is_luma && sb_bdof_flag;
+
+ if (mvf->pred_flag != PF_BI) {
+ const int lx = mvf->pred_flag - PF_L0;
+
+ if (refp[lx]->is_scaled) {
+ mc_uni_scaled(lc, inter, inter_stride, refp[lx], mvf,
+ x, y, w, h, c_idx);
+ } else {
+ mc_uni(lc, inter, inter_stride, refp[lx]->ref, mvf,
+ x, y, w, h, c_idx);
+ }
+ } else {
+ if (refp[L0]->is_scaled || refp[L1]->is_scaled) {
+ mc_bi_scaled(lc, inter, inter_stride, refp[L0], refp[L1], mvf,
+ x, y, w, h, c_idx);
+ } else {
+ mc_bi(lc, inter, inter_stride, refp[L0]->ref, refp[L1]->ref, mvf, orig_mvf,
+ x, y, w, h, c_idx, do_bdof);
+ }
+ }
+ if (do_ciip) {
+ const int intra_weight = ciip_derive_intra_weight(lc, x0, y0, sbw, sbh);
+ fc->vvcdsp.intra.intra_pred(lc, x0, y0, sbw, sbh, c_idx);
+ if (!c_idx && lc->sc->sh.r->sh_lmcs_used_flag)
+ fc->vvcdsp.lmcs.filter(inter, inter_stride, w, h, &fc->ps.lmcs.fwd_lut);
+ fc->vvcdsp.inter.put_ciip(dst, dst_stride, w, h, inter, inter_stride, intra_weight);
+ }
}
}
@@ -738,7 +759,7 @@ static void dmvr_mv_refine(VVCLocalContext *lc, MvField *mvf, MvField *orig_mv,
const int oy = y_off + (mv->y >> 4) - sr_range;
ptrdiff_t src_stride = ref[i]->linesize[LUMA];
const uint8_t *src = ref[i]->data[LUMA] + oy * src_stride + (ox * (1 << fc->ps.sps->pixel_shift));
- EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
+ MC_EMULATED_EDGE_BILINEAR(lc->edge_emu_buffer, &src, &src_stride, ox, oy);
fc->vvcdsp.inter.dmvr[!!my][!!mx](tmp[i], src, src_stride, pred_h, mx, my, pred_w);
}
@@ -804,17 +825,16 @@ static void derive_sb_mv(VVCLocalContext *lc, MvField *mv, MvField *orig_mv, int
if (pu->bdof_flag)
*sb_bdof_flag = 1;
if (pu->dmvr_flag) {
- VVCFrame* ref[2];
- if (pred_get_refs(lc, ref, mv) < 0)
+ VVCRefPic *refp[2];
+ if (pred_get_refs(lc, refp, mv) < 0)
return;
- dmvr_mv_refine(lc, mv, orig_mv, sb_bdof_flag, ref[0]->frame, ref[1]->frame, x0, y0, sbw, sbh);
+ dmvr_mv_refine(lc, mv, orig_mv, sb_bdof_flag, refp[L0]->ref->frame, refp[L1]->ref->frame, x0, y0, sbw, sbh);
set_dmvr_info(fc, x0, y0, sbw, sbh, mv);
}
}
static void pred_regular_blk(VVCLocalContext *lc, const int skip_ciip)
{
- const VVCFrameContext *fc = lc->fc;
const CodingUnit *cu = lc->cu;
PredictionUnit *pu = &lc->cu->pu;
const MotionInfo *mi = &pu->mi;
@@ -836,9 +856,7 @@ static void pred_regular_blk(VVCLocalContext *lc, const int skip_ciip)
ff_vvc_set_neighbour_available(lc, x0, y0, sbw, sbh);
derive_sb_mv(lc, &mv, &orig_mv, &sb_bdof_flag, x0, y0, sbw, sbh);
- pred_regular_luma(lc, mi->hpel_if_idx, mi->hpel_if_idx, &mv, x0, y0, sbw, sbh, &orig_mv, sb_bdof_flag);
- if (fc->ps.sps->r->sps_chroma_format_idc)
- pred_regular_chroma(lc, &mv, x0, y0, sbw, sbh, &orig_mv, pu->dmvr_flag);
+ pred_regular(lc, &mv, &orig_mv, x0, y0, sbw, sbh, sb_bdof_flag, LUMA);
}
}
}
@@ -873,6 +891,7 @@ static void pred_affine_blk(VVCLocalContext *lc)
const int sbh = cu->cb_height / mi->num_sb_y;
const int hs = fc->ps.sps->hshift[1];
const int vs = fc->ps.sps->vshift[1];
+ const int dst_stride = fc->frame->linesize[LUMA];
for (int sby = 0; sby < mi->num_sb_y; sby++) {
for (int sbx = 0; sbx < mi->num_sb_x; sbx++) {
@@ -881,26 +900,29 @@ static void pred_affine_blk(VVCLocalContext *lc)
uint8_t *dst0 = POS(0, x, y);
const MvField *mv = ff_vvc_get_mvf(fc, x, y);
- VVCFrame *ref[2];
+ VVCRefPic *refp[2];
- if (pred_get_refs(lc, ref, mv) < 0)
+ if (pred_get_refs(lc, refp, mv) < 0)
return;
if (mi->pred_flag != PF_BI) {
const int lx = mi->pred_flag - PF_L0;
- luma_prof_uni(lc, dst0, fc->frame->linesize[0], ref[lx]->frame,
- mv, x, y, sbw, sbh, pu->cb_prof_flag[lx],
- pu->diff_mv_x[lx], pu->diff_mv_y[lx]);
+ if (refp[lx]->is_scaled) {
+ mc_uni_scaled(lc, dst0, dst_stride, refp[lx], mv, x, y, sbw, sbh, LUMA);
+ } else {
+ luma_prof_uni(lc, dst0, dst_stride, refp[lx]->ref,
+ mv, x, y, sbw, sbh, pu->cb_prof_flag[lx],
+ pu->diff_mv_x[lx], pu->diff_mv_y[lx]);
+ }
} else {
- luma_prof_bi(lc, dst0, fc->frame->linesize[0], ref[0]->frame, ref[1]->frame,
- mv, x, y, sbw, sbh);
+ luma_prof_bi(lc, dst0, dst_stride, refp[L0], refp[L1], mv, x, y, sbw, sbh);
}
if (fc->ps.sps->r->sps_chroma_format_idc) {
if (!av_mod_uintp2(sby, vs) && !av_mod_uintp2(sbx, hs)) {
MvField mvc;
- derive_affine_mvc(&mvc, fc, mv, x, y, sbw, sbh);
- pred_regular_chroma(lc, &mvc, x, y, sbw<<hs, sbh<<vs, NULL, 0);
+ derive_affine_mvc(&mvc, fc, mv, x, y, sbw, sbh);
+ pred_regular(lc, &mvc, NULL, x, y, sbw << hs, sbh << vs, 0, CB);
}
}
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index e2fbfd4fc0..a8068f4ba8 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -22,6 +22,165 @@
#include "libavcodec/h26x/h2656_inter_template.c"
+#define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE
+static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
+ const int _x, const int _y, const int dx, const int dy,
+ const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma)
+{
+ int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
+ int16_t *tmp = tmp_array;
+ pixel *dst = (pixel*)_dst;
+ int16_t *dst16 = (int16_t*)_dst;
+ const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
+ const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
+ const int shift = FFMAX(2, 14 - BIT_DEPTH);
+ const int offset = 1 << (shift - 1);
+ const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
+ const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
+ const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
+ const int shift1 = 6 - is_chroma;
+ const int shift2 = 4 + is_chroma;
+ const int x0 = SCALED_INT(_x);
+ const int y0 = SCALED_INT(_y);
+
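+ // horizontal pass: for each output column, filter the required source rows into
+ // a column-major temporary buffer; mx is the fractional phase that selects the filter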
+ for (int i = 0; i < width; i++) {
+ const int tx = _x + dx * i;
+ const int x = SCALED_INT(tx) - x0;
+ const int mx = av_mod_uintp2(tx >> shift1, shift2);
+ const int8_t *filter = hf + mx * taps;
+ const pixel *src = (pixel*)_src - extra_before * src_stride;
+
+ for (int j = 0; j < src_height + extra; j++) {
+ tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
+ src += src_stride;
+ }
+ tmp += TMP_STRIDE;
+ }
+
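+ // vertical pass: filter each temporary column down to one destination row per
+ // iteration; uni-prediction clips to pixel range, otherwise 16-bit intermediates are kept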
+ for (int i = 0; i < height; i++) {
+ const int ty = _y + dy * i;
+ const int x = SCALED_INT(ty) - y0;
+ const int mx = av_mod_uintp2(ty >> shift1, shift2);
+ const int8_t *filter = vf + mx * taps;
+
+ tmp = tmp_array + extra_before;
+ for (int j = 0; j < width; j++) {
+ const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
+ if (is_uni)
+ dst[j] = av_clip_pixel((val + offset) >> shift);
+ else
+ dst16[j] = val;
+ tmp += TMP_STRIDE;
+ }
+ if (is_uni)
+ dst += dst_stride;
+ else
+ dst16 += dst_stride;
+ }
+}
+
+static void FUNC(put_luma_scaled)(int16_t *_dst,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 0);
+}
+
+static void FUNC(put_chroma_scaled)(int16_t *_dst,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 1);
+}
+
+static void FUNC(put_uni_luma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 0);
+}
+
+static void FUNC(put_uni_chroma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 1);
+}
+
+static void av_always_inline FUNC(put_uni_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height,
+ const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox,
+ const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma)
+{
+ int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE];
+ int16_t *tmp = tmp_array;
+ pixel *dst = (pixel*)_dst;
+ const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
+ const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
+ const int shift = denom + FFMAX(2, 14 - BIT_DEPTH);
+ const int offset = 1 << (shift - 1);
+ const int ox = _ox * (1 << (BIT_DEPTH - 8));
+ const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS;
+ const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA;
+ const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE;
+ const int shift1 = 6 - is_chroma;
+ const int shift2 = 4 + is_chroma;
+ const int x0 = SCALED_INT(_x);
+ const int y0 = SCALED_INT(_y);
+
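+ // same two-pass resampling as put_scaled, but the vertical pass also applies
+ // explicit weighted prediction (wx, ox) before clipping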
+ for (int i = 0; i < width; i++) {
+ const int tx = _x + dx * i;
+ const int x = SCALED_INT(tx) - x0;
+ const int mx = av_mod_uintp2(tx >> shift1, shift2);
+ const int8_t *filter = hf + mx * taps;
+ const pixel *src = (pixel*)_src - extra_before * src_stride;
+
+ for (int j = 0; j < src_height + extra; j++) {
+ tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8);
+ src += src_stride;
+ }
+ tmp += TMP_STRIDE;
+ }
+
+ for (int i = 0; i < height; i++) {
+ const int ty = _y + dy * i;
+ const int x = SCALED_INT(ty) - y0;
+ const int mx = av_mod_uintp2(ty >> shift1, shift2);
+ const int8_t *filter = vf + mx * taps;
+
+ tmp = tmp_array + extra_before;
+ for (int j = 0; j < width; j++) {
+ const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6;
+ dst[j] = av_clip_pixel(((wx * val + offset) >> shift) + ox);
+ tmp += TMP_STRIDE;
+ }
+ dst += dst_stride;
+ }
+}
+
+static void FUNC(put_uni_luma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 0);
+}
+
+static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride,
+ const uint8_t *_src, ptrdiff_t _src_stride, const int src_height,
+ const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox,
+ const int height, const int8_t *hf, const int8_t *vf, const int width)
+{
+ FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 1);
+}
+
+#undef TMP_STRIDE
+
static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
const int16_t *src0, const int16_t *src1, const int width, const int height)
{
@@ -440,6 +599,15 @@ static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
FUNCS(LUMA, luma);
FUNCS(CHROMA, chroma);
+ for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) {
+ inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled);
+ inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled);
+ inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled);
+ inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled);
+ inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled);
+ inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled);
+ }
+
inter->avg = FUNC(avg);
inter->w_avg = FUNC(w_avg);
diff --git a/libavcodec/vvc/intra.c b/libavcodec/vvc/intra.c
index e515fb9710..f77a012f09 100644
--- a/libavcodec/vvc/intra.c
+++ b/libavcodec/vvc/intra.c
@@ -339,18 +339,20 @@ static void derive_qp(const VVCLocalContext *lc, const TransformUnit *tu, Transf
//8.7.3 Scaling process for transform coefficients
static av_always_inline int derive_scale(const TransformBlock *tb, const int sh_dep_quant_used_flag)
{
- static const uint8_t rem6[63 + 2 * 6 + 1] = {
- 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
- 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
- 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
- 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3
+ static const uint8_t rem6[63 + 8 * 6 + 1] = {
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
- static const uint8_t div6[63 + 2 * 6 + 1] = {
- 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
- 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
- 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12
+ static const uint8_t div6[63 + 8 * 6 + 1] = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
+ 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18,
};
const static int level_scale[2][6] = {
@@ -416,7 +418,7 @@ static const uint8_t* derive_scale_m(const VVCLocalContext *lc, const TransformB
static av_always_inline int scale_coeff(const TransformBlock *tb, int coeff,
const int scale, const int scale_m, const int log2_transform_range)
{
- coeff = (coeff * scale * scale_m + tb->bd_offset) >> tb->bd_shift;
+ coeff = ((int64_t) coeff * scale * scale_m + tb->bd_offset) >> tb->bd_shift;
coeff = av_clip_intp2(coeff, log2_transform_range);
return coeff;
}
diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c
index fe7d923460..e78a9b4694 100644
--- a/libavcodec/vvc/mvs.c
+++ b/libavcodec/vvc/mvs.c
@@ -88,8 +88,8 @@ static int check_mvset(Mv *mvLXCol, Mv *mvCol,
const RefPicList *refPicList, int X, int refIdxLx,
const RefPicList *refPicList_col, int listCol, int refidxCol)
{
- int cur_lt = refPicList[X].isLongTerm[refIdxLx];
- int col_lt = refPicList_col[listCol].isLongTerm[refidxCol];
+ int cur_lt = refPicList[X].refs[refIdxLx].is_lt;
+ int col_lt = refPicList_col[listCol].refs[refidxCol].is_lt;
int col_poc_diff, cur_poc_diff;
if (cur_lt != col_lt) {
@@ -98,8 +98,8 @@ static int check_mvset(Mv *mvLXCol, Mv *mvCol,
return 0;
}
- col_poc_diff = colPic - refPicList_col[listCol].list[refidxCol];
- cur_poc_diff = poc - refPicList[X].list[refIdxLx];
+ col_poc_diff = colPic - refPicList_col[listCol].refs[refidxCol].poc;
+ cur_poc_diff = poc - refPicList[X].refs[refIdxLx].poc;
mv_compression(mvCol);
if (cur_lt || col_poc_diff == cur_poc_diff) {
@@ -126,7 +126,7 @@ int ff_vvc_no_backward_pred_flag(const VVCLocalContext *lc)
for (j = 0; j < 2; j++) {
for (i = 0; i < lc->sc->sh.r->num_ref_idx_active[j]; i++) {
- if (rpl[j].list[i] > lc->fc->ps.ph.poc) {
+ if (rpl[j].refs[i].poc > lc->fc->ps.ph.poc) {
check_diffpicount++;
break;
}
@@ -297,7 +297,8 @@ static int derive_cb_prof_flag_lx(const VVCLocalContext *lc, const PredictionUni
if (IS_SAME_MV(cp_mv, cp_mv + 1) && IS_SAME_MV(cp_mv, cp_mv + 2))
return 0;
}
- //fixme: RprConstraintsActiveFlag
+ if (lc->sc->rpl[lx].refs[mi->ref_idx[lx]].is_scaled)
+ return 0;
return 1;
}
@@ -1059,9 +1060,9 @@ static int sb_temporal_luma_motion_data(const VVCLocalContext *lc, const MvField
colPic = ref->poc;
if (a1) {
- if ((a1->pred_flag & PF_L0) && colPic == rpl[0].list[a1->ref_idx[0]])
+ if ((a1->pred_flag & PF_L0) && colPic == rpl[L0].refs[a1->ref_idx[L0]].poc)
*temp_mv = a1->mv[0];
- else if ((a1->pred_flag & PF_L1) && colPic == rpl[1].list[a1->ref_idx[1]])
+ else if ((a1->pred_flag & PF_L1) && colPic == rpl[L1].refs[a1->ref_idx[L1]].poc)
*temp_mv = a1->mv[1];
ff_vvc_round_mv(temp_mv, 0, 4);
}
@@ -1418,16 +1419,16 @@ static int mvp_candidate(const VVCLocalContext *lc, const int x_cand, const int
const MvField* tab_mvf = fc->tab.mvf;
const MvField *mvf = &TAB_MVF(x_cand, y_cand);
const PredFlag maskx = lx + 1;
- const int poc = rpl[lx].list[ref_idx[lx]];
+ const int poc = rpl[lx].refs[ref_idx[lx]].poc;
int available = 0;
- if ((mvf->pred_flag & maskx) && rpl[lx].list[mvf->ref_idx[lx]] == poc) {
+ if ((mvf->pred_flag & maskx) && rpl[lx].refs[mvf->ref_idx[lx]].poc == poc) {
available = 1;
*mv = mvf->mv[lx];
} else {
const int ly = !lx;
const PredFlag masky = ly + 1;
- if ((mvf->pred_flag & masky) && rpl[ly].list[mvf->ref_idx[ly]] == poc) {
+ if ((mvf->pred_flag & masky) && rpl[ly].refs[mvf->ref_idx[ly]].poc == poc) {
available = 1;
*mv = mvf->mv[ly];
}
@@ -1450,15 +1451,15 @@ static int affine_mvp_candidate(const VVCLocalContext *lc,
const MvField *mvf = &TAB_MVF(x_nb, y_nb);
RefPicList* rpl = lc->sc->rpl;
const PredFlag maskx = lx + 1;
- const int poc = rpl[lx].list[ref_idx[lx]];
+ const int poc = rpl[lx].refs[ref_idx[lx]].poc;
- if ((mvf->pred_flag & maskx) && rpl[lx].list[mvf->ref_idx[lx]] == poc) {
+ if ((mvf->pred_flag & maskx) && rpl[lx].refs[mvf->ref_idx[lx]].poc == poc) {
available = 1;
affine_cps_from_nb(lc, x_nb, y_nb, nbw, nbh, lx, cps, num_cp);
} else {
const int ly = !lx;
const PredFlag masky = ly + 1;
- if ((mvf->pred_flag & masky) && rpl[ly].list[mvf->ref_idx[ly]] == poc) {
+ if ((mvf->pred_flag & masky) && rpl[ly].refs[mvf->ref_idx[ly]].poc == poc) {
available = 1;
affine_cps_from_nb(lc, x_nb, y_nb, nbw, nbh, ly, cps, num_cp);
}
@@ -1550,7 +1551,7 @@ static int mvp_history_candidates(const VVCLocalContext *lc,
{
const EntryPoint* ep = lc->ep;
const RefPicList* rpl = lc->sc->rpl;
- const int poc = rpl[lx].list[ref_idx];
+ const int poc = rpl[lx].refs[ref_idx].poc;
if (ep->num_hmvp == 0)
return 0;
@@ -1559,7 +1560,7 @@ static int mvp_history_candidates(const VVCLocalContext *lc,
for (int j = 0; j < 2; j++) {
const int ly = (j ? !lx : lx);
PredFlag mask = PF_L0 + ly;
- if ((h->pred_flag & mask) && poc == rpl[ly].list[h->ref_idx[ly]]) {
+ if ((h->pred_flag & mask) && poc == rpl[ly].refs[h->ref_idx[ly]].poc) {
if (mvp_lx_flag == num_cands) {
*mv = h->mv[ly];
ff_vvc_round_mv(mv, amvr_shift, amvr_shift);
@@ -1725,14 +1726,14 @@ static int affine_mvp_constructed_cp(NeighbourContext *ctx,
if (check_available(n, ctx->lc, 0)) {
const PredFlag maskx = lx + 1;
const MvField* mvf = &TAB_MVF(n->x, n->y);
- const int poc = rpl[lx].list[ref_idx];
- if ((mvf->pred_flag & maskx) && rpl[lx].list[mvf->ref_idx[lx]] == poc) {
+ const int poc = rpl[lx].refs[ref_idx].poc;
+ if ((mvf->pred_flag & maskx) && rpl[lx].refs[mvf->ref_idx[lx]].poc == poc) {
available = 1;
*cp = mvf->mv[lx];
} else {
const int ly = !lx;
const PredFlag masky = ly + 1;
- if ((mvf->pred_flag & masky) && rpl[ly].list[mvf->ref_idx[ly]] == poc) {
+ if ((mvf->pred_flag & masky) && rpl[ly].refs[mvf->ref_idx[ly]].poc == poc) {
available = 1;
*cp = mvf->mv[ly];
}
diff --git a/libavcodec/vvc/ps.c b/libavcodec/vvc/ps.c
index 0365feab47..1b23675c98 100644
--- a/libavcodec/vvc/ps.c
+++ b/libavcodec/vvc/ps.c
@@ -219,14 +219,22 @@ fail:
return NULL;
}
-static int decode_sps(VVCParamSets *ps, const H266RawSPS *rsps, void *log_ctx)
+static int decode_sps(VVCParamSets *ps, const H266RawSPS *rsps, void *log_ctx, int is_clvss)
{
const int sps_id = rsps->sps_seq_parameter_set_id;
const VVCSPS *old_sps = ps->sps_list[sps_id];
const VVCSPS *sps;
- if (old_sps && old_sps->r == rsps)
- return 0;
+ if (is_clvss) {
+ ps->sps_id_used = 0;
+ }
+
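+ // within a CVS an SPS id may only be reused with identical content;
+ // redefining an already-used id is invalid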
+ if (old_sps) {
+ if (old_sps->r == rsps || !memcmp(old_sps->r, rsps, sizeof(*old_sps->r)))
+ return 0;
+ else if (ps->sps_id_used & (1 << sps_id))
+ return AVERROR_INVALIDDATA;
+ }
sps = sps_alloc(rsps, log_ctx);
if (!sps)
@@ -234,6 +242,7 @@ static int decode_sps(VVCParamSets *ps, const H266RawSPS *rsps, void *log_ctx)
ff_refstruct_unref(&ps->sps_list[sps_id]);
ps->sps_list[sps_id] = sps;
+ ps->sps_id_used |= (1 << sps_id);
return 0;
}
@@ -610,7 +619,7 @@ static int decode_pps(VVCParamSets *ps, const H266RawPPS *rpps)
return ret;
}
-static int decode_ps(VVCParamSets *ps, const CodedBitstreamH266Context *h266, void *log_ctx)
+static int decode_ps(VVCParamSets *ps, const CodedBitstreamH266Context *h266, void *log_ctx, int is_clvss)
{
const H266RawPictureHeader *ph = h266->ph;
const H266RawPPS *rpps;
@@ -628,7 +637,7 @@ static int decode_ps(VVCParamSets *ps, const CodedBitstreamH266Context *h266, vo
if (!rsps)
return AVERROR_INVALIDDATA;
- ret = decode_sps(ps, rsps, log_ctx);
+ ret = decode_sps(ps, rsps, log_ctx, is_clvss);
if (ret < 0)
return ret;
@@ -867,13 +876,16 @@ int ff_vvc_decode_frame_ps(VVCFrameParamSets *fps, struct VVCContext *s)
int ret = 0;
VVCParamSets *ps = &s->ps;
const CodedBitstreamH266Context *h266 = s->cbc->priv_data;
+ int is_clvss;
+
+ decode_recovery_flag(s);
+ is_clvss = IS_CLVSS(s);
- ret = decode_ps(ps, h266, s->avctx);
+ ret = decode_ps(ps, h266, s->avctx, is_clvss);
if (ret < 0)
return ret;
- decode_recovery_flag(s);
- ret = decode_frame_ps(fps, ps, h266, s->poc_tid0, IS_CLVSS(s));
+ ret = decode_frame_ps(fps, ps, h266, s->poc_tid0, is_clvss);
decode_recovery_poc(s, &fps->ph);
return ret;
}
@@ -1184,12 +1196,12 @@ static void sh_deblock_offsets(VVCSH *sh)
const H266RawSliceHeader *r = sh->r;
if (!r->sh_deblocking_filter_disabled_flag) {
- sh->deblock.beta_offset[LUMA] = r->sh_luma_beta_offset_div2 << 1;
- sh->deblock.tc_offset[LUMA] = r->sh_luma_tc_offset_div2 << 1;
- sh->deblock.beta_offset[CB] = r->sh_cb_beta_offset_div2 << 1;
- sh->deblock.tc_offset[CB] = r->sh_cb_tc_offset_div2 << 1;
- sh->deblock.beta_offset[CR] = r->sh_cr_beta_offset_div2 << 1;
- sh->deblock.tc_offset[CR] = r->sh_cr_tc_offset_div2 << 1;
+ sh->deblock.beta_offset[LUMA] = r->sh_luma_beta_offset_div2 * 2;
+ sh->deblock.tc_offset[LUMA] = r->sh_luma_tc_offset_div2 * 2;
+ sh->deblock.beta_offset[CB] = r->sh_cb_beta_offset_div2 * 2;
+ sh->deblock.tc_offset[CB] = r->sh_cb_tc_offset_div2 * 2;
+ sh->deblock.beta_offset[CR] = r->sh_cr_beta_offset_div2 * 2;
+ sh->deblock.tc_offset[CR] = r->sh_cr_tc_offset_div2 * 2;
}
}
diff --git a/libavcodec/vvc/ps.h b/libavcodec/vvc/ps.h
index 3efb097b41..6656a06320 100644
--- a/libavcodec/vvc/ps.h
+++ b/libavcodec/vvc/ps.h
@@ -69,7 +69,7 @@ typedef struct VVCSPS {
uint8_t bit_depth; ///< BitDepth
uint8_t qp_bd_offset; ///< QpBdOffset
uint8_t ctb_log2_size_y; ///< CtbLog2SizeY
- uint8_t ctb_size_y; ///< CtbSizeY
+ uint16_t ctb_size_y; ///< CtbSizeY
uint8_t min_cb_log2_size_y; ///< MinCbLog2SizeY
uint8_t min_cb_size_y; ///< MinCbSizeY
uint8_t max_tb_size_y; ///< MaxTbSizeY
@@ -214,6 +214,9 @@ typedef struct VVCParamSets {
const VVCALF *alf_list[VVC_MAX_ALF_COUNT]; ///< RefStruct reference
const H266RawAPS *lmcs_list[VVC_MAX_LMCS_COUNT]; ///< RefStruct reference
const VVCScalingList *scaling_list[VVC_MAX_SL_COUNT]; ///< RefStruct reference
+
+ // Bit field of SPS IDs used in the current CVS
+ uint16_t sps_id_used;
} VVCParamSets;
typedef struct VVCFrameParamSets {
diff --git a/libavcodec/vvc/refs.c b/libavcodec/vvc/refs.c
index a5ee7338d6..fb42963034 100644
--- a/libavcodec/vvc/refs.c
+++ b/libavcodec/vvc/refs.c
@@ -52,6 +52,8 @@ void ff_vvc_unref_frame(VVCFrameContext *fc, VVCFrame *frame, int flags)
frame->flags &= ~flags;
if (!frame->flags) {
av_frame_unref(frame->frame);
+ ff_refstruct_unref(&frame->sps);
+ ff_refstruct_unref(&frame->pps);
ff_refstruct_unref(&frame->progress);
ff_refstruct_unref(&frame->tab_dmvr_mvf);
@@ -112,13 +114,18 @@ static FrameProgress *alloc_progress(void)
static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc)
{
+ const VVCSPS *sps = fc->ps.sps;
const VVCPPS *pps = fc->ps.pps;
for (int i = 0; i < FF_ARRAY_ELEMS(fc->DPB); i++) {
int ret;
VVCFrame *frame = &fc->DPB[i];
+ VVCWindow *win = &frame->scaling_win;
if (frame->frame->buf[0])
continue;
+ frame->sps = ff_refstruct_ref_c(fc->ps.sps);
+ frame->pps = ff_refstruct_ref_c(fc->ps.pps);
+
ret = ff_thread_get_buffer(s->avctx, frame->frame, AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return NULL;
@@ -139,6 +146,13 @@ static VVCFrame *alloc_frame(VVCContext *s, VVCFrameContext *fc)
for (int j = 0; j < frame->ctb_count; j++)
frame->rpl_tab[j] = frame->rpl;
+ win->left_offset = pps->r->pps_scaling_win_left_offset << sps->hshift[CHROMA];
+ win->right_offset = pps->r->pps_scaling_win_right_offset << sps->hshift[CHROMA];
+ win->top_offset = pps->r->pps_scaling_win_top_offset << sps->vshift[CHROMA];
+ win->bottom_offset = pps->r->pps_scaling_win_bottom_offset << sps->vshift[CHROMA];
+ frame->ref_width = pps->r->pps_pic_width_in_luma_samples - win->left_offset - win->right_offset;
+ frame->ref_height = pps->r->pps_pic_height_in_luma_samples - win->bottom_offset - win->top_offset;
+
frame->progress = alloc_progress();
if (!frame->progress)
goto fail;
@@ -226,7 +240,7 @@ int ff_vvc_output_frame(VVCContext *s, VVCFrameContext *fc, AVFrame *out, const
/* wait for more frames before output */
if (!flush && s->seq_output == s->seq_decode && sps &&
- nb_output <= sps->r->sps_dpb_params.dpb_max_dec_pic_buffering_minus1[sps->r->sps_max_sublayers_minus1] + 1)
+ nb_output <= sps->r->sps_dpb_params.dpb_max_num_reorder_pics[sps->r->sps_max_sublayers_minus1])
return 0;
if (nb_output) {
@@ -348,11 +362,30 @@ static VVCFrame *generate_missing_ref(VVCContext *s, VVCFrameContext *fc, int po
return frame;
}
+#define CHECK_MAX(d) (frame->ref_##d * frame->sps->r->sps_pic_##d##_max_in_luma_samples >= ref->ref_##d * (frame->pps->r->pps_pic_##d##_in_luma_samples - max))
+#define CHECK_SAMPLES(d) (frame->pps->r->pps_pic_##d##_in_luma_samples == ref->pps->r->pps_pic_##d##_in_luma_samples)
+static int check_candidate_ref(const VVCFrame *frame, const VVCRefPic *refp)
+{
+ const VVCFrame *ref = refp->ref;
+
+ if (refp->is_scaled) {
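+ // RPR limits: the reference scaling window may be at most twice as large and
+ // at most eight times smaller than the current picture's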
+ const int max = FFMAX(8, frame->sps->min_cb_size_y);
+ return frame->ref_width * 2 >= ref->ref_width &&
+ frame->ref_height * 2 >= ref->ref_height &&
+ frame->ref_width <= ref->ref_width * 8 &&
+ frame->ref_height <= ref->ref_height * 8 &&
+ CHECK_MAX(width) && CHECK_MAX(height);
+ }
+ return CHECK_SAMPLES(width) && CHECK_SAMPLES(height);
+}
+
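+// Q14 fixed-point ratio of the reference scaling window to the current one, with rounding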
+#define RPR_SCALE(f) (((ref->f << 14) + (fc->ref->f >> 1)) / fc->ref->f)
/* add a reference with the given poc to the list and mark it as used in DPB */
static int add_candidate_ref(VVCContext *s, VVCFrameContext *fc, RefPicList *list,
int poc, int ref_flag, uint8_t use_msb)
{
- VVCFrame *ref = find_ref_idx(s, fc, poc, use_msb);
+ VVCFrame *ref = find_ref_idx(s, fc, poc, use_msb);
+ VVCRefPic *refp = &list->refs[list->nb_refs];
if (ref == fc->ref || list->nb_refs >= VVC_MAX_REF_ENTRIES)
return AVERROR_INVALIDDATA;
@@ -363,9 +396,21 @@ static int add_candidate_ref(VVCContext *s, VVCFrameContext *fc, RefPicList *lis
return AVERROR(ENOMEM);
}
- list->list[list->nb_refs] = poc;
- list->ref[list->nb_refs] = ref;
- list->isLongTerm[list->nb_refs] = ref_flag & VVC_FRAME_FLAG_LONG_REF;
+ refp->poc = poc;
+ refp->ref = ref;
+ refp->is_lt = ref_flag & VVC_FRAME_FLAG_LONG_REF;
+ refp->is_scaled = ref->sps->r->sps_num_subpics_minus1 != fc->ref->sps->r->sps_num_subpics_minus1 ||
+ memcmp(&ref->scaling_win, &fc->ref->scaling_win, sizeof(ref->scaling_win)) ||
+ ref->pps->r->pps_pic_width_in_luma_samples != fc->ref->pps->r->pps_pic_width_in_luma_samples ||
+ ref->pps->r->pps_pic_height_in_luma_samples != fc->ref->pps->r->pps_pic_height_in_luma_samples;
+
+ if (!check_candidate_ref(fc->ref, refp))
+ return AVERROR_INVALIDDATA;
+
+ if (refp->is_scaled) {
+ refp->scale[0] = RPR_SCALE(ref_width);
+ refp->scale[1] = RPR_SCALE(ref_height);
+ }
list->nb_refs++;
mark_ref(ref, ref_flag);
@@ -463,7 +508,7 @@ int ff_vvc_slice_rpl(VVCContext *s, VVCFrameContext *fc, SliceContext *sc)
}
if ((!rsh->sh_collocated_from_l0_flag) == lx &&
rsh->sh_collocated_ref_idx < rpl->nb_refs)
- fc->ref->collocated_ref = rpl->ref[rsh->sh_collocated_ref_idx];
+ fc->ref->collocated_ref = rpl->refs[rsh->sh_collocated_ref_idx].ref;
}
return 0;
}
diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
index 01c3ff75b1..8777d380bf 100644
--- a/libavcodec/vvc/thread.c
+++ b/libavcodec/vvc/thread.c
@@ -124,11 +124,17 @@ static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const
atomic_store(&t->target_inter_score, 0);
}
-static void task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
+static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
{
+ if (t->sc) {
+ // the task has already been initialized; the bitstream is corrupt
+ return AVERROR_INVALIDDATA;
+ }
t->sc = sc;
t->ep = ep;
t->ctu_idx = ctu_idx;
+
+ return 0;
}
static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
@@ -287,10 +293,14 @@ static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContex
CTU *ctu = fc->tab.ctus + rs;
for (int lx = 0; lx < 2; lx++) {
for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
- const int y = ctu->max_y[lx][i];
- VVCFrame *ref = sc->rpl[lx].ref[i];
- if (ref && y >= 0)
+ int y = ctu->max_y[lx][i];
+ VVCRefPic *refp = sc->rpl[lx].refs + i;
+ VVCFrame *ref = refp->ref;
+ if (ref && y >= 0) {
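+ // for a scaled reference, convert the required row to the reference
+ // picture's coordinates via the Q14 vertical scale factor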
+ if (refp->is_scaled)
+ y = y * refp->scale[1] >> 14;
add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
+ }
}
}
}
@@ -758,24 +768,35 @@ static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *
frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
}
-void ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
+int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
{
VVCFrameThread *ft = fc->ft;
- for (int i = 0; i < fc->nb_slices; i++) {
- SliceContext *sc = fc->slices[i];
- for (int j = 0; j < sc->nb_eps; j++) {
- EntryPoint *ep = sc->eps + j;
- for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
- const int rs = sc->sh.ctb_addr_in_curr_slice[k];
- VVCTask *t = ft->tasks + rs;
-
- task_init_parse(t, sc, ep, k);
- check_colocation(s, t);
+ // We'll handle this in two passes:
+ // Pass 0 initializes each task's parse state, which lets bitstream errors be detected early
+ // Pass 1 schedules the colocation checks and submits the entry points
+ for (int pass = 0; pass < 2; pass++) {
+ for (int i = 0; i < fc->nb_slices; i++) {
+ SliceContext *sc = fc->slices[i];
+ for (int j = 0; j < sc->nb_eps; j++) {
+ EntryPoint *ep = sc->eps + j;
+ for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
+ const int rs = sc->sh.ctb_addr_in_curr_slice[k];
+ VVCTask *t = ft->tasks + rs;
+ if (pass) {
+ check_colocation(s, t);
+ } else {
+ const int ret = task_init_parse(t, sc, ep, k);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ if (pass)
+ submit_entry_point(s, ft, sc, ep);
}
- submit_entry_point(s, ft, sc, ep);
}
}
+ return 0;
}
int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
diff --git a/libavcodec/vvc/thread.h b/libavcodec/vvc/thread.h
index 55bb4ea244..8ac59b2ecf 100644
--- a/libavcodec/vvc/thread.h
+++ b/libavcodec/vvc/thread.h
@@ -30,7 +30,7 @@ void ff_vvc_executor_free(struct AVExecutor **e);
int ff_vvc_frame_thread_init(VVCFrameContext *fc);
void ff_vvc_frame_thread_free(VVCFrameContext *fc);
-void ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc);
+int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc);
int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc);
#endif // AVCODEC_VVC_THREAD_H
diff --git a/libavcodec/vvc_parser.c b/libavcodec/vvc_parser.c
index a6a5be27ae..5373875aae 100644
--- a/libavcodec/vvc_parser.c
+++ b/libavcodec/vvc_parser.c
@@ -185,14 +185,13 @@ static void set_parser_ctx(AVCodecParserContext *s, AVCodecContext *avctx,
avctx->color_range =
sps->vui.vui_full_range_flag ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
- avctx->has_b_frames = (sps->sps_max_sublayers_minus1 + 1) > 2 ? 2 :
- sps->sps_max_sublayers_minus1;
- avctx->max_b_frames = sps->sps_max_sublayers_minus1;
+ avctx->has_b_frames =
+ sps->sps_dpb_params.dpb_max_num_reorder_pics[sps->sps_max_sublayers_minus1];
if (sps->sps_ptl_dpb_hrd_params_present_flag &&
sps->sps_timing_hrd_params_present_flag) {
- int num = sps->sps_general_timing_hrd_parameters.num_units_in_tick;
- int den = sps->sps_general_timing_hrd_parameters.time_scale;
+ uint32_t num = sps->sps_general_timing_hrd_parameters.num_units_in_tick;
+ uint32_t den = sps->sps_general_timing_hrd_parameters.time_scale;
if (num != 0 && den != 0)
av_reduce(&avctx->framerate.den, &avctx->framerate.num,
diff --git a/libavcodec/wavarc.c b/libavcodec/wavarc.c
index b4b26958e6..93b76c43e8 100644
--- a/libavcodec/wavarc.c
+++ b/libavcodec/wavarc.c
@@ -689,7 +689,7 @@ static int decode_5elp(AVCodecContext *avctx,
for (int o = 0; o < order; o++)
sum += s->filter[ch][o] * (unsigned)samples[n + 70 - o - 1];
- samples[n + 70] += ac_out[n] + (sum >> 4);
+ samples[n + 70] += ac_out[n] + (unsigned)(sum >> 4);
}
for (int n = 0; n < 70; n++)
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 73d69d66ff..d4cf489c0f 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -30,7 +30,7 @@
#include "get_bits.h"
#include "refstruct.h"
#include "thread.h"
-#include "threadframe.h"
+#include "threadprogress.h"
#include "unary.h"
#include "wavpack.h"
#include "dsd.h"
@@ -103,15 +103,14 @@ typedef struct WavpackContext {
WavpackFrameContext **fdec;
int fdec_num;
- int block;
int samples;
int ch_offset;
- AVFrame *frame;
- ThreadFrame curr_frame, prev_frame;
Modulation modulation;
DSDContext *dsdctx; ///< RefStruct reference
+ ThreadProgress *curr_progress, *prev_progress; ///< RefStruct references
+ FFRefStructPool *progress_pool; ///< RefStruct reference
int dsd_channels;
} WavpackContext;
@@ -994,11 +993,14 @@ static int wv_dsd_reset(WavpackContext *s, int channels)
s->dsd_channels = 0;
ff_refstruct_unref(&s->dsdctx);
+ ff_refstruct_unref(&s->curr_progress);
+ ff_refstruct_unref(&s->prev_progress);
if (!channels)
return 0;
- if (channels > INT_MAX / sizeof(*s->dsdctx))
+ if (WV_MAX_CHANNELS > SIZE_MAX / sizeof(*s->dsdctx) &&
+ channels > SIZE_MAX / sizeof(*s->dsdctx))
return AVERROR(EINVAL);
s->dsdctx = ff_refstruct_allocz(channels * sizeof(*s->dsdctx));
@@ -1009,6 +1011,8 @@ static int wv_dsd_reset(WavpackContext *s, int channels)
for (i = 0; i < channels; i++)
memset(s->dsdctx[i].buf, 0x69, sizeof(s->dsdctx[i].buf));
+ ff_init_dsd_data();
+
return 0;
}
@@ -1017,22 +1021,31 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
WavpackContext *fsrc = src->priv_data;
WavpackContext *fdst = dst->priv_data;
- int ret;
-
- if (dst == src)
- return 0;
-
- ff_thread_release_ext_buffer(&fdst->curr_frame);
- if (fsrc->curr_frame.f->data[0]) {
- if ((ret = ff_thread_ref_frame(&fdst->curr_frame, &fsrc->curr_frame)) < 0)
- return ret;
- }
+ ff_refstruct_replace(&fdst->curr_progress, fsrc->curr_progress);
ff_refstruct_replace(&fdst->dsdctx, fsrc->dsdctx);
fdst->dsd_channels = fsrc->dsd_channels;
return 0;
}
+
+static av_cold int progress_pool_init_cb(FFRefStructOpaque opaque, void *obj)
+{
+ ThreadProgress *progress = obj;
+ return ff_thread_progress_init(progress, 1);
+}
+
+static void progress_pool_reset_cb(FFRefStructOpaque opaque, void *obj)
+{
+ ThreadProgress *progress = obj;
+ ff_thread_progress_reset(progress);
+}
+
+static av_cold void progress_pool_free_entry_cb(FFRefStructOpaque opaque, void *obj)
+{
+ ThreadProgress *progress = obj;
+ ff_thread_progress_destroy(progress);
+}
#endif
static av_cold int wavpack_decode_init(AVCodecContext *avctx)
@@ -1043,11 +1056,17 @@ static av_cold int wavpack_decode_init(AVCodecContext *avctx)
s->fdec_num = 0;
- s->curr_frame.f = av_frame_alloc();
- s->prev_frame.f = av_frame_alloc();
-
- if (!s->curr_frame.f || !s->prev_frame.f)
- return AVERROR(ENOMEM);
+#if HAVE_THREADS
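+ // only the first frame thread allocates the progress pool;
+ // ff_thread_sync_ref() shares it with the other frame threads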
+ if (ff_thread_sync_ref(avctx, offsetof(WavpackContext, progress_pool)) == FF_THREAD_IS_FIRST_THREAD) {
+ s->progress_pool = ff_refstruct_pool_alloc_ext(sizeof(*s->curr_progress),
+ FF_REFSTRUCT_POOL_FLAG_FREE_ON_INIT_ERROR, NULL,
+ progress_pool_init_cb,
+ progress_pool_reset_cb,
+ progress_pool_free_entry_cb, NULL);
+ if (!s->progress_pool)
+ return AVERROR(ENOMEM);
+ }
+#endif
return 0;
}
@@ -1061,19 +1080,14 @@ static av_cold int wavpack_decode_end(AVCodecContext *avctx)
av_freep(&s->fdec);
s->fdec_num = 0;
- ff_thread_release_ext_buffer(&s->curr_frame);
- av_frame_free(&s->curr_frame.f);
-
- ff_thread_release_ext_buffer(&s->prev_frame);
- av_frame_free(&s->prev_frame.f);
-
- ff_refstruct_unref(&s->dsdctx);
+ ff_refstruct_pool_uninit(&s->progress_pool);
+ wv_dsd_reset(s, 0);
return 0;
}
-static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
- const uint8_t *buf, int buf_size)
+static int wavpack_decode_block(AVCodecContext *avctx, AVFrame *frame, int block_no,
+ const uint8_t *buf, int buf_size, int *new_progress)
{
WavpackContext *wc = avctx->priv_data;
WavpackFrameContext *s;
@@ -1419,6 +1433,7 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
av_log(avctx, AV_LOG_ERROR, "Invalid channel info size %d\n",
size);
}
+ av_assert1(chan <= WV_MAX_CHANNELS);
break;
case WP_ID_SAMPLE_RATE:
if (size != 3) {
@@ -1510,37 +1525,43 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
} else {
av_channel_layout_default(&new_ch_layout, s->stereo + 1);
}
+ av_assert1(new_ch_layout.nb_channels <= WV_MAX_CHANNELS);
/* clear DSD state if stream properties change */
- if (new_ch_layout.nb_channels != wc->dsd_channels ||
- av_channel_layout_compare(&new_ch_layout, &avctx->ch_layout) ||
- new_samplerate != avctx->sample_rate ||
- !!got_dsd != !!wc->dsdctx) {
+ if ((wc->dsdctx && !got_dsd) ||
+ got_dsd && (new_ch_layout.nb_channels != wc->dsd_channels ||
+ av_channel_layout_compare(&new_ch_layout, &avctx->ch_layout) ||
+ new_samplerate != avctx->sample_rate)) {
ret = wv_dsd_reset(wc, got_dsd ? new_ch_layout.nb_channels : 0);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "Error reinitializing the DSD context\n");
return ret;
}
- ff_thread_release_ext_buffer(&wc->curr_frame);
- ff_init_dsd_data();
}
av_channel_layout_copy(&avctx->ch_layout, &new_ch_layout);
avctx->sample_rate = new_samplerate;
avctx->sample_fmt = sample_fmt;
avctx->bits_per_raw_sample = orig_bpp;
- ff_thread_release_ext_buffer(&wc->prev_frame);
- FFSWAP(ThreadFrame, wc->curr_frame, wc->prev_frame);
-
/* get output buffer */
- wc->curr_frame.f->nb_samples = s->samples;
- ret = ff_thread_get_ext_buffer(avctx, &wc->curr_frame,
- AV_GET_BUFFER_FLAG_REF);
+ frame->nb_samples = s->samples;
+ ret = ff_thread_get_buffer(avctx, frame, 0);
if (ret < 0)
return ret;
- wc->frame = wc->curr_frame.f;
- ff_thread_finish_setup(avctx);
+ av_assert1(!!wc->progress_pool == !!(avctx->active_thread_type & FF_THREAD_FRAME));
+ if (wc->progress_pool) {
+ if (wc->dsdctx) {
+ ff_refstruct_unref(&wc->prev_progress);
+ wc->prev_progress = ff_refstruct_pool_get(wc->progress_pool);
+ if (!wc->prev_progress)
+ return AVERROR(ENOMEM);
+ FFSWAP(ThreadProgress*, wc->prev_progress, wc->curr_progress);
+ *new_progress = 1;
+ }
+ av_assert1(!!wc->dsdctx == !!wc->curr_progress);
+ ff_thread_finish_setup(avctx);
+ }
}
if (wc->ch_offset + s->stereo >= avctx->ch_layout.nb_channels) {
@@ -1548,9 +1569,9 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
return ((avctx->err_recognition & AV_EF_EXPLODE) || !wc->ch_offset) ? AVERROR_INVALIDDATA : 0;
}
- samples_l = wc->frame->extended_data[wc->ch_offset];
+ samples_l = frame->extended_data[wc->ch_offset];
if (s->stereo)
- samples_r = wc->frame->extended_data[wc->ch_offset + 1];
+ samples_r = frame->extended_data[wc->ch_offset + 1];
wc->ch_offset += 1 + s->stereo;
@@ -1602,26 +1623,27 @@ static int dsd_channel(AVCodecContext *avctx, void *frmptr, int jobnr, int threa
const WavpackContext *s = avctx->priv_data;
AVFrame *frame = frmptr;
- ff_dsd2pcm_translate (&s->dsdctx [jobnr], s->samples, 0,
+ ff_dsd2pcm_translate(&s->dsdctx[jobnr], s->samples, 0,
(uint8_t *)frame->extended_data[jobnr], 4,
(float *)frame->extended_data[jobnr], 1);
return 0;
}
-static int wavpack_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
+static int wavpack_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int *got_frame_ptr, AVPacket *avpkt)
{
WavpackContext *s = avctx->priv_data;
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
int frame_size, ret, frame_flags;
+ int block = 0, new_progress = 0;
+
+ av_assert1(!s->curr_progress || s->dsdctx);
if (avpkt->size <= WV_HEADER_SIZE)
return AVERROR_INVALIDDATA;
- s->frame = NULL;
- s->block = 0;
s->ch_offset = 0;
/* determine number of samples */
@@ -1642,13 +1664,15 @@ static int wavpack_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
if (frame_size <= 0 || frame_size > buf_size) {
av_log(avctx, AV_LOG_ERROR,
"Block %d has invalid size (size %d vs. %d bytes left)\n",
- s->block, frame_size, buf_size);
+ block, frame_size, buf_size);
ret = AVERROR_INVALIDDATA;
goto error;
}
- if ((ret = wavpack_decode_block(avctx, s->block, buf, frame_size)) < 0)
+ ret = wavpack_decode_block(avctx, frame, block, buf,
+ frame_size, &new_progress);
+ if (ret < 0)
goto error;
- s->block++;
+ block++;
buf += frame_size;
buf_size -= frame_size;
}
@@ -1659,26 +1683,23 @@ static int wavpack_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
goto error;
}
- ff_thread_await_progress(&s->prev_frame, INT_MAX, 0);
- ff_thread_release_ext_buffer(&s->prev_frame);
-
- if (s->modulation == MODULATION_DSD)
- avctx->execute2(avctx, dsd_channel, s->frame, NULL, avctx->ch_layout.nb_channels);
-
- ff_thread_report_progress(&s->curr_frame, INT_MAX, 0);
-
- if ((ret = av_frame_ref(rframe, s->frame)) < 0)
- return ret;
+ if (s->dsdctx) {
+ if (s->prev_progress)
+ ff_thread_progress_await(s->prev_progress, INT_MAX);
+ avctx->execute2(avctx, dsd_channel, frame, NULL, avctx->ch_layout.nb_channels);
+ if (s->curr_progress)
+ ff_thread_progress_report(s->curr_progress, INT_MAX);
+ }
*got_frame_ptr = 1;
return avpkt->size;
error:
- if (s->frame) {
- ff_thread_await_progress(&s->prev_frame, INT_MAX, 0);
- ff_thread_release_ext_buffer(&s->prev_frame);
- ff_thread_report_progress(&s->curr_frame, INT_MAX, 0);
+ if (new_progress) {
+ if (s->prev_progress)
+ ff_thread_progress_await(s->prev_progress, INT_MAX);
+ ff_thread_progress_report(s->curr_progress, INT_MAX);
}
return ret;
@@ -1697,6 +1718,5 @@ const FFCodec ff_wavpack_decoder = {
UPDATE_THREAD_CONTEXT(update_thread_context),
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_CHANNEL_CONF,
- .caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
- FF_CODEC_CAP_ALLOCATE_PROGRESS,
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
};
diff --git a/libavcodec/wavpack.h b/libavcodec/wavpack.h
index 9f62f8406d..2efbb1fd06 100644
--- a/libavcodec/wavpack.h
+++ b/libavcodec/wavpack.h
@@ -57,6 +57,7 @@
#define WV_FLT_ZERO_SENT 0x08
#define WV_FLT_ZERO_SIGN 0x10
+#define WV_MAX_CHANNELS (1 << 12)
#define WV_MAX_SAMPLES 150000
enum WP_ID_Flags {
diff --git a/libavcodec/wbmpdec.c b/libavcodec/wbmpdec.c
index 3b5753abcd..50c729047d 100644
--- a/libavcodec/wbmpdec.c
+++ b/libavcodec/wbmpdec.c
@@ -74,9 +74,6 @@ static int wbmp_decode_frame(AVCodecContext *avctx, AVFrame *p,
else
readbits(p->data[0], width, height, p->linesize[0], gb.buffer, gb.buffer_end - gb.buffer);
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return buf_size;
diff --git a/libavcodec/webp.c b/libavcodec/webp.c
index dbcc5e73eb..7c2a5f0111 100644
--- a/libavcodec/webp.c
+++ b/libavcodec/webp.c
@@ -1571,5 +1571,6 @@ const FFCodec ff_webp_decoder = {
FF_CODEC_DECODE_CB(webp_decode_frame),
.close = webp_decode_close,
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
- .caps_internal = FF_CODEC_CAP_ICC_PROFILES,
+ .caps_internal = FF_CODEC_CAP_ICC_PROFILES |
+ FF_CODEC_CAP_USES_PROGRESSFRAMES,
};
diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 690f00dc47..35bdbe805d 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -37,9 +37,9 @@ static const struct {
{"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
{"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
{"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
- {"{", "\\{"}, {"}", "\\}"}, // escape to avoid ASS markup conflicts
+ {"{", "\\{{}"}, {"\\", "\\\xe2\x81\xa0"}, // escape to avoid ASS markup conflicts
{"&gt;", ">"}, {"&lt;", "<"},
- {"&lrm;", ""}, {"&rlm;", ""}, // FIXME: properly honor bidi marks
+ {"&lrm;", "\xe2\x80\x8e"}, {"&rlm;", "\xe2\x80\x8f"},
{"&amp;", "&"}, {"&nbsp;", "\\h"},
};
diff --git a/libavcodec/wnv1.c b/libavcodec/wnv1.c
index 0e8dae598f..3d0d90fe38 100644
--- a/libavcodec/wnv1.c
+++ b/libavcodec/wnv1.c
@@ -69,7 +69,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->flags |= AV_FRAME_FLAG_KEY;
if ((ret = init_get_bits8(&gb, buf + 8, buf_size - 8)) < 0)
return ret;
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 21c5e9b0d4..331183f450 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -61,7 +61,6 @@ OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp_init.o
OBJS-$(CONFIG_MPEG4_DECODER) += x86/mpeg4videodsp.o x86/xvididct_init.o
OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp_init.o
OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp_init.o
OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_init.o
@@ -176,7 +175,6 @@ X86ASM-OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
X86ASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
X86ASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
X86ASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
-X86ASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
X86ASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv40dsp.o
X86ASM-OBJS-$(CONFIG_SBC_ENCODER) += x86/sbcdsp.o
X86ASM-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc.o
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 0ba980aa7b..21f59708b7 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -43,7 +43,7 @@ SECTION .text
%macro AC3_EXPONENT_MIN 0
cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
- shl reuse_blksq, 8
+ shl reuse_blksd, 8
jz .end
LOOP_ALIGN
.nextexp:
@@ -57,7 +57,7 @@ cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset
jae .nextblk
mova [expq], m0
add expq, mmsize
- sub expnq, mmsize
+ sub expnd, mmsize
jg .nextexp
.end:
RET
@@ -71,7 +71,7 @@ AC3_EXPONENT_MIN
%undef LOOP_ALIGN
;-----------------------------------------------------------------------------
-; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len)
+; void ff_float_to_fixed24(int32_t *dst, const float *src, size_t len)
;-----------------------------------------------------------------------------
INIT_XMM sse2
@@ -217,6 +217,7 @@ cglobal ac3_compute_mantissa_size, 1, 2, 4, mant_cnt, sum
%macro AC3_EXTRACT_EXPONENTS 0
cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len
+ movsxdifnidn lenq, lend
add expq, lenq
lea coefq, [coefq+4*lenq]
neg lenq
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index e380308d4a..cccc9a801a 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -80,3 +80,36 @@ INIT_XMM sse
CLEAR_BLOCKS 1
INIT_YMM avx
CLEAR_BLOCKS 1
+
+;-----------------------------------------
+; void ff_fill_block_tab_%1(uint8_t *block, uint8_t value,
+; ptrdiff_t line_size, int h);
+;-----------------------------------------
+%macro FILL_BLOCK_TAB 2
+cglobal fill_block_tab_%1, 4, 5, 1, block, value, stride, h, stride3
+ lea stride3q, [strideq + strideq * 2]
+%if cpuflag(avx2)
+ movd m0, valued
+ vpbroadcastb m0, m0
+%else
+ SPLATB_REG m0, value, x
+%endif
+.loop:
+ mov%2 [blockq], m0
+ mov%2 [blockq + strideq], m0
+ mov%2 [blockq + strideq * 2], m0
+ mov%2 [blockq + stride3q], m0
+ lea blockq, [blockq + strideq * 4]
+ sub hd, 4
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+FILL_BLOCK_TAB 8, q
+FILL_BLOCK_TAB 16, a
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+FILL_BLOCK_TAB 8, q
+FILL_BLOCK_TAB 16, a
+%endif
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
index 996124114f..37f3bb6a84 100644
--- a/libavcodec/x86/blockdsp_init.c
+++ b/libavcodec/x86/blockdsp_init.c
@@ -29,6 +29,11 @@ void ff_clear_block_avx(int16_t *block);
void ff_clear_blocks_sse(int16_t *blocks);
void ff_clear_blocks_avx(int16_t *blocks);
+void ff_fill_block_tab_16_sse2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h);
+void ff_fill_block_tab_8_sse2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h);
+void ff_fill_block_tab_16_avx2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h);
+void ff_fill_block_tab_8_avx2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h);
+
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c)
{
#if HAVE_X86ASM
@@ -38,9 +43,17 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c)
c->clear_block = ff_clear_block_sse;
c->clear_blocks = ff_clear_blocks_sse;
}
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->fill_block_tab[0] = ff_fill_block_tab_16_sse2;
+ c->fill_block_tab[1] = ff_fill_block_tab_8_sse2;
+ }
if (EXTERNAL_AVX_FAST(cpu_flags)) {
c->clear_block = ff_clear_block_avx;
c->clear_blocks = ff_clear_blocks_avx;
}
+ if (EXTERNAL_AVX2(cpu_flags)) {
+ c->fill_block_tab[0] = ff_fill_block_tab_16_avx2;
+ c->fill_block_tab[1] = ff_fill_block_tab_8_avx2;
+ }
#endif /* HAVE_X86ASM */
}
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 4b2fd65435..9fc82ee9ae 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -29,7 +29,7 @@ vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,
SECTION .text
-%macro PMACSDQL 5
+%macro PMACSDQL 3
%if cpuflag(xop)
pmacsdql %1, %2, %3, %1
%else
@@ -38,9 +38,9 @@ SECTION .text
%endif
%endmacro
-%macro LPC_32 1
+%macro LPC_32 3
INIT_XMM %1
-cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+cglobal flac_lpc_%2, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
sub lend, pred_orderd
jle .ret
movsxdifnidn pred_orderq, pred_orderd
@@ -59,22 +59,22 @@ ALIGN 16
test jq, jq
jz .end_order
.loop_order:
- PMACSDQL m2, m0, m1, m2, m0
+ PMACSDQL m2, m0, m1
movd m0, [decodedq+jq*4]
- PMACSDQL m3, m1, m0, m3, m1
+ PMACSDQL m3, m1, m0
movd m1, [coeffsq+jq*4]
inc jq
jl .loop_order
.end_order:
- PMACSDQL m2, m0, m1, m2, m0
- psrlq m2, m4
+ PMACSDQL m2, m0, m1
+ %3 m2, m4
movd m0, [decodedq]
paddd m0, m2
movd [decodedq], m0
sub lend, 2
jl .ret
- PMACSDQL m3, m1, m0, m3, m1
- psrlq m3, m4
+ PMACSDQL m3, m1, m0
+ %3 m3, m4
movd m1, [decodedq+4]
paddd m1, m3
movd [decodedq+4], m1
@@ -83,10 +83,60 @@ ALIGN 16
RET
%endmacro
+LPC_32 sse4, 16, psrad
+LPC_32 sse4, 32, psrlq
%if HAVE_XOP_EXTERNAL
-LPC_32 xop
+LPC_32 xop, 32, psrlq
%endif
-LPC_32 sse4
+
+INIT_XMM sse2
+cglobal flac_wasted_32, 3,3,5, decoded, wasted, len
+ shl lend, 2
+ add decodedq, lenq
+ neg lenq
+ movd m4, wastedd
+ALIGN 16
+.loop:
+ mova m0, [decodedq+lenq+mmsize*0]
+ mova m1, [decodedq+lenq+mmsize*1]
+ mova m2, [decodedq+lenq+mmsize*2]
+ mova m3, [decodedq+lenq+mmsize*3]
+ pslld m0, m4
+ pslld m1, m4
+ pslld m2, m4
+ pslld m3, m4
+ mova [decodedq+lenq+mmsize*0], m0
+ mova [decodedq+lenq+mmsize*1], m1
+ mova [decodedq+lenq+mmsize*2], m2
+ mova [decodedq+lenq+mmsize*3], m3
+ add lenq, mmsize * 4
+ jl .loop
+ RET
+
+INIT_XMM sse4
+cglobal flac_wasted_33, 4,4,5, decoded, residuals, wasted, len
+ shl lend, 2
+ lea decodedq, [decodedq+lenq*2]
+ add residualsq, lenq
+ neg lenq
+ movd m4, wastedd
+ALIGN 16
+.loop:
+ pmovsxdq m0, [residualsq+lenq+mmsize*0]
+ pmovsxdq m1, [residualsq+lenq+mmsize/2]
+ pmovsxdq m2, [residualsq+lenq+mmsize*1]
+ pmovsxdq m3, [residualsq+lenq+mmsize*1+mmsize/2]
+ psllq m0, m4
+ psllq m1, m4
+ psllq m2, m4
+ psllq m3, m4
+ mova [decodedq+lenq*2+mmsize*0], m0
+ mova [decodedq+lenq*2+mmsize*1], m1
+ mova [decodedq+lenq*2+mmsize*2], m2
+ mova [decodedq+lenq*2+mmsize*3], m3
+ add lenq, mmsize * 2
+ jl .loop
+ RET
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 87daed7005..fa993d3466 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -23,11 +23,16 @@
#include "libavutil/x86/cpu.h"
#include "config.h"
+void ff_flac_lpc_16_sse4(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+void ff_flac_wasted_33_sse4(int64_t *decoded, const int32_t *residual, int wasted, int len);
+
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
@@ -61,6 +66,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
+ c->wasted32 = ff_flac_wasted_32_sse2;
if (fmt == AV_SAMPLE_FMT_S16) {
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
@@ -93,7 +99,9 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
+ c->lpc16 = ff_flac_lpc_16_sse4;
c->lpc32 = ff_flac_lpc_32_sse4;
+ c->wasted33 = ff_flac_wasted_33_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
if (fmt == AV_SAMPLE_FMT_S16) {
diff --git a/libavcodec/x86/opusdsp.asm b/libavcodec/x86/opusdsp.asm
index 418cc16330..635f59f83c 100644
--- a/libavcodec/x86/opusdsp.asm
+++ b/libavcodec/x86/opusdsp.asm
@@ -22,16 +22,13 @@
SECTION_RODATA
- ; 0.85..^1 0.85..^2 0.85..^3 0.85..^4
-tab_st: dd 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
-
SECTION .text
INIT_XMM fma3
%if UNIX64
-cglobal opus_deemphasis, 3, 3, 8, out, in, len
+cglobal opus_deemphasis, 4, 4, 8, out, in, weights, len
%else
-cglobal opus_deemphasis, 4, 4, 8, out, in, coeff, len
+cglobal opus_deemphasis, 5, 5, 8, out, in, coeff, weights, len
%endif
%if ARCH_X86_32
VBROADCASTSS m0, coeffm
@@ -41,7 +38,7 @@ cglobal opus_deemphasis, 4, 4, 8, out, in, coeff, len
shufps m0, m0, 0
%endif
- movaps m4, [tab_st]
+ movaps m4, [weightsq]
VBROADCASTSS m5, m4
shufps m6, m4, m4, q1111
shufps m7, m4, m4, q2222
diff --git a/libavcodec/x86/opusdsp_init.c b/libavcodec/x86/opusdsp_init.c
index 582fbb4f0c..66d6839589 100644
--- a/libavcodec/x86/opusdsp_init.c
+++ b/libavcodec/x86/opusdsp_init.c
@@ -23,7 +23,7 @@
#include "libavcodec/opusdsp.h"
void ff_opus_postfilter_fma3(float *data, int period, float *gains, int len);
-float ff_opus_deemphasis_fma3(float *out, float *in, float coeff, int len);
+float ff_opus_deemphasis_fma3(float *out, float *in, float coeff, const float *weights, int len);
av_cold void ff_opus_dsp_init_x86(OpusDSP *ctx)
{
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index f54fa57b3e..edac1764cb 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -53,7 +53,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
c->v_loop_filter = c->v_loop_filter_unaligned = ff_vp3_v_loop_filter_mmxext;
- c->h_loop_filter = c->v_loop_filter_unaligned = ff_vp3_h_loop_filter_mmxext;
+ c->h_loop_filter = c->h_loop_filter_unaligned = ff_vp3_h_loop_filter_mmxext;
}
}
diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile
index d1623bd46a..d6a66f860a 100644
--- a/libavcodec/x86/vvc/Makefile
+++ b/libavcodec/x86/vvc/Makefile
@@ -3,5 +3,6 @@ clean::
OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvcdsp_init.o \
x86/h26x/h2656dsp.o
-X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvc_mc.o \
+X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvc_alf.o \
+ x86/vvc/vvc_mc.o \
x86/h26x/h2656_inter.o
diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm
new file mode 100644
index 0000000000..b3d118962f
--- /dev/null
+++ b/libavcodec/x86/vvc/vvc_alf.asm
@@ -0,0 +1,810 @@
+;******************************************************************************
+;* VVC Adaptive Loop Filter SIMD optimizations
+;*
+;* Copyright (c) 2023-2024 Nuo Mi <nuomi2021@gmail.com>
+;* Copyright (c) 2023-2024 Wu Jianhua <toqsxw@outlook.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+%macro PARAM_SHUFFE 1
+%assign i (%1 * 2)
+%assign j ((i + 1) << 8) + (i)
+param_shuffe_ %+ %1:
+%rep 2
+ times 4 dw j
+ times 4 dw (j + 0x0808)
+%endrep
+%endmacro
+
+PARAM_SHUFFE 0
+PARAM_SHUFFE 1
+PARAM_SHUFFE 2
+PARAM_SHUFFE 3
+
+CLASSIFY_SHUFFE: times 2 db 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
+TRANSPOSE_PERMUTE: dd 0, 1, 4, 5, 2, 3, 6, 7
+ARG_VAR_SHUFFE: times 2 db 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4
+
+dd448: times 8 dd 512 - 64
+dw64: times 8 dd 64
+dd2: times 8 dd 2
+dw3: times 8 dd 3
+dw5: times 8 dd 5
+dd15: times 8 dd 15
+
+SECTION .text
+
+
+%define ALF_NUM_COEFF_LUMA 12
+%define ALF_NUM_COEFF_CHROMA 6
+%define ALF_NUM_COEFF_CC 7
+
+;%1-%3 out
+;%4 clip or filter
+%macro LOAD_LUMA_PARAMS_W16 4
+ lea offsetq, [3 * xq] ;xq * ALF_NUM_COEFF_LUMA / ALF_BLOCK_SIZE
+ movu m%1, [%4q + 2 * offsetq + 0 * 32] ; 2 * for sizeof(int16_t)
+ movu m%2, [%4q + 2 * offsetq + 1 * 32]
+ movu m%3, [%4q + 2 * offsetq + 2 * 32]
+%endmacro
+
+%macro LOAD_LUMA_PARAMS_W16 6
+ LOAD_LUMA_PARAMS_W16 %1, %2, %3, %4
+ ;m%1 = 03 02 01 00
+ ;m%2 = 07 06 05 04
+ ;m%3 = 11 10 09 08
+
+ vshufpd m%5, m%1, m%2, 0011b ;06 02 05 01
+ vshufpd m%6, m%3, m%5, 1001b ;06 10 01 09
+
+ vshufpd m%1, m%1, m%6, 1100b ;06 03 09 00
+ vshufpd m%2, m%2, m%6, 0110b ;10 07 01 04
+ vshufpd m%3, m%3, m%5, 0110b ;02 11 05 08
+
+ vpermpd m%1, m%1, 01111000b ;09 06 03 00
+ vshufpd m%2, m%2, m%2, 1001b ;10 07 04 01
+ vpermpd m%3, m%3, 10000111b ;11 08 05 02
+%endmacro
+
+; %1-%3 out
+; %4 clip or filter
+; %5-%6 tmp
+%macro LOAD_LUMA_PARAMS 6
+ LOAD_LUMA_PARAMS_W16 %1, %2, %3, %4, %5, %6
+%endmacro
+
+%macro LOAD_CHROMA_PARAMS 4
+ ; LOAD_CHROMA_PARAMS_W %+ WIDTH %1, %2, %3, %4
+ movq xm%1, [%3q]
+ movd xm%2, [%3q + 8]
+ vpbroadcastq m%1, xm%1
+ vpbroadcastq m%2, xm%2
+%endmacro
+
+%macro LOAD_PARAMS 0
+%if LUMA
+ LOAD_LUMA_PARAMS 3, 4, 5, filter, 6, 7
+ LOAD_LUMA_PARAMS 6, 7, 8, clip, 9, 10
+%else
+ LOAD_CHROMA_PARAMS 3, 4, filter, 5
+ LOAD_CHROMA_PARAMS 6, 7, clip, 8
+%endif
+%endmacro
+
+; FILTER(param_idx)
+; input: m2, m9, m10
+; output: m0, m1
+; tmp: m11-m13
+%macro FILTER 1
+ %assign i (%1 % 4)
+ %assign j (%1 / 4 + 3)
+ %assign k (%1 / 4 + 6)
+ %define filters m %+ j
+ %define clips m %+ k
+
+ pshufb m12, clips, [param_shuffe_ %+ i] ;clip
+ pxor m11, m11
+ psubw m11, m12 ;-clip
+
+ vpsubw m9, m2
+ CLIPW m9, m11, m12
+
+ vpsubw m10, m2
+ CLIPW m10, m11, m12
+
+ vpunpckhwd m13, m9, m10
+ vpunpcklwd m9, m9, m10
+
+ pshufb m12, filters, [param_shuffe_ %+ i] ;filter
+ vpunpcklwd m10, m12, m12
+ vpunpckhwd m12, m12, m12
+
+ vpmaddwd m9, m10
+ vpmaddwd m12, m13
+
+ paddd m0, m9
+ paddd m1, m12
+%endmacro
+
+; FILTER(param_idx, bottom, top, byte_offset)
+; input: param_idx, bottom, top, byte_offset
+; output: m0, m1
+; temp: m9, m10
+%macro FILTER 4
+ LOAD_PIXELS m10, [%2 + %4]
+ LOAD_PIXELS m9, [%3 - %4]
+ FILTER %1
+%endmacro
+
+; GET_SRCS(line)
+; brief: get source lines
+; input: src, src_stride, vb_pos
+; output: s1...s6
+%macro GET_SRCS 1
+ lea s1q, [srcq + src_strideq]
+ lea s3q, [s1q + src_strideq]
+%if LUMA
+ lea s5q, [s3q + src_strideq]
+%endif
+ neg src_strideq
+ lea s2q, [srcq + src_strideq]
+ lea s4q, [s2q + src_strideq]
+%if LUMA
+ lea s6q, [s4q + src_strideq]
+%endif
+ neg src_strideq
+
+%if LUMA
+ cmp vb_posq, 0
+ je %%vb_bottom
+ cmp vb_posq, 4
+ jne %%vb_end
+%else
+ cmp vb_posq, 2
+ jne %%vb_end
+ cmp %1, 2
+ jge %%vb_bottom
+%endif
+
+%%vb_above:
+ ; above
+ ; p1 = (y + i == vb_pos - 1) ? p0 : p1;
+ ; p2 = (y + i == vb_pos - 1) ? p0 : p2;
+ ; p3 = (y + i >= vb_pos - 2) ? p1 : p3;
+ ; p4 = (y + i >= vb_pos - 2) ? p2 : p4;
+ ; p5 = (y + i >= vb_pos - 3) ? p3 : p5;
+ ; p6 = (y + i >= vb_pos - 3) ? p4 : p6;
+ dec vb_posq
+ cmp vb_posq, %1
+ cmove s1q, srcq
+ cmove s2q, srcq
+
+ dec vb_posq
+ cmp vb_posq, %1
+ cmovbe s3q, s1q
+ cmovbe s4q, s2q
+
+ dec vb_posq
+%if LUMA
+ cmp vb_posq, %1
+ cmovbe s5q, s3q
+ cmovbe s6q, s4q
+%endif
+ add vb_posq, 3
+ jmp %%vb_end
+
+%%vb_bottom:
+ ; bottom
+ ; p1 = (y + i == vb_pos ) ? p0 : p1;
+ ; p2 = (y + i == vb_pos ) ? p0 : p2;
+ ; p3 = (y + i <= vb_pos + 1) ? p1 : p3;
+ ; p4 = (y + i <= vb_pos + 1) ? p2 : p4;
+ ; p5 = (y + i <= vb_pos + 2) ? p3 : p5;
+ ; p6 = (y + i <= vb_pos + 2) ? p4 : p6;
+ cmp vb_posq, %1
+ cmove s1q, srcq
+ cmove s2q, srcq
+
+ inc vb_posq
+ cmp vb_posq, %1
+ cmovae s3q, s1q
+ cmovae s4q, s2q
+
+ inc vb_posq
+%if LUMA
+ cmp vb_posq, %1
+ cmovae s5q, s3q
+ cmovae s6q, s4q
+%endif
+ sub vb_posq, 2
+%%vb_end:
+%endmacro
+
+; SHIFT_VB(line)
+; brief: shift filter result
+; input: m0, m1, vb_pos
+; output: m0
+; temp: m9
+%macro SHIFT_VB 1
+%define SHIFT 7
+%if LUMA
+ cmp %1, 3
+ je %%near_above
+ cmp %1, 0
+ je %%near_below
+ jmp %%no_vb
+ %%near_above:
+ cmp vb_posq, 4
+ je %%near_vb
+ jmp %%no_vb
+ %%near_below:
+ cmp vb_posq, 0
+ je %%near_vb
+%else
+ cmp %1, 0
+ je %%no_vb
+ cmp %1, 3
+ je %%no_vb
+ cmp vb_posq, 2
+ je %%near_vb
+%endif
+%%no_vb:
+ vpsrad m0, SHIFT
+ vpsrad m1, SHIFT
+ jmp %%shift_end
+%%near_vb:
+ vpbroadcastd m9, [dd448]
+ paddd m0, m9
+ paddd m1, m9
+ vpsrad m0, SHIFT + 3
+ vpsrad m1, SHIFT + 3
+%%shift_end:
+ vpackssdw m0, m0, m1
+%endmacro
+
+; FILTER_VB(line)
+; brief: filter pixels for luma and chroma
+; input: line
+; output: m0, m1
+; temp: s0q...s1q
+%macro FILTER_VB 1
+ vpbroadcastd m0, [dw64]
+ vpbroadcastd m1, [dw64]
+
+ GET_SRCS %1
+%if LUMA
+ FILTER 0, s5q, s6q, 0 * ps
+ FILTER 1, s3q, s4q, 1 * ps
+ FILTER 2, s3q, s4q, 0 * ps
+ FILTER 3, s3q, s4q, -1 * ps
+ FILTER 4, s1q, s2q, 2 * ps
+ FILTER 5, s1q, s2q, 1 * ps
+ FILTER 6, s1q, s2q, 0 * ps
+ FILTER 7, s1q, s2q, -1 * ps
+ FILTER 8, s1q, s2q, -2 * ps
+ FILTER 9, srcq, srcq, 3 * ps
+ FILTER 10, srcq, srcq, 2 * ps
+ FILTER 11, srcq, srcq, 1 * ps
+%else
+ FILTER 0, s3q, s4q, 0 * ps
+ FILTER 1, s1q, s2q, 1 * ps
+ FILTER 2, s1q, s2q, 0 * ps
+ FILTER 3, s1q, s2q, -1 * ps
+ FILTER 4, srcq, srcq, 2 * ps
+ FILTER 5, srcq, srcq, 1 * ps
+%endif
+ SHIFT_VB %1
+%endmacro
+
+; LOAD_PIXELS(dest, src)
+%macro LOAD_PIXELS 2
+%if ps == 2
+ movu %1, %2
+%else
+ vpmovzxbw %1, %2
+%endif
+%endmacro
+
+; STORE_PIXELS(dst, src)
+%macro STORE_PIXELS 2
+ %if ps == 2
+ movu %1, m%2
+ %else
+ vpackuswb m%2, m%2
+ vpermq m%2, m%2, 0x8
+ movu %1, xm%2
+ %endif
+%endmacro
+
+%macro FILTER_16x4 0
+%if LUMA
+ push clipq
+ push strideq
+ %define s1q clipq
+ %define s2q strideq
+%else
+ %define s1q s5q
+ %define s2q s6q
+%endif
+
+ %define s3q pixel_maxq
+ %define s4q offsetq
+ push xq
+
+ xor xq, xq
+%%filter_16x4_loop:
+ LOAD_PIXELS m2, [srcq] ;p0
+
+ FILTER_VB xq
+
+ paddw m0, m2
+
+ ; clip to pixel
+ CLIPW m0, m14, m15
+
+ STORE_PIXELS [dstq], 0
+
+ lea srcq, [srcq + src_strideq]
+ lea dstq, [dstq + dst_strideq]
+ inc xq
+ cmp xq, 4
+ jl %%filter_16x4_loop
+
+ mov xq, src_strideq
+ neg xq
+ lea srcq, [srcq + xq * 4]
+ mov xq, dst_strideq
+ neg xq
+ lea dstq, [dstq + xq * 4]
+
+ pop xq
+
+%if LUMA
+ pop strideq
+ pop clipq
+%endif
+%endmacro
+
+; FILTER(bpc, luma/chroma)
+%macro ALF_FILTER 2
+%xdefine BPC %1
+%ifidn %2, luma
+ %xdefine LUMA 1
+%else
+ %xdefine LUMA 0
+%endif
+
+; ******************************
+; void vvc_alf_filter_%2_%1bpc_avx2(uint8_t *dst, ptrdiff_t dst_stride,
+; const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, const ptrdiff_t height,
+; const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max);
+; ******************************
+cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
+ offset, x, s5, s6
+%define ps (%1 / 8) ; pixel size
+ movd xm15, pixel_maxd
+ vpbroadcastw m15, xm15
+ pxor m14, m14
+
+.loop:
+ push srcq
+ push dstq
+ xor xd, xd
+
+ .loop_w:
+ LOAD_PARAMS
+ FILTER_16x4
+
+ add srcq, 16 * ps
+ add dstq, 16 * ps
+ add xd, 16
+ cmp xd, widthd
+ jl .loop_w
+
+ pop dstq
+ pop srcq
+ lea srcq, [srcq + 4 * src_strideq]
+ lea dstq, [dstq + 4 * dst_strideq]
+
+ lea filterq, [filterq + 2 * strideq]
+ lea clipq, [clipq + 2 * strideq]
+
+ sub vb_posq, 4
+ sub heightq, 4
+ jg .loop
+ RET
+%endmacro
+
+; FILTER(bpc)
+%macro ALF_FILTER 1
+ ALF_FILTER %1, luma
+ ALF_FILTER %1, chroma
+%endmacro
+
+%define ALF_GRADIENT_BORDER 2
+%define ALF_BORDER_LUMA 3
+
+; ******************************
+; void ff_vvc_alf_classify_grad(int *gradient_sum, const uint8_t *src,
+; ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos);
+; ******************************
+%macro ALF_CLASSIFY_GRAD 1
+cglobal vvc_alf_classify_grad_%1bpc, 6, 14, 16, gradient_sum, src, src_stride, width, height, vb_pos, \
+ x, y, s0, s1, s2, s3, vb_pos_below, src_stride3
+
+ lea src_stride3q, [src_strideq * 2 + src_strideq]
+
+ lea vb_pos_belowd, [vb_posd + ALF_GRADIENT_BORDER]
+
+ ; src = src - ALF_BORDER_LUMA * src_stride - ALF_BORDER_LUMA
+ sub srcq, src_stride3q
+ sub srcq, ALF_BORDER_LUMA * ps
+
+ add widthd, ALF_GRADIENT_BORDER * 2
+ add heightd, ALF_GRADIENT_BORDER * 2
+
+ xor yd, yd
+
+.loop_h:
+ xor xd, xd
+ pxor m15, m15 ; prev
+ .loop_w:
+ lea s0q, [srcq + xq * ps]
+ lea s1q, [s0q + src_strideq]
+ lea s2q, [s0q + 2 * src_strideq]
+ lea s3q, [s0q + src_stride3q]
+
+ cmp yd, vb_pos_belowd
+ cmove s0q, s1q
+
+ cmp yd, vb_posd
+ cmove s3q, s2q
+
+ LOAD_PIXELS m0, [s0q]
+ LOAD_PIXELS m1, [s1q]
+ LOAD_PIXELS m2, [s2q]
+ LOAD_PIXELS m3, [s3q]
+
+ LOAD_PIXELS m4, [s0q + 2 * ps]
+ LOAD_PIXELS m5, [s1q + 2 * ps]
+ LOAD_PIXELS m6, [s2q + 2 * ps]
+ LOAD_PIXELS m7, [s3q + 2 * ps]
+
+ vpblendw m8, m0, m1, 0xaa ; nw
+ vpblendw m9, m0, m5, 0x55 ; n
+ vpblendw m10, m4, m5, 0xaa ; ne
+ vpblendw m11, m1, m2, 0xaa ; w
+ vpblendw m12, m5, m6, 0xaa ; e
+ vpblendw m13, m2, m3, 0xaa ; sw
+ vpblendw m14, m2, m7, 0x55 ; s
+
+ vpblendw m0, m1, m6, 0x55
+ vpaddw m0, m0 ; c
+
+ movu m1, [CLASSIFY_SHUFFE]
+ pshufb m1, m0, m1 ; d
+
+ vpaddw m9, m14 ; n + s
+ vpsubw m9, m0 ; (n + s) - c
+ vpabsw m9, m9 ; ver
+
+ vpaddw m11, m12 ; w + e
+ vpsubw m11, m1 ; (w + e) - d
+ vpabsw m11, m11 ; hor
+
+ vpblendw m14, m6, m7, 0xaa ; se
+ vpaddw m8, m14 ; nw + se
+ vpsubw m8, m1 ; (nw + se) - d
+ vpabsw m8, m8 ; di0
+
+ vpaddw m10, m13 ; ne + sw
+ vpsubw m10, m1 ; (ne + sw) - d
+ vpabsw m10, m10 ; di1
+
+ phaddw m9, m11 ; vh, each word represents 2x2 pixels
+ phaddw m8, m10 ; di, each word represents 2x2 pixels
+ phaddw m0, m9, m8 ; all = each word represents 4x2 pixels, order is v_h_d0_d1 x 4
+
+ vinserti128 m15, m15, xm0, 1
+ vpblendw m1, m0, m15, 0xaa ; t
+
+ phaddw m1, m0 ; each word represents 8x2 pixels, adjacent words share 4x2 pixels
+
+ vextracti128 xm15, m0, 1 ; prev
+
+ movu [gradient_sumq], m1
+
+ add gradient_sumq, 32
+ add xd, 16
+ cmp xd, widthd
+ jl .loop_w
+
+ lea srcq, [srcq + 2 * src_strideq]
+ add yd, 2
+ cmp yd, heightd
+ jl .loop_h
+ RET
+%endmacro
+
+; SAVE_CLASSIFY_PARAM_W16(dest, src)
+%macro SAVE_CLASSIFY_PARAM_W16 2
+ lea tempq, [%1q + xq]
+ movu [tempq], xm%2
+ vperm2i128 m%2, m%2, m%2, 1
+ movu [tempq + widthq], xm%2
+%endmacro
+
+; SAVE_CLASSIFY_PARAM_W8
+%macro SAVE_CLASSIFY_PARAM_W8 2
+ movq [%1], xm%2
+ vperm2i128 m%2, m%2, m%2, 1
+ movq [%1 + widthq], xm%2
+%endmacro
+
+; SAVE_CLASSIFY_PARAM_W4
+%macro SAVE_CLASSIFY_PARAM_W4 2
+ movd [%1], xm%2
+ vperm2i128 m%2, m%2, m%2, 1
+ movd [%1 + widthq], xm%2
+%endmacro
+
+; SAVE_CLASSIFY_PARAM_W(dest, src)
+%macro SAVE_CLASSIFY_PARAM_W 2
+ lea tempq, [%1q + xq]
+ cmp wd, 8
+ jl %%w4
+ SAVE_CLASSIFY_PARAM_W8 tempq, %2
+ vpermq m%2, m%2, 00010011b
+ add tempq, 8
+ cmp wd, 8
+ je %%end
+%%w4:
+ SAVE_CLASSIFY_PARAM_W4 tempq, %2
+%%end:
+%endmacro
+
+%macro ALF_CLASSIFY_H8 0
+ ; first line, sum of 16x4 pixels (includes borders)
+ lea gradq, [gradient_sumq + 2 * xq]
+ movu m0, [gradq]
+ movu m1, [gradq + sum_strideq]
+ movu m2, [gradq + 2 * sum_strideq]
+
+ pcmpeqb m11, m11
+ movd xm13, yd
+ vpbroadcastd m13, xm13
+ movd xm12, vb_posd
+ vpbroadcastd m12, xm12
+ vpcmpeqd m13, m12 ; y == vb_pos
+ pandn m13, m11 ; y != vb_pos
+
+ vpbroadcastd m14, [dw3]
+ pblendvb m14, m14, [dd2], m13 ; ac
+
+ pblendvb m3, m15, [gradq + sum_stride3q], m13
+
+ ; extend to dwords to avoid overflow
+ vpunpcklwd m4, m0, m15
+ vpunpckhwd m5, m0, m15
+ vpunpcklwd m6, m1, m15
+ vpunpckhwd m7, m1, m15
+ vpunpcklwd m8, m2, m15
+ vpunpckhwd m9, m2, m15
+ vpunpcklwd m10, m3, m15
+ vpunpckhwd m11, m3, m15
+
+ vpaddd m0, m4, m6
+ vpaddd m1, m5, m7
+ vpaddd m2, m8, m10
+ vpaddd m3, m9, m11
+
+ ; sum of the first row
+ vpaddd m0, m2 ; low
+ vpaddd m1, m3 ; high
+
+ lea gradq, [gradq + 2 * sum_strideq]
+
+ pblendvb m10, m15, [gradq], m13
+
+ movu m11, [gradq + sum_strideq]
+ movu m12, [gradq + 2 * sum_strideq]
+ movu m13, [gradq + sum_stride3q]
+
+ vpunpcklwd m4, m10, m15
+ vpunpckhwd m5, m10, m15
+ vpunpcklwd m6, m11, m15
+ vpunpckhwd m7, m11, m15
+ vpunpcklwd m8, m12, m15
+ vpunpckhwd m9, m12, m15
+ vpunpcklwd m10, m13, m15
+ vpunpckhwd m11, m13, m15
+
+ vpaddd m2, m4, m6
+ vpaddd m3, m5, m7
+ vpaddd m4, m8, m10
+ vpaddd m5, m9, m11
+
+ ; sum of the second row
+ vpaddd m2, m4 ; low
+ vpaddd m3, m5 ; high
+
+ vpunpckldq m4, m0, m2
+ vpunpckhdq m5, m0, m2
+ vpunpckldq m6, m1, m3
+ vpunpckhdq m7, m1, m3
+
+ ; each dword represents 4x2 alf blocks
+ ; the order is 01452367
+ vpunpckldq m0, m4, m6 ; sum_v
+ vpunpckhdq m1, m4, m6 ; sum_h
+ vpunpckldq m2, m5, m7 ; sum_d0
+ vpunpckhdq m3, m5, m7 ; sum_d1
+
+ vpcmpgtd m4, m0, m1 ; dir_hv - 1
+ vpmaxsd m5, m0, m1 ; hv1
+ vpminsd m6, m0, m1 ; hv0
+
+ vpaddd m0, m1 ; sum_hv
+
+ vpcmpgtd m7, m2, m3 ; dir_d - 1
+ vpmaxsd m8, m2, m3 ; d1
+ vpminsd m9, m2, m3 ; d0
+
+ ; *transpose_idx = dir_d * 2 + dir_hv;
+ vpbroadcastd m10, [dw3]
+ vpaddd m11, m7, m7
+ vpaddd m11, m4
+ vpaddd m10, m11
+ vpermq m10, m10, 11011000b
+ SAVE_CLASSIFY_PARAM transpose_idx, 10
+
+ vpsrlq m10, m8, 32
+ vpsrlq m11, m6, 32
+ pmuldq m12, m10, m11 ; d1 * hv0 high
+ vpsrlq m1, m9, 32
+ vpsrlq m2, m5, 32
+ pmuldq m3, m1, m2 ; d0 * hv1 high
+ vpcmpgtq m10, m12, m3 ; dir1 - 1 high
+
+ pmuldq m1, m8, m6 ; d1 * hv0 low
+ pmuldq m2, m9, m5 ; d0 * hv1 low
+ vpcmpgtq m1, m2 ; dir1 - 1 low
+
+ vpblendd m1, m1, m10, 0xaa ; dir1 - 1
+
+ pblendvb m2, m5, m8, m1 ; hvd1
+ pblendvb m3, m6, m9, m1 ; hvd0
+
+ movd xm5, bit_depthd
+ vpbroadcastd m5, xm5
+
+ ;*class_idx = arg_var[av_clip_uintp2(sum_hv * ac >> (BIT_DEPTH - 1), 4)];
+ vpmulld m0, m14 ; sum_hv * ac
+ vpsrlvd m0, m0, m5
+ vpminsd m0, [dd15]
+ movu m6, [ARG_VAR_SHUFFE]
+ pshufb m6, m0 ; class_idx
+
+ vpbroadcastd m10, [dw5]
+
+ ; if (hvd1 * 2 > 9 * hvd0)
+ ; *class_idx += ((dir1 << 1) + 2) * 5;
+ ; else if (hvd1 > 2 * hvd0)
+ ; *class_idx += ((dir1 << 1) + 1) * 5;
+ paddd m7, m3, m3
+ pcmpgtd m7, m2, m7 ; hvd1 > 2 * hvd0
+ pand m7, m10
+ paddd m6, m7 ; class_idx
+
+ paddd m8, m2, m2
+ vpslld m9, m3, 3
+ paddd m9, m3
+ pcmpgtd m8, m9 ; hvd1 * 2 > 9 * hvd0
+ pand m8, m10
+ paddd m6, m8 ; class_idx
+
+ pandn m1, m7
+ paddd m1, m1 ; dir1 << 1
+ paddd m6, m1 ; class_idx
+ vpermq m6, m6, 11011000b
+
+ SAVE_CLASSIFY_PARAM class_idx, 6
+%endmacro
+
+%macro ALF_CLASSIFY_16x8 0
+%define SAVE_CLASSIFY_PARAM SAVE_CLASSIFY_PARAM_W16
+ ALF_CLASSIFY_H8
+%undef SAVE_CLASSIFY_PARAM
+%endmacro
+
+%macro ALF_CLASSIFY_Wx8 0
+%define SAVE_CLASSIFY_PARAM SAVE_CLASSIFY_PARAM_W
+ ALF_CLASSIFY_H8
+%undef SAVE_CLASSIFY_PARAM
+%endmacro
+
+; ******************************
+;void ff_vvc_alf_classify(int *class_idx, int *transpose_idx, const int *gradient_sum,
+; intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth);
+; ******************************
+%macro ALF_CLASSIFY 1
+%define ps (%1 / 8)
+ALF_CLASSIFY_GRAD %1
+cglobal vvc_alf_classify_%1bpc, 7, 15, 16, class_idx, transpose_idx, gradient_sum, width, height, vb_pos, bit_depth, \
+ x, y, grad, sum_stride, sum_stride3, temp, w
+
+ sub bit_depthq, 1
+
+ ; now we can use gradient to get class idx and transpose idx
+ lea sum_strideq, [widthd + ALF_GRADIENT_BORDER * 2]
+ add sum_strideq, 15
+ and sum_strideq, ~15 ; align to 16
+ add sum_strideq, sum_strideq ; two rows at a time
+
+ add gradient_sumq, 8 ; first 4 words are garbage
+
+ lea sum_stride3q, [3 * sum_strideq]
+
+ xor yd, yd
+ and vb_posd, ~7 ; floor align to 8
+ pxor m15, m15
+
+.loop_sum_h:
+ xor xd, xd
+ .loop_sum_w16:
+ lea wd, [widthd]
+ sub wd, xd
+ cmp wd, 16
+ jl .loop_sum_w16_end
+
+ ALF_CLASSIFY_16x8
+
+ add xd, 16
+ jmp .loop_sum_w16
+ .loop_sum_w16_end:
+
+ cmp wd, 0
+ je .loop_sum_w_end
+
+ ALF_CLASSIFY_Wx8
+
+.loop_sum_w_end:
+ lea gradient_sumq, [gradient_sumq + 4 * sum_strideq]
+ lea transpose_idxq, [transpose_idxq + 2 * widthq]
+ lea class_idxq, [class_idxq + 2 * widthq]
+
+ add yd, 8
+ cmp yd, heightd
+ jl .loop_sum_h
+
+ RET
+%endmacro
+
+%if ARCH_X86_64
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ALF_FILTER 16
+ALF_FILTER 8
+ALF_CLASSIFY 16
+ALF_CLASSIFY 8
+%endif
+%endif
diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c
index 23a3172c45..0e68971b2c 100644
--- a/libavcodec/x86/vvc/vvcdsp_init.c
+++ b/libavcodec/x86/vvc/vvcdsp_init.c
@@ -30,9 +30,94 @@
#include "libavcodec/vvc/dsp.h"
#include "libavcodec/x86/h26x/h2656dsp.h"
+#define PUT_PROTOTYPE(name, depth, opt) \
+void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
+
+#define PUT_PROTOTYPES(name, bitd, opt) \
+ PUT_PROTOTYPE(name##2, bitd, opt) \
+ PUT_PROTOTYPE(name##4, bitd, opt) \
+ PUT_PROTOTYPE(name##8, bitd, opt) \
+ PUT_PROTOTYPE(name##12, bitd, opt) \
+ PUT_PROTOTYPE(name##16, bitd, opt) \
+ PUT_PROTOTYPE(name##24, bitd, opt) \
+ PUT_PROTOTYPE(name##32, bitd, opt) \
+ PUT_PROTOTYPE(name##48, bitd, opt) \
+ PUT_PROTOTYPE(name##64, bitd, opt) \
+ PUT_PROTOTYPE(name##128, bitd, opt)
+
+#define PUT_BPC_PROTOTYPES(name, opt) \
+ PUT_PROTOTYPES(name, 8, opt) \
+ PUT_PROTOTYPES(name, 10, opt) \
+ PUT_PROTOTYPES(name, 12, opt)
+
+#define PUT_TAP_PROTOTYPES(n, opt) \
+ PUT_BPC_PROTOTYPES(n##tap_h, opt) \
+ PUT_BPC_PROTOTYPES(n##tap_v, opt) \
+ PUT_BPC_PROTOTYPES(n##tap_hv, opt)
+
+PUT_BPC_PROTOTYPES(pixels, sse4)
+PUT_BPC_PROTOTYPES(pixels, avx2)
+
+PUT_TAP_PROTOTYPES(4, sse4)
+PUT_TAP_PROTOTYPES(8, sse4)
+PUT_TAP_PROTOTYPES(4, avx2)
+PUT_TAP_PROTOTYPES(8, avx2)
+
+#define bf(fn, bd, opt) fn##_##bd##_##opt
+#define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
+
+#define AVG_BPC_PROTOTYPES(bpc, opt) \
+void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, intptr_t pixel_max); \
+void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, \
+ intptr_t denom, intptr_t w0, intptr_t w1, intptr_t o0, intptr_t o1, intptr_t pixel_max);
+
+#define AVG_PROTOTYPES(bd, opt) \
+void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, int width, int height); \
+void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, int width, int height, \
+ int denom, int w0, int w1, int o0, int o1);
+
+AVG_BPC_PROTOTYPES( 8, avx2)
+AVG_BPC_PROTOTYPES(16, avx2)
+
+AVG_PROTOTYPES( 8, avx2)
+AVG_PROTOTYPES(10, avx2)
+AVG_PROTOTYPES(12, avx2)
+
+#define ALF_BPC_PROTOTYPES(bpc, opt) \
+void BF(ff_vvc_alf_filter_luma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
+ const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
+void BF(ff_vvc_alf_filter_chroma, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, ptrdiff_t width, ptrdiff_t height, \
+ const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); \
+void BF(ff_vvc_alf_classify_grad, bpc, opt)(int *gradient_sum, \
+ const uint8_t *src, ptrdiff_t src_stride, intptr_t width, intptr_t height, intptr_t vb_pos); \
+void BF(ff_vvc_alf_classify, bpc, opt)(int *class_idx, int *transpose_idx, const int *gradient_sum, \
+ intptr_t width, intptr_t height, intptr_t vb_pos, intptr_t bit_depth); \
+
+#define ALF_PROTOTYPES(bpc, bd, opt) \
+void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
+ int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
+void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
+ int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos); \
+void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
+ const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp); \
+
+ALF_BPC_PROTOTYPES(8, avx2)
+ALF_BPC_PROTOTYPES(16, avx2)
+
+ALF_PROTOTYPES(8, 8, avx2)
+ALF_PROTOTYPES(16, 10, avx2)
+ALF_PROTOTYPES(16, 12, avx2)
+
#if ARCH_X86_64
+#if HAVE_SSE4_EXTERNAL
#define FW_PUT(name, depth, opt) \
-static void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
+void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \
int height, const int8_t *hf, const int8_t *vf, int width) \
{ \
ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \
@@ -68,7 +153,9 @@ static void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, const uint
FW_PUT_SSE4( 8)
FW_PUT_SSE4(10)
FW_PUT_SSE4(12)
+#endif
+#if HAVE_AVX2_EXTERNAL
#define FW_PUT_TAP_AVX2(n, bitd) \
FW_PUT(n ## tap_h32, bitd, avx2) \
FW_PUT(n ## tap_h64, bitd, avx2) \
@@ -104,6 +191,51 @@ FW_PUT_AVX2(12)
FW_PUT_16BPC_AVX2(10)
FW_PUT_16BPC_AVX2(12)
+#define AVG_FUNCS(bpc, bd, opt) \
+void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, int width, int height) \
+{ \
+ BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, (1 << bd) - 1); \
+} \
+void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *src0, const int16_t *src1, int width, int height, \
+ int denom, int w0, int w1, int o0, int o1) \
+{ \
+ BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
+ denom, w0, w1, o0, o1, (1 << bd) - 1); \
+}
+
+AVG_FUNCS(8, 8, avx2)
+AVG_FUNCS(16, 10, avx2)
+AVG_FUNCS(16, 12, avx2)
+
+#define ALF_FUNCS(bpc, bd, opt) \
+void bf(ff_vvc_alf_filter_luma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
+ int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
+{ \
+ const int param_stride = (width >> 2) * ALF_NUM_COEFF_LUMA; \
+ BF(ff_vvc_alf_filter_luma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
+ filter, clip, param_stride, vb_pos, (1 << bd) - 1); \
+} \
+void bf(ff_vvc_alf_filter_chroma, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, \
+ int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos) \
+{ \
+ BF(ff_vvc_alf_filter_chroma, bpc, opt)(dst, dst_stride, src, src_stride, width, height, \
+ filter, clip, 0, vb_pos,(1 << bd) - 1); \
+} \
+void bf(ff_vvc_alf_classify, bd, opt)(int *class_idx, int *transpose_idx, \
+ const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp) \
+{ \
+ BF(ff_vvc_alf_classify_grad, bpc, opt)(gradient_tmp, src, src_stride, width, height, vb_pos); \
+ BF(ff_vvc_alf_classify, bpc, opt)(class_idx, transpose_idx, gradient_tmp, width, height, vb_pos, bd); \
+} \
+
+ALF_FUNCS(8, 8, avx2)
+ALF_FUNCS(16, 10, avx2)
+ALF_FUNCS(16, 12, avx2)
+
+#endif
+
#define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
dst[C][W][idx1][idx2] = ff_vvc_put_## name ## _ ## D ## _##opt; \
dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt; \
@@ -169,40 +301,15 @@ FW_PUT_16BPC_AVX2(12)
MC_TAP_LINKS_16BPC_AVX2(LUMA, 8, bd); \
MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
-#define bf(fn, bd, opt) fn##_##bd##_##opt
-#define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
-
-#define AVG_BPC_FUNC(bpc, opt) \
-void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, intptr_t pixel_max); \
-void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *src0, const int16_t *src1, intptr_t width, intptr_t height, \
- intptr_t denom, intptr_t w0, intptr_t w1, intptr_t o0, intptr_t o1, intptr_t pixel_max);
-
-#define AVG_FUNCS(bpc, bd, opt) \
-static void bf(avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *src0, const int16_t *src1, int width, int height) \
-{ \
- BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, (1 << bd) - 1); \
-} \
-static void bf(w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *src0, const int16_t *src1, int width, int height, \
- int denom, int w0, int w1, int o0, int o1) \
-{ \
- BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
- denom, w0, w1, o0, o1, (1 << bd) - 1); \
-}
-
-AVG_BPC_FUNC(8, avx2)
-AVG_BPC_FUNC(16, avx2)
-
-AVG_FUNCS(8, 8, avx2)
-AVG_FUNCS(16, 10, avx2)
-AVG_FUNCS(16, 12, avx2)
+#define AVG_INIT(bd, opt) do { \
+ c->inter.avg = bf(ff_vvc_avg, bd, opt); \
+ c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
+} while (0)
-#define AVG_INIT(bd, opt) do { \
- c->inter.avg = bf(avg, bd, opt); \
- c->inter.w_avg = bf(w_avg, bd, opt); \
+#define ALF_INIT(bd) do { \
+ c->alf.filter[LUMA] = ff_vvc_alf_filter_luma_##bd##_avx2; \
+ c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2; \
+ c->alf.classify = ff_vvc_alf_classify_##bd##_avx2; \
} while (0)
#endif
@@ -211,45 +318,41 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#if ARCH_X86_64
const int cpu_flags = av_get_cpu_flags();
- if (bd == 8) {
+ switch (bd) {
+ case 8:
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(8);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ ALF_INIT(8);
+ AVG_INIT(8, avx2);
MC_LINKS_AVX2(8);
}
- } else if (bd == 10) {
+ break;
+ case 10:
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(10);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ ALF_INIT(10);
+ AVG_INIT(10, avx2);
MC_LINKS_AVX2(10);
MC_LINKS_16BPC_AVX2(10);
}
- } else if (bd == 12) {
+ break;
+ case 12:
if (EXTERNAL_SSE4(cpu_flags)) {
MC_LINK_SSE4(12);
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ ALF_INIT(12);
+ AVG_INIT(12, avx2);
MC_LINKS_AVX2(12);
MC_LINKS_16BPC_AVX2(12);
}
- }
-
- if (EXTERNAL_AVX2(cpu_flags)) {
- switch (bd) {
- case 8:
- AVG_INIT(8, avx2);
- break;
- case 10:
- AVG_INIT(10, avx2);
- break;
- case 12:
- AVG_INIT(12, avx2);
- break;
- default:
- break;
- }
+ break;
+ default:
+ break;
}
#endif
}
diff --git a/libavcodec/xbmdec.c b/libavcodec/xbmdec.c
index 2d84327e02..09096b9df6 100644
--- a/libavcodec/xbmdec.c
+++ b/libavcodec/xbmdec.c
@@ -135,9 +135,6 @@ static int xbm_decode_frame(AVCodecContext *avctx, AVFrame *p,
}
}
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/xl.c b/libavcodec/xl.c
index f008d56e89..6d60611ad3 100644
--- a/libavcodec/xl.c
+++ b/libavcodec/xl.c
@@ -59,8 +59,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
Y = p->data[0];
U = p->data[1];
diff --git a/libavcodec/xpmdec.c b/libavcodec/xpmdec.c
index 6db9f5248e..2eb5daf545 100644
--- a/libavcodec/xpmdec.c
+++ b/libavcodec/xpmdec.c
@@ -429,9 +429,6 @@ static int xpm_decode_frame(AVCodecContext *avctx, AVFrame *p,
ptr += mod_strcspn(ptr, ",") + 1;
}
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/xwddec.c b/libavcodec/xwddec.c
index f691587be9..fac8c21af8 100644
--- a/libavcodec/xwddec.c
+++ b/libavcodec/xwddec.c
@@ -216,9 +216,6 @@ static int xwd_decode_frame(AVCodecContext *avctx, AVFrame *p,
if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
return ret;
- p->flags |= AV_FRAME_FLAG_KEY;
- p->pict_type = AV_PICTURE_TYPE_I;
-
if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
uint32_t *dst = (uint32_t *)p->data[1];
uint8_t red, green, blue;
diff --git a/libavcodec/y41pdec.c b/libavcodec/y41pdec.c
index 14e36dc998..270b23c14f 100644
--- a/libavcodec/y41pdec.c
+++ b/libavcodec/y41pdec.c
@@ -51,9 +51,6 @@ static int y41p_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
for (i = avctx->height - 1; i >= 0 ; i--) {
y = &pic->data[0][i * pic->linesize[0]];
u = &pic->data[1][i * pic->linesize[1]];
diff --git a/libavcodec/ylc.c b/libavcodec/ylc.c
index b03df31556..44cbfa712a 100644
--- a/libavcodec/ylc.c
+++ b/libavcodec/ylc.c
@@ -426,8 +426,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
dst += p->linesize[0];
}
- p->pict_type = AV_PICTURE_TYPE_I;
- p->flags |= AV_FRAME_FLAG_KEY;
*got_frame = 1;
return avpkt->size;
diff --git a/libavcodec/yop.c b/libavcodec/yop.c
index 1294c5cc00..77b8d6e055 100644
--- a/libavcodec/yop.c
+++ b/libavcodec/yop.c
@@ -274,6 +274,7 @@ const FFCodec ff_yop_decoder = {
CODEC_LONG_NAME("Psygnosis YOP Video"),
.p.type = AVMEDIA_TYPE_VIDEO,
.p.id = AV_CODEC_ID_YOP,
+ .p.capabilities = AV_CODEC_CAP_DR1,
.priv_data_size = sizeof(YopDecContext),
.init = yop_decode_init,
.close = yop_decode_close,
diff --git a/libavcodec/yuv4dec.c b/libavcodec/yuv4dec.c
index ad83a2125c..3b406057f9 100644
--- a/libavcodec/yuv4dec.c
+++ b/libavcodec/yuv4dec.c
@@ -46,9 +46,6 @@ static int yuv4_decode_frame(AVCodecContext *avctx, AVFrame *pic,
if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
return ret;
- pic->flags |= AV_FRAME_FLAG_KEY;
- pic->pict_type = AV_PICTURE_TYPE_I;
-
y = pic->data[0];
u = pic->data[1];
v = pic->data[2];
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 3706582bc6..74f43ef6a9 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -108,10 +108,10 @@ struct video_data {
int (*open_f)(const char *file, int oflag, ...);
int (*close_f)(int fd);
int (*dup_f)(int fd);
-#ifdef __GLIBC__
- int (*ioctl_f)(int fd, unsigned long int request, ...);
-#else
+#if defined(__sun) || defined(__BIONIC__) || defined(__musl__) /* POSIX-like */
int (*ioctl_f)(int fd, int request, ...);
+#else
+ int (*ioctl_f)(int fd, unsigned long int request, ...);
#endif
ssize_t (*read_f)(int fd, void *buffer, size_t n);
void *(*mmap_f)(void *start, size_t length, int prot, int flags, int fd, int64_t offset);
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 994d9773ba..5992fd161f 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -167,6 +167,7 @@ OBJS-$(CONFIG_STEREOTOOLS_FILTER) += af_stereotools.o
OBJS-$(CONFIG_STEREOWIDEN_FILTER) += af_stereowiden.o
OBJS-$(CONFIG_SUPEREQUALIZER_FILTER) += af_superequalizer.o
OBJS-$(CONFIG_SURROUND_FILTER) += af_surround.o
+OBJS-$(CONFIG_TILTSHELF_FILTER) += af_biquads.o
OBJS-$(CONFIG_TREBLE_FILTER) += af_biquads.o
OBJS-$(CONFIG_TREMOLO_FILTER) += af_tremolo.o
OBJS-$(CONFIG_VIBRATO_FILTER) += af_vibrato.o generate_wave_table.o
@@ -239,7 +240,7 @@ OBJS-$(CONFIG_COLORKEY_OPENCL_FILTER) += vf_colorkey_opencl.o opencl.o \
opencl/colorkey.o
OBJS-$(CONFIG_COLORHOLD_FILTER) += vf_colorkey.o
OBJS-$(CONFIG_COLORLEVELS_FILTER) += vf_colorlevels.o
-OBJS-$(CONFIG_COLORMAP_FILTER) += vf_colormap.o
+OBJS-$(CONFIG_COLORMAP_FILTER) += vf_colormap.o framesync.o
OBJS-$(CONFIG_COLORMATRIX_FILTER) += vf_colormatrix.o
OBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o colorspacedsp.o
OBJS-$(CONFIG_COLORSPACE_CUDA_FILTER) += vf_colorspace_cuda.o \
@@ -330,7 +331,7 @@ OBJS-$(CONFIG_GRADFUN_FILTER) += vf_gradfun.o
OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o
OBJS-$(CONFIG_GRAYWORLD_FILTER) += vf_grayworld.o
OBJS-$(CONFIG_GREYEDGE_FILTER) += vf_colorconstancy.o
-OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o
+OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o framesync.o
OBJS-$(CONFIG_HALDCLUT_FILTER) += vf_lut3d.o framesync.o
OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o
OBJS-$(CONFIG_HFLIP_VULKAN_FILTER) += vf_flip_vulkan.o vulkan.o
@@ -350,7 +351,7 @@ OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o
OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o
OBJS-$(CONFIG_ICCDETECT_FILTER) += vf_iccdetect.o fflcms2.o
OBJS-$(CONFIG_ICCGEN_FILTER) += vf_iccgen.o fflcms2.o
-OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o
+OBJS-$(CONFIG_IDENTITY_FILTER) += vf_identity.o framesync.o
OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o
OBJS-$(CONFIG_IL_FILTER) += vf_il.o
OBJS-$(CONFIG_INFLATE_FILTER) += vf_neighbor.o
@@ -390,9 +391,9 @@ OBJS-$(CONFIG_MIDEQUALIZER_FILTER) += vf_midequalizer.o framesync.o
OBJS-$(CONFIG_MINTERPOLATE_FILTER) += vf_minterpolate.o motion_estimation.o
OBJS-$(CONFIG_MIX_FILTER) += vf_mix.o framesync.o
OBJS-$(CONFIG_MONOCHROME_FILTER) += vf_monochrome.o
-OBJS-$(CONFIG_MORPHO_FILTER) += vf_morpho.o
+OBJS-$(CONFIG_MORPHO_FILTER) += vf_morpho.o framesync.o
OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
-OBJS-$(CONFIG_MULTIPLY_FILTER) += vf_multiply.o
+OBJS-$(CONFIG_MULTIPLY_FILTER) += vf_multiply.o framesync.o
OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o
OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o
OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o
@@ -455,7 +456,7 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o
opencl/convolution.o
OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
-OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o
+OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o framesync.o
OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \
vf_scale_cuda.ptx.o cuda/load_helper.o
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o
@@ -463,7 +464,7 @@ OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o
OBJS-$(CONFIG_SCALE_VT_FILTER) += vf_scale_vt.o scale_eval.o
OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o vulkan_filter.o
-OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o
+OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o framesync.o
OBJS-$(CONFIG_SCALE2REF_NPP_FILTER) += vf_scale_npp.o scale_eval.o
OBJS-$(CONFIG_SCDET_FILTER) += vf_scdet.o
OBJS-$(CONFIG_SCHARR_FILTER) += vf_convolution.o
@@ -581,6 +582,8 @@ OBJS-$(CONFIG_XSTACK_VAAPI_FILTER) += vf_stack_vaapi.o framesync.o vaa
OBJS-$(CONFIG_HSTACK_QSV_FILTER) += vf_stack_qsv.o framesync.o
OBJS-$(CONFIG_VSTACK_QSV_FILTER) += vf_stack_qsv.o framesync.o
OBJS-$(CONFIG_XSTACK_QSV_FILTER) += vf_stack_qsv.o framesync.o
+OBJS-$(CONFIG_PAD_VAAPI_FILTER) += vf_pad_vaapi.o vaapi_vpp.o
+OBJS-$(CONFIG_DRAWBOX_VAAPI_FILTER) += vf_drawbox_vaapi.o vaapi_vpp.o
OBJS-$(CONFIG_ALLRGB_FILTER) += vsrc_testsrc.o
OBJS-$(CONFIG_ALLYUV_FILTER) += vsrc_testsrc.o
diff --git a/libavfilter/af_channelsplit.c b/libavfilter/af_channelsplit.c
index d18d91dcb6..1c4e815c6e 100644
--- a/libavfilter/af_channelsplit.c
+++ b/libavfilter/af_channelsplit.c
@@ -163,8 +163,10 @@ static int filter_frame(AVFilterLink *outlink, AVFrame *buf)
buf_out->data[0] = buf_out->extended_data[0] = buf_out->extended_data[s->map[i]];
ret = av_channel_layout_from_mask(&buf_out->ch_layout, 1ULL << channel);
- if (ret < 0)
+ if (ret < 0) {
+ av_frame_free(&buf_out);
return ret;
+ }
return ff_filter_frame(ctx->outputs[i], buf_out);
}
diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
index 5a3c9220f4..b3dd57c5e5 100644
--- a/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c
@@ -127,7 +127,6 @@ static av_cold void uninit(AVFilterContext *ctx)
{
VolumeContext *vol = ctx->priv;
av_expr_free(vol->volume_pexpr);
- av_opt_free(vol);
av_freep(&vol->fdsp);
}
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 149bf50997..c532682fc2 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -546,6 +546,8 @@ extern const AVFilter ff_vf_xstack_vaapi;
extern const AVFilter ff_vf_hstack_qsv;
extern const AVFilter ff_vf_vstack_qsv;
extern const AVFilter ff_vf_xstack_qsv;
+extern const AVFilter ff_vf_pad_vaapi;
+extern const AVFilter ff_vf_drawbox_vaapi;
extern const AVFilter ff_vsrc_allrgb;
extern const AVFilter ff_vsrc_allyuv;
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index 7f94e71fbc..2dc8820184 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -652,7 +652,8 @@ static const AVOption avfilter_options[] = {
{ "slice", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AVFILTER_THREAD_SLICE }, .flags = FLAGS, .unit = "thread_type" },
{ "enable", "set enable expression", OFFSET(enable_str), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = TFLAGS },
{ "threads", "Allowed number of threads", OFFSET(nb_threads), AV_OPT_TYPE_INT,
- { .i64 = 0 }, 0, INT_MAX, FLAGS },
+ { .i64 = 0 }, 0, INT_MAX, FLAGS, .unit = "threads" },
+ {"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = FLAGS, .unit = "threads"},
{ "extra_hw_frames", "Number of extra hardware frames to allocate for the user",
OFFSET(extra_hw_frames), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
{ NULL },
@@ -940,7 +941,7 @@ int avfilter_init_dict(AVFilterContext *ctx, AVDictionary **options)
int avfilter_init_str(AVFilterContext *filter, const char *args)
{
AVDictionary *options = NULL;
- AVDictionaryEntry *e;
+ const AVDictionaryEntry *e;
int ret = 0;
if (args && *args) {
@@ -953,7 +954,7 @@ int avfilter_init_str(AVFilterContext *filter, const char *args)
if (ret < 0)
goto fail;
- if ((e = av_dict_get(options, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
+ if ((e = av_dict_iterate(options, NULL))) {
av_log(filter, AV_LOG_ERROR, "No such option: %s.\n", e->key);
ret = AVERROR_OPTION_NOT_FOUND;
goto fail;
diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
index 0c569eb218..bd3bed9a35 100644
--- a/libavfilter/avfiltergraph.c
+++ b/libavfilter/avfiltergraph.c
@@ -1410,7 +1410,7 @@ int avfilter_graph_request_oldest(AVFilterGraph *graph)
if (r == AVERROR(EAGAIN) &&
!oldest->frame_wanted_out && !oldesti->frame_blocked_in &&
!oldesti->status_in)
- ff_request_frame(oldest);
+ (void)ff_request_frame(oldest);
else if (r < 0)
return r;
}
diff --git a/libavfilter/blend.h b/libavfilter/blend.h
index b046e062bc..e6636839db 100644
--- a/libavfilter/blend.h
+++ b/libavfilter/blend.h
@@ -69,16 +69,22 @@ enum BlendMode {
BLEND_NB
};
+typedef struct SliceParams {
+ double *values;
+ int starty;
+ AVExpr *e;
+} SliceParams;
+
typedef struct FilterParams {
enum BlendMode mode;
double opacity;
- AVExpr *e;
+ AVExpr **e;
char *expr_str;
void (*blend)(const uint8_t *top, ptrdiff_t top_linesize,
const uint8_t *bottom, ptrdiff_t bottom_linesize,
uint8_t *dst, ptrdiff_t dst_linesize,
ptrdiff_t width, ptrdiff_t height,
- struct FilterParams *param, double *values, int starty);
+ struct FilterParams *param, SliceParams *sliceparam);
} FilterParams;
void ff_blend_init_x86(FilterParams *param, int depth);
diff --git a/libavfilter/blend_modes.c b/libavfilter/blend_modes.c
index 65c5e6f890..def5ae8e0d 100644
--- a/libavfilter/blend_modes.c
+++ b/libavfilter/blend_modes.c
@@ -91,7 +91,7 @@ static void fn0(NAME)(const uint8_t *_top, ptrdiff_t top_linesize, \
const uint8_t *_bottom, ptrdiff_t bottom_linesize, \
uint8_t *_dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- FilterParams *param, double *values, int starty) \
+ FilterParams *param, SliceParams *sliceparam) \
{ \
const PIXEL *top = (const PIXEL *)_top; \
const PIXEL *bottom = (const PIXEL *)_bottom; \
diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
index bfe459c8aa..a61960310b 100644
--- a/libavfilter/buffersrc.c
+++ b/libavfilter/buffersrc.c
@@ -215,7 +215,7 @@ int attribute_align_arg av_buffersrc_add_frame_flags(AVFilterContext *ctx, AVFra
break;
case AVMEDIA_TYPE_AUDIO:
/* For layouts unknown on input but known on link after negotiation. */
- if (frame->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) {
+ if (frame->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC && frame->ch_layout.nb_channels == s->ch_layout.nb_channels) {
ret = av_channel_layout_copy(&frame->ch_layout, &s->ch_layout);
if (ret < 0)
return ret;
diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h
index ee7477ef89..1196c1fcdf 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -119,7 +119,7 @@ static void filter16_sobel(uint8_t *dstp, int width,
}
}
-static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, int nb_planes)
+static inline void ff_sobel_init(ConvolutionContext *s, int depth, int nb_planes)
{
for (int i = 0; i < 4; i++) {
s->filter[i] = filter_sobel;
diff --git a/libavfilter/dnn/dnn_backend_common.h b/libavfilter/dnn/dnn_backend_common.h
index 42c67c7040..9f5d37b3e0 100644
--- a/libavfilter/dnn/dnn_backend_common.h
+++ b/libavfilter/dnn/dnn_backend_common.h
@@ -28,9 +28,16 @@
#include "../dnn_interface.h"
#include "libavutil/thread.h"
-#define DNN_BACKEND_COMMON_OPTIONS \
- { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
- { "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
+#define DNN_DEFINE_CLASS_EXT(name, desc, options) \
+ { \
+ .class_name = desc, \
+ .item_name = av_default_item_name, \
+ .option = options, \
+ .version = LIBAVUTIL_VERSION_INT, \
+ .category = AV_CLASS_CATEGORY_FILTER, \
+ }
+#define DNN_DEFINE_CLASS(fname) \
+ DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
// one task for one function call from dnn interface
typedef struct TaskItem {
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 374f21b7a1..c4b0682f11 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -40,24 +40,8 @@
#endif
#include "dnn_backend_common.h"
-typedef struct OVOptions{
- char *device_type;
- int nireq;
- uint8_t async;
- int batch_size;
- int input_resizable;
- DNNLayout layout;
- float scale;
- float mean;
-} OVOptions;
-
-typedef struct OVContext {
- const AVClass *class;
- OVOptions options;
-} OVContext;
-
typedef struct OVModel{
- OVContext ctx;
+ DnnContext *ctx;
DNNModel *model;
#if HAVE_OPENVINO2
ov_core_t *core;
@@ -98,24 +82,20 @@ typedef struct OVRequestItem {
generated_string = generated_string ? av_asprintf("%s %s", generated_string, iterate_string) : \
av_asprintf("%s", iterate_string);
-#define OFFSET(x) offsetof(OVContext, x)
+#define OFFSET(x) offsetof(OVOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_openvino_options[] = {
- { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS },
- DNN_BACKEND_COMMON_OPTIONS
- { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
- { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
- { "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
+ { "batch_size", "batch size per request", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS},
+ { "input_resizable", "can input be resizable or not", OFFSET(input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "layout", "input layout of model", OFFSET(layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" },
{ "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, .unit = "layout"},
{ "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, .unit = "layout"},
{ "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, .unit = "layout"},
- { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
- { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
+ { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
+ { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS},
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_openvino);
-
#if HAVE_OPENVINO2
static const struct {
ov_status_e status;
@@ -199,7 +179,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
DNNData input;
LastLevelTaskItem *lltask;
TaskItem *task;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
int64_t* dims;
ov_status_e status;
@@ -292,7 +272,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request)
input.scale = 1;
input.mean = 0;
- for (int i = 0; i < ctx->options.batch_size; ++i) {
+ for (int i = 0; i < ctx->ov_option.batch_size; ++i) {
lltask = ff_queue_pop_front(ov_model->lltask_queue);
if (!lltask) {
break;
@@ -360,7 +340,7 @@ static void infer_completion_callback(void *args)
OVModel *ov_model = task->model;
SafeQueue *requestq = ov_model->request_queue;
DNNData *outputs;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
size_t* dims;
ov_status_e status;
@@ -410,9 +390,9 @@ static void infer_completion_callback(void *args)
outputs[i].dims[2] = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1;
outputs[i].dims[3] = output_shape.rank > 0 ? dims[output_shape.rank - 1] : 1;
av_assert0(request->lltask_count <= dims[0]);
- outputs[i].layout = ctx->options.layout;
- outputs[i].scale = ctx->options.scale;
- outputs[i].mean = ctx->options.mean;
+ outputs[i].layout = ctx->ov_option.layout;
+ outputs[i].scale = ctx->ov_option.scale;
+ outputs[i].mean = ctx->ov_option.mean;
ov_shape_free(&output_shape);
ov_tensor_free(output_tensor);
output_tensor = NULL;
@@ -452,9 +432,9 @@ static void infer_completion_callback(void *args)
output.dims[i] = dims.dims[i];
av_assert0(request->lltask_count <= dims.dims[0]);
output.dt = precision_to_datatype(precision);
- output.layout = ctx->options.layout;
- output.scale = ctx->options.scale;
- output.mean = ctx->options.mean;
+ output.layout = ctx->ov_option.layout;
+ output.scale = ctx->ov_option.scale;
+ output.mean = ctx->ov_option.mean;
outputs = &output;
#endif
@@ -590,7 +570,6 @@ static void dnn_free_model_ov(DNNModel **model)
av_free(ov_model->all_output_names);
av_free(ov_model->all_input_names);
#endif
- av_opt_free(&ov_model->ctx);
av_freep(&ov_model);
av_freep(model);
}
@@ -599,7 +578,7 @@ static void dnn_free_model_ov(DNNModel **model)
static int init_model_ov(OVModel *ov_model, const char *input_name, const char **output_names, int nb_outputs)
{
int ret = 0;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
#if HAVE_OPENVINO2
ov_status_e status;
ov_preprocess_input_tensor_info_t* input_tensor_info = NULL;
@@ -610,7 +589,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_layout_t* NCHW_layout = NULL;
const char* NHWC_desc = "NHWC";
const char* NCHW_desc = "NCHW";
- const char* device = ctx->options.device_type;
+ const char* device = ctx->device ? ctx->device : "CPU";
#else
IEStatusCode status;
ie_available_devices_t a_dev;
@@ -618,17 +597,17 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
char *all_dev_names = NULL;
#endif
// We scale pixel by default when do frame processing.
- if (fabsf(ctx->options.scale) < 1e-6f)
- ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
+ if (fabsf(ctx->ov_option.scale) < 1e-6f)
+ ctx->ov_option.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1;
// batch size
- if (ctx->options.batch_size <= 0) {
- ctx->options.batch_size = 1;
+ if (ctx->ov_option.batch_size <= 0) {
+ ctx->ov_option.batch_size = 1;
}
#if HAVE_OPENVINO2
- if (ctx->options.batch_size > 1) {
+ if (ctx->ov_option.batch_size > 1) {
avpriv_report_missing_feature(ctx, "Do not support batch_size > 1 for now,"
"change batch_size to 1.\n");
- ctx->options.batch_size = 1;
+ ctx->ov_option.batch_size = 1;
}
status = ov_preprocess_prepostprocessor_create(ov_model->ov_model, &ov_model->preprocess);
@@ -677,9 +656,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ret = ov2_map_error(status, NULL);
goto err;
}
- if (ctx->options.layout == DL_NCHW)
+ if (ctx->ov_option.layout == DL_NCHW)
status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout);
- else if (ctx->options.layout == DL_NHWC)
+ else if (ctx->ov_option.layout == DL_NHWC)
status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n");
@@ -725,7 +704,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
if (ov_model->model->func_type != DFT_PROCESS_FRAME)
status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
- else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f)
+ else if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f)
status |= ov_preprocess_output_set_element_type(output_tensor_info, F32);
else
status |= ov_preprocess_output_set_element_type(output_tensor_info, U8);
@@ -740,7 +719,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_model->output_info = NULL;
}
// set preprocess steps.
- if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) {
+ if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f) {
ov_preprocess_preprocess_steps_t* input_process_steps = NULL;
status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps);
if (status != OK) {
@@ -749,8 +728,8 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
status = ov_preprocess_preprocess_steps_convert_element_type(input_process_steps, F32);
- status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->options.mean);
- status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->options.scale);
+ status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->ov_option.mean);
+ status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->ov_option.scale);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to set preprocess steps\n");
ov_preprocess_preprocess_steps_free(input_process_steps);
@@ -824,7 +803,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
ov_layout_free(NCHW_layout);
ov_layout_free(NHWC_layout);
#else
- if (ctx->options.batch_size > 1) {
+ if (ctx->ov_option.batch_size > 1) {
input_shapes_t input_shapes;
status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
if (status != OK) {
@@ -832,7 +811,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
for (int i = 0; i < input_shapes.shape_num; i++)
- input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size;
+ input_shapes.shapes[i].shape.dims[0] = ctx->ov_option.batch_size;
status = ie_network_reshape(ov_model->network, input_shapes);
ie_network_input_shapes_free(&input_shapes);
if (status != OK) {
@@ -882,7 +861,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
}
- status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
+ status = ie_core_load_network(ov_model->core, ov_model->network, ctx->device, &config, &ov_model->exe_network);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
status = ie_core_get_available_devices(ov_model->core, &a_dev);
@@ -895,15 +874,15 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
APPEND_STRING(all_dev_names, a_dev.devices[i])
}
av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n",
- ctx->options.device_type, all_dev_names);
+ ctx->device, all_dev_names);
ret = AVERROR(ENODEV);
goto err;
}
#endif
// create infer_requests for async execution
- if (ctx->options.nireq <= 0) {
+ if (ctx->nireq <= 0) {
// the default value is a rough estimation
- ctx->options.nireq = av_cpu_count() / 2 + 1;
+ ctx->nireq = av_cpu_count() / 2 + 1;
}
ov_model->request_queue = ff_safe_queue_create();
@@ -912,7 +891,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
goto err;
}
- for (int i = 0; i < ctx->options.nireq; i++) {
+ for (int i = 0; i < ctx->nireq; i++) {
OVRequestItem *item = av_mallocz(sizeof(*item));
if (!item) {
ret = AVERROR(ENOMEM);
@@ -945,7 +924,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char *
}
#endif
- item->lltasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->lltasks));
+ item->lltasks = av_malloc_array(ctx->ov_option.batch_size, sizeof(*item->lltasks));
if (!item->lltasks) {
ret = AVERROR(ENOMEM);
goto err;
@@ -994,7 +973,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
LastLevelTaskItem *lltask;
int ret = 0;
TaskItem *task;
- OVContext *ctx;
+ DnnContext *ctx;
OVModel *ov_model;
if (ff_queue_size(inferenceq) == 0) {
@@ -1010,7 +989,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq)
lltask = ff_queue_peek_front(inferenceq);
task = lltask->task;
ov_model = task->model;
- ctx = &ov_model->ctx;
+ ctx = ov_model->ctx;
ret = fill_model_input_ov(ov_model, request);
if (ret != 0) {
@@ -1084,8 +1063,8 @@ err:
static int get_input_ov(void *model, DNNData *input, const char *input_name)
{
OVModel *ov_model = model;
- OVContext *ctx = &ov_model->ctx;
- int input_resizable = ctx->options.input_resizable;
+ DnnContext *ctx = ov_model->ctx;
+ int input_resizable = ctx->ov_option.input_resizable;
#if HAVE_OPENVINO2
ov_shape_t input_shape = {0};
@@ -1291,7 +1270,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
#endif
int ret;
OVModel *ov_model = model;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
TaskItem task;
OVRequestItem *request;
DNNExecBaseParams exec_params = {
@@ -1308,7 +1287,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
}
#if HAVE_OPENVINO2
- if (ctx->options.input_resizable) {
+ if (ctx->ov_option.input_resizable) {
status = ov_partial_shape_create(4, dims, &partial_shape);
if (status != OK) {
av_log(ctx, AV_LOG_ERROR, "Failed to create partial shape.\n");
@@ -1339,7 +1318,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i
if (!ov_model->compiled_model) {
#else
- if (ctx->options.input_resizable) {
+ if (ctx->ov_option.input_resizable) {
status = ie_network_get_input_shapes(ov_model->network, &input_shapes);
input_shapes.shapes->shape.dims[2] = input_height;
input_shapes.shapes->shape.dims[3] = input_width;
@@ -1386,11 +1365,10 @@ err:
return ret;
}
-static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_ov(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
OVModel *ov_model = NULL;
- OVContext *ctx = NULL;
#if HAVE_OPENVINO2
ov_core_t* core = NULL;
ov_model_t* ovmodel = NULL;
@@ -1411,17 +1389,9 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
av_freep(&model);
return NULL;
}
+ ov_model->ctx = ctx;
model->model = ov_model;
ov_model->model = model;
- ov_model->ctx.class = &dnn_openvino_class;
- ctx = &ov_model->ctx;
-
- //parse options
- av_opt_set_defaults(ctx);
- if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
- goto err;
- }
#if HAVE_OPENVINO2
status = ov_core_create(&core);
@@ -1430,13 +1400,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
}
ov_model->core = core;
- status = ov_core_read_model(core, model_filename, NULL, &ovmodel);
+ status = ov_core_read_model(core, ctx->model_filename, NULL, &ovmodel);
if (status != OK) {
ov_version_t ver;
status = ov_get_openvino_version(&ver);
av_log(NULL, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
"Please check if the model version matches the runtime OpenVINO Version:\n",
- model_filename);
+ ctx->model_filename);
if (status == OK) {
av_log(NULL, AV_LOG_ERROR, "BuildNumber: %s\n", ver.buildNumber);
}
@@ -1452,13 +1422,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
if (status != OK)
goto err;
- status = ie_core_read_network(ov_model->core, model_filename, NULL, &ov_model->network);
+ status = ie_core_read_network(ov_model->core, ctx->model_filename, NULL, &ov_model->network);
if (status != OK) {
ie_version_t ver;
ver = ie_c_api_version();
av_log(ctx, AV_LOG_ERROR, "Failed to read the network from model file %s,\n"
"Please check if the model version matches the runtime OpenVINO %s\n",
- model_filename, ver.api_version);
+ ctx->model_filename, ver.api_version);
ie_version_free(&ver);
goto err;
}
@@ -1496,7 +1466,6 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_ov;
model->get_output = &get_output_ov;
- model->options = options;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
@@ -1510,7 +1479,7 @@ err:
static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params)
{
OVModel *ov_model = model->model;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
OVRequestItem *request;
TaskItem *task;
int ret;
@@ -1539,7 +1508,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOMEM);
}
- ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->options.async, 1);
+ ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->async, 1);
if (ret != 0) {
av_freep(&task);
return ret;
@@ -1557,8 +1526,8 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return ret;
}
- if (ctx->options.async) {
- while (ff_queue_size(ov_model->lltask_queue) >= ctx->options.batch_size) {
+ if (ctx->async) {
+ while (ff_queue_size(ov_model->lltask_queue) >= ctx->ov_option.batch_size) {
request = ff_safe_queue_pop_front(ov_model->request_queue);
if (!request) {
av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
@@ -1581,7 +1550,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOSYS);
}
- if (ctx->options.batch_size > 1) {
+ if (ctx->ov_option.batch_size > 1) {
avpriv_report_missing_feature(ctx, "batch mode for sync execution");
return AVERROR(ENOSYS);
}
@@ -1604,7 +1573,7 @@ static DNNAsyncStatusType dnn_get_result_ov(const DNNModel *model, AVFrame **in,
static int dnn_flush_ov(const DNNModel *model)
{
OVModel *ov_model = model->model;
- OVContext *ctx = &ov_model->ctx;
+ DnnContext *ctx = ov_model->ctx;
OVRequestItem *request;
#if HAVE_OPENVINO2
ov_status_e status;
@@ -1652,6 +1621,7 @@ static int dnn_flush_ov(const DNNModel *model)
}
const DNNModule ff_dnn_backend_openvino = {
+ .clazz = DNN_DEFINE_CLASS(dnn_openvino),
.load_model = dnn_load_model_ov,
.execute_model = dnn_execute_model_ov,
.get_result = dnn_get_result_ov,
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 2ed17c3c87..d24591b90b 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -36,19 +36,8 @@
#include "safe_queue.h"
#include <tensorflow/c/c_api.h>
-typedef struct TFOptions{
- char *sess_config;
- uint8_t async;
- uint32_t nireq;
-} TFOptions;
-
-typedef struct TFContext {
- const AVClass *class;
- TFOptions options;
-} TFContext;
-
-typedef struct TFModel{
- TFContext ctx;
+typedef struct TFModel {
+ DnnContext *ctx;
DNNModel *model;
TF_Graph *graph;
TF_Session *session;
@@ -76,15 +65,13 @@ typedef struct TFRequestItem {
DNNAsyncExecModule exec_module;
} TFRequestItem;
-#define OFFSET(x) offsetof(TFContext, x)
+#define OFFSET(x) offsetof(TFOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_tensorflow_options[] = {
- { "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
- DNN_BACKEND_COMMON_OPTIONS
+ { "sess_config", "config for SessionOptions", OFFSET(sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_tensorflow);
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue);
static void infer_completion_callback(void *args);
@@ -160,7 +147,7 @@ static int tf_start_inference(void *args)
TFModel *tf_model = task->model;
if (!request) {
- av_log(&tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
+ av_log(tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
return AVERROR(EINVAL);
}
@@ -170,7 +157,7 @@ static int tf_start_inference(void *args)
task->nb_output, NULL, 0, NULL,
request->status);
if (TF_GetCode(request->status) != TF_OK) {
- av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
+ av_log(tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
return DNN_GENERIC_ERROR;
}
return 0;
@@ -198,7 +185,7 @@ static inline void destroy_request_item(TFRequestItem **arg) {
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
TFModel *tf_model = task->model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
@@ -278,7 +265,7 @@ static TF_Tensor *allocate_input_tensor(const DNNData *input)
static int get_input_tf(void *model, DNNData *input, const char *input_name)
{
TFModel *tf_model = model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
TF_Status *status;
TF_DataType dt;
int64_t dims[4];
@@ -328,7 +315,7 @@ static int get_output_tf(void *model, const char *input_name, int input_width, i
{
int ret;
TFModel *tf_model = model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
TaskItem task;
TFRequestItem *request;
DNNExecBaseParams exec_params = {
@@ -399,7 +386,7 @@ static int hex_to_data(uint8_t *data, const char *p)
static int load_tf_model(TFModel *tf_model, const char *model_filename)
{
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
TF_Buffer *graph_def;
TF_ImportGraphDefOptions *graph_opts;
TF_SessionOptions *sess_opts;
@@ -408,7 +395,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
int sess_config_length = 0;
// prepare the sess config data
- if (tf_model->ctx.options.sess_config != NULL) {
+ if (ctx->tf_option.sess_config != NULL) {
const char *config;
/*
tf_model->ctx.options.sess_config is hex to present the serialized proto
@@ -416,11 +403,11 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
proto in a python script, tools/python/tf_sess_config.py is a script example
to generate the configs of sess_config.
*/
- if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
+ if (strncmp(ctx->tf_option.sess_config, "0x", 2) != 0) {
av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
return AVERROR(EINVAL);
}
- config = tf_model->ctx.options.sess_config + 2;
+ config = ctx->tf_option.sess_config + 2;
sess_config_length = hex_to_data(NULL, config);
sess_config = av_mallocz(sess_config_length + AV_INPUT_BUFFER_PADDING_SIZE);
@@ -461,7 +448,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
if (TF_GetCode(tf_model->status) != TF_OK) {
TF_DeleteSessionOptions(sess_opts);
av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
- tf_model->ctx.options.sess_config);
+ ctx->tf_option.sess_config);
return DNN_GENERIC_ERROR;
}
}
@@ -529,15 +516,14 @@ static void dnn_free_model_tf(DNNModel **model)
TF_DeleteStatus(tf_model->status);
}
av_freep(&tf_model);
- av_freep(model);
+ av_freep(&model);
}
}
-static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_tf(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
TFModel *tf_model = NULL;
- TFContext *ctx = NULL;
model = av_mallocz(sizeof(DNNModel));
if (!model){
@@ -551,23 +537,15 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
}
model->model = tf_model;
tf_model->model = model;
- ctx = &tf_model->ctx;
- ctx->class = &dnn_tensorflow_class;
-
- //parse options
- av_opt_set_defaults(ctx);
- if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
- goto err;
- }
+ tf_model->ctx = ctx;
- if (load_tf_model(tf_model, model_filename) != 0){
- av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename);
+ if (load_tf_model(tf_model, ctx->model_filename) != 0){
+ av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", ctx->model_filename);
goto err;
}
- if (ctx->options.nireq <= 0) {
- ctx->options.nireq = av_cpu_count() / 2 + 1;
+ if (ctx->nireq <= 0) {
+ ctx->nireq = av_cpu_count() / 2 + 1;
}
#if !HAVE_PTHREAD_CANCEL
@@ -582,7 +560,7 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
goto err;
}
- for (int i = 0; i < ctx->options.nireq; i++) {
+ for (int i = 0; i < ctx->nireq; i++) {
TFRequestItem *item = av_mallocz(sizeof(*item));
if (!item) {
goto err;
@@ -617,7 +595,6 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_tf;
model->get_output = &get_output_tf;
- model->options = options;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
@@ -632,7 +609,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
LastLevelTaskItem *lltask;
TaskItem *task;
TFInferRequest *infer_request = NULL;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
int ret = 0;
lltask = ff_queue_pop_front(tf_model->lltask_queue);
@@ -728,7 +705,7 @@ static void infer_completion_callback(void *args) {
DNNData *outputs;
TFInferRequest *infer_request = request->infer_request;
TFModel *tf_model = task->model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
outputs = av_calloc(task->nb_output, sizeof(*outputs));
if (!outputs) {
@@ -787,7 +764,7 @@ err:
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
{
TFModel *tf_model;
- TFContext *ctx;
+ DnnContext *ctx;
LastLevelTaskItem *lltask;
TaskItem *task;
int ret = 0;
@@ -800,7 +777,7 @@ static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
lltask = ff_queue_peek_front(lltask_queue);
task = lltask->task;
tf_model = task->model;
- ctx = &tf_model->ctx;
+ ctx = tf_model->ctx;
ret = fill_model_input_tf(tf_model, request);
if (ret != 0) {
@@ -833,7 +810,7 @@ err:
static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params)
{
TFModel *tf_model = model->model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
TaskItem *task;
TFRequestItem *request;
int ret = 0;
@@ -849,7 +826,7 @@ static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOMEM);
}
- ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->options.async, 1);
+ ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->async, 1);
if (ret != 0) {
av_log(ctx, AV_LOG_ERROR, "Fill task with invalid parameter(s).\n");
av_freep(&task);
@@ -887,7 +864,7 @@ static DNNAsyncStatusType dnn_get_result_tf(const DNNModel *model, AVFrame **in,
static int dnn_flush_tf(const DNNModel *model)
{
TFModel *tf_model = model->model;
- TFContext *ctx = &tf_model->ctx;
+ DnnContext *ctx = tf_model->ctx;
TFRequestItem *request;
int ret;
@@ -915,6 +892,7 @@ static int dnn_flush_tf(const DNNModel *model)
}
const DNNModule ff_dnn_backend_tf = {
+ .clazz = DNN_DEFINE_CLASS(dnn_tensorflow),
.load_model = dnn_load_model_tf,
.execute_model = dnn_execute_model_tf,
.get_result = dnn_get_result_tf,
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index fa9a2e6d99..abdef1f178 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -31,22 +31,13 @@ extern "C" {
#include "dnn_io_proc.h"
#include "dnn_backend_common.h"
#include "libavutil/opt.h"
+#include "libavutil/mem.h"
#include "queue.h"
#include "safe_queue.h"
}
-typedef struct THOptions{
- char *device_name;
- int optimize;
-} THOptions;
-
-typedef struct THContext {
- const AVClass *c_class;
- THOptions options;
-} THContext;
-
typedef struct THModel {
- THContext ctx;
+ DnnContext *ctx;
DNNModel *model;
torch::jit::Module *jit_model;
SafeQueue *request_queue;
@@ -66,20 +57,17 @@ typedef struct THRequestItem {
} THRequestItem;
-#define OFFSET(x) offsetof(THContext, x)
+#define OFFSET(x) offsetof(THOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_th_options[] = {
- { "device", "device to run model", OFFSET(options.device_name), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS },
- { "optimize", "turn on graph executor optimization", OFFSET(options.optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
+ { "optimize", "turn on graph executor optimization", OFFSET(optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_th);
-
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
THModel *th_model = (THModel *)task->model;
- THContext *ctx = &th_model->ctx;
+ DnnContext *ctx = th_model->ctx;
LastLevelTaskItem *lltask = (LastLevelTaskItem *)av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n");
@@ -152,7 +140,6 @@ static void dnn_free_model_th(DNNModel **model)
}
ff_queue_destroy(th_model->task_queue);
delete th_model->jit_model;
- av_opt_free(&th_model->ctx);
av_freep(&th_model);
av_freep(model);
}
@@ -180,7 +167,7 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
TaskItem *task = NULL;
THInferRequest *infer_request = NULL;
DNNData input = { 0 };
- THContext *ctx = &th_model->ctx;
+ DnnContext *ctx = th_model->ctx;
int ret, width_idx, height_idx, channel_idx;
lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
@@ -240,7 +227,7 @@ static int th_start_inference(void *args)
LastLevelTaskItem *lltask = NULL;
TaskItem *task = NULL;
THModel *th_model = NULL;
- THContext *ctx = NULL;
+ DnnContext *ctx = NULL;
std::vector<torch::jit::IValue> inputs;
torch::NoGradGuard no_grad;
@@ -252,9 +239,9 @@ static int th_start_inference(void *args)
lltask = request->lltask;
task = lltask->task;
th_model = (THModel *)task->model;
- ctx = &th_model->ctx;
+ ctx = th_model->ctx;
- if (ctx->options.optimize)
+ if (ctx->torch_option.optimize)
torch::jit::setGraphExecutorOptimize(true);
else
torch::jit::setGraphExecutorOptimize(false);
@@ -291,7 +278,7 @@ static void infer_completion_callback(void *args) {
outputs.dims[2] = sizes.at(2); // H
outputs.dims[3] = sizes.at(3); // W
} else {
- avpriv_report_missing_feature(&th_model->ctx, "Support of this kind of model");
+ avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model");
goto err;
}
@@ -303,7 +290,7 @@ static void infer_completion_callback(void *args) {
if (th_model->model->frame_post_proc != NULL) {
th_model->model->frame_post_proc(task->out_frame, &outputs, th_model->model->filter_ctx);
} else {
- ff_proc_from_dnn_to_frame(task->out_frame, &outputs, &th_model->ctx);
+ ff_proc_from_dnn_to_frame(task->out_frame, &outputs, th_model->ctx);
}
} else {
task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)];
@@ -311,7 +298,7 @@ static void infer_completion_callback(void *args) {
}
break;
default:
- avpriv_report_missing_feature(&th_model->ctx, "model function type %d", th_model->model->func_type);
+ avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model->func_type);
goto err;
}
task->inference_done++;
@@ -321,7 +308,7 @@ err:
if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
destroy_request_item(&request);
- av_log(&th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
+ av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
}
}
@@ -351,7 +338,7 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
goto err;
}
if (task->async) {
- avpriv_report_missing_feature(&th_model->ctx, "LibTorch async");
+ avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
} else {
ret = th_start_inference((void *)(request));
if (ret != 0) {
@@ -374,7 +361,7 @@ static int get_output_th(void *model, const char *input_name, int input_width, i
{
int ret = 0;
THModel *th_model = (THModel*) model;
- THContext *ctx = &th_model->ctx;
+ DnnContext *ctx = th_model->ctx;
TaskItem task = { 0 };
THRequestItem *request = NULL;
DNNExecBaseParams exec_params = {
@@ -423,12 +410,12 @@ static THInferRequest *th_create_inference_request(void)
return request;
}
-static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
+static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
THModel *th_model = NULL;
THRequestItem *item = NULL;
- THContext *ctx;
+ const char *device_name = ctx->device ? ctx->device : "cpu";
model = (DNNModel *)av_mallocz(sizeof(DNNModel));
if (!model) {
@@ -442,24 +429,17 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
}
th_model->model = model;
model->model = th_model;
- th_model->ctx.c_class = &dnn_th_class;
- ctx = &th_model->ctx;
- //parse options
- av_opt_set_defaults(ctx);
- if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
- return NULL;
- }
+ th_model->ctx = ctx;
- c10::Device device = c10::Device(ctx->options.device_name);
+ c10::Device device = c10::Device(device_name);
if (!device.is_cpu()) {
- av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", ctx->options.device_name);
+ av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name);
goto fail;
}
try {
th_model->jit_model = new torch::jit::Module;
- (*th_model->jit_model) = torch::jit::load(model_filename);
+ (*th_model->jit_model) = torch::jit::load(ctx->model_filename);
} catch (const c10::Error& e) {
av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n");
goto fail;
@@ -501,7 +481,6 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_th;
model->get_output = &get_output_th;
- model->options = NULL;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
return model;
@@ -518,7 +497,7 @@ fail:
static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params)
{
THModel *th_model = (THModel *)model->model;
- THContext *ctx = &th_model->ctx;
+ DnnContext *ctx = th_model->ctx;
TaskItem *task;
THRequestItem *request;
int ret = 0;
@@ -581,7 +560,7 @@ static int dnn_flush_th(const DNNModel *model)
request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
if (!request) {
- av_log(&th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+ av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
return AVERROR(EINVAL);
}
@@ -589,6 +568,7 @@ static int dnn_flush_th(const DNNModel *model)
}
extern const DNNModule ff_dnn_backend_torch = {
+ .clazz = DNN_DEFINE_CLASS(dnn_th),
.load_model = dnn_load_model_th,
.execute_model = dnn_execute_model_th,
.get_result = dnn_get_result_th,
diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c
index b9f71aea53..e7453f1bb1 100644
--- a/libavfilter/dnn/dnn_interface.c
+++ b/libavfilter/dnn/dnn_interface.c
@@ -24,12 +24,61 @@
*/
#include "../dnn_interface.h"
+#include "libavutil/avassert.h"
#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavfilter/internal.h"
extern const DNNModule ff_dnn_backend_openvino;
extern const DNNModule ff_dnn_backend_tf;
extern const DNNModule ff_dnn_backend_torch;
+#define OFFSET(x) offsetof(DnnContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_base_options[] = {
+ {"model", "path to model file",
+ OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+ {"input", "input name of the model",
+ OFFSET(model_inputname), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+ {"output", "output name of the model",
+ OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+ {"backend_configs", "backend configs (deprecated)",
+ OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
+ {"options", "backend configs (deprecated)",
+ OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
+ {"nireq", "number of request",
+ OFFSET(nireq), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
+ {"async", "use DNN async inference",
+ OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
+ {"device", "device to run model",
+ OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
+ {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(dnn_base);
+
+typedef struct DnnBackendInfo {
+ const size_t offset;
+ union {
+ const AVClass *class;
+ const DNNModule *module;
+ };
+} DnnBackendInfo;
+
+static const DnnBackendInfo dnn_backend_info_list[] = {
+ {0, .class = &dnn_base_class},
+ // Must keep the same order as in DNNOptions, so offset value in incremental order
+#if CONFIG_LIBTENSORFLOW
+ {offsetof(DnnContext, tf_option), .module = &ff_dnn_backend_tf},
+#endif
+#if CONFIG_LIBOPENVINO
+ {offsetof(DnnContext, ov_option), .module = &ff_dnn_backend_openvino},
+#endif
+#if CONFIG_LIBTORCH
+ {offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch},
+#endif
+};
+
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
{
switch(backend_type){
@@ -52,3 +101,44 @@ const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
return NULL;
}
}
+
+void ff_dnn_init_child_class(DnnContext *ctx)
+{
+ for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list); i++) {
+ const AVClass **ptr = (const AVClass **) ((char *) ctx + dnn_backend_info_list[i].offset);
+ *ptr = dnn_backend_info_list[i].class;
+ }
+}
+
+void *ff_dnn_child_next(DnnContext *obj, void *prev) {
+ size_t pre_offset;
+
+ if (!prev) {
+ av_assert0(obj->clazz);
+ return obj;
+ }
+
+ pre_offset = (char *)prev - (char *)obj;
+ for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list) - 1; i++) {
+ if (dnn_backend_info_list[i].offset == pre_offset) {
+ const AVClass **ptr = (const AVClass **) ((char *) obj + dnn_backend_info_list[i + 1].offset);
+ av_assert0(*ptr);
+ return ptr;
+ }
+ }
+
+ return NULL;
+}
+
+const AVClass *ff_dnn_child_class_iterate(void **iter)
+{
+ uintptr_t i = (uintptr_t) *iter;
+
+ if (i < FF_ARRAY_ELEMS(dnn_backend_info_list)) {
+ *iter = (void *)(i + 1);
+ return dnn_backend_info_list[i].class;
+ }
+
+ return NULL;
+}
+
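An illustrative walk over the option children made visible by the list above (sketch only; dnnctx is an assumed DnnContext whose class pointers were set by ff_dnn_init_child_class()): the first child is the context itself, followed by each compiled-in backend's option struct in dnn_backend_info_list order, which is why the offsets must stay strictly increasing.

    void *child = NULL;
    while ((child = ff_dnn_child_next(&dnnctx, child)))
        av_log(NULL, AV_LOG_INFO, "dnn option child: %s\n",
               (*(const AVClass **)child)->class_name);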
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 5e76b9ba45..860ca7591f 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -19,6 +19,7 @@
#include "dnn_filter_common.h"
#include "libavutil/avstring.h"
#include "libavutil/mem.h"
+#include "libavutil/opt.h"
#define MAX_SUPPORTED_OUTPUTS_NB 4
@@ -52,6 +53,23 @@ static char **separate_output_names(const char *expr, const char *val_sep, int *
return parsed_vals;
}
+typedef struct DnnFilterBase {
+ const AVClass *class;
+ DnnContext dnnctx;
+} DnnFilterBase;
+
+int ff_dnn_filter_init_child_class(AVFilterContext *filter) {
+ DnnFilterBase *base = filter->priv;
+ ff_dnn_init_child_class(&base->dnnctx);
+ return 0;
+}
+
+void *ff_dnn_filter_child_next(void *obj, void *prev)
+{
+ DnnFilterBase *base = obj;
+ return ff_dnn_child_next(&base->dnnctx, prev);
+}
+
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNBackendType backend = ctx->backend_type;
@@ -91,7 +109,25 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil
return AVERROR(EINVAL);
}
- ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx);
+ if (ctx->backend_options) {
+ void *child = NULL;
+
+ av_log(filter_ctx, AV_LOG_WARNING,
+ "backend_configs is deprecated, please set backend options directly\n");
+ while (child = ff_dnn_child_next(ctx, child)) {
+ if (*(const AVClass **)child == &ctx->dnn_module->clazz) {
+ int ret = av_opt_set_from_string(child, ctx->backend_options,
+ NULL, "=", "&");
+ if (ret < 0) {
+ av_log(filter_ctx, AV_LOG_ERROR, "failed to parse options \"%s\"\n",
+ ctx->backend_options);
+ return ret;
+ }
+ }
+ }
+ }
+
+ ctx->model = (ctx->dnn_module->load_model)(ctx, func_type, filter_ctx);
if (!ctx->model) {
av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
return AVERROR(EINVAL);
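In user terms, an old graph that still passes backend_configs, e.g. dnn_processing=dnn_backend=openvino:model=model.xml:backend_configs='batch_size=2', keeps working with a deprecation warning, while the same backend option can now be given directly as a filter option, e.g. dnn_processing=dnn_backend=openvino:model=model.xml:batch_size=2 (hypothetical spelling; the per-filter option wiring lives outside this hunk).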
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
index 30871ee381..b52b55a90d 100644
--- a/libavfilter/dnn_filter_common.h
+++ b/libavfilter/dnn_filter_common.h
@@ -26,28 +26,23 @@
#include "dnn_interface.h"
-typedef struct DnnContext {
- char *model_filename;
- DNNBackendType backend_type;
- char *model_inputname;
- char *model_outputnames_string;
- char *backend_options;
- int async;
-
- char **model_outputnames;
- uint32_t nb_outputs;
- const DNNModule *dnn_module;
- DNNModel *model;
-} DnnContext;
-
-#define DNN_COMMON_OPTIONS \
- { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
- { "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
- { "output", "output name of the model", OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
- { "backend_configs", "backend configs", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
- { "options", "backend configs (deprecated, use backend_configs)", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},\
- { "async", "use DNN async inference (ignored, use backend_configs='async=1')", OFFSET(async), AV_OPT_TYPE_BOOL, { .i64 = 1}, 0, 1, FLAGS},
-
+#define AVFILTER_DNN_DEFINE_CLASS_EXT(name, desc, options) \
+ static const AVClass name##_class = { \
+ .class_name = desc, \
+ .item_name = av_default_item_name, \
+ .option = options, \
+ .version = LIBAVUTIL_VERSION_INT, \
+ .category = AV_CLASS_CATEGORY_FILTER, \
+ .child_next = ff_dnn_filter_child_next, \
+ .child_class_iterate = ff_dnn_child_class_iterate, \
+ }
+
+#define AVFILTER_DNN_DEFINE_CLASS(fname) \
+ AVFILTER_DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
+
+void *ff_dnn_filter_child_next(void *obj, void *prev);
+
+int ff_dnn_filter_init_child_class(AVFilterContext *filter);
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
int ff_dnn_set_frame_proc(DnnContext *ctx, FramePrePostProc pre_proc, FramePrePostProc post_proc);
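A hypothetical adopter sketch of the new macros (names made up, not part of this patch): a DNN filter keeps DnnContext directly after its class pointer so its private struct stays layout-compatible with DnnFilterBase in dnn_filter_common.c, declares its class with AVFILTER_DNN_DEFINE_CLASS so option lookup can descend into the backend children, and calls ff_dnn_filter_init_child_class() from its init path.

    typedef struct DnnFooContext {
        const AVClass *class;
        DnnContext dnnctx;       /* must follow the class pointer (DnnFilterBase layout) */
        /* filter-specific fields ... */
    } DnnFooContext;

    static const AVOption dnn_foo_options[] = { { NULL } };
    AVFILTER_DNN_DEFINE_CLASS(dnn_foo);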
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 63f492e690..4e544486cc 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -93,8 +93,6 @@ typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_i
typedef struct DNNModel{
// Stores model that can be different for different backends.
void *model;
- // Stores options when the model is executed by the backend
- const char *options;
// Stores FilterContext used for the interaction between AVFrame and DNNData
AVFilterContext *filter_ctx;
// Stores function type of the model
@@ -117,10 +115,65 @@ typedef struct DNNModel{
ClassifyPostProc classify_post_proc;
} DNNModel;
+typedef struct TFOptions{
+ const AVClass *clazz;
+
+ char *sess_config;
+} TFOptions;
+
+typedef struct OVOptions {
+ const AVClass *clazz;
+
+ int batch_size;
+ int input_resizable;
+ DNNLayout layout;
+ float scale;
+ float mean;
+} OVOptions;
+
+typedef struct THOptions {
+ const AVClass *clazz;
+ int optimize;
+} THOptions;
+
+typedef struct DNNModule DNNModule;
+
+typedef struct DnnContext {
+ const AVClass *clazz;
+
+ DNNModel *model;
+
+ char *model_filename;
+ DNNBackendType backend_type;
+ char *model_inputname;
+ char *model_outputnames_string;
+ char *backend_options;
+ int async;
+
+ char **model_outputnames;
+ uint32_t nb_outputs;
+ const DNNModule *dnn_module;
+
+ int nireq;
+ char *device;
+
+#if CONFIG_LIBTENSORFLOW
+ TFOptions tf_option;
+#endif
+
+#if CONFIG_LIBOPENVINO
+ OVOptions ov_option;
+#endif
+#if CONFIG_LIBTORCH
+ THOptions torch_option;
+#endif
+} DnnContext;
+
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
-typedef struct DNNModule{
+struct DNNModule {
+ const AVClass clazz;
// Loads model and parameters from given file. Returns NULL if it is not possible.
- DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
+ DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
// Executes model with specified input and output. Returns the error code otherwise.
int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
// Retrieve inference result.
@@ -129,11 +182,15 @@ typedef struct DNNModule{
int (*flush)(const DNNModel *model);
// Frees memory allocated for model.
void (*free_model)(DNNModel **model);
-} DNNModule;
+};
// Initializes DNNModule depending on chosen backend.
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx);
+void ff_dnn_init_child_class(DnnContext *ctx);
+void *ff_dnn_child_next(DnnContext *obj, void *prev);
+const AVClass *ff_dnn_child_class_iterate(void **iter);
+
static inline int dnn_get_width_idx_by_layout(DNNLayout layout)
{
return layout == DL_NHWC ? 2 : 3;
diff --git a/libavfilter/f_select.c b/libavfilter/f_select.c
index 9b330a0673..7402d3169f 100644
--- a/libavfilter/f_select.c
+++ b/libavfilter/f_select.c
@@ -90,6 +90,9 @@ static const char *const var_names[] = {
"concatdec_select", ///< frame is within the interval set by the concat demuxer
+ "ih", ///< ih: Represents the height of the input video frame.
+ "iw", ///< iw: Represents the width of the input video frame.
+
NULL
};
@@ -144,6 +147,9 @@ enum var_name {
VAR_CONCATDEC_SELECT,
+ VAR_IH,
+ VAR_IW,
+
VAR_VARS_NB
};
@@ -264,6 +270,9 @@ static int config_input(AVFilterLink *inlink)
select->var_values[VAR_CONSUMED_SAMPLES_N] = NAN;
select->var_values[VAR_SAMPLES_N] = NAN;
+ select->var_values[VAR_IH] = NAN;
+ select->var_values[VAR_IW] = NAN;
+
select->var_values[VAR_SAMPLE_RATE] =
inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
@@ -357,6 +366,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
break;
case AVMEDIA_TYPE_VIDEO:
+ select->var_values[VAR_IH] = frame->height;
+ select->var_values[VAR_IW] = frame->width;
+
select->var_values[VAR_INTERLACE_TYPE] =
!(frame->flags & AV_FRAME_FLAG_INTERLACED) ? INTERLACE_TYPE_P :
(frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? INTERLACE_TYPE_T : INTERLACE_TYPE_B;
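The new ih/iw variables expose the frame geometry to select expressions; they stay NAN for audio and before the first video frame. A hypothetical use would be select='gte(iw\,1280)*gte(ih\,720)' to pass only frames of at least 1280x720.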
diff --git a/libavfilter/f_sidedata.c b/libavfilter/f_sidedata.c
index fe9607ed52..616fd0750c 100644
--- a/libavfilter/f_sidedata.c
+++ b/libavfilter/f_sidedata.c
@@ -71,8 +71,16 @@ static const AVOption filt_name##_options[] = { \
{ "S12M_TIMECOD", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_S12M_TIMECODE }, 0, 0, FLAGS, .unit = "type" }, \
{ "DYNAMIC_HDR_PLUS", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DYNAMIC_HDR_PLUS }, 0, 0, FLAGS, .unit = "type" }, \
{ "REGIONS_OF_INTEREST", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_REGIONS_OF_INTEREST }, 0, 0, FLAGS, .unit = "type" }, \
- { "DETECTION_BOUNDING_BOXES", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DETECTION_BBOXES }, 0, 0, FLAGS, .unit = "type" }, \
+ { "VIDEO_ENC_PARAMS", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_VIDEO_ENC_PARAMS }, 0, 0, FLAGS, .unit = "type" }, \
{ "SEI_UNREGISTERED", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_SEI_UNREGISTERED }, 0, 0, FLAGS, .unit = "type" }, \
+ { "FILM_GRAIN_PARAMS", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_FILM_GRAIN_PARAMS }, 0, 0, FLAGS, .unit = "type" }, \
+ { "DETECTION_BOUNDING_BOXES", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DETECTION_BBOXES }, 0, 0, FLAGS, .unit = "type" }, \
+ { "DETECTION_BBOXES", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DETECTION_BBOXES }, 0, 0, FLAGS, .unit = "type" }, \
+ { "DOVI_RPU_BUFFER", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DOVI_RPU_BUFFER }, 0, 0, FLAGS, .unit = "type" }, \
+ { "DOVI_METADATA", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DOVI_METADATA }, 0, 0, FLAGS, .unit = "type" }, \
+ { "DYNAMIC_HDR_VIVID", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_DYNAMIC_HDR_VIVID }, 0, 0, FLAGS, .unit = "type" }, \
+ { "AMBIENT_VIEWING_ENVIRONMENT","", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_AMBIENT_VIEWING_ENVIRONMENT}, 0, 0, FLAGS, .unit = "type" }, \
+ { "VIDEO_HINT", "", 0, AV_OPT_TYPE_CONST, {.i64 = AV_FRAME_DATA_VIDEO_HINT }, 0, 0, FLAGS, .unit = "type" }, \
{ NULL } \
}
diff --git a/libavfilter/framesync.c b/libavfilter/framesync.c
index 1afd70ab21..535fbe9c7c 100644
--- a/libavfilter/framesync.c
+++ b/libavfilter/framesync.c
@@ -51,7 +51,7 @@ static const AVOption framesync_options[] = {
0, AV_OPT_TYPE_CONST, { .i64 = TS_NEAREST }, .flags = FLAGS, .unit = "ts_sync_mode" },
{ NULL }
};
-static const AVClass framesync_class = {
+const AVClass ff_framesync_class = {
.version = LIBAVUTIL_VERSION_INT,
.class_name = "framesync",
.item_name = framesync_name,
@@ -62,7 +62,7 @@ static const AVClass framesync_class = {
const AVClass *ff_framesync_child_class_iterate(void **iter)
{
- const AVClass *c = *iter ? NULL : &framesync_class;
+ const AVClass *c = *iter ? NULL : &ff_framesync_class;
*iter = (void *)(uintptr_t)c;
return c;
}
@@ -79,7 +79,7 @@ void ff_framesync_preinit(FFFrameSync *fs)
{
if (fs->class)
return;
- fs->class = &framesync_class;
+ fs->class = &ff_framesync_class;
av_opt_set_defaults(fs);
}
@@ -95,8 +95,11 @@ int ff_framesync_init(FFFrameSync *fs, AVFilterContext *parent, unsigned nb_in)
fs->nb_in = nb_in;
fs->in = av_calloc(nb_in, sizeof(*fs->in));
- if (!fs->in)
+ if (!fs->in) {
+ fs->nb_in = 0;
return AVERROR(ENOMEM);
+ }
+
return 0;
}
diff --git a/libavfilter/framesync.h b/libavfilter/framesync.h
index 233f50a0eb..130d067bae 100644
--- a/libavfilter/framesync.h
+++ b/libavfilter/framesync.h
@@ -316,6 +316,7 @@ int ff_framesync_dualinput_get(FFFrameSync *fs, AVFrame **f0, AVFrame **f1);
int ff_framesync_dualinput_get_writable(FFFrameSync *fs, AVFrame **f0, AVFrame **f1);
const AVClass *ff_framesync_child_class_iterate(void **iter);
+extern const AVClass ff_framesync_class;
#define FRAMESYNC_DEFINE_PURE_CLASS(name, desc, func_prefix, options) \
static const AVClass name##_class = { \
diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
index 8c92fec0c1..1c9773df09 100644
--- a/libavfilter/qsvvpp.c
+++ b/libavfilter/qsvvpp.c
@@ -308,7 +308,7 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
frames_ctx = (AVHWFramesContext *)link->hw_frames_ctx->data;
frames_hwctx = frames_ctx->hwctx;
- *frameinfo = frames_hwctx->surfaces[0].Info;
+ *frameinfo = frames_hwctx->nb_surfaces ? frames_hwctx->surfaces[0].Info : *frames_hwctx->info;
} else {
pix_fmt = link->format;
desc = av_pix_fmt_desc_get(pix_fmt);
@@ -441,11 +441,6 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
av_frame_free(&qsv_frame->frame);
return NULL;
}
-
- if (av_frame_copy_props(qsv_frame->frame, picref) < 0) {
- av_frame_free(&qsv_frame->frame);
- return NULL;
- }
} else
qsv_frame->frame = av_frame_clone(picref);
@@ -494,12 +489,6 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink, const AVFr
if (!out_frame->frame)
return NULL;
- ret = av_frame_copy_props(out_frame->frame, in);
- if (ret < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n");
- return NULL;
- }
-
ret = av_hwframe_get_buffer(outlink->hw_frames_ctx, out_frame->frame, 0);
if (ret < 0) {
av_log(ctx, AV_LOG_ERROR, "Can't allocate a surface.\n");
@@ -516,12 +505,6 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink, const AVFr
if (!out_frame->frame)
return NULL;
- ret = av_frame_copy_props(out_frame->frame, in);
- if (ret < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n");
- return NULL;
- }
-
ret = map_frame_to_surface(out_frame->frame,
&out_frame->surface);
if (ret < 0)
@@ -604,6 +587,26 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
device_ctx = (AVHWDeviceContext *)device_ref->data;
device_hwctx = device_ctx->hwctx;
+ /* extract the properties of the "master" session given to us */
+ ret = MFXQueryIMPL(device_hwctx->session, &impl);
+ if (ret == MFX_ERR_NONE)
+ ret = MFXQueryVersion(device_hwctx->session, &ver);
+ if (ret != MFX_ERR_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n");
+ return AVERROR_UNKNOWN;
+ }
+
+ if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) {
+ handle_type = MFX_HANDLE_VA_DISPLAY;
+ } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) {
+ handle_type = MFX_HANDLE_D3D11_DEVICE;
+ } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) {
+ handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER;
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n");
+ return AVERROR_UNKNOWN;
+ }
+
if (outlink->format == AV_PIX_FMT_QSV) {
AVHWFramesContext *out_frames_ctx;
AVBufferRef *out_frames_ref = av_hwframe_ctx_alloc(device_ref);
@@ -625,9 +628,15 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
out_frames_ctx->width = FFALIGN(outlink->w, 32);
out_frames_ctx->height = FFALIGN(outlink->h, 32);
out_frames_ctx->sw_format = s->out_sw_format;
- out_frames_ctx->initial_pool_size = 64;
- if (avctx->extra_hw_frames > 0)
- out_frames_ctx->initial_pool_size += avctx->extra_hw_frames;
+
+ if (QSV_RUNTIME_VERSION_ATLEAST(ver, 2, 9) && handle_type != MFX_HANDLE_D3D9_DEVICE_MANAGER)
+ out_frames_ctx->initial_pool_size = 0;
+ else {
+ out_frames_ctx->initial_pool_size = 64;
+ if (avctx->extra_hw_frames > 0)
+ out_frames_ctx->initial_pool_size += avctx->extra_hw_frames;
+ }
+
out_frames_hwctx->frame_type = s->out_mem_mode;
ret = av_hwframe_ctx_init(out_frames_ref);
@@ -653,26 +662,6 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
} else
s->out_mem_mode = MFX_MEMTYPE_SYSTEM_MEMORY;
- /* extract the properties of the "master" session given to us */
- ret = MFXQueryIMPL(device_hwctx->session, &impl);
- if (ret == MFX_ERR_NONE)
- ret = MFXQueryVersion(device_hwctx->session, &ver);
- if (ret != MFX_ERR_NONE) {
- av_log(avctx, AV_LOG_ERROR, "Error querying the session attributes\n");
- return AVERROR_UNKNOWN;
- }
-
- if (MFX_IMPL_VIA_VAAPI == MFX_IMPL_VIA_MASK(impl)) {
- handle_type = MFX_HANDLE_VA_DISPLAY;
- } else if (MFX_IMPL_VIA_D3D11 == MFX_IMPL_VIA_MASK(impl)) {
- handle_type = MFX_HANDLE_D3D11_DEVICE;
- } else if (MFX_IMPL_VIA_D3D9 == MFX_IMPL_VIA_MASK(impl)) {
- handle_type = MFX_HANDLE_D3D9_DEVICE_MANAGER;
- } else {
- av_log(avctx, AV_LOG_ERROR, "Error unsupported handle type\n");
- return AVERROR_UNKNOWN;
- }
-
ret = MFXVideoCORE_GetHandle(device_hwctx->session, handle_type, &handle);
if (ret < 0)
return ff_qsvvpp_print_error(avctx, ret, "Error getting the session handle");
@@ -958,7 +947,7 @@ int ff_qsvvpp_close(AVFilterContext *avctx)
return 0;
}
-int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref)
+int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picref, AVFrame *propref)
{
AVFilterContext *ctx = inlink->dst;
AVFilterLink *outlink = ctx->outputs[0];
@@ -1015,6 +1004,16 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
return AVERROR(EAGAIN);
break;
}
+
+ if (propref) {
+ ret1 = av_frame_copy_props(out_frame->frame, propref);
+ if (ret1 < 0) {
+ av_frame_free(&out_frame->frame);
+ av_log(ctx, AV_LOG_ERROR, "Failed to copy metadata fields from src to dst.\n");
+ return ret1;
+ }
+ }
+
out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp,
default_tb, outlink->time_base);
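Caller-side sketch of the new contract (assumed call site; the matching qsv filter updates are outside this hunk): since the implicit av_frame_copy_props() calls were dropped from submit_frame()/query_frame(), a caller that wants metadata propagated now passes the source frame explicitly as propref, or NULL to skip the copy.

    /* s->qsv is an assumed QSVVPPContext member of the calling filter */
    ret = ff_qsvvpp_filter_frame(s->qsv, inlink, in, in);   /* props copied from "in" */
    if (ret < 0 && ret != AVERROR(EAGAIN))
        return ret;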
diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
index 4eea7a46c7..3b9192b62e 100644
--- a/libavfilter/qsvvpp.h
+++ b/libavfilter/qsvvpp.h
@@ -131,7 +131,7 @@ int ff_qsvvpp_init(AVFilterContext *avctx, QSVVPPParam *param);
int ff_qsvvpp_close(AVFilterContext *avctx);
/* vpp filter frame and call the cb if needed */
-int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame);
+int ff_qsvvpp_filter_frame(QSVVPPContext *vpp, AVFilterLink *inlink, AVFrame *frame, AVFrame *propref);
int ff_qsvvpp_print_iopattern(void *log_ctx, int mfx_iopattern,
const char *extra_string);
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
index 0b968a9c0d..277dde2aed 100644
--- a/libavfilter/riscv/Makefile
+++ b/libavfilter/riscv/Makefile
@@ -1,2 +1,2 @@
-OBJS += riscv/af_afir_init.o
-RVV-OBJS += riscv/af_afir_rvv.o
+OBJS-$(CONFIG_AFIR_FILTER) += riscv/af_afir_init.o
+RVV-OBJS-$(CONFIG_AFIR_FILTER) += riscv/af_afir_rvv.o
diff --git a/libavfilter/signature_lookup.c b/libavfilter/signature_lookup.c
index ab7320d47f..a0ca818a9b 100644
--- a/libavfilter/signature_lookup.c
+++ b/libavfilter/signature_lookup.c
@@ -448,14 +448,14 @@ static MatchingInfo evaluate_parameters(AVFilterContext *ctx, SignatureContext *
}
if (tolerancecount > 2) {
- a = aprev;
- b = bprev;
if (dir == DIR_NEXT) {
/* turn around */
a = infos->first;
b = infos->second;
dir = DIR_PREV;
} else {
+ a = aprev;
+ b = bprev;
break;
}
}
@@ -496,10 +496,10 @@ static MatchingInfo evaluate_parameters(AVFilterContext *ctx, SignatureContext *
continue; /* matching sequence is too short */
if ((double) goodfcount / (double) fcount < sc->thit)
continue;
- if ((double) goodfcount*0.5 < FFMAX(gooda, goodb))
+ if ((double) goodfcount*0.5 <= FFMAX(gooda, goodb))
continue;
- meandist = (double) goodfcount / (double) distsum;
+ meandist = (double) distsum / (double) goodfcount;
if (meandist < minmeandist ||
status == (STATUS_END_REACHED | STATUS_BEGIN_REACHED) ||
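The meandist fix swaps an inverted ratio: with, say, distsum = 30 over goodfcount = 10 good matches, the mean per-match distance is 3, whereas the old expression yielded 1/3 and so skewed the "smaller is better" comparison below. The threshold change two hunks up additionally turns the exact-half case of FFMAX(gooda, goodb) from accepted into rejected.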
diff --git a/libavfilter/vaapi_vpp.c b/libavfilter/vaapi_vpp.c
index ace1153a23..9ef7a289fb 100644
--- a/libavfilter/vaapi_vpp.c
+++ b/libavfilter/vaapi_vpp.c
@@ -204,7 +204,10 @@ int ff_vaapi_vpp_config_output(AVFilterLink *outlink)
output_frames->width = ctx->output_width;
output_frames->height = ctx->output_height;
- output_frames->initial_pool_size = 4;
+ if (CONFIG_VAAPI_1)
+ output_frames->initial_pool_size = 0;
+ else
+ output_frames->initial_pool_size = 4;
err = ff_filter_init_hw_frames(avctx, outlink, 10);
if (err < 0)
@@ -220,6 +223,8 @@ int ff_vaapi_vpp_config_output(AVFilterLink *outlink)
va_frames = output_frames->hwctx;
av_assert0(ctx->va_context == VA_INVALID_ID);
+ av_assert0(output_frames->initial_pool_size ||
+ (va_frames->surface_ids == NULL && va_frames->nb_surfaces == 0));
vas = vaCreateContext(ctx->hwctx->display, ctx->va_config,
ctx->output_width, ctx->output_height,
VA_PROGRESSIVE,
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 408c700767..d8cd8a2cfb 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -32,7 +32,7 @@
#include "version_major.h"
#define LIBAVFILTER_VERSION_MINOR 2
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MICRO 102
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index 6b52647966..5ea6df2e75 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -47,6 +47,7 @@ typedef struct BlendContext {
FilterParams params[4];
int tblend;
AVFrame *prev_frame; /* only used with tblend */
+ int nb_threads;
} BlendContext;
static const char *const var_names[] = { "X", "Y", "W", "H", "SW", "SH", "T", "N", "A", "B", "TOP", "BOTTOM", NULL };
@@ -132,12 +133,14 @@ static void blend_expr_## name(const uint8_t *_top, ptrdiff_t top_linesize,
const uint8_t *_bottom, ptrdiff_t bottom_linesize, \
uint8_t *_dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- FilterParams *param, double *values, int starty) \
+ FilterParams *param, SliceParams *sliceparam) \
{ \
const type *top = (const type*)_top; \
const type *bottom = (const type*)_bottom; \
+ double *values = sliceparam->values; \
+ int starty = sliceparam->starty; \
type *dst = (type*)_dst; \
- AVExpr *e = param->e; \
+ AVExpr *e = sliceparam->e; \
int y, x; \
dst_linesize /= div; \
top_linesize /= div; \
@@ -171,6 +174,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
const uint8_t *bottom = td->bottom->data[td->plane];
uint8_t *dst = td->dst->data[td->plane];
double values[VAR_VARS_NB];
+ SliceParams sliceparam = {.values = &values[0], .starty = slice_start, .e = td->param->e ? td->param->e[jobnr] : NULL};
values[VAR_N] = td->inlink->frame_count_out;
values[VAR_T] = td->dst->pts == AV_NOPTS_VALUE ? NAN : td->dst->pts * av_q2d(td->inlink->time_base);
@@ -185,7 +189,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
td->bottom->linesize[td->plane],
dst + slice_start * td->dst->linesize[td->plane],
td->dst->linesize[td->plane],
- td->w, height, td->param, &values[0], slice_start);
+ td->w, height, td->param, &sliceparam);
return 0;
}
@@ -218,7 +222,7 @@ static AVFrame *blend_frame(AVFilterContext *ctx, AVFrame *top_buf,
.inlink = inlink };
ff_filter_execute(ctx, filter_slice, &td, NULL,
- FFMIN(outh, ff_filter_get_nb_threads(ctx)));
+ FFMIN(outh, s->nb_threads));
}
if (!s->tblend)
@@ -247,6 +251,7 @@ static av_cold int init(AVFilterContext *ctx)
BlendContext *s = ctx->priv;
s->tblend = !strcmp(ctx->filter->name, "tblend");
+ s->nb_threads = ff_filter_get_nb_threads(ctx);
s->fs.on_event = blend_frame_for_dualinput;
return 0;
@@ -281,8 +286,14 @@ static av_cold void uninit(AVFilterContext *ctx)
ff_framesync_uninit(&s->fs);
av_frame_free(&s->prev_frame);
- for (i = 0; i < FF_ARRAY_ELEMS(s->params); i++)
- av_expr_free(s->params[i].e);
+ for (i = 0; i < FF_ARRAY_ELEMS(s->params); i++) {
+ if (s->params[i].e) {
+ for (int j = 0; j < s->nb_threads; j++)
+ av_expr_free(s->params[i].e[j]);
+ av_freep(&s->params[i].e);
+ }
+ }
+
}
static int config_params(AVFilterContext *ctx)
@@ -306,10 +317,19 @@ static int config_params(AVFilterContext *ctx)
return AVERROR(ENOMEM);
}
if (param->expr_str) {
- ret = av_expr_parse(&param->e, param->expr_str, var_names,
- NULL, NULL, NULL, NULL, 0, ctx);
- if (ret < 0)
- return ret;
+ if (!param->e) {
+ param->e = av_calloc(s->nb_threads, sizeof(*param->e));
+ if (!param->e)
+ return AVERROR(ENOMEM);
+ }
+ for (int i = 0; i < s->nb_threads; i++) {
+ av_expr_free(param->e[i]);
+ param->e[i] = NULL;
+ ret = av_expr_parse(&param->e[i], param->expr_str, var_names,
+ NULL, NULL, NULL, NULL, 0, ctx);
+ if (ret < 0)
+ return ret;
+ }
param->blend = s->depth > 8 ? s->depth > 16 ? blend_expr_32bit : blend_expr_16bit : blend_expr_8bit;
}
}
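Per-thread expression instances are the core of the blend change: evaluating an AVExpr can write to state stored inside it (e.g. the ld()/st() variable slots), so a single parsed expression must not be shared across slice threads. A minimal sketch of the pattern using only the public libavutil expression API; the helper name and its parameters are illustrative, not part of the filter:

#include "libavutil/error.h"
#include "libavutil/eval.h"
#include "libavutil/mem.h"

/* Parse one private copy of "expr" per worker thread so that slice jobs can
 * call av_expr_eval() concurrently without sharing evaluator state. */
static int parse_per_thread(AVExpr ***pexprs, int nb_threads,
                            const char *expr, const char *const *var_names,
                            void *log_ctx)
{
    AVExpr **exprs = av_calloc(nb_threads, sizeof(*exprs));
    if (!exprs)
        return AVERROR(ENOMEM);
    for (int i = 0; i < nb_threads; i++) {
        int ret = av_expr_parse(&exprs[i], expr, var_names,
                                NULL, NULL, NULL, NULL, 0, log_ctx);
        if (ret < 0) {
            for (int j = 0; j < i; j++)
                av_expr_free(exprs[j]);
            av_freep(&exprs);
            return ret;
        }
    }
    *pexprs = exprs;
    return 0;
}

/* In a slice job: double v = av_expr_eval(exprs[jobnr], var_values, NULL); */

Teardown then frees every instance and the array itself, mirroring the uninit() hunk above.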
diff --git a/libavfilter/vf_blend_init.h b/libavfilter/vf_blend_init.h
index d24f178032..956e1cb9fc 100644
--- a/libavfilter/vf_blend_init.h
+++ b/libavfilter/vf_blend_init.h
@@ -58,7 +58,7 @@ static void blend_copy ## src##_##depth(const uint8_t *top, ptrdiff_t top_linesi
const uint8_t *bottom, ptrdiff_t bottom_linesize,\
uint8_t *dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- FilterParams *param, double *values, int starty) \
+ FilterParams *param, SliceParams *sliceparam) \
{ \
av_image_copy_plane(dst, dst_linesize, src, src ## _linesize, \
width * depth / 8, height); \
@@ -80,7 +80,7 @@ static void blend_normal_##name(const uint8_t *_top, ptrdiff_t top_linesize,
const uint8_t *_bottom, ptrdiff_t bottom_linesize,\
uint8_t *_dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- FilterParams *param, double *values, int starty) \
+ FilterParams *param, SliceParams *sliceparam) \
{ \
const type *top = (const type*)_top; \
const type *bottom = (const type*)_bottom; \
diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index d181e81ace..7bacd7892a 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -433,8 +433,7 @@ static int create_filtergraph(AVFilterContext *ctx,
if (out->color_trc != s->out_trc) s->out_txchr = NULL;
if (in->colorspace != s->in_csp ||
in->color_range != s->in_rng) s->in_lumacoef = NULL;
- if (out->colorspace != s->out_csp ||
- out->color_range != s->out_rng) s->out_lumacoef = NULL;
+ if (out->color_range != s->out_rng) s->rgb2yuv = NULL;
if (!s->out_primaries || !s->in_primaries) {
s->in_prm = in->color_primaries;
@@ -563,26 +562,8 @@ static int create_filtergraph(AVFilterContext *ctx,
redo_yuv2rgb = 1;
}
- if (!s->out_lumacoef) {
- s->out_csp = out->colorspace;
+ if (!s->rgb2yuv) {
s->out_rng = out->color_range;
- s->out_lumacoef = av_csp_luma_coeffs_from_avcsp(s->out_csp);
- if (!s->out_lumacoef) {
- if (s->out_csp == AVCOL_SPC_UNSPECIFIED) {
- if (s->user_all == CS_UNSPECIFIED) {
- av_log(ctx, AV_LOG_ERROR,
- "Please specify output colorspace\n");
- } else {
- av_log(ctx, AV_LOG_ERROR,
- "Unsupported output color property %d\n", s->user_all);
- }
- } else {
- av_log(ctx, AV_LOG_ERROR,
- "Unsupported output colorspace %d (%s)\n", s->out_csp,
- av_color_space_name(s->out_csp));
- }
- return AVERROR(EINVAL);
- }
redo_rgb2yuv = 1;
}
@@ -687,6 +668,26 @@ static av_cold int init(AVFilterContext *ctx)
{
ColorSpaceContext *s = ctx->priv;
+ s->out_csp = s->user_csp == AVCOL_SPC_UNSPECIFIED ?
+ default_csp[FFMIN(s->user_all, CS_NB)] : s->user_csp;
+ s->out_lumacoef = av_csp_luma_coeffs_from_avcsp(s->out_csp);
+ if (!s->out_lumacoef) {
+ if (s->out_csp == AVCOL_SPC_UNSPECIFIED) {
+ if (s->user_all == CS_UNSPECIFIED) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Please specify output colorspace\n");
+ } else {
+ av_log(ctx, AV_LOG_ERROR,
+ "Unsupported output color property %d\n", s->user_all);
+ }
+ } else {
+ av_log(ctx, AV_LOG_ERROR,
+ "Unsupported output colorspace %d (%s)\n", s->out_csp,
+ av_color_space_name(s->out_csp));
+ }
+ return AVERROR(EINVAL);
+ }
+
ff_colorspacedsp_init(&s->dsp);
return 0;
@@ -735,6 +736,9 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
return res;
}
+ out->colorspace = s->out_csp;
+ out->color_range = s->user_rng == AVCOL_RANGE_UNSPECIFIED ?
+ in->color_range : s->user_rng;
out->color_primaries = s->user_prm == AVCOL_PRI_UNSPECIFIED ?
default_prm[FFMIN(s->user_all, CS_NB)] : s->user_prm;
if (s->user_trc == AVCOL_TRC_UNSPECIFIED) {
@@ -746,10 +750,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
} else {
out->color_trc = s->user_trc;
}
- out->colorspace = s->user_csp == AVCOL_SPC_UNSPECIFIED ?
- default_csp[FFMIN(s->user_all, CS_NB)] : s->user_csp;
- out->color_range = s->user_rng == AVCOL_RANGE_UNSPECIFIED ?
- in->color_range : s->user_rng;
if (rgb_sz != s->rgb_sz) {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
int uvw = in->width >> desc->log2_chroma_w;
@@ -841,8 +841,18 @@ static int query_formats(AVFilterContext *ctx)
};
int res;
ColorSpaceContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
AVFilterFormats *formats = ff_make_format_list(pix_fmts);
+ res = ff_formats_ref(ff_make_formats_list_singleton(s->out_csp), &outlink->incfg.color_spaces);
+ if (res < 0)
+ return res;
+ if (s->user_rng != AVCOL_RANGE_UNSPECIFIED) {
+ res = ff_formats_ref(ff_make_formats_list_singleton(s->user_rng), &outlink->incfg.color_ranges);
+ if (res < 0)
+ return res;
+ }
+
if (!formats)
return AVERROR(ENOMEM);
if (s->user_format == AV_PIX_FMT_NONE)
@@ -855,7 +865,7 @@ static int query_formats(AVFilterContext *ctx)
if (res < 0)
return res;
- return ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats);
+ return ff_formats_ref(formats, &outlink->incfg.formats);
}
static int config_props(AVFilterLink *outlink)
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
index d516db3717..bb78e33d80 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -761,8 +761,10 @@ static int param_init(AVFilterContext *ctx)
s->rdiv[i] = s->scale;
s->bias[i] = s->delta;
}
+#if CONFIG_SOBEL_FILTER
} else if (!strcmp(ctx->filter->name, "sobel")) {
ff_sobel_init(s, s->depth, s->nb_planes);
+#endif
} else if (!strcmp(ctx->filter->name, "kirsch")) {
for (i = 0; i < 4; i++) {
s->filter[i] = filter_kirsch;
diff --git a/libavfilter/vf_curves.c b/libavfilter/vf_curves.c
index 3e4a42bab3..97f284db22 100644
--- a/libavfilter/vf_curves.c
+++ b/libavfilter/vf_curves.c
@@ -182,20 +182,22 @@ static int parse_points_str(AVFilterContext *ctx, struct keypoint **points, cons
if (point->x < 0 || point->x > 1 || point->y < 0 || point->y > 1) {
av_log(ctx, AV_LOG_ERROR, "Invalid key point coordinates (%f;%f), "
"x and y must be in the [0;1] range.\n", point->x, point->y);
+ av_free(point);
return AVERROR(EINVAL);
}
- if (!*points)
- *points = point;
if (last) {
if ((int)(last->x * scale) >= (int)(point->x * scale)) {
av_log(ctx, AV_LOG_ERROR, "Key point coordinates (%f;%f) "
"and (%f;%f) are too close from each other or not "
"strictly increasing on the x-axis\n",
last->x, last->y, point->x, point->y);
+ av_free(point);
return AVERROR(EINVAL);
}
last->next = point;
}
+ if (!*points)
+ *points = point;
last = point;
}
diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c
index c8848dd7ba..7f665b73ab 100644
--- a/libavfilter/vf_derain.c
+++ b/libavfilter/vf_derain.c
@@ -46,13 +46,10 @@ static const AVOption derain_options[] = {
#if (CONFIG_LIBTENSORFLOW == 1)
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
- { "model", "path to model file", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
- { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
- { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
-AVFILTER_DEFINE_CLASS(derain);
+AVFILTER_DNN_DEFINE_CLASS(derain);
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
@@ -113,6 +110,7 @@ const AVFilter ff_vf_derain = {
.name = "derain",
.description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."),
.priv_size = sizeof(DRContext),
+ .preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(derain_inputs),
diff --git a/libavfilter/vf_dnn_classify.c b/libavfilter/vf_dnn_classify.c
index 1f8f227e3a..965779a8ab 100644
--- a/libavfilter/vf_dnn_classify.c
+++ b/libavfilter/vf_dnn_classify.c
@@ -50,14 +50,13 @@ static const AVOption dnn_classify_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
- DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_classify);
+AVFILTER_DNN_DEFINE_CLASS(dnn_classify);
static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx)
{
@@ -299,6 +298,7 @@ const AVFilter ff_vf_dnn_classify = {
.name = "dnn_classify",
.description = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."),
.priv_size = sizeof(DnnClassifyContext),
+ .preinit = ff_dnn_filter_init_child_class,
.init = dnn_classify_init,
.uninit = dnn_classify_uninit,
FILTER_INPUTS(ff_video_default_filterpad),
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index bacea3ef29..1830bae181 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -70,7 +70,6 @@ static const AVOption dnn_detect_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
- DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
@@ -85,7 +84,7 @@ static const AVOption dnn_detect_options[] = {
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_detect);
+AVFILTER_DNN_DEFINE_CLASS(dnn_detect);
static inline float sigmoid(float x) {
return 1.f / (1.f + exp(-x));
@@ -808,11 +807,13 @@ static av_cold void dnn_detect_uninit(AVFilterContext *context)
DnnDetectContext *ctx = context->priv;
AVDetectionBBox *bbox;
ff_dnn_uninit(&ctx->dnnctx);
- while(av_fifo_can_read(ctx->bboxes_fifo)) {
- av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
- av_freep(&bbox);
+ if (ctx->bboxes_fifo) {
+ while (av_fifo_can_read(ctx->bboxes_fifo)) {
+ av_fifo_read(ctx->bboxes_fifo, &bbox, 1);
+ av_freep(&bbox);
+ }
+ av_fifo_freep2(&ctx->bboxes_fifo);
}
- av_fifo_freep2(&ctx->bboxes_fifo);
av_freep(&ctx->anchors);
free_detect_labels(ctx);
}
@@ -851,6 +852,7 @@ const AVFilter ff_vf_dnn_detect = {
.name = "dnn_detect",
.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
.priv_size = sizeof(DnnDetectContext),
+ .preinit = ff_dnn_filter_init_child_class,
.init = dnn_detect_init,
.uninit = dnn_detect_uninit,
FILTER_INPUTS(dnn_detect_inputs),
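The guarded teardown above matters because uninit runs even when init bailed out before the FIFO was created. A self-contained sketch of the same NULL-tolerant drain with the current AVFifo API; the helper name and the pointer-element layout are assumptions for illustration:

#include "libavutil/fifo.h"
#include "libavutil/mem.h"

/* Drain a FIFO of heap-allocated pointers and free it, tolerating the case
 * where the FIFO was never created because initialization failed early. */
static void drain_and_free_ptr_fifo(AVFifo **pfifo)
{
    if (!*pfifo)
        return;
    while (av_fifo_can_read(*pfifo)) {
        void *entry;
        av_fifo_read(*pfifo, &entry, 1); /* one pointer-sized element */
        av_freep(&entry);
    }
    av_fifo_freep2(pfifo);
}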
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index fdac31665e..9a1dd2a356 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -54,11 +54,10 @@ static const AVOption dnn_processing_options[] = {
#if (CONFIG_LIBTORCH == 1)
{ "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
#endif
- DNN_COMMON_OPTIONS
{ NULL }
};
-AVFILTER_DEFINE_CLASS(dnn_processing);
+AVFILTER_DNN_DEFINE_CLASS(dnn_processing);
static av_cold int init(AVFilterContext *context)
{
@@ -373,6 +372,7 @@ const AVFilter ff_vf_dnn_processing = {
.name = "dnn_processing",
.description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
.priv_size = sizeof(DnnProcessingContext),
+ .preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(dnn_processing_inputs),
diff --git a/libavfilter/vf_drawbox_vaapi.c b/libavfilter/vf_drawbox_vaapi.c
new file mode 100644
index 0000000000..1081d463e9
--- /dev/null
+++ b/libavfilter/vf_drawbox_vaapi.c
@@ -0,0 +1,369 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/colorspace.h"
+#include "libavutil/eval.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "vaapi_vpp.h"
+#include "video.h"
+
+static const char *const var_names[] = {
+ "in_h", "ih",
+ "in_w", "iw",
+ "x",
+ "y",
+ "h",
+ "w",
+ "t",
+ "fill",
+ NULL
+};
+
+enum var_name {
+ VAR_IN_H, VAR_IH,
+ VAR_IN_W, VAR_IW,
+ VAR_X,
+ VAR_Y,
+ VAR_H,
+ VAR_W,
+ VAR_T,
+ VAR_MAX,
+ VARS_NB
+};
+
+static const int NUM_EXPR_EVALS = 5;
+
+typedef struct DrawboxVAAPIContext {
+ VAAPIVPPContext vpp_ctx; // must be the first field
+ VARectangle outer_rect, inner_rect;
+
+ /* The hardware frame context containing the frames for outer_rect. */
+ AVBufferRef *outer_frames_ref;
+ AVHWFramesContext *outer_frames;
+ AVFrame *outer_frame;
+
+ char *x_expr;
+ char *y_expr;
+ char *w_expr;
+ char *h_expr;
+ char *t_expr;
+
+ int w, h;
+ int x, y;
+ int replace;
+ uint32_t thickness;
+ uint8_t drawbox_rgba[4];
+
+ int fill;
+
+} DrawboxVAAPIContext;
+
+static int drawbox_vaapi_config_output(AVFilterLink *outlink)
+{
+ AVFilterContext *avctx = outlink->src;
+ AVFilterLink *inlink = avctx->inputs[0];
+ DrawboxVAAPIContext *ctx = avctx->priv;
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+ double var_values[VARS_NB], res;
+ int ret, i;
+ char *expr;
+
+ var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w;
+ var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h;
+ var_values[VAR_X] = NAN;
+ var_values[VAR_Y] = NAN;
+ var_values[VAR_H] = NAN;
+ var_values[VAR_W] = NAN;
+ var_values[VAR_T] = NAN;
+
+ for (i = 0; i <= NUM_EXPR_EVALS; i++) {
+ /* evaluate expressions, fail on last iteration */
+ var_values[VAR_MAX] = inlink->w;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->x_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0 && i == NUM_EXPR_EVALS)
+ goto fail;
+ ctx->x = var_values[VAR_X] = res;
+
+ var_values[VAR_MAX] = inlink->h;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->y_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0 && i == NUM_EXPR_EVALS)
+ goto fail;
+ ctx->y = var_values[VAR_Y] = res;
+
+ var_values[VAR_MAX] = inlink->w - ctx->x;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->w_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0 && i == NUM_EXPR_EVALS)
+ goto fail;
+ ctx->w = var_values[VAR_W] = res;
+
+ var_values[VAR_MAX] = inlink->h - ctx->y;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->h_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0 && i == NUM_EXPR_EVALS)
+ goto fail;
+ ctx->h = var_values[VAR_H] = res;
+
+ var_values[VAR_MAX] = INT_MAX;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->t_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0 && i == NUM_EXPR_EVALS)
+ goto fail;
+ ctx->thickness = var_values[VAR_T] = res;
+ }
+
+ /* Sanity check */
+ ctx->w = (ctx->w > 0) ? ctx->w : inlink->w;
+ ctx->h = (ctx->h > 0) ? ctx->h : inlink->h;
+ if (ctx->x + ctx->w > inlink->w)
+ ctx->w = inlink->w - ctx->x;
+ if (ctx->y + ctx->h > inlink->h)
+ ctx->h = inlink->h - ctx->y;
+
+ ctx->outer_rect.x = ctx->x;
+ ctx->outer_rect.y = ctx->y;
+ ctx->outer_rect.width = ctx->w;
+ ctx->outer_rect.height = ctx->h;
+
+ if (ctx->outer_rect.width <= ctx->thickness * 2 ||
+ ctx->outer_rect.height <= ctx->thickness * 2) {
+ ctx->fill = 1;
+ } else {
+ ctx->fill = 0;
+ ctx->inner_rect.x = ctx->outer_rect.x + ctx->thickness;
+ ctx->inner_rect.y = ctx->outer_rect.y + ctx->thickness;
+ ctx->inner_rect.width = ctx->outer_rect.width - ctx->thickness * 2;
+ ctx->inner_rect.height = ctx->outer_rect.height - ctx->thickness * 2;
+ }
+
+ vpp_ctx->output_width = inlink->w;
+ vpp_ctx->output_height = inlink->h;
+
+ ret = ff_vaapi_vpp_config_output(outlink);
+ if (ret < 0)
+ return ret;
+
+ ctx->outer_frames_ref = av_hwframe_ctx_alloc(vpp_ctx->device_ref);
+ if (!ctx->outer_frames_ref) {
+ return AVERROR(ENOMEM);
+ }
+
+ ctx->outer_frames = (AVHWFramesContext*)ctx->outer_frames_ref->data;
+
+ ctx->outer_frames->format = AV_PIX_FMT_VAAPI;
+ ctx->outer_frames->sw_format = vpp_ctx->input_frames->sw_format;
+ ctx->outer_frames->width = ctx->outer_rect.width;
+ ctx->outer_frames->height = ctx->outer_rect.height;
+
+ return av_hwframe_ctx_init(ctx->outer_frames_ref);
+
+fail:
+ av_log(avctx, AV_LOG_ERROR,
+ "Error when evaluating the expression '%s'.\n",
+ expr);
+ return ret;
+}
+
+static int drawbox_vaapi_filter_frame(AVFilterLink *link, AVFrame *input_frame)
+{
+ AVFilterContext *avctx = link->dst;
+ AVFilterLink *outlink = avctx->outputs[0];
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+ DrawboxVAAPIContext *drawbox_ctx = avctx->priv;
+ AVFrame *output_frame = NULL;
+ VAProcPipelineParameterBuffer box_params;
+ VAProcPipelineParameterBuffer params[3];
+ VABlendState blend_state = {
+ .flags = VA_BLEND_GLOBAL_ALPHA,
+ };
+ VARectangle box[4];
+ int err, nb_params = 0;
+
+ if (!input_frame->hw_frames_ctx ||
+ vpp_ctx->va_context == VA_INVALID_ID) {
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ if (!drawbox_ctx->outer_frame) {
+ drawbox_ctx->outer_frame = av_frame_alloc();
+ if (!drawbox_ctx->outer_frame) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ err = av_hwframe_get_buffer(drawbox_ctx->outer_frames_ref, drawbox_ctx->outer_frame, 0);
+ if (err < 0) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ /* Create image for the outer rect */
+ err = ff_vaapi_vpp_init_params(avctx, &box_params,
+ input_frame, drawbox_ctx->outer_frame);
+ if (err < 0)
+ goto fail;
+
+ blend_state.global_alpha = 0.0f;
+ box_params.surface_region = &drawbox_ctx->outer_rect;
+ box_params.blend_state = &blend_state;
+ box_params.output_background_color = (drawbox_ctx->drawbox_rgba[3] << 24 |
+ drawbox_ctx->drawbox_rgba[0] << 16 |
+ drawbox_ctx->drawbox_rgba[1] << 8 |
+ drawbox_ctx->drawbox_rgba[2]);
+
+ err = ff_vaapi_vpp_render_picture(avctx, &box_params, drawbox_ctx->outer_frame);
+ if (err < 0)
+ goto fail;
+ }
+
+ /* Draw outer & inner rects on the input video, then we can get a box*/
+ output_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!output_frame) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ err = av_frame_copy_props(output_frame, input_frame);
+ if (err < 0)
+ goto fail;
+
+ err = ff_vaapi_vpp_init_params(avctx, &params[nb_params],
+ input_frame, output_frame);
+ if (err < 0)
+ goto fail;
+
+ box[0].x = 0;
+ box[0].y = 0;
+ box[0].width = link->w;
+ box[0].height = link->h;
+ params[nb_params].surface_region = &box[0];
+ params[nb_params].output_background_color = 0;
+ nb_params++;
+
+ err = ff_vaapi_vpp_init_params(avctx, &params[nb_params],
+ drawbox_ctx->outer_frame, output_frame);
+ if (err < 0)
+ goto fail;
+
+ box[1] = drawbox_ctx->outer_rect;
+ if (drawbox_ctx->drawbox_rgba[3] != 255 && !drawbox_ctx->replace) {
+ blend_state.global_alpha = (float)drawbox_ctx->drawbox_rgba[3] / 255;
+ params[nb_params].blend_state = &blend_state;
+ }
+ params[nb_params].output_region = &box[1];
+ params[nb_params].output_background_color = 0;
+ nb_params++;
+
+ if (!drawbox_ctx->fill) {
+ box[3] = box[2] = drawbox_ctx->inner_rect;
+ params[nb_params] = params[0];
+ params[nb_params].surface_region = &box[2];
+ params[nb_params].output_region = &box[3];
+ params[nb_params].output_background_color = 0;
+ nb_params++;
+ }
+
+ err = ff_vaapi_vpp_render_pictures(avctx, params, nb_params, output_frame);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&input_frame);
+
+ return ff_filter_frame(outlink, output_frame);
+
+fail:
+ av_frame_free(&input_frame);
+ av_frame_free(&output_frame);
+ return err;
+}
+
+static av_cold int drawbox_vaapi_init(AVFilterContext *avctx)
+{
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+
+ ff_vaapi_vpp_ctx_init(avctx);
+ vpp_ctx->pipeline_uninit = ff_vaapi_vpp_pipeline_uninit;
+ vpp_ctx->output_format = AV_PIX_FMT_NONE;
+
+ return 0;
+}
+
+static av_cold void drawbox_vaapi_uninit(AVFilterContext *avctx)
+{
+ DrawboxVAAPIContext *ctx = avctx->priv;
+
+ av_frame_free(&ctx->outer_frame);
+ av_buffer_unref(&ctx->outer_frames_ref);
+ ff_vaapi_vpp_ctx_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(DrawboxVAAPIContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption drawbox_vaapi_options[] = {
+ { "x", "set horizontal position of the left box edge", OFFSET(x_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "y", "set vertical position of the top box edge", OFFSET(y_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "width", "set width of the box", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "w", "set width of the box", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "height", "set height of the box", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "h", "set height of the box", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str="0" }, 0, 0, FLAGS },
+ { "color", "set color of the box", OFFSET(drawbox_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, 0, 0, FLAGS },
+ { "c", "set color of the box", OFFSET(drawbox_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, 0, 0, FLAGS },
+ { "thickness", "set the box thickness", OFFSET(t_expr), AV_OPT_TYPE_STRING, { .str="3" }, 0, 0, FLAGS },
+ { "t", "set the box thickness", OFFSET(t_expr), AV_OPT_TYPE_STRING, { .str="3" }, 0, 0, FLAGS },
+ { "replace", "replace color", OFFSET(replace), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(drawbox_vaapi);
+
+static const AVFilterPad drawbox_vaapi_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = drawbox_vaapi_filter_frame,
+ .config_props = &ff_vaapi_vpp_config_input,
+ },
+};
+
+static const AVFilterPad drawbox_vaapi_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &drawbox_vaapi_config_output,
+ },
+};
+
+const AVFilter ff_vf_drawbox_vaapi = {
+ .name = "drawbox_vaapi",
+ .description = NULL_IF_CONFIG_SMALL("Draw a colored box on the input video."),
+ .priv_size = sizeof(DrawboxVAAPIContext),
+ .priv_class = &drawbox_vaapi_class,
+ .init = &drawbox_vaapi_init,
+ .uninit = &drawbox_vaapi_uninit,
+ FILTER_INPUTS(drawbox_vaapi_inputs),
+ FILTER_OUTPUTS(drawbox_vaapi_outputs),
+ FILTER_QUERY_FUNC(&ff_vaapi_vpp_query_formats),
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
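drawbox_vaapi re-evaluates the x/y/w/h/t expressions over several passes so they may reference one another, with only the final pass allowed to fail. A reduced sketch of that idiom for two mutually-referential expressions; the function, its parameters and NUM_PASSES are illustrative:

#include "libavutil/eval.h"

#define NUM_PASSES 2

/* Evaluate two expressions that may reference each other's result.
 * Earlier passes seed the variables; only the last pass may fail. */
static int eval_pair(double *w, double *h,
                     const char *w_expr, const char *h_expr,
                     const char *const *var_names, double *var_values,
                     int var_w, int var_h, void *log_ctx)
{
    for (int i = 0; i <= NUM_PASSES; i++) {
        double res;
        int ret = av_expr_parse_and_eval(&res, w_expr, var_names, var_values,
                                         NULL, NULL, NULL, NULL, NULL, 0, log_ctx);
        if (ret < 0 && i == NUM_PASSES)
            return ret;
        *w = var_values[var_w] = res;

        ret = av_expr_parse_and_eval(&res, h_expr, var_names, var_values,
                                     NULL, NULL, NULL, NULL, NULL, 0, log_ctx);
        if (ret < 0 && i == NUM_PASSES)
            return ret;
        *h = var_values[var_h] = res;
    }
    return 0;
}

Earlier passes merely seed var_values, so an expression for w that references h (or vice versa) converges once both have been evaluated at least once.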
diff --git a/libavfilter/vf_geq.c b/libavfilter/vf_geq.c
index dbe50e5250..0efbce4d4f 100644
--- a/libavfilter/vf_geq.c
+++ b/libavfilter/vf_geq.c
@@ -112,8 +112,12 @@ static inline double getpix(void *priv, double x, double y, int plane)
return 0;
if (geq->interpolation == INTERP_BILINEAR) {
- xi = x = av_clipd(x, 0, w - 2);
- yi = y = av_clipd(y, 0, h - 2);
+ int xn, yn;
+
+ xi = x = av_clipd(x, 0, w - 1);
+ yi = y = av_clipd(y, 0, h - 1);
+ xn = FFMIN(xi + 1, w - 1);
+ yn = FFMIN(yi + 1, h - 1);
x -= xi;
y -= yi;
@@ -122,17 +126,17 @@ static inline double getpix(void *priv, double x, double y, int plane)
const uint16_t *src16 = (const uint16_t*)src;
linesize /= 2;
- return (1-y)*((1-x)*src16[xi + yi * linesize] + x*src16[xi + 1 + yi * linesize])
- + y *((1-x)*src16[xi + (yi+1) * linesize] + x*src16[xi + 1 + (yi+1) * linesize]);
+ return (1-y)*((1-x)*src16[xi + yi * linesize] + x*src16[xn + yi * linesize])
+ + y *((1-x)*src16[xi + yn * linesize] + x*src16[xn + yn * linesize]);
} else if (geq->bps == 32) {
const float *src32 = (const float*)src;
linesize /= 4;
- return (1-y)*((1-x)*src32[xi + yi * linesize] + x*src32[xi + 1 + yi * linesize])
- + y *((1-x)*src32[xi + (yi+1) * linesize] + x*src32[xi + 1 + (yi+1) * linesize]);
+ return (1-y)*((1-x)*src32[xi + yi * linesize] + x*src32[xn + yi * linesize])
+ + y *((1-x)*src32[xi + yn * linesize] + x*src32[xn + yn * linesize]);
} else if (geq->bps == 8) {
- return (1-y)*((1-x)*src[xi + yi * linesize] + x*src[xi + 1 + yi * linesize])
- + y *((1-x)*src[xi + (yi+1) * linesize] + x*src[xi + 1 + (yi+1) * linesize]);
+ return (1-y)*((1-x)*src[xi + yi * linesize] + x*src[xn + yi * linesize])
+ + y *((1-x)*src[xi + yn * linesize] + x*src[xn + yn * linesize]);
}
} else {
xi = av_clipd(x, 0, w - 1);
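The corrected getpix() clamps the +1 neighbour indices rather than the sample position, so pixels on the last row or column interpolate against themselves instead of reading one element past the plane. A standalone sketch of that access pattern for an 8-bit plane (FFMIN is redefined here only to keep the example self-contained):

#include <stddef.h>
#include <stdint.h>

#define FFMIN(a, b) ((a) < (b) ? (a) : (b))

/* Bilinear sample at (x, y) from an 8-bit plane of size w x h.
 * The +1 neighbours are clamped to the last row/column, so samples on the
 * right or bottom edge blend with themselves instead of overreading. */
static double sample_bilinear8(const uint8_t *src, ptrdiff_t linesize,
                               int w, int h, double x, double y)
{
    int xi, yi, xn, yn;

    if (x < 0) x = 0;
    if (y < 0) y = 0;
    if (x > w - 1) x = w - 1;
    if (y > h - 1) y = h - 1;

    xi = (int)x;
    yi = (int)y;
    xn = FFMIN(xi + 1, w - 1);
    yn = FFMIN(yi + 1, h - 1);
    x -= xi;
    y -= yi;

    return (1 - y) * ((1 - x) * src[xi + yi * linesize] + x * src[xn + yi * linesize])
         +      y  * ((1 - x) * src[xi + yn * linesize] + x * src[xn + yn * linesize]);
}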
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index b3ddd3e69f..d8ceb2a424 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -1111,6 +1111,8 @@ static const enum AVPixelFormat pix_fmts[] = {
AV_PIX_FMT_NONE
};
+#if CONFIG_LUT3D_FILTER || CONFIG_HALDCLUT_FILTER
+
static int config_input(AVFilterLink *inlink)
{
int depth, is16bit, isfloat, planar;
@@ -1207,8 +1209,6 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
return config_input(ctx->inputs[0]);
}
-#if CONFIG_LUT3D_FILTER || CONFIG_HALDCLUT_FILTER
-
/* These options are shared between several filters;
* &lut3d_haldclut_options[COMMON_OPTIONS_OFFSET] must always
* point to the first of the COMMON_OPTIONS. */
diff --git a/libavfilter/vf_overlay_qsv.c b/libavfilter/vf_overlay_qsv.c
index 0f52c93245..059602fe03 100644
--- a/libavfilter/vf_overlay_qsv.c
+++ b/libavfilter/vf_overlay_qsv.c
@@ -228,13 +228,16 @@ static int process_frame(FFFrameSync *fs)
{
AVFilterContext *ctx = fs->parent;
QSVVPPContext *qsv = fs->opaque;
- AVFrame *frame = NULL;
+ AVFrame *frame = NULL, *propref = NULL;
int ret = 0, i;
for (i = 0; i < ctx->nb_inputs; i++) {
ret = ff_framesync_get_frame(fs, i, &frame, 0);
- if (ret == 0)
- ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame);
+ if (ret == 0) {
+ if (i == 0)
+ propref = frame;
+ ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame, propref);
+ }
if (ret < 0 && ret != AVERROR(EAGAIN))
break;
}
diff --git a/libavfilter/vf_pad_vaapi.c b/libavfilter/vf_pad_vaapi.c
new file mode 100644
index 0000000000..98f6285222
--- /dev/null
+++ b/libavfilter/vf_pad_vaapi.c
@@ -0,0 +1,283 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/colorspace.h"
+#include "libavutil/eval.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "vaapi_vpp.h"
+#include "video.h"
+
+static const char *const var_names[] = {
+ "in_w", "iw",
+ "in_h", "ih",
+ "out_w", "ow",
+ "out_h", "oh",
+ "x",
+ "y",
+ "a",
+ "sar",
+ "dar",
+ NULL
+};
+
+enum var_name {
+ VAR_IN_W, VAR_IW,
+ VAR_IN_H, VAR_IH,
+ VAR_OUT_W, VAR_OW,
+ VAR_OUT_H, VAR_OH,
+ VAR_X,
+ VAR_Y,
+ VAR_A,
+ VAR_SAR,
+ VAR_DAR,
+ VARS_NB
+};
+
+typedef struct PadVAAPIContext {
+ VAAPIVPPContext vpp_ctx; // must be the first field
+ VARectangle rect;
+
+ char *w_expr;
+ char *h_expr;
+ char *x_expr;
+ char *y_expr;
+ AVRational aspect;
+
+ int w, h;
+ int x, y;
+ uint8_t pad_rgba[4];
+} PadVAAPIContext;
+
+static int pad_vaapi_config_output(AVFilterLink *outlink)
+{
+ AVFilterContext *avctx = outlink->src;
+ AVFilterLink *inlink = avctx->inputs[0];
+ PadVAAPIContext *ctx = avctx->priv;
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+ AVRational adjusted_aspect = ctx->aspect;
+ double var_values[VARS_NB], res;
+ int err, ret;
+ char *expr;
+
+ var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w;
+ var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h;
+ var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
+ var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN;
+ var_values[VAR_A] = (double) inlink->w / inlink->h;
+ var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ?
+ (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR];
+
+ av_expr_parse_and_eval(&res, (expr = ctx->w_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx);
+ ctx->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->h_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ return ret;
+ ctx->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
+ if (!ctx->h)
+ var_values[VAR_OUT_H] = var_values[VAR_OH] = ctx->h = inlink->h;
+
+ /* evaluate the width again, as it may depend on the evaluated output height */
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->w_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ return ret;
+ ctx->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+ if (!ctx->w)
+ var_values[VAR_OUT_W] = var_values[VAR_OW] = ctx->w = inlink->w;
+
+ if (adjusted_aspect.num && adjusted_aspect.den) {
+ adjusted_aspect = av_div_q(adjusted_aspect, inlink->sample_aspect_ratio);
+ if (ctx->h < av_rescale(ctx->w, adjusted_aspect.den, adjusted_aspect.num)) {
+ ctx->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = av_rescale(ctx->w, adjusted_aspect.den, adjusted_aspect.num);
+ } else {
+ ctx->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = av_rescale(ctx->h, adjusted_aspect.num, adjusted_aspect.den);
+ }
+ }
+
+ /* evaluate x and y */
+ av_expr_parse_and_eval(&res, (expr = ctx->x_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx);
+ ctx->x = var_values[VAR_X] = res;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->y_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ return ret;
+ ctx->y = var_values[VAR_Y] = res;
+ /* evaluate x again, as it may depend on the evaluated y value */
+ if ((ret = av_expr_parse_and_eval(&res, (expr = ctx->x_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ return ret;
+ ctx->x = var_values[VAR_X] = res;
+
+ if (ctx->x < 0 || ctx->x + inlink->w > ctx->w)
+ ctx->x = var_values[VAR_X] = (ctx->w - inlink->w) / 2;
+ if (ctx->y < 0 || ctx->y + inlink->h > ctx->h)
+ ctx->y = var_values[VAR_Y] = (ctx->h - inlink->h) / 2;
+
+ /* sanity check params */
+ if (ctx->w < inlink->w || ctx->h < inlink->h) {
+ av_log(ctx, AV_LOG_ERROR, "Padded dimensions cannot be smaller than input dimensions.\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (ctx->w > avctx->inputs[0]->w) {
+ vpp_ctx->output_width = ctx->w;
+ } else {
+ vpp_ctx->output_width = avctx->inputs[0]->w;
+ }
+
+ if (ctx->h > avctx->inputs[0]->h) {
+ vpp_ctx->output_height = ctx->h;
+ } else {
+ vpp_ctx->output_height = avctx->inputs[0]->h;
+ }
+
+ if (ctx->x + avctx->inputs[0]->w > vpp_ctx->output_width ||
+ ctx->y + avctx->inputs[0]->h > vpp_ctx->output_height) {
+ return AVERROR(EINVAL);
+ }
+
+ err = ff_vaapi_vpp_config_output(outlink);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int pad_vaapi_filter_frame(AVFilterLink *link, AVFrame *input_frame)
+{
+ AVFilterContext *avctx = link->dst;
+ AVFilterLink *outlink = avctx->outputs[0];
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+ PadVAAPIContext *pad_ctx = avctx->priv;
+ AVFrame *output_frame = NULL;
+ VAProcPipelineParameterBuffer params;
+ int err;
+
+ if (!input_frame->hw_frames_ctx ||
+ vpp_ctx->va_context == VA_INVALID_ID) {
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ output_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!output_frame) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ err = av_frame_copy_props(output_frame, input_frame);
+ if (err < 0)
+ goto fail;
+
+ err = ff_vaapi_vpp_init_params(avctx, &params,
+ input_frame, output_frame);
+ if (err < 0)
+ goto fail;
+
+ pad_ctx->rect.x = pad_ctx->x;
+ pad_ctx->rect.y = pad_ctx->y;
+ pad_ctx->rect.width = link->w;
+ pad_ctx->rect.height = link->h;
+ params.output_region = &pad_ctx->rect;
+
+ params.output_background_color = (pad_ctx->pad_rgba[3] << 24 |
+ pad_ctx->pad_rgba[0] << 16 |
+ pad_ctx->pad_rgba[1] << 8 |
+ pad_ctx->pad_rgba[2]);
+
+ err = ff_vaapi_vpp_render_picture(avctx, &params, output_frame);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&input_frame);
+
+ return ff_filter_frame(outlink, output_frame);
+
+fail:
+ av_frame_free(&input_frame);
+ av_frame_free(&output_frame);
+ return err;
+}
+
+static av_cold int pad_vaapi_init(AVFilterContext *avctx)
+{
+ VAAPIVPPContext *vpp_ctx = avctx->priv;
+
+ ff_vaapi_vpp_ctx_init(avctx);
+ vpp_ctx->pipeline_uninit = ff_vaapi_vpp_pipeline_uninit;
+ vpp_ctx->output_format = AV_PIX_FMT_NONE;
+
+ return 0;
+}
+
+#define OFFSET(x) offsetof(PadVAAPIContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption pad_vaapi_options[] = {
+ { "width", "set the pad area width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, 0, 0, FLAGS },
+ { "w", "set the pad area width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, 0, 0, FLAGS },
+ { "height", "set the pad area height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, 0, 0, FLAGS },
+ { "h", "set the pad area height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, 0, 0, FLAGS },
+ { "x", "set the x offset for the input image position", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "0"}, 0, INT16_MAX, FLAGS },
+ { "y", "set the y offset for the input image position", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "0"}, 0, INT16_MAX, FLAGS },
+ { "color", "set the color of the padded area border", OFFSET(pad_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, 0, 0, FLAGS },
+ { "aspect", "pad to fit an aspect instead of a resolution", OFFSET(aspect), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, INT16_MAX, FLAGS },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(pad_vaapi);
+
+static const AVFilterPad pad_vaapi_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = pad_vaapi_filter_frame,
+ .config_props = &ff_vaapi_vpp_config_input,
+ },
+};
+
+static const AVFilterPad pad_vaapi_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &pad_vaapi_config_output,
+ },
+};
+
+const AVFilter ff_vf_pad_vaapi = {
+ .name = "pad_vaapi",
+ .description = NULL_IF_CONFIG_SMALL("Pad the input video."),
+ .priv_size = sizeof(PadVAAPIContext),
+ .priv_class = &pad_vaapi_class,
+ .init = &pad_vaapi_init,
+ .uninit = &ff_vaapi_vpp_ctx_uninit,
+ FILTER_INPUTS(pad_vaapi_inputs),
+ FILTER_OUTPUTS(pad_vaapi_outputs),
+ FILTER_QUERY_FUNC(&ff_vaapi_vpp_query_formats),
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index fc0e3802db..841075193e 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -29,6 +29,7 @@
#include "avfilter.h"
#include "formats.h"
+#include "framesync.h"
#include "internal.h"
#include "scale_eval.h"
#include "video.h"
@@ -58,6 +59,17 @@ static const char *const var_names[] = {
#if FF_API_FRAME_PKT
"pos",
#endif
+ "ref_w", "rw",
+ "ref_h", "rh",
+ "ref_a",
+ "ref_sar",
+ "ref_dar", "rdar",
+ "ref_hsub",
+ "ref_vsub",
+ "ref_n",
+ "ref_t",
+ "ref_pos",
+ /* Legacy variables for scale2ref */
"main_w",
"main_h",
"main_a",
@@ -88,6 +100,16 @@ enum var_name {
#if FF_API_FRAME_PKT
VAR_POS,
#endif
+ VAR_REF_W, VAR_RW,
+ VAR_REF_H, VAR_RH,
+ VAR_REF_A,
+ VAR_REF_SAR,
+ VAR_REF_DAR, VAR_RDAR,
+ VAR_REF_HSUB,
+ VAR_REF_VSUB,
+ VAR_REF_N,
+ VAR_REF_T,
+ VAR_REF_POS,
VAR_S2R_MAIN_W,
VAR_S2R_MAIN_H,
VAR_S2R_MAIN_A,
@@ -113,6 +135,7 @@ typedef struct ScaleContext {
struct SwsContext *isws[2]; ///< software scaler context for interlaced material
// context used for forwarding options to sws
struct SwsContext *sws_opts;
+ FFFrameSync fs;
/**
* New dimensions. Special values are:
@@ -129,6 +152,7 @@ typedef struct ScaleContext {
int input_is_pal; ///< set to 1 if the input format is paletted
int output_is_pal; ///< set to 1 if the output format is paletted
int interlaced;
+ int uses_ref;
char *w_expr; ///< width expression string
char *h_expr; ///< height expression string
@@ -188,6 +212,38 @@ static int check_exprs(AVFilterContext *ctx)
av_log(ctx, AV_LOG_WARNING, "Circular references detected for width '%s' and height '%s' - possibly invalid.\n", scale->w_expr, scale->h_expr);
}
+ if (vars_w[VAR_REF_W] || vars_h[VAR_REF_W] ||
+ vars_w[VAR_RW] || vars_h[VAR_RW] ||
+ vars_w[VAR_REF_H] || vars_h[VAR_REF_H] ||
+ vars_w[VAR_RH] || vars_h[VAR_RH] ||
+ vars_w[VAR_REF_A] || vars_h[VAR_REF_A] ||
+ vars_w[VAR_REF_SAR] || vars_h[VAR_REF_SAR] ||
+ vars_w[VAR_REF_DAR] || vars_h[VAR_REF_DAR] ||
+ vars_w[VAR_RDAR] || vars_h[VAR_RDAR] ||
+ vars_w[VAR_REF_HSUB] || vars_h[VAR_REF_HSUB] ||
+ vars_w[VAR_REF_VSUB] || vars_h[VAR_REF_VSUB] ||
+ vars_w[VAR_REF_N] || vars_h[VAR_REF_N] ||
+ vars_w[VAR_REF_T] || vars_h[VAR_REF_T] ||
+ vars_w[VAR_REF_POS] || vars_h[VAR_REF_POS]) {
+ scale->uses_ref = 1;
+ }
+
+ if (ctx->filter != &ff_vf_scale2ref &&
+ (vars_w[VAR_S2R_MAIN_W] || vars_h[VAR_S2R_MAIN_W] ||
+ vars_w[VAR_S2R_MAIN_H] || vars_h[VAR_S2R_MAIN_H] ||
+ vars_w[VAR_S2R_MAIN_A] || vars_h[VAR_S2R_MAIN_A] ||
+ vars_w[VAR_S2R_MAIN_SAR] || vars_h[VAR_S2R_MAIN_SAR] ||
+ vars_w[VAR_S2R_MAIN_DAR] || vars_h[VAR_S2R_MAIN_DAR] ||
+ vars_w[VAR_S2R_MDAR] || vars_h[VAR_S2R_MDAR] ||
+ vars_w[VAR_S2R_MAIN_HSUB] || vars_h[VAR_S2R_MAIN_HSUB] ||
+ vars_w[VAR_S2R_MAIN_VSUB] || vars_h[VAR_S2R_MAIN_VSUB] ||
+ vars_w[VAR_S2R_MAIN_N] || vars_h[VAR_S2R_MAIN_N] ||
+ vars_w[VAR_S2R_MAIN_T] || vars_h[VAR_S2R_MAIN_T] ||
+ vars_w[VAR_S2R_MAIN_POS] || vars_h[VAR_S2R_MAIN_POS]) ) {
+ av_log(ctx, AV_LOG_ERROR, "Expressions with scale2ref variables are not valid in scale filter.\n");
+ return AVERROR(EINVAL);
+ }
+
if (ctx->filter != &ff_vf_scale2ref &&
(vars_w[VAR_S2R_MAIN_W] || vars_h[VAR_S2R_MAIN_W] ||
vars_w[VAR_S2R_MAIN_H] || vars_h[VAR_S2R_MAIN_H] ||
@@ -287,6 +343,8 @@ static av_cold int preinit(AVFilterContext *ctx)
if (ret < 0)
return ret;
+ ff_framesync_preinit(&scale->fs);
+
return 0;
}
@@ -302,12 +360,17 @@ static const int sws_colorspaces[] = {
-1
};
+static int do_scale(FFFrameSync *fs);
+
static av_cold int init(AVFilterContext *ctx)
{
ScaleContext *scale = ctx->priv;
int64_t threads;
int ret;
+ if (ctx->filter == &ff_vf_scale2ref)
+ av_log(ctx, AV_LOG_WARNING, "scale2ref is deprecated, use scale=rw:rh instead\n");
+
if (scale->size_str && (scale->w_expr || scale->h_expr)) {
av_log(ctx, AV_LOG_ERROR,
"Size and width/height expressions cannot be set at the same time.\n");
@@ -379,6 +442,16 @@ static av_cold int init(AVFilterContext *ctx)
if (!threads)
av_opt_set_int(scale->sws_opts, "threads", ff_filter_get_nb_threads(ctx), 0);
+ if (ctx->filter != &ff_vf_scale2ref && scale->uses_ref) {
+ AVFilterPad pad = {
+ .name = "ref",
+ .type = AVMEDIA_TYPE_VIDEO,
+ };
+ ret = ff_append_inpad(ctx, &pad);
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
}
@@ -388,6 +461,7 @@ static av_cold void uninit(AVFilterContext *ctx)
av_expr_free(scale->w_pexpr);
av_expr_free(scale->h_pexpr);
scale->w_pexpr = scale->h_pexpr = NULL;
+ ff_framesync_uninit(&scale->fs);
sws_freeContext(scale->sws_opts);
sws_freeContext(scale->sws);
sws_freeContext(scale->isws[0]);
@@ -499,6 +573,20 @@ static int scale_eval_dimensions(AVFilterContext *ctx)
scale->var_values[VAR_S2R_MAIN_VSUB] = 1 << main_desc->log2_chroma_h;
}
+ if (scale->uses_ref) {
+ const AVFilterLink *reflink = ctx->inputs[1];
+ const AVPixFmtDescriptor *ref_desc = av_pix_fmt_desc_get(reflink->format);
+ scale->var_values[VAR_REF_W] = scale->var_values[VAR_RW] = reflink->w;
+ scale->var_values[VAR_REF_H] = scale->var_values[VAR_RH] = reflink->h;
+ scale->var_values[VAR_REF_A] = (double) reflink->w / reflink->h;
+ scale->var_values[VAR_REF_SAR] = reflink->sample_aspect_ratio.num ?
+ (double) reflink->sample_aspect_ratio.num / reflink->sample_aspect_ratio.den : 1;
+ scale->var_values[VAR_REF_DAR] = scale->var_values[VAR_RDAR] =
+ scale->var_values[VAR_REF_A] * scale->var_values[VAR_REF_SAR];
+ scale->var_values[VAR_REF_HSUB] = 1 << ref_desc->log2_chroma_w;
+ scale->var_values[VAR_REF_VSUB] = 1 << ref_desc->log2_chroma_h;
+ }
+
res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL);
eval_w = scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res;
@@ -677,6 +765,29 @@ static int config_props(AVFilterLink *outlink)
flags_val);
av_freep(&flags_val);
+ if (ctx->filter != &ff_vf_scale2ref) {
+ ff_framesync_uninit(&scale->fs);
+ ret = ff_framesync_init(&scale->fs, ctx, ctx->nb_inputs);
+ if (ret < 0)
+ return ret;
+ scale->fs.on_event = do_scale;
+ scale->fs.in[0].time_base = ctx->inputs[0]->time_base;
+ scale->fs.in[0].sync = 1;
+ scale->fs.in[0].before = EXT_STOP;
+ scale->fs.in[0].after = EXT_STOP;
+ if (scale->uses_ref) {
+ av_assert0(ctx->nb_inputs == 2);
+ scale->fs.in[1].time_base = ctx->inputs[1]->time_base;
+ scale->fs.in[1].sync = 0;
+ scale->fs.in[1].before = EXT_NULL;
+ scale->fs.in[1].after = EXT_INFINITY;
+ }
+
+ ret = ff_framesync_configure(&scale->fs);
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
fail:
@@ -894,6 +1005,71 @@ scale:
return ret;
}
+static int do_scale(FFFrameSync *fs)
+{
+ AVFilterContext *ctx = fs->parent;
+ ScaleContext *scale = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVFrame *out, *in = NULL, *ref = NULL;
+ int ret = 0, frame_changed;
+
+ ret = ff_framesync_get_frame(fs, 0, &in, 1);
+ if (ret < 0)
+ goto err;
+
+ if (scale->uses_ref) {
+ ret = ff_framesync_get_frame(fs, 1, &ref, 0);
+ if (ret < 0)
+ goto err;
+ }
+
+ if (ref) {
+ AVFilterLink *reflink = ctx->inputs[1];
+ frame_changed = ref->width != reflink->w ||
+ ref->height != reflink->h ||
+ ref->format != reflink->format ||
+ ref->sample_aspect_ratio.den != reflink->sample_aspect_ratio.den ||
+ ref->sample_aspect_ratio.num != reflink->sample_aspect_ratio.num ||
+ ref->colorspace != reflink->colorspace ||
+ ref->color_range != reflink->color_range;
+
+ if (frame_changed) {
+ reflink->format = ref->format;
+ reflink->w = ref->width;
+ reflink->h = ref->height;
+ reflink->sample_aspect_ratio.num = ref->sample_aspect_ratio.num;
+ reflink->sample_aspect_ratio.den = ref->sample_aspect_ratio.den;
+ reflink->colorspace = ref->colorspace;
+ reflink->color_range = ref->color_range;
+
+ ret = config_props(outlink);
+ if (ret < 0)
+ goto err;
+ }
+
+ if (scale->eval_mode == EVAL_MODE_FRAME) {
+ scale->var_values[VAR_REF_N] = reflink->frame_count_out;
+ scale->var_values[VAR_REF_T] = TS2T(ref->pts, reflink->time_base);
+#if FF_API_FRAME_PKT
+FF_DISABLE_DEPRECATION_WARNINGS
+ scale->var_values[VAR_REF_POS] = ref->pkt_pos == -1 ? NAN : ref->pkt_pos;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+ }
+ }
+
+ ret = scale_frame(ctx->inputs[0], in, &out);
+ if (out) {
+ out->pts = av_rescale_q(fs->pts, fs->time_base, outlink->time_base);
+ return ff_filter_frame(outlink, out);
+ }
+
+err:
+ if (ret < 0)
+ av_frame_free(&in);
+ return ret;
+}
+
static int filter_frame(AVFilterLink *link, AVFrame *in)
{
AVFilterContext *ctx = link->dst;
@@ -972,11 +1148,24 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
return ret;
}
+static int activate(AVFilterContext *ctx)
+{
+ ScaleContext *scale = ctx->priv;
+ return ff_framesync_activate(&scale->fs);
+}
+
static const AVClass *child_class_iterate(void **iter)
{
- const AVClass *c = *iter ? NULL : sws_get_class();
- *iter = (void*)(uintptr_t)c;
- return c;
+ switch ((uintptr_t) *iter) {
+ case 0:
+ *iter = (void*)(uintptr_t) 1;
+ return sws_get_class();
+ case 1:
+ *iter = (void*)(uintptr_t) 2;
+ return &ff_framesync_class;
+ }
+
+ return NULL;
}
static void *child_next(void *obj, void *prev)
@@ -984,6 +1173,8 @@ static void *child_next(void *obj, void *prev)
ScaleContext *s = obj;
if (!prev)
return s->sws_opts;
+ if (prev == s->sws_opts)
+ return &s->fs;
return NULL;
}
@@ -998,8 +1189,8 @@ static const AVOption scale_options[] = {
{ "height","Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, .flags = TFLAGS },
{ "flags", "Flags to pass to libswscale", OFFSET(flags_str), AV_OPT_TYPE_STRING, { .str = "" }, .flags = FLAGS },
{ "interl", "set interlacing", OFFSET(interlaced), AV_OPT_TYPE_BOOL, {.i64 = 0 }, -1, 1, FLAGS },
- { "size", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS },
- { "s", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS },
+ { "size", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, .flags = FLAGS },
+ { "s", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, .flags = FLAGS },
{ "in_color_matrix", "set input YCbCr type", OFFSET(in_color_matrix), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, AVCOL_SPC_NB-1, .flags = FLAGS, .unit = "color" },
{ "out_color_matrix", "set output YCbCr type", OFFSET(out_color_matrix), AV_OPT_TYPE_INT, { .i64 = AVCOL_SPC_UNSPECIFIED }, 0, AVCOL_SPC_NB-1, .flags = FLAGS, .unit = "color"},
{ "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, 0, 0, FLAGS, .unit = "color" },
@@ -1038,7 +1229,7 @@ static const AVOption scale_options[] = {
};
static const AVClass scale_class = {
- .class_name = "scale(2ref)",
+ .class_name = "scale",
.item_name = av_default_item_name,
.option = scale_options,
.version = LIBAVUTIL_VERSION_INT,
@@ -1051,7 +1242,6 @@ static const AVFilterPad avfilter_vf_scale_inputs[] = {
{
.name = "default",
.type = AVMEDIA_TYPE_VIDEO,
- .filter_frame = filter_frame,
},
};
@@ -1074,7 +1264,34 @@ const AVFilter ff_vf_scale = {
FILTER_INPUTS(avfilter_vf_scale_inputs),
FILTER_OUTPUTS(avfilter_vf_scale_outputs),
FILTER_QUERY_FUNC(query_formats),
+ .activate = activate,
.process_command = process_command,
+ .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
+};
+
+static const AVClass *scale2ref_child_class_iterate(void **iter)
+{
+ const AVClass *c = *iter ? NULL : sws_get_class();
+ *iter = (void*)(uintptr_t)c;
+ return c;
+}
+
+static void *scale2ref_child_next(void *obj, void *prev)
+{
+ ScaleContext *s = obj;
+ if (!prev)
+ return s->sws_opts;
+ return NULL;
+}
+
+static const AVClass scale2ref_class = {
+ .class_name = "scale(2ref)",
+ .item_name = av_default_item_name,
+ .option = scale_options,
+ .version = LIBAVUTIL_VERSION_INT,
+ .category = AV_CLASS_CATEGORY_FILTER,
+ .child_class_iterate = scale2ref_child_class_iterate,
+ .child_next = scale2ref_child_next,
};
static const AVFilterPad avfilter_vf_scale2ref_inputs[] = {
@@ -1112,7 +1329,7 @@ const AVFilter ff_vf_scale2ref = {
.init = init,
.uninit = uninit,
.priv_size = sizeof(ScaleContext),
- .priv_class = &scale_class,
+ .priv_class = &scale2ref_class,
FILTER_INPUTS(avfilter_vf_scale2ref_inputs),
FILTER_OUTPUTS(avfilter_vf_scale2ref_outputs),
FILTER_QUERY_FUNC(query_formats),
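Taken together, these scale changes let the plain scale filter accept an optional second "ref" input, requested implicitly whenever a ref_*/rw/rh variable appears in the size expressions and kept in lockstep with the main input via FFFrameSync. The deprecation message points at scale=rw:rh as the replacement for scale2ref; as an illustrative (not diff-provided) invocation, that would look like -filter_complex "[main][ref]scale=rw:rh" with the reference stream supplied as the second filtergraph input.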
diff --git a/libavfilter/vf_signalstats.c b/libavfilter/vf_signalstats.c
index c2358c66cb..50c9df4298 100644
--- a/libavfilter/vf_signalstats.c
+++ b/libavfilter/vf_signalstats.c
@@ -538,13 +538,12 @@ static unsigned compute_bit_depth(uint16_t mask)
return av_popcount(mask);
}
-static int filter_frame8(AVFilterLink *link, AVFrame *in)
+static int filter_frame(AVFilterLink *link, AVFrame *in)
{
AVFilterContext *ctx = link->dst;
SignalstatsContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
AVFrame *out = in;
- int i, j;
int w = 0, cw = 0, // in
pw = 0, cpw = 0; // prev
int fil;
@@ -563,20 +562,17 @@ static int filter_frame8(AVFilterLink *link, AVFrame *in)
int accy, accu, accv;
int accsat, acchue = 0;
int medhue, maxhue;
- int toty = 0, totu = 0, totv = 0, totsat=0;
- int tothue = 0;
- int dify = 0, difu = 0, difv = 0;
+ int64_t toty = 0, totu = 0, totv = 0, totsat=0;
+ int64_t tothue = 0;
+ int64_t dify = 0, difu = 0, difv = 0;
uint16_t masky = 0, masku = 0, maskv = 0;
- int ret;
+
int filtot[FILT_NUMB] = {0};
AVFrame *prev;
-
+ int ret;
AVFrame *sat = s->frame_sat;
AVFrame *hue = s->frame_hue;
- const uint8_t *p_sat = sat->data[0];
- const uint8_t *p_hue = hue->data[0];
- const int lsz_sat = sat->linesize[0];
- const int lsz_hue = hue->linesize[0];
+ const int hbd = s->depth > 8;
ThreadDataHueSatMetrics td_huesat = {
.src = in,
.dst_sat = sat,
@@ -602,277 +598,93 @@ static int filter_frame8(AVFilterLink *link, AVFrame *in)
}
}
- ff_filter_execute(ctx, compute_sat_hue_metrics8, &td_huesat,
+ ff_filter_execute(ctx, hbd ? compute_sat_hue_metrics16
+ : compute_sat_hue_metrics8, &td_huesat,
NULL, FFMIN(s->chromah, ff_filter_get_nb_threads(ctx)));
- // Calculate luma histogram and difference with previous frame or field.
memset(s->histy, 0, s->maxsize * sizeof(*s->histy));
- for (j = 0; j < link->h; j++) {
- for (i = 0; i < link->w; i++) {
- const int yuv = in->data[0][w + i];
-
- masky |= yuv;
- histy[yuv]++;
- dify += abs(yuv - prev->data[0][pw + i]);
- }
- w += in->linesize[0];
- pw += prev->linesize[0];
- }
-
- // Calculate chroma histogram and difference with previous frame or field.
memset(s->histu, 0, s->maxsize * sizeof(*s->histu));
memset(s->histv, 0, s->maxsize * sizeof(*s->histv));
memset(s->histsat, 0, s->maxsize * sizeof(*s->histsat));
- for (j = 0; j < s->chromah; j++) {
- for (i = 0; i < s->chromaw; i++) {
- const int yuvu = in->data[1][cw+i];
- const int yuvv = in->data[2][cw+i];
-
- masku |= yuvu;
- maskv |= yuvv;
- histu[yuvu]++;
- difu += abs(yuvu - prev->data[1][cpw+i]);
- histv[yuvv]++;
- difv += abs(yuvv - prev->data[2][cpw+i]);
-
- histsat[p_sat[i]]++;
- histhue[((int16_t*)p_hue)[i]]++;
- }
- cw += in->linesize[1];
- cpw += prev->linesize[1];
- p_sat += lsz_sat;
- p_hue += lsz_hue;
- }
-
- for (fil = 0; fil < FILT_NUMB; fil ++) {
- if (s->filters & 1<<fil) {
- ThreadData td = {
- .in = in,
- .out = out != in && s->outfilter == fil ? out : NULL,
- };
- memset(s->jobs_rets, 0, s->nb_jobs * sizeof(*s->jobs_rets));
- ff_filter_execute(ctx, filters_def[fil].process8,
- &td, s->jobs_rets, s->nb_jobs);
- for (i = 0; i < s->nb_jobs; i++)
- filtot[fil] += s->jobs_rets[i];
- }
- }
-
- // find low / high based on histogram percentile
- // these only need to be calculated once.
-
- lowp = lrint(s->fs * 10 / 100.);
- highp = lrint(s->fs * 90 / 100.);
- clowp = lrint(s->cfs * 10 / 100.);
- chighp = lrint(s->cfs * 90 / 100.);
-
- accy = accu = accv = accsat = 0;
- for (fil = 0; fil < s->maxsize; fil++) {
- if (miny < 0 && histy[fil]) miny = fil;
- if (minu < 0 && histu[fil]) minu = fil;
- if (minv < 0 && histv[fil]) minv = fil;
- if (minsat < 0 && histsat[fil]) minsat = fil;
-
- if (histy[fil]) maxy = fil;
- if (histu[fil]) maxu = fil;
- if (histv[fil]) maxv = fil;
- if (histsat[fil]) maxsat = fil;
-
- toty += histy[fil] * fil;
- totu += histu[fil] * fil;
- totv += histv[fil] * fil;
- totsat += histsat[fil] * fil;
-
- accy += histy[fil];
- accu += histu[fil];
- accv += histv[fil];
- accsat += histsat[fil];
-
- if (lowy == -1 && accy >= lowp) lowy = fil;
- if (lowu == -1 && accu >= clowp) lowu = fil;
- if (lowv == -1 && accv >= clowp) lowv = fil;
- if (lowsat == -1 && accsat >= clowp) lowsat = fil;
-
- if (highy == -1 && accy >= highp) highy = fil;
- if (highu == -1 && accu >= chighp) highu = fil;
- if (highv == -1 && accv >= chighp) highv = fil;
- if (highsat == -1 && accsat >= chighp) highsat = fil;
- }
-
- maxhue = histhue[0];
- medhue = -1;
- for (fil = 0; fil < 360; fil++) {
- tothue += histhue[fil] * fil;
- acchue += histhue[fil];
-
- if (medhue == -1 && acchue > s->cfs / 2)
- medhue = fil;
- if (histhue[fil] > maxhue) {
- maxhue = histhue[fil];
- }
- }
-
- av_frame_free(&s->frame_prev);
- s->frame_prev = av_frame_clone(in);
-
-#define SET_META(key, fmt, val) do { \
- snprintf(metabuf, sizeof(metabuf), fmt, val); \
- av_dict_set(&out->metadata, "lavfi.signalstats." key, metabuf, 0); \
-} while (0)
-
- SET_META("YMIN", "%d", miny);
- SET_META("YLOW", "%d", lowy);
- SET_META("YAVG", "%g", 1.0 * toty / s->fs);
- SET_META("YHIGH", "%d", highy);
- SET_META("YMAX", "%d", maxy);
-
- SET_META("UMIN", "%d", minu);
- SET_META("ULOW", "%d", lowu);
- SET_META("UAVG", "%g", 1.0 * totu / s->cfs);
- SET_META("UHIGH", "%d", highu);
- SET_META("UMAX", "%d", maxu);
-
- SET_META("VMIN", "%d", minv);
- SET_META("VLOW", "%d", lowv);
- SET_META("VAVG", "%g", 1.0 * totv / s->cfs);
- SET_META("VHIGH", "%d", highv);
- SET_META("VMAX", "%d", maxv);
-
- SET_META("SATMIN", "%d", minsat);
- SET_META("SATLOW", "%d", lowsat);
- SET_META("SATAVG", "%g", 1.0 * totsat / s->cfs);
- SET_META("SATHIGH", "%d", highsat);
- SET_META("SATMAX", "%d", maxsat);
-
- SET_META("HUEMED", "%d", medhue);
- SET_META("HUEAVG", "%g", 1.0 * tothue / s->cfs);
- SET_META("YDIF", "%g", 1.0 * dify / s->fs);
- SET_META("UDIF", "%g", 1.0 * difu / s->cfs);
- SET_META("VDIF", "%g", 1.0 * difv / s->cfs);
-
- SET_META("YBITDEPTH", "%d", compute_bit_depth(masky));
- SET_META("UBITDEPTH", "%d", compute_bit_depth(masku));
- SET_META("VBITDEPTH", "%d", compute_bit_depth(maskv));
-
- for (fil = 0; fil < FILT_NUMB; fil ++) {
- if (s->filters & 1<<fil) {
- char metaname[128];
- snprintf(metabuf, sizeof(metabuf), "%g", 1.0 * filtot[fil] / s->fs);
- snprintf(metaname, sizeof(metaname), "lavfi.signalstats.%s", filters_def[fil].name);
- av_dict_set(&out->metadata, metaname, metabuf, 0);
+ if (hbd) {
+ const uint16_t *p_sat = (uint16_t *)sat->data[0];
+ const uint16_t *p_hue = (uint16_t *)hue->data[0];
+ const int lsz_sat = sat->linesize[0] / 2;
+ const int lsz_hue = hue->linesize[0] / 2;
+ // Calculate luma histogram and difference with previous frame or field.
+ for (int j = 0; j < link->h; j++) {
+ for (int i = 0; i < link->w; i++) {
+ const int yuv = AV_RN16(in->data[0] + w + i * 2);
+
+ masky |= yuv;
+ histy[yuv]++;
+ dify += abs(yuv - (int)AV_RN16(prev->data[0] + pw + i * 2));
+ }
+ w += in->linesize[0];
+ pw += prev->linesize[0];
}
- }
-
- if (in != out)
- av_frame_free(&in);
- return ff_filter_frame(outlink, out);
-}
-
-static int filter_frame16(AVFilterLink *link, AVFrame *in)
-{
- AVFilterContext *ctx = link->dst;
- SignalstatsContext *s = ctx->priv;
- AVFilterLink *outlink = ctx->outputs[0];
- AVFrame *out = in;
- int i, j;
- int w = 0, cw = 0, // in
- pw = 0, cpw = 0; // prev
- int fil;
- char metabuf[128];
- unsigned int *histy = s->histy,
- *histu = s->histu,
- *histv = s->histv,
- histhue[360] = {0},
- *histsat = s->histsat;
- int miny = -1, minu = -1, minv = -1;
- int maxy = -1, maxu = -1, maxv = -1;
- int lowy = -1, lowu = -1, lowv = -1;
- int highy = -1, highu = -1, highv = -1;
- int minsat = -1, maxsat = -1, lowsat = -1, highsat = -1;
- int lowp, highp, clowp, chighp;
- int accy, accu, accv;
- int accsat, acchue = 0;
- int medhue, maxhue;
- int64_t toty = 0, totu = 0, totv = 0, totsat=0;
- int64_t tothue = 0;
- int64_t dify = 0, difu = 0, difv = 0;
- uint16_t masky = 0, masku = 0, maskv = 0;
-
- int filtot[FILT_NUMB] = {0};
- AVFrame *prev;
- int ret;
- AVFrame *sat = s->frame_sat;
- AVFrame *hue = s->frame_hue;
- const uint16_t *p_sat = (uint16_t *)sat->data[0];
- const uint16_t *p_hue = (uint16_t *)hue->data[0];
- const int lsz_sat = sat->linesize[0] / 2;
- const int lsz_hue = hue->linesize[0] / 2;
- ThreadDataHueSatMetrics td_huesat = {
- .src = in,
- .dst_sat = sat,
- .dst_hue = hue,
- };
- if (!s->frame_prev)
- s->frame_prev = av_frame_clone(in);
-
- prev = s->frame_prev;
-
- if (s->outfilter != FILTER_NONE) {
- out = av_frame_clone(in);
- if (!out) {
- av_frame_free(&in);
- return AVERROR(ENOMEM);
- }
- ret = ff_inlink_make_frame_writable(link, &out);
- if (ret < 0) {
- av_frame_free(&out);
- av_frame_free(&in);
- return ret;
+ // Calculate chroma histogram and difference with previous frame or field.
+ for (int j = 0; j < s->chromah; j++) {
+ for (int i = 0; i < s->chromaw; i++) {
+ const int yuvu = AV_RN16(in->data[1] + cw + i * 2);
+ const int yuvv = AV_RN16(in->data[2] + cw + i * 2);
+
+ masku |= yuvu;
+ maskv |= yuvv;
+ histu[yuvu]++;
+ difu += abs(yuvu - (int)AV_RN16(prev->data[1] + cpw + i * 2));
+ histv[yuvv]++;
+ difv += abs(yuvv - (int)AV_RN16(prev->data[2] + cpw + i * 2));
+
+ histsat[p_sat[i]]++;
+ histhue[((int16_t*)p_hue)[i]]++;
+ }
+ cw += in->linesize[1];
+ cpw += prev->linesize[1];
+ p_sat += lsz_sat;
+ p_hue += lsz_hue;
}
- }
-
- ff_filter_execute(ctx, compute_sat_hue_metrics16, &td_huesat,
- NULL, FFMIN(s->chromah, ff_filter_get_nb_threads(ctx)));
-
- // Calculate luma histogram and difference with previous frame or field.
- memset(s->histy, 0, s->maxsize * sizeof(*s->histy));
- for (j = 0; j < link->h; j++) {
- for (i = 0; i < link->w; i++) {
- const int yuv = AV_RN16(in->data[0] + w + i * 2);
-
- masky |= yuv;
- histy[yuv]++;
- dify += abs(yuv - (int)AV_RN16(prev->data[0] + pw + i * 2));
+ } else {
+ const uint8_t *p_sat = sat->data[0];
+ const uint8_t *p_hue = hue->data[0];
+ const int lsz_sat = sat->linesize[0];
+ const int lsz_hue = hue->linesize[0];
+ // Calculate luma histogram and difference with previous frame or field.
+ for (int j = 0; j < link->h; j++) {
+ for (int i = 0; i < link->w; i++) {
+ const int yuv = in->data[0][w + i];
+
+ masky |= yuv;
+ histy[yuv]++;
+ dify += abs(yuv - prev->data[0][pw + i]);
+ }
+ w += in->linesize[0];
+ pw += prev->linesize[0];
}
- w += in->linesize[0];
- pw += prev->linesize[0];
- }
- // Calculate chroma histogram and difference with previous frame or field.
- memset(s->histu, 0, s->maxsize * sizeof(*s->histu));
- memset(s->histv, 0, s->maxsize * sizeof(*s->histv));
- memset(s->histsat, 0, s->maxsize * sizeof(*s->histsat));
- for (j = 0; j < s->chromah; j++) {
- for (i = 0; i < s->chromaw; i++) {
- const int yuvu = AV_RN16(in->data[1] + cw + i * 2);
- const int yuvv = AV_RN16(in->data[2] + cw + i * 2);
-
- masku |= yuvu;
- maskv |= yuvv;
- histu[yuvu]++;
- difu += abs(yuvu - (int)AV_RN16(prev->data[1] + cpw + i * 2));
- histv[yuvv]++;
- difv += abs(yuvv - (int)AV_RN16(prev->data[2] + cpw + i * 2));
-
- histsat[p_sat[i]]++;
- histhue[((int16_t*)p_hue)[i]]++;
+ // Calculate chroma histogram and difference with previous frame or field.
+ for (int j = 0; j < s->chromah; j++) {
+ for (int i = 0; i < s->chromaw; i++) {
+ const int yuvu = in->data[1][cw+i];
+ const int yuvv = in->data[2][cw+i];
+
+ masku |= yuvu;
+ maskv |= yuvv;
+ histu[yuvu]++;
+ difu += abs(yuvu - prev->data[1][cpw+i]);
+ histv[yuvv]++;
+ difv += abs(yuvv - prev->data[2][cpw+i]);
+
+ histsat[p_sat[i]]++;
+ histhue[((int16_t*)p_hue)[i]]++;
+ }
+ cw += in->linesize[1];
+ cpw += prev->linesize[1];
+ p_sat += lsz_sat;
+ p_hue += lsz_hue;
}
- cw += in->linesize[1];
- cpw += prev->linesize[1];
- p_sat += lsz_sat;
- p_hue += lsz_hue;
}
for (fil = 0; fil < FILT_NUMB; fil ++) {
@@ -882,9 +694,9 @@ static int filter_frame16(AVFilterLink *link, AVFrame *in)
.out = out != in && s->outfilter == fil ? out : NULL,
};
memset(s->jobs_rets, 0, s->nb_jobs * sizeof(*s->jobs_rets));
- ff_filter_execute(ctx, filters_def[fil].process16,
+ ff_filter_execute(ctx, hbd ? filters_def[fil].process16 : filters_def[fil].process8,
&td, s->jobs_rets, s->nb_jobs);
- for (i = 0; i < s->nb_jobs; i++)
+ for (int i = 0; i < s->nb_jobs; i++)
filtot[fil] += s->jobs_rets[i];
}
}
@@ -946,40 +758,45 @@ static int filter_frame16(AVFilterLink *link, AVFrame *in)
av_frame_free(&s->frame_prev);
s->frame_prev = av_frame_clone(in);
- SET_META("YMIN", "%d", miny);
- SET_META("YLOW", "%d", lowy);
+#define SET_META(key, fmt, val) do { \
+ snprintf(metabuf, sizeof(metabuf), fmt, val); \
+ av_dict_set(&out->metadata, "lavfi.signalstats." key, metabuf, 0); \
+} while (0)
+
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.YMIN", miny, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.YLOW", lowy, 0);
SET_META("YAVG", "%g", 1.0 * toty / s->fs);
- SET_META("YHIGH", "%d", highy);
- SET_META("YMAX", "%d", maxy);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.YHIGH", highy, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.YMAX", maxy, 0);
- SET_META("UMIN", "%d", minu);
- SET_META("ULOW", "%d", lowu);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.UMIN", minu, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.ULOW", lowu, 0);
SET_META("UAVG", "%g", 1.0 * totu / s->cfs);
- SET_META("UHIGH", "%d", highu);
- SET_META("UMAX", "%d", maxu);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.UHIGH", highu, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.UMAX", maxu, 0);
- SET_META("VMIN", "%d", minv);
- SET_META("VLOW", "%d", lowv);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.VMIN", minv, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.VLOW", lowv, 0);
SET_META("VAVG", "%g", 1.0 * totv / s->cfs);
- SET_META("VHIGH", "%d", highv);
- SET_META("VMAX", "%d", maxv);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.VHIGH", highv, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.VMAX", maxv, 0);
- SET_META("SATMIN", "%d", minsat);
- SET_META("SATLOW", "%d", lowsat);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.SATMIN", minsat, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.SATLOW", lowsat, 0);
SET_META("SATAVG", "%g", 1.0 * totsat / s->cfs);
- SET_META("SATHIGH", "%d", highsat);
- SET_META("SATMAX", "%d", maxsat);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.SATHIGH", highsat, 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.SATMAX", maxsat, 0);
- SET_META("HUEMED", "%d", medhue);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.HUEMED", medhue, 0);
SET_META("HUEAVG", "%g", 1.0 * tothue / s->cfs);
SET_META("YDIF", "%g", 1.0 * dify / s->fs);
SET_META("UDIF", "%g", 1.0 * difu / s->cfs);
SET_META("VDIF", "%g", 1.0 * difv / s->cfs);
- SET_META("YBITDEPTH", "%d", compute_bit_depth(masky));
- SET_META("UBITDEPTH", "%d", compute_bit_depth(masku));
- SET_META("VBITDEPTH", "%d", compute_bit_depth(maskv));
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.YBITDEPTH", compute_bit_depth(masky), 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.UBITDEPTH", compute_bit_depth(masku), 0);
+ av_dict_set_int(&out->metadata, "lavfi.signalstats.VBITDEPTH", compute_bit_depth(maskv), 0);
for (fil = 0; fil < FILT_NUMB; fil ++) {
if (s->filters & 1<<fil) {
@@ -995,17 +812,6 @@ static int filter_frame16(AVFilterLink *link, AVFrame *in)
return ff_filter_frame(outlink, out);
}
-static int filter_frame(AVFilterLink *link, AVFrame *in)
-{
- AVFilterContext *ctx = link->dst;
- SignalstatsContext *s = ctx->priv;
-
- if (s->depth > 8)
- return filter_frame16(link, in);
- else
- return filter_frame8(link, in);
-}
-
static const AVFilterPad signalstats_inputs[] = {
{
.name = "default",
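
The signalstats rework above folds filter_frame8() and filter_frame16() into a single filter_frame() with an hbd (high bit depth) branch, and switches the integer statistics from the snprintf-based SET_META macro to av_dict_set_int(); only the floating-point averages still go through the formatting macro. A minimal sketch of the two patterns, assuming libavutil is available and an AVFrame *out to annotate:

#include <stdio.h>
#include <libavutil/dict.h>
#include <libavutil/frame.h>

/* Integer statistics can be attached to frame metadata directly. */
static int set_int_stat(AVFrame *out, const char *key, int64_t value)
{
    return av_dict_set_int(&out->metadata, key, value, 0);
}

/* Floating-point averages still need to be formatted into a string. */
static int set_float_stat(AVFrame *out, const char *key, double value)
{
    char buf[128];
    snprintf(buf, sizeof(buf), "%g", value);
    return av_dict_set(&out->metadata, key, buf, 0);
}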
diff --git a/libavfilter/vf_smartblur.c b/libavfilter/vf_smartblur.c
index ae0ec05b2d..dbbb74339d 100644
--- a/libavfilter/vf_smartblur.c
+++ b/libavfilter/vf_smartblur.c
@@ -54,6 +54,7 @@ typedef struct SmartblurContext {
const AVClass *class;
FilterParam luma;
FilterParam chroma;
+ FilterParam alpha;
int hsub;
int vsub;
unsigned int sws_flags;
@@ -77,6 +78,13 @@ static const AVOption smartblur_options[] = {
{ "chroma_threshold", "set chroma threshold", OFFSET(chroma.threshold), AV_OPT_TYPE_INT, {.i64=THRESHOLD_MIN-1}, THRESHOLD_MIN-1, THRESHOLD_MAX, .flags=FLAGS },
{ "ct", "set chroma threshold", OFFSET(chroma.threshold), AV_OPT_TYPE_INT, {.i64=THRESHOLD_MIN-1}, THRESHOLD_MIN-1, THRESHOLD_MAX, .flags=FLAGS },
+ { "alpha_radius", "set alpha radius", OFFSET(alpha.radius), AV_OPT_TYPE_FLOAT, {.dbl=RADIUS_MIN-1}, RADIUS_MIN-1, RADIUS_MAX, .flags=FLAGS },
+ { "ar" , "set alpha radius", OFFSET(alpha.radius), AV_OPT_TYPE_FLOAT, {.dbl=RADIUS_MIN-1}, RADIUS_MIN-1, RADIUS_MAX, .flags=FLAGS },
+ { "alpha_strength", "set alpha strength", OFFSET(alpha.strength), AV_OPT_TYPE_FLOAT, {.dbl=STRENGTH_MIN-1}, STRENGTH_MIN-1, STRENGTH_MAX, .flags=FLAGS },
+ { "as", "set alpha strength", OFFSET(alpha.strength), AV_OPT_TYPE_FLOAT, {.dbl=STRENGTH_MIN-1}, STRENGTH_MIN-1, STRENGTH_MAX, .flags=FLAGS },
+ { "alpha_threshold", "set alpha threshold", OFFSET(alpha.threshold), AV_OPT_TYPE_INT, {.i64=THRESHOLD_MIN-1}, THRESHOLD_MIN-1, THRESHOLD_MAX, .flags=FLAGS },
+ { "at", "set alpha threshold", OFFSET(alpha.threshold), AV_OPT_TYPE_INT, {.i64=THRESHOLD_MIN-1}, THRESHOLD_MIN-1, THRESHOLD_MAX, .flags=FLAGS },
+
{ NULL }
};
@@ -94,15 +102,24 @@ static av_cold int init(AVFilterContext *ctx)
if (s->chroma.threshold < THRESHOLD_MIN)
s->chroma.threshold = s->luma.threshold;
- s->luma.quality = s->chroma.quality = 3.0;
+ /* make alpha default to luma values, if not explicitly set */
+ if (s->alpha.radius < RADIUS_MIN)
+ s->alpha.radius = s->luma.radius;
+ if (s->alpha.strength < STRENGTH_MIN)
+ s->alpha.strength = s->luma.strength;
+ if (s->alpha.threshold < THRESHOLD_MIN)
+ s->alpha.threshold = s->luma.threshold;
+
+ s->luma.quality = s->chroma.quality = s->alpha.quality = 3.0;
s->sws_flags = SWS_BICUBIC;
av_log(ctx, AV_LOG_VERBOSE,
"luma_radius:%f luma_strength:%f luma_threshold:%d "
- "chroma_radius:%f chroma_strength:%f chroma_threshold:%d\n",
+ "chroma_radius:%f chroma_strength:%f chroma_threshold:%d "
+ "alpha_radius:%f alpha_strength:%f alpha_threshold:%d\n",
s->luma.radius, s->luma.strength, s->luma.threshold,
- s->chroma.radius, s->chroma.strength, s->chroma.threshold);
-
+ s->chroma.radius, s->chroma.strength, s->chroma.threshold,
+ s->alpha.radius, s->alpha.strength, s->alpha.threshold);
return 0;
}
@@ -112,13 +129,15 @@ static av_cold void uninit(AVFilterContext *ctx)
sws_freeContext(s->luma.filter_context);
sws_freeContext(s->chroma.filter_context);
+ sws_freeContext(s->alpha.filter_context);
}
static const enum AVPixelFormat pix_fmts[] = {
- AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
- AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
- AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
- AV_PIX_FMT_GRAY8,
+ AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P,
+ AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,
+ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
+ AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+ AV_PIX_FMT_YUV440P, AV_PIX_FMT_GRAY8,
AV_PIX_FMT_NONE
};
@@ -162,6 +181,7 @@ static int config_props(AVFilterLink *inlink)
AV_CEIL_RSHIFT(inlink->w, s->hsub),
AV_CEIL_RSHIFT(inlink->h, s->vsub),
s->sws_flags);
+ alloc_sws_context(&s->alpha, inlink->w, inlink->h, s->sws_flags);
return 0;
}
@@ -261,6 +281,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
s->chroma.filter_context);
}
+ if (inpic->data[3]) {
+ blur(outpic->data[3], outpic->linesize[3],
+ inpic->data[3], inpic->linesize[3],
+ inlink->w, inlink->h, s->alpha.threshold,
+ s->alpha.filter_context);
+ }
+
av_frame_free(&inpic);
return ff_filter_frame(outlink, outpic);
}
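
The smartblur hunks add an alpha FilterParam alongside luma and chroma, default it to the luma settings when left unset, and extend the format list with the YUVA variants so the alpha plane is blurred as well. The defaulting rule is the same "out-of-range sentinel means inherit" scheme already used for chroma; a small sketch of it, with the struct and limit values reproduced here only for illustration:

typedef struct FilterParam {
    float radius;
    float strength;
    int   threshold;
} FilterParam;

/* Sentinels: option defaults sit one step below the legal minimum. */
#define RADIUS_MIN    0.1f
#define STRENGTH_MIN -1.0f
#define THRESHOLD_MIN -30

/* Any alpha parameter still at its sentinel inherits the luma value. */
static void default_alpha_to_luma(FilterParam *alpha, const FilterParam *luma)
{
    if (alpha->radius    < RADIUS_MIN)    alpha->radius    = luma->radius;
    if (alpha->strength  < STRENGTH_MIN)  alpha->strength  = luma->strength;
    if (alpha->threshold < THRESHOLD_MIN) alpha->threshold = luma->threshold;
}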
diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index 60683b5209..f14c0c0cd3 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -50,13 +50,10 @@ static const AVOption sr_options[] = {
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
- { "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
- { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
- { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
-AVFILTER_DEFINE_CLASS(sr);
+AVFILTER_DNN_DEFINE_CLASS(sr);
static av_cold int init(AVFilterContext *context)
{
@@ -192,6 +189,7 @@ const AVFilter ff_vf_sr = {
.name = "sr",
.description = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
.priv_size = sizeof(SRContext),
+ .preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(sr_inputs),
diff --git a/libavfilter/vf_stack_qsv.c b/libavfilter/vf_stack_qsv.c
index abaf156915..d4c1ac997f 100644
--- a/libavfilter/vf_stack_qsv.c
+++ b/libavfilter/vf_stack_qsv.c
@@ -71,13 +71,16 @@ static int process_frame(FFFrameSync *fs)
{
AVFilterContext *ctx = fs->parent;
QSVVPPContext *qsv = fs->opaque;
- AVFrame *frame = NULL;
+ AVFrame *frame = NULL, *propref = NULL;
int ret = 0;
for (int i = 0; i < ctx->nb_inputs; i++) {
ret = ff_framesync_get_frame(fs, i, &frame, 0);
- if (ret == 0)
- ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame);
+ if (ret == 0) {
+ if (i == 0)
+ propref = frame;
+ ret = ff_qsvvpp_filter_frame(qsv, ctx->inputs[i], frame, propref);
+ }
if (ret < 0 && ret != AVERROR(EAGAIN))
break;
}
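
ff_qsvvpp_filter_frame() grows a fourth argument here: a reference frame whose properties are presumably carried over to the output (the stack filter passes the frame from its first input; vf_vpp_qsv below passes the input frame itself). Outside the QSV plumbing the same idea reduces to av_frame_copy_props() from a chosen reference; a minimal sketch under that assumption:

#include <libavutil/frame.h>

/* Copy timestamps, side data and colour tags from the chosen reference
 * input onto a freshly produced output frame. */
static int propagate_props(AVFrame *out, const AVFrame *propref)
{
    return propref ? av_frame_copy_props(out, propref) : 0;
}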
diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c
index 0459070800..8efb54f079 100644
--- a/libavfilter/vf_thumbnail_cuda.c
+++ b/libavfilter/vf_thumbnail_cuda.c
@@ -291,7 +291,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
hist[i] = 4 * hist[i];
}
- CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+ ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (ret < 0)
return ret;
diff --git a/libavfilter/vf_tonemap_vaapi.c b/libavfilter/vf_tonemap_vaapi.c
index 0b767202d2..7ebcb18f79 100644
--- a/libavfilter/vf_tonemap_vaapi.c
+++ b/libavfilter/vf_tonemap_vaapi.c
@@ -39,7 +39,11 @@ typedef struct HDRVAAPIContext {
enum AVColorTransferCharacteristic color_transfer;
enum AVColorSpace color_matrix;
+ char *mastering_display;
+ char *content_light;
+
VAHdrMetaDataHDR10 in_metadata;
+ VAHdrMetaDataHDR10 out_metadata;
AVFrameSideData *src_display;
AVFrameSideData *src_light;
@@ -146,6 +150,87 @@ static int tonemap_vaapi_save_metadata(AVFilterContext *avctx, AVFrame *input_fr
return 0;
}
+static int tonemap_vaapi_update_sidedata(AVFilterContext *avctx, AVFrame *output_frame)
+{
+ HDRVAAPIContext *ctx = avctx->priv;
+ AVFrameSideData *metadata;
+ AVMasteringDisplayMetadata *hdr_meta;
+ AVFrameSideData *metadata_lt;
+ AVContentLightMetadata *hdr_meta_lt;
+ int i;
+ const int mapping[3] = {1, 2, 0}; //green, blue, red
+ const int chroma_den = 50000;
+ const int luma_den = 10000;
+
+ metadata = av_frame_new_side_data(output_frame,
+ AV_FRAME_DATA_MASTERING_DISPLAY_METADATA,
+ sizeof(AVMasteringDisplayMetadata));
+ if (!metadata)
+ return AVERROR(ENOMEM);
+
+ hdr_meta = (AVMasteringDisplayMetadata *)metadata->data;
+
+ for (i = 0; i < 3; i++) {
+ const int j = mapping[i];
+ hdr_meta->display_primaries[j][0].num = ctx->out_metadata.display_primaries_x[i];
+ hdr_meta->display_primaries[j][0].den = chroma_den;
+
+ hdr_meta->display_primaries[j][1].num = ctx->out_metadata.display_primaries_y[i];
+ hdr_meta->display_primaries[j][1].den = chroma_den;
+ }
+
+ hdr_meta->white_point[0].num = ctx->out_metadata.white_point_x;
+ hdr_meta->white_point[0].den = chroma_den;
+
+ hdr_meta->white_point[1].num = ctx->out_metadata.white_point_y;
+ hdr_meta->white_point[1].den = chroma_den;
+ hdr_meta->has_primaries = 1;
+
+ hdr_meta->max_luminance.num = ctx->out_metadata.max_display_mastering_luminance;
+ hdr_meta->max_luminance.den = luma_den;
+
+ hdr_meta->min_luminance.num = ctx->out_metadata.min_display_mastering_luminance;
+ hdr_meta->min_luminance.den = luma_den;
+ hdr_meta->has_luminance = 1;
+
+ av_log(avctx, AV_LOG_DEBUG,
+ "Mastering display colour volume(out):\n");
+ av_log(avctx, AV_LOG_DEBUG,
+ "G(%u,%u) B(%u,%u) R(%u,%u) WP(%u,%u)\n",
+ ctx->out_metadata.display_primaries_x[0],
+ ctx->out_metadata.display_primaries_y[0],
+ ctx->out_metadata.display_primaries_x[1],
+ ctx->out_metadata.display_primaries_y[1],
+ ctx->out_metadata.display_primaries_x[2],
+ ctx->out_metadata.display_primaries_y[2],
+ ctx->out_metadata.white_point_x,
+ ctx->out_metadata.white_point_y);
+ av_log(avctx, AV_LOG_DEBUG,
+ "max_display_mastering_luminance=%u, min_display_mastering_luminance=%u\n",
+ ctx->out_metadata.max_display_mastering_luminance,
+ ctx->out_metadata.min_display_mastering_luminance);
+
+ metadata_lt = av_frame_new_side_data(output_frame,
+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
+ sizeof(AVContentLightMetadata));
+ if (!metadata_lt)
+ return AVERROR(ENOMEM);
+
+ hdr_meta_lt = (AVContentLightMetadata *)metadata_lt->data;
+
+ hdr_meta_lt->MaxCLL = FFMIN(ctx->out_metadata.max_content_light_level, 65535);
+ hdr_meta_lt->MaxFALL = FFMIN(ctx->out_metadata.max_pic_average_light_level, 65535);
+
+ av_log(avctx, AV_LOG_DEBUG,
+ "Content light level information(out):\n");
+ av_log(avctx, AV_LOG_DEBUG,
+ "MaxCLL(%u) MaxFALL(%u)\n",
+ ctx->out_metadata.max_content_light_level,
+ ctx->out_metadata.max_pic_average_light_level);
+
+ return 0;
+}
+
static int tonemap_vaapi_set_filter_params(AVFilterContext *avctx, AVFrame *input_frame)
{
VAAPIVPPContext *vpp_ctx = avctx->priv;
@@ -208,15 +293,26 @@ static int tonemap_vaapi_build_filter_params(AVFilterContext *avctx)
return AVERROR(EINVAL);
}
- for (i = 0; i < num_query_caps; i++) {
- if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag)
- break;
- }
-
- if (i >= num_query_caps) {
- av_log(avctx, AV_LOG_ERROR,
- "VAAPI driver doesn't support HDR to SDR\n");
- return AVERROR(EINVAL);
+ if (ctx->mastering_display) {
+ for (i = 0; i < num_query_caps; i++) {
+ if (VA_TONE_MAPPING_HDR_TO_HDR & hdr_cap[i].caps_flag)
+ break;
+ }
+ if (i >= num_query_caps) {
+ av_log(avctx, AV_LOG_ERROR,
+ "VAAPI driver doesn't support HDR to HDR\n");
+ return AVERROR(EINVAL);
+ }
+ } else {
+ for (i = 0; i < num_query_caps; i++) {
+ if (VA_TONE_MAPPING_HDR_TO_SDR & hdr_cap[i].caps_flag)
+ break;
+ }
+ if (i >= num_query_caps) {
+ av_log(avctx, AV_LOG_ERROR,
+ "VAAPI driver doesn't support HDR to SDR\n");
+ return AVERROR(EINVAL);
+ }
}
hdrtm_param.type = VAProcFilterHighDynamicRangeToneMapping;
@@ -241,6 +337,8 @@ static int tonemap_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame
VAProcPipelineParameterBuffer params;
int err;
+ VAHdrMetaData out_hdr_metadata;
+
av_log(avctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
av_get_pix_fmt_name(input_frame->format),
input_frame->width, input_frame->height, input_frame->pts);
@@ -278,22 +376,43 @@ static int tonemap_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame
if (err < 0)
goto fail;
+ av_frame_remove_side_data(output_frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+ av_frame_remove_side_data(output_frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+
+ if (!ctx->mastering_display) {
+ /* Use BT709 by default for HDR to SDR output frame */
+ output_frame->color_primaries = AVCOL_PRI_BT709;
+ output_frame->color_trc = AVCOL_TRC_BT709;
+ output_frame->colorspace = AVCOL_SPC_BT709;
+ }
+
if (ctx->color_primaries != AVCOL_PRI_UNSPECIFIED)
output_frame->color_primaries = ctx->color_primaries;
if (ctx->color_transfer != AVCOL_TRC_UNSPECIFIED)
output_frame->color_trc = ctx->color_transfer;
- else
- output_frame->color_trc = AVCOL_TRC_BT709;
if (ctx->color_matrix != AVCOL_SPC_UNSPECIFIED)
output_frame->colorspace = ctx->color_matrix;
+ if (ctx->mastering_display) {
+ err = tonemap_vaapi_update_sidedata(avctx, output_frame);
+ if (err < 0)
+ goto fail;
+ }
+
err = ff_vaapi_vpp_init_params(avctx, &params,
input_frame, output_frame);
if (err < 0)
goto fail;
+ if (ctx->mastering_display) {
+ out_hdr_metadata.metadata_type = VAProcHighDynamicRangeMetadataHDR10;
+ out_hdr_metadata.metadata = &ctx->out_metadata;
+ out_hdr_metadata.metadata_size = sizeof(VAHdrMetaDataHDR10);
+ params.output_hdr_metadata = &out_hdr_metadata;
+ }
+
if (vpp_ctx->nb_filter_buffers) {
params.filters = &vpp_ctx->filter_buffers[0];
params.num_filters = vpp_ctx->nb_filter_buffers;
@@ -309,9 +428,6 @@ static int tonemap_vaapi_filter_frame(AVFilterLink *inlink, AVFrame *input_frame
av_get_pix_fmt_name(output_frame->format),
output_frame->width, output_frame->height, output_frame->pts);
- av_frame_remove_side_data(output_frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
- av_frame_remove_side_data(output_frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
-
return ff_filter_frame(outlink, output_frame);
fail:
@@ -332,8 +448,13 @@ static av_cold int tonemap_vaapi_init(AVFilterContext *avctx)
if (ctx->output_format_string) {
vpp_ctx->output_format = av_get_pix_fmt(ctx->output_format_string);
} else {
- vpp_ctx->output_format = AV_PIX_FMT_NV12;
- av_log(avctx, AV_LOG_WARNING, "Output format not set, use default format NV12\n");
+ if (ctx->mastering_display) {
+ vpp_ctx->output_format = AV_PIX_FMT_P010;
+ av_log(avctx, AV_LOG_VERBOSE, "Output format not set, use default format P010 for HDR to HDR tone mapping.\n");
+ } else {
+ vpp_ctx->output_format = AV_PIX_FMT_NV12;
+ av_log(avctx, AV_LOG_VERBOSE, "Output format not set, use default format NV12 for HDR to SDR tone mapping.\n");
+ }
}
#define STRING_OPTION(var_name, func_name, default_value) do { \
@@ -353,6 +474,37 @@ static av_cold int tonemap_vaapi_init(AVFilterContext *avctx)
STRING_OPTION(color_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED);
STRING_OPTION(color_matrix, color_space, AVCOL_SPC_UNSPECIFIED);
+ if (ctx->mastering_display) {
+ if (10 != sscanf(ctx->mastering_display,
+ "%hu %hu|%hu %hu|%hu %hu|%hu %hu|%u %u",
+ &ctx->out_metadata.display_primaries_x[0],
+ &ctx->out_metadata.display_primaries_y[0],
+ &ctx->out_metadata.display_primaries_x[1],
+ &ctx->out_metadata.display_primaries_y[1],
+ &ctx->out_metadata.display_primaries_x[2],
+ &ctx->out_metadata.display_primaries_y[2],
+ &ctx->out_metadata.white_point_x,
+ &ctx->out_metadata.white_point_y,
+ &ctx->out_metadata.min_display_mastering_luminance,
+ &ctx->out_metadata.max_display_mastering_luminance)) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Option mastering-display input invalid\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (!ctx->content_light) {
+ ctx->out_metadata.max_content_light_level = 0;
+ ctx->out_metadata.max_pic_average_light_level = 0;
+ } else if (2 != sscanf(ctx->content_light,
+ "%hu %hu",
+ &ctx->out_metadata.max_content_light_level,
+ &ctx->out_metadata.max_pic_average_light_level)) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Option content-light input invalid\n");
+ return AVERROR(EINVAL);
+ }
+ }
+
return 0;
}
@@ -378,6 +530,12 @@ static const AVOption tonemap_vaapi_options[] = {
{ "t", "Output color transfer characteristics set",
OFFSET(color_transfer_string), AV_OPT_TYPE_STRING,
{ .str = NULL }, .flags = FLAGS, .unit = "transfer" },
+ { "display", "set mastering display colour volume",
+ OFFSET(mastering_display), AV_OPT_TYPE_STRING,
+ { .str = NULL }, .flags = FLAGS },
+ { "light", "set content light level information",
+ OFFSET(content_light), AV_OPT_TYPE_STRING,
+ { .str = NULL }, .flags = FLAGS },
{ NULL }
};
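
The new display and light options let tonemap_vaapi target HDR output: the mastering-display string is parsed with a fixed sscanf pattern into VAHdrMetaDataHDR10, and on output the values are re-exported as frame side data as rationals over 50000 (primaries, white point) and 10000 (luminance). A standalone sketch of the string format, using a hypothetical struct in place of the VA type:

#include <stdint.h>
#include <stdio.h>

typedef struct MasteringDisplay {
    uint16_t primaries_x[3], primaries_y[3]; /* G, B, R order, 1/50000 units */
    uint16_t white_point_x, white_point_y;
    uint32_t min_luminance, max_luminance;   /* 1/10000 cd/m^2 units */
} MasteringDisplay;

/* Expected form: "gx gy|bx by|rx ry|wpx wpy|minlum maxlum" */
static int parse_mastering_display(const char *str, MasteringDisplay *md)
{
    int n = sscanf(str, "%hu %hu|%hu %hu|%hu %hu|%hu %hu|%u %u",
                   &md->primaries_x[0], &md->primaries_y[0],
                   &md->primaries_x[1], &md->primaries_y[1],
                   &md->primaries_x[2], &md->primaries_y[2],
                   &md->white_point_x, &md->white_point_y,
                   &md->min_luminance, &md->max_luminance);
    return n == 10 ? 0 : -1;
}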
diff --git a/libavfilter/vf_v360.c b/libavfilter/vf_v360.c
index 5ea6e0d865..299dbe9ff5 100644
--- a/libavfilter/vf_v360.c
+++ b/libavfilter/vf_v360.c
@@ -286,6 +286,8 @@ static int remap##ws##_##bits##bit_slice(AVFilterContext *ctx, void *arg, int jo
const AVFrame *in = td->in; \
AVFrame *out = td->out; \
\
+ av_assert1(s->nb_planes <= AV_VIDEO_MAX_PLANES); \
+ \
for (int stereo = 0; stereo < 1 + s->out_stereo > STEREO_2D; stereo++) { \
for (int plane = 0; plane < s->nb_planes; plane++) { \
const unsigned map = s->map[plane]; \
diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
index 598c85be09..6071c46ca1 100644
--- a/libavfilter/vf_vpp_qsv.c
+++ b/libavfilter/vf_vpp_qsv.c
@@ -748,7 +748,7 @@ static int activate(AVFilterContext *ctx)
if (qsv->session) {
if (in || qsv->eof) {
- ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
+ ret = ff_qsvvpp_filter_frame(qsv, inlink, in, in);
av_frame_free(&in);
if (ret == AVERROR(EAGAIN))
goto not_ready;
diff --git a/libavfilter/vf_xmedian.c b/libavfilter/vf_xmedian.c
index 334d4018a2..4e83b48843 100644
--- a/libavfilter/vf_xmedian.c
+++ b/libavfilter/vf_xmedian.c
@@ -60,6 +60,10 @@ typedef struct XMedianContext {
int (*median_frames)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
} XMedianContext;
+#define OFFSET(x) offsetof(XMedianContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+#define TFLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM
+
static const enum AVPixelFormat pixel_fmts[] = {
AV_PIX_FMT_GRAY8,
AV_PIX_FMT_GRAY9,
@@ -361,10 +365,6 @@ static av_cold int xmedian_init(AVFilterContext *ctx)
return init(ctx);
}
-#define OFFSET(x) offsetof(XMedianContext, x)
-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
-#define TFLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM
-
static const AVOption xmedian_options[] = {
{ "inputs", "set number of inputs", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=3}, 3, 255, .flags = FLAGS },
{ "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=15}, 0, 15, .flags =TFLAGS },
diff --git a/libavfilter/vsrc_testsrc.c b/libavfilter/vsrc_testsrc.c
index 273a852f82..4dc12c8a01 100644
--- a/libavfilter/vsrc_testsrc.c
+++ b/libavfilter/vsrc_testsrc.c
@@ -260,8 +260,6 @@ static int color_config_props(AVFilterLink *inlink)
inlink->color_range, 0);
ff_draw_color(&test->draw, &test->color, test->color_rgba);
- test->w = ff_draw_round_to_sub(&test->draw, 0, -1, test->w);
- test->h = ff_draw_round_to_sub(&test->draw, 1, -1, test->h);
if (av_image_check_size(test->w, test->h, 0, ctx) < 0)
return AVERROR(EINVAL);
@@ -697,6 +695,15 @@ const AVFilter ff_vsrc_testsrc = {
#endif /* CONFIG_TESTSRC_FILTER */
+static void av_unused set_color(TestSourceContext *s, FFDrawColor *color, uint32_t argb)
+{
+ uint8_t rgba[4] = { (argb >> 16) & 0xFF,
+ (argb >> 8) & 0xFF,
+ (argb >> 0) & 0xFF,
+ (argb >> 24) & 0xFF, };
+ ff_draw_color(&s->draw, color, rgba);
+}
+
#if CONFIG_TESTSRC2_FILTER
static const AVOption testsrc2_options[] = {
@@ -707,15 +714,6 @@ static const AVOption testsrc2_options[] = {
AVFILTER_DEFINE_CLASS(testsrc2);
-static void set_color(TestSourceContext *s, FFDrawColor *color, uint32_t argb)
-{
- uint8_t rgba[4] = { (argb >> 16) & 0xFF,
- (argb >> 8) & 0xFF,
- (argb >> 0) & 0xFF,
- (argb >> 24) & 0xFF, };
- ff_draw_color(&s->draw, color, rgba);
-}
-
static uint32_t color_gradient(unsigned index)
{
unsigned si = index & 0xFF, sd = 0xFF - si;
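
set_color() is hoisted out of the testsrc2-only block (and marked av_unused), presumably so other sources compiled into vsrc_testsrc.c can share it; it simply reorders a packed 0xAARRGGBB value into the R, G, B, A byte order that ff_draw_color() takes. The unpacking on its own:

#include <stdint.h>

/* Split a packed 0xAARRGGBB value into an R,G,B,A byte array. */
static void argb_to_rgba(uint32_t argb, uint8_t rgba[4])
{
    rgba[0] = (argb >> 16) & 0xFF; /* R */
    rgba[1] = (argb >>  8) & 0xFF; /* G */
    rgba[2] = (argb >>  0) & 0xFF; /* B */
    rgba[3] = (argb >> 24) & 0xFF; /* A */
}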
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index e87481bd7a..c05000d7fd 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -12,6 +12,7 @@ OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun_init.o
OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate_init.o
+OBJS-$(CONFIG_HALDCLUT_FILTER) += x86/vf_lut3d_init.o
OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip_init.o
OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet_init.o
@@ -28,6 +29,7 @@ OBJS-$(CONFIG_PSNR_FILTER) += x86/vf_psnr_init.o
OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o
OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += x86/vf_removegrain_init.o
OBJS-$(CONFIG_SHOWCQT_FILTER) += x86/avf_showcqt_init.o
+OBJS-$(CONFIG_SOBEL_FILTER) += x86/vf_convolution_init.o
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o
OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim_init.o
OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d_init.o
@@ -54,6 +56,7 @@ X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
X86ASM-OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur.o
X86ASM-OBJS-$(CONFIG_GRADFUN_FILTER) += x86/vf_gradfun.o
+X86ASM-OBJS-$(CONFIG_HALDCLUT_FILTER) += x86/vf_lut3d.o
X86ASM-OBJS-$(CONFIG_HFLIP_FILTER) += x86/vf_hflip.o
X86ASM-OBJS-$(CONFIG_HQDN3D_FILTER) += x86/vf_hqdn3d.o
X86ASM-OBJS-$(CONFIG_IDET_FILTER) += x86/vf_idet.o
@@ -71,6 +74,7 @@ ifdef CONFIG_GPL
X86ASM-OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += x86/vf_removegrain.o
endif
X86ASM-OBJS-$(CONFIG_SHOWCQT_FILTER) += x86/avf_showcqt.o
+X86ASM-OBJS-$(CONFIG_SOBEL_FILTER) += x86/vf_convolution.o
X86ASM-OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim.o
X86ASM-OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d.o
X86ASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
index c326c43362..f4e097ee3d 100644
--- a/libavfilter/x86/vf_blend_init.c
+++ b/libavfilter/x86/vf_blend_init.c
@@ -28,7 +28,7 @@ void ff_blend_##name##_##opt(const uint8_t *top, ptrdiff_t top_linesize, \
const uint8_t *bottom, ptrdiff_t bottom_linesize, \
uint8_t *dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- struct FilterParams *param, double *values, int starty);
+ FilterParams *param, SliceParams *sliceparam);
BLEND_FUNC(addition, sse2)
BLEND_FUNC(addition, avx2)
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 9981799cc9..1c4d9deccd 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -35,6 +35,7 @@ OBJS-$(HAVE_LIBC_MSVCRT) += file_open.o
# subsystems
OBJS-$(CONFIG_ISO_MEDIA) += isom.o
+OBJS-$(CONFIG_ISO_WRITER) += av1.o avc.o hevc.o vvc.o vpcc.o
OBJS-$(CONFIG_IAMFDEC) += iamf_reader.o iamf_parse.o iamf.o
OBJS-$(CONFIG_IAMFENC) += iamf_writer.o iamf.o
OBJS-$(CONFIG_NETWORK) += network.o
@@ -127,7 +128,8 @@ OBJS-$(CONFIG_ASF_DEMUXER) += asfdec_f.o asf.o asfcrypt.o \
asf_tags.o avlanguage.o
OBJS-$(CONFIG_ASF_O_DEMUXER) += asfdec_o.o asf.o asfcrypt.o \
asf_tags.o avlanguage.o
-OBJS-$(CONFIG_ASF_MUXER) += asfenc.o asf.o avlanguage.o
+OBJS-$(CONFIG_ASF_MUXER) += asfenc.o asf.o asf_tags.o \
+ avlanguage.o
OBJS-$(CONFIG_ASS_DEMUXER) += assdec.o subtitles.o
OBJS-$(CONFIG_ASS_MUXER) += assenc.o
OBJS-$(CONFIG_AST_DEMUXER) += ast.o astdec.o
@@ -219,7 +221,7 @@ OBJS-$(CONFIG_FLAC_MUXER) += flacenc.o flacenc_header.o \
OBJS-$(CONFIG_FLIC_DEMUXER) += flic.o
OBJS-$(CONFIG_FLV_DEMUXER) += flvdec.o
OBJS-$(CONFIG_LIVE_FLV_DEMUXER) += flvdec.o
-OBJS-$(CONFIG_FLV_MUXER) += flvenc.o avc.o hevc.o av1.o vpcc.o
+OBJS-$(CONFIG_FLV_MUXER) += flvenc.o
OBJS-$(CONFIG_FOURXM_DEMUXER) += 4xm.o
OBJS-$(CONFIG_FRAMECRC_MUXER) += framecrcenc.o framehash.o
OBJS-$(CONFIG_FRAMEHASH_MUXER) += hashenc.o framehash.o
@@ -259,7 +261,7 @@ OBJS-$(CONFIG_HEVC_MUXER) += rawenc.o
OBJS-$(CONFIG_EVC_DEMUXER) += evcdec.o rawdec.o
OBJS-$(CONFIG_EVC_MUXER) += rawenc.o
OBJS-$(CONFIG_HLS_DEMUXER) += hls.o hls_sample_encryption.o
-OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o avc.o
+OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o
OBJS-$(CONFIG_HNM_DEMUXER) += hnm.o
OBJS-$(CONFIG_IAMF_DEMUXER) += iamfdec.o
OBJS-$(CONFIG_IAMF_MUXER) += iamfenc.o
@@ -332,6 +334,8 @@ OBJS-$(CONFIG_KVAG_DEMUXER) += kvag.o
OBJS-$(CONFIG_KVAG_MUXER) += kvag.o rawenc.o
OBJS-$(CONFIG_LAF_DEMUXER) += lafdec.o
OBJS-$(CONFIG_LATM_MUXER) += latmenc.o rawenc.o
+OBJS-$(CONFIG_LC3_DEMUXER) += lc3.o
+OBJS-$(CONFIG_LC3_MUXER) += lc3.o
OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o
OBJS-$(CONFIG_LOAS_DEMUXER) += loasdec.o rawdec.o
OBJS-$(CONFIG_LUODAT_DEMUXER) += luodatdec.o
@@ -346,7 +350,6 @@ OBJS-$(CONFIG_MATROSKA_DEMUXER) += matroskadec.o matroska.o \
oggparsevorbis.o vorbiscomment.o \
qtpalette.o replaygain.o dovi_isom.o
OBJS-$(CONFIG_MATROSKA_MUXER) += matroskaenc.o matroska.o \
- av1.o avc.o hevc.o vvc.o\
flacenc_header.o avlanguage.o \
vorbiscomment.o wv.o dovi_isom.o
OBJS-$(CONFIG_MCA_DEMUXER) += mca.o
@@ -368,7 +371,7 @@ OBJS-$(CONFIG_MODS_DEMUXER) += mods.o
OBJS-$(CONFIG_MOFLEX_DEMUXER) += moflex.o
OBJS-$(CONFIG_MOV_DEMUXER) += mov.o mov_chan.o mov_esds.o \
qtpalette.o replaygain.o dovi_isom.o
-OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o vvc.o vpcc.o \
+OBJS-$(CONFIG_MOV_MUXER) += movenc.o \
movenchint.o mov_chan.o rtp.o \
movenccenc.o movenc_ttml.o rawutils.o \
dovi_isom.o evc.o
@@ -401,7 +404,7 @@ OBJS-$(CONFIG_MUSX_DEMUXER) += musx.o
OBJS-$(CONFIG_MV_DEMUXER) += mvdec.o
OBJS-$(CONFIG_MVI_DEMUXER) += mvi.o
OBJS-$(CONFIG_MXF_DEMUXER) += mxfdec.o mxf.o avlanguage.o
-OBJS-$(CONFIG_MXF_MUXER) += mxfenc.o mxf.o avc.o
+OBJS-$(CONFIG_MXF_MUXER) += mxfenc.o mxf.o
OBJS-$(CONFIG_MXG_DEMUXER) += mxg.o
OBJS-$(CONFIG_NC_DEMUXER) += ncdec.o
OBJS-$(CONFIG_NISTSPHERE_DEMUXER) += nistspheredec.o pcm.o
@@ -523,8 +526,7 @@ OBJS-$(CONFIG_RTP_MUXER) += rtp.o \
rtpenc_vc2hq.o \
rtpenc_vp8.o \
rtpenc_vp9.o \
- rtpenc_xiph.o \
- avc.o hevc.o vvc.o
+ rtpenc_xiph.o
OBJS-$(CONFIG_RTSP_DEMUXER) += rtsp.o rtspdec.o httpauth.o \
urldecode.o
OBJS-$(CONFIG_RTSP_MUXER) += rtsp.o rtspenc.o httpauth.o \
@@ -726,6 +728,7 @@ OBJS-$(CONFIG_LIBZMQ_PROTOCOL) += libzmq.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o to_upper4.o
SHLIBOBJS-$(CONFIG_ISO_MEDIA) += mpegaudiotabs.o
+SHLIBOBJS-$(CONFIG_ISO_WRITER) += bitstream.o
SHLIBOBJS-$(CONFIG_FLV_MUXER) += mpeg4audio_sample_rates.o
SHLIBOBJS-$(CONFIG_HLS_DEMUXER) += ac3_channel_layout_tab.o
SHLIBOBJS-$(CONFIG_IMAGE_JPEGXL_PIPE_DEMUXER) += jpegxl_parse.o
diff --git a/libavformat/aacdec.c b/libavformat/aacdec.c
index e267886e1a..0b4bd69dd2 100644
--- a/libavformat/aacdec.c
+++ b/libavformat/aacdec.c
@@ -119,7 +119,7 @@ static int adts_aac_read_header(AVFormatContext *s)
ff_id3v1_read(s);
if ((s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
- !av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
+ !av_dict_count(s->metadata)) {
int64_t cur = avio_tell(s->pb);
ff_ape_parse_tag(s);
avio_seek(s->pb, cur, SEEK_SET);
@@ -143,9 +143,8 @@ static int handle_id3(AVFormatContext *s, AVPacket *pkt)
int ret;
ret = av_append_packet(s->pb, pkt, ff_id3v2_tag_len(pkt->data) - pkt->size);
- if (ret < 0) {
+ if (ret < 0)
return ret;
- }
ffio_init_read_context(&pb, pkt->data, pkt->size);
ff_id3v2_read_dict(&pb.pub, &metadata, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);
@@ -175,9 +174,8 @@ retry:
if (ret < 0)
return ret;
- if (ret < ADTS_HEADER_SIZE) {
+ if (ret < ADTS_HEADER_SIZE)
return AVERROR(EIO);
- }
if ((AV_RB16(pkt->data) >> 4) != 0xfff) {
// Parse all the ID3 headers between frames
@@ -185,9 +183,8 @@ retry:
av_assert2(append > 0);
ret = av_append_packet(s->pb, pkt, append);
- if (ret != append) {
+ if (ret != append)
return AVERROR(EIO);
- }
if (!ff_id3v2_match(pkt->data, ID3v2_DEFAULT_MAGIC)) {
av_packet_unref(pkt);
ret = adts_aac_resync(s);
@@ -200,9 +197,8 @@ retry:
}
fsize = (AV_RB32(pkt->data + 3) >> 13) & 0x1FFF;
- if (fsize < ADTS_HEADER_SIZE) {
+ if (fsize < ADTS_HEADER_SIZE)
return AVERROR_INVALIDDATA;
- }
ret = av_append_packet(s->pb, pkt, fsize - pkt->size);
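
The ADTS reader now asks av_dict_count() whether any metadata has been read before probing for an APE tag, instead of the older av_dict_get(..., "", NULL, AV_DICT_IGNORE_SUFFIX) trick; the remaining hunks only drop braces around single-statement returns. The emptiness check in isolation, assuming libavutil:

#include <libavutil/dict.h>

/* True when no tag has populated the metadata dictionary yet. */
static int metadata_is_empty(const AVDictionary *m)
{
    return av_dict_count(m) == 0;
}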
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index ae925dcf60..305fa46532 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -252,6 +252,8 @@ extern const FFInputFormat ff_kvag_demuxer;
extern const FFOutputFormat ff_kvag_muxer;
extern const FFInputFormat ff_laf_demuxer;
extern const FFOutputFormat ff_latm_muxer;
+extern const FFInputFormat ff_lc3_demuxer;
+extern const FFOutputFormat ff_lc3_muxer;
extern const FFInputFormat ff_lmlm4_demuxer;
extern const FFInputFormat ff_loas_demuxer;
extern const FFInputFormat ff_luodat_demuxer;
diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
index 1709bf4051..625bdf7e3a 100644
--- a/libavformat/avisynth.c
+++ b/libavformat/avisynth.c
@@ -23,6 +23,7 @@
#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
+#include "libavutil/thread.h"
#include "avformat.h"
#include "demux.h"
@@ -130,6 +131,8 @@ static const int avs_planes_yuva[4] = { AVS_PLANAR_Y, AVS_PLANAR_U,
static const int avs_planes_rgba[4] = { AVS_PLANAR_G, AVS_PLANAR_B,
AVS_PLANAR_R, AVS_PLANAR_A };
+static AVMutex avisynth_mutex = AV_MUTEX_INITIALIZER;
+
/* A conflict between C++ global objects, atexit, and dynamic loading requires
* us to register our own atexit handler to prevent double freeing. */
static AviSynthLibrary avs_library;
@@ -1083,15 +1086,15 @@ static av_cold int avisynth_read_header(AVFormatContext *s)
int ret;
// Calling library must implement a lock for thread-safe opens.
- if (ret = ff_lock_avformat())
- return ret;
+ if (ff_mutex_lock(&avisynth_mutex))
+ return AVERROR_UNKNOWN;
if (ret = avisynth_open_file(s)) {
- ff_unlock_avformat();
+ ff_mutex_unlock(&avisynth_mutex);
return ret;
}
- ff_unlock_avformat();
+ ff_mutex_unlock(&avisynth_mutex);
return 0;
}
@@ -1127,11 +1130,11 @@ static int avisynth_read_packet(AVFormatContext *s, AVPacket *pkt)
static av_cold int avisynth_read_close(AVFormatContext *s)
{
- if (ff_lock_avformat())
+ if (ff_mutex_lock(&avisynth_mutex))
return AVERROR_UNKNOWN;
avisynth_context_destroy(s->priv_data);
- ff_unlock_avformat();
+ ff_mutex_unlock(&avisynth_mutex);
return 0;
}
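
Both avisynth.c here and chromaprint.c below stop sharing ff_lock_avformat()/ff_unlock_avformat() and instead guard their non-thread-safe library calls with a static AVMutex of their own. On pthread builds the AVMutex wrappers map onto plain pthread mutexes; a minimal sketch of the same pattern in portable terms:

#include <pthread.h>

static pthread_mutex_t open_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Serialize an open/close routine that the underlying library
 * does not allow to run concurrently. */
static int guarded_call(void *ctx, int (*fn)(void *))
{
    int ret;

    if (pthread_mutex_lock(&open_mutex))
        return -1;
    ret = fn(ctx);
    pthread_mutex_unlock(&open_mutex);
    return ret;
}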
diff --git a/libavformat/bitstream.c b/libavformat/bitstream.c
new file mode 100644
index 0000000000..2afda37c30
--- /dev/null
+++ b/libavformat/bitstream.c
@@ -0,0 +1 @@
+#include "libavcodec/bitstream.c"
diff --git a/libavformat/chromaprint.c b/libavformat/chromaprint.c
index 1cdca47ea5..eae233a651 100644
--- a/libavformat/chromaprint.c
+++ b/libavformat/chromaprint.c
@@ -20,15 +20,17 @@
*/
#include "avformat.h"
-#include "internal.h"
#include "mux.h"
#include "libavutil/opt.h"
+#include "libavutil/thread.h"
#include <chromaprint.h>
#define CPR_VERSION_INT AV_VERSION_INT(CHROMAPRINT_VERSION_MAJOR, \
CHROMAPRINT_VERSION_MINOR, \
CHROMAPRINT_VERSION_PATCH)
+static AVMutex chromaprint_mutex = AV_MUTEX_INITIALIZER;
+
typedef enum FingerprintFormat {
FINGERPRINT_RAW,
FINGERPRINT_COMPRESSED,
@@ -52,9 +54,9 @@ static void deinit(AVFormatContext *s)
ChromaprintMuxContext *const cpr = s->priv_data;
if (cpr->ctx) {
- ff_lock_avformat();
+ ff_mutex_lock(&chromaprint_mutex);
chromaprint_free(cpr->ctx);
- ff_unlock_avformat();
+ ff_mutex_unlock(&chromaprint_mutex);
}
}
@@ -63,9 +65,9 @@ static av_cold int init(AVFormatContext *s)
ChromaprintMuxContext *cpr = s->priv_data;
AVStream *st;
- ff_lock_avformat();
+ ff_mutex_lock(&chromaprint_mutex);
cpr->ctx = chromaprint_new(cpr->algorithm);
- ff_unlock_avformat();
+ ff_mutex_unlock(&chromaprint_mutex);
if (!cpr->ctx) {
av_log(s, AV_LOG_ERROR, "Failed to create chromaprint context.\n");
diff --git a/libavformat/concatdec.c b/libavformat/concatdec.c
index 493659649c..fe65d0c768 100644
--- a/libavformat/concatdec.c
+++ b/libavformat/concatdec.c
@@ -639,6 +639,11 @@ static int concat_parse_script(AVFormatContext *avf)
}
}
+ if (!file) {
+ ret = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+
if (file->inpoint != AV_NOPTS_VALUE && file->outpoint != AV_NOPTS_VALUE) {
if (file->inpoint > file->outpoint ||
file->outpoint - (uint64_t)file->inpoint > INT64_MAX)
diff --git a/libavformat/demux.c b/libavformat/demux.c
index abfd5fee7d..ecefe7e0a7 100644
--- a/libavformat/demux.c
+++ b/libavformat/demux.c
@@ -1319,6 +1319,8 @@ fail:
return ret;
}
+static int extract_extradata(FFFormatContext *si, AVStream *st, const AVPacket *pkt);
+
static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
{
FFFormatContext *const si = ffformatcontext(s);
@@ -1373,6 +1375,16 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
return ret;
}
+ if (!sti->avctx->extradata) {
+ sti->extract_extradata.inited = 0;
+
+ ret = extract_extradata(si, st, pkt);
+ if (ret < 0) {
+ av_packet_unref(pkt);
+ return ret;
+ }
+ }
+
sti->codec_desc = avcodec_descriptor_get(sti->avctx->codec_id);
sti->need_context_update = 0;
@@ -2427,6 +2439,7 @@ static int extract_extradata_init(AVStream *st)
if (!ret)
goto finish;
+ av_bsf_free(&sti->extract_extradata.bsf);
ret = av_bsf_alloc(f, &sti->extract_extradata.bsf);
if (ret < 0)
return ret;
diff --git a/libavformat/evc.c b/libavformat/evc.c
index fabccb054c..2f4c74fe51 100644
--- a/libavformat/evc.c
+++ b/libavformat/evc.c
@@ -233,7 +233,7 @@ static int evcc_write(AVIOContext *pb, EVCDecoderConfigurationRecord *evcc)
/* unsigned int(8) profile_idc */
avio_w8(pb, evcc->profile_idc);
- /* unsigned int(8) profile_idc */
+ /* unsigned int(8) level_idc */
avio_w8(pb, evcc->level_idc);
/* unsigned int(32) toolset_idc_h */
@@ -254,7 +254,7 @@ static int evcc_write(AVIOContext *pb, EVCDecoderConfigurationRecord *evcc)
/* unsigned int(16) pic_width_in_luma_samples; */
avio_wb16(pb, evcc->pic_width_in_luma_samples);
- /* unsigned int(16) pic_width_in_luma_samples; */
+ /* unsigned int(16) pic_height_in_luma_samples; */
avio_wb16(pb, evcc->pic_height_in_luma_samples);
/*
diff --git a/libavformat/file.c b/libavformat/file.c
index 0b7452bc20..0ed4cff266 100644
--- a/libavformat/file.c
+++ b/libavformat/file.c
@@ -442,13 +442,16 @@ static int pipe_open(URLContext *h, const char *filename, int flags)
if (c->fd < 0) {
av_strstart(filename, "pipe:", &filename);
- fd = strtol(filename, &final, 10);
- if((filename == final) || *final ) {/* No digits found, or something like 10ab */
+ if (!*filename) {
if (flags & AVIO_FLAG_WRITE) {
fd = 1;
} else {
fd = 0;
}
+ } else {
+ fd = strtol(filename, &final, 10);
+ if (*final) /* No digits found, or something like 10ab */
+ return AVERROR(EINVAL);
}
c->fd = fd;
}
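
The pipe: protocol now treats a malformed descriptor as an error rather than silently falling back to stdin or stdout: an empty "pipe:" still selects fd 0 or 1 depending on the open mode, but anything with trailing non-digits is rejected. The parsing rule in isolation:

#include <errno.h>
#include <stdlib.h>

/* Return the fd encoded after "pipe:"; empty input picks the default
 * stream for the direction, malformed input yields -EINVAL. */
static long parse_pipe_fd(const char *spec, int writing)
{
    char *end;
    long fd;

    if (!*spec)
        return writing ? 1 : 0;

    fd = strtol(spec, &end, 10);
    if (*end)           /* trailing junk such as "10ab" */
        return -EINVAL;
    return fd;
}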
diff --git a/libavformat/flacdec.c b/libavformat/flacdec.c
index 3d35da5fea..3c317acaee 100644
--- a/libavformat/flacdec.c
+++ b/libavformat/flacdec.c
@@ -283,12 +283,6 @@ static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_inde
if (avio_seek(s->pb, *ppos, SEEK_SET) < 0)
return AV_NOPTS_VALUE;
- parser = av_parser_init(st->codecpar->codec_id);
- if (!parser){
- return AV_NOPTS_VALUE;
- }
- parser->flags |= PARSER_FLAG_USE_CODEC_TS;
-
if (!flac->parser_dec) {
flac->parser_dec = avcodec_alloc_context3(NULL);
if (!flac->parser_dec)
@@ -299,6 +293,11 @@ static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_inde
return ret;
}
+ parser = av_parser_init(st->codecpar->codec_id);
+ if (!parser)
+ return AV_NOPTS_VALUE;
+ parser->flags |= PARSER_FLAG_USE_CODEC_TS;
+
for (;;){
uint8_t *data;
int size;
diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c
index ac8bb189f0..0e2843c6bc 100644
--- a/libavformat/hlsenc.c
+++ b/libavformat/hlsenc.c
@@ -570,12 +570,6 @@ static void reflush_dynbuf(VariantStream *vs, int *range_length)
avio_write(vs->out, vs->temp_buffer, *range_length);
}
-#if HAVE_DOS_PATHS
-#define SEPARATOR '\\'
-#else
-#define SEPARATOR '/'
-#endif
-
static int hls_delete_file(HLSContext *hls, AVFormatContext *avf,
char *path, const char *proto)
{
@@ -668,7 +662,7 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
av_log(hls, AV_LOG_DEBUG, "deleting old segment %s\n",
segment->filename);
if (!hls->use_localtime_mkdir) // segment->filename contains basename only
- av_bprintf(&path, "%s%c", dirname, SEPARATOR);
+ av_bprintf(&path, "%s/", dirname);
av_bprintf(&path, "%s", segment->filename);
if (!av_bprint_is_complete(&path)) {
@@ -685,8 +679,7 @@ static int hls_delete_old_segments(AVFormatContext *s, HLSContext *hls,
vtt_dirname = av_dirname(vtt_dirname_r);
av_bprint_clear(&path);
- av_bprintf(&path, "%s%c%s", vtt_dirname, SEPARATOR,
- segment->sub_filename);
+ av_bprintf(&path, "%s/%s", vtt_dirname, segment->sub_filename);
av_freep(&vtt_dirname_r);
if (!av_bprint_is_complete(&path)) {
@@ -2764,6 +2757,7 @@ static int hls_write_trailer(struct AVFormatContext *s)
filename = av_asprintf("%s", oc->url);
}
if (!filename) {
+ av_dict_free(&options);
av_freep(&old_filename);
return AVERROR(ENOMEM);
}
diff --git a/libavformat/http.c b/libavformat/http.c
index ed20359552..ec60bc0b17 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -31,6 +31,7 @@
#include "libavutil/avstring.h"
#include "libavutil/bprint.h"
#include "libavutil/getenv_utf8.h"
+#include "libavutil/macros.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
@@ -56,7 +57,7 @@
#define MAX_CACHED_REDIRECTS 32
#define HTTP_SINGLE 1
#define HTTP_MUTLI 2
-#define MAX_EXPIRY 19
+#define MAX_DATE_LEN 19
#define WHITESPACES " \n\t\r"
typedef enum {
LOWER_PROTO,
@@ -138,6 +139,10 @@ typedef struct HTTPContext {
char *new_location;
AVDictionary *redirect_cache;
uint64_t filesize_from_content_range;
+ int respect_retry_after;
+ unsigned int retry_after;
+ int reconnect_max_retries;
+ int reconnect_delay_total_max;
} HTTPContext;
#define OFFSET(x) offsetof(HTTPContext, x)
@@ -176,6 +181,9 @@ static const AVOption options[] = {
{ "reconnect_on_http_error", "list of http status codes to reconnect on", OFFSET(reconnect_on_http_error), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
{ "reconnect_streamed", "auto reconnect streamed / non seekable streams", OFFSET(reconnect_streamed), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D },
{ "reconnect_delay_max", "max reconnect delay in seconds after which to give up", OFFSET(reconnect_delay_max), AV_OPT_TYPE_INT, { .i64 = 120 }, 0, UINT_MAX/1000/1000, D },
+ { "reconnect_max_retries", "the max number of times to retry a connection", OFFSET(reconnect_max_retries), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, D },
+ { "reconnect_delay_total_max", "max total reconnect delay in seconds after which to give up", OFFSET(reconnect_delay_total_max), AV_OPT_TYPE_INT, { .i64 = 256 }, 0, UINT_MAX/1000/1000, D },
+ { "respect_retry_after", "respect the Retry-After header when retrying connections", OFFSET(respect_retry_after), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, D },
{ "listen", "listen on HTTP", OFFSET(listen), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, D | E },
{ "resource", "The resource requested by a client", OFFSET(resource), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
{ "reply_code", "The http status code to return to a client", OFFSET(reply_code), AV_OPT_TYPE_INT, { .i64 = 200}, INT_MIN, 599, E},
@@ -286,6 +294,7 @@ static int http_should_reconnect(HTTPContext *s, int err)
case AVERROR_HTTP_UNAUTHORIZED:
case AVERROR_HTTP_FORBIDDEN:
case AVERROR_HTTP_NOT_FOUND:
+ case AVERROR_HTTP_TOO_MANY_REQUESTS:
case AVERROR_HTTP_OTHER_4XX:
status_group = "4xx";
break;
@@ -355,8 +364,9 @@ static int http_open_cnx(URLContext *h, AVDictionary **options)
{
HTTPAuthType cur_auth_type, cur_proxy_auth_type;
HTTPContext *s = h->priv_data;
- int ret, attempts = 0, redirects = 0;
+ int ret, conn_attempts = 1, auth_attempts = 0, redirects = 0;
int reconnect_delay = 0;
+ int reconnect_delay_total = 0;
uint64_t off;
char *cached;
@@ -382,14 +392,26 @@ redo:
ret = http_open_cnx_internal(h, options);
if (ret < 0) {
if (!http_should_reconnect(s, ret) ||
- reconnect_delay > s->reconnect_delay_max)
+ reconnect_delay > s->reconnect_delay_max ||
+ (s->reconnect_max_retries >= 0 && conn_attempts > s->reconnect_max_retries) ||
+ reconnect_delay_total > s->reconnect_delay_total_max)
goto fail;
+ /* Both fields here are in seconds. */
+ if (s->respect_retry_after && s->retry_after > 0) {
+ reconnect_delay = s->retry_after;
+ if (reconnect_delay > s->reconnect_delay_max)
+ goto fail;
+ s->retry_after = 0;
+ }
+
av_log(h, AV_LOG_WARNING, "Will reconnect at %"PRIu64" in %d second(s).\n", off, reconnect_delay);
ret = ff_network_sleep_interruptible(1000U * 1000 * reconnect_delay, &h->interrupt_callback);
if (ret != AVERROR(ETIMEDOUT))
goto fail;
+ reconnect_delay_total += reconnect_delay;
reconnect_delay = 1 + 2 * reconnect_delay;
+ conn_attempts++;
/* restore the offset (http_connect resets it) */
s->off = off;
@@ -398,10 +420,10 @@ redo:
goto redo;
}
- attempts++;
+ auth_attempts++;
if (s->http_code == 401) {
if ((cur_auth_type == HTTP_AUTH_NONE || s->auth_state.stale) &&
- s->auth_state.auth_type != HTTP_AUTH_NONE && attempts < 4) {
+ s->auth_state.auth_type != HTTP_AUTH_NONE && auth_attempts < 4) {
ffurl_closep(&s->hd);
goto redo;
} else
@@ -409,7 +431,7 @@ redo:
}
if (s->http_code == 407) {
if ((cur_proxy_auth_type == HTTP_AUTH_NONE || s->proxy_auth_state.stale) &&
- s->proxy_auth_state.auth_type != HTTP_AUTH_NONE && attempts < 4) {
+ s->proxy_auth_state.auth_type != HTTP_AUTH_NONE && auth_attempts < 4) {
ffurl_closep(&s->hd);
goto redo;
} else
@@ -438,7 +460,7 @@ redo:
/* Restart the authentication process with the new target, which
* might use a different auth mechanism. */
memset(&s->auth_state, 0, sizeof(s->auth_state));
- attempts = 0;
+ auth_attempts = 0;
goto redo;
}
return 0;
@@ -522,6 +544,7 @@ int ff_http_averror(int status_code, int default_averror)
case 401: return AVERROR_HTTP_UNAUTHORIZED;
case 403: return AVERROR_HTTP_FORBIDDEN;
case 404: return AVERROR_HTTP_NOT_FOUND;
+ case 429: return AVERROR_HTTP_TOO_MANY_REQUESTS;
default: break;
}
if (status_code >= 400 && status_code <= 499)
@@ -558,6 +581,11 @@ static int http_write_reply(URLContext* h, int status_code)
reply_code = 404;
reply_text = "Not Found";
break;
+ case AVERROR_HTTP_TOO_MANY_REQUESTS:
+ case 429:
+ reply_code = 429;
+ reply_text = "Too Many Requests";
+ break;
case 200:
reply_code = 200;
reply_text = "OK";
@@ -897,29 +925,29 @@ static int parse_icy(HTTPContext *s, const char *tag, const char *p)
return 0;
}
-static int parse_set_cookie_expiry_time(const char *exp_str, struct tm *buf)
+static int parse_http_date(const char *date_str, struct tm *buf)
{
- char exp_buf[MAX_EXPIRY];
- int i, j, exp_buf_len = MAX_EXPIRY-1;
- char *expiry;
+ char date_buf[MAX_DATE_LEN];
+ int i, j, date_buf_len = MAX_DATE_LEN-1;
+ char *date;
// strip off any punctuation or whitespace
- for (i = 0, j = 0; exp_str[i] != '\0' && j < exp_buf_len; i++) {
- if ((exp_str[i] >= '0' && exp_str[i] <= '9') ||
- (exp_str[i] >= 'A' && exp_str[i] <= 'Z') ||
- (exp_str[i] >= 'a' && exp_str[i] <= 'z')) {
- exp_buf[j] = exp_str[i];
+ for (i = 0, j = 0; date_str[i] != '\0' && j < date_buf_len; i++) {
+ if ((date_str[i] >= '0' && date_str[i] <= '9') ||
+ (date_str[i] >= 'A' && date_str[i] <= 'Z') ||
+ (date_str[i] >= 'a' && date_str[i] <= 'z')) {
+ date_buf[j] = date_str[i];
j++;
}
}
- exp_buf[j] = '\0';
- expiry = exp_buf;
+ date_buf[j] = '\0';
+ date = date_buf;
// move the string beyond the day of week
- while ((*expiry < '0' || *expiry > '9') && *expiry != '\0')
- expiry++;
+ while ((*date < '0' || *date > '9') && *date != '\0')
+ date++;
- return av_small_strptime(expiry, "%d%b%Y%H%M%S", buf) ? 0 : AVERROR(EINVAL);
+ return av_small_strptime(date, "%d%b%Y%H%M%S", buf) ? 0 : AVERROR(EINVAL);
}
static int parse_set_cookie(const char *set_cookie, AVDictionary **dict)
@@ -962,7 +990,7 @@ static int parse_set_cookie(const char *set_cookie, AVDictionary **dict)
static int parse_cookie(HTTPContext *s, const char *p, AVDictionary **cookies)
{
AVDictionary *new_params = NULL;
- AVDictionaryEntry *e, *cookie_entry;
+ const AVDictionaryEntry *e, *cookie_entry;
char *eql, *name;
// ensure the cookie is parsable
@@ -970,7 +998,7 @@ static int parse_cookie(HTTPContext *s, const char *p, AVDictionary **cookies)
return -1;
// if there is no cookie value there is nothing to parse
- cookie_entry = av_dict_get(new_params, "", NULL, AV_DICT_IGNORE_SUFFIX);
+ cookie_entry = av_dict_iterate(new_params, NULL);
if (!cookie_entry || !cookie_entry->value) {
av_dict_free(&new_params);
return -1;
@@ -979,7 +1007,7 @@ static int parse_cookie(HTTPContext *s, const char *p, AVDictionary **cookies)
// ensure the cookie is not expired or older than an existing value
if ((e = av_dict_get(new_params, "expires", NULL, 0)) && e->value) {
struct tm new_tm = {0};
- if (!parse_set_cookie_expiry_time(e->value, &new_tm)) {
+ if (!parse_http_date(e->value, &new_tm)) {
AVDictionaryEntry *e2;
// if the cookie has already expired ignore it
@@ -996,7 +1024,7 @@ static int parse_cookie(HTTPContext *s, const char *p, AVDictionary **cookies)
e2 = av_dict_get(old_params, "expires", NULL, 0);
if (e2 && e2->value) {
struct tm old_tm = {0};
- if (!parse_set_cookie_expiry_time(e->value, &old_tm)) {
+ if (!parse_http_date(e->value, &old_tm)) {
if (av_timegm(&new_tm) < av_timegm(&old_tm)) {
av_dict_free(&new_params);
av_dict_free(&old_params);
@@ -1048,7 +1076,7 @@ static void parse_expires(HTTPContext *s, const char *p)
{
struct tm tm;
- if (!parse_set_cookie_expiry_time(p, &tm)) {
+ if (!parse_http_date(p, &tm)) {
s->expires = av_timegm(&tm);
}
}
@@ -1080,7 +1108,7 @@ static void parse_cache_control(HTTPContext *s, const char *p)
}
}
-static int process_line(URLContext *h, char *line, int line_count)
+static int process_line(URLContext *h, char *line, int line_count, int *parsed_http_code)
{
HTTPContext *s = h->priv_data;
const char *auto_method = h->flags & AVIO_FLAG_READ ? "POST" : "GET";
@@ -1160,6 +1188,8 @@ static int process_line(URLContext *h, char *line, int line_count)
av_log(h, AV_LOG_TRACE, "http_code=%d\n", s->http_code);
+ *parsed_http_code = 1;
+
if ((ret = check_http_code(h, s->http_code, end)) < 0)
return ret;
}
@@ -1223,6 +1253,18 @@ static int process_line(URLContext *h, char *line, int line_count)
parse_expires(s, p);
} else if (!av_strcasecmp(tag, "Cache-Control")) {
parse_cache_control(s, p);
+ } else if (!av_strcasecmp(tag, "Retry-After")) {
+ /* The header can be either an integer that represents seconds, or a date. */
+ struct tm tm;
+ int date_ret = parse_http_date(p, &tm);
+ if (!date_ret) {
+ time_t retry = av_timegm(&tm);
+ int64_t now = av_gettime() / 1000000;
+ int64_t diff = ((int64_t) retry) - now;
+ s->retry_after = (unsigned int) FFMAX(0, diff);
+ } else {
+ s->retry_after = strtoul(p, NULL, 10);
+ }
}
}
return 1;
@@ -1258,7 +1300,7 @@ static int get_cookies(HTTPContext *s, char **cookies, const char *path,
*cookies = NULL;
while ((cookie = av_strtok(next, "\n", &saveptr)) && !ret) {
AVDictionary *cookie_params = NULL;
- AVDictionaryEntry *cookie_entry, *e;
+ const AVDictionaryEntry *cookie_entry, *e;
next = NULL;
// store the cookie in a dict in case it is updated in the response
@@ -1270,14 +1312,14 @@ static int get_cookies(HTTPContext *s, char **cookies, const char *path,
goto skip_cookie;
// if the cookie has no value, skip it
- cookie_entry = av_dict_get(cookie_params, "", NULL, AV_DICT_IGNORE_SUFFIX);
+ cookie_entry = av_dict_iterate(cookie_params, NULL);
if (!cookie_entry || !cookie_entry->value)
goto skip_cookie;
// if the cookie has expired, don't add it
if ((e = av_dict_get(cookie_params, "expires", NULL, 0)) && e->value) {
struct tm tm_buf = {0};
- if (!parse_set_cookie_expiry_time(e->value, &tm_buf)) {
+ if (!parse_http_date(e->value, &tm_buf)) {
if (av_timegm(&tm_buf) < av_gettime() / 1000000)
goto skip_cookie;
}
@@ -1332,7 +1374,7 @@ static int http_read_header(URLContext *h)
{
HTTPContext *s = h->priv_data;
char line[MAX_URL_SIZE];
- int err = 0;
+ int err = 0, http_err = 0;
av_freep(&s->new_location);
s->expires = 0;
@@ -1340,18 +1382,31 @@ static int http_read_header(URLContext *h)
s->filesize_from_content_range = UINT64_MAX;
for (;;) {
+ int parsed_http_code = 0;
+
if ((err = http_get_line(s, line, sizeof(line))) < 0)
return err;
av_log(h, AV_LOG_TRACE, "header='%s'\n", line);
- err = process_line(h, line, s->line_count);
- if (err < 0)
- return err;
+ err = process_line(h, line, s->line_count, &parsed_http_code);
+ if (err < 0) {
+ if (parsed_http_code) {
+ http_err = err;
+ } else {
+ /* Prefer to return HTTP code error if we've already seen one. */
+ if (http_err)
+ return http_err;
+ else
+ return err;
+ }
+ }
if (err == 0)
break;
s->line_count++;
}
+ if (http_err)
+ return http_err;
// filesize from Content-Range can always be used, even if using chunked Transfer-Encoding
if (s->filesize_from_content_range != UINT64_MAX)
@@ -1673,6 +1728,8 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size)
int err, read_ret;
int64_t seek_ret;
int reconnect_delay = 0;
+ int reconnect_delay_total = 0;
+ int conn_attempts = 1;
if (!s->hd)
return AVERROR_EOF;
@@ -1701,14 +1758,17 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size)
!(s->reconnect_at_eof && read_ret == AVERROR_EOF))
break;
- if (reconnect_delay > s->reconnect_delay_max)
+ if (reconnect_delay > s->reconnect_delay_max || (s->reconnect_max_retries >= 0 && conn_attempts > s->reconnect_max_retries) ||
+ reconnect_delay_total > s->reconnect_delay_total_max)
return AVERROR(EIO);
av_log(h, AV_LOG_WARNING, "Will reconnect at %"PRIu64" in %d second(s), error=%s.\n", s->off, reconnect_delay, av_err2str(read_ret));
err = ff_network_sleep_interruptible(1000U*1000*reconnect_delay, &h->interrupt_callback);
if (err != AVERROR(ETIMEDOUT))
return err;
+ reconnect_delay_total += reconnect_delay;
reconnect_delay = 1 + 2*reconnect_delay;
+ conn_attempts++;
seek_ret = http_seek_internal(h, target, SEEK_SET, 1);
if (seek_ret >= 0 && seek_ret != target) {
av_log(h, AV_LOG_ERROR, "Failed to reconnect at %"PRIu64".\n", target);
@@ -2049,7 +2109,7 @@ static int http_proxy_open(URLContext *h, const char *uri, int flags)
char hostname[1024], hoststr[1024];
char auth[1024], pathbuf[1024], *path;
char lower_url[100];
- int port, ret = 0, attempts = 0;
+ int port, ret = 0, auth_attempts = 0;
HTTPAuthType cur_auth_type;
char *authstr;
@@ -2109,10 +2169,10 @@ redo:
if (ret < 0)
goto fail;
- attempts++;
+ auth_attempts++;
if (s->http_code == 407 &&
(cur_auth_type == HTTP_AUTH_NONE || s->proxy_auth_state.stale) &&
- s->proxy_auth_state.auth_type != HTTP_AUTH_NONE && attempts < 2) {
+ s->proxy_auth_state.auth_type != HTTP_AUTH_NONE && auth_attempts < 2) {
ffurl_closep(&s->hd);
goto redo;
}
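
Editorial note: the Retry-After handling added above accepts the two forms RFC 9110 allows, a decimal number of seconds or an HTTP-date, and converts the latter into a relative delay against the current time. A minimal standalone sketch of that resolution, using only POSIX calls (strptime/timegm are assumptions about the build environment, not part of this patch, and the helper name is made up):

#define _GNU_SOURCE
#include <stdlib.h>
#include <time.h>

/* Hypothetical helper, not FFmpeg API: returns the delay in seconds. */
static unsigned int retry_after_seconds(const char *value)
{
    struct tm tm = {0};
    /* HTTP-date form, e.g. "Fri, 31 Dec 1999 23:59:59 GMT" */
    if (strptime(value, "%a, %d %b %Y %H:%M:%S GMT", &tm)) {
        time_t retry = timegm(&tm);   /* timegm() is a GNU/BSD extension */
        time_t now   = time(NULL);
        return retry > now ? (unsigned int)(retry - now) : 0;
    }
    /* delay-seconds form, e.g. "120" */
    return (unsigned int)strtoul(value, NULL, 10);
}
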
diff --git a/libavformat/iamf_reader.c b/libavformat/iamf_reader.c
index 014e8e3ecc..cdb412f637 100644
--- a/libavformat/iamf_reader.c
+++ b/libavformat/iamf_reader.c
@@ -276,7 +276,8 @@ int ff_iamf_read_packet(AVFormatContext *s, IAMFDemuxContext *c,
unsigned skip_samples, discard_padding;
int ret, len, size, start_pos;
- if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0)
+ ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
+ if (ret < 0)
return ret;
size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size));
if (size < 0)
diff --git a/libavformat/iamf_writer.c b/libavformat/iamf_writer.c
index 37ec8e732a..6d4e4082eb 100644
--- a/libavformat/iamf_writer.c
+++ b/libavformat/iamf_writer.c
@@ -275,6 +275,17 @@ int ff_iamf_add_audio_element(IAMFContext *iamf, const AVStreamGroup *stg, void
}
}
+ for (int i = 0; i < audio_element->nb_substreams; i++) {
+ for (int j = i + 1; j < audio_element->nb_substreams; j++)
+ if (audio_element->substreams[i].audio_substream_id ==
+ audio_element->substreams[j].audio_substream_id) {
+ av_log(log_ctx, AV_LOG_ERROR, "Duplicate id %u in streams %u and %u from stream group %u\n",
+ audio_element->substreams[i].audio_substream_id, i, j, stg->index);
+ ret = AVERROR(EINVAL);
+ goto fail;
+ }
+ }
+
if (iamf_audio_element->demixing_info) {
AVIAMFParamDefinition *param = iamf_audio_element->demixing_info;
const IAMFParamDefinition *param_definition = ff_iamf_get_param_definition(iamf, param->parameter_id);
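
Editorial note: the new loop above rejects an audio element in which two substreams reuse the same audio_substream_id. The same pairwise uniqueness check, restated in isolation as a sketch (names are illustrative, not FFmpeg API):

static int has_duplicate_ids(const unsigned *ids, int n)
{
    for (int i = 0; i < n; i++)
        for (int j = i + 1; j < n; j++)
            if (ids[i] == ids[j])
                return 1;   /* ids[i] and ids[j] collide */
    return 0;
}
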
diff --git a/libavformat/iamfdec.c b/libavformat/iamfdec.c
index e34d13e74c..ce6d4aa064 100644
--- a/libavformat/iamfdec.c
+++ b/libavformat/iamfdec.c
@@ -154,6 +154,9 @@ static int iamf_read_header(AVFormatContext *s)
}
}
+ if (!s->nb_streams)
+ return AVERROR_INVALIDDATA;
+
return 0;
}
diff --git a/libavformat/img2.c b/libavformat/img2.c
index 06e48549ac..9981867f82 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -20,80 +20,90 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <assert.h>
+
#include "libavutil/avstring.h"
#include "internal.h"
#include "img2.h"
+#define IMG_TAGS(TAG) \
+ TAG(MJPEG, jpeg ) \
+ TAG(MJPEG, jpg ) \
+ TAG(MJPEG, jps ) \
+ TAG(MJPEG, mpo ) \
+ TAG(LJPEG, ljpg ) \
+ TAG(JPEGLS, jls ) \
+ TAG(PNG, png ) \
+ TAG(PNG, pns ) \
+ TAG(PNG, mng ) \
+ TAG(PPM, ppm ) \
+ TAG(PPM, pnm ) \
+ TAG(PGM, pgm ) \
+ TAG(PGMYUV, pgmyuv ) \
+ TAG(PBM, pbm ) \
+ TAG(PAM, pam ) \
+ TAG(PFM, pfm ) \
+ TAG(PHM, phm ) \
+ TAG(CRI, cri ) \
+ TAG(ALIAS_PIX, pix ) \
+ TAG(DDS, dds ) \
+ TAG(MPEG1VIDEO, mpg1-img ) \
+ TAG(MPEG2VIDEO, mpg2-img ) \
+ TAG(MPEG4, mpg4-img ) \
+ TAG(RAWVIDEO, y ) \
+ TAG(RAWVIDEO, raw ) \
+ TAG(BMP, bmp ) \
+ TAG(TARGA, tga ) \
+ TAG(TIFF, tiff ) \
+ TAG(TIFF, tif ) \
+ TAG(TIFF, dng ) \
+ TAG(SGI, sgi ) \
+ TAG(PTX, ptx ) \
+ TAG(PHOTOCD, pcd ) \
+ TAG(PCX, pcx ) \
+ TAG(QDRAW, pic ) \
+ TAG(QDRAW, pct ) \
+ TAG(QDRAW, pict ) \
+ TAG(SUNRAST, sun ) \
+ TAG(SUNRAST, ras ) \
+ TAG(SUNRAST, rs ) \
+ TAG(SUNRAST, im1 ) \
+ TAG(SUNRAST, im8 ) \
+ TAG(SUNRAST, im24 ) \
+ TAG(SUNRAST, im32 ) \
+ TAG(SUNRAST, sunras ) \
+ TAG(SVG, svg ) \
+ TAG(SVG, svgz ) \
+ TAG(JPEG2000, j2c ) \
+ TAG(JPEG2000, jp2 ) \
+ TAG(JPEG2000, jpc ) \
+ TAG(JPEG2000, j2k ) \
+ TAG(DPX, dpx ) \
+ TAG(EXR, exr ) \
+ TAG(PICTOR, pic ) \
+ TAG(V210X, yuv10 ) \
+ TAG(WEBP, webp ) \
+ TAG(XBM, xbm ) \
+ TAG(XPM, xpm ) \
+ TAG(XFACE, xface ) \
+ TAG(XWD, xwd ) \
+ TAG(GEM, img ) \
+ TAG(GEM, ximg ) \
+ TAG(GEM, timg ) \
+ TAG(VBN, vbn ) \
+ TAG(JPEGXL, jxl ) \
+ TAG(QOI, qoi ) \
+ TAG(RADIANCE_HDR, hdr ) \
+ TAG(WBMP, wbmp ) \
+ TAG(NONE, )
+
+#define LENGTH_CHECK(CODECID, STR) \
+ static_assert(sizeof(#STR) <= sizeof(ff_img_tags->str), #STR " does not fit into IdStrMap.str\n");
+IMG_TAGS(LENGTH_CHECK)
+
const IdStrMap ff_img_tags[] = {
- { AV_CODEC_ID_MJPEG, "jpeg" },
- { AV_CODEC_ID_MJPEG, "jpg" },
- { AV_CODEC_ID_MJPEG, "jps" },
- { AV_CODEC_ID_MJPEG, "mpo" },
- { AV_CODEC_ID_LJPEG, "ljpg" },
- { AV_CODEC_ID_JPEGLS, "jls" },
- { AV_CODEC_ID_PNG, "png" },
- { AV_CODEC_ID_PNG, "pns" },
- { AV_CODEC_ID_PNG, "mng" },
- { AV_CODEC_ID_PPM, "ppm" },
- { AV_CODEC_ID_PPM, "pnm" },
- { AV_CODEC_ID_PGM, "pgm" },
- { AV_CODEC_ID_PGMYUV, "pgmyuv" },
- { AV_CODEC_ID_PBM, "pbm" },
- { AV_CODEC_ID_PAM, "pam" },
- { AV_CODEC_ID_PFM, "pfm" },
- { AV_CODEC_ID_PHM, "phm" },
- { AV_CODEC_ID_CRI, "cri" },
- { AV_CODEC_ID_ALIAS_PIX, "pix" },
- { AV_CODEC_ID_DDS, "dds" },
- { AV_CODEC_ID_MPEG1VIDEO, "mpg1-img" },
- { AV_CODEC_ID_MPEG2VIDEO, "mpg2-img" },
- { AV_CODEC_ID_MPEG4, "mpg4-img" },
- { AV_CODEC_ID_RAWVIDEO, "y" },
- { AV_CODEC_ID_RAWVIDEO, "raw" },
- { AV_CODEC_ID_BMP, "bmp" },
- { AV_CODEC_ID_TARGA, "tga" },
- { AV_CODEC_ID_TIFF, "tiff" },
- { AV_CODEC_ID_TIFF, "tif" },
- { AV_CODEC_ID_TIFF, "dng" },
- { AV_CODEC_ID_SGI, "sgi" },
- { AV_CODEC_ID_PTX, "ptx" },
- { AV_CODEC_ID_PHOTOCD, "pcd" },
- { AV_CODEC_ID_PCX, "pcx" },
- { AV_CODEC_ID_QDRAW, "pic" },
- { AV_CODEC_ID_QDRAW, "pct" },
- { AV_CODEC_ID_QDRAW, "pict" },
- { AV_CODEC_ID_SUNRAST, "sun" },
- { AV_CODEC_ID_SUNRAST, "ras" },
- { AV_CODEC_ID_SUNRAST, "rs" },
- { AV_CODEC_ID_SUNRAST, "im1" },
- { AV_CODEC_ID_SUNRAST, "im8" },
- { AV_CODEC_ID_SUNRAST, "im24" },
- { AV_CODEC_ID_SUNRAST, "im32" },
- { AV_CODEC_ID_SUNRAST, "sunras" },
- { AV_CODEC_ID_SVG, "svg" },
- { AV_CODEC_ID_SVG, "svgz" },
- { AV_CODEC_ID_JPEG2000, "j2c" },
- { AV_CODEC_ID_JPEG2000, "jp2" },
- { AV_CODEC_ID_JPEG2000, "jpc" },
- { AV_CODEC_ID_JPEG2000, "j2k" },
- { AV_CODEC_ID_DPX, "dpx" },
- { AV_CODEC_ID_EXR, "exr" },
- { AV_CODEC_ID_PICTOR, "pic" },
- { AV_CODEC_ID_V210X, "yuv10" },
- { AV_CODEC_ID_WEBP, "webp" },
- { AV_CODEC_ID_XBM, "xbm" },
- { AV_CODEC_ID_XPM, "xpm" },
- { AV_CODEC_ID_XFACE, "xface" },
- { AV_CODEC_ID_XWD, "xwd" },
- { AV_CODEC_ID_GEM, "img" },
- { AV_CODEC_ID_GEM, "ximg" },
- { AV_CODEC_ID_GEM, "timg" },
- { AV_CODEC_ID_VBN, "vbn" },
- { AV_CODEC_ID_JPEGXL, "jxl" },
- { AV_CODEC_ID_QOI, "qoi" },
- { AV_CODEC_ID_RADIANCE_HDR, "hdr" },
- { AV_CODEC_ID_WBMP, "wbmp" },
- { AV_CODEC_ID_NONE, NULL }
+#define TAG(CODECID, STR) { AV_CODEC_ID_ ## CODECID, #STR },
+IMG_TAGS(TAG)
};
static enum AVCodecID str2id(const IdStrMap *tags, const char *str)
diff --git a/libavformat/img2.h b/libavformat/img2.h
index 5fd8ff77fc..e98902c96f 100644
--- a/libavformat/img2.h
+++ b/libavformat/img2.h
@@ -66,7 +66,7 @@ typedef struct VideoDemuxData {
typedef struct IdStrMap {
enum AVCodecID id;
- const char *str;
+ char str[12];
} IdStrMap;
extern const IdStrMap ff_img_tags[];
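
Editorial note: the img2 change is a single X-macro list expanded twice: once through LENGTH_CHECK so every extension literal is static_assert-ed (C11) to fit the new fixed char str[12] field, and once through TAG to emit the table itself. A minimal sketch of the same pattern with hypothetical names, not the FFmpeg table:

#include <assert.h>

typedef struct NameEntry { int id; char name[8]; } NameEntry;

#define NAMES(X) \
    X(1, red)    \
    X(2, green)  \
    X(3, blue)

/* Pass 1: compile-time check that every literal fits the char[8] field. */
#define CHECK(ID, STR) \
    static_assert(sizeof(#STR) <= sizeof(((NameEntry *)0)->name), #STR " is too long");
NAMES(CHECK)
#undef CHECK

/* Pass 2: emit the table entries from the same list. */
#define ENTRY(ID, STR) { ID, #STR },
static const NameEntry name_table[] = { NAMES(ENTRY) };
#undef ENTRY
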
diff --git a/libavformat/internal.h b/libavformat/internal.h
index 7f3d1c0086..6bad4fd119 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -727,9 +727,6 @@ struct AVBPrint;
*/
int ff_bprint_to_codecpar_extradata(AVCodecParameters *par, struct AVBPrint *buf);
-int ff_lock_avformat(void);
-int ff_unlock_avformat(void);
-
/**
* Set AVFormatContext url field to the provided pointer. The pointer must
* point to a valid string. The existing url field is freed if necessary. Also
diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index 09782eecd6..9feaea3516 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c
@@ -24,7 +24,7 @@
typedef struct IVFEncContext {
unsigned frame_cnt;
- uint64_t last_pts, sum_delta_pts, last_pkt_duration;
+ uint64_t last_pts, last_pkt_duration;
} IVFEncContext;
static int ivf_init(AVFormatContext *s)
@@ -80,8 +80,6 @@ static int ivf_write_packet(AVFormatContext *s, AVPacket *pkt)
avio_wl32(pb, pkt->size);
avio_wl64(pb, pkt->pts);
avio_write(pb, pkt->data, pkt->size);
- if (ctx->frame_cnt)
- ctx->sum_delta_pts += pkt->pts - ctx->last_pts;
ctx->last_pkt_duration = pkt->duration;
ctx->frame_cnt++;
ctx->last_pts = pkt->pts;
diff --git a/libavformat/kvag.c b/libavformat/kvag.c
index 1d0aee0994..b55aa893ec 100644
--- a/libavformat/kvag.c
+++ b/libavformat/kvag.c
@@ -38,7 +38,7 @@
typedef struct KVAGHeader {
uint32_t magic;
uint32_t data_size;
- uint32_t sample_rate;
+ int sample_rate;
uint16_t stereo;
} KVAGHeader;
@@ -70,6 +70,9 @@ static int kvag_read_header(AVFormatContext *s)
hdr.sample_rate = AV_RL32(buf + 8);
hdr.stereo = AV_RL16(buf + 12);
+ if (hdr.sample_rate <= 0)
+ return AVERROR_INVALIDDATA;
+
par = st->codecpar;
par->codec_type = AVMEDIA_TYPE_AUDIO;
par->codec_id = AV_CODEC_ID_ADPCM_IMA_SSI;
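
Editorial note: the kvag change stores the 32-bit sample-rate field as a signed int so the following <= 0 check also rejects values with the top bit set. A tiny standalone sketch of the same read-and-validate step (helper name is made up):

#include <limits.h>
#include <stdint.h>

static int read_sample_rate(const uint8_t *buf)   /* 4 bytes, little-endian */
{
    uint32_t v = buf[0] | (buf[1] << 8) | (buf[2] << 16) | ((uint32_t)buf[3] << 24);
    if (v == 0 || v > INT_MAX)
        return -1;    /* reject zero and values that would wrap negative */
    return (int)v;
}
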
diff --git a/libavformat/lc3.c b/libavformat/lc3.c
new file mode 100644
index 0000000000..16c12a98d7
--- /dev/null
+++ b/libavformat/lc3.c
@@ -0,0 +1,244 @@
+/*
+ * LC3 muxer and demuxer
+ * Copyright (C) 2024 Antoine Soulier <asoulier@google.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Based on the file format specified by:
+ *
+ * - Bluetooth SIG - Low Complexity Communication Codec Test Suite
+ * https://www.bluetooth.org/docman/handlers/downloaddoc.ashx?doc_id=502301
+ * 3.2.8.2 Reference LC3 Codec Bitstream Format
+ *
+ * - ETSI TS 103 634 V1.4.1 - Low Complexity Communication Codec plus
+ * https://www.etsi.org/deliver/etsi_ts/103600_103699/103634/01.04.01_60/ts_103634v010401p.pdf
+ * LC3plus conformance script package
+ */
+
+#include "config_components.h"
+
+#include "libavcodec/packet.h"
+#include "libavutil/intreadwrite.h"
+
+#include "avformat.h"
+#include "avio.h"
+#include "demux.h"
+#include "internal.h"
+#include "mux.h"
+
+static int check_frame_length(void *avcl, int srate_hz, int frame_us)
+{
+ if (srate_hz != 8000 && srate_hz != 16000 && srate_hz != 24000 &&
+ srate_hz != 32000 && srate_hz != 48000 && srate_hz != 96000) {
+ if (avcl)
+ av_log(avcl, AV_LOG_ERROR,
+ "Invalid LC3 sample rate: %d Hz.\n", srate_hz);
+ return -1;
+ }
+
+ if (frame_us != 2500 && frame_us != 5000 &&
+ frame_us != 7500 && frame_us != 10000) {
+ if (avcl)
+ av_log(avcl, AV_LOG_ERROR,
+ "Invalid LC3 frame duration: %.1f ms.\n", frame_us / 1000.f);
+ return -1;
+ }
+
+ return 0;
+}
+
+#if CONFIG_LC3_DEMUXER
+
+typedef struct LC3DemuxContext {
+ int frame_samples;
+ int64_t end_dts;
+} LC3DemuxContext;
+
+static int lc3_read_probe(const AVProbeData *p)
+{
+ int frame_us, srate_hz;
+
+ if (p->buf_size < 12)
+ return 0;
+
+ if (AV_RB16(p->buf + 0) != 0x1ccc ||
+ AV_RL16(p->buf + 2) < 9 * sizeof(uint16_t))
+ return 0;
+
+ srate_hz = AV_RL16(p->buf + 4) * 100;
+ frame_us = AV_RL16(p->buf + 10) * 10;
+ if (check_frame_length(NULL, srate_hz, frame_us) < 0)
+ return 0;
+
+ return AVPROBE_SCORE_MAX;
+}
+
+static int lc3_read_header(AVFormatContext *s)
+{
+ LC3DemuxContext *lc3 = s->priv_data;
+ AVStream *st = NULL;
+ uint16_t tag, hdr_size;
+ uint32_t length;
+ int srate_hz, frame_us, channels, bit_rate;
+ int ep_mode, hr_mode;
+ int num_extra_params;
+ int delay, ret;
+
+ tag = avio_rb16(s->pb);
+ hdr_size = avio_rl16(s->pb);
+
+ if (tag != 0x1ccc || hdr_size < 9 * sizeof(uint16_t))
+ return AVERROR_INVALIDDATA;
+
+ num_extra_params = hdr_size / sizeof(uint16_t) - 9;
+
+ srate_hz = avio_rl16(s->pb) * 100;
+ bit_rate = avio_rl16(s->pb) * 100;
+ channels = avio_rl16(s->pb);
+ frame_us = avio_rl16(s->pb) * 10;
+ ep_mode = avio_rl16(s->pb) != 0;
+ length = avio_rl32(s->pb);
+ hr_mode = num_extra_params >= 1 && avio_rl16(s->pb);
+
+ if (check_frame_length(s, srate_hz, frame_us) < 0)
+ return AVERROR_INVALIDDATA;
+
+ st = avformat_new_stream(s, NULL);
+ if (!st)
+ return AVERROR(ENOMEM);
+
+ avpriv_set_pts_info(st, 64, 1, srate_hz);
+ avpriv_update_cur_dts(s, st, 0);
+ st->duration = length;
+
+ st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+ st->codecpar->codec_id = AV_CODEC_ID_LC3;
+ st->codecpar->sample_rate = srate_hz;
+ st->codecpar->bit_rate = bit_rate;
+ st->codecpar->ch_layout.nb_channels = channels;
+
+ if ((ret = ff_alloc_extradata(st->codecpar, 6)) < 0)
+ return ret;
+
+ AV_WL16(st->codecpar->extradata + 0, frame_us / 10);
+ AV_WL16(st->codecpar->extradata + 2, ep_mode);
+ AV_WL16(st->codecpar->extradata + 4, hr_mode);
+
+ lc3->frame_samples = av_rescale(frame_us, srate_hz, 1000*1000);
+
+ delay = av_rescale(frame_us == 7500 ? 4000 : 2500, srate_hz, 1000*1000);
+ lc3->end_dts = length ? length + delay : -1;
+
+ return 0;
+}
+
+static int lc3_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ LC3DemuxContext *lc3 = s->priv_data;
+ AVStream *st = s->streams[0];
+ AVIOContext *pb = s->pb;
+ int64_t pos = avio_tell(pb);
+ int64_t remaining_samples;
+ int ret;
+
+ ret = av_get_packet(s->pb, pkt, avio_rl16(pb));
+ if (ret < 0)
+ return ret;
+
+ pkt->pos = pos;
+
+ remaining_samples = lc3->end_dts < 0 ? lc3->frame_samples :
+ FFMAX(lc3->end_dts - ffstream(st)->cur_dts, 0);
+ pkt->duration = FFMIN(lc3->frame_samples, remaining_samples);
+
+ return 0;
+}
+
+const FFInputFormat ff_lc3_demuxer = {
+ .p.name = "lc3",
+ .p.long_name = NULL_IF_CONFIG_SMALL("LC3 (Low Complexity Communication Codec)"),
+ .p.extensions = "lc3",
+ .p.flags = AVFMT_GENERIC_INDEX,
+ .priv_data_size = sizeof(LC3DemuxContext),
+ .read_probe = lc3_read_probe,
+ .read_header = lc3_read_header,
+ .read_packet = lc3_read_packet,
+};
+
+#endif /* CONFIG_LC3_DEMUXER */
+
+#if CONFIG_LC3_MUXER
+
+static int lc3_write_header(AVFormatContext *s)
+{
+ AVStream *st = s->streams[0];
+ int channels = st->codecpar->ch_layout.nb_channels;
+ int srate_hz = st->codecpar->sample_rate;
+ int bit_rate = st->codecpar->bit_rate;
+ int frame_us, ep_mode, hr_mode;
+ uint32_t nb_samples = av_rescale_q(
+ st->duration, st->time_base, (AVRational){ 1, srate_hz });
+
+ if (st->codecpar->extradata_size < 6)
+ return AVERROR_INVALIDDATA;
+
+ frame_us = AV_RL16(st->codecpar->extradata + 0) * 10;
+ ep_mode = AV_RL16(st->codecpar->extradata + 2) != 0;
+ hr_mode = AV_RL16(st->codecpar->extradata + 4) != 0;
+
+ if (check_frame_length(s, srate_hz, frame_us) < 0)
+ return AVERROR_INVALIDDATA;
+
+ avio_wb16(s->pb, 0x1ccc);
+ avio_wl16(s->pb, (9 + hr_mode) * sizeof(uint16_t));
+ avio_wl16(s->pb, srate_hz / 100);
+ avio_wl16(s->pb, bit_rate / 100);
+ avio_wl16(s->pb, channels);
+ avio_wl16(s->pb, frame_us / 10);
+ avio_wl16(s->pb, ep_mode);
+ avio_wl32(s->pb, nb_samples);
+ if (hr_mode)
+ avio_wl16(s->pb, hr_mode);
+
+ return 0;
+}
+
+static int lc3_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ avio_wl16(s->pb, pkt->size);
+ avio_write(s->pb, pkt->data, pkt->size);
+ return 0;
+}
+
+const FFOutputFormat ff_lc3_muxer = {
+ .p.name = "lc3",
+ .p.long_name = NULL_IF_CONFIG_SMALL("LC3 (Low Complexity Communication Codec)"),
+ .p.extensions = "lc3",
+ .p.audio_codec = AV_CODEC_ID_LC3,
+ .p.video_codec = AV_CODEC_ID_NONE,
+ .p.subtitle_codec = AV_CODEC_ID_NONE,
+ .p.flags = AVFMT_NOTIMESTAMPS,
+ .flags_internal = FF_OFMT_FLAG_MAX_ONE_OF_EACH |
+ FF_OFMT_FLAG_ONLY_DEFAULT_CODECS,
+ .write_header = lc3_write_header,
+ .write_packet = lc3_write_packet,
+};
+
+#endif /* CONFIG_LC3_MUXER */
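
Editorial note: the new lc3 muxer writes a small fixed header, a big-endian 0x1ccc magic followed by little-endian 16-bit fields (header size, sample rate / 100, bit rate / 100, channel count, frame duration / 10, error-protection flag), a 32-bit sample count, and an optional high-resolution flag. A sketch that packs the same fields into a plain buffer, mirroring lc3_write_header above (helper names are invented for the sketch):

#include <stddef.h>
#include <stdint.h>

static void put_le16(uint8_t **p, unsigned v) { (*p)[0] = v & 0xff; (*p)[1] = (v >> 8) & 0xff; *p += 2; }
static void put_le32(uint8_t **p, uint32_t v) { put_le16(p, v & 0xffff); put_le16(p, v >> 16); }

static size_t pack_lc3_header(uint8_t *buf, int srate_hz, int bit_rate,
                              int channels, int frame_us, int ep_mode,
                              uint32_t nb_samples, int hr_mode)
{
    uint8_t *p = buf;
    *p++ = 0x1c; *p++ = 0xcc;                   /* magic, stored big-endian */
    put_le16(&p, (9 + (hr_mode != 0)) * 2);     /* header size in bytes */
    put_le16(&p, srate_hz / 100);
    put_le16(&p, bit_rate / 100);
    put_le16(&p, channels);
    put_le16(&p, frame_us / 10);
    put_le16(&p, ep_mode != 0);
    put_le32(&p, nb_samples);                   /* stream length in samples */
    if (hr_mode)
        put_le16(&p, 1);                        /* optional high-resolution flag */
    return (size_t)(p - buf);
}
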
diff --git a/libavformat/lrc.c b/libavformat/lrc.c
index 139c6506e4..6c78a91d61 100644
--- a/libavformat/lrc.c
+++ b/libavformat/lrc.c
@@ -1,5 +1,5 @@
/*
- * LRC lyrics file format decoder
+ * LRC lyrics file format common structs
* Copyright (c) 2014 StarBrilliant <m13253@hotmail.com>
*
* This file is part of FFmpeg.
diff --git a/libavformat/lrcdec.c b/libavformat/lrcdec.c
index 5435a65b15..68c44bce97 100644
--- a/libavformat/lrcdec.c
+++ b/libavformat/lrcdec.c
@@ -1,5 +1,5 @@
/*
- * LRC lyrics file format decoder
+ * LRC lyrics file format demuxer
* Copyright (c) 2014 StarBrilliant <m13253@hotmail.com>
*
* This file is part of FFmpeg.
diff --git a/libavformat/lrcenc.c b/libavformat/lrcenc.c
index 15c31d33b3..7570529c20 100644
--- a/libavformat/lrcenc.c
+++ b/libavformat/lrcenc.c
@@ -1,5 +1,5 @@
/*
- * LRC lyrics file format decoder
+ * LRC lyrics file format muxer
* Copyright (c) 2014 StarBrilliant <m13253@hotmail.com>
*
* This file is part of FFmpeg.
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 566e9f4981..63bae9fe1c 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -3500,20 +3500,20 @@ static const AVCodecTag additional_subtitle_tags[] = {
#define OFFSET(x) offsetof(MatroskaMuxContext, x)
#define FLAGS AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
- { "reserve_index_space", "Reserve a given amount of space (in bytes) at the beginning of the file for the index (cues).", OFFSET(reserve_cues_space), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
- { "cues_to_front", "Move Cues (the index) to the front by shifting data if necessary", OFFSET(move_cues_to_front), AV_OPT_TYPE_BOOL, { .i64 = 0}, 0, 1, FLAGS },
- { "cluster_size_limit", "Store at most the provided amount of bytes in a cluster. ", OFFSET(cluster_size_limit), AV_OPT_TYPE_INT , { .i64 = -1 }, -1, INT_MAX, FLAGS },
- { "cluster_time_limit", "Store at most the provided number of milliseconds in a cluster.", OFFSET(cluster_time_limit), AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, FLAGS },
- { "dash", "Create a WebM file conforming to WebM DASH specification", OFFSET(is_dash), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
- { "dash_track_number", "Track number for the DASH stream", OFFSET(dash_track_number), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS },
- { "live", "Write files assuming it is a live stream.", OFFSET(is_live), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
- { "allow_raw_vfw", "allow RAW VFW mode", OFFSET(allow_raw_vfw), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
- { "flipped_raw_rgb", "Raw RGB bitmaps in VFW mode are stored bottom-up", OFFSET(flipped_raw_rgb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "reserve_index_space", "reserve a given amount of space (in bytes) at the beginning of the file for the index (cues)", OFFSET(reserve_cues_space), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
+ { "cues_to_front", "move Cues (the index) to the front by shifting data if necessary", OFFSET(move_cues_to_front), AV_OPT_TYPE_BOOL, { .i64 = 0}, 0, 1, FLAGS },
+ { "cluster_size_limit", "store at most the provided amount of bytes in a cluster", OFFSET(cluster_size_limit), AV_OPT_TYPE_INT , { .i64 = -1 }, -1, INT_MAX, FLAGS },
+ { "cluster_time_limit", "store at most the provided number of milliseconds in a cluster", OFFSET(cluster_time_limit), AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, FLAGS },
+ { "dash", "create a WebM file conforming to WebM DASH specification", OFFSET(is_dash), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "dash_track_number", "track number for the DASH stream", OFFSET(dash_track_number), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS },
+ { "live", "write files assuming it is a live stream", OFFSET(is_live), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "allow_raw_vfw", "allow raw VFW mode", OFFSET(allow_raw_vfw), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+ { "flipped_raw_rgb", "store raw RGB bitmaps in VFW mode in bottom-up mode", OFFSET(flipped_raw_rgb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
{ "write_crc32", "write a CRC32 element inside every Level 1 element", OFFSET(write_crc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
- { "default_mode", "Controls how a track's FlagDefault is inferred", OFFSET(default_mode), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MODE_PASSTHROUGH }, DEFAULT_MODE_INFER, DEFAULT_MODE_PASSTHROUGH, FLAGS, .unit = "default_mode" },
- { "infer", "For each track type, mark each track of disposition default as default; if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 0, FLAGS, .unit = "default_mode" },
- { "infer_no_subs", "For each track type, mark each track of disposition default as default; for audio and video: if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, .unit = "default_mode" },
- { "passthrough", "Use the disposition flag as-is", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, .unit = "default_mode" },
+ { "default_mode", "control how a track's FlagDefault is inferred", OFFSET(default_mode), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MODE_PASSTHROUGH }, DEFAULT_MODE_INFER, DEFAULT_MODE_PASSTHROUGH, FLAGS, .unit = "default_mode" },
+ { "infer", "for each track type, mark each track of disposition default as default; if none exists, mark the first track as default", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 0, FLAGS, .unit = "default_mode" },
+ { "infer_no_subs", "for each track type, mark each track of disposition default as default; for audio and video: if none exists, mark the first track as default", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, .unit = "default_mode" },
+ { "passthrough", "use the disposition flag as-is", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, .unit = "default_mode" },
{ NULL },
};
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 0c892b56c8..6174a04c31 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -84,6 +84,7 @@ typedef struct MOVParseTableEntry {
static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom);
static int mov_read_mfra(MOVContext *c, AVIOContext *f);
+static void mov_free_stream_context(AVFormatContext *s, AVStream *st);
static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, unsigned int* allocated_size,
int count, int duration);
@@ -7925,8 +7926,10 @@ cleanup:
static int mov_read_SA3D(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
AVStream *st;
- int i, version, type;
+ AVChannelLayout ch_layout = { 0 };
+ int ret, i, version, type;
int ambisonic_order, channel_order, normalization, channel_count;
+ int ambi_channels, non_diegetic_channels;
if (c->fc->nb_streams < 1)
return 0;
@@ -7945,11 +7948,12 @@ static int mov_read_SA3D(MOVContext *c, AVIOContext *pb, MOVAtom atom)
}
type = avio_r8(pb);
- if (type) {
+ if (type & 0x7f) {
av_log(c->fc, AV_LOG_WARNING,
- "Unsupported ambisonic type %d\n", type);
+ "Unsupported ambisonic type %d\n", type & 0x7f);
return 0;
}
+ non_diegetic_channels = (type >> 7) * 2; // head_locked_stereo
ambisonic_order = avio_rb32(pb);
@@ -7968,24 +7972,43 @@ static int mov_read_SA3D(MOVContext *c, AVIOContext *pb, MOVAtom atom)
}
channel_count = avio_rb32(pb);
- if (ambisonic_order < 0 || channel_count != (ambisonic_order + 1LL) * (ambisonic_order + 1LL)) {
+ if (ambisonic_order < 0 || ambisonic_order > 31 ||
+ channel_count != ((ambisonic_order + 1LL) * (ambisonic_order + 1LL) +
+ non_diegetic_channels)) {
av_log(c->fc, AV_LOG_ERROR,
"Invalid number of channels (%d / %d)\n",
channel_count, ambisonic_order);
return 0;
}
+ ambi_channels = channel_count - non_diegetic_channels;
+
+ ret = av_channel_layout_custom_init(&ch_layout, channel_count);
+ if (ret < 0)
+ return 0;
for (i = 0; i < channel_count; i++) {
- if (i != avio_rb32(pb)) {
- av_log(c->fc, AV_LOG_WARNING,
- "Ambisonic channel reordering is not supported\n");
+ unsigned channel = avio_rb32(pb);
+
+ if (channel >= channel_count) {
+ av_log(c->fc, AV_LOG_ERROR, "Invalid channel index (%d / %d)\n",
+ channel, ambisonic_order);
+ av_channel_layout_uninit(&ch_layout);
return 0;
}
+ if (channel >= ambi_channels)
+ ch_layout.u.map[i].id = channel - ambi_channels;
+ else
+ ch_layout.u.map[i].id = AV_CHAN_AMBISONIC_BASE + channel;
+ }
+
+ ret = av_channel_layout_retype(&ch_layout, 0, AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
+ if (ret < 0) {
+ av_channel_layout_uninit(&ch_layout);
+ return 0;
}
av_channel_layout_uninit(&st->codecpar->ch_layout);
- st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_AMBISONIC;
- st->codecpar->ch_layout.nb_channels = channel_count;
+ st->codecpar->ch_layout = ch_layout;
return 0;
}
@@ -8137,8 +8160,9 @@ static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom, int idx)
size -= 4;
if (version < 2) {
- av_log(c->fc, AV_LOG_ERROR, "infe: version < 2 not supported\n");
- return AVERROR_PATCHWELCOME;
+ avpriv_report_missing_feature(c->fc, "infe version < 2");
+ avio_skip(pb, size);
+ return 1;
}
item_id = version > 2 ? avio_rb32(pb) : avio_rb16(pb);
@@ -8181,7 +8205,7 @@ static int mov_read_iinf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
HEIFItem *heif_item;
int entry_count;
- int version, ret;
+ int version, got_stream = 0, ret, i;
if (c->found_iinf) {
av_log(c->fc, AV_LOG_WARNING, "Duplicate iinf box found\n");
@@ -8201,18 +8225,33 @@ static int mov_read_iinf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
sizeof(*c->heif_item) * (entry_count - c->nb_heif_item));
c->nb_heif_item = FFMAX(c->nb_heif_item, entry_count);
- for (int i = 0; i < entry_count; i++) {
+ for (i = 0; i < entry_count; i++) {
MOVAtom infe;
infe.size = avio_rb32(pb) - 8;
infe.type = avio_rl32(pb);
ret = mov_read_infe(c, pb, infe, i);
if (ret < 0)
- return ret;
+ goto fail;
+ if (!ret)
+ got_stream = 1;
}
- c->found_iinf = 1;
+ c->found_iinf = got_stream;
return 0;
+fail:
+ for (; i >= 0; i--) {
+ HEIFItem *item = &c->heif_item[i];
+
+ av_freep(&item->name);
+ if (!item->st)
+ continue;
+
+ mov_free_stream_context(c->fc, item->st);
+ ff_remove_stream(c->fc, item->st);
+ item->st = NULL;
+ }
+ return ret;
}
static int mov_read_iref_dimg(MOVContext *c, AVIOContext *pb, int version)
@@ -9438,7 +9477,8 @@ static int mov_parse_tiles(AVFormatContext *s)
break;
}
- if (k == grid->nb_tiles) {
+ if (k == mov->nb_heif_item) {
+ av_assert0(loop);
av_log(s, AV_LOG_WARNING, "HEIF item id %d referenced by grid id %d doesn't "
"exist\n",
tile_id, grid->item->item_id);
@@ -9508,14 +9548,15 @@ static int mov_read_header(AVFormatContext *s)
av_log(s, AV_LOG_ERROR, "error reading header\n");
return err;
}
- } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->found_iloc && !mov->moov_retry++);
- if (!mov->found_moov && !mov->found_iloc) {
+ } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) &&
+ !mov->found_moov && (!mov->found_iloc || !mov->found_iinf) && !mov->moov_retry++);
+ if (!mov->found_moov && !mov->found_iloc && !mov->found_iinf) {
av_log(s, AV_LOG_ERROR, "moov atom not found\n");
return AVERROR_INVALIDDATA;
}
av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));
- if (mov->found_iloc) {
+ if (mov->found_iloc && mov->found_iinf) {
for (i = 0; i < mov->nb_heif_item; i++) {
HEIFItem *item = &mov->heif_item[i];
MOVStreamContext *sc;
@@ -9557,6 +9598,10 @@ static int mov_read_header(AVFormatContext *s)
return err;
}
}
+ // prevent iloc and iinf boxes from being parsed while reading packets.
+ // this is needed because an iinf box may have been parsed but ignored
+ // for having old infe boxes which create no streams.
+ mov->found_iloc = mov->found_iinf = 1;
if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
if (mov->nb_chapter_tracks > 0 && !mov->ignore_chapters)
@@ -9622,25 +9667,7 @@ static int mov_read_header(AVFormatContext *s)
}
}
- if (mov->trex_data) {
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
- if (st->duration > 0) {
- /* Akin to sc->data_size * 8 * sc->time_scale / st->duration but accounting for overflows. */
- st->codecpar->bit_rate = av_rescale(sc->data_size, ((int64_t) sc->time_scale) * 8, st->duration);
- if (st->codecpar->bit_rate == INT64_MIN) {
- av_log(s, AV_LOG_WARNING, "Overflow during bit rate calculation %"PRId64" * 8 * %d\n",
- sc->data_size, sc->time_scale);
- st->codecpar->bit_rate = 0;
- if (s->error_recognition & AV_EF_EXPLODE)
- return AVERROR_INVALIDDATA;
- }
- }
- }
- }
-
- if (mov->use_mfra_for > 0) {
+ if (mov->trex_data || mov->use_mfra_for > 0) {
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
MOVStreamContext *sc = st->priv_data;
@@ -10106,7 +10133,7 @@ static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp,
{
MOVStreamContext *sc = st->priv_data;
FFStream *const sti = ffstream(st);
- int sample, time_sample, ret;
+ int sample, time_sample, ret, next_ts, requested_sample;
unsigned int i;
// Here we consider timestamp to be PTS, hence try to offset it so that we
@@ -10127,7 +10154,17 @@ static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp,
if (!sample || can_seek_to_key_sample(st, sample, timestamp))
break;
- timestamp -= FFMAX(sc->min_sample_duration, 1);
+
+ next_ts = timestamp - FFMAX(sc->min_sample_duration, 1);
+ requested_sample = av_index_search_timestamp(st, next_ts, flags);
+
+ // If we've reached a different sample trying to find a good pts to
+ // seek to, give up searching because we'll end up seeking back to
+ // sample 0 on every seek.
+ if (!can_seek_to_key_sample(st, requested_sample, next_ts) && sample != requested_sample)
+ break;
+
+ timestamp = next_ts;
}
mov_current_sample_set(sc, sample);
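
Editorial note: in the SA3D change above, the top bit of the type byte now signals an extra head-locked stereo pair, and the channel count must equal the ambisonic channels, (order + 1)^2, plus those non-diegetic channels. The validation restated as a standalone sketch (not FFmpeg API):

static int sa3d_channel_count_valid(int ambisonic_order, int type_byte, int channel_count)
{
    int non_diegetic = (type_byte >> 7) * 2;                 /* head-locked stereo pair */
    long long ambi   = (ambisonic_order + 1LL) * (ambisonic_order + 1LL);
    return ambisonic_order >= 0 && ambisonic_order <= 31 &&
           channel_count == ambi + non_diegetic;
}
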
diff --git a/libavformat/mov_chan.c b/libavformat/mov_chan.c
index e7d181d71f..5ffacf64b5 100644
--- a/libavformat/mov_chan.c
+++ b/libavformat/mov_chan.c
@@ -23,6 +23,7 @@
* @author Justin Ruggles
*/
+#include <assert.h>
#include <stdint.h>
#include "libavutil/avassert.h"
@@ -74,6 +75,46 @@ enum {
c_Haptic = AV_CHAN_NONE,
};
+enum {
+ iso_L = AV_CHAN_FRONT_LEFT,
+ iso_R = AV_CHAN_FRONT_RIGHT,
+ iso_C = AV_CHAN_FRONT_CENTER,
+ iso_LFE = AV_CHAN_LOW_FREQUENCY,
+ iso_Lsr = AV_CHAN_BACK_LEFT,
+ iso_Rsr = AV_CHAN_BACK_RIGHT,
+ iso_Lc = AV_CHAN_FRONT_LEFT_OF_CENTER,
+ iso_Rc = AV_CHAN_FRONT_RIGHT_OF_CENTER,
+ iso_Cs = AV_CHAN_BACK_CENTER,
+ /* Side and surround are not exactly the same, but in order to have
+ * consistent 5.1/7.1 layouts we map them to the side channels. */
+ iso_Ls = AV_CHAN_SIDE_LEFT,
+ iso_Lss = AV_CHAN_SIDE_LEFT,
+ iso_Rs = AV_CHAN_SIDE_RIGHT,
+ iso_Rss = AV_CHAN_SIDE_RIGHT,
+ iso_Ts = AV_CHAN_TOP_CENTER,
+ iso_Lv = AV_CHAN_TOP_FRONT_LEFT,
+ iso_Cv = AV_CHAN_TOP_FRONT_CENTER,
+ iso_Rv = AV_CHAN_TOP_FRONT_RIGHT,
+ iso_Lvr = AV_CHAN_TOP_BACK_LEFT,
+ iso_Cvr = AV_CHAN_TOP_BACK_CENTER,
+ iso_Rvr = AV_CHAN_TOP_BACK_RIGHT,
+ // = AV_CHAN_STEREO_LEFT,
+ // = AV_CHAN_STEREO_RIGHT,
+ iso_Lw = AV_CHAN_WIDE_LEFT,
+ iso_Rw = AV_CHAN_WIDE_RIGHT,
+ iso_Lsd = AV_CHAN_SURROUND_DIRECT_LEFT,
+ iso_Rsd = AV_CHAN_SURROUND_DIRECT_RIGHT,
+ iso_LFE2 = AV_CHAN_LOW_FREQUENCY_2,
+ iso_Lvss = AV_CHAN_TOP_SIDE_LEFT,
+ iso_Rvss = AV_CHAN_TOP_SIDE_RIGHT,
+ iso_Cb = AV_CHAN_BOTTOM_FRONT_CENTER,
+ iso_Lb = AV_CHAN_BOTTOM_FRONT_LEFT,
+ iso_Rb = AV_CHAN_BOTTOM_FRONT_RIGHT,
+ /* The following have no exact counterparts */
+ iso_Lvs = AV_CHAN_NONE,
+ iso_Rvs = AV_CHAN_NONE,
+};
+
struct MovChannelLayoutMap {
union {
uint32_t tag;
@@ -81,108 +122,142 @@ struct MovChannelLayoutMap {
};
};
-#define TAG(_0) {.tag = _0}
-#define ID(_0) {.id = c_##_0}
-#define CHLIST(_0, ...) TAG(_0), __VA_ARGS__
-#define CHLIST01(_0, _1) CHLIST(_0, ID(_1))
-#define CHLIST02(_0, _1, _2) CHLIST(_0, ID(_1), ID(_2))
-#define CHLIST03(_0, _1, _2, _3) CHLIST(_0, ID(_1), ID(_2), ID(_3))
-#define CHLIST04(_0, _1, _2, _3, _4) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4))
-#define CHLIST05(_0, _1, _2, _3, _4, _5) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5))
-#define CHLIST06(_0, _1, _2, _3, _4, _5, _6) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6))
-#define CHLIST07(_0, _1, _2, _3, _4, _5, _6, _7) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7))
-#define CHLIST08(_0, _1, _2, _3, _4, _5, _6, _7, _8) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8))
-#define CHLIST09(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9))
-#define CHLIST16(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \
- CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9), ID(_10), \
- ID(_11), ID(_12), ID(_13), ID(_14), ID(_15), ID(_16))
-#define CHLIST21(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21) \
- CHLIST(_0, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9), ID(_10), \
- ID(_11), ID(_12), ID(_13), ID(_14), ID(_15), ID(_16), ID(_17), ID(_18), ID(_19), ID(_20), ID(_21))
-
+#define TAG(_tag, _cnt) {.tag = _tag}
+#define ID(_0) {.id = c_##_0}
+#define CHLIST01(_tag, _1) CHLIST(_tag, 1, ID(_1))
+#define CHLIST02(_tag, _1, _2) CHLIST(_tag, 2, ID(_1), ID(_2))
+#define CHLIST03(_tag, _1, _2, _3) CHLIST(_tag, 3, ID(_1), ID(_2), ID(_3))
+#define CHLIST04(_tag, _1, _2, _3, _4) CHLIST(_tag, 4, ID(_1), ID(_2), ID(_3), ID(_4))
+#define CHLIST05(_tag, _1, _2, _3, _4, _5) CHLIST(_tag, 5, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5))
+#define CHLIST06(_tag, _1, _2, _3, _4, _5, _6) CHLIST(_tag, 6, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6))
+#define CHLIST07(_tag, _1, _2, _3, _4, _5, _6, _7) CHLIST(_tag, 7, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7))
+#define CHLIST08(_tag, _1, _2, _3, _4, _5, _6, _7, _8) CHLIST(_tag, 8, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8))
+#define CHLIST09(_tag, _1, _2, _3, _4, _5, _6, _7, _8, _9) CHLIST(_tag, 9, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9))
+#define CHLIST16(_tag, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16) \
+ CHLIST(_tag, 16, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9), ID(_10), \
+ ID(_11), ID(_12), ID(_13), ID(_14), ID(_15), ID(_16))
+#define CHLIST21(_tag, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21) \
+ CHLIST(_tag, 21, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9), ID(_10), \
+ ID(_11), ID(_12), ID(_13), ID(_14), ID(_15), ID(_16), ID(_17), ID(_18), ID(_19), ID(_20), ID(_21))
+#define CHLIST24(_tag, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24) \
+ CHLIST(_tag, 24, ID(_1), ID(_2), ID(_3), ID(_4), ID(_5), ID(_6), ID(_7), ID(_8), ID(_9), ID(_10), \
+ ID(_11), ID(_12), ID(_13), ID(_14), ID(_15), ID(_16), ID(_17), ID(_18), ID(_19), ID(_20), \
+ ID(_21), ID(_22), ID(_23), ID(_24))
+
+#define MOV_CH_LAYOUT_MAP \
+ CHLIST01( MOV_CH_LAYOUT_MONO, C )\
+ CHLIST02( MOV_CH_LAYOUT_STEREO, L, R )\
+ CHLIST02( MOV_CH_LAYOUT_STEREOHEADPHONES, L, R )\
+ CHLIST02( MOV_CH_LAYOUT_BINAURAL, L, R )\
+ CHLIST02( MOV_CH_LAYOUT_MIDSIDE, L, R )\
+ CHLIST02( MOV_CH_LAYOUT_XY, L, R )\
+ CHLIST02( MOV_CH_LAYOUT_MATRIXSTEREO, Lt, Rt )\
+ CHLIST02( MOV_CH_LAYOUT_AC3_1_0_1, C, LFE )\
+ CHLIST03( MOV_CH_LAYOUT_MPEG_3_0_A, L, R, C )\
+ CHLIST03( MOV_CH_LAYOUT_MPEG_3_0_B, C, L, R )\
+ CHLIST03( MOV_CH_LAYOUT_AC3_3_0, L, C, R )\
+ CHLIST03( MOV_CH_LAYOUT_ITU_2_1, L, R, Cs )\
+ CHLIST03( MOV_CH_LAYOUT_DVD_4, L, R, LFE )\
+ CHLIST04( MOV_CH_LAYOUT_AMBISONIC_B_FORMAT, W, X, Y, Z )\
+ CHLIST04( MOV_CH_LAYOUT_QUADRAPHONIC, L, R, Rls, Rrs )\
+ CHLIST04( MOV_CH_LAYOUT_MPEG_4_0_A, L, R, C, Cs )\
+ CHLIST04( MOV_CH_LAYOUT_MPEG_4_0_B, C, L, R, Cs )\
+ CHLIST04( MOV_CH_LAYOUT_AC3_3_1, L, C, R, Cs )\
+ CHLIST04( MOV_CH_LAYOUT_ITU_2_2, L, R, Ls, Rs )\
+ CHLIST04( MOV_CH_LAYOUT_DVD_5, L, R, LFE, Cs )\
+ CHLIST04( MOV_CH_LAYOUT_AC3_2_1_1, L, R, Cs, LFE )\
+ CHLIST04( MOV_CH_LAYOUT_DVD_10, L, R, C, LFE )\
+ CHLIST04( MOV_CH_LAYOUT_AC3_3_0_1, L, C, R, LFE )\
+ CHLIST04( MOV_CH_LAYOUT_DTS_3_1, C, L, R, LFE )\
+ CHLIST05( MOV_CH_LAYOUT_PENTAGONAL, L, R, Rls, Rrs, C )\
+ CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_A, L, R, C, Ls, Rs )\
+ CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_B, L, R, Ls, Rs, C )\
+ CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_C, L, C, R, Ls, Rs )\
+ CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_D, C, L, R, Ls, Rs )\
+ CHLIST05( MOV_CH_LAYOUT_DVD_6, L, R, LFE, Ls, Rs )\
+ CHLIST05( MOV_CH_LAYOUT_DVD_18, L, R, Ls, Rs, LFE )\
+ CHLIST05( MOV_CH_LAYOUT_DVD_11, L, R, C, LFE, Cs )\
+ CHLIST05( MOV_CH_LAYOUT_AC3_3_1_1, L, C, R, Cs, LFE )\
+ CHLIST05( MOV_CH_LAYOUT_DTS_4_1, C, L, R, Cs, LFE )\
+ CHLIST06( MOV_CH_LAYOUT_HEXAGONAL, L, R, Rls, Rrs, C, Cs )\
+ CHLIST06( MOV_CH_LAYOUT_DTS_6_0_C, C, Cs, L, R, Rls, Rrs )\
+ CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_A, L, R, C, LFE, Ls, Rs )\
+ CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_B, L, R, Ls, Rs, C, LFE )\
+ CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_C, L, C, R, Ls, Rs, LFE )\
+ CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_D, C, L, R, Ls, Rs, LFE )\
+ CHLIST06( MOV_CH_LAYOUT_AUDIOUNIT_6_0, L, R, Ls, Rs, C, Cs )\
+ CHLIST06( MOV_CH_LAYOUT_AAC_6_0, C, L, R, Ls, Rs, Cs )\
+ CHLIST06( MOV_CH_LAYOUT_EAC3_6_0_A, L, C, R, Ls, Rs, Cs )\
+ CHLIST06( MOV_CH_LAYOUT_DTS_6_0_A, Lc, Rc, L, R, Ls, Rs )\
+ CHLIST06( MOV_CH_LAYOUT_DTS_6_0_B, C, L, R, Rls, Rrs, Ts )\
+ CHLIST07( MOV_CH_LAYOUT_MPEG_6_1_A, L, R, C, LFE, Ls, Rs, Cs )\
+ CHLIST07( MOV_CH_LAYOUT_AAC_6_1, C, L, R, Ls, Rs, Cs, LFE )\
+ CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_A, L, C, R, Ls, Rs, LFE, Cs )\
+ CHLIST07( MOV_CH_LAYOUT_DTS_6_1_D, C, L, R, Ls, Rs, LFE, Cs )\
+ CHLIST07( MOV_CH_LAYOUT_AUDIOUNIT_7_0, L, R, Ls, Rs, C, Rls, Rrs )\
+ CHLIST07( MOV_CH_LAYOUT_AAC_7_0, C, L, R, Ls, Rs, Rls, Rrs )\
+ CHLIST07( MOV_CH_LAYOUT_EAC3_7_0_A, L, C, R, Ls, Rs, Rls, Rrs )\
+ CHLIST07( MOV_CH_LAYOUT_AUDIOUNIT_7_0_FRONT, L, R, Ls, Rs, C, Lc, Rc )\
+ CHLIST07( MOV_CH_LAYOUT_DTS_7_0, Lc, C, Rc, L, R, Ls, Rs )\
+ CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_B, L, C, R, Ls, Rs, LFE, Ts )\
+ CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_C, L, C, R, Ls, Rs, LFE, Vhc )\
+ CHLIST07( MOV_CH_LAYOUT_DTS_6_1_A, Lc, Rc, L, R, Ls, Rs, LFE )\
+ CHLIST07( MOV_CH_LAYOUT_DTS_6_1_B, C, L, R, Rls, Rrs, Ts, LFE )\
+ CHLIST07( MOV_CH_LAYOUT_DTS_6_1_C, C, Cs, L, R, Rls, Rrs, LFE )\
+ CHLIST08( MOV_CH_LAYOUT_OCTAGONAL, L, R, Rls, Rrs, C, Cs, Ls, Rs )\
+ CHLIST08( MOV_CH_LAYOUT_AAC_OCTAGONAL, C, L, R, Ls, Rs, Rls, Rrs, Cs )\
+ CHLIST08( MOV_CH_LAYOUT_CUBE, L, R, Rls, Rrs, Vhl, Vhr, Rlt, Rrt )\
+ CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_A, L, R, C, LFE, Ls, Rs, Lc, Rc )\
+ CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_B, C, Lc, Rc, L, R, Ls, Rs, LFE )\
+ CHLIST08( MOV_CH_LAYOUT_EMAGIC_DEFAULT_7_1, L, R, Ls, Rs, C, LFE, Lc, Rc )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_B, L, C, R, Ls, Rs, LFE, Lc, Rc )\
+ CHLIST08( MOV_CH_LAYOUT_DTS_7_1, Lc, C, Rc, L, R, Ls, Rs, LFE )\
+ CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_C, L, R, C, LFE, Ls, Rs, Rls, Rrs )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_A, L, C, R, Ls, Rs, LFE, Rls, Rrs )\
+ CHLIST08( MOV_CH_LAYOUT_SMPTE_DTV, L, R, C, LFE, Ls, Rs, Lt, Rt )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_C, L, C, R, Ls, Rs, LFE, Lsd, Rsd )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_D, L, C, R, Ls, Rs, LFE, Lw, Rw )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_E, L, C, R, Ls, Rs, LFE, Vhl, Vhr )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_F, L, C, R, Ls, Rs, LFE, Cs, Ts )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_G, L, C, R, Ls, Rs, LFE, Cs, Vhc )\
+ CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_H, L, C, R, Ls, Rs, LFE, Ts, Vhc )\
+ CHLIST08( MOV_CH_LAYOUT_DTS_8_0_A, Lc, Rc, L, R, Ls, Rs, Rls, Rrs )\
+ CHLIST08( MOV_CH_LAYOUT_DTS_8_0_B, Lc, C, Rc, L, R, Ls, Cs, Rs )\
+ CHLIST09( MOV_CH_LAYOUT_DTS_8_1_A, Lc, Rc, L, R, Ls, Rs, Rls, Rrs, LFE )\
+ CHLIST09( MOV_CH_LAYOUT_DTS_8_1_B, Lc, C, Rc, L, R, Ls, Cs, Rs, LFE )\
+ CHLIST16( MOV_CH_LAYOUT_TMH_10_2_STD, L, R, C, Vhc, Lsd, Rsd, Ls, Rs, Vhl, Vhr, Lw, Rw, Csd, Cs, LFE1, LFE2 )\
+ CHLIST21( MOV_CH_LAYOUT_TMH_10_2_FULL, L, R, C, Vhc, Lsd, Rsd, Ls, Rs, Vhl, Vhr, Lw, Rw, Csd, Cs, LFE1, LFE2, Lc, Rc, HI, VI, Haptic )\
+
+#define CHLIST(_tag, _cnt, ...) static_assert((_tag & 0xffff) == _cnt, "Channel count of " #_tag " is not " #_cnt);
+MOV_CH_LAYOUT_MAP
+#undef CHLIST
+#define CHLIST(_tag, _cnt, ...) TAG(_tag, _cnt), __VA_ARGS__,
static const struct MovChannelLayoutMap mov_ch_layout_map[] = {
- CHLIST01( MOV_CH_LAYOUT_MONO, C ),
- CHLIST02( MOV_CH_LAYOUT_STEREO, L, R ),
- CHLIST02( MOV_CH_LAYOUT_STEREOHEADPHONES, L, R ),
- CHLIST02( MOV_CH_LAYOUT_BINAURAL, L, R ),
- CHLIST02( MOV_CH_LAYOUT_MIDSIDE, L, R ), //C, sides
- CHLIST02( MOV_CH_LAYOUT_XY, L, R ), //X (left ), Y (right )
- CHLIST02( MOV_CH_LAYOUT_MATRIXSTEREO, Lt, Rt ),
- CHLIST02( MOV_CH_LAYOUT_AC3_1_0_1, C, LFE ),
- CHLIST03( MOV_CH_LAYOUT_MPEG_3_0_A, L, R, C ),
- CHLIST03( MOV_CH_LAYOUT_MPEG_3_0_B, C, L, R ),
- CHLIST03( MOV_CH_LAYOUT_AC3_3_0, L, C, R ),
- CHLIST03( MOV_CH_LAYOUT_ITU_2_1, L, R, Cs ),
- CHLIST03( MOV_CH_LAYOUT_DVD_4, L, R, LFE ),
- CHLIST04( MOV_CH_LAYOUT_AMBISONIC_B_FORMAT, W, X, Y, Z ),
- CHLIST04( MOV_CH_LAYOUT_QUADRAPHONIC, L, R, Rls, Rrs ),
- CHLIST04( MOV_CH_LAYOUT_MPEG_4_0_A, L, R, C, Cs ),
- CHLIST04( MOV_CH_LAYOUT_MPEG_4_0_B, C, L, R, Cs ),
- CHLIST04( MOV_CH_LAYOUT_AC3_3_1, L, C, R, Cs ),
- CHLIST04( MOV_CH_LAYOUT_ITU_2_2, L, R, Ls, Rs ),
- CHLIST04( MOV_CH_LAYOUT_DVD_5, L, R, LFE, Cs ),
- CHLIST04( MOV_CH_LAYOUT_AC3_2_1_1, L, R, Cs, LFE ),
- CHLIST04( MOV_CH_LAYOUT_DVD_10, L, R, C, LFE ),
- CHLIST04( MOV_CH_LAYOUT_AC3_3_0_1, L, C, R, LFE ),
- CHLIST04( MOV_CH_LAYOUT_DTS_3_1, C, L, R, LFE ),
- CHLIST05( MOV_CH_LAYOUT_PENTAGONAL, L, R, Rls, Rrs, C ),
- CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_A, L, R, C, Ls, Rs ),
- CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_B, L, R, Ls, Rs, C ),
- CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_C, L, C, R, Ls, Rs ),
- CHLIST05( MOV_CH_LAYOUT_MPEG_5_0_D, C, L, R, Ls, Rs ),
- CHLIST05( MOV_CH_LAYOUT_DVD_6, L, R, LFE, Ls, Rs ),
- CHLIST05( MOV_CH_LAYOUT_DVD_18, L, R, Ls, Rs, LFE ),
- CHLIST05( MOV_CH_LAYOUT_DVD_11, L, R, C, LFE, Cs ),
- CHLIST05( MOV_CH_LAYOUT_AC3_3_1_1, L, C, R, Cs, LFE ),
- CHLIST05( MOV_CH_LAYOUT_DTS_4_1, C, L, R, Cs, LFE ),
- CHLIST06( MOV_CH_LAYOUT_HEXAGONAL, L, R, Rls, Rrs, C, Cs ),
- CHLIST06( MOV_CH_LAYOUT_DTS_6_0_C, C, Cs, L, R, Rls, Rrs ),
- CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_A, L, R, C, LFE, Ls, Rs ),
- CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_B, L, R, Ls, Rs, C, LFE ),
- CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_C, L, C, R, Ls, Rs, LFE ),
- CHLIST06( MOV_CH_LAYOUT_MPEG_5_1_D, C, L, R, Ls, Rs, LFE ),
- CHLIST06( MOV_CH_LAYOUT_AUDIOUNIT_6_0, L, R, Ls, Rs, C, Cs ),
- CHLIST06( MOV_CH_LAYOUT_AAC_6_0, C, L, R, Ls, Rs, Cs ),
- CHLIST06( MOV_CH_LAYOUT_EAC3_6_0_A, L, C, R, Ls, Rs, Cs ),
- CHLIST06( MOV_CH_LAYOUT_DTS_6_0_A, Lc, Rc, L, R, Ls, Rs ),
- CHLIST06( MOV_CH_LAYOUT_DTS_6_0_B, C, L, R, Rls, Rrs, Ts ),
- CHLIST07( MOV_CH_LAYOUT_MPEG_6_1_A, L, R, C, LFE, Ls, Rs, Cs ),
- CHLIST07( MOV_CH_LAYOUT_AAC_6_1, C, L, R, Ls, Rs, Cs, LFE ),
- CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_A, L, C, R, Ls, Rs, LFE, Cs ),
- CHLIST07( MOV_CH_LAYOUT_DTS_6_1_D, C, L, R, Ls, Rs, LFE, Cs ),
- CHLIST07( MOV_CH_LAYOUT_AUDIOUNIT_7_0, L, R, Ls, Rs, C, Rls, Rrs ),
- CHLIST07( MOV_CH_LAYOUT_AAC_7_0, C, L, R, Ls, Rs, Rls, Rrs ),
- CHLIST07( MOV_CH_LAYOUT_EAC3_7_0_A, L, C, R, Ls, Rs, Rls, Rrs ),
- CHLIST07( MOV_CH_LAYOUT_AUDIOUNIT_7_0_FRONT, L, R, Ls, Rs, C, Lc, Rc ),
- CHLIST07( MOV_CH_LAYOUT_DTS_7_0, Lc, C, Rc, L, R, Ls, Rs ),
- CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_B, L, C, R, Ls, Rs, LFE, Ts ),
- CHLIST07( MOV_CH_LAYOUT_EAC3_6_1_C, L, C, R, Ls, Rs, LFE, Vhc ),
- CHLIST07( MOV_CH_LAYOUT_DTS_6_1_A, Lc, Rc, L, R, Ls, Rs, LFE ),
- CHLIST07( MOV_CH_LAYOUT_DTS_6_1_B, C, L, R, Rls, Rrs, Ts, LFE ),
- CHLIST07( MOV_CH_LAYOUT_DTS_6_1_C, C, Cs, L, R, Rls, Rrs, LFE ),
- CHLIST08( MOV_CH_LAYOUT_OCTAGONAL, L, R, Rls, Rrs, C, Cs, Ls, Rs ),
- CHLIST08( MOV_CH_LAYOUT_AAC_OCTAGONAL, C, L, R, Ls, Rs, Rls, Rrs, Cs ),
- CHLIST08( MOV_CH_LAYOUT_CUBE, L, R, Rls, Rrs, Vhl, Vhr, Rlt, Rrt ),
- CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_A, L, R, C, LFE, Ls, Rs, Lc, Rc ),
- CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_B, C, Lc, Rc, L, R, Ls, Rs, LFE ),
- CHLIST08( MOV_CH_LAYOUT_EMAGIC_DEFAULT_7_1, L, R, Ls, Rs, C, LFE, Lc, Rc ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_B, L, C, R, Ls, Rs, LFE, Lc, Rc ),
- CHLIST08( MOV_CH_LAYOUT_DTS_7_1, Lc, C, Rc, L, R, Ls, Rs, LFE ),
- CHLIST08( MOV_CH_LAYOUT_MPEG_7_1_C, L, R, C, LFE, Ls, Rs, Rls, Rrs ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_A, L, C, R, Ls, Rs, LFE, Rls, Rrs ),
- CHLIST08( MOV_CH_LAYOUT_SMPTE_DTV, L, R, C, LFE, Ls, Rs, Lt, Rt ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_C, L, C, R, Ls, Rs, LFE, Lsd, Rsd ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_D, L, C, R, Ls, Rs, LFE, Lw, Rw ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_E, L, C, R, Ls, Rs, LFE, Vhl, Vhr ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_F, L, C, R, Ls, Rs, LFE, Cs, Ts ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_G, L, C, R, Ls, Rs, LFE, Cs, Vhc ),
- CHLIST08( MOV_CH_LAYOUT_EAC3_7_1_H, L, C, R, Ls, Rs, LFE, Ts, Vhc ),
- CHLIST08( MOV_CH_LAYOUT_DTS_8_0_A, Lc, Rc, L, R, Ls, Rs, Rls, Rrs ),
- CHLIST08( MOV_CH_LAYOUT_DTS_8_0_B, Lc, C, Rc, L, R, Ls, Cs, Rs ),
- CHLIST09( MOV_CH_LAYOUT_DTS_8_1_A, Lc, Rc, L, R, Ls, Rs, Rls, Rrs, LFE ),
- CHLIST09( MOV_CH_LAYOUT_DTS_8_1_B, Lc, C, Rc, L, R, Ls, Cs, Rs, LFE ),
- CHLIST16( MOV_CH_LAYOUT_TMH_10_2_STD, L, R, C, Vhc, Lsd, Rsd, Ls, Rs, Vhl, Vhr, Lw, Rw, Csd, Cs, LFE1, LFE2),
- CHLIST21( MOV_CH_LAYOUT_TMH_10_2_FULL, L, R, C, Vhc, Lsd, Rsd, Ls, Rs, Vhl, Vhr, Lw, Rw, Csd, Cs, LFE1, LFE2, Lc, Rc, HI, VI, Haptic),
+ MOV_CH_LAYOUT_MAP
+ { {0} },
+};
+
+#undef ID
+#undef TAG
+#define ID(_0) {.id = iso_##_0}
+#define TAG(_tag, _cnt) {.tag = (_tag << 16) | _cnt}
+
+/* ISO/IEC 23001-8 */
+static const struct MovChannelLayoutMap iso_ch_layout_map[] = {
+ CHLIST01( 1, C )
+ CHLIST02( 2, L, R )
+ CHLIST03( 3, C, L, R )
+ CHLIST04( 4, C, L, R, Cs )
+ CHLIST05( 5, C, L, R, Ls, Rs )
+ CHLIST06( 6, C, L, R, Ls, Rs, LFE )
+ CHLIST08( 7, C, Lc, Rc, L, R, Ls, Rs, LFE )
+ CHLIST03( 9, L, R, Cs )
+ CHLIST04( 10, L, R, Ls, Rs )
+ CHLIST07( 11, C, L, R, Ls, Rs, Cs, LFE )
+ CHLIST08( 12, C, L, R, Ls, Rs, Lsr, Rsr, LFE )
+ CHLIST24( 13, C, Lc, Rc, L, R, Lss, Rss, Lsr, Rsr, Cs, LFE, LFE2, Cv, Lv, Rv, Lvss, Rvss, Ts, Lvr, Rvr, Cvr, Cb, Lb, Rb)
+ CHLIST08( 14, C, L, R, Ls, Rs, LFE, Lv, Rv)
+ { {0} },
};
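
Editorial note: both layout tables above share one flattened encoding: each run starts with a tag entry whose low 16 bits hold the channel count, followed by that many channel-id entries, and a zero-count entry terminates the array. A sketch of a lookup over such a run-length array (simplified single-field struct, not the FFmpeg union):

#include <stddef.h>
#include <stdint.h>

struct MapEntry { uint32_t v; };   /* tag in the first entry of a run, channel id otherwise */

static const struct MapEntry *find_run(const struct MapEntry *map, uint32_t tag)
{
    for (int i = 0; map[i].v & 0xffff; i += 1 + (map[i].v & 0xffff))
        if (map[i].v == tag)
            return &map[i + 1];    /* first channel id of the matching run */
    return NULL;                   /* not found; the {0} terminator stops the walk */
}
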
static const enum MovChannelLayoutTag mov_ch_layouts_aac[] = {
@@ -279,19 +354,11 @@ static const struct {
{ AV_CODEC_ID_NONE, NULL },
};
-static const struct MovChannelLayoutMap* find_layout_map(uint32_t tag)
+static const struct MovChannelLayoutMap* find_layout_map(uint32_t tag, const struct MovChannelLayoutMap *map)
{
-#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
- {
- int i;
- for (i = 0; i < FF_ARRAY_ELEMS(mov_ch_layout_map); i += 1 + (mov_ch_layout_map[i].tag & 0xffff))
- av_assert2(mov_ch_layout_map[i].tag & 0xffff0000);
- av_assert2(i == FF_ARRAY_ELEMS(mov_ch_layout_map));
- }
-#endif
- for (int i = 0; i < FF_ARRAY_ELEMS(mov_ch_layout_map); i += 1 + (mov_ch_layout_map[i].tag & 0xffff))
- if (mov_ch_layout_map[i].tag == tag)
- return &mov_ch_layout_map[i + 1];
+ for (int i = 0; map[i].tag & 0xffff; i += 1 + (map[i].tag & 0xffff))
+ if (map[i].tag == tag)
+ return &map[i + 1];
return NULL;
}
@@ -303,25 +370,33 @@ static const struct MovChannelLayoutMap* find_layout_map(uint32_t tag)
* @param[in] tag channel layout tag
* @return <0 on error
*/
-static int mov_get_channel_layout(AVChannelLayout *ch_layout, uint32_t tag)
+static int mov_get_channel_layout(AVChannelLayout *ch_layout, uint32_t tag, uint64_t omitted_channel_map, const struct MovChannelLayoutMap *map)
{
- int i, channels;
const struct MovChannelLayoutMap *layout_map;
- channels = tag & 0xFFFF;
-
/* find the channel layout for the specified layout tag */
- layout_map = find_layout_map(tag);
+ layout_map = find_layout_map(tag, map);
if (layout_map) {
int ret;
+ int map_layout_nb_channels = tag & 0xFFFF;
+ int nb_channels = ch_layout->nb_channels;
+
+ /* Omitted channel bits must not exceed number of channels in map */
+ if (omitted_channel_map >> map_layout_nb_channels)
+ return AVERROR_INVALIDDATA;
+
av_channel_layout_uninit(ch_layout);
- ret = av_channel_layout_custom_init(ch_layout, channels);
+ ret = av_channel_layout_custom_init(ch_layout, nb_channels);
if (ret < 0)
return ret;
- for (i = 0; i < channels; i++) {
- enum AVChannel id = layout_map[i].id;
- ch_layout->u.map[i].id = (id != AV_CHAN_NONE ? id : AV_CHAN_UNKNOWN);
+
+ for (int i = 0, idx = 0; i < map_layout_nb_channels && idx < nb_channels; i++, omitted_channel_map >>= 1) {
+ if (!(omitted_channel_map & 1)) {
+ enum AVChannel id = layout_map[i].id;
+ ch_layout->u.map[idx++].id = (id != AV_CHAN_NONE ? id : AV_CHAN_UNKNOWN);
+ }
}
+
return av_channel_layout_retype(ch_layout, 0, AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
}
return 0;
@@ -365,12 +440,34 @@ static uint32_t mov_get_channel_label(enum AVChannel channel)
return 0;
}
+static int is_layout_valid_for_tag(const AVChannelLayout *ch_layout, uint32_t tag, const struct MovChannelLayoutMap *map)
+{
+ const struct MovChannelLayoutMap *layout_map;
+ int channels = ch_layout->nb_channels;
+
+ /* get the layout map based on the channel count */
+ if ((tag & 0xFFFF) != channels)
+ return 0;
+
+ layout_map = find_layout_map(tag, map);
+ if (layout_map) {
+ int i;
+ for (i = 0; i < channels; i++) {
+ if (av_channel_layout_channel_from_index(ch_layout, i) != layout_map[i].id)
+ break;
+ }
+ if (i == channels)
+ return 1;
+ }
+ return 0;
+}
+
int ff_mov_get_channel_layout_tag(const AVCodecParameters *par,
uint32_t *layout,
uint32_t *bitmap,
uint32_t **pchannel_desc)
{
- int i, j;
+ int i;
uint32_t tag = 0;
const enum MovChannelLayoutTag *layouts = NULL;
@@ -383,26 +480,11 @@ int ff_mov_get_channel_layout_tag(const AVCodecParameters *par,
layouts = mov_codec_ch_layouts[i].layouts;
if (layouts) {
- int channels;
- const struct MovChannelLayoutMap *layout_map;
-
- /* get the layout map based on the channel count */
- channels = par->ch_layout.nb_channels;
-
/* find the layout tag for the specified channel layout */
- for (i = 0; layouts[i] != 0; i++) {
- if ((layouts[i] & 0xFFFF) != channels)
- continue;
- layout_map = find_layout_map(layouts[i]);
- if (layout_map) {
- for (j = 0; j < channels; j++) {
- if (av_channel_layout_channel_from_index(&par->ch_layout, j) != layout_map[j].id)
- break;
- }
- if (j == channels)
- break;
- }
- }
+ for (i = 0; layouts[i] != 0; i++)
+ if (is_layout_valid_for_tag(&par->ch_layout, layouts[i], mov_ch_layout_map))
+ break;
+
tag = layouts[i];
}
@@ -509,7 +591,7 @@ int ff_mov_read_chan(AVFormatContext *s, AVIOContext *pb, AVStream *st,
if (!ch_layout->nb_channels)
ch_layout->nb_channels = nb_channels;
if (nb_channels == ch_layout->nb_channels) {
- ret = mov_get_channel_layout(ch_layout, layout_tag);
+ ret = mov_get_channel_layout(ch_layout, layout_tag, 0, mov_ch_layout_map);
if (ret < 0)
return ret;
} else {
@@ -525,60 +607,6 @@ out:
return ret;
}
-/* ISO/IEC 23001-8, 8.2 */
-static const AVChannelLayout iso_channel_configuration[] = {
- // 0: any setup
- {0},
-
- // 1: centre front
- AV_CHANNEL_LAYOUT_MONO,
-
- // 2: left front, right front
- AV_CHANNEL_LAYOUT_STEREO,
-
- // 3: centre front, left front, right front
- AV_CHANNEL_LAYOUT_SURROUND,
-
- // 4: centre front, left front, right front, rear centre
- AV_CHANNEL_LAYOUT_4POINT0,
-
- // 5: centre front, left front, right front, left surround, right surround
- AV_CHANNEL_LAYOUT_5POINT0,
-
- // 6: 5 + LFE
- AV_CHANNEL_LAYOUT_5POINT1,
-
- // 7: centre front, left front centre, right front centre,
- // left front, right front, left surround, right surround, LFE
- AV_CHANNEL_LAYOUT_7POINT1_WIDE,
-
- // 8: channel1, channel2
- AV_CHANNEL_LAYOUT_STEREO_DOWNMIX,
-
- // 9: left front, right front, rear centre
- AV_CHANNEL_LAYOUT_2_1,
-
- // 10: left front, right front, left surround, right surround
- AV_CHANNEL_LAYOUT_2_2,
-
- // 11: centre front, left front, right front, left surround, right surround, rear centre, LFE
- AV_CHANNEL_LAYOUT_6POINT1,
-
- // 12: centre front, left front, right front
- // left surround, right surround
- // rear surround left, rear surround right
- // LFE
- AV_CHANNEL_LAYOUT_7POINT1,
-
- // 13:
- AV_CHANNEL_LAYOUT_22POINT2,
-
- // 14:
- AV_CHANNEL_LAYOUT_7POINT1_TOP_BACK,
-
- // TODO: 15 - 20
-};
-
/* ISO/IEC 23001-8, table 8 */
static const enum AVChannel iso_channel_position[] = {
// 0: left front
@@ -714,9 +742,9 @@ int ff_mov_get_channel_config_from_layout(const AVChannelLayout *layout, int *co
{
// Set default value which means any setup in 23001-8
*config = 0;
- for (int i = 0; i < FF_ARRAY_ELEMS(iso_channel_configuration); i++) {
- if (!av_channel_layout_compare(layout, iso_channel_configuration + i)) {
- *config = i;
+ for (int i = 0; iso_ch_layout_map[i].tag & 0xffff; i += 1 + (iso_ch_layout_map[i].tag & 0xffff)) {
+ if (is_layout_valid_for_tag(layout, iso_ch_layout_map[i].tag, &iso_ch_layout_map[i])) {
+ *config = iso_ch_layout_map[i].tag >> 16;
break;
}
}
@@ -724,14 +752,19 @@ int ff_mov_get_channel_config_from_layout(const AVChannelLayout *layout, int *co
return 0;
}
-int ff_mov_get_channel_layout_from_config(int config, AVChannelLayout *layout)
+int ff_mov_get_channel_layout_from_config(int config, AVChannelLayout *layout, uint64_t omitted_channel_map)
{
- if (config > 0 && config < FF_ARRAY_ELEMS(iso_channel_configuration)) {
- av_channel_layout_copy(layout, &iso_channel_configuration[config]);
- return 0;
- }
+ if (config > 0) {
+ uint32_t layout_tag;
+ int nb_omitted_channels = av_popcount64(omitted_channel_map);
+
+ if (layout->nb_channels <= 0 || layout->nb_channels > UINT16_MAX - nb_omitted_channels)
+ return AVERROR_INVALIDDATA;
- return -1;
+ layout_tag = (config << 16) | (layout->nb_channels + nb_omitted_channels);
+ return mov_get_channel_layout(layout, layout_tag, omitted_channel_map, iso_ch_layout_map);
+ }
+ return 1;
}
int ff_mov_get_channel_positions_from_layout(const AVChannelLayout *layout,
@@ -805,13 +838,9 @@ int ff_mov_read_chnl(AVFormatContext *s, AVIOContext *pb, AVStream *st)
return ret;
} else {
uint64_t omitted_channel_map = avio_rb64(pb);
-
- if (omitted_channel_map) {
- avpriv_request_sample(s, "omitted_channel_map 0x%" PRIx64 " != 0",
- omitted_channel_map);
- return AVERROR_PATCHWELCOME;
- }
- ff_mov_get_channel_layout_from_config(layout, &st->codecpar->ch_layout);
+ ret = ff_mov_get_channel_layout_from_config(layout, &st->codecpar->ch_layout, omitted_channel_map);
+ if (ret < 0)
+ return ret;
}
}
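
Editorial note: with omitted_channel_map now honored, the chnl reader derives an ISO layout tag from the ChannelConfiguration index and the full channel count, (config << 16) | (nb_channels + popcount(omitted_channel_map)), then skips every map position whose bit is set while filling the custom layout. A sketch of that thinning step on a plain id array (illustrative names, not the FFmpeg tables):

#include <stdint.h>

static int apply_omitted_map(const int *map_ids, int map_channels,
                             uint64_t omitted, int *out, int out_size)
{
    int n = 0;
    for (int i = 0; i < map_channels; i++, omitted >>= 1) {
        if (omitted & 1)
            continue;            /* defined in the configuration but omitted in this stream */
        if (n == out_size)
            return -1;           /* more kept channels than the caller allowed for */
        out[n++] = map_ids[i];
    }
    return n;                    /* number of channels actually written */
}
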
diff --git a/libavformat/mov_chan.h b/libavformat/mov_chan.h
index e480809c44..ea8be47196 100644
--- a/libavformat/mov_chan.h
+++ b/libavformat/mov_chan.h
@@ -172,10 +172,11 @@ int ff_mov_get_channel_config_from_layout(const AVChannelLayout *layout, int *co
/**
* Get AVChannelLayout from ISO/IEC 23001-8 ChannelConfiguration.
*
- * @return 0 for success, -1 for doesn't match, layout is untouched on failure
+ * @return 1 if the config was unknown, layout is untouched in this case
+ * 0 if the config was found
+ * <0 on error
*/
-
-int ff_mov_get_channel_layout_from_config(int config, AVChannelLayout *layout);
+int ff_mov_get_channel_layout_from_config(int config, AVChannelLayout *layout, uint64_t omitted_channel_map);
/**
* Get ISO/IEC 23001-8 OutputChannelPosition from AVChannelLayout.
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 15b65dcf96..f907f67752 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1175,8 +1175,6 @@ static int get_samples_per_packet(MOVTrack *track)
{
int i, first_duration;
-// return track->par->frame_size;
-
/* use 1 for raw PCM */
if (!track->audio_vbr)
return 1;
@@ -1223,6 +1221,8 @@ static int mov_write_chnl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
if (ret || !config) {
config = 0;
speaker_pos = av_malloc(layout->nb_channels);
+ if (!speaker_pos)
+ return AVERROR(ENOMEM);
ret = ff_mov_get_channel_positions_from_layout(layout,
speaker_pos, layout->nb_channels);
if (ret) {
@@ -1244,8 +1244,7 @@ static int mov_write_chnl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
if (config) {
avio_wb64(pb, 0);
} else {
- for (int i = 0; i < layout->nb_channels; i++)
- avio_w8(pb, speaker_pos[i]);
+ avio_write(pb, speaker_pos, layout->nb_channels);
av_freep(&speaker_pos);
}
@@ -2528,16 +2527,21 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
const AVPacketSideData *spherical_mapping = av_packet_side_data_get(track->st->codecpar->coded_side_data,
track->st->codecpar->nb_coded_side_data,
AV_PKT_DATA_SPHERICAL);
- const AVPacketSideData *dovi = av_packet_side_data_get(track->st->codecpar->coded_side_data,
- track->st->codecpar->nb_coded_side_data,
- AV_PKT_DATA_DOVI_CONF);
-
if (stereo_3d)
mov_write_st3d_tag(s, pb, (AVStereo3D*)stereo_3d->data);
if (spherical_mapping)
mov_write_sv3d_tag(mov->fc, pb, (AVSphericalMapping*)spherical_mapping->data);
- if (dovi)
+ }
+
+ if (track->mode == MODE_MP4) {
+ const AVPacketSideData *dovi = av_packet_side_data_get(track->st->codecpar->coded_side_data,
+ track->st->codecpar->nb_coded_side_data,
+ AV_PKT_DATA_DOVI_CONF);
+ if (dovi && mov->fc->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL) {
mov_write_dvcc_dvvc_tag(s, pb, (AVDOVIDecoderConfigurationRecord *)dovi->data);
+ } else if (dovi) {
+ av_log(mov->fc, AV_LOG_WARNING, "Not writing 'dvcC'/'dvvC' box. Requires -strict unofficial.\n");
+ }
}
if (track->par->sample_aspect_ratio.den && track->par->sample_aspect_ratio.num) {
@@ -5521,7 +5525,7 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
{
MOVMuxContext *mov = s->priv_data;
int64_t pos = avio_tell(pb);
- int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0;
+ int has_h264 = 0, has_av1 = 0, has_video = 0, has_dolby = 0, has_id3 = 0;
int has_iamf = 0;
#if CONFIG_IAMFENC
@@ -5552,6 +5556,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
st->codecpar->nb_coded_side_data,
AV_PKT_DATA_DOVI_CONF))
has_dolby = 1;
+ if (st->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3)
+ has_id3 = 1;
}
avio_wb32(pb, 0); /* size */
@@ -5631,6 +5637,9 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
if (mov->flags & FF_MOV_FLAG_DASH && mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
ffio_wfourcc(pb, "dash");
+ if (has_id3)
+ ffio_wfourcc(pb, "aid3");
+
return update_size(pb, pos);
}
@@ -6664,6 +6673,7 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s,
#if CONFIG_IAMFENC
static int mov_build_iamf_packet(AVFormatContext *s, MOVTrack *trk, AVPacket *pkt)
{
+ uint8_t *data;
int ret;
if (pkt->stream_index == trk->first_iamf_idx) {
@@ -6677,43 +6687,65 @@ static int mov_build_iamf_packet(AVFormatContext *s, MOVTrack *trk, AVPacket *pk
if (ret < 0)
return ret;
- if (pkt->stream_index == trk->last_iamf_idx) {
- uint8_t *data;
-
- ret = avio_close_dyn_buf(trk->iamf_buf, &data);
- trk->iamf_buf = NULL;
-
- if (!ret) {
- if (pkt->size) {
- // Either all or none of the packets for a single
- // IA Sample may be empty.
- av_log(s, AV_LOG_ERROR, "Unexpected packet from "
- "stream #%d\n", pkt->stream_index);
- ret = AVERROR_INVALIDDATA;
- }
- av_free(data);
- return ret;
- }
- av_buffer_unref(&pkt->buf);
- pkt->buf = av_buffer_create(data, ret, NULL, NULL, 0);
- if (!pkt->buf) {
- av_free(data);
- return AVERROR(ENOMEM);
+ if (pkt->stream_index != trk->last_iamf_idx)
+ return AVERROR(EAGAIN);
+
+ ret = avio_close_dyn_buf(trk->iamf_buf, &data);
+ trk->iamf_buf = NULL;
+ if (!ret) {
+ if (pkt->size) {
+ // Either all or none of the packets for a single
+ // IA Sample may be empty.
+ av_log(s, AV_LOG_ERROR, "Unexpected packet from "
+ "stream #%d\n", pkt->stream_index);
+ ret = AVERROR_INVALIDDATA;
}
- pkt->data = data;
- pkt->size = ret;
- pkt->stream_index = trk->first_iamf_idx;
+ av_free(data);
+ return ret;
+ }
- ret = avio_open_dyn_buf(&trk->iamf_buf);
- if (ret < 0)
- return ret;
- } else
- ret = AVERROR(EAGAIN);
+ av_buffer_unref(&pkt->buf);
+ pkt->buf = av_buffer_create(data, ret, NULL, NULL, 0);
+ if (!pkt->buf) {
+ av_free(data);
+ return AVERROR(ENOMEM);
+ }
+ pkt->data = data;
+ pkt->size = ret;
+ pkt->stream_index = trk->first_iamf_idx;
- return ret;
+ return avio_open_dyn_buf(&trk->iamf_buf);
}
#endif
+static int mov_write_emsg_tag(AVIOContext *pb, AVStream *st, AVPacket *pkt)
+{
+ int64_t pos = avio_tell(pb);
+ const char *scheme_id_uri = "https://aomedia.org/emsg/ID3";
+ const char *value = "";
+
+ av_assert0(st->time_base.num == 1);
+
+ avio_write_marker(pb,
+ av_rescale_q(pkt->pts, st->time_base, AV_TIME_BASE_Q),
+ AVIO_DATA_MARKER_BOUNDARY_POINT);
+
+ avio_wb32(pb, 0); /* size */
+ ffio_wfourcc(pb, "emsg");
+ avio_w8(pb, 1); /* version */
+ avio_wb24(pb, 0);
+ avio_wb32(pb, st->time_base.den); /* timescale */
+ avio_wb64(pb, pkt->pts); /* presentation_time */
+ avio_wb32(pb, 0xFFFFFFFFU); /* event_duration */
+ avio_wb32(pb, 0); /* id */
+ /* null terminated UTF8 strings */
+ avio_write(pb, scheme_id_uri, strlen(scheme_id_uri) + 1);
+ avio_write(pb, value, strlen(value) + 1);
+ avio_write(pb, pkt->data, pkt->size);
+
+ return update_size(pb, pos);
+}
+
static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
@@ -6724,6 +6756,11 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
return 1;
}
+ if (s->streams[pkt->stream_index]->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3) {
+ mov_write_emsg_tag(s->pb, s->streams[pkt->stream_index], pkt);
+ return 0;
+ }
+
trk = s->streams[pkt->stream_index]->priv_data;
#if CONFIG_IAMFENC
@@ -7383,6 +7420,12 @@ static int mov_init(AVFormatContext *s)
AVStream *st = s->streams[i];
if (st->priv_data)
continue;
+ // Don't produce a track in the output file for timed ID3 streams.
+ if (st->codecpar->codec_id == AV_CODEC_ID_TIMED_ID3) {
+ // Leave priv_data set to NULL for these AVStreams that don't
+ // have a corresponding track.
+ continue;
+ }
st->priv_data = st;
mov->nb_tracks++;
}
@@ -7482,6 +7525,9 @@ static int mov_init(AVFormatContext *s)
MOVTrack *track = st->priv_data;
AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
+ if (!track)
+ continue;
+
if (!track->st) {
track->st = st;
track->par = st->codecpar;
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index ec6cf567bc..f421e03926 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -32,6 +32,7 @@
#include "replaygain.h"
#include "libavcodec/codec_id.h"
+#include "libavcodec/mpegaudio.h"
#include "libavcodec/mpegaudiodecheader.h"
#define XING_FLAG_FRAMES 0x01
@@ -400,27 +401,22 @@ static int mp3_read_header(AVFormatContext *s)
if (ret < 0)
return ret;
+ ret = ffio_ensure_seekback(s->pb, 64 * 1024 + MPA_MAX_CODED_FRAME_SIZE + 4);
+ if (ret < 0)
+ return ret;
+
off = avio_tell(s->pb);
for (i = 0; i < 64 * 1024; i++) {
uint32_t header, header2;
int frame_size;
- if (!(i&1023))
- ffio_ensure_seekback(s->pb, i + 1024 + 4);
frame_size = check(s->pb, off + i, &header);
if (frame_size > 0) {
- ffio_ensure_seekback(s->pb, i + 1024 + frame_size + 4);
ret = check(s->pb, off + i + frame_size, &header2);
- if (ret >= 0 &&
- (header & MP3_MASK) == (header2 & MP3_MASK))
- {
+ if (ret >= 0 && (header & MP3_MASK) == (header2 & MP3_MASK))
break;
- } else if (ret == CHECK_SEEK_FAILED) {
- av_log(s, AV_LOG_ERROR, "Invalid frame size (%d): Could not seek to %"PRId64".\n", frame_size, off + i + frame_size);
- return AVERROR(EINVAL);
- }
} else if (frame_size == CHECK_SEEK_FAILED) {
- av_log(s, AV_LOG_ERROR, "Failed to read frame size: Could not seek to %"PRId64".\n", (int64_t) (i + 1024 + frame_size + 4));
- return AVERROR(EINVAL);
+ av_log(s, AV_LOG_ERROR, "Failed to find two consecutive MPEG audio frames.\n");
+ return AVERROR_INVALIDDATA;
}
}
if (i == 64 * 1024) {
diff --git a/libavformat/mpc.c b/libavformat/mpc.c
index 60cb768ab6..1e0e170c7d 100644
--- a/libavformat/mpc.c
+++ b/libavformat/mpc.c
@@ -112,7 +112,7 @@ static int mpc_read_header(AVFormatContext *s)
if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
int64_t pos = avio_tell(s->pb);
ff_ape_parse_tag(s);
- if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
+ if (av_dict_count(s->metadata) == 0)
ff_id3v1_read(s);
avio_seek(s->pb, pos, SEEK_SET);
}
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 233d614f78..e65cec74c2 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -791,6 +791,9 @@ static int mxf_read_partition_pack(void *arg, AVIOContext *pb, int tag, int size
partition->index_sid = avio_rb32(pb);
partition->body_offset = avio_rb64(pb);
partition->body_sid = avio_rb32(pb);
+ if (partition->body_offset < 0)
+ return AVERROR_INVALIDDATA;
+
if (avio_read(pb, op, sizeof(UID)) != sizeof(UID)) {
av_log(mxf->fc, AV_LOG_ERROR, "Failed reading UID\n");
return AVERROR_INVALIDDATA;
diff --git a/libavformat/network.c b/libavformat/network.c
index f752efc411..6db82b6d26 100644
--- a/libavformat/network.c
+++ b/libavformat/network.c
@@ -18,8 +18,13 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "config.h"
#include "config_components.h"
+#if CONFIG_TLS_PROTOCOL && CONFIG_OPENSSL
+#include <openssl/opensslv.h>
+#endif
+
#include <fcntl.h>
#include "network.h"
#include "tls.h"
@@ -31,7 +36,7 @@
int ff_tls_init(void)
{
#if CONFIG_TLS_PROTOCOL
-#if CONFIG_OPENSSL
+#if CONFIG_OPENSSL && OPENSSL_VERSION_NUMBER < 0x10100000L
int ret;
if ((ret = ff_openssl_init()) < 0)
return ret;
@@ -46,7 +51,7 @@ int ff_tls_init(void)
void ff_tls_deinit(void)
{
#if CONFIG_TLS_PROTOCOL
-#if CONFIG_OPENSSL
+#if CONFIG_OPENSSL && OPENSSL_VERSION_NUMBER < 0x10100000L
ff_openssl_deinit();
#endif
#if CONFIG_GNUTLS
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index f5782cb583..224519a4da 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -432,7 +432,7 @@ static int ogg_build_vp8_headers(AVFormatContext *s, AVStream *st,
bytestream_put_be32(&p, st->time_base.num);
/* optional second packet: VorbisComment */
- if (av_dict_get(st->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
+ if (av_dict_count(st->metadata)) {
p = ogg_write_vorbiscomment(7, bitexact, &oggstream->header_len[1], &st->metadata, 0, NULL, 0);
if (!p)
return AVERROR(ENOMEM);
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 157976583c..306dc3b47a 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -506,6 +506,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
{ AV_CODEC_ID_RTV1, MKTAG('R', 'T', 'V', '1') },
{ AV_CODEC_ID_VMIX, MKTAG('V', 'M', 'X', '1') },
{ AV_CODEC_ID_LEAD, MKTAG('L', 'E', 'A', 'D') },
+ { AV_CODEC_ID_EVC, MKTAG('e', 'v', 'c', '1') },
{ AV_CODEC_ID_NONE, 0 }
};
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index b0c61ee00a..db78735c7a 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -2071,6 +2071,7 @@ static int udp_read_packet(AVFormatContext *s, RTSPStream **prtsp_st,
if (fdsnum != 2) {
av_log(s, AV_LOG_ERROR,
"Number of fds %d not supported\n", fdsnum);
+ av_freep(&fds);
return AVERROR_INVALIDDATA;
}
for (fdsidx = 0; fdsidx < fdsnum; fdsidx++) {
diff --git a/libavformat/tee.c b/libavformat/tee.c
index 1cbbb80dbb..1a2a8ead82 100644
--- a/libavformat/tee.c
+++ b/libavformat/tee.c
@@ -119,7 +119,6 @@ static int parse_slave_fifo_options(const char *fifo_options, TeeSlave *tee_slav
static int close_slave(TeeSlave *tee_slave)
{
AVFormatContext *avf;
- unsigned i;
int ret = 0;
av_dict_free(&tee_slave->fifo_options);
@@ -131,7 +130,7 @@ static int close_slave(TeeSlave *tee_slave)
ret = av_write_trailer(avf);
if (tee_slave->bsfs) {
- for (i = 0; i < avf->nb_streams; ++i)
+ for (unsigned i = 0; i < avf->nb_streams; ++i)
av_bsf_free(&tee_slave->bsfs[i]);
}
av_freep(&tee_slave->stream_map);
@@ -146,9 +145,8 @@ static int close_slave(TeeSlave *tee_slave)
static void close_slaves(AVFormatContext *avf)
{
TeeContext *tee = avf->priv_data;
- unsigned i;
- for (i = 0; i < tee->nb_slaves; i++) {
+ for (unsigned i = 0; i < tee->nb_slaves; i++) {
close_slave(&tee->slaves[i]);
}
av_freep(&tee->slaves);
@@ -156,14 +154,12 @@ static void close_slaves(AVFormatContext *avf)
static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
{
- int i, ret;
+ int ret;
AVDictionary *options = NULL, *bsf_options = NULL;
- AVDictionaryEntry *entry;
+ const AVDictionaryEntry *entry;
char *filename;
- char *format = NULL, *select = NULL, *on_fail = NULL;
- char *use_fifo = NULL, *fifo_options_str = NULL;
+ char *format = NULL, *select = NULL;
AVFormatContext *avf2 = NULL;
- AVStream *st, *st2;
int stream_count;
int fullret;
char *subselect = NULL, *next_subselect = NULL, *first_subselect = NULL, *tmp_select = NULL;
@@ -172,30 +168,34 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
return ret;
#define CONSUME_OPTION(option, field, action) do { \
- if ((entry = av_dict_get(options, option, NULL, 0))) { \
- field = entry->value; \
+ AVDictionaryEntry *en = av_dict_get(options, option, NULL, 0); \
+ if (en) { \
+ field = en->value; \
{ action } \
av_dict_set(&options, option, NULL, 0); \
} \
} while (0)
#define STEAL_OPTION(option, field) \
CONSUME_OPTION(option, field, \
- entry->value = NULL; /* prevent it from being freed */)
-#define PROCESS_OPTION(option, field, function, on_error) \
- CONSUME_OPTION(option, field, if ((ret = function) < 0) { { on_error } goto end; })
+ en->value = NULL; /* prevent it from being freed */)
+#define PROCESS_OPTION(option, function, on_error) do { \
+ const char *value; \
+ CONSUME_OPTION(option, value, if ((ret = function) < 0) \
+ { { on_error } goto end; }); \
+ } while (0)
STEAL_OPTION("f", format);
STEAL_OPTION("select", select);
- PROCESS_OPTION("onfail", on_fail,
- parse_slave_failure_policy_option(on_fail, tee_slave),
+ PROCESS_OPTION("onfail",
+ parse_slave_failure_policy_option(value, tee_slave),
av_log(avf, AV_LOG_ERROR, "Invalid onfail option value, "
"valid options are 'abort' and 'ignore'\n"););
- PROCESS_OPTION("use_fifo", use_fifo,
- parse_slave_fifo_policy(use_fifo, tee_slave),
+ PROCESS_OPTION("use_fifo",
+ parse_slave_fifo_policy(value, tee_slave),
av_log(avf, AV_LOG_ERROR, "Error parsing fifo options: %s\n",
av_err2str(ret)););
- PROCESS_OPTION("fifo_options", fifo_options_str,
- parse_slave_fifo_options(fifo_options_str, tee_slave), ;);
+ PROCESS_OPTION("fifo_options",
+ parse_slave_fifo_options(value, tee_slave), ;);
entry = NULL;
while ((entry = av_dict_get(options, "bsfs", entry, AV_DICT_IGNORE_SUFFIX))) {
/* trim out strlen("bsfs") characters from key */
@@ -249,8 +249,9 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
}
stream_count = 0;
- for (i = 0; i < avf->nb_streams; i++) {
- st = avf->streams[i];
+ for (unsigned i = 0; i < avf->nb_streams; i++) {
+ const AVStream *st = avf->streams[i];
+ AVStream *st2;
if (select) {
tmp_select = av_strdup(select); // av_strtok is destructive so we regenerate it in each loop
if (!tmp_select) {
@@ -312,7 +313,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
}
entry = NULL;
- while (entry = av_dict_get(bsf_options, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
+ while (entry = av_dict_iterate(bsf_options, NULL)) {
const char *spec = entry->key;
if (*spec) {
if (strspn(spec, slave_bsfs_spec_sep) != 1) {
@@ -325,7 +326,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
spec++; /* consume separator */
}
- for (i = 0; i < avf2->nb_streams; i++) {
+ for (unsigned i = 0; i < avf2->nb_streams; i++) {
ret = avformat_match_stream_specifier(avf2, avf2->streams[i], spec);
if (ret < 0) {
av_log(avf, AV_LOG_ERROR,
@@ -356,7 +357,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
av_dict_set(&bsf_options, entry->key, NULL, 0);
}
- for (i = 0; i < avf->nb_streams; i++){
+ for (unsigned i = 0; i < avf->nb_streams; i++){
int target_stream = tee_slave->stream_map[i];
if (target_stream < 0)
continue;
@@ -389,7 +390,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
if (options) {
entry = NULL;
- while ((entry = av_dict_get(options, "", entry, AV_DICT_IGNORE_SUFFIX)))
+ while ((entry = av_dict_iterate(options, entry)))
av_log(avf2, AV_LOG_ERROR, "Unknown option '%s'\n", entry->key);
ret = AVERROR_OPTION_NOT_FOUND;
goto end;
@@ -406,10 +407,9 @@ end:
static void log_slave(TeeSlave *slave, void *log_ctx, int log_level)
{
- int i;
av_log(log_ctx, log_level, "filename:'%s' format:%s\n",
slave->avf->url, slave->avf->oformat->name);
- for (i = 0; i < slave->avf->nb_streams; i++) {
+ for (unsigned i = 0; i < slave->avf->nb_streams; i++) {
AVStream *st = slave->avf->streams[i];
AVBSFContext *bsf = slave->bsfs[i];
const char *bsf_name;
@@ -449,7 +449,7 @@ static int tee_process_slave_failure(AVFormatContext *avf, unsigned slave_idx, i
static int tee_write_header(AVFormatContext *avf)
{
TeeContext *tee = avf->priv_data;
- unsigned nb_slaves = 0, i;
+ unsigned nb_slaves = 0;
const char *filename = avf->url;
char **slaves = NULL;
int ret;
@@ -475,7 +475,7 @@ static int tee_write_header(AVFormatContext *avf)
}
tee->nb_slaves = tee->nb_alive = nb_slaves;
- for (i = 0; i < nb_slaves; i++) {
+ for (unsigned i = 0; i < nb_slaves; i++) {
tee->slaves[i].use_fifo = tee->use_fifo;
ret = av_dict_copy(&tee->slaves[i].fifo_options, tee->fifo_options, 0);
@@ -492,9 +492,9 @@ static int tee_write_header(AVFormatContext *avf)
av_freep(&slaves[i]);
}
- for (i = 0; i < avf->nb_streams; i++) {
- int j, mapped = 0;
- for (j = 0; j < tee->nb_slaves; j++)
+ for (unsigned i = 0; i < avf->nb_streams; i++) {
+ int mapped = 0;
+ for (unsigned j = 0; j < tee->nb_slaves; j++)
if (tee->slaves[j].avf)
mapped += tee->slaves[j].stream_map[i] >= 0;
if (!mapped)
@@ -505,7 +505,7 @@ static int tee_write_header(AVFormatContext *avf)
return 0;
fail:
- for (i = 0; i < nb_slaves; i++)
+ for (unsigned i = 0; i < nb_slaves; i++)
av_freep(&slaves[i]);
close_slaves(avf);
av_free(slaves);
@@ -516,9 +516,8 @@ static int tee_write_trailer(AVFormatContext *avf)
{
TeeContext *tee = avf->priv_data;
int ret_all = 0, ret;
- unsigned i;
- for (i = 0; i < tee->nb_slaves; i++) {
+ for (unsigned i = 0; i < tee->nb_slaves; i++) {
if ((ret = close_slave(&tee->slaves[i])) < 0) {
ret = tee_process_slave_failure(avf, i, ret);
if (!ret_all && ret < 0)
@@ -532,15 +531,16 @@ static int tee_write_trailer(AVFormatContext *avf)
static int tee_write_packet(AVFormatContext *avf, AVPacket *pkt)
{
TeeContext *tee = avf->priv_data;
- AVFormatContext *avf2;
- AVBSFContext *bsfs;
AVPacket *const pkt2 = ffformatcontext(avf)->pkt;
int ret_all = 0, ret;
- unsigned i, s;
+ unsigned s;
int s2;
- for (i = 0; i < tee->nb_slaves; i++) {
- if (!(avf2 = tee->slaves[i].avf))
+ for (unsigned i = 0; i < tee->nb_slaves; i++) {
+ AVFormatContext *avf2 = tee->slaves[i].avf;
+ AVBSFContext *bsfs;
+
+ if (!avf2)
continue;
/* Flush slave if pkt is NULL*/
diff --git a/libavformat/tests/movenc.c b/libavformat/tests/movenc.c
index 77f73abdfa..2fd5c67e76 100644
--- a/libavformat/tests/movenc.c
+++ b/libavformat/tests/movenc.c
@@ -58,7 +58,7 @@ struct AVMD5* md5;
uint8_t hash[HASH_SIZE];
AVPacket *pkt;
-AVStream *video_st, *audio_st;
+AVStream *video_st, *audio_st, *id3_st;
int64_t audio_dts, video_dts;
int bframes;
@@ -177,7 +177,7 @@ static void check_func(int value, int line, const char *msg, ...)
}
#define check(value, ...) check_func(value, __LINE__, __VA_ARGS__)
-static void init_fps(int bf, int audio_preroll, int fps)
+static void init_fps(int bf, int audio_preroll, int fps, int id3)
{
AVStream *st;
int iobuf_size = force_iobuf_size ? force_iobuf_size : sizeof(iobuf);
@@ -215,6 +215,7 @@ static void init_fps(int bf, int audio_preroll, int fps)
st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
st->codecpar->codec_id = AV_CODEC_ID_AAC;
st->codecpar->sample_rate = 44100;
+ st->codecpar->frame_size = 1024;
st->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
st->time_base.num = 1;
st->time_base.den = 44100;
@@ -225,6 +226,17 @@ static void init_fps(int bf, int audio_preroll, int fps)
memcpy(st->codecpar->extradata, aac_extradata, sizeof(aac_extradata));
audio_st = st;
+ if (id3) {
+ st = avformat_new_stream(ctx, NULL);
+ if (!st)
+ exit(1);
+ st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
+ st->codecpar->codec_id = AV_CODEC_ID_TIMED_ID3;
+ st->time_base.num = 1;
+ st->time_base.den = 1000;
+ id3_st = st;
+ }
+
if (avformat_write_header(ctx, &opts) < 0)
exit(1);
av_dict_free(&opts);
@@ -232,9 +244,10 @@ static void init_fps(int bf, int audio_preroll, int fps)
frames = 0;
gop_size = 30;
duration = video_st->time_base.den / fps;
- audio_duration = 1024LL * audio_st->time_base.den / audio_st->codecpar->sample_rate;
+ audio_duration = (long long)audio_st->codecpar->frame_size *
+ audio_st->time_base.den / audio_st->codecpar->sample_rate;
if (audio_preroll)
- audio_preroll = 2048LL * audio_st->time_base.den / audio_st->codecpar->sample_rate;
+ audio_preroll = 2 * audio_duration;
bframes = bf;
video_dts = bframes ? -duration : 0;
@@ -243,7 +256,7 @@ static void init_fps(int bf, int audio_preroll, int fps)
static void init(int bf, int audio_preroll)
{
- init_fps(bf, audio_preroll, 30);
+ init_fps(bf, audio_preroll, 30, 0);
}
static void mux_frames(int n, int c)
@@ -314,6 +327,23 @@ static void mux_frames(int n, int c)
}
}
+static void mux_id3(void)
+{
+ uint8_t pktdata[8] = { 0 };
+ av_packet_unref(pkt);
+
+ pkt->dts = pkt->pts = av_rescale_q(video_dts + (bframes ? duration : 0),
+ video_st->time_base, id3_st->time_base);
+ pkt->stream_index = id3_st->index;
+ pkt->duration = 0;
+
+ AV_WB32(pktdata + 4, pkt->pts);
+ pkt->data = pktdata;
+ pkt->size = 8;
+
+ av_write_frame(ctx, pkt);
+}
+
static void mux_gops(int n)
{
mux_frames(gop_size * n, 0);
@@ -442,6 +472,7 @@ int main(int argc, char **argv)
// Similar to the previous one, but with input that doesn't start at
// pts/dts 0. avoid_negative_ts behaves in the same way as
// in non-empty-moov-no-elst above.
+ init_count_warnings();
init_out("empty-moov-no-elst");
av_dict_set(&opts, "movflags", "+frag_keyframe+empty_moov", 0);
init(1, 0);
@@ -449,6 +480,9 @@ int main(int argc, char **argv)
finish();
close_out();
+ reset_count_warnings();
+ check(num_warnings == 0, "Unexpected warnings printed");
+
// Same as the previous one, but disable avoid_negative_ts (which
// would require using an edit list, but with empty_moov, one can't
// write a sensible edit list, when the start timestamps aren't known).
@@ -702,7 +736,7 @@ int main(int argc, char **argv)
// by the edit list.
init_out("vfr");
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
- init_fps(1, 1, 3);
+ init_fps(1, 1, 3, 0);
mux_frames(gop_size/2, 0);
duration /= 10;
mux_frames(gop_size/2, 0);
@@ -721,7 +755,7 @@ int main(int argc, char **argv)
clear_duration = 1;
init_out("vfr-noduration");
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov+dash", 0);
- init_fps(1, 1, 3);
+ init_fps(1, 1, 3, 0);
mux_frames(gop_size/2, 0);
duration /= 10;
mux_frames(gop_size/2, 0);
@@ -737,7 +771,7 @@ int main(int argc, char **argv)
force_iobuf_size = 1500;
init_out("large_frag");
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
- init_fps(1, 1, 3);
+ init_fps(1, 1, 3, 0);
mux_gops(2);
finish();
close_out();
@@ -751,7 +785,7 @@ int main(int argc, char **argv)
init_out("vfr-noduration-interleave");
av_dict_set(&opts, "movflags", "+frag_keyframe+delay_moov", 0);
av_dict_set(&opts, "frag_duration", "650000", 0);
- init_fps(1, 1, 30);
+ init_fps(1, 1, 30, 0);
mux_frames(gop_size/2, 0);
// Pretend that the packet duration is the normal, even if
// we actually skip a bunch of frames. (I.e., simulate that
@@ -788,6 +822,19 @@ int main(int argc, char **argv)
finish();
close_out();
+ // Write a manually fragmented file, with timed ID3 packets at the head
+ // of each fragment.
+ init_out("emsg");
+ av_dict_set(&opts, "movflags", "+frag_custom+cmaf", 0);
+ init_fps(1, 0, 30, 1);
+ mux_id3();
+ mux_gops(2);
+ av_write_frame(ctx, NULL); // Flush fragment.
+ mux_id3();
+ mux_gops(2);
+ finish();
+ close_out();
+
av_free(md5);
av_packet_free(&pkt);
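As a usage sketch of the path exercised by mux_id3() above: a data stream with codec_id AV_CODEC_ID_TIMED_ID3 gets no track in the output file, and each of its packets is written out as an 'emsg' box instead. The function below is hypothetical and assumes an already-opened mp4/mov muxer context whose id3_st was created as an AVMEDIA_TYPE_DATA stream, mirroring the test's init_fps().

#include <libavformat/avformat.h>

static int write_id3_emsg(AVFormatContext *oc, AVStream *id3_st,
                          const uint8_t *id3_data, int id3_size, int64_t pts)
{
    AVPacket *pkt = av_packet_alloc();
    int ret;

    if (!pkt)
        return AVERROR(ENOMEM);
    pkt->data         = (uint8_t *)id3_data; /* raw ID3v2 payload */
    pkt->size         = id3_size;
    pkt->pts          = pkt->dts = pts;      /* in id3_st->time_base units */
    pkt->stream_index = id3_st->index;
    ret = av_write_frame(oc, pkt);           /* emitted as an 'emsg', not a sample */
    av_packet_free(&pkt);
    return ret;
}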
diff --git a/libavformat/tls_gnutls.c b/libavformat/tls_gnutls.c
index 2ab38a199b..df251ad79c 100644
--- a/libavformat/tls_gnutls.c
+++ b/libavformat/tls_gnutls.c
@@ -25,15 +25,12 @@
#include <gnutls/x509.h>
#include "avformat.h"
-#include "internal.h"
#include "network.h"
#include "os_support.h"
#include "url.h"
#include "tls.h"
-#include "libavcodec/internal.h"
-#include "libavutil/avstring.h"
#include "libavutil/opt.h"
-#include "libavutil/parseutils.h"
+#include "libavutil/thread.h"
#ifndef GNUTLS_VERSION_NUMBER
#define GNUTLS_VERSION_NUMBER LIBGNUTLS_VERSION_NUMBER
@@ -41,7 +38,6 @@
#if HAVE_THREADS && GNUTLS_VERSION_NUMBER <= 0x020b00
#include <gcrypt.h>
-#include "libavutil/thread.h"
GCRY_THREAD_OPTION_PTHREAD_IMPL;
#endif
@@ -54,22 +50,24 @@ typedef struct TLSContext {
int io_err;
} TLSContext;
+static AVMutex gnutls_mutex = AV_MUTEX_INITIALIZER;
+
void ff_gnutls_init(void)
{
- ff_lock_avformat();
+ ff_mutex_lock(&gnutls_mutex);
#if HAVE_THREADS && GNUTLS_VERSION_NUMBER < 0x020b00
if (gcry_control(GCRYCTL_ANY_INITIALIZATION_P) == 0)
gcry_control(GCRYCTL_SET_THREAD_CBS, &gcry_threads_pthread);
#endif
gnutls_global_init();
- ff_unlock_avformat();
+ ff_mutex_unlock(&gnutls_mutex);
}
void ff_gnutls_deinit(void)
{
- ff_lock_avformat();
+ ff_mutex_lock(&gnutls_mutex);
gnutls_global_deinit();
- ff_unlock_avformat();
+ ff_mutex_unlock(&gnutls_mutex);
}
static int print_tls_error(URLContext *h, int ret)
diff --git a/libavformat/tls_mbedtls.c b/libavformat/tls_mbedtls.c
index 59ed4b447a..1a182e735e 100644
--- a/libavformat/tls_mbedtls.c
+++ b/libavformat/tls_mbedtls.c
@@ -310,6 +310,8 @@ static int tls_read(URLContext *h, uint8_t *buf, int size)
TLSContext *tls_ctx = h->priv_data;
int ret;
+ tls_ctx->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+ tls_ctx->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
if ((ret = mbedtls_ssl_read(&tls_ctx->ssl_context, buf, size)) > 0) {
// return read length
return ret;
@@ -323,6 +325,8 @@ static int tls_write(URLContext *h, const uint8_t *buf, int size)
TLSContext *tls_ctx = h->priv_data;
int ret;
+ tls_ctx->tls_shared.tcp->flags &= ~AVIO_FLAG_NONBLOCK;
+ tls_ctx->tls_shared.tcp->flags |= h->flags & AVIO_FLAG_NONBLOCK;
if ((ret = mbedtls_ssl_write(&tls_ctx->ssl_context, buf, size)) > 0) {
// return written length
return ret;
diff --git a/libavformat/tls_openssl.c b/libavformat/tls_openssl.c
index b875be32f0..8b0cf9efb2 100644
--- a/libavformat/tls_openssl.c
+++ b/libavformat/tls_openssl.c
@@ -19,25 +19,16 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "avformat.h"
-#include "internal.h"
#include "network.h"
#include "os_support.h"
#include "url.h"
#include "tls.h"
-#include "libavutil/avstring.h"
-#include "libavutil/avutil.h"
-#include "libavutil/mem.h"
#include "libavutil/opt.h"
-#include "libavutil/parseutils.h"
-#include "libavutil/thread.h"
#include <openssl/bio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
-static int openssl_init;
-
typedef struct TLSContext {
const AVClass *class;
TLSShared tls_shared;
@@ -49,8 +40,22 @@ typedef struct TLSContext {
int io_err;
} TLSContext;
-#if HAVE_THREADS && OPENSSL_VERSION_NUMBER < 0x10100000L
+/* With OpenSSL 1.0.2 or below, SSL_library_init must be called explicitly.
+ * With OpenSSL 1.1.0 or above, the library initializes itself automatically.
+ * https://wiki.openssl.org/index.php/Library_Initialization
+ */
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
+#include "libavutil/thread.h"
+
+static AVMutex openssl_mutex = AV_MUTEX_INITIALIZER;
+
+static int openssl_init;
+
+#if HAVE_THREADS
#include <openssl/crypto.h>
+#include "libavutil/mem.h"
+
pthread_mutex_t *openssl_mutexes;
static void openssl_lock(int mode, int type, const char *file, int line)
{
@@ -69,23 +74,16 @@ static unsigned long openssl_thread_id(void)
int ff_openssl_init(void)
{
- ff_lock_avformat();
+ ff_mutex_lock(&openssl_mutex);
if (!openssl_init) {
- /* OpenSSL 1.0.2 or below, then you would use SSL_library_init. If you are
- * using OpenSSL 1.1.0 or above, then the library will initialize
- * itself automatically.
- * https://wiki.openssl.org/index.php/Library_Initialization
- */
-#if OPENSSL_VERSION_NUMBER < 0x10100000L
SSL_library_init();
SSL_load_error_strings();
-#endif
-#if HAVE_THREADS && OPENSSL_VERSION_NUMBER < 0x10100000L
+#if HAVE_THREADS
if (!CRYPTO_get_locking_callback()) {
int i;
openssl_mutexes = av_malloc_array(sizeof(pthread_mutex_t), CRYPTO_num_locks());
if (!openssl_mutexes) {
- ff_unlock_avformat();
+ ff_mutex_unlock(&openssl_mutex);
return AVERROR(ENOMEM);
}
@@ -99,17 +97,17 @@ int ff_openssl_init(void)
#endif
}
openssl_init++;
- ff_unlock_avformat();
+ ff_mutex_unlock(&openssl_mutex);
return 0;
}
void ff_openssl_deinit(void)
{
- ff_lock_avformat();
+ ff_mutex_lock(&openssl_mutex);
openssl_init--;
if (!openssl_init) {
-#if HAVE_THREADS && OPENSSL_VERSION_NUMBER < 0x10100000L
+#if HAVE_THREADS
if (CRYPTO_get_locking_callback() == openssl_lock) {
int i;
CRYPTO_set_locking_callback(NULL);
@@ -119,8 +117,9 @@ void ff_openssl_deinit(void)
}
#endif
}
- ff_unlock_avformat();
+ ff_mutex_unlock(&openssl_mutex);
}
+#endif
static int print_tls_error(URLContext *h, int ret)
{
@@ -160,7 +159,9 @@ static int tls_close(URLContext *h)
if (c->url_bio_method)
BIO_meth_free(c->url_bio_method);
#endif
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
ff_openssl_deinit();
+#endif
return 0;
}
@@ -256,8 +257,10 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op
BIO *bio;
int ret;
+#if OPENSSL_VERSION_NUMBER < 0x10100000L
if ((ret = ff_openssl_init()) < 0)
return ret;
+#endif
if ((ret = ff_tls_open_underlying(c, h, uri, options)) < 0)
goto fail;
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 4dded7aea4..e9ded627ad 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -27,7 +27,6 @@
#include "libavutil/bprint.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"
-#include "libavutil/thread.h"
#include "libavutil/time.h"
#include "libavcodec/internal.h"
@@ -40,23 +39,11 @@
#endif
#include "os_support.h"
-static AVMutex avformat_mutex = AV_MUTEX_INITIALIZER;
-
/**
* @file
* various utility functions for use within FFmpeg
*/
-int ff_lock_avformat(void)
-{
- return ff_mutex_lock(&avformat_mutex) ? -1 : 0;
-}
-
-int ff_unlock_avformat(void)
-{
- return ff_mutex_unlock(&avformat_mutex) ? -1 : 0;
-}
-
/* an arbitrarily chosen "sane" max packet size -- 50M */
#define SANE_CHUNK_SIZE (50000000)
diff --git a/libavformat/version.h b/libavformat/version.h
index 7ff1483912..5310326bda 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -32,7 +32,7 @@
#include "version_major.h"
#define LIBAVFORMAT_VERSION_MINOR 3
-#define LIBAVFORMAT_VERSION_MICRO 100
+#define LIBAVFORMAT_VERSION_MICRO 103
#define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
LIBAVFORMAT_VERSION_MINOR, \
diff --git a/libavformat/vvc.c b/libavformat/vvc.c
index 3123cd8d83..449c933881 100644
--- a/libavformat/vvc.c
+++ b/libavformat/vvc.c
@@ -21,8 +21,10 @@
*/
#include "libavcodec/get_bits.h"
+#include "libavcodec/put_bits.h"
#include "libavcodec/golomb.h"
#include "libavcodec/vvc.h"
+#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "avc.h"
@@ -68,32 +70,15 @@ typedef struct VVCDecoderConfigurationRecord {
VVCCNALUnitArray *array;
} VVCDecoderConfigurationRecord;
-typedef struct VVCCProfileTierLevel {
- uint8_t profile_idc;
- uint8_t tier_flag;
- uint8_t general_level_idc;
- uint8_t ptl_frame_only_constraint_flag;
- uint8_t ptl_multilayer_enabled_flag;
-// general_constraint_info
- uint8_t gci_present_flag;
- uint8_t gci_general_constraints[9];
- uint8_t gci_num_reserved_bits;
-// end general_constraint_info
- uint8_t ptl_sublayer_level_present_flag[VVC_MAX_SUBLAYERS - 1];
- uint8_t sublayer_level_idc[VVC_MAX_SUBLAYERS - 1];
- uint8_t ptl_num_sub_profiles;
- uint32_t general_sub_profile_idc[VVC_MAX_SUB_PROFILES];
-} VVCCProfileTierLevel;
-
static void vvcc_update_ptl(VVCDecoderConfigurationRecord *vvcc,
- VVCCProfileTierLevel *ptl)
+ VVCPTLRecord *ptl)
{
/*
* The level indication general_level_idc must indicate a level of
* capability equal to or greater than the highest level indicated for the
* highest tier in all the parameter sets.
*/
- if (vvcc->ptl.general_tier_flag < ptl->tier_flag)
+ if (vvcc->ptl.general_tier_flag < ptl->general_tier_flag)
vvcc->ptl.general_level_idc = ptl->general_level_idc;
else
vvcc->ptl.general_level_idc =
@@ -104,7 +89,7 @@ static void vvcc_update_ptl(VVCDecoderConfigurationRecord *vvcc,
* greater than the highest tier indicated in all the parameter sets.
*/
vvcc->ptl.general_tier_flag =
- FFMAX(vvcc->ptl.general_tier_flag, ptl->tier_flag);
+ FFMAX(vvcc->ptl.general_tier_flag, ptl->general_tier_flag);
/*
* The profile indication general_profile_idc must indicate a profile to
@@ -121,7 +106,7 @@ static void vvcc_update_ptl(VVCDecoderConfigurationRecord *vvcc,
* Note: set the profile to the highest value for the sake of simplicity.
*/
vvcc->ptl.general_profile_idc =
- FFMAX(vvcc->ptl.general_profile_idc, ptl->profile_idc);
+ FFMAX(vvcc->ptl.general_profile_idc, ptl->general_profile_idc);
/*
* Each bit in flags may only be set if all
@@ -134,14 +119,13 @@ static void vvcc_update_ptl(VVCDecoderConfigurationRecord *vvcc,
/*
* Constraints Info
*/
- if (ptl->gci_present_flag) {
- vvcc->ptl.num_bytes_constraint_info = 9;
+ if (ptl->num_bytes_constraint_info) {
+ vvcc->ptl.num_bytes_constraint_info = ptl->num_bytes_constraint_info;
memcpy(&vvcc->ptl.general_constraint_info[0],
- &ptl->gci_general_constraints[0], sizeof(uint8_t) * 9);
-
+ &ptl->general_constraint_info[0], ptl->num_bytes_constraint_info);
} else {
vvcc->ptl.num_bytes_constraint_info = 1;
- memset(&vvcc->ptl.general_constraint_info[0], 0, sizeof(uint8_t) * 9);
+ memset(&vvcc->ptl.general_constraint_info[0], 0, sizeof(vvcc->ptl.general_constraint_info));
}
/*
@@ -185,36 +169,35 @@ static void vvcc_parse_ptl(GetBitContext *gb,
unsigned int profileTierPresentFlag,
unsigned int max_sub_layers_minus1)
{
- VVCCProfileTierLevel general_ptl;
- int j;
+ VVCPTLRecord general_ptl = { 0 };
if (profileTierPresentFlag) {
- general_ptl.profile_idc = get_bits(gb, 7);
- general_ptl.tier_flag = get_bits1(gb);
+ general_ptl.general_profile_idc = get_bits(gb, 7);
+ general_ptl.general_tier_flag = get_bits1(gb);
}
general_ptl.general_level_idc = get_bits(gb, 8);
general_ptl.ptl_frame_only_constraint_flag = get_bits1(gb);
general_ptl.ptl_multilayer_enabled_flag = get_bits1(gb);
if (profileTierPresentFlag) { // parse constraint info
- general_ptl.gci_present_flag = get_bits1(gb);
- if (general_ptl.gci_present_flag) {
+ general_ptl.num_bytes_constraint_info = get_bits1(gb); // gci_present_flag
+ if (general_ptl.num_bytes_constraint_info) {
+ int gci_num_reserved_bits, j;
for (j = 0; j < 8; j++)
- general_ptl.gci_general_constraints[j] = get_bits(gb, 8);
- general_ptl.gci_general_constraints[8] = get_bits(gb, 7);
+ general_ptl.general_constraint_info[j] = get_bits(gb, 8);
+ general_ptl.general_constraint_info[j++] = get_bits(gb, 7);
- general_ptl.gci_num_reserved_bits = get_bits(gb, 8);
- skip_bits(gb, general_ptl.gci_num_reserved_bits);
+ gci_num_reserved_bits = get_bits(gb, 8);
+ general_ptl.num_bytes_constraint_info = j;
+ skip_bits(gb, gci_num_reserved_bits);
}
- while (gb->index % 8 != 0)
- skip_bits1(gb);
+ align_get_bits(gb);
}
for (int i = max_sub_layers_minus1 - 1; i >= 0; i--)
general_ptl.ptl_sublayer_level_present_flag[i] = get_bits1(gb);
- while (gb->index % 8 != 0)
- skip_bits1(gb);
+ align_get_bits(gb);
for (int i = max_sub_layers_minus1 - 1; i >= 0; i--) {
if (general_ptl.ptl_sublayer_level_present_flag[i])
@@ -239,8 +222,6 @@ static int vvcc_parse_vps(GetBitContext *gb,
unsigned int vps_max_sublayers_minus1;
unsigned int vps_default_ptl_dpb_hrd_max_tid_flag;
unsigned int vps_all_independent_layers_flag;
- unsigned int vps_each_layer_is_an_ols_flag;
- unsigned int vps_ols_mode_idc;
unsigned int vps_pt_present_flag[VVC_MAX_PTLS];
unsigned int vps_ptl_max_tid[VVC_MAX_PTLS];
@@ -267,6 +248,8 @@ static int vvcc_parse_vps(GetBitContext *gb,
if (vps_max_layers_minus1 > 0 && vps_max_sublayers_minus1 > 0)
vps_default_ptl_dpb_hrd_max_tid_flag = get_bits1(gb);
+ else
+ vps_default_ptl_dpb_hrd_max_tid_flag = 0;
if (vps_max_layers_minus1 > 0)
vps_all_independent_layers_flag = get_bits1(gb);
else
@@ -275,10 +258,11 @@ static int vvcc_parse_vps(GetBitContext *gb,
for (int i = 0; i <= vps_max_layers_minus1; i++) {
skip_bits(gb, 6); //vps_layer_id[i]
if (i > 0 && !vps_all_independent_layers_flag) {
- if (get_bits1(gb)) { // vps_independent_layer_flag[i]
+ if (!get_bits1(gb)) { // vps_independent_layer_flag[i]
unsigned int vps_max_tid_ref_present_flag = get_bits1(gb);
for (int j = 0; j < i; j++) {
- if (vps_max_tid_ref_present_flag && get_bits1(gb)) // vps_direct_ref_layer_flag[i][j]
+ unsigned int vps_direct_ref_layer_flag = get_bits1(gb);
+ if (vps_max_tid_ref_present_flag && vps_direct_ref_layer_flag)
skip_bits(gb, 3); // vps_max_tid_il_ref_pics_plus1
}
}
@@ -286,11 +270,13 @@ static int vvcc_parse_vps(GetBitContext *gb,
}
if (vps_max_layers_minus1 > 0) {
+ unsigned int vps_each_layer_is_an_ols_flag;
if (vps_all_independent_layers_flag)
vps_each_layer_is_an_ols_flag = get_bits1(gb);
else
vps_each_layer_is_an_ols_flag = 0;
if (!vps_each_layer_is_an_ols_flag) {
+ unsigned int vps_ols_mode_idc;
if (!vps_all_independent_layers_flag)
vps_ols_mode_idc = get_bits(gb, 2);
else
@@ -305,8 +291,6 @@ static int vvcc_parse_vps(GetBitContext *gb,
}
}
vps_num_ptls_minus1 = get_bits(gb, 8);
- } else {
- vps_each_layer_is_an_ols_flag = 0;
}
for (int i = 0; i <= vps_num_ptls_minus1; i++) {
@@ -321,11 +305,11 @@ static int vvcc_parse_vps(GetBitContext *gb,
vps_ptl_max_tid[i] = vps_max_sublayers_minus1;
}
- while (gb->index % 8 != 0)
- skip_bits1(gb);
+ align_get_bits(gb);
for (int i = 0; i <= vps_num_ptls_minus1; i++)
vvcc_parse_ptl(gb, vvcc, vps_pt_present_flag[i], vps_ptl_max_tid[i]);
+ vvcc->ptl_present_flag = 1;
/* nothing useful for vvcc past this point */
return 0;
@@ -356,8 +340,10 @@ static int vvcc_parse_sps(GetBitContext *gb,
vvcc->chroma_format_idc = get_bits(gb, 2);
sps_log2_ctu_size_minus5 = get_bits(gb, 2);
- if (get_bits1(gb)) // sps_ptl_dpb_hrd_params_present_flag
+ if (get_bits1(gb)) { // sps_ptl_dpb_hrd_params_present_flag
+ vvcc->ptl_present_flag = 1;
vvcc_parse_ptl(gb, vvcc, 1, sps_max_sublayers_minus1);
+ }
skip_bits1(gb); // sps_gdr_enabled_flag
if (get_bits(gb, 1)) // sps_ref_pic_resampling_enabled_flag
@@ -579,10 +565,6 @@ static void vvcc_init(VVCDecoderConfigurationRecord *vvcc)
{
memset(vvcc, 0, sizeof(VVCDecoderConfigurationRecord));
vvcc->lengthSizeMinusOne = 3; // 4 bytes
-
- vvcc->ptl.num_bytes_constraint_info = 1;
-
- vvcc->ptl_present_flag = 1;
}
static void vvcc_close(VVCDecoderConfigurationRecord *vvcc)
@@ -603,7 +585,6 @@ static int vvcc_write(AVIOContext *pb, VVCDecoderConfigurationRecord *vvcc)
{
uint8_t i;
uint16_t j, vps_count = 0, sps_count = 0, pps_count = 0;
- unsigned char *buf = NULL;
/*
* It's unclear how to properly compute these fields, so
* let's always set them to values meaning 'unspecified'.
@@ -735,6 +716,10 @@ static int vvcc_write(AVIOContext *pb, VVCDecoderConfigurationRecord *vvcc)
avio_w8(pb, vvcc->lengthSizeMinusOne << 1 | vvcc->ptl_present_flag | 0xf8);
if (vvcc->ptl_present_flag) {
+ uint8_t buf[64];
+ PutBitContext pbc;
+
+ init_put_bits(&pbc, buf, sizeof(buf));
/*
* unsigned int(9) ols_idx;
* unsigned int(3) num_sublayers;
@@ -766,15 +751,14 @@ static int vvcc_write(AVIOContext *pb, VVCDecoderConfigurationRecord *vvcc)
* unsigned int (1) ptl_frame_only_constraint_flag
* unsigned int (1) ptl_multilayer_enabled_flag
* unsigned int (8*num_bytes_constraint_info -2) general_constraint_info */
- buf =
- (unsigned char *) malloc(sizeof(unsigned char) *
- vvcc->ptl.num_bytes_constraint_info);
- *buf = vvcc->ptl.ptl_frame_only_constraint_flag << vvcc->ptl.
- num_bytes_constraint_info * 8 - 1 | vvcc->ptl.
- ptl_multilayer_enabled_flag << vvcc->ptl.num_bytes_constraint_info *
- 8 - 2 | *vvcc->ptl.general_constraint_info >> 2;
- avio_write(pb, buf, vvcc->ptl.num_bytes_constraint_info);
- free(buf);
+ put_bits(&pbc, 1, vvcc->ptl.ptl_frame_only_constraint_flag);
+ put_bits(&pbc, 1, vvcc->ptl.ptl_multilayer_enabled_flag);
+ av_assert0(vvcc->ptl.num_bytes_constraint_info);
+ if (vvcc->ptl.num_bytes_constraint_info > 1)
+ ff_copy_bits(&pbc, vvcc->ptl.general_constraint_info, (vvcc->ptl.num_bytes_constraint_info - 1) * 8);
+ put_bits(&pbc, 6, vvcc->ptl.general_constraint_info[vvcc->ptl.num_bytes_constraint_info - 1] & 0x3f);
+ flush_put_bits(&pbc);
+ avio_write(pb, buf, put_bytes_count(&pbc, 1));
if (vvcc->num_sublayers > 1) {
uint8_t ptl_sublayer_level_present_flags = 0;
diff --git a/libavformat/wvdec.c b/libavformat/wvdec.c
index b25c1eee83..e2a79957f7 100644
--- a/libavformat/wvdec.c
+++ b/libavformat/wvdec.c
@@ -268,7 +268,7 @@ static int wv_read_header(AVFormatContext *s)
if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
int64_t cur = avio_tell(s->pb);
wc->apetag_start = ff_ape_parse_tag(s);
- if (!av_dict_get(s->metadata, "", NULL, AV_DICT_IGNORE_SUFFIX))
+ if (av_dict_count(s->metadata) == 0)
ff_id3v1_read(s);
avio_seek(s->pb, cur, SEEK_SET);
}
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 7a05391343..196bdaf6b0 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -45,22 +45,23 @@ static int detect_flags(void)
#elif defined(__APPLE__) && HAVE_SYSCTLBYNAME
#include <sys/sysctl.h>
+static int have_feature(const char *feature) {
+ uint32_t value = 0;
+ size_t size = sizeof(value);
+ if (!sysctlbyname(feature, &value, &size, NULL, 0))
+ return value;
+ return 0;
+}
+
static int detect_flags(void)
{
- uint32_t value = 0;
- size_t size;
int flags = 0;
- size = sizeof(value);
- if (!sysctlbyname("hw.optional.arm.FEAT_DotProd", &value, &size, NULL, 0)) {
- if (value)
- flags |= AV_CPU_FLAG_DOTPROD;
- }
- size = sizeof(value);
- if (!sysctlbyname("hw.optional.arm.FEAT_I8MM", &value, &size, NULL, 0)) {
- if (value)
- flags |= AV_CPU_FLAG_I8MM;
- }
+ if (have_feature("hw.optional.arm.FEAT_DotProd"))
+ flags |= AV_CPU_FLAG_DOTPROD;
+ if (have_feature("hw.optional.arm.FEAT_I8MM"))
+ flags |= AV_CPU_FLAG_I8MM;
+
return flags;
}
diff --git a/libavutil/base64.c b/libavutil/base64.c
index 3e66f4fcbe..69e11e6f5e 100644
--- a/libavutil/base64.c
+++ b/libavutil/base64.c
@@ -127,10 +127,12 @@ validity_check:
}
out3:
- *dst++ = v >> 10;
+ if (end - dst)
+ *dst++ = v >> 10;
v <<= 2;
out2:
- *dst++ = v >> 4;
+ if (end - dst)
+ *dst++ = v >> 4;
out1:
out0:
return bits & 1 ? AVERROR_INVALIDDATA : out ? dst - out : 0;
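A small sketch of the public API this guards, assuming only the usual installed libavutil headers; the helper name is made up. With the added end - dst checks, a too-small out_size now truncates the trailing partial group instead of writing past the caller's buffer.

#include <string.h>
#include <libavutil/base64.h>
#include <libavutil/mem.h>

static uint8_t *decode_b64(const char *in, int *out_len)
{
    size_t   inlen = strlen(in);
    int      max   = (int)AV_BASE64_DECODE_SIZE(inlen); /* upper bound on output */
    uint8_t *buf   = av_malloc(max ? max : 1);
    int      n;

    if (!buf)
        return NULL;
    n = av_base64_decode(buf, in, max);
    if (n < 0) {                        /* invalid base64 input */
        av_free(buf);
        return NULL;
    }
    *out_len = n;
    return buf;
}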
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index d4f947360a..9ac2f01c20 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -192,6 +192,8 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
{ "zve64d", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F64 }, .unit = "flags" },
{ "zba", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB_ADDR }, .unit = "flags" },
{ "zbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB_BASIC }, .unit = "flags" },
+ { "zvbb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVBB }, .unit = "flags" },
+ { "misaligned", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_MISALIGNED }, .unit = "flags" },
#endif
{ NULL },
};
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 8dff341886..a25901433e 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -90,6 +90,8 @@
#define AV_CPU_FLAG_RVV_F64 (1 << 6) ///< Vectors of double's
#define AV_CPU_FLAG_RVB_BASIC (1 << 7) ///< Basic bit-manipulations
#define AV_CPU_FLAG_RVB_ADDR (1 << 8) ///< Address bit-manipulations
+#define AV_CPU_FLAG_RV_ZVBB (1 << 9) ///< Vector basic bit-manipulations
+#define AV_CPU_FLAG_RV_MISALIGNED (1 <<10) ///< Fast misaligned accesses
/**
* Return the flags which specify extensions supported by the CPU.
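A tiny usage sketch of the new RISC-V capability bits; nothing here is from the patch beyond the flag macros and option names, and the "zvbb"/"misaligned" strings are only registered in the av_parse_cpu_caps() table on RISC-V builds.

#include <libavutil/cpu.h>

static int rvv_bitmanip_usable(void)
{
    /* run-time query, same as any other AV_CPU_FLAG_* bit */
    return !!(av_get_cpu_flags() & AV_CPU_FLAG_RV_ZVBB);
}

static int restrict_cpu_caps(const char *names) /* e.g. "zvbb+misaligned" */
{
    unsigned flags = 0;
    int ret = av_parse_cpu_caps(&flags, names);
    if (ret < 0)
        return ret;
    av_force_cpu_flags(flags);
    return 0;
}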
diff --git a/libavutil/error.c b/libavutil/error.c
index 938a8bc000..90bab7b9d3 100644
--- a/libavutil/error.c
+++ b/libavutil/error.c
@@ -61,6 +61,7 @@ static const struct error_entry error_entries[] = {
{ ERROR_TAG(HTTP_UNAUTHORIZED), "Server returned 401 Unauthorized (authorization failed)" },
{ ERROR_TAG(HTTP_FORBIDDEN), "Server returned 403 Forbidden (access denied)" },
{ ERROR_TAG(HTTP_NOT_FOUND), "Server returned 404 Not Found" },
+ { ERROR_TAG(HTTP_TOO_MANY_REQUESTS), "Server returned 429 Too Many Requests" },
{ ERROR_TAG(HTTP_OTHER_4XX), "Server returned 4XX Client Error, but not one of 40{0,1,3,4}" },
{ ERROR_TAG(HTTP_SERVER_ERROR), "Server returned 5XX Server Error reply" },
#if !HAVE_STRERROR_R
diff --git a/libavutil/error.h b/libavutil/error.h
index 0d3269aa6d..1efa86c4c1 100644
--- a/libavutil/error.h
+++ b/libavutil/error.h
@@ -79,6 +79,7 @@
#define AVERROR_HTTP_UNAUTHORIZED FFERRTAG(0xF8,'4','0','1')
#define AVERROR_HTTP_FORBIDDEN FFERRTAG(0xF8,'4','0','3')
#define AVERROR_HTTP_NOT_FOUND FFERRTAG(0xF8,'4','0','4')
+#define AVERROR_HTTP_TOO_MANY_REQUESTS FFERRTAG(0xF8,'4','2','9')
#define AVERROR_HTTP_OTHER_4XX FFERRTAG(0xF8,'4','X','X')
#define AVERROR_HTTP_SERVER_ERROR FFERRTAG(0xF8,'5','X','X')
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 930f01dc19..0775e2abd9 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -804,15 +804,37 @@ AVFrameSideData *av_frame_new_side_data(AVFrame *frame,
return ret;
}
+static AVFrameSideData *replace_side_data_from_buf(AVFrameSideData *dst,
+ AVBufferRef *buf, int flags)
+{
+ if (!(flags & AV_FRAME_SIDE_DATA_FLAG_REPLACE))
+ return NULL;
+
+ av_dict_free(&dst->metadata);
+ av_buffer_unref(&dst->buf);
+ dst->buf = buf;
+ dst->data = buf->data;
+ dst->size = buf->size;
+ return dst;
+}
+
AVFrameSideData *av_frame_side_data_new(AVFrameSideData ***sd, int *nb_sd,
enum AVFrameSideDataType type,
size_t size, unsigned int flags)
{
+ const AVSideDataDescriptor *desc = av_frame_side_data_desc(type);
AVBufferRef *buf = av_buffer_alloc(size);
AVFrameSideData *ret = NULL;
if (flags & AV_FRAME_SIDE_DATA_FLAG_UNIQUE)
remove_side_data(sd, nb_sd, type);
+ if ((!desc || !(desc->props & AV_SIDE_DATA_PROP_MULTI)) &&
+ (ret = (AVFrameSideData *)av_frame_side_data_get(*sd, *nb_sd, type))) {
+ ret = replace_side_data_from_buf(ret, buf, flags);
+ if (!ret)
+ av_buffer_unref(&buf);
+ return ret;
+ }
ret = add_side_data_from_buf(sd, nb_sd, type, buf);
if (!ret)
@@ -821,9 +843,36 @@ AVFrameSideData *av_frame_side_data_new(AVFrameSideData ***sd, int *nb_sd,
return ret;
}
+AVFrameSideData *av_frame_side_data_add(AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type,
+ AVBufferRef **pbuf, unsigned int flags)
+{
+ const AVSideDataDescriptor *desc = av_frame_side_data_desc(type);
+ AVFrameSideData *sd_dst = NULL;
+ AVBufferRef *buf = *pbuf;
+
+ if (flags & AV_FRAME_SIDE_DATA_FLAG_UNIQUE)
+ remove_side_data(sd, nb_sd, type);
+ if ((!desc || !(desc->props & AV_SIDE_DATA_PROP_MULTI)) &&
+ (sd_dst = (AVFrameSideData *)av_frame_side_data_get(*sd, *nb_sd, type))) {
+ sd_dst = replace_side_data_from_buf(sd_dst, buf, flags);
+ if (sd_dst)
+ *pbuf = NULL;
+ return sd_dst;
+ }
+
+ sd_dst = add_side_data_from_buf(sd, nb_sd, type, buf);
+ if (!sd_dst)
+ return NULL;
+
+ *pbuf = NULL;
+ return sd_dst;
+}
+
int av_frame_side_data_clone(AVFrameSideData ***sd, int *nb_sd,
const AVFrameSideData *src, unsigned int flags)
{
+ const AVSideDataDescriptor *desc;
AVBufferRef *buf = NULL;
AVFrameSideData *sd_dst = NULL;
int ret = AVERROR_BUG;
@@ -831,13 +880,37 @@ int av_frame_side_data_clone(AVFrameSideData ***sd, int *nb_sd,
if (!sd || !src || !nb_sd || (*nb_sd && !*sd))
return AVERROR(EINVAL);
+ desc = av_frame_side_data_desc(src->type);
+ if (flags & AV_FRAME_SIDE_DATA_FLAG_UNIQUE)
+ remove_side_data(sd, nb_sd, src->type);
+ if ((!desc || !(desc->props & AV_SIDE_DATA_PROP_MULTI)) &&
+ (sd_dst = (AVFrameSideData *)av_frame_side_data_get(*sd, *nb_sd, src->type))) {
+ AVDictionary *dict = NULL;
+
+ if (!(flags & AV_FRAME_SIDE_DATA_FLAG_REPLACE))
+ return AVERROR(EEXIST);
+
+ ret = av_dict_copy(&dict, src->metadata, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = av_buffer_replace(&sd_dst->buf, src->buf);
+ if (ret < 0) {
+ av_dict_free(&dict);
+ return ret;
+ }
+
+ av_dict_free(&sd_dst->metadata);
+ sd_dst->metadata = dict;
+ sd_dst->data = src->data;
+ sd_dst->size = src->size;
+ return 0;
+ }
+
buf = av_buffer_ref(src->buf);
if (!buf)
return AVERROR(ENOMEM);
- if (flags & AV_FRAME_SIDE_DATA_FLAG_UNIQUE)
- remove_side_data(sd, nb_sd, src->type);
-
sd_dst = add_side_data_from_buf_ext(sd, nb_sd, src->type, buf,
src->data, src->size);
if (!sd_dst) {
@@ -865,6 +938,12 @@ const AVFrameSideData *av_frame_side_data_get_c(const AVFrameSideData * const *s
return NULL;
}
+void av_frame_side_data_remove(AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type)
+{
+ remove_side_data(sd, nb_sd, type);
+}
+
AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
enum AVFrameSideDataType type)
{
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 3b6d746a16..60bb966f8b 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -369,8 +369,7 @@ typedef struct AVRegionOfInterest {
* to the end with a minor bump.
*
* Fields can be accessed through AVOptions, the name string used, matches the
- * C structure field name for fields accessible through AVOptions. The AVClass
- * for AVFrame can be obtained from avcodec_get_frame_class()
+ * C structure field name for fields accessible through AVOptions.
*/
typedef struct AVFrame {
#define AV_NUM_DATA_POINTERS 8
@@ -1040,7 +1039,15 @@ const AVSideDataDescriptor *av_frame_side_data_desc(enum AVFrameSideDataType typ
*/
void av_frame_side_data_free(AVFrameSideData ***sd, int *nb_sd);
+/**
+ * Remove existing entries before adding new ones.
+ */
#define AV_FRAME_SIDE_DATA_FLAG_UNIQUE (1 << 0)
+/**
+ * Don't add a new entry if another of the same type exists.
+ * Applies only for side data types without the AV_SIDE_DATA_PROP_MULTI prop.
+ */
+#define AV_FRAME_SIDE_DATA_FLAG_REPLACE (1 << 1)
/**
* Add new side data entry to an array.
@@ -1053,16 +1060,44 @@ void av_frame_side_data_free(AVFrameSideData ***sd, int *nb_sd);
* @param size size of the side data
* @param flags Some combination of AV_FRAME_SIDE_DATA_FLAG_* flags, or 0.
*
- * @return newly added side data on success, NULL on error. In case of
- * AV_FRAME_SIDE_DATA_FLAG_UNIQUE being set, entries of matching
- * AVFrameSideDataType will be removed before the addition is
- * attempted.
+ * @return newly added side data on success, NULL on error.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_UNIQUE being set, entries of
+ * matching AVFrameSideDataType will be removed before the addition
+ * is attempted.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_REPLACE being set, if an
+ * entry of the same type already exists, it will be replaced instead.
*/
AVFrameSideData *av_frame_side_data_new(AVFrameSideData ***sd, int *nb_sd,
enum AVFrameSideDataType type,
size_t size, unsigned int flags);
/**
+ * Add a new side data entry to an array from an existing AVBufferRef.
+ *
+ * @param sd pointer to array of side data to which to add another entry,
+ * or to NULL in order to start a new array.
+ * @param nb_sd pointer to an integer containing the number of entries in
+ * the array.
+ * @param type type of the added side data
+ * @param buf Pointer to AVBufferRef to add to the array. On success,
+ * the function takes ownership of the AVBufferRef and *buf is
+ * set to NULL, unless AV_FRAME_SIDE_DATA_FLAG_NEW_REF is set
+ * in which case the ownership will remain with the caller.
+ * @param flags Some combination of AV_FRAME_SIDE_DATA_FLAG_* flags, or 0.
+ *
+ * @return newly added side data on success, NULL on error.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_UNIQUE being set, entries of
+ * matching AVFrameSideDataType will be removed before the addition
+ * is attempted.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_REPLACE being set, if an
+ * entry of the same type already exists, it will be replaced instead.
+ *
+ */
+AVFrameSideData *av_frame_side_data_add(AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type,
+ AVBufferRef **buf, unsigned int flags);
+
+/**
* Add a new side data entry to an array based on existing side data, taking
* a reference towards the contained AVBufferRef.
*
@@ -1074,10 +1109,12 @@ AVFrameSideData *av_frame_side_data_new(AVFrameSideData ***sd, int *nb_sd,
* for the buffer.
* @param flags Some combination of AV_FRAME_SIDE_DATA_FLAG_* flags, or 0.
*
- * @return negative error code on failure, >=0 on success. In case of
- * AV_FRAME_SIDE_DATA_FLAG_UNIQUE being set, entries of matching
- * AVFrameSideDataType will be removed before the addition is
- * attempted.
+ * @return negative error code on failure, >=0 on success.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_UNIQUE being set, entries of
+ * matching AVFrameSideDataType will be removed before the addition
+ * is attempted.
+ * @note In case of AV_FRAME_SIDE_DATA_FLAG_REPLACE being set, if an
+ * entry of the same type already exists, it will be replaced instead.
*/
int av_frame_side_data_clone(AVFrameSideData ***sd, int *nb_sd,
const AVFrameSideData *src, unsigned int flags);
@@ -1112,6 +1149,11 @@ const AVFrameSideData *av_frame_side_data_get(AVFrameSideData * const *sd,
}
/**
+ * Remove and free all side data instances of the given type from an array.
+ */
+void av_frame_side_data_remove(AVFrameSideData ***sd, int *nb_sd,
+ enum AVFrameSideDataType type);
+/**
* @}
*/
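For context, a minimal sketch of the REPLACE semantics documented above, operating on a bare side-data array. The wrapper name and the choice of content-light metadata are illustrative only; everything else is public libavutil API added or described by this patch.

#include <libavutil/frame.h>
#include <libavutil/mastering_display_metadata.h>

static int set_cll(AVFrameSideData ***sd, int *nb_sd,
                   unsigned max_cll, unsigned max_fall)
{
    /* AV_FRAME_DATA_CONTENT_LIGHT_LEVEL has no AV_SIDE_DATA_PROP_MULTI prop,
     * so with FLAG_REPLACE an existing entry is overwritten in place instead
     * of the call failing or a duplicate being appended. */
    AVFrameSideData *e = av_frame_side_data_new(sd, nb_sd,
                                                AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
                                                sizeof(AVContentLightMetadata),
                                                AV_FRAME_SIDE_DATA_FLAG_REPLACE);
    AVContentLightMetadata *cll;

    if (!e)
        return AVERROR(ENOMEM);
    cll = (AVContentLightMetadata *)e->data;
    cll->MaxCLL  = max_cll;
    cll->MaxFALL = max_fall;
    return 0;
}

The companion av_frame_side_data_add() behaves the same way for an existing AVBufferRef: on success it takes ownership and zeroes the caller's pointer, as the doxygen above states.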
diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index c7c7878644..f3d919daea 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -36,6 +36,7 @@
#include "hwcontext_d3d11va.h"
#endif
#if CONFIG_DXVA2
+#include <initguid.h>
#include "hwcontext_dxva2.h"
#endif
@@ -118,8 +119,15 @@ typedef struct QSVFramesContext {
#endif
AVFrame realigned_upload_frame;
AVFrame realigned_download_frame;
+
+ mfxFrameInfo frame_info;
} QSVFramesContext;
+typedef struct QSVSurface {
+ mfxFrameSurface1 mfx_surface;
+ AVFrame *child_frame;
+} QSVSurface;
+
static const struct {
enum AVPixelFormat pix_fmt;
uint32_t fourcc;
@@ -165,6 +173,8 @@ extern int ff_qsv_get_surface_base_handle(mfxFrameSurface1 *surf,
enum AVHWDeviceType base_dev_type,
void **base_handle);
+static int qsv_init_surface(AVHWFramesContext *ctx, mfxFrameSurface1 *surf);
+
/**
* Caller needs to allocate enough space for base_handle pointer.
**/
@@ -373,7 +383,32 @@ static void qsv_pool_release_dummy(void *opaque, uint8_t *data)
{
}
-static AVBufferRef *qsv_pool_alloc(void *opaque, size_t size)
+static void qsv_pool_release(void *opaque, uint8_t *data)
+{
+ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque;
+ QSVFramesContext *s = ctx->hwctx;
+ QSVSurface *qsv_surface = (QSVSurface *)data;
+ mfxHDLPair *hdl_pair = (mfxHDLPair *)qsv_surface->mfx_surface.Data.MemId;
+ AVHWFramesContext *child_frames_ctx;
+
+ if (!s->child_frames_ref)
+ return;
+
+ child_frames_ctx = (AVHWFramesContext*)s->child_frames_ref->data;
+ if (!child_frames_ctx->device_ctx)
+ return;
+
+#if CONFIG_VAAPI
+ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_VAAPI)
+ av_freep(&hdl_pair->first);
+#endif
+
+ av_freep(&hdl_pair);
+ av_frame_free(&qsv_surface->child_frame);
+ av_freep(&qsv_surface);
+}
+
+static AVBufferRef *qsv_fixed_pool_alloc(void *opaque, size_t size)
{
AVHWFramesContext *ctx = (AVHWFramesContext*)opaque;
QSVFramesContext *s = ctx->hwctx;
@@ -388,6 +423,104 @@ static AVBufferRef *qsv_pool_alloc(void *opaque, size_t size)
return NULL;
}
+static AVBufferRef *qsv_dynamic_pool_alloc(void *opaque, size_t size)
+{
+ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque;
+ QSVFramesContext *s = ctx->hwctx;
+ AVHWFramesContext *child_frames_ctx;
+ QSVSurface *qsv_surface = NULL;
+ mfxHDLPair *handle_pairs_internal = NULL;
+ int ret;
+
+ if (!s->child_frames_ref)
+ goto fail;
+
+ child_frames_ctx = (AVHWFramesContext*)s->child_frames_ref->data;
+ if (!child_frames_ctx->device_ctx)
+ goto fail;
+
+#if CONFIG_DXVA2
+ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_DXVA2) {
+ av_log(ctx, AV_LOG_ERROR,
+ "QSV on dxva2 requires a fixed frame pool size\n");
+ goto fail;
+ }
+#endif
+
+ qsv_surface = av_calloc(1, sizeof(*qsv_surface));
+ if (!qsv_surface)
+ goto fail;
+
+ qsv_surface->child_frame = av_frame_alloc();
+ if (!qsv_surface->child_frame)
+ goto fail;
+
+ ret = av_hwframe_get_buffer(s->child_frames_ref, qsv_surface->child_frame, 0);
+ if (ret < 0)
+ goto fail;
+
+ handle_pairs_internal = av_calloc(1, sizeof(*handle_pairs_internal));
+ if (!handle_pairs_internal)
+ goto fail;
+
+ ret = qsv_init_surface(ctx, &qsv_surface->mfx_surface);
+ if (ret < 0)
+ goto fail;
+
+#if CONFIG_VAAPI
+ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) {
+ VASurfaceID *surface_id_internal;
+
+ surface_id_internal = av_calloc(1, sizeof(*surface_id_internal));
+ if (!surface_id_internal)
+ goto fail;
+
+ *surface_id_internal = (VASurfaceID)(uintptr_t)qsv_surface->child_frame->data[3];
+ handle_pairs_internal->first = (mfxHDL)surface_id_internal;
+ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE;
+ }
+#endif
+
+#if CONFIG_D3D11VA
+ if (child_frames_ctx->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
+ AVD3D11VAFramesContext *child_frames_hwctx = child_frames_ctx->hwctx;
+ handle_pairs_internal->first = (mfxMemId)qsv_surface->child_frame->data[0];
+
+ if (child_frames_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET)
+ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE;
+ else
+ handle_pairs_internal->second = (mfxMemId)qsv_surface->child_frame->data[1];
+
+ }
+#endif
+
+ qsv_surface->mfx_surface.Data.MemId = (mfxMemId)handle_pairs_internal;
+ return av_buffer_create((uint8_t *)qsv_surface, sizeof(*qsv_surface),
+ qsv_pool_release, ctx, 0);
+
+fail:
+ if (qsv_surface) {
+ av_frame_free(&qsv_surface->child_frame);
+ }
+
+ av_freep(&qsv_surface);
+ av_freep(&handle_pairs_internal);
+
+ return NULL;
+}
+
+static AVBufferRef *qsv_pool_alloc(void *opaque, size_t size)
+{
+ AVHWFramesContext *ctx = (AVHWFramesContext*)opaque;
+ AVQSVFramesContext *hwctx = ctx->hwctx;
+
+ if (hwctx->nb_surfaces == 0) {
+ return qsv_dynamic_pool_alloc(opaque, size);
+ } else {
+ return qsv_fixed_pool_alloc(opaque, size);
+ }
+}
+
static int qsv_init_child_ctx(AVHWFramesContext *ctx)
{
QSVDeviceContext *device_priv = ctx->device_ctx->hwctx;
@@ -576,9 +709,28 @@ static int qsv_init_pool(AVHWFramesContext *ctx, uint32_t fourcc)
int i, ret = 0;
- if (ctx->initial_pool_size <= 0) {
- av_log(ctx, AV_LOG_ERROR, "QSV requires a fixed frame pool size\n");
+ if (ctx->initial_pool_size < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid frame pool size\n");
return AVERROR(EINVAL);
+ } else if (ctx->initial_pool_size == 0) {
+ mfxFrameSurface1 mfx_surf1;
+
+ ret = qsv_init_child_ctx(ctx);
+ if (ret < 0)
+ return ret;
+
+ ffhwframesctx(ctx)->pool_internal = av_buffer_pool_init2(sizeof(mfxFrameSurface1),
+ ctx, qsv_pool_alloc, NULL);
+ if (!ffhwframesctx(ctx)->pool_internal)
+ return AVERROR(ENOMEM);
+
+ memset(&mfx_surf1, 0, sizeof(mfx_surf1));
+ qsv_init_surface(ctx, &mfx_surf1);
+ s->frame_info = mfx_surf1.Info;
+ frames_hwctx->info = &s->frame_info;
+ frames_hwctx->nb_surfaces = 0;
+
+ return 0;
}
s->handle_pairs_internal = av_calloc(ctx->initial_pool_size,
@@ -627,7 +779,7 @@ static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
QSVFramesContext *s = ctx->hwctx;
AVQSVFramesContext *hwctx = &s->p;
mfxFrameInfo *i = &req->Info;
- mfxFrameInfo *i1 = &hwctx->surfaces[0].Info;
+ mfxFrameInfo *i1 = hwctx->nb_surfaces ? &hwctx->surfaces[0].Info : hwctx->info;
if (!(req->Type & MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET) ||
!(req->Type & (MFX_MEMTYPE_FROM_VPPIN | MFX_MEMTYPE_FROM_VPPOUT)) ||
@@ -753,9 +905,11 @@ static int qsv_d3d9_update_config(void *ctx, mfxHDL handle, mfxConfig cfg)
#if CONFIG_DXVA2
mfxStatus sts;
IDirect3DDeviceManager9* devmgr = handle;
- IDirect3DDevice9Ex *device = NULL;
+ IDirect3DDevice9 *device = NULL;
+ IDirect3DDevice9Ex *device_ex = NULL;
HANDLE device_handle = 0;
IDirect3D9Ex *d3d9ex = NULL;
+ IDirect3D9 *d3d9 = NULL;
LUID luid;
D3DDEVICE_CREATION_PARAMETERS params;
HRESULT hr;
@@ -773,18 +927,31 @@ static int qsv_d3d9_update_config(void *ctx, mfxHDL handle, mfxConfig cfg)
IDirect3DDeviceManager9_CloseDeviceHandle(devmgr, device_handle);
goto fail;
}
+ hr = IDirect3DDevice9_QueryInterface(device, &IID_IDirect3DDevice9Ex, (void **)&device_ex);
+ IDirect3DDevice9_Release(device);
+ if (FAILED(hr)) {
+ av_log(ctx, AV_LOG_ERROR, "Error IDirect3DDevice9_QueryInterface %d\n", hr);
+ goto unlock;
+ }
- hr = IDirect3DDevice9Ex_GetCreationParameters(device, &params);
+ hr = IDirect3DDevice9Ex_GetCreationParameters(device_ex, &params);
if (FAILED(hr)) {
av_log(ctx, AV_LOG_ERROR, "Error IDirect3DDevice9_GetCreationParameters %d\n", hr);
- IDirect3DDevice9Ex_Release(device);
+ IDirect3DDevice9Ex_Release(device_ex);
goto unlock;
}
- hr = IDirect3DDevice9Ex_GetDirect3D(device, &d3d9ex);
+ hr = IDirect3DDevice9Ex_GetDirect3D(device_ex, &d3d9);
if (FAILED(hr)) {
- av_log(ctx, AV_LOG_ERROR, "Error IDirect3DDevice9Ex_GetAdapterLUID %d\n", hr);
- IDirect3DDevice9Ex_Release(device);
+ av_log(ctx, AV_LOG_ERROR, "Error IDirect3DDevice9Ex_GetDirect3D %d\n", hr);
+ IDirect3DDevice9Ex_Release(device_ex);
+ goto unlock;
+ }
+ hr = IDirect3D9_QueryInterface(d3d9, &IID_IDirect3D9Ex, (void **)&d3d9ex);
+ IDirect3D9_Release(d3d9);
+ if (FAILED(hr)) {
+ av_log(ctx, AV_LOG_ERROR, "Error IDirect3D9_QueryInterface3D %d\n", hr);
+ IDirect3DDevice9Ex_Release(device_ex);
goto unlock;
}
@@ -808,7 +975,7 @@ static int qsv_d3d9_update_config(void *ctx, mfxHDL handle, mfxConfig cfg)
release:
IDirect3D9Ex_Release(d3d9ex);
- IDirect3DDevice9Ex_Release(device);
+ IDirect3DDevice9Ex_Release(device_ex);
unlock:
IDirect3DDeviceManager9_UnlockDevice(devmgr, device_handle, FALSE);
@@ -1207,7 +1374,7 @@ static int qsv_init_internal_session(AVHWFramesContext *ctx,
MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
par.AsyncDepth = 1;
- par.vpp.In = frames_hwctx->surfaces[0].Info;
+ par.vpp.In = frames_hwctx->nb_surfaces ? frames_hwctx->surfaces[0].Info : *frames_hwctx->info;
/* Apparently VPP requires the frame rate to be set to some value, otherwise
* init will fail (probably for the framerate conversion filter). Since we
@@ -1368,8 +1535,9 @@ static int qsv_frames_derive_from(AVHWFramesContext *dst_ctx,
case AV_HWDEVICE_TYPE_D3D11VA:
{
D3D11_TEXTURE2D_DESC texDesc;
+ AVD3D11VAFramesContext *dst_hwctx;
dst_ctx->initial_pool_size = src_ctx->initial_pool_size;
- AVD3D11VAFramesContext *dst_hwctx = dst_ctx->hwctx;
+ dst_hwctx = dst_ctx->hwctx;
dst_hwctx->texture_infos = av_calloc(src_hwctx->nb_surfaces,
sizeof(*dst_hwctx->texture_infos));
if (!dst_hwctx->texture_infos)
@@ -1821,19 +1989,53 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
return 0;
}
-static int qsv_frames_derive_to(AVHWFramesContext *dst_ctx,
- AVHWFramesContext *src_ctx, int flags)
+static int qsv_dynamic_frames_derive_to(AVHWFramesContext *dst_ctx,
+ AVHWFramesContext *src_ctx, int flags)
{
QSVFramesContext *s = dst_ctx->hwctx;
AVQSVFramesContext *dst_hwctx = &s->p;
- int i;
+ mfxFrameSurface1 mfx_surf1;
- if (src_ctx->initial_pool_size == 0) {
- av_log(dst_ctx, AV_LOG_ERROR, "Only fixed-size pools can be "
- "mapped to QSV frames.\n");
- return AVERROR(EINVAL);
+ switch (src_ctx->device_ctx->type) {
+#if CONFIG_VAAPI
+ case AV_HWDEVICE_TYPE_VAAPI:
+ dst_hwctx->frame_type = MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+ break;
+#endif
+
+#if CONFIG_D3D11VA
+ case AV_HWDEVICE_TYPE_D3D11VA:
+ {
+ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx;
+
+ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) {
+ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_PROCESSOR_TARGET;
+ } else {
+ dst_hwctx->frame_type |= MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET;
+ }
+ }
+ break;
+#endif
+
+ default:
+ return AVERROR(ENOSYS);
}
+ memset(&mfx_surf1, 0, sizeof(mfx_surf1));
+ qsv_init_surface(dst_ctx, &mfx_surf1);
+ s->frame_info = mfx_surf1.Info;
+ dst_hwctx->info = &s->frame_info;
+ dst_hwctx->nb_surfaces = 0;
+ return 0;
+}
+
+static int qsv_fixed_frames_derive_to(AVHWFramesContext *dst_ctx,
+ AVHWFramesContext *src_ctx, int flags)
+{
+ QSVFramesContext *s = dst_ctx->hwctx;
+ AVQSVFramesContext *dst_hwctx = &s->p;
+ int i;
+
switch (src_ctx->device_ctx->type) {
#if CONFIG_VAAPI
case AV_HWDEVICE_TYPE_VAAPI:
@@ -1924,8 +2126,21 @@ static int qsv_frames_derive_to(AVHWFramesContext *dst_ctx,
return 0;
}
-static int qsv_map_to(AVHWFramesContext *dst_ctx,
- AVFrame *dst, const AVFrame *src, int flags)
+static int qsv_frames_derive_to(AVHWFramesContext *dst_ctx,
+ AVHWFramesContext *src_ctx, int flags)
+{
+ if (src_ctx->initial_pool_size < 0) {
+ av_log(dst_ctx, AV_LOG_ERROR, "Invalid src frame pool.\n");
+ return AVERROR(EINVAL);
+ } else if (src_ctx->initial_pool_size == 0) {
+ return qsv_dynamic_frames_derive_to(dst_ctx, src_ctx, flags);
+ } else {
+ return qsv_fixed_frames_derive_to(dst_ctx, src_ctx, flags);
+ }
+}
+
+static int qsv_fixed_pool_map_to(AVHWFramesContext *dst_ctx,
+ AVFrame *dst, const AVFrame *src, int flags)
{
AVQSVFramesContext *hwctx = dst_ctx->hwctx;
int i, err, index = -1;
@@ -1984,6 +2199,133 @@ static int qsv_map_to(AVHWFramesContext *dst_ctx,
return 0;
}
+static void qsv_dynamic_pool_unmap(AVHWFramesContext *ctx, HWMapDescriptor *hwmap)
+{
+ mfxFrameSurface1 *surfaces_internal = (mfxFrameSurface1 *)hwmap->priv;
+ mfxHDLPair *handle_pairs_internal = (mfxHDLPair *)surfaces_internal->Data.MemId;
+ AVHWFramesContext *src_ctx = (AVHWFramesContext *)ffhwframesctx(ctx)->source_frames->data;
+
+ switch (src_ctx->format) {
+#if CONFIG_VAAPI
+ case AV_PIX_FMT_VAAPI:
+ {
+ av_freep(&handle_pairs_internal->first);
+
+ break;
+ }
+#endif
+
+#if CONFIG_D3D11VA
+ case AV_PIX_FMT_D3D11:
+ {
+ /* Do nothing */
+ break;
+ }
+#endif
+ default:
+ av_log(ctx, AV_LOG_ERROR, "Should not reach here.\n");
+ break;
+ }
+
+ av_freep(&handle_pairs_internal);
+ av_freep(&surfaces_internal);
+}
+
+static int qsv_dynamic_pool_map_to(AVHWFramesContext *dst_ctx,
+ AVFrame *dst, const AVFrame *src, int flags)
+{
+ mfxFrameSurface1 *surfaces_internal = NULL;
+ mfxHDLPair *handle_pairs_internal = NULL;
+ int ret = 0;
+
+ surfaces_internal = av_calloc(1, sizeof(*surfaces_internal));
+ if (!surfaces_internal) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ handle_pairs_internal = av_calloc(1, sizeof(*handle_pairs_internal));
+ if (!handle_pairs_internal) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ ret = qsv_init_surface(dst_ctx, surfaces_internal);
+ if (ret < 0)
+ goto fail;
+
+ switch (src->format) {
+#if CONFIG_VAAPI
+ case AV_PIX_FMT_VAAPI:
+ {
+ VASurfaceID *surface_id_internal;
+
+ surface_id_internal = av_calloc(1, sizeof(*surface_id_internal));
+ if (!surface_id_internal) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ *surface_id_internal = (VASurfaceID)(uintptr_t)src->data[3];
+ handle_pairs_internal->first = (mfxHDL)surface_id_internal;
+ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE;
+
+ break;
+ }
+#endif
+
+#if CONFIG_D3D11VA
+ case AV_PIX_FMT_D3D11:
+ {
+ AVHWFramesContext *src_ctx = (AVHWFramesContext*)src->hw_frames_ctx->data;
+ AVD3D11VAFramesContext *src_hwctx = src_ctx->hwctx;
+
+ handle_pairs_internal->first = (mfxMemId)src->data[0];
+
+ if (src_hwctx->BindFlags & D3D11_BIND_RENDER_TARGET) {
+ handle_pairs_internal->second = (mfxMemId)MFX_INFINITE;
+ } else {
+ handle_pairs_internal->second = (mfxMemId)src->data[1];
+ }
+
+ break;
+ }
+#endif
+ default:
+ ret = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ surfaces_internal->Data.MemId = (mfxMemId)handle_pairs_internal;
+
+ ret = ff_hwframe_map_create(dst->hw_frames_ctx,
+ dst, src, qsv_dynamic_pool_unmap, surfaces_internal);
+ if (ret)
+ goto fail;
+
+ dst->width = src->width;
+ dst->height = src->height;
+ dst->data[3] = (uint8_t*)surfaces_internal;
+
+ return 0;
+
+fail:
+ av_freep(&handle_pairs_internal);
+ av_freep(&surfaces_internal);
+ return ret;
+}
+
+static int qsv_map_to(AVHWFramesContext *dst_ctx,
+ AVFrame *dst, const AVFrame *src, int flags)
+{
+ AVQSVFramesContext *hwctx = dst_ctx->hwctx;
+
+ if (hwctx->nb_surfaces)
+ return qsv_fixed_pool_map_to(dst_ctx, dst, src, flags);
+ else
+ return qsv_dynamic_pool_map_to(dst_ctx, dst, src, flags);
+}
+
static int qsv_frames_get_constraints(AVHWDeviceContext *ctx,
const void *hwconfig,
AVHWFramesConstraints *constraints)
diff --git a/libavutil/hwcontext_qsv.h b/libavutil/hwcontext_qsv.h
index e2dba8ad83..35530e4e93 100644
--- a/libavutil/hwcontext_qsv.h
+++ b/libavutil/hwcontext_qsv.h
@@ -25,8 +25,8 @@
* @file
* An API-specific header for AV_HWDEVICE_TYPE_QSV.
*
- * This API does not support dynamic frame pools. AVHWFramesContext.pool must
- * contain AVBufferRefs whose data pointer points to an mfxFrameSurface1 struct.
+ * AVHWFramesContext.pool must contain AVBufferRefs whose data pointer points
+ * to an mfxFrameSurface1 struct.
*/
/**
@@ -51,13 +51,36 @@ typedef struct AVQSVDeviceContext {
* This struct is allocated as AVHWFramesContext.hwctx
*/
typedef struct AVQSVFramesContext {
+ /**
+ * A pointer to an mfxFrameSurface1 struct
+ *
+ * It is available when nb_surfaces is non-zero.
+ */
mfxFrameSurface1 *surfaces;
+
+ /**
+ * Number of frames in the pool
+ *
+ * It is 0 for dynamic frame pools or AVHWFramesContext.initial_pool_size
+ * for fixed frame pools.
+ *
+ * Note that only oneVPL GPU runtime 2.9+ supports dynamic frame pools
+ * on d3d11va or vaapi.
+ */
int nb_surfaces;
/**
* A combination of MFX_MEMTYPE_* describing the frame pool.
*/
int frame_type;
+
+ /**
+ * A pointer to an mfxFrameInfo struct
+ *
+ * It is available when nb_surfaces is 0; all buffers allocated from the
+ * pool have the same mfxFrameInfo.
+ */
+ mfxFrameInfo *info;
} AVQSVFramesContext;
#endif /* AVUTIL_HWCONTEXT_QSV_H */
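A hedged sketch of how an application could opt into the dynamic pool mode introduced in this patch; qsv_device_ref is assumed to be an already-opened AV_HWDEVICE_TYPE_QSV device reference, and the pixel formats and helper name are placeholders:

    #include <libavutil/hwcontext.h>
    #include <libavutil/hwcontext_qsv.h>
    #include <libavutil/pixfmt.h>

    static AVBufferRef *alloc_dynamic_qsv_frames(AVBufferRef *qsv_device_ref,
                                                 int width, int height)
    {
        AVBufferRef *frames_ref = av_hwframe_ctx_alloc(qsv_device_ref);
        AVHWFramesContext *frames_ctx;

        if (!frames_ref)
            return NULL;

        frames_ctx            = (AVHWFramesContext *)frames_ref->data;
        frames_ctx->format    = AV_PIX_FMT_QSV;
        frames_ctx->sw_format = AV_PIX_FMT_NV12;
        frames_ctx->width     = width;
        frames_ctx->height    = height;
        /* initial_pool_size stays 0: with oneVPL GPU runtime 2.9+ on d3d11va
         * or vaapi this now yields a dynamic pool, i.e.
         * AVQSVFramesContext.nb_surfaces is 0 and .info carries the common
         * mfxFrameInfo. */

        if (av_hwframe_ctx_init(frames_ref) < 0)
            av_buffer_unref(&frames_ref);

        return frames_ref;
    }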
diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index 56d03aa4cd..4cb25dd032 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -809,6 +809,9 @@ static int vaapi_map_frame(AVHWFramesContext *hwfc,
VAStatus vas;
void *address = NULL;
int err, i;
+#if VA_CHECK_VERSION(1, 21, 0)
+ uint32_t vaflags = 0;
+#endif
surface_id = (VASurfaceID)(uintptr_t)src->data[3];
av_log(hwfc, AV_LOG_DEBUG, "Map surface %#x.\n", surface_id);
@@ -892,7 +895,16 @@ static int vaapi_map_frame(AVHWFramesContext *hwfc,
}
}
+#if VA_CHECK_VERSION(1, 21, 0)
+ if (flags & AV_HWFRAME_MAP_READ)
+ vaflags |= VA_MAPBUFFER_FLAG_READ;
+ if (flags & AV_HWFRAME_MAP_WRITE)
+ vaflags |= VA_MAPBUFFER_FLAG_WRITE;
+ // On drivers not implementing vaMapBuffer2, libva calls vaMapBuffer instead.
+ vas = vaMapBuffer2(hwctx->display, map->image.buf, &address, vaflags);
+#else
vas = vaMapBuffer(hwctx->display, map->image.buf, &address);
+#endif
if (vas != VA_STATUS_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Failed to map image from surface "
"%#x: %d (%s).\n", surface_id, vas, vaErrorStr(vas));
diff --git a/libavutil/iamf.c b/libavutil/iamf.c
index 84bed5a45e..791954d951 100644
--- a/libavutil/iamf.c
+++ b/libavutil/iamf.c
@@ -64,17 +64,17 @@ child_type *av_iamf_ ## parent_name ## _add_ ## child_name(parent_type *parent_n
//
#define OFFSET(x) offsetof(AVIAMFMixGain, x)
static const AVOption mix_gain_options[] = {
- { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
- { "animation_type", "set animation_type", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FLAGS },
- { "start_point_value", "set start_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
- { "end_point_value", "set end_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
- { "control_point_value", "set control_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
- { "control_point_relative_time", "set control_point_relative_time", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0.0, 1.0, FLAGS },
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_UINT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { "animation_type", "set animation_type", OFFSET(animation_type), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, 2, FLAGS },
+ { "start_point_value", "set start_point_value", OFFSET(start_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "end_point_value", "set end_point_value", OFFSET(end_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "control_point_value", "set control_point_value", OFFSET(control_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+ { "control_point_relative_time", "set control_point_relative_time", OFFSET(control_point_relative_time), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0.0, 1.0, FLAGS },
{ NULL },
};
static const AVClass mix_gain_class = {
- .class_name = "AVIAMFSubmixElement",
+ .class_name = "AVIAMFMixGain",
.item_name = av_default_item_name,
.version = LIBAVUTIL_VERSION_INT,
.option = mix_gain_options,
@@ -83,8 +83,8 @@ static const AVClass mix_gain_class = {
#undef OFFSET
#define OFFSET(x) offsetof(AVIAMFDemixingInfo, x)
static const AVOption demixing_info_options[] = {
- { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
- { "dmixp_mode", "set dmixp_mode", OFFSET(dmixp_mode), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 6, FLAGS },
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_UINT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { "dmixp_mode", "set dmixp_mode", OFFSET(dmixp_mode), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, 6, FLAGS },
{ NULL },
};
@@ -98,7 +98,7 @@ static const AVClass demixing_info_class = {
#undef OFFSET
#define OFFSET(x) offsetof(AVIAMFReconGain, x)
static const AVOption recon_gain_options[] = {
- { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+ { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_UINT, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
{ NULL },
};
@@ -112,10 +112,10 @@ static const AVClass recon_gain_class = {
#undef OFFSET
#define OFFSET(x) offsetof(AVIAMFParamDefinition, x)
static const AVOption param_definition_options[] = {
- { "parameter_id", "set parameter_id", OFFSET(parameter_id), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
- { "parameter_rate", "set parameter_rate", OFFSET(parameter_rate), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
- { "duration", "set duration", OFFSET(duration), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
- { "constant_subblock_duration", "set constant_subblock_duration", OFFSET(constant_subblock_duration), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "parameter_id", "set parameter_id", OFFSET(parameter_id), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "parameter_rate", "set parameter_rate", OFFSET(parameter_rate), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "duration", "set duration", OFFSET(duration), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
+ { "constant_subblock_duration", "set constant_subblock_duration", OFFSET(constant_subblock_duration), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, UINT_MAX, FLAGS },
{ NULL },
};
@@ -289,7 +289,7 @@ static const AVOption audio_element_options[] = {
{ .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL }, .unit = "audio_element_type" },
{ "scene", NULL, 0, AV_OPT_TYPE_CONST,
{ .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE }, .unit = "audio_element_type" },
- { "default_w", "set default_w", OFFSET(default_w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 10, FLAGS },
+ { "default_w", "set default_w", OFFSET(default_w), AV_OPT_TYPE_UINT, {.i64 = 0 }, 0, 10, FLAGS },
{ NULL },
};
diff --git a/libavutil/mastering_display_metadata.c b/libavutil/mastering_display_metadata.c
index 6069347617..ea41f13f9d 100644
--- a/libavutil/mastering_display_metadata.c
+++ b/libavutil/mastering_display_metadata.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <stddef.h>
#include <stdint.h>
#include <string.h>
@@ -29,6 +30,18 @@ AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc(void)
return av_mallocz(sizeof(AVMasteringDisplayMetadata));
}
+AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc_size(size_t *size)
+{
+ AVMasteringDisplayMetadata *mastering = av_mallocz(sizeof(AVMasteringDisplayMetadata));
+ if (!mastering)
+ return NULL;
+
+ if (size)
+ *size = sizeof(*mastering);
+
+ return mastering;
+}
+
AVMasteringDisplayMetadata *av_mastering_display_metadata_create_side_data(AVFrame *frame)
{
AVFrameSideData *side_data = av_frame_new_side_data(frame,
diff --git a/libavutil/mastering_display_metadata.h b/libavutil/mastering_display_metadata.h
index c23b07c3cd..52fcef9e37 100644
--- a/libavutil/mastering_display_metadata.h
+++ b/libavutil/mastering_display_metadata.h
@@ -78,6 +78,15 @@ typedef struct AVMasteringDisplayMetadata {
AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc(void);
/**
+ * Allocate an AVMasteringDisplayMetadata structure and set its fields to
+ * default values. The resulting struct can be freed using av_freep().
+ *
+ * @param[out] size If not NULL, *size is set to the size in bytes of the
+ * allocated struct.
+ *
+ * @return An AVMasteringDisplayMetadata filled with default values or NULL
+ * on failure.
+ */
+AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc_size(size_t *size);
+
+/**
* Allocate a complete AVMasteringDisplayMetadata and add it to the frame.
*
* @param frame The frame which side data is added to.
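A short, assumed usage sketch for the new allocator: the reported size is handy when the struct has to be wrapped in an AVBufferRef. The helper name is illustrative:

    #include <libavutil/buffer.h>
    #include <libavutil/mastering_display_metadata.h>
    #include <libavutil/mem.h>

    /* Allocate the metadata struct and wrap it in an AVBufferRef; the size
     * written by av_mastering_display_metadata_alloc_size() is exactly what
     * av_buffer_create() needs. */
    static AVBufferRef *mdm_as_buffer(void)
    {
        size_t size;
        AVMasteringDisplayMetadata *mdm =
            av_mastering_display_metadata_alloc_size(&size);
        AVBufferRef *buf;

        if (!mdm)
            return NULL;

        buf = av_buffer_create((uint8_t *)mdm, size, NULL, NULL, 0);
        if (!buf)
            av_freep(&mdm);
        return buf;
    }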
diff --git a/libavutil/opt.c b/libavutil/opt.c
index d11e9d2ac5..32a9e059e3 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -62,6 +62,7 @@ static const size_t opt_elem_size[] = {
[AV_OPT_TYPE_FLAGS] = sizeof(unsigned),
[AV_OPT_TYPE_INT] = sizeof(int),
[AV_OPT_TYPE_INT64] = sizeof(int64_t),
+ [AV_OPT_TYPE_UINT] = sizeof(unsigned),
[AV_OPT_TYPE_UINT64] = sizeof(uint64_t),
[AV_OPT_TYPE_DOUBLE] = sizeof(double),
[AV_OPT_TYPE_FLOAT] = sizeof(float),
@@ -166,6 +167,9 @@ static int read_number(const AVOption *o, const void *dst, double *num, int *den
case AV_OPT_TYPE_INT:
*intnum = *(int *)dst;
return 0;
+ case AV_OPT_TYPE_UINT:
+ *intnum = *(unsigned int *)dst;
+ return 0;
case AV_OPT_TYPE_DURATION:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
@@ -219,6 +223,7 @@ static int write_number(void *obj, const AVOption *o, void *dst, double num, int
case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_FLAGS:
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
*(int *)dst = llrint(num / den) * intnum;
break;
case AV_OPT_TYPE_DURATION:
@@ -319,6 +324,7 @@ static int set_string(void *obj, const AVOption *o, const char *val, uint8_t **d
opt->type == AV_OPT_TYPE_UINT64 || \
opt->type == AV_OPT_TYPE_CONST || \
opt->type == AV_OPT_TYPE_FLAGS || \
+ opt->type == AV_OPT_TYPE_UINT || \
opt->type == AV_OPT_TYPE_INT) \
? opt->default_val.i64 \
: opt->default_val.dbl)
@@ -605,6 +611,7 @@ static int opt_set_elem(void *obj, void *target_obj, const AVOption *o,
return set_string_binary(obj, o, val, dst);
case AV_OPT_TYPE_FLAGS:
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
case AV_OPT_TYPE_FLOAT:
@@ -767,6 +774,7 @@ int av_opt_eval_ ## name(void *obj, const AVOption *o, \
OPT_EVAL_NUMBER(flags, AV_OPT_TYPE_FLAGS, int)
OPT_EVAL_NUMBER(int, AV_OPT_TYPE_INT, int)
+OPT_EVAL_NUMBER(uint, AV_OPT_TYPE_UINT, unsigned)
OPT_EVAL_NUMBER(int64, AV_OPT_TYPE_INT64, int64_t)
OPT_EVAL_NUMBER(float, AV_OPT_TYPE_FLOAT, float)
OPT_EVAL_NUMBER(double, AV_OPT_TYPE_DOUBLE, double)
@@ -926,9 +934,8 @@ int av_opt_set_dict_val(void *obj, const char *name, const AVDictionary *val,
dst = (AVDictionary **)(((uint8_t *)target_obj) + o->offset);
av_dict_free(dst);
- av_dict_copy(dst, val, 0);
- return 0;
+ return av_dict_copy(dst, val, 0);
}
int av_opt_set_chlayout(void *obj, const char *name,
@@ -941,6 +948,8 @@ int av_opt_set_chlayout(void *obj, const char *name,
if (!o || !target_obj)
return AVERROR_OPTION_NOT_FOUND;
+ if (o->flags & AV_OPT_FLAG_READONLY)
+ return AVERROR(EINVAL);
dst = (AVChannelLayout*)((uint8_t*)target_obj + o->offset);
@@ -997,6 +1006,9 @@ static int opt_get_elem(const AVOption *o, uint8_t **pbuf, size_t buf_len,
case AV_OPT_TYPE_INT:
ret = snprintf(*pbuf, buf_len, "%d", *(int *)dst);
break;
+ case AV_OPT_TYPE_UINT:
+ ret = snprintf(*pbuf, buf_len, "%u", *(unsigned *)dst);
+ break;
case AV_OPT_TYPE_INT64:
ret = snprintf(*pbuf, buf_len, "%"PRId64, *(int64_t *)dst);
break;
@@ -1335,9 +1347,8 @@ int av_opt_get_dict_val(void *obj, const char *name, int search_flags, AVDiction
return AVERROR(EINVAL);
src = *(AVDictionary **)(((uint8_t *)target_obj) + o->offset);
- av_dict_copy(out_val, src, 0);
- return 0;
+ return av_dict_copy(out_val, src, 0);
}
int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name)
@@ -1444,6 +1455,7 @@ static void log_type(void *av_log_obj, const AVOption *o,
[AV_OPT_TYPE_FLAGS] = "<flags>",
[AV_OPT_TYPE_INT] = "<int>",
[AV_OPT_TYPE_INT64] = "<int64>",
+ [AV_OPT_TYPE_UINT] = "<unsigned>",
[AV_OPT_TYPE_UINT64] = "<uint64>",
[AV_OPT_TYPE_DOUBLE] = "<double>",
[AV_OPT_TYPE_FLOAT] = "<float>",
@@ -1515,6 +1527,7 @@ static void log_default(void *obj, void *av_log_obj, const AVOption *opt)
av_log(av_log_obj, AV_LOG_INFO, "%s", buf);
break;
}
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_INT:
case AV_OPT_TYPE_UINT64:
case AV_OPT_TYPE_INT64: {
@@ -1600,6 +1613,7 @@ static void opt_list(void *obj, void *av_log_obj, const char *unit,
if (av_opt_query_ranges(&r, obj, opt->name, AV_OPT_SEARCH_FAKE_OBJ) >= 0) {
switch (opt->type) {
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
case AV_OPT_TYPE_DOUBLE:
@@ -1676,6 +1690,7 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_FLAGS:
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
case AV_OPT_TYPE_DURATION:
@@ -2177,6 +2192,7 @@ int av_opt_query_ranges_default(AVOptionRanges **ranges_arg, void *obj, const ch
switch (field->type) {
case AV_OPT_TYPE_BOOL:
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
case AV_OPT_TYPE_PIXEL_FMT:
@@ -2281,6 +2297,7 @@ int av_opt_is_set_to_default(void *obj, const AVOption *o)
case AV_OPT_TYPE_PIXEL_FMT:
case AV_OPT_TYPE_SAMPLE_FMT:
case AV_OPT_TYPE_INT:
+ case AV_OPT_TYPE_UINT:
case AV_OPT_TYPE_DURATION:
case AV_OPT_TYPE_INT64:
case AV_OPT_TYPE_UINT64:
@@ -2292,7 +2309,9 @@ int av_opt_is_set_to_default(void *obj, const AVOption *o)
if ((ret = av_channel_layout_from_string(&ch_layout, o->default_val.str)) < 0)
return ret;
}
- return !av_channel_layout_compare((AVChannelLayout *)dst, &ch_layout);
+ ret = !av_channel_layout_compare((AVChannelLayout *)dst, &ch_layout);
+ av_channel_layout_uninit(&ch_layout);
+ return ret;
}
case AV_OPT_TYPE_STRING:
str = *(char **)dst;
@@ -2386,26 +2405,22 @@ int av_opt_is_set_to_default_by_name(void *obj, const char *name, int search_fla
return av_opt_is_set_to_default(target, o);
}
-int av_opt_serialize(void *obj, int opt_flags, int flags, char **buffer,
- const char key_val_sep, const char pairs_sep)
+static int opt_serialize(void *obj, int opt_flags, int flags, int *cnt,
+ AVBPrint *bprint, const char key_val_sep, const char pairs_sep)
{
const AVOption *o = NULL;
+ void *child = NULL;
uint8_t *buf;
- AVBPrint bprint;
- int ret, cnt = 0;
+ int ret;
const char special_chars[] = {pairs_sep, key_val_sep, '\0'};
- if (pairs_sep == '\0' || key_val_sep == '\0' || pairs_sep == key_val_sep ||
- pairs_sep == '\\' || key_val_sep == '\\') {
- av_log(obj, AV_LOG_ERROR, "Invalid separator(s) found.");
- return AVERROR(EINVAL);
- }
-
- if (!obj || !buffer)
- return AVERROR(EINVAL);
-
- *buffer = NULL;
- av_bprint_init(&bprint, 64, AV_BPRINT_SIZE_UNLIMITED);
+ if (flags & AV_OPT_SERIALIZE_SEARCH_CHILDREN)
+ while (child = av_opt_child_next(obj, child)) {
+ ret = opt_serialize(child, opt_flags, flags, cnt, bprint,
+ key_val_sep, pairs_sep);
+ if (ret < 0)
+ return ret;
+ }
while (o = av_opt_next(obj, o)) {
if (o->type == AV_OPT_TYPE_CONST)
@@ -2417,18 +2432,45 @@ int av_opt_serialize(void *obj, int opt_flags, int flags, char **buffer,
if (flags & AV_OPT_SERIALIZE_SKIP_DEFAULTS && av_opt_is_set_to_default(obj, o) > 0)
continue;
if ((ret = av_opt_get(obj, o->name, 0, &buf)) < 0) {
- av_bprint_finalize(&bprint, NULL);
+ av_bprint_finalize(bprint, NULL);
return ret;
}
if (buf) {
- if (cnt++)
- av_bprint_append_data(&bprint, &pairs_sep, 1);
- av_bprint_escape(&bprint, o->name, special_chars, AV_ESCAPE_MODE_BACKSLASH, 0);
- av_bprint_append_data(&bprint, &key_val_sep, 1);
- av_bprint_escape(&bprint, buf, special_chars, AV_ESCAPE_MODE_BACKSLASH, 0);
+ if ((*cnt)++)
+ av_bprint_append_data(bprint, &pairs_sep, 1);
+ av_bprint_escape(bprint, o->name, special_chars, AV_ESCAPE_MODE_BACKSLASH, 0);
+ av_bprint_append_data(bprint, &key_val_sep, 1);
+ av_bprint_escape(bprint, buf, special_chars, AV_ESCAPE_MODE_BACKSLASH, 0);
av_freep(&buf);
}
}
+
+ return 0;
+}
+
+int av_opt_serialize(void *obj, int opt_flags, int flags, char **buffer,
+ const char key_val_sep, const char pairs_sep)
+{
+ AVBPrint bprint;
+ int ret, cnt = 0;
+
+ if (pairs_sep == '\0' || key_val_sep == '\0' || pairs_sep == key_val_sep ||
+ pairs_sep == '\\' || key_val_sep == '\\') {
+ av_log(obj, AV_LOG_ERROR, "Invalid separator(s) found.");
+ return AVERROR(EINVAL);
+ }
+
+ if (!obj || !buffer)
+ return AVERROR(EINVAL);
+
+ *buffer = NULL;
+ av_bprint_init(&bprint, 64, AV_BPRINT_SIZE_UNLIMITED);
+
+ ret = opt_serialize(obj, opt_flags, flags, &cnt, &bprint,
+ key_val_sep, pairs_sep);
+ if (ret < 0)
+ return ret;
+
ret = av_bprint_finalize(&bprint, buffer);
if (ret < 0)
return ret;
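To illustrate the new option type, a hedged sketch of an options table entry using AV_OPT_TYPE_UINT; DemoContext and the option name are invented for this example:

    #include <limits.h>
    #include <stddef.h>
    #include <libavutil/opt.h>

    typedef struct DemoContext {
        const AVClass *class;
        unsigned depth;   /* backed by AV_OPT_TYPE_UINT, stored as plain unsigned */
    } DemoContext;

    #define OFFSET(x) offsetof(DemoContext, x)
    static const AVOption demo_options[] = {
        /* defaults and maxima above INT_MAX are now expressible, which
         * AV_OPT_TYPE_INT cannot do */
        { "depth", "set depth", OFFSET(depth), AV_OPT_TYPE_UINT,
          { .i64 = 1U << 31 }, 0, UINT_MAX, 1 },
        { NULL },
    };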
diff --git a/libavutil/opt.h b/libavutil/opt.h
index e6013662f6..07e27a9208 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -53,6 +53,16 @@
* question is allowed to access the field. This allows us to extend the
* semantics of those fields without breaking API compatibility.
*
+ * @section avoptions_scope Scope of AVOptions
+ *
+ * AVOptions is designed to support any set of multimedia configuration options
+ * that can be defined at compile-time. Although it is mainly used to expose
+ * FFmpeg options, you are welcome to adapt it to your own use case.
+ *
+ * No single approach can ever fully solve the problem of configuration,
+ * but please submit a patch if you believe you have found a problem
+ * that is best solved by extending AVOptions.
+ *
* @section avoptions_implement Implementing AVOptions
* This section describes how to add AVOptions capabilities to a struct.
*
@@ -242,7 +252,7 @@ enum AVOptionType{
AV_OPT_TYPE_DICT,
AV_OPT_TYPE_UINT64,
AV_OPT_TYPE_CONST,
- AV_OPT_TYPE_IMAGE_SIZE, ///< offset must point to two consecutive integers
+ AV_OPT_TYPE_IMAGE_SIZE, ///< offset must point to two consecutive ints
AV_OPT_TYPE_PIXEL_FMT,
AV_OPT_TYPE_SAMPLE_FMT,
AV_OPT_TYPE_VIDEO_RATE, ///< offset must point to AVRational
@@ -250,6 +260,7 @@ enum AVOptionType{
AV_OPT_TYPE_COLOR,
AV_OPT_TYPE_BOOL,
AV_OPT_TYPE_CHLAYOUT,
+ AV_OPT_TYPE_UINT,
/**
* May be combined with another regular option type to declare an array
@@ -786,6 +797,10 @@ int av_opt_set_image_size(void *obj, const char *name, int w, int h, int search_
int av_opt_set_pixel_fmt (void *obj, const char *name, enum AVPixelFormat fmt, int search_flags);
int av_opt_set_sample_fmt(void *obj, const char *name, enum AVSampleFormat fmt, int search_flags);
int av_opt_set_video_rate(void *obj, const char *name, AVRational val, int search_flags);
+/**
+ * @note Any old chlayout present is discarded and replaced with a copy of the new one. The
+ * caller still owns layout and is responsible for uninitializing it.
+ */
int av_opt_set_chlayout(void *obj, const char *name, const AVChannelLayout *layout, int search_flags);
/**
* @note Any old dictionary present is discarded and replaced with a copy of the new one. The
@@ -847,6 +862,10 @@ int av_opt_get_image_size(void *obj, const char *name, int search_flags, int *w_
int av_opt_get_pixel_fmt (void *obj, const char *name, int search_flags, enum AVPixelFormat *out_fmt);
int av_opt_get_sample_fmt(void *obj, const char *name, int search_flags, enum AVSampleFormat *out_fmt);
int av_opt_get_video_rate(void *obj, const char *name, int search_flags, AVRational *out_val);
+/**
+ * @param[out] layout The returned layout is a copy of the actual value and must
+ * be freed with av_channel_layout_uninit() by the caller
+ */
int av_opt_get_chlayout(void *obj, const char *name, int search_flags, AVChannelLayout *layout);
/**
* @param[out] out_val The returned dictionary is a copy of the actual value and must
@@ -873,6 +892,7 @@ int av_opt_get_dict_val(void *obj, const char *name, int search_flags, AVDiction
*/
int av_opt_eval_flags (void *obj, const AVOption *o, const char *val, int *flags_out);
int av_opt_eval_int (void *obj, const AVOption *o, const char *val, int *int_out);
+int av_opt_eval_uint (void *obj, const AVOption *o, const char *val, unsigned *uint_out);
int av_opt_eval_int64 (void *obj, const AVOption *o, const char *val, int64_t *int64_out);
int av_opt_eval_float (void *obj, const AVOption *o, const char *val, float *float_out);
int av_opt_eval_double(void *obj, const AVOption *o, const char *val, double *double_out);
@@ -929,6 +949,7 @@ int av_opt_flag_is_set(void *obj, const char *field_name, const char *flag_name)
#define AV_OPT_SERIALIZE_SKIP_DEFAULTS 0x00000001 ///< Serialize options that are not set to default values only.
#define AV_OPT_SERIALIZE_OPT_FLAGS_EXACT 0x00000002 ///< Serialize options that exactly match opt_flags only.
+#define AV_OPT_SERIALIZE_SEARCH_CHILDREN 0x00000004 ///< Serialize options in possible children of the given object.
/**
* Serialize object's options.
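A minimal sketch of the new AV_OPT_SERIALIZE_SEARCH_CHILDREN flag in use, assuming the object exposes children through AVClass.child_next (as the updated opt test does); the wrapper function is illustrative:

    #include <libavutil/opt.h>

    /* Serialize the non-default options of obj and of all its AVOptions
     * children into a single "key=value,key=value" string. */
    static char *dump_options(void *obj)
    {
        char *buf = NULL;

        if (av_opt_serialize(obj, 0,
                             AV_OPT_SERIALIZE_SKIP_DEFAULTS |
                             AV_OPT_SERIALIZE_SEARCH_CHILDREN,
                             &buf, '=', ',') < 0)
            return NULL;
        return buf; /* caller frees with av_free() */
    }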
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
index bc8bb5f47c..2b13cda662 100644
--- a/libavutil/ppc/cpu.c
+++ b/libavutil/ppc/cpu.c
@@ -27,7 +27,7 @@
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
-#elif defined(__OpenBSD__)
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
@@ -56,8 +56,8 @@ int ff_get_cpu_flags_ppc(void)
if (result == VECTORTYPE_ALTIVEC)
return AV_CPU_FLAG_ALTIVEC;
return 0;
-#elif defined(__APPLE__) || defined(__OpenBSD__)
-#ifdef __OpenBSD__
+#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
+#if defined(__NetBSD__) || defined(__OpenBSD__)
int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
#else
int sels[2] = {CTL_HW, HW_VECTORUNIT};
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 0a9e2e0d3f..1e6358dcb5 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -36,13 +36,16 @@
#define HWD
#endif
- .macro func sym, ext=
+ .macro func sym, ext1=, ext2=
.text
.align 2
.option push
- .ifnb \ext
- .option arch, +\ext
+ .ifnb \ext1
+ .option arch, +\ext1
+ .ifnb \ext2
+ .option arch, +\ext2
+ .endif
.endif
.global \sym
@@ -92,83 +95,146 @@
shnadd 3, \rd, \rs1, \rs2
.endm
#endif
-#if !defined (__riscv_zbb)
- .macro min rd, rs1, rs2
- .insn r OP, 4, 5, \rd, \rs1, \rs2
- .endm
-#endif
- /* Convenience macro to load a Vector type (vtype) as immediate */
- .macro lvtypei rd, e, m=m1, tp=tu, mp=mu
+#if defined (__riscv_v_elen)
+# define RV_V_ELEN __riscv_v_elen
+#else
+/* Run-time detection of the V extension implies ELEN >= 64. */
+# define RV_V_ELEN 64
+#endif
+#if RV_V_ELEN == 32
+# define VSEW_MAX 2
+#else
+# define VSEW_MAX 3
+#endif
- .ifc \e,e8
- .equ ei, 0
+ .macro parse_vtype ew, tp, mp
+ .ifc \ew,e8
+ .equ vsew, 0
.else
- .ifc \e,e16
- .equ ei, 8
+ .ifc \ew,e16
+ .equ vsew, 1
.else
- .ifc \e,e32
- .equ ei, 16
+ .ifc \ew,e32
+ .equ vsew, 2
.else
- .ifc \e,e64
- .equ ei, 24
+ .ifc \ew,e64
+ .equ vsew, 3
.else
- .error "Unknown element type"
+ .error "Unknown element width \ew"
.endif
.endif
.endif
.endif
- .ifc \m,m1
- .equ mi, 0
+ .ifc \tp,tu
+ .equ tp, 0
.else
- .ifc \m,m2
- .equ mi, 1
+ .ifc \tp,ta
+ .equ tp, 1
.else
- .ifc \m,m4
- .equ mi, 2
- .else
- .ifc \m,m8
- .equ mi, 3
- .else
- .ifc \m,mf8
- .equ mi, 5
- .else
- .ifc \m,mf4
- .equ mi, 6
- .else
- .ifc \m,mf2
- .equ mi, 7
- .else
- .error "Unknown multiplier"
- .equ mi, 3
- .endif
- .endif
- .endif
- .endif
- .endif
+ .error "Unknown tail policy \tp"
.endif
.endif
- .ifc \tp,tu
- .equ tpi, 0
+ .ifc \mp,mu
+ .equ mp, 0
.else
- .ifc \tp,ta
- .equ tpi, 64
+ .ifc \mp,ma
+ .equ mp, 1
.else
- .error "Unknown tail policy"
+ .error "Unknown mask policy \mp"
.endif
.endif
+ .endm
- .ifc \mp,mu
- .equ mpi, 0
- .else
- .ifc \mp,ma
- .equ mpi, 128
+ /**
+ * Gets the vector type with the smallest suitable LMUL value.
+ * @param[out] rd vector type destination register
+ * @param avl vector length constant
+ * @param ew element width: e8, e16, e32 or e64
+ * @param tp tail policy: tu or ta
+ * @param mp mask policy: mu or ma
+ */
+ .macro vtype_ivli rd, avl, ew, tp=tu, mp=mu
+ .if \avl <= 1
+ .equ log2vl, 0
+ .elseif \avl <= 2
+ .equ log2vl, 1
+ .elseif \avl <= 4
+ .equ log2vl, 2
+ .elseif \avl <= 8
+ .equ log2vl, 3
+ .elseif \avl <= 16
+ .equ log2vl, 4
+ .elseif \avl <= 32
+ .equ log2vl, 5
+ .elseif \avl <= 64
+ .equ log2vl, 6
+ .elseif \avl <= 128
+ .equ log2vl, 7
.else
- .error "Unknown mask policy"
+ .error "Vector length \avl out of range"
.endif
+ parse_vtype \ew, \tp, \mp
+ csrr \rd, vlenb
+ clz \rd, \rd
+ addi \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen
+ max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
+ .if vsew < VSEW_MAX
+ addi \rd, \rd, vsew - VSEW_MAX
+ andi \rd, \rd, 7
.endif
+ ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
+ .endm
+
+ /**
+ * Gets the vector type with the smallest suitable LMUL value.
+ * @param[out] rd vector type destination register
+ * @param rs vector length source register
+ * @param[out] tmp temporary register to be clobbered
+ * @param ew element width: e8, e16, e32 or e64
+ * @param tp tail policy: tu or ta
+ * @param mp mask policy: mu or ma
+ */
+ .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
+ parse_vtype \ew, \tp, \mp
+ /*
+ * The difference between the CLZs notionally equals the VLMUL value
+ * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
+ */
+ slli \tmp, \rs, 1 + VSEW_MAX
+ csrr \rd, vlenb
+ addi \tmp, \tmp, -1
+ clz \rd, \rd
+ clz \tmp, \tmp
+ sub \rd, \rd, \tmp
+ max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
+ .if vsew < VSEW_MAX
+ addi \rd, \rd, vsew - VSEW_MAX
+ andi \rd, \rd, 7
+ .endif
+ ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
+ .endm
+
+ /**
+ * Widens a vector type.
+ * @param[out] rd widened vector type destination register
+ * @param rs vector type source register
+ * @param n number of times to widen (once by default)
+ */
+ .macro vwtypei rd, rs, n=1
+ xori \rd, \rs, 4
+ addi \rd, \rd, (\n) * 011
+ xori \rd, \rd, 4
+ .endm
- li \rd, (ei | mi | tpi | mpi)
+ /**
+ * Narrows a vector type.
+ * @param[out] rd narrowed vector type destination register
+ * @param rs vector type source register
+ * @param n number of times to narrow (once by default)
+ */
+ .macro vntypei rd, rs, n=1
+ vwtypei \rd, \rs, -(\n)
.endm
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
index 460d3e9f91..73abd289a6 100644
--- a/libavutil/riscv/cpu.c
+++ b/libavutil/riscv/cpu.c
@@ -18,8 +18,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#define _GNU_SOURCE
#include "libavutil/cpu.h"
#include "libavutil/cpu_internal.h"
+#include "libavutil/macros.h"
#include "libavutil/log.h"
#include "config.h"
@@ -27,26 +29,78 @@
#include <sys/auxv.h>
#define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
#endif
+#if HAVE_SYS_HWPROBE_H
+#include <sys/hwprobe.h>
+#elif HAVE_ASM_HWPROBE_H
+#include <asm/hwprobe.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+static int __riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus,
+ unsigned int flags)
+{
+ return syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_count, cpus,
+ flags);
+}
+#endif
int ff_get_cpu_flags_riscv(void)
{
int ret = 0;
+#if HAVE_SYS_HWPROBE_H || HAVE_ASM_HWPROBE_H
+ struct riscv_hwprobe pairs[] = {
+ { RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0 },
+ { RISCV_HWPROBE_KEY_IMA_EXT_0, 0 },
+ { RISCV_HWPROBE_KEY_CPUPERF_0, 0 },
+ };
+
+ if (__riscv_hwprobe(pairs, FF_ARRAY_ELEMS(pairs), 0, NULL, 0) == 0) {
+ if (pairs[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
+ ret |= AV_CPU_FLAG_RVI;
+ if (pairs[1].value & RISCV_HWPROBE_IMA_FD)
+ ret |= AV_CPU_FLAG_RVF | AV_CPU_FLAG_RVD;
+#ifdef RISCV_HWPROBE_IMA_V
+ if (pairs[1].value & RISCV_HWPROBE_IMA_V)
+ ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64
+ | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64;
+#endif
+#ifdef RISCV_HWPROBE_EXT_ZBA
+ if (pairs[1].value & RISCV_HWPROBE_EXT_ZBA)
+ ret |= AV_CPU_FLAG_RVB_ADDR;
+#endif
+#ifdef RISCV_HWPROBE_EXT_ZBB
+ if (pairs[1].value & RISCV_HWPROBE_EXT_ZBB)
+ ret |= AV_CPU_FLAG_RVB_BASIC;
+#endif
+#ifdef RISCV_HWPROBE_EXT_ZVBB
+ if (pairs[1].value & RISCV_HWPROBE_EXT_ZVBB)
+ ret |= AV_CPU_FLAG_RV_ZVBB;
+#endif
+ switch (pairs[2].value & RISCV_HWPROBE_MISALIGNED_MASK) {
+ case RISCV_HWPROBE_MISALIGNED_FAST:
+ ret |= AV_CPU_FLAG_RV_MISALIGNED;
+ break;
+ default:
+ break;
+ }
+ } else
+#endif
#if HAVE_GETAUXVAL
- const unsigned long hwcap = getauxval(AT_HWCAP);
+ {
+ const unsigned long hwcap = getauxval(AT_HWCAP);
- if (hwcap & HWCAP_RV('I'))
- ret |= AV_CPU_FLAG_RVI;
- if (hwcap & HWCAP_RV('F'))
- ret |= AV_CPU_FLAG_RVF;
- if (hwcap & HWCAP_RV('D'))
- ret |= AV_CPU_FLAG_RVD;
- if (hwcap & HWCAP_RV('B'))
- ret |= AV_CPU_FLAG_RVB_ADDR | AV_CPU_FLAG_RVB_BASIC;
+ if (hwcap & HWCAP_RV('I'))
+ ret |= AV_CPU_FLAG_RVI;
+ if (hwcap & HWCAP_RV('F'))
+ ret |= AV_CPU_FLAG_RVF;
+ if (hwcap & HWCAP_RV('D'))
+ ret |= AV_CPU_FLAG_RVD;
- /* The V extension implies all Zve* functional subsets */
- if (hwcap & HWCAP_RV('V'))
- ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64
- | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64;
+ /* The V extension implies all Zve* functional subsets */
+ if (hwcap & HWCAP_RV('V'))
+ ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64
+ | AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64;
+ }
#endif
#ifdef __riscv_i
@@ -79,6 +133,9 @@ int ff_get_cpu_flags_riscv(void)
#endif
#endif
#endif
+#ifdef __riscv_zvbb
+ ret |= AV_CPU_FLAG_RV_ZVBB;
+#endif
return ret;
}
diff --git a/libavutil/riscv/cpu.h b/libavutil/riscv/cpu.h
index 56035f8556..af1440f626 100644
--- a/libavutil/riscv/cpu.h
+++ b/libavutil/riscv/cpu.h
@@ -22,6 +22,7 @@
#define AVUTIL_RISCV_CPU_H
#include "config.h"
+#include <stdbool.h>
#include <stddef.h>
#include "libavutil/cpu.h"
@@ -42,4 +43,24 @@ static inline size_t ff_get_rv_vlenb(void)
return vlenb;
}
#endif
+
+/**
+ * Checks that the vector bit-size is at least the given value.
+ * This is potentially undefined behaviour if vectors are not implemented.
+ */
+static inline bool ff_rv_vlen_least(unsigned int bits)
+{
+#ifdef __riscv_v_min_vlen
+ if (bits <= __riscv_v_min_vlen)
+ return true;
+#else
+ /*
+ * Vector lengths smaller than 128 bits are only possible in embedded cases
+ * and cannot be run-time detected, so we can assume 128 bits at least.
+ */
+ if (bits <= 128)
+ return true;
+#endif
+ return bits <= (8 * ff_get_rv_vlenb());
+}
#endif
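A minimal sketch, under the assumption of a typical per-arch DSP init function, of how ff_rv_vlen_least() is meant to gate RVV code paths; all names other than the libavutil symbols are placeholders:

    #include "libavutil/cpu.h"
    #include "libavutil/riscv/cpu.h"

    /* Hypothetical DSP init: only install an RVV code path when the V
     * extension is available and the vector registers are at least 128
     * bits wide. */
    void ff_demo_dsp_init_riscv(void)
    {
        int flags = av_get_cpu_flags();

        if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
            /* c->fn = ff_fn_rvv; -- placeholder assignment */
        }
    }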
diff --git a/libavutil/tests/base64.c b/libavutil/tests/base64.c
index 400e01cefe..66d0fdc1fc 100644
--- a/libavutil/tests/base64.c
+++ b/libavutil/tests/base64.c
@@ -64,6 +64,16 @@ static int test_encode_decode(const uint8_t *data, unsigned int data_size,
printf("Failed: decode to NULL buffer\n");
return 1;
}
+ if (data_size > 0 && (data2_size = av_base64_decode(data2, encoded, data_size - 1)) != data_size - 1) {
+ printf("Failed: out of array write\n"
+ "Encoded:\n%s\n", encoded);
+ return 1;
+ }
+ if (data_size > 1 && (data2_size = av_base64_decode(data2, encoded, data_size - 2)) != data_size - 2) {
+ printf("Failed: out of array write\n"
+ "Encoded:\n%s\n", encoded);
+ return 1;
+ }
if (strlen(encoded)) {
char *end = strchr(encoded, '=');
if (!end)
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index d91bfeab5c..02b98682e3 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -94,6 +94,8 @@ static const struct {
{ AV_CPU_FLAG_RVV_F32, "zve32f" },
{ AV_CPU_FLAG_RVV_I64, "zve64x" },
{ AV_CPU_FLAG_RVV_F64, "zve64d" },
+ { AV_CPU_FLAG_RV_ZVBB, "zvbb" },
+ { AV_CPU_FLAG_RV_MISALIGNED, "misaligned" },
#endif
{ 0 }
};
diff --git a/libavutil/tests/opt.c b/libavutil/tests/opt.c
index ccf3a54f96..d189938d9b 100644
--- a/libavutil/tests/opt.c
+++ b/libavutil/tests/opt.c
@@ -30,7 +30,9 @@
typedef struct TestContext {
const AVClass *class;
+ struct ChildContext *child;
int num;
+ int unum;
int toggle;
char *string;
int flags;
@@ -85,7 +87,8 @@ static const AVOptionArrayDef array_dict = {
};
static const AVOption test_options[]= {
- {"num", "set num", OFFSET(num), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, 1 },
+ {"num", "set num", OFFSET(num), AV_OPT_TYPE_INT, { .i64 = 0 }, -1, 100, 1 },
+ {"unum", "set unum", OFFSET(unum), AV_OPT_TYPE_UINT, { .i64 = 1U << 31 }, 0, 1U << 31, 1 },
{"toggle", "set toggle", OFFSET(toggle), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, 1 },
{"rational", "set rational", OFFSET(rational), AV_OPT_TYPE_RATIONAL, { .dbl = 1 }, 0, 10, 1 },
{"string", "set string", OFFSET(string), AV_OPT_TYPE_STRING, { .str = "default" }, CHAR_MIN, CHAR_MAX, 1 },
@@ -104,7 +107,7 @@ static const AVOption test_options[]= {
{"bin", "set binary value", OFFSET(binary), AV_OPT_TYPE_BINARY, { .str="62696e00" }, 0, 0, 1 },
{"bin1", "set binary value", OFFSET(binary1), AV_OPT_TYPE_BINARY, { .str=NULL }, 0, 0, 1 },
{"bin2", "set binary value", OFFSET(binary2), AV_OPT_TYPE_BINARY, { .str="" }, 0, 0, 1 },
- {"num64", "set num 64bit", OFFSET(num64), AV_OPT_TYPE_INT64, { .i64 = 1 }, 0, 100, 1 },
+ {"num64", "set num 64bit", OFFSET(num64), AV_OPT_TYPE_INT64, { .i64 = 1LL << 32 }, -1, 1LL << 32, 1 },
{"flt", "set float", OFFSET(flt), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 / 3 }, 0, 100, 1 },
{"dbl", "set double", OFFSET(dbl), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 / 3 }, 0, 100, 1 },
{"bool1", "set boolean value", OFFSET(bool1), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, 1 },
@@ -123,10 +126,46 @@ static const char *test_get_name(void *ctx)
return "test";
}
+typedef struct ChildContext {
+ const AVClass *class;
+ int64_t child_num64;
+ int child_num;
+} ChildContext;
+
+#undef OFFSET
+#define OFFSET(x) offsetof(ChildContext, x)
+
+static const AVOption child_options[]= {
+ {"child_num64", "set num 64bit", OFFSET(child_num64), AV_OPT_TYPE_INT64, { .i64 = 0 }, 0, 100, 1 },
+ {"child_num", "set child_num", OFFSET(child_num), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 100, 1 },
+ { NULL },
+};
+
+static const char *child_get_name(void *ctx)
+{
+ return "child";
+}
+
+static const AVClass child_class = {
+ .class_name = "ChildContext",
+ .item_name = child_get_name,
+ .option = child_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+static void *test_child_next(void *obj, void *prev)
+{
+ TestContext *test_ctx = obj;
+ if (!prev)
+ return test_ctx->child;
+ return NULL;
+}
+
static const AVClass test_class = {
.class_name = "TestContext",
.item_name = test_get_name,
.option = test_options,
+ .child_next = test_child_next,
.version = LIBAVUTIL_VERSION_INT,
};
@@ -149,6 +188,7 @@ int main(void)
av_opt_set_defaults(&test_ctx);
printf("num=%d\n", test_ctx.num);
+ printf("unum=%u\n", test_ctx.unum);
printf("toggle=%d\n", test_ctx.toggle);
printf("string=%s\n", test_ctx.string);
printf("escape=%s\n", test_ctx.escape);
@@ -277,8 +317,19 @@ int main(void)
av_set_options_string(&test_ctx, buf, "=", ",");
av_free(buf);
if (av_opt_serialize(&test_ctx, 0, 0, &buf, '=', ',') >= 0) {
+ ChildContext child_ctx = { 0 };
printf("%s\n", buf);
av_free(buf);
+ child_ctx.class = &child_class;
+ test_ctx.child = &child_ctx;
+ if (av_opt_serialize(&test_ctx, 0,
+ AV_OPT_SERIALIZE_SKIP_DEFAULTS|AV_OPT_SERIALIZE_SEARCH_CHILDREN,
+ &buf, '=', ',') >= 0) {
+ printf("%s\n", buf);
+ av_free(buf);
+ }
+ av_opt_free(&child_ctx);
+ test_ctx.child = NULL;
}
}
av_opt_free(&test_ctx);
@@ -332,11 +383,25 @@ int main(void)
"bin=boguss",
"bin=111",
"bin=ffff",
+ "num=bogus",
+ "num=44",
+ "num=44.4",
+ "num=-1",
+ "num=-2",
+ "num=101",
+ "unum=bogus",
+ "unum=44",
+ "unum=44.4",
+ "unum=-1",
+ "unum=2147483648",
+ "unum=2147483649",
"num64=bogus",
"num64=44",
"num64=44.4",
"num64=-1",
- "num64=101",
+ "num64=-2",
+ "num64=4294967296",
+ "num64=4294967297",
"flt=bogus",
"flt=2",
"flt=2.2",
@@ -404,5 +469,54 @@ int main(void)
av_opt_free(&test_ctx);
}
+ printf("\nTesting av_opt_find2()\n");
+ {
+ TestContext test_ctx = { 0 };
+ ChildContext child_ctx = { 0 };
+ void *target;
+ const AVOption *opt;
+
+ test_ctx.class = &test_class;
+ child_ctx.class = &child_class;
+ test_ctx.child = &child_ctx;
+
+ av_log_set_level(AV_LOG_QUIET);
+
+ // Should succeed. num exists and has opt_flags 1
+ opt = av_opt_find2(&test_ctx, "num", NULL, 1, 0, &target);
+ if (opt && target == &test_ctx)
+ printf("OK '%s'\n", opt->name);
+ else
+ printf("Error 'num'\n");
+
+ // Should fail. num64 exists but has opt_flags 1, not 2
+ opt = av_opt_find(&test_ctx, "num64", NULL, 2, 0);
+ if (opt)
+ printf("OK '%s'\n", opt->name);
+ else
+ printf("Error 'num64'\n");
+
+ // Should fail. child_num exists but in a child object we're not searching
+ opt = av_opt_find(&test_ctx, "child_num", NULL, 0, 0);
+ if (opt)
+ printf("OK '%s'\n", opt->name);
+ else
+ printf("Error 'child_num'\n");
+
+ // Should succeed. child_num exists in a child object we're searching
+ opt = av_opt_find2(&test_ctx, "child_num", NULL, 0, AV_OPT_SEARCH_CHILDREN, &target);
+ if (opt && target == &child_ctx)
+ printf("OK '%s'\n", opt->name);
+ else
+ printf("Error 'child_num'\n");
+
+ // Should fail. foo doesn't exist
+ opt = av_opt_find(&test_ctx, "foo", NULL, 0, 0);
+ if (opt)
+ printf("OK '%s'\n", opt->name);
+ else
+ printf("Error 'foo'\n");
+ }
+
return 0;
}
diff --git a/libavutil/tests/side_data_array.c b/libavutil/tests/side_data_array.c
index 793a62c009..633e9ee681 100644
--- a/libavutil/tests/side_data_array.c
+++ b/libavutil/tests/side_data_array.c
@@ -20,23 +20,22 @@
#include <stdio.h>
#include "libavutil/frame.c"
-#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/internal.h"
-static void print_clls(const AVFrameSideData **sd, const int nb_sd)
+static void print_entries(const AVFrameSideData **sd, const int nb_sd)
{
for (int i = 0; i < nb_sd; i++) {
const AVFrameSideData *entry = sd[i];
- printf("sd %d, %s",
- i, av_frame_side_data_name(entry->type));
+ printf("sd %d (size %"SIZE_SPECIFIER"), %s",
+ i, entry->size, av_frame_side_data_name(entry->type));
- if (entry->type != AV_FRAME_DATA_CONTENT_LIGHT_LEVEL) {
+ if (entry->type != AV_FRAME_DATA_SEI_UNREGISTERED) {
putchar('\n');
continue;
}
- printf(": MaxCLL: %u\n",
- ((AVContentLightMetadata *)entry->data)->MaxCLL);
+ printf(": %d\n", *(int32_t *)entry->data);
}
}
@@ -51,51 +50,60 @@ int main(void)
av_assert0(
av_frame_side_data_new(&set.sd, &set.nb_sd,
- AV_FRAME_DATA_AMBIENT_VIEWING_ENVIRONMENT,
- 0, 0));
+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
+ sizeof(int64_t), 0));
+ av_assert0(
+ av_frame_side_data_new(&set.sd, &set.nb_sd,
+ AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
+ sizeof(int32_t), AV_FRAME_SIDE_DATA_FLAG_REPLACE));
// test entries in the middle
for (int value = 1; value < 4; value++) {
AVFrameSideData *sd = av_frame_side_data_new(
- &set.sd, &set.nb_sd, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
- sizeof(AVContentLightMetadata), 0);
+ &set.sd, &set.nb_sd, AV_FRAME_DATA_SEI_UNREGISTERED,
+ sizeof(int32_t), 0);
av_assert0(sd);
- ((AVContentLightMetadata *)sd->data)->MaxCLL = value;
+ *(int32_t *)sd->data = value;
}
av_assert0(
av_frame_side_data_new(
- &set.sd, &set.nb_sd, AV_FRAME_DATA_SPHERICAL, 0, 0));
+ &set.sd, &set.nb_sd, AV_FRAME_DATA_SPHERICAL,
+ sizeof(int64_t), 0));
+
+ av_assert0(
+ av_frame_side_data_new(
+ &set.sd, &set.nb_sd, AV_FRAME_DATA_SPHERICAL,
+ sizeof(int32_t), AV_FRAME_SIDE_DATA_FLAG_REPLACE));
// test entries at the end
for (int value = 1; value < 4; value++) {
AVFrameSideData *sd = av_frame_side_data_new(
- &set.sd, &set.nb_sd, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
- sizeof(AVContentLightMetadata), 0);
+ &set.sd, &set.nb_sd, AV_FRAME_DATA_SEI_UNREGISTERED,
+ sizeof(int32_t), 0);
av_assert0(sd);
- ((AVContentLightMetadata *)sd->data)->MaxCLL = value + 3;
+ *(int32_t *)sd->data = value + 3;
}
puts("Initial addition results with duplicates:");
- print_clls((const AVFrameSideData **)set.sd, set.nb_sd);
+ print_entries((const AVFrameSideData **)set.sd, set.nb_sd);
{
AVFrameSideData *sd = av_frame_side_data_new(
- &set.sd, &set.nb_sd, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL,
- sizeof(AVContentLightMetadata),
- AV_FRAME_SIDE_DATA_FLAG_UNIQUE);
+ &set.sd, &set.nb_sd, AV_FRAME_DATA_SEI_UNREGISTERED,
+ sizeof(int32_t), AV_FRAME_SIDE_DATA_FLAG_UNIQUE);
av_assert0(sd);
- ((AVContentLightMetadata *)sd->data)->MaxCLL = 1337;
+ *(int32_t *)sd->data = 1337;
}
puts("\nFinal state after a single 'no-duplicates' addition:");
- print_clls((const AVFrameSideData **)set.sd, set.nb_sd);
+ print_entries((const AVFrameSideData **)set.sd, set.nb_sd);
av_frame_side_data_free(&set.sd, &set.nb_sd);
diff --git a/libavutil/version.h b/libavutil/version.h
index da1a833255..3221c4c592 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 59
-#define LIBAVUTIL_VERSION_MINOR 13
+#define LIBAVUTIL_VERSION_MINOR 19
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index c35ba309a4..7ba11d492e 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -9,4 +9,5 @@ LSX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale.o \
loongarch/input.o \
loongarch/output.o \
loongarch/output_lsx.o \
+ loongarch/input_lsx.o \
loongarch/yuv2rgb_lsx.o
diff --git a/libswscale/loongarch/input.S b/libswscale/loongarch/input.S
index d01f7384b1..717592b004 100644
--- a/libswscale/loongarch/input.S
+++ b/libswscale/loongarch/input.S
@@ -283,3 +283,498 @@ function planar_rgb_to_uv_lsx
ld.d s3, sp, 16
addi.d sp, sp, 24
endfunc
+
+/*
+ * void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yuy2ToUV_lsx
+ andi t0, a5, 7
+ srli.d a5, a5, 3
+ beqz a5, 2f
+1:
+ vld vr0, a3, 1
+ vld vr1, a3, 17
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0
+ vpickev.b vr0, vr2, vr2
+ vpickod.b vr1, vr2, vr2
+ fst.d f0, a0, 0
+ fst.d f1, a1, 0
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 1
+ ld.b t2, a3, 3
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function yuy2ToUV_lasx
+ andi t0, a5, 15
+ srli.d a5, a5, 4
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 1
+ xvld xr1, a3, 33
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0
+ xvpermi.d xr2, xr2, 0xd8
+ xvpickev.b xr0, xr2, xr2
+ xvpermi.d xr0, xr0, 0xd8
+ xvpickod.b xr1, xr2, xr2
+ xvpermi.d xr1, xr1, 0xd8
+ vst vr0, a0, 0
+ vst vr1, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 1
+ ld.b t2, a3, 3
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function yvy2ToUV_lsx
+ andi t0, a5, 7
+ srli.d a5, a5, 3
+ beqz a5, 2f
+1:
+ vld vr0, a3, 1
+ vld vr1, a3, 17
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0
+ vpickev.b vr0, vr2, vr2
+ vpickod.b vr1, vr2, vr2
+ fst.d f0, a1, 0
+ fst.d f1, a0, 0
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 1
+ ld.b t2, a3, 3
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function yvy2ToUV_lasx
+ andi t0, a5, 15
+ srli.d a5, a5, 4
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 1
+ xvld xr1, a3, 33
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0
+ xvpermi.d xr2, xr2, 0xd8
+ xvpickev.b xr0, xr2, xr2
+ xvpermi.d xr0, xr0, 0xd8
+ xvpickod.b xr1, xr2, xr2
+ xvpermi.d xr1, xr1, 0xd8
+ vst vr0, a1, 0
+ vst vr1, a0, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 1
+ ld.b t2, a3, 3
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
+function uyvyToUV_lsx
+ andi t0, a5, 7
+ srli.d a5, a5, 3
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0
+ vpickev.b vr0, vr2, vr2
+ vpickod.b vr1, vr2, vr2
+ fst.d f0, a0, 0
+ fst.d f1, a1, 0
+ addi.d a0, a0, 8
+ addi.d a1, a1, 8
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 1
+ ld.b t2, a3, 3
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function uyvyToUV_lasx
+ andi t0, a5, 15
+ srli.d a5, a5, 4
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0
+ xvpermi.d xr2, xr2, 0xd8
+ xvpickev.b xr0, xr2, xr2
+ xvpermi.d xr0, xr0, 0xd8
+ xvpickod.b xr1, xr2, xr2
+ xvpermi.d xr1, xr1, 0xd8
+ vst vr0, a0, 0
+ vst vr1, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 0
+ ld.b t2, a3, 2
+ addi.d a3, a3, 4
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
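+/*
+ * Deinterleaves a semi-planar (NV12/NV16/NV24) chroma plane: roughly
+ * dstU[i] = src1[2 * i] and dstV[i] = src1[2 * i + 1];
+ * 16 chroma pairs per LSX iteration, with a scalar tail for width % 16.
+ */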
+function nv12ToUV_lsx
+ andi t0, a5, 15
+ srli.d a5, a5, 4
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0
+ vpickod.b vr3, vr1, vr0
+ vst vr2, a0, 0
+ vst vr3, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 0
+ ld.b t2, a3, 1
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function nv12ToUV_lasx
+ andi t0, a5, 31
+ srli.d a5, a5, 5
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0
+ xvpickod.b xr3, xr1, xr0
+ xvpermi.d xr2, xr2, 0xd8
+ xvpermi.d xr3, xr3, 0xd8
+ xvst xr2, a0, 0
+ xvst xr3, a1, 0
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 0
+ ld.b t2, a3, 1
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a0, 0
+ st.b t2, a1, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ * const uint8_t *src2, int width, uint32_t *unused, void *opq)
+ */
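+/*
+ * NV21/NV42 variant of the routine above with U and V swapped:
+ * roughly dstU[i] = src1[2 * i + 1] and dstV[i] = src1[2 * i].
+ */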
+function nv21ToUV_lsx
+ andi t0, a5, 15
+ srli.d a5, a5, 4
+ beqz a5, 2f
+1:
+ vld vr0, a3, 0
+ vld vr1, a3, 16
+ addi.d a5, a5, -1
+ addi.d a3, a3, 32
+ vpickev.b vr2, vr1, vr0
+ vpickod.b vr3, vr1, vr0
+ vst vr2, a1, 0
+ vst vr3, a0, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 0
+ ld.b t2, a3, 1
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+function nv21ToUV_lasx
+ andi t0, a5, 31
+ srli.d a5, a5, 5
+ beqz a5, 2f
+1:
+ xvld xr0, a3, 0
+ xvld xr1, a3, 32
+ addi.d a5, a5, -1
+ addi.d a3, a3, 64
+ xvpickev.b xr2, xr1, xr0
+ xvpickod.b xr3, xr1, xr0
+ xvpermi.d xr2, xr2, 0xd8
+ xvpermi.d xr3, xr3, 0xd8
+ xvst xr2, a1, 0
+ xvst xr3, a0, 0
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ bnez a5, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a3, 0
+ ld.b t2, a3, 1
+ addi.d a3, a3, 2
+ addi.d t0, t0, -1
+ st.b t1, a1, 0
+ st.b t2, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ * const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
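+/*
+ * Extracts the alpha byte (offset 0 of each ABGR/ARGB pixel) and widens it
+ * to int16, roughly dst[i] = (src[4 * i] << 6) | (src[4 * i] >> 2);
+ * 8 pixels per LSX iteration, with a scalar tail for the remainder.
+ */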
+function abgrToA_lsx
+ andi t0, a4, 7
+ srli.d a4, a4, 3
+ vxor.v vr0, vr0, vr0
+ beqz a4, 2f
+1:
+ vld vr1, a1, 0
+ vld vr2, a1, 16
+ addi.d a4, a4, -1
+ addi.d a1, a1, 32
+ vpickev.b vr3, vr2, vr1
+ vpackev.b vr3, vr0, vr3
+ vslli.h vr1, vr3, 6
+ vsrli.h vr2, vr3, 2
+ vor.v vr3, vr2, vr1
+ vst vr3, a0, 0
+ addi.d a0, a0, 16
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a1, 0
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+function abgrToA_lasx
+ andi t0, a4, 15
+ srli.d a4, a4, 4
+ xvxor.v xr0, xr0, xr0
+ beqz a4, 2f
+1:
+ xvld xr1, a1, 0
+ xvld xr2, a1, 32
+ addi.d a4, a4, -1
+ addi.d a1, a1, 64
+ xvpickev.b xr3, xr2, xr1
+ xvpermi.d xr3, xr3, 0xd8
+ xvpackev.b xr3, xr0, xr3
+ xvslli.h xr1, xr3, 6
+ xvsrli.h xr2, xr3, 2
+ xvor.v xr3, xr2, xr1
+ xvst xr3, a0, 0
+ addi.d a0, a0, 32
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a1, 0
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+/*
+ * void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ * const uint8_t *unused2, int width, uint32_t *unused, void *opq)
+ */
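+/*
+ * Same as abgrToA above, but the alpha byte sits at offset 3 (RGBA/BGRA):
+ * roughly dst[i] = (src[4 * i + 3] << 6) | (src[4 * i + 3] >> 2).
+ */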
+function rgbaToA_lsx
+ andi t0, a4, 7
+ srli.d a4, a4, 3
+ vxor.v vr0, vr0, vr0
+ beqz a4, 2f
+1:
+ vld vr1, a1, 3
+ vld vr2, a1, 19
+ addi.d a4, a4, -1
+ addi.d a1, a1, 32
+ vpickev.b vr3, vr2, vr1
+ vpackev.b vr3, vr0, vr3
+ vslli.h vr1, vr3, 6
+ vsrli.h vr2, vr3, 2
+ vor.v vr3, vr2, vr1
+ vst vr3, a0, 0
+ addi.d a0, a0, 16
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a1, 3
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
+
+function rgbaToA_lasx
+ andi t0, a4, 15
+ srli.d a4, a4, 4
+ xvxor.v xr0, xr0, xr0
+ beqz a4, 2f
+1:
+ xvld xr1, a1, 3
+ xvld xr2, a1, 35
+ addi.d a4, a4, -1
+ addi.d a1, a1, 64
+ xvpickev.b xr3, xr2, xr1
+ xvpermi.d xr3, xr3, 0xd8
+ xvpackev.b xr3, xr0, xr3
+ xvslli.h xr1, xr3, 6
+ xvsrli.h xr2, xr3, 2
+ xvor.v xr3, xr2, xr1
+ xvst xr3, a0, 0
+ addi.d a0, a0, 32
+ bnez a4, 1b
+2:
+ beqz t0, 4f
+3:
+ ld.b t1, a1, 3
+ addi.d t0, t0, -1
+ addi.d a1, a1, 4
+ andi t1, t1, 0xff
+ slli.w t2, t1, 6
+ srli.w t3, t1, 2
+ or t1, t2, t3
+ st.h t1, a0, 0
+ addi.d a0, a0, 2
+ bnez t0, 3b
+4:
+endfunc
diff --git a/libswscale/loongarch/input_lasx.c b/libswscale/loongarch/input_lasx.c
index 4830072eaf..0f1d954880 100644
--- a/libswscale/loongarch/input_lasx.c
+++ b/libswscale/loongarch/input_lasx.c
@@ -200,3 +200,46 @@ void planar_rgb_to_y_lasx(uint8_t *_dst, const uint8_t *src[4], int width,
dst[i] = (tem_ry * r + tem_gy * g + tem_by * b + set) >> shift;
}
}
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c)
+{
+ enum AVPixelFormat srcFormat = c->srcFormat;
+
+ switch (srcFormat) {
+ case AV_PIX_FMT_YUYV422:
+ c->chrToYV12 = yuy2ToUV_lasx;
+ break;
+ case AV_PIX_FMT_YVYU422:
+ c->chrToYV12 = yvy2ToUV_lasx;
+ break;
+ case AV_PIX_FMT_UYVY422:
+ c->chrToYV12 = uyvyToUV_lasx;
+ break;
+ case AV_PIX_FMT_NV12:
+ case AV_PIX_FMT_NV16:
+ case AV_PIX_FMT_NV24:
+ c->chrToYV12 = nv12ToUV_lasx;
+ break;
+ case AV_PIX_FMT_NV21:
+ case AV_PIX_FMT_NV42:
+ c->chrToYV12 = nv21ToUV_lasx;
+ break;
+ case AV_PIX_FMT_GBRAP:
+ case AV_PIX_FMT_GBRP:
+ c->readChrPlanar = planar_rgb_to_uv_lasx;
+ break;
+ }
+
+ if (c->needAlpha) {
+ switch (srcFormat) {
+ case AV_PIX_FMT_BGRA:
+ case AV_PIX_FMT_RGBA:
+ c->alpToYV12 = rgbaToA_lasx;
+ break;
+ case AV_PIX_FMT_ABGR:
+ case AV_PIX_FMT_ARGB:
+ c->alpToYV12 = abgrToA_lasx;
+ break;
+ }
+ }
+}
diff --git a/libswscale/loongarch/input_lsx.c b/libswscale/loongarch/input_lsx.c
new file mode 100644
index 0000000000..1bb04457bb
--- /dev/null
+++ b/libswscale/loongarch/input_lsx.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ * Contributed by Shiyou Yin<yinshiyou-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c)
+{
+ enum AVPixelFormat srcFormat = c->srcFormat;
+
+ switch (srcFormat) {
+ case AV_PIX_FMT_YUYV422:
+ c->chrToYV12 = yuy2ToUV_lsx;
+ break;
+ case AV_PIX_FMT_YVYU422:
+ c->chrToYV12 = yvy2ToUV_lsx;
+ break;
+ case AV_PIX_FMT_UYVY422:
+ c->chrToYV12 = uyvyToUV_lsx;
+ break;
+ case AV_PIX_FMT_NV12:
+ case AV_PIX_FMT_NV16:
+ case AV_PIX_FMT_NV24:
+ c->chrToYV12 = nv12ToUV_lsx;
+ break;
+ case AV_PIX_FMT_NV21:
+ case AV_PIX_FMT_NV42:
+ c->chrToYV12 = nv21ToUV_lsx;
+ break;
+ case AV_PIX_FMT_GBRAP:
+ case AV_PIX_FMT_GBRP:
+ c->readChrPlanar = planar_rgb_to_uv_lsx;
+ break;
+ }
+
+ if (c->needAlpha) {
+ switch (srcFormat) {
+ case AV_PIX_FMT_BGRA:
+ case AV_PIX_FMT_RGBA:
+ c->alpToYV12 = rgbaToA_lsx;
+ break;
+ case AV_PIX_FMT_ABGR:
+ case AV_PIX_FMT_ARGB:
+ c->alpToYV12 = abgrToA_lsx;
+ break;
+ }
+ }
+}
diff --git a/libswscale/loongarch/output.S b/libswscale/loongarch/output.S
index b44bac502a..d71667e38a 100644
--- a/libswscale/loongarch/output.S
+++ b/libswscale/loongarch/output.S
@@ -23,11 +23,11 @@
#include "libavcodec/loongarch/loongson_asm.S"
-/* static void ff_yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
+/* static void yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
* const int16_t **src, uint8_t *dest, int dstW,
* const uint8_t *dither, int offset)
*/
-function ff_yuv2planeX_8_lsx
+function yuv2planeX_8_lsx
addi.w t1, a6, 1
addi.w t2, a6, 2
addi.w t3, a6, 3
@@ -136,3 +136,253 @@ function ff_yuv2planeX_8_lsx
blt zero, a4, .DEST
.END:
endfunc
+
+/*
+ * void yuv2plane1_8_lsx(const int16_t *src, uint8_t *dest, int dstW,
+ * const uint8_t *dither, int offset)
+ */
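+/*
+ * Unscaled planar output, roughly:
+ *     dest[i] = av_clip_uint8((src[i] + dither[(i + offset) & 7]) >> 7);
+ * The vector loop emits 8 pixels per iteration; a non-multiple-of-8 width is
+ * finished by redoing one overlapping vector over the last 8 pixels with the
+ * dither phase adjusted accordingly.
+ */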
+function yuv2plane1_8_lsx
+ addi.w t1, a4, 1
+ addi.w t2, a4, 2
+ addi.w t3, a4, 3
+ addi.w t4, a4, 4
+ addi.w t5, a4, 5
+ addi.w t6, a4, 6
+ addi.w t7, a4, 7
+ andi t0, a4, 7
+ andi t1, t1, 7
+ andi t2, t2, 7
+ andi t3, t3, 7
+ andi t4, t4, 7
+ andi t5, t5, 7
+ andi t6, t6, 7
+ andi t7, t7, 7
+ ldx.bu t0, a3, t0
+ ldx.bu t1, a3, t1
+ ldx.bu t2, a3, t2
+ ldx.bu t3, a3, t3
+ ldx.bu t4, a3, t4
+ ldx.bu t5, a3, t5
+ ldx.bu t6, a3, t6
+ ldx.bu t7, a3, t7
+ vinsgr2vr.h vr1, t0, 0
+ vinsgr2vr.h vr1, t1, 1
+ vinsgr2vr.h vr1, t2, 2
+ vinsgr2vr.h vr1, t3, 3
+ vinsgr2vr.h vr1, t4, 4
+ vinsgr2vr.h vr1, t5, 5
+ vinsgr2vr.h vr1, t6, 6
+ vinsgr2vr.h vr1, t7, 7
+ vsub.h vr0, vr0, vr0
+ vilvl.h vr2, vr0, vr1
+ vilvh.h vr3, vr0, vr1
+
+ andi t8, a2, 7
+ srli.d a2, a2, 3
+ beqz a2, 2f
+1:
+ vld vr1, a0, 0
+ addi.d a0, a0, 16
+ vshuf4i.d vr0, vr1, 8
+ vexth.w.h vr4, vr0
+ vexth.w.h vr5, vr1
+
+ vadd.w vr4, vr2, vr4
+ vadd.w vr5, vr3, vr5
+ vsrai.w vr4, vr4, 7
+ vsrai.w vr5, vr5, 7
+ vclip255.w vr4, vr4
+ vclip255.w vr5, vr5
+ vpickev.h vr1, vr5, vr4
+ vpickev.b vr1, vr1, vr1
+ fst.d f1, a1, 0
+ addi.d a1, a1, 8
+ addi.d a2, a2, -1
+ bnez a2, 1b
+2:
+ beqz t8, 4f
+3:
+ add.w a4, a4, t8
+ addi.w t1, a4, 1
+ addi.w t2, a4, 2
+ addi.w t3, a4, 3
+ addi.w t4, a4, 4
+ addi.w t5, a4, 5
+ addi.w t6, a4, 6
+ addi.w t7, a4, 7
+ andi t0, a4, 7
+ andi t1, t1, 7
+ andi t2, t2, 7
+ andi t3, t3, 7
+ andi t4, t4, 7
+ andi t5, t5, 7
+ andi t6, t6, 7
+ andi t7, t7, 7
+ ldx.bu t0, a3, t0
+ ldx.bu t1, a3, t1
+ ldx.bu t2, a3, t2
+ ldx.bu t3, a3, t3
+ ldx.bu t4, a3, t4
+ ldx.bu t5, a3, t5
+ ldx.bu t6, a3, t6
+ ldx.bu t7, a3, t7
+ vinsgr2vr.h vr1, t0, 0
+ vinsgr2vr.h vr1, t1, 1
+ vinsgr2vr.h vr1, t2, 2
+ vinsgr2vr.h vr1, t3, 3
+ vinsgr2vr.h vr1, t4, 4
+ vinsgr2vr.h vr1, t5, 5
+ vinsgr2vr.h vr1, t6, 6
+ vinsgr2vr.h vr1, t7, 7
+ vsub.h vr0, vr0, vr0
+ vilvl.h vr2, vr0, vr1
+ vilvh.h vr3, vr0, vr1
+
+ addi.d a0, a0, -16
+ add.d a0, a0, t8
+ add.d a0, a0, t8
+ addi.d a1, a1, -8
+ add.d a1, a1, t8
+
+ vld vr1, a0, 0
+ vshuf4i.d vr0, vr1, 8
+ vexth.w.h vr4, vr0
+ vexth.w.h vr5, vr1
+
+ vadd.w vr4, vr2, vr4
+ vadd.w vr5, vr3, vr5
+ vsrai.w vr4, vr4, 7
+ vsrai.w vr5, vr5, 7
+ vclip255.w vr4, vr4
+ vclip255.w vr5, vr5
+ vpickev.h vr1, vr5, vr4
+ vpickev.b vr1, vr1, vr1
+ fst.d f1, a1, 0
+4:
+endfunc
+
+function yuv2plane1_8_lasx
+ addi.w t1, a4, 1
+ addi.w t2, a4, 2
+ addi.w t3, a4, 3
+ addi.w t4, a4, 4
+ addi.w t5, a4, 5
+ addi.w t6, a4, 6
+ addi.w t7, a4, 7
+ andi t0, a4, 7
+ andi t1, t1, 7
+ andi t2, t2, 7
+ andi t3, t3, 7
+ andi t4, t4, 7
+ andi t5, t5, 7
+ andi t6, t6, 7
+ andi t7, t7, 7
+ ldx.bu t0, a3, t0
+ ldx.bu t1, a3, t1
+ ldx.bu t2, a3, t2
+ ldx.bu t3, a3, t3
+ ldx.bu t4, a3, t4
+ ldx.bu t5, a3, t5
+ ldx.bu t6, a3, t6
+ ldx.bu t7, a3, t7
+ vinsgr2vr.h vr1, t0, 0
+ vinsgr2vr.h vr1, t1, 1
+ vinsgr2vr.h vr1, t2, 2
+ vinsgr2vr.h vr1, t3, 3
+ vinsgr2vr.h vr1, t4, 4
+ vinsgr2vr.h vr1, t5, 5
+ vinsgr2vr.h vr1, t6, 6
+ vinsgr2vr.h vr1, t7, 7
+ xvpermi.q xr1, xr1, 0
+ xvsub.h xr0, xr0, xr0
+ xvilvl.h xr2, xr0, xr1
+ xvilvh.h xr3, xr0, xr1
+
+ andi t8, a2, 15
+ srli.d a2, a2, 4
+ beqz a2, 2f
+1:
+ xvld xr1, a0, 0
+ addi.d a0, a0, 32
+ xvpermi.d xr0, xr1, 0xa0
+ xvexth.w.h xr4, xr0
+ xvexth.w.h xr5, xr1
+
+ xvadd.w xr4, xr2, xr4
+ xvadd.w xr5, xr3, xr5
+ xvsrai.w xr4, xr4, 7
+ xvsrai.w xr5, xr5, 7
+ xvclip255.w xr4, xr4
+ xvclip255.w xr5, xr5
+ xvpickev.h xr1, xr5, xr4
+ xvpickev.b xr0, xr1, xr1
+ xvpermi.q xr1, xr0, 1
+ fst.d f0, a1, 0
+ fst.d f1, a1, 8
+ addi.d a1, a1, 16
+ addi.d a2, a2, -1
+ bnez a2, 1b
+2:
+ beqz t8, 4f
+3:
+ add.w a4, a4, t8
+ addi.w t1, a4, 1
+ addi.w t2, a4, 2
+ addi.w t3, a4, 3
+ addi.w t4, a4, 4
+ addi.w t5, a4, 5
+ addi.w t6, a4, 6
+ addi.w t7, a4, 7
+ andi t0, a4, 7
+ andi t1, t1, 7
+ andi t2, t2, 7
+ andi t3, t3, 7
+ andi t4, t4, 7
+ andi t5, t5, 7
+ andi t6, t6, 7
+ andi t7, t7, 7
+ ldx.bu t0, a3, t0
+ ldx.bu t1, a3, t1
+ ldx.bu t2, a3, t2
+ ldx.bu t3, a3, t3
+ ldx.bu t4, a3, t4
+ ldx.bu t5, a3, t5
+ ldx.bu t6, a3, t6
+ ldx.bu t7, a3, t7
+ vinsgr2vr.h vr1, t0, 0
+ vinsgr2vr.h vr1, t1, 1
+ vinsgr2vr.h vr1, t2, 2
+ vinsgr2vr.h vr1, t3, 3
+ vinsgr2vr.h vr1, t4, 4
+ vinsgr2vr.h vr1, t5, 5
+ vinsgr2vr.h vr1, t6, 6
+ vinsgr2vr.h vr1, t7, 7
+ xvpermi.q xr1, xr1, 0
+ xvsub.h xr0, xr0, xr0
+ xvilvl.h xr2, xr0, xr1
+ xvilvh.h xr3, xr0, xr1
+
+ addi.d a0, a0, -32
+ add.d a0, a0, t8
+ add.d a0, a0, t8
+ addi.d a1, a1, -16
+ add.d a1, a1, t8
+
+ xvld xr1, a0, 0
+ xvpermi.d xr0, xr1, 0xa0
+ xvexth.w.h xr4, xr0
+ xvexth.w.h xr5, xr1
+
+ xvadd.w xr4, xr2, xr4
+ xvadd.w xr5, xr3, xr5
+ xvsrai.w xr4, xr4, 7
+ xvsrai.w xr5, xr5, 7
+ xvclip255.w xr4, xr4
+ xvclip255.w xr5, xr5
+ xvpickev.h xr1, xr5, xr4
+ xvpickev.b xr0, xr1, xr1
+ xvpermi.q xr1, xr0, 1
+ fst.d f0, a1, 0
+ fst.d f1, a1, 8
+4:
+endfunc
diff --git a/libswscale/loongarch/output_lasx.c b/libswscale/loongarch/output_lasx.c
index 277d7063e6..bc8ab8cf36 100644
--- a/libswscale/loongarch/output_lasx.c
+++ b/libswscale/loongarch/output_lasx.c
@@ -22,7 +22,7 @@
#include "swscale_loongarch.h"
#include "libavutil/loongarch/loongson_intrinsics.h"
-void ff_yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
+void yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset)
{
@@ -1775,8 +1775,27 @@ YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)
-av_cold void ff_sws_init_output_lasx(SwsContext *c)
+av_cold void ff_sws_init_output_lasx(SwsContext *c,
+ yuv2planar1_fn *yuv2plane1,
+ yuv2planarX_fn *yuv2planeX,
+ yuv2interleavedX_fn *yuv2nv12cX,
+ yuv2packed1_fn *yuv2packed1,
+ yuv2packed2_fn *yuv2packed2,
+ yuv2packedX_fn *yuv2packedX,
+ yuv2anyX_fn *yuv2anyX)
{
+ enum AVPixelFormat dstFormat = c->dstFormat;
+
+ /* Add initialization once optimized */
+ if (isSemiPlanarYUV(dstFormat) && isDataInHighBits(dstFormat)) {
+ } else if (is16BPS(dstFormat)) {
+ } else if (isNBPS(dstFormat)) {
+ } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
+ } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
+ } else {
+ *yuv2plane1 = yuv2plane1_8_lasx;
+ *yuv2planeX = yuv2planeX_8_lasx;
+ }
if(c->flags & SWS_FULL_CHR_H_INT) {
switch (c->dstFormat) {
diff --git a/libswscale/loongarch/output_lsx.c b/libswscale/loongarch/output_lsx.c
index 768cc3abc6..de9b1534ee 100644
--- a/libswscale/loongarch/output_lsx.c
+++ b/libswscale/loongarch/output_lsx.c
@@ -1624,8 +1624,28 @@ YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)
-av_cold void ff_sws_init_output_lsx(SwsContext *c)
+av_cold void ff_sws_init_output_lsx(SwsContext *c,
+ yuv2planar1_fn *yuv2plane1,
+ yuv2planarX_fn *yuv2planeX,
+ yuv2interleavedX_fn *yuv2nv12cX,
+ yuv2packed1_fn *yuv2packed1,
+ yuv2packed2_fn *yuv2packed2,
+ yuv2packedX_fn *yuv2packedX,
+ yuv2anyX_fn *yuv2anyX)
{
+ enum AVPixelFormat dstFormat = c->dstFormat;
+
+ /* Add initialization once optimized */
+ if (isSemiPlanarYUV(dstFormat) && isDataInHighBits(dstFormat)) {
+ } else if (is16BPS(dstFormat)) {
+ } else if (isNBPS(dstFormat)) {
+ } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
+ } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
+ } else {
+ *yuv2plane1 = yuv2plane1_8_lsx;
+ *yuv2planeX = yuv2planeX_8_lsx;
+ }
+
if(c->flags & SWS_FULL_CHR_H_INT) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGBA:
diff --git a/libswscale/loongarch/swscale.S b/libswscale/loongarch/swscale.S
index aa4c5cbe28..67b1bc834d 100644
--- a/libswscale/loongarch/swscale.S
+++ b/libswscale/loongarch/swscale.S
@@ -1866,3 +1866,371 @@ function ff_hscale_16_to_19_sub_lsx
ld.d s8, sp, 64
addi.d sp, sp, 72
endfunc
+
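+/*
+ * Limited<->full range conversion on swscale's int16 intermediate samples.
+ * The constants below match the C reference paths, roughly:
+ *     lumRangeFromJpeg: dst[i] = (dst[i] * 14071 + 33561947) >> 14
+ *     lumRangeToJpeg:   dst[i] = (FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14
+ *     chrRangeFromJpeg: dst[i] = (dst[i] * 1799 + 4081085) >> 11
+ *     chrRangeToJpeg:   dst[i] = (FFMIN(dst[i], 30775) * 4663 - 9289992) >> 12
+ * Each routine processes 8 (LSX) or 16 (LASX) samples per plane per iteration
+ * and falls back to a scalar tail for the remainder.
+ */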
+function lumRangeFromJpeg_lsx
+ li.w t0, 14071
+ li.w t1, 33561947
+ vreplgr2vr.h vr0, t0
+ srli.w t2, a1, 3
+ andi t3, a1, 7
+ beqz t2, 2f
+1:
+ vld vr1, a0, 0
+ vreplgr2vr.w vr2, t1
+ vreplgr2vr.w vr3, t1
+ vmaddwev.w.h vr2, vr0, vr1
+ vmaddwod.w.h vr3, vr0, vr1
+ vsrai.w vr2, vr2, 14
+ vsrai.w vr3, vr3, 14
+ vpackev.h vr1, vr3, vr2
+ vst vr1, a0, 0
+ addi.d a0, a0, 16
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ mul.w t4, t4, t0
+ add.w t4, t4, t1
+ srai.w t4, t4, 14
+ st.h t4, a0, 0
+ addi.d a0, a0, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function lumRangeFromJpeg_lasx
+ li.w t0, 14071
+ li.w t1, 33561947
+ xvreplgr2vr.h xr0, t0
+ srli.w t2, a1, 4
+ andi t3, a1, 15
+ beqz t2, 2f
+1:
+ xvld xr1, a0, 0
+ xvreplgr2vr.w xr2, t1
+ xvreplgr2vr.w xr3, t1
+ xvmaddwev.w.h xr2, xr0, xr1
+ xvmaddwod.w.h xr3, xr0, xr1
+ xvsrai.w xr2, xr2, 14
+ xvsrai.w xr3, xr3, 14
+ xvpackev.h xr1, xr3, xr2
+ xvst xr1, a0, 0
+ addi.d a0, a0, 32
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ mul.w t4, t4, t0
+ add.w t4, t4, t1
+ srai.w t4, t4, 14
+ st.h t4, a0, 0
+ addi.d a0, a0, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function lumRangeToJpeg_lsx
+ li.w t0, 19077
+ li.w t1, -39057361
+ li.w t2, 30189
+ vreplgr2vr.h vr0, t0
+ vreplgr2vr.h vr4, t2
+ srli.w t2, a1, 3
+ andi t3, a1, 7
+ beqz t2, 2f
+1:
+ vld vr1, a0, 0
+ vreplgr2vr.w vr2, t1
+ vreplgr2vr.w vr3, t1
+ vmin.h vr1, vr1, vr4
+ vmaddwev.w.h vr2, vr0, vr1
+ vmaddwod.w.h vr3, vr0, vr1
+ vsrai.w vr2, vr2, 14
+ vsrai.w vr3, vr3, 14
+ vpackev.h vr1, vr3, vr2
+ vst vr1, a0, 0
+ addi.d a0, a0, 16
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ vreplgr2vr.h vr1, t4
+ vmin.h vr1, vr1, vr4
+ vpickve2gr.h t4, vr1, 0
+ mul.w t4, t4, t0
+ add.w t4, t4, t1
+ srai.w t4, t4, 14
+ st.h t4, a0, 0
+ addi.d a0, a0, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function lumRangeToJpeg_lasx
+ li.w t0, 19077
+ li.w t1, -39057361
+ li.w t2, 30189
+ xvreplgr2vr.h xr0, t0
+ xvreplgr2vr.h xr4, t2
+ srli.w t2, a1, 4
+ andi t3, a1, 15
+ beqz t2, 2f
+1:
+ xvld xr1, a0, 0
+ xvreplgr2vr.w xr2, t1
+ xvreplgr2vr.w xr3, t1
+ xvmin.h xr1, xr1, xr4
+ xvmaddwev.w.h xr2, xr0, xr1
+ xvmaddwod.w.h xr3, xr0, xr1
+ xvsrai.w xr2, xr2, 14
+ xvsrai.w xr3, xr3, 14
+ xvpackev.h xr1, xr3, xr2
+ xvst xr1, a0, 0
+ addi.d a0, a0, 32
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ vreplgr2vr.h vr1, t4
+ vmin.h vr1, vr1, vr4
+ vpickve2gr.h t4, vr1, 0
+ mul.w t4, t4, t0
+ add.w t4, t4, t1
+ srai.w t4, t4, 14
+ st.h t4, a0, 0
+ addi.d a0, a0, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function chrRangeFromJpeg_lsx
+ li.w t0, 1799
+ li.w t1, 4081085
+ vreplgr2vr.h vr0, t0
+ srli.w t2, a2, 3
+ andi t3, a2, 7
+ beqz t2, 2f
+1:
+ vld vr1, a0, 0
+ vld vr2, a1, 0
+ vreplgr2vr.w vr3, t1
+ vreplgr2vr.w vr4, t1
+ vreplgr2vr.w vr5, t1
+ vreplgr2vr.w vr6, t1
+ vmaddwev.w.h vr3, vr0, vr1
+ vmaddwod.w.h vr4, vr0, vr1
+ vmaddwev.w.h vr5, vr0, vr2
+ vmaddwod.w.h vr6, vr0, vr2
+ vsrai.w vr3, vr3, 11
+ vsrai.w vr4, vr4, 11
+ vsrai.w vr5, vr5, 11
+ vsrai.w vr6, vr6, 11
+ vpackev.h vr1, vr4, vr3
+ vpackev.h vr2, vr6, vr5
+ vst vr1, a0, 0
+ vst vr2, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ ld.h t5, a1, 0
+ mul.w t4, t4, t0
+ mul.w t5, t5, t0
+ add.w t4, t4, t1
+ add.w t5, t5, t1
+ srai.w t4, t4, 11
+ srai.w t5, t5, 11
+ st.h t4, a0, 0
+ st.h t5, a1, 0
+ addi.d a0, a0, 2
+ addi.d a1, a1, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function chrRangeFromJpeg_lasx
+ li.w t0, 1799
+ li.w t1, 4081085
+ xvreplgr2vr.h xr0, t0
+ srli.w t2, a2, 4
+ andi t3, a2, 15
+ beqz t2, 2f
+1:
+ xvld xr1, a0, 0
+ xvld xr2, a1, 0
+ xvreplgr2vr.w xr3, t1
+ xvreplgr2vr.w xr4, t1
+ xvreplgr2vr.w xr5, t1
+ xvreplgr2vr.w xr6, t1
+ xvmaddwev.w.h xr3, xr0, xr1
+ xvmaddwod.w.h xr4, xr0, xr1
+ xvmaddwev.w.h xr5, xr0, xr2
+ xvmaddwod.w.h xr6, xr0, xr2
+ xvsrai.w xr3, xr3, 11
+ xvsrai.w xr4, xr4, 11
+ xvsrai.w xr5, xr5, 11
+ xvsrai.w xr6, xr6, 11
+ xvpackev.h xr1, xr4, xr3
+ xvpackev.h xr2, xr6, xr5
+ xvst xr1, a0, 0
+ xvst xr2, a1, 0
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ ld.h t5, a1, 0
+ mul.w t4, t4, t0
+ mul.w t5, t5, t0
+ add.w t4, t4, t1
+ add.w t5, t5, t1
+ srai.w t4, t4, 11
+ srai.w t5, t5, 11
+ st.h t4, a0, 0
+ st.h t5, a1, 0
+ addi.d a0, a0, 2
+ addi.d a1, a1, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function chrRangeToJpeg_lsx
+ li.w t0, 4663
+ li.w t1, -9289992
+ li.w t2, 30775
+ vreplgr2vr.h vr0, t0
+ vreplgr2vr.h vr7, t2
+ srli.w t2, a2, 3
+ andi t3, a2, 7
+ beqz t2, 2f
+1:
+ vld vr1, a0, 0
+ vld vr2, a1, 0
+ vreplgr2vr.w vr3, t1
+ vreplgr2vr.w vr4, t1
+ vreplgr2vr.w vr5, t1
+ vreplgr2vr.w vr6, t1
+ vmin.h vr1, vr1, vr7
+ vmin.h vr2, vr2, vr7
+ vmaddwev.w.h vr3, vr0, vr1
+ vmaddwod.w.h vr4, vr0, vr1
+ vmaddwev.w.h vr5, vr0, vr2
+ vmaddwod.w.h vr6, vr0, vr2
+ vsrai.w vr3, vr3, 12
+ vsrai.w vr4, vr4, 12
+ vsrai.w vr5, vr5, 12
+ vsrai.w vr6, vr6, 12
+ vpackev.h vr1, vr4, vr3
+ vpackev.h vr2, vr6, vr5
+ vst vr1, a0, 0
+ vst vr2, a1, 0
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ ld.h t5, a1, 0
+ vreplgr2vr.h vr1, t4
+ vreplgr2vr.h vr2, t5
+ vmin.h vr1, vr1, vr7
+ vmin.h vr2, vr2, vr7
+ vpickve2gr.h t4, vr1, 0
+ vpickve2gr.h t5, vr2, 0
+ mul.w t4, t4, t0
+ mul.w t5, t5, t0
+ add.w t4, t4, t1
+ add.w t5, t5, t1
+ srai.w t4, t4, 12
+ srai.w t5, t5, 12
+ st.h t4, a0, 0
+ st.h t5, a1, 0
+ addi.d a0, a0, 2
+ addi.d a1, a1, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
+
+function chrRangeToJpeg_lasx
+ li.w t0, 4663
+ li.w t1, -9289992
+ li.w t2, 30775
+ xvreplgr2vr.h xr0, t0
+ xvreplgr2vr.h xr7, t2
+ srli.w t2, a2, 4
+ andi t3, a2, 15
+ beqz t2, 2f
+1:
+ xvld xr1, a0, 0
+ xvld xr2, a1, 0
+ xvreplgr2vr.w xr3, t1
+ xvreplgr2vr.w xr4, t1
+ xvreplgr2vr.w xr5, t1
+ xvreplgr2vr.w xr6, t1
+ xvmin.h xr1, xr1, xr7
+ xvmin.h xr2, xr2, xr7
+ xvmaddwev.w.h xr3, xr0, xr1
+ xvmaddwod.w.h xr4, xr0, xr1
+ xvmaddwev.w.h xr5, xr0, xr2
+ xvmaddwod.w.h xr6, xr0, xr2
+ xvsrai.w xr3, xr3, 12
+ xvsrai.w xr4, xr4, 12
+ xvsrai.w xr5, xr5, 12
+ xvsrai.w xr6, xr6, 12
+ xvpackev.h xr1, xr4, xr3
+ xvpackev.h xr2, xr6, xr5
+ xvst xr1, a0, 0
+ xvst xr2, a1, 0
+ addi.d a0, a0, 32
+ addi.d a1, a1, 32
+ addi.d t2, t2, -1
+ bnez t2, 1b
+2:
+ beqz t3, 4f
+3:
+ ld.h t4, a0, 0
+ ld.h t5, a1, 0
+ vreplgr2vr.h vr1, t4
+ vreplgr2vr.h vr2, t5
+ vmin.h vr1, vr1, vr7
+ vmin.h vr2, vr2, vr7
+ vpickve2gr.h t4, vr1, 0
+ vpickve2gr.h t5, vr2, 0
+ mul.w t4, t4, t0
+ mul.w t5, t5, t0
+ add.w t4, t4, t1
+ add.w t5, t5, t1
+ srai.w t4, t4, 12
+ srai.w t5, t5, 12
+ st.h t4, a0, 0
+ st.h t5, a1, 0
+ addi.d a0, a0, 2
+ addi.d a1, a1, 2
+ addi.d t3, t3, -1
+ bnez t3, 3b
+4:
+endfunc
diff --git a/libswscale/loongarch/swscale_init_loongarch.c b/libswscale/loongarch/swscale_init_loongarch.c
index 53e4f970b6..3a5a7ee856 100644
--- a/libswscale/loongarch/swscale_init_loongarch.c
+++ b/libswscale/loongarch/swscale_init_loongarch.c
@@ -24,11 +24,46 @@
#include "libswscale/rgb2rgb.h"
#include "libavutil/loongarch/cpu.h"
+av_cold void ff_sws_init_range_convert_loongarch(SwsContext *c)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_lsx(cpu_flags)) {
+ if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+ if (c->dstBpc <= 14) {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg_lsx;
+ c->chrConvertRange = chrRangeFromJpeg_lsx;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg_lsx;
+ c->chrConvertRange = chrRangeToJpeg_lsx;
+ }
+ }
+ }
+ }
+ if (have_lasx(cpu_flags)) {
+ if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+ if (c->dstBpc <= 14) {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg_lasx;
+ c->chrConvertRange = chrRangeFromJpeg_lasx;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg_lasx;
+ c->chrConvertRange = chrRangeToJpeg_lasx;
+ }
+ }
+ }
+ }
+}
+
av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
if (have_lsx(cpu_flags)) {
- ff_sws_init_output_lsx(c);
+ ff_sws_init_output_lsx(c, &c->yuv2plane1, &c->yuv2planeX,
+ &c->yuv2nv12cX, &c->yuv2packed1,
+ &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+ ff_sws_init_input_lsx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lsx;
@@ -39,21 +74,13 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lsx
: ff_hscale_16_to_15_lsx;
}
- switch (c->srcFormat) {
- case AV_PIX_FMT_GBRAP:
- case AV_PIX_FMT_GBRP:
- {
- c->readChrPlanar = planar_rgb_to_uv_lsx;
- c->readLumPlanar = planar_rgb_to_y_lsx;
- }
- break;
- }
- if (c->dstBpc == 8)
- c->yuv2planeX = ff_yuv2planeX_8_lsx;
}
#if HAVE_LASX
if (have_lasx(cpu_flags)) {
- ff_sws_init_output_lasx(c);
+ ff_sws_init_output_lasx(c, &c->yuv2plane1, &c->yuv2planeX,
+ &c->yuv2nv12cX, &c->yuv2packed1,
+ &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);
+ ff_sws_init_input_lasx(c);
if (c->srcBpc == 8) {
if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = ff_hscale_8_to_15_lasx;
@@ -64,19 +91,9 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
c->hyScale = c->hcScale = c->dstBpc > 14 ? ff_hscale_16_to_19_lasx
: ff_hscale_16_to_15_lasx;
}
- switch (c->srcFormat) {
- case AV_PIX_FMT_GBRAP:
- case AV_PIX_FMT_GBRP:
- {
- c->readChrPlanar = planar_rgb_to_uv_lasx;
- c->readLumPlanar = planar_rgb_to_y_lasx;
- }
- break;
- }
- if (c->dstBpc == 8)
- c->yuv2planeX = ff_yuv2planeX_8_lasx;
}
#endif // #if HAVE_LASX
+ ff_sws_init_range_convert_loongarch(c);
}
av_cold void rgb2rgb_init_loongarch(void)
diff --git a/libswscale/loongarch/swscale_loongarch.h b/libswscale/loongarch/swscale_loongarch.h
index 0514abae21..07c91bc25c 100644
--- a/libswscale/loongarch/swscale_loongarch.h
+++ b/libswscale/loongarch/swscale_loongarch.h
@@ -50,17 +50,55 @@ void ff_hscale_16_to_19_sub_lsx(SwsContext *c, int16_t *_dst, int dstW,
const uint8_t *_src, const int16_t *filter,
const int32_t *filterPos, int filterSize, int sh);
+void lumRangeFromJpeg_lsx(int16_t *dst, int width);
+void chrRangeFromJpeg_lsx(int16_t *dstU, int16_t *dstV, int width);
+void lumRangeToJpeg_lsx(int16_t *dst, int width);
+void chrRangeToJpeg_lsx(int16_t *dstU, int16_t *dstV, int width);
+
void planar_rgb_to_uv_lsx(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4],
int width, int32_t *rgb2yuv, void *opq);
void planar_rgb_to_y_lsx(uint8_t *_dst, const uint8_t *src[4], int width,
int32_t *rgb2yuv, void *opq);
-void ff_yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
+void yuv2planeX_8_lsx(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
-av_cold void ff_sws_init_output_lsx(SwsContext *c);
+void yuv2plane1_8_lsx(const int16_t *src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
+
+void yuy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void yvy2ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void uyvyToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv12ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv21ToUV_lsx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void abgrToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+void rgbaToA_lsx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+av_cold void ff_sws_init_input_lsx(SwsContext *c);
+
+av_cold void ff_sws_init_output_lsx(SwsContext *c,
+ yuv2planar1_fn *yuv2plane1,
+ yuv2planarX_fn *yuv2planeX,
+ yuv2interleavedX_fn *yuv2nv12cX,
+ yuv2packed1_fn *yuv2packed1,
+ yuv2packed2_fn *yuv2packed2,
+ yuv2packedX_fn *yuv2packedX,
+ yuv2anyX_fn *yuv2anyX);
int yuv420_rgb24_lsx(SwsContext *c, const uint8_t *src[], int srcStride[],
int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]);
@@ -97,6 +135,11 @@ void ff_hscale_16_to_15_lasx(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *_src, const int16_t *filter,
const int32_t *filterPos, int filterSize);
+void lumRangeFromJpeg_lasx(int16_t *dst, int width);
+void chrRangeFromJpeg_lasx(int16_t *dstU, int16_t *dstV, int width);
+void lumRangeToJpeg_lasx(int16_t *dst, int width);
+void chrRangeToJpeg_lasx(int16_t *dstU, int16_t *dstV, int width);
+
void planar_rgb_to_uv_lasx(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4],
int width, int32_t *rgb2yuv, void *opq);
@@ -125,11 +168,44 @@ void ff_interleave_bytes_lasx(const uint8_t *src1, const uint8_t *src2,
uint8_t *dest, int width, int height,
int src1Stride, int src2Stride, int dstStride);
-void ff_yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
+void yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest, int dstW,
const uint8_t *dither, int offset);
-av_cold void ff_sws_init_output_lasx(SwsContext *c);
+void yuv2plane1_8_lasx(const int16_t *src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset);
+
+void yuy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void yvy2ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void uyvyToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv12ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void nv21ToUV_lasx(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *unused, void *opq);
+
+void abgrToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+void rgbaToA_lasx(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused, void *opq);
+
+av_cold void ff_sws_init_input_lasx(SwsContext *c);
+
+av_cold void ff_sws_init_output_lasx(SwsContext *c,
+ yuv2planar1_fn *yuv2plane1,
+ yuv2planarX_fn *yuv2planeX,
+ yuv2interleavedX_fn *yuv2nv12cX,
+ yuv2packed1_fn *yuv2packed1,
+ yuv2packed2_fn *yuv2packed2,
+ yuv2packedX_fn *yuv2packedX,
+ yuv2anyX_fn *yuv2anyX);
#endif // #if HAVE_LASX
#endif /* SWSCALE_LOONGARCH_SWSCALE_LOONGARCH_H */
diff --git a/libswscale/output.c b/libswscale/output.c
index 8849a3201a..b234f9c6b9 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1207,8 +1207,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = (buf0[i * 2] ) >> 2;
- int Y2 = (buf0[i * 2 + 1]) >> 2;
+ SUINT Y1 = (buf0[i * 2] ) >> 2;
+ SUINT Y2 = (buf0[i * 2 + 1]) >> 2;
int U = (ubuf0[i] - (128 << 11)) >> 2;
int V = (vbuf0[i] - (128 << 11)) >> 2;
int R, G, B;
@@ -1232,20 +1232,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
B = U * c->yuv2rgb_u2b_coeff;
- output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
- output_pixel(&dest[1], av_clip_uintp2((( G + Y1) >> 14) + (1<<15), 16));
- output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[1], av_clip_uintp2(((int)( G + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y1) >> 14) + (1<<15), 16));
if (eightbytes) {
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
- output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[5], av_clip_uintp2((( G + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[4], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[5], av_clip_uintp2(((int)( G + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[6], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
dest += 8;
} else {
- output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[4], av_clip_uintp2((( G + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[3], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[4], av_clip_uintp2(((int)( G + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[5], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
dest += 6;
}
}
@@ -1253,8 +1253,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
int A1 = 0xffff<<14, A2 = 0xffff<<14;
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = (buf0[i * 2] ) >> 2;
- int Y2 = (buf0[i * 2 + 1]) >> 2;
+ SUINT Y1 = (buf0[i * 2] ) >> 2;
+ SUINT Y2 = (buf0[i * 2 + 1]) >> 2;
int U = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
int V = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
int R, G, B;
@@ -1278,20 +1278,20 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
B = U * c->yuv2rgb_u2b_coeff;
- output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
- output_pixel(&dest[1], av_clip_uintp2((( G + Y1) >> 14) + (1<<15), 16));
- output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[1], av_clip_uintp2(((int)( G + Y1) >> 14) + (1<<15), 16));
+ output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y1) >> 14) + (1<<15), 16));
if (eightbytes) {
output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
- output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[5], av_clip_uintp2((( G + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[4], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[5], av_clip_uintp2(((int)( G + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[6], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
dest += 8;
} else {
- output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[4], av_clip_uintp2((( G + Y2) >> 14) + (1<<15), 16));
- output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[3], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[4], av_clip_uintp2(((int)( G + Y2) >> 14) + (1<<15), 16));
+ output_pixel(&dest[5], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
dest += 6;
}
}
@@ -1429,7 +1429,7 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < dstW; i++) {
- int Y = (buf0[i]) >> 2;
+ SUINT Y = (buf0[i]) >> 2;
int U = (ubuf0[i] - (128 << 11)) >> 2;
int V = (vbuf0[i] - (128 << 11)) >> 2;
int R, G, B;
@@ -1448,9 +1448,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
B = U * c->yuv2rgb_u2b_coeff;
- output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
- output_pixel(&dest[1], av_clip_uintp2((( G + Y) >> 14) + (1<<15), 16));
- output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[1], av_clip_uintp2(((int)( G + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y) >> 14) + (1<<15), 16));
if (eightbytes) {
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
dest += 4;
@@ -1462,7 +1462,7 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
int A = 0xffff<<14;
for (i = 0; i < dstW; i++) {
- int Y = (buf0[i] ) >> 2;
+ SUINT Y = (buf0[i] ) >> 2;
int U = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
int V = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
int R, G, B;
@@ -1481,9 +1481,9 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
B = U * c->yuv2rgb_u2b_coeff;
- output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
- output_pixel(&dest[1], av_clip_uintp2((( G + Y) >> 14) + (1<<15), 16));
- output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[1], av_clip_uintp2(((int)( G + Y) >> 14) + (1<<15), 16));
+ output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y) >> 14) + (1<<15), 16));
if (eightbytes) {
output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
dest += 4;
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 21e30ab8bb..19f7aaf67d 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -165,10 +165,10 @@ endfunc
ret
.endm
-func ff_uyvytoyuv422_rvv, zve32x
+func ff_uyvytoyuv422_rvv, zve32x, zbb
yuy2_to_i422p v20, v16
endfunc
-func ff_yuyvtoyuv422_rvv, zve32x
+func ff_yuyvtoyuv422_rvv, zve32x, zbb
yuy2_to_i422p v16, v20
endfunc
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index c2cc736dd2..d4b0c3cee2 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -697,6 +697,7 @@ void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4],
void ff_updateMMXDitherTables(SwsContext *c, int dstY);
av_cold void ff_sws_init_range_convert(SwsContext *c);
+av_cold void ff_sws_init_range_convert_loongarch(SwsContext *c);
SwsFunc ff_yuv2rgb_init_x86(SwsContext *c);
SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c);
diff --git a/libswscale/utils.c b/libswscale/utils.c
index df14eb016c..476a24fea5 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1078,8 +1078,12 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
c->srcRange = srcRange;
c->dstRange = dstRange;
- if (need_reinit)
+ if (need_reinit) {
ff_sws_init_range_convert(c);
+#if ARCH_LOONGARCH64
+ ff_sws_init_range_convert_loongarch(c);
+#endif
+ }
c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 2673e1d098..92624aab0a 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -4,6 +4,7 @@ AVCODECOBJS-$(CONFIG_AC3DSP) += ac3dsp.o
AVCODECOBJS-$(CONFIG_AUDIODSP) += audiodsp.o
AVCODECOBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
+AVCODECOBJS-$(CONFIG_FDCTDSP) += fdctdsp.o
AVCODECOBJS-$(CONFIG_FMTCONVERT) += fmtconvert.o
AVCODECOBJS-$(CONFIG_G722DSP) += g722dsp.o
AVCODECOBJS-$(CONFIG_H264CHROMA) += h264chroma.o
@@ -34,6 +35,7 @@ AVCODECOBJS-$(CONFIG_OPUS_DECODER) += opusdsp.o
AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_deblock.o hevc_idct.o hevc_sao.o hevc_pel.o
AVCODECOBJS-$(CONFIG_RV34DSP) += rv34dsp.o
+AVCODECOBJS-$(CONFIG_RV40_DECODER) += rv40dsp.o
AVCODECOBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o
AVCODECOBJS-$(CONFIG_TAK_DECODER) += takdsp.o
AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o
@@ -41,7 +43,7 @@ AVCODECOBJS-$(CONFIG_V210_DECODER) += v210dec.o
AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o
AVCODECOBJS-$(CONFIG_VORBIS_DECODER) += vorbisdsp.o
AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o
-AVCODECOBJS-$(CONFIG_VVC_DECODER) += vvc_mc.o
+AVCODECOBJS-$(CONFIG_VVC_DECODER) += vvc_alf.o vvc_mc.o
CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index 344e1fe5c2..442e965f3b 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2023 Institute of Software Chinese Academy of Sciences (ISCAS).
+ * Copyright (c) 2024 Geoff Hill <geoff@geoffhill.org>
*
* This file is part of FFmpeg.
*
@@ -18,6 +19,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
+#include <stdint.h>
#include <string.h>
#include "libavutil/mem.h"
@@ -27,6 +29,24 @@
#include "checkasm.h"
+#define randomize_exp(buf, len) \
+ do { \
+ int i; \
+ for (i = 0; i < len; i++) { \
+ buf[i] = (uint8_t)rnd(); \
+ } \
+ } while (0)
+
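+/* Fills a buffer with sign-magnitude values clamped to 24 bits
+ * (|buf[i]| <= 0xFFFFFF), the sample range used by the fixed-point AC-3
+ * paths exercised below. */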
+#define randomize_i24(buf, len) \
+ do { \
+ int i; \
+ for (i = 0; i < len; i++) { \
+ int32_t v = (int32_t)rnd(); \
+ int32_t u = (v & 0xFFFFFF); \
+ buf[i] = (v < 0) ? -u : u; \
+ } \
+ } while (0)
+
#define randomize_float(buf, len) \
do { \
int i; \
@@ -36,6 +56,64 @@
} \
} while (0)
+static void check_ac3_exponent_min(AC3DSPContext *c) {
+#define MAX_COEFS 256
+#define MAX_CTXT 6
+#define EXP_SIZE (MAX_CTXT * MAX_COEFS)
+
+ LOCAL_ALIGNED_16(uint8_t, src, [EXP_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, v1, [EXP_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, v2, [EXP_SIZE]);
+ int n;
+
+ declare_func(void, uint8_t *, int, int);
+
+ for (n = 0; n < MAX_CTXT; ++n) {
+ if (check_func(c->ac3_exponent_min, "ac3_exponent_min_reuse%d", n)) {
+ randomize_exp(src, EXP_SIZE);
+
+ memcpy(v1, src, EXP_SIZE);
+ memcpy(v2, src, EXP_SIZE);
+
+ call_ref(v1, n, MAX_COEFS);
+ call_new(v2, n, MAX_COEFS);
+
+ if (memcmp(v1, v2, EXP_SIZE) != 0)
+ fail();
+
+ bench_new(v2, n, MAX_COEFS);
+ }
+ }
+
+ report("ac3_exponent_min");
+}
+
+static void check_ac3_extract_exponents(AC3DSPContext *c) {
+#define MAX_EXPS 3072
+ LOCAL_ALIGNED_16(int32_t, src, [MAX_EXPS]);
+ LOCAL_ALIGNED_16(uint8_t, v1, [MAX_EXPS]);
+ LOCAL_ALIGNED_16(uint8_t, v2, [MAX_EXPS]);
+ int n;
+
+ declare_func(void, uint8_t *, int32_t *, int);
+
+ for (n = 512; n <= MAX_EXPS; n += 256) {
+ if (check_func(c->extract_exponents, "ac3_extract_exponents_n%d", n)) {
+ randomize_i24(src, n);
+
+ call_ref(v1, src, n);
+ call_new(v2, src, n);
+
+ if (memcmp(v1, v2, n) != 0)
+ fail();
+
+ bench_new(v1, src, n);
+ }
+ }
+
+ report("ac3_extract_exponents");
+}
+
static void check_float_to_fixed24(AC3DSPContext *c) {
#define BUF_SIZE 1024
LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
@@ -61,10 +139,65 @@ static void check_float_to_fixed24(AC3DSPContext *c) {
report("float_to_fixed24");
}
+static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
+#define ELEMS 240
+ LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]);
+ LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]);
+ LOCAL_ALIGNED_16(uint64_t, v1, [4]);
+ LOCAL_ALIGNED_16(uint64_t, v2, [4]);
+
+ declare_func(void, int64_t[4], const int32_t *, const int32_t *, int);
+
+ randomize_i24(lt, ELEMS);
+ randomize_i24(rt, ELEMS);
+
+ if (check_func(c->sum_square_butterfly_int32,
+ "ac3_sum_square_bufferfly_int32")) {
+ call_ref(v1, lt, rt, ELEMS);
+ call_new(v2, lt, rt, ELEMS);
+
+ if (memcmp(v1, v2, sizeof(int64_t[4])) != 0)
+ fail();
+
+ bench_new(v2, lt, rt, ELEMS);
+ }
+
+ report("ac3_sum_square_butterfly_int32");
+}
+
+static void check_ac3_sum_square_butterfly_float(AC3DSPContext *c) {
+ LOCAL_ALIGNED_32(float, lt, [ELEMS]);
+ LOCAL_ALIGNED_32(float, rt, [ELEMS]);
+ LOCAL_ALIGNED_16(float, v1, [4]);
+ LOCAL_ALIGNED_16(float, v2, [4]);
+
+ declare_func(void, float[4], const float *, const float *, int);
+
+ randomize_float(lt, ELEMS);
+ randomize_float(rt, ELEMS);
+
+ if (check_func(c->sum_square_butterfly_float,
+ "ac3_sum_square_bufferfly_float")) {
+ call_ref(v1, lt, rt, ELEMS);
+ call_new(v2, lt, rt, ELEMS);
+
+ if (!float_near_ulp_array(v1, v2, 10, 4))
+ fail();
+
+ bench_new(v2, lt, rt, ELEMS);
+ }
+
+ report("ac3_sum_square_butterfly_float");
+}
+
void checkasm_check_ac3dsp(void)
{
AC3DSPContext c;
ff_ac3dsp_init(&c);
+ check_ac3_exponent_min(&c);
+ check_ac3_extract_exponents(&c);
check_float_to_fixed24(&c);
+ check_ac3_sum_square_butterfly_int32(&c);
+ check_ac3_sum_square_butterfly_float(&c);
}
diff --git a/tests/checkasm/blockdsp.c b/tests/checkasm/blockdsp.c
index 22a2f79455..5f4d46b8fa 100644
--- a/tests/checkasm/blockdsp.c
+++ b/tests/checkasm/blockdsp.c
@@ -52,6 +52,29 @@ do { \
} \
} while (0)
+static void check_fill(BlockDSPContext *h){
+ LOCAL_ALIGNED_16(uint8_t, buf0_16, [16 * 16]);
+ LOCAL_ALIGNED_16(uint8_t, buf1_16, [16 * 16]);
+
+ for (int t = 0; t < 2; ++t) {
+ uint8_t *buf0 = buf0_16 + t * /* force 8 byte alignment */ 8;
+ uint8_t *buf1 = buf1_16 + t * /* force 8 byte alignment */ 8;
+ int n = 16 - 8 * t;
+ declare_func(void, uint8_t *block, uint8_t value,
+ ptrdiff_t line_size, int h);
+ if (check_func(h->fill_block_tab[t], "blockdsp.fill_block_tab[%d]", t)) {
+ uint8_t value = rnd();
+ memset(buf0, 0, sizeof(*buf0) * n * n);
+ memset(buf1, 0, sizeof(*buf1) * n * n);
+ call_ref(buf0, value, n, n);
+ call_new(buf1, value, n, n);
+ if (memcmp(buf0, buf1, sizeof(*buf0) * n * n))
+ fail();
+ bench_new(buf0, value, n, n);
+ }
+ }
+}
+
void checkasm_check_blockdsp(void)
{
LOCAL_ALIGNED_32(uint16_t, buf0, [6 * 8 * 8]);
@@ -64,5 +87,7 @@ void checkasm_check_blockdsp(void)
check_clear(clear_block, 8 * 8);
check_clear(clear_blocks, 8 * 8 * 6);
+ check_fill(&h);
+
report("blockdsp");
}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index dcd2fd6957..b1c175b95d 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@
void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
#endif
+/* Trade-off between speed and accuracy */
+uint64_t bench_runs = 1U << 10;
+
/* List of tests to invoke */
static const struct {
const char *name;
@@ -106,6 +109,9 @@ static const struct {
#if CONFIG_EXR_DECODER
{ "exrdsp", checkasm_check_exrdsp },
#endif
+ #if CONFIG_FDCTDSP
+ { "fdctdsp", checkasm_check_fdctdsp },
+ #endif
#if CONFIG_FLAC_DECODER
{ "flacdsp", checkasm_check_flacdsp },
#endif
@@ -167,6 +173,9 @@ static const struct {
#if CONFIG_RV34DSP
{ "rv34dsp", checkasm_check_rv34dsp },
#endif
+ #if CONFIG_RV40_DECODER
+ { "rv40dsp", checkasm_check_rv40dsp },
+ #endif
#if CONFIG_SVQ1_ENCODER
{ "svq1enc", checkasm_check_svq1enc },
#endif
@@ -198,7 +207,8 @@ static const struct {
{ "vorbisdsp", checkasm_check_vorbisdsp },
#endif
#if CONFIG_VVC_DECODER
- { "vvc_mc", checkasm_check_vvc_mc },
+ { "vvc_alf", checkasm_check_vvc_alf },
+ { "vvc_mc", checkasm_check_vvc_mc },
#endif
#endif
#if CONFIG_AVFILTER
@@ -279,6 +289,8 @@ static const struct {
{ "RVVf32", "rvv_f32", AV_CPU_FLAG_RVV_F32 },
{ "RVVi64", "rvv_i64", AV_CPU_FLAG_RVV_I64 },
{ "RVVf64", "rvv_f64", AV_CPU_FLAG_RVV_F64 },
+ { "RV_Zvbb", "rv_zvbb", AV_CPU_FLAG_RV_ZVBB },
+ { "misaligned", "misaligned", AV_CPU_FLAG_RV_MISALIGNED },
#elif ARCH_MIPS
{ "MMI", "mmi", AV_CPU_FLAG_MMI },
{ "MSA", "msa", AV_CPU_FLAG_MSA },
@@ -742,6 +754,9 @@ static int bench_init_linux(void)
.disabled = 1, // start counting only on demand
.exclude_kernel = 1,
.exclude_hv = 1,
+#if !ARCH_X86
+ .exclude_guest = 1,
+#endif
};
printf("benchmarking with Linux Perf Monitoring API\n");
@@ -808,7 +823,7 @@ static void bench_uninit(void)
static int usage(const char *path)
{
fprintf(stderr,
- "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n",
+ "Usage: %s [--bench] [--runs=<ptwo>] [--test=<pattern>] [--verbose] [seed]\n",
path);
return 1;
}
@@ -855,6 +870,17 @@ int main(int argc, char *argv[])
state.test_name = arg + 7;
} else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
state.verbose = 1;
+ } else if (!strncmp(arg, "--runs=", 7)) {
+ l = strtoul(arg + 7, &end, 10);
+ if (*end == '\0') {
+ if (l > 30) {
+ fprintf(stderr, "checkasm: error: runs exponent must be within the range 0 <= 30\n");
+ usage(argv[0]);
+ }
+ bench_runs = 1U << l;
+ } else {
+ return usage(argv[0]);
+ }
} else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
*end == '\0') {
seed = l;
@@ -866,6 +892,9 @@ int main(int argc, char *argv[])
fprintf(stderr, "checkasm: using random seed %u\n", seed);
av_lfg_init(&checkasm_lfg, seed);
+ if (state.bench_pattern)
+ fprintf(stderr, "checkasm: bench runs %" PRIu64 " (1 << %i)\n", bench_runs, av_log2(bench_runs));
+
check_cpu_flag(NULL, 0);
for (i = 0; cpus[i].flag; i++)
check_cpu_flag(cpus[i].name, cpus[i].flag);
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index f90920dee7..e05053cbf6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -85,6 +85,7 @@ void checkasm_check_blockdsp(void);
void checkasm_check_bswapdsp(void);
void checkasm_check_colorspace(void);
void checkasm_check_exrdsp(void);
+void checkasm_check_fdctdsp(void);
void checkasm_check_fixed_dsp(void);
void checkasm_check_flacdsp(void);
void checkasm_check_float_dsp(void);
@@ -112,6 +113,7 @@ void checkasm_check_opusdsp(void);
void checkasm_check_pixblockdsp(void);
void checkasm_check_sbrdsp(void);
void checkasm_check_rv34dsp(void);
+void checkasm_check_rv40dsp(void);
void checkasm_check_svq1enc(void);
void checkasm_check_synth_filter(void);
void checkasm_check_sw_gbrp(void);
@@ -132,6 +134,7 @@ void checkasm_check_vp8dsp(void);
void checkasm_check_vp9dsp(void);
void checkasm_check_videodsp(void);
void checkasm_check_vorbisdsp(void);
+void checkasm_check_vvc_alf(void);
void checkasm_check_vvc_mc(void);
struct CheckasmPerf;
@@ -164,7 +167,7 @@ extern AVLFG checkasm_lfg;
static av_unused void *func_ref, *func_new;
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+extern uint64_t bench_runs;
/* Decide whether or not the specified function needs to be tested */
#define check_func(func, ...) (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -333,10 +336,11 @@ typedef struct CheckasmPerf {
av_unused const int sysfd = perf->sysfd;\
func_type *tfunc = func_new;\
uint64_t tsum = 0;\
- int ti, tcount = 0;\
+ uint64_t ti, tcount = 0;\
uint64_t t = 0; \
+ const uint64_t truns = bench_runs;\
checkasm_set_signal_handler_state(1);\
- for (ti = 0; ti < BENCH_RUNS; ti++) {\
+ for (ti = 0; ti < truns; ti++) {\
PERF_START(t);\
tfunc(__VA_ARGS__);\
tfunc(__VA_ARGS__);\
diff --git a/tests/checkasm/fdctdsp.c b/tests/checkasm/fdctdsp.c
new file mode 100644
index 0000000000..c640a00656
--- /dev/null
+++ b/tests/checkasm/fdctdsp.c
@@ -0,0 +1,71 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fdctdsp.h"
+
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem_internal.h"
+
+static int int16_cmp_off_by_n(const int16_t *ref, const int16_t *test, size_t n, int accuracy)
+{
+ for (size_t i = 0; i < n; i++) {
+ if (abs(ref[i] - test[i]) > accuracy)
+ return 1;
+ }
+ return 0;
+}
+
+static void check_fdct(void)
+{
+ LOCAL_ALIGNED_16(int16_t, block0, [64]);
+ LOCAL_ALIGNED_16(int16_t, block1, [64]);
+
+ AVCodecContext avctx = {
+ .bits_per_raw_sample = 8,
+ .dct_algo = FF_DCT_AUTO,
+ };
+ FDCTDSPContext h;
+
+ ff_fdctdsp_init(&h, &avctx);
+
+ if (check_func(h.fdct, "fdct")) {
+ declare_func(void, int16_t *);
+ for (int i = 0; i < 64; i++) {
+ uint8_t r = rnd();
+ block0[i] = r;
+ block1[i] = r;
+ }
+ call_ref(block0);
+ call_new(block1);
+ if (int16_cmp_off_by_n(block0, block1, 64, 2))
+ fail();
+ bench_new(block1);
+ }
+}
+
+void checkasm_check_fdctdsp(void)
+{
+ check_fdct();
+ report("fdctdsp");
+}
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index b308237db1..96b7d05f7e 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -21,6 +21,7 @@
#include <string.h>
#include "checkasm.h"
#include "libavcodec/flacdsp.h"
+#include "libavcodec/mathops.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
@@ -54,26 +55,72 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne
bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
}
-static void check_lpc(int pred_order)
+static void check_lpc(int pred_order, int bps)
{
int qlevel = rnd() % 16;
+ int coeff_prec = (rnd() % 15) + 1;
LOCAL_ALIGNED_16(int32_t, coeffs, [32]);
+ LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]);
declare_func(void, int32_t *, const int[32], int, int, int);
+ if (bps <= 16)
+ coeff_prec = av_clip(coeff_prec, 0, 32 - bps - av_log2(pred_order));
+
for (int i = 0; i < 32; i++)
- coeffs[i] = rnd();
+ coeffs[i] = sign_extend(rnd(), coeff_prec);
for (int i = 0; i < BUF_SIZE; i++)
- dst0[i] = rnd();
+ dst[i] = sign_extend(rnd(), bps);
- memcpy(dst1, dst0, BUF_SIZE * sizeof (int32_t));
+ memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
+ memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE);
call_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE);
if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0)
fail();
- bench_new(dst1, coeffs, pred_order, qlevel, BUF_SIZE);
+ bench_new(dst, coeffs, pred_order, qlevel, BUF_SIZE);
+}
+
+static void check_wasted32(void)
+{
+ int wasted = rnd() % 32;
+ LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]);
+
+ declare_func(void, int32_t *, int, int);
+
+ for (int i = 0; i < BUF_SIZE; i++)
+ dst[i] = rnd();
+
+ memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
+ memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
+ call_ref(dst0, wasted, BUF_SIZE);
+ call_new(dst1, wasted, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0)
+ fail();
+ bench_new(dst, wasted, BUF_SIZE);
+}
+
+static void check_wasted33(void)
+{
+ int wasted = rnd() % 33;
+ LOCAL_ALIGNED_16(int64_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int64_t, dst1, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, residuals, [BUF_SIZE]);
+
+ declare_func(void, int64_t *, const int32_t *, int, int);
+
+ for (int i = 0; i < BUF_SIZE; i++)
+ residuals[i] = rnd();
+
+ call_ref(dst0, residuals, wasted, BUF_SIZE);
+ call_new(dst1, residuals, wasted, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int64_t)) != 0)
+ fail();
+ bench_new(dst0, residuals, wasted, BUF_SIZE);
}
void checkasm_check_flacdsp(void)
@@ -114,10 +161,17 @@ void checkasm_check_flacdsp(void)
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 16);
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 32);
report("lpc");
+
+ if (check_func(h.wasted32, "flac_wasted_32"))
+ check_wasted32();
+ if (check_func(h.wasted33, "flac_wasted_33"))
+ check_wasted33();
+
+ report("wasted");
}
diff --git a/tests/checkasm/h264chroma.c b/tests/checkasm/h264chroma.c
index 1aa28c2ee1..9579fceab7 100644
--- a/tests/checkasm/h264chroma.c
+++ b/tests/checkasm/h264chroma.c
@@ -53,23 +53,23 @@ static void check_chroma_mc(void)
randomize_buffers(bit_depth);
for (int size = 0; size < 4; size++) {
-#define CHECK_CHROMA_MC(name) \
- do { \
- if (check_func(h.name## _pixels_tab[size], #name "_mc%d_%d", 1 << size, bit_depth)) { \
- for (int x = 0; x < 2; x++) { \
- for (int y = 0; y < 2; y++) { \
- memcpy(dst0, src, 16 * 18 * SIZEOF_PIXEL); \
- memcpy(dst1, src, 16 * 18 * SIZEOF_PIXEL); \
- call_ref(dst0, src, 16 * SIZEOF_PIXEL, 16, x, y); \
- call_new(dst1, src, 16 * SIZEOF_PIXEL, 16, x, y); \
- if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL)) { \
- fprintf(stderr, #name ": x:%i, y:%i\n", x, y); \
- fail(); \
- } \
- bench_new(dst1, src, 16 * SIZEOF_PIXEL, 16, x, y); \
- } \
- } \
- } \
+#define CHECK_CHROMA_MC(name) \
+ do { \
+ if (check_func(h.name## _pixels_tab[size], #name "_mc%d_%d", 1 << (3-size), bit_depth)) { \
+ for (int x = 0; x < 2; x++) { \
+ for (int y = 0; y < 2; y++) { \
+ memcpy(dst0, src, 16 * 18 * SIZEOF_PIXEL); \
+ memcpy(dst1, src, 16 * 18 * SIZEOF_PIXEL); \
+ call_ref(dst0, src, 16 * SIZEOF_PIXEL, 16, x, y); \
+ call_new(dst1, src, 16 * SIZEOF_PIXEL, 16, x, y); \
+ if (memcmp(dst0, dst1, 16 * 16 * SIZEOF_PIXEL)) { \
+ fprintf(stderr, #name ": x:%i, y:%i\n", x, y); \
+ fail(); \
+ } \
+ bench_new(dst1, src, 16 * SIZEOF_PIXEL, 16, x, y); \
+ } \
+ } \
+ } \
} while (0)
CHECK_CHROMA_MC(put_h264_chroma);
diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c
index 0f484e3f43..67b8dce53c 100644
--- a/tests/checkasm/h264dsp.c
+++ b/tests/checkasm/h264dsp.c
@@ -27,17 +27,17 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/mem_internal.h"
-static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+static const uint32_t pixel_mask[5] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x0fff0fff, 0x3fff3fff };
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
#define PIXEL_STRIDE 16
-#define randomize_buffers() \
+#define randomize_buffers(idx) \
do { \
int x, y; \
- uint32_t mask = pixel_mask[bit_depth - 8]; \
+ uint32_t mask = pixel_mask[(idx)]; \
for (y = 0; y < sz; y++) { \
for (x = 0; x < PIXEL_STRIDE; x += 4) { \
AV_WN32A(src + y * PIXEL_STRIDE + x, rnd() & mask); \
@@ -83,7 +83,7 @@ static void dct4x4_##size(dctcoef *coef) \
} \
for (y = 0; y < 4; y++) { \
for (x = 0; x < 4; x++) { \
- static const int scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 }; \
+ const int64_t scale[] = { 13107 * 10, 8066 * 13, 5243 * 16 }; \
const int idx = (y & 1) + (x & 1); \
coef[y*4 + x] = (coef[y*4 + x] * scale[idx] + (1 << 14)) >> 15; \
} \
@@ -173,6 +173,7 @@ static void dct8x8(int16_t *coef, int bit_depth)
static void check_idct(void)
{
+ static const int depths[5] = { 8, 9, 10, 12, 14 };
LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
@@ -181,13 +182,14 @@ static void check_idct(void)
LOCAL_ALIGNED_16(int16_t, subcoef0, [8 * 8 * 2]);
LOCAL_ALIGNED_16(int16_t, subcoef1, [8 * 8 * 2]);
H264DSPContext h;
- int bit_depth, sz, align, dc;
+ int bit_depth, sz, align, dc, i;
declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride);
- for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+ for (i = 0; i < FF_ARRAY_ELEMS(depths); i++) {
+ bit_depth = depths[i];
ff_h264dsp_init(&h, bit_depth, 1);
for (sz = 4; sz <= 8; sz += 4) {
- randomize_buffers();
+ randomize_buffers(i);
if (sz == 4)
dct4x4(coef, bit_depth);
@@ -275,7 +277,7 @@ static void check_idct_multiple(void)
int offset = (block_y * 16 + block_x) * SIZEOF_PIXEL;
int nnz = rnd() % 3;
- randomize_buffers();
+ randomize_buffers(bit_depth - 8);
if (sz == 4)
dct4x4(coef, bit_depth);
else
@@ -360,7 +362,7 @@ static void check_loop_filter(void)
tc0[j][0], tc0[j][1], tc0[j][2], tc0[j][3]); \
fail(); \
} \
- bench_new(dst1, 32, alphas[j], betas[j], tc0[j]); \
+ bench_new(dst1 + off, 32, alphas[j], betas[j], tc0[j]);\
} \
} \
} while (0)
@@ -419,7 +421,7 @@ static void check_loop_filter_intra(void)
j, alphas[j], betas[j]); \
fail(); \
} \
- bench_new(dst1, 32, alphas[j], betas[j]); \
+ bench_new(dst1 + off, 32, alphas[j], betas[j]); \
} \
} \
} while (0)
diff --git a/tests/checkasm/hevc_add_res.c b/tests/checkasm/hevc_add_res.c
index f35e9fccd9..9dec3705c1 100644
--- a/tests/checkasm/hevc_add_res.c
+++ b/tests/checkasm/hevc_add_res.c
@@ -50,7 +50,7 @@ static void compare_add_res(int size, ptrdiff_t stride, int overflow_test, int m
LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
- declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride);
+ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const int16_t *res, ptrdiff_t stride);
randomize_buffers(res0, size);
randomize_buffers2(dst0, size, mask);
diff --git a/tests/checkasm/hevc_deblock.c b/tests/checkasm/hevc_deblock.c
index 04cf9d87ac..c7f4f7e9ab 100644
--- a/tests/checkasm/hevc_deblock.c
+++ b/tests/checkasm/hevc_deblock.c
@@ -57,7 +57,8 @@ static void check_deblock_chroma(HEVCDSPContext *h, int bit_depth, int c)
LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
- declare_func(void, uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q);
+ declare_func(void, uint8_t *pix, ptrdiff_t stride,
+ const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
if (check_func(c ? h->hevc_h_loop_filter_chroma_c : h->hevc_h_loop_filter_chroma,
"hevc_h_loop_filter_chroma%d%s", bit_depth, c ? "_full" : ""))
@@ -226,7 +227,8 @@ static void check_deblock_luma(HEVCDSPContext *h, int bit_depth, int c)
uint8_t *ptr0 = buf0 + BUF_OFFSET,
*ptr1 = buf1 + BUF_OFFSET;
- declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta, int32_t *tc, uint8_t *no_p, uint8_t *no_q);
+ declare_func(void, uint8_t *pix, ptrdiff_t stride, int beta,
+ const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q);
memset(buf0, 0, BUF_SIZE);
for (int j = 0; j < 3; j++) {
diff --git a/tests/checkasm/hevc_pel.c b/tests/checkasm/hevc_pel.c
index ed22ec4f9d..aebdf104e6 100644
--- a/tests/checkasm/hevc_pel.c
+++ b/tests/checkasm/hevc_pel.c
@@ -88,7 +88,7 @@ static void checkasm_check_hevc_qpel(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -132,7 +132,7 @@ static void checkasm_check_hevc_qpel_uni(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -182,7 +182,7 @@ static void checkasm_check_hevc_qpel_uni_w(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
const int *denom, *wx, *ox;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -239,8 +239,8 @@ static void checkasm_check_hevc_qpel_bi(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
- int16_t *src2,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
+ const int16_t *src2,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -292,8 +292,8 @@ static void checkasm_check_hevc_qpel_bi_w(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
const int *denom, *wx, *ox;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
- int16_t *src2,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
+ const int16_t *src2,
int height, int denom, int wx0, int wx1,
int ox0, int ox1, intptr_t mx, intptr_t my, int width);
@@ -352,7 +352,7 @@ static void checkasm_check_hevc_epel(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -396,7 +396,7 @@ static void checkasm_check_hevc_epel_uni(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -446,7 +446,7 @@ static void checkasm_check_hevc_epel_uni_w(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
const int *denom, *wx, *ox;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -503,8 +503,8 @@ static void checkasm_check_hevc_epel_bi(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
- int16_t *src2,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
+ const int16_t *src2,
int height, intptr_t mx, intptr_t my, int width);
for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
@@ -556,8 +556,8 @@ static void checkasm_check_hevc_epel_bi_w(void)
HEVCDSPContext h;
int size, bit_depth, i, j;
const int *denom, *wx, *ox;
- declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
- int16_t *src2,
+ declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
+ const int16_t *src2,
int height, int denom, int wx0, int wx1,
int ox0, int ox1, intptr_t mx, intptr_t my, int width);
diff --git a/tests/checkasm/hevc_sao.c b/tests/checkasm/hevc_sao.c
index d05af9ac72..21c22b9749 100644
--- a/tests/checkasm/hevc_sao.c
+++ b/tests/checkasm/hevc_sao.c
@@ -78,8 +78,8 @@ static void check_sao_band(HEVCDSPContext *h, int bit_depth)
int block_size = sao_size[i];
int prev_size = i > 0 ? sao_size[i - 1] : 0;
ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
- declare_func(void, uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
- int16_t *sao_offset_val, int sao_left_class, int width, int height);
+ declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
+ const int16_t *sao_offset_val, int sao_left_class, int width, int height);
if (check_func(h->sao_band_filter[i], "hevc_sao_band_%d_%d", block_size, bit_depth)) {
@@ -116,8 +116,8 @@ static void check_sao_edge(HEVCDSPContext *h, int bit_depth)
int prev_size = i > 0 ? sao_size[i - 1] : 0;
ptrdiff_t stride = PIXEL_STRIDE*SIZEOF_PIXEL;
int offset = (AV_INPUT_BUFFER_PADDING_SIZE + PIXEL_STRIDE)*SIZEOF_PIXEL;
- declare_func(void, uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
- int16_t *sao_offset_val, int eo, int width, int height);
+ declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
+ const int16_t *sao_offset_val, int eo, int width, int height);
for (int w = prev_size + 4; w <= block_size; w += 4) {
randomize_buffers(src0, src1, BUF_SIZE);
diff --git a/tests/checkasm/huffyuvdsp.c b/tests/checkasm/huffyuvdsp.c
index a08f5a8391..7491a8f14c 100644
--- a/tests/checkasm/huffyuvdsp.c
+++ b/tests/checkasm/huffyuvdsp.c
@@ -42,7 +42,7 @@ static void check_add_int16(HuffYUVDSPContext *c, unsigned mask, int width, cons
uint16_t *dst0 = av_mallocz(width * sizeof(uint16_t));
uint16_t *dst1 = av_mallocz(width * sizeof(uint16_t));
- declare_func(void, uint16_t *dst, uint16_t *src, unsigned mask, int w);
+ declare_func(void, uint16_t *dst, const uint16_t *src, unsigned mask, int w);
if (!src0 || !src1 || !dst0 || !dst1)
fail();
diff --git a/tests/checkasm/llauddsp.c b/tests/checkasm/llauddsp.c
index e6eab589b2..969166baca 100644
--- a/tests/checkasm/llauddsp.c
+++ b/tests/checkasm/llauddsp.c
@@ -49,7 +49,7 @@ static void check_scalarproduct_and_madd_int16(LLAudDSPContext *c)
randomize_buf(v1, BUF_SIZE);
randomize_buf(v2, BUF_SIZE);
randomize_buf(v3, BUF_SIZE);
- mul = rnd();
+ mul = (int16_t)rnd();
if (check_func(c->scalarproduct_and_madd_int16,
"scalarproduct_and_madd_int16")) {
@@ -84,7 +84,7 @@ static void check_scalarproduct_and_madd_int32(LLAudDSPContext *c)
randomize_buf(v1, BUF_SIZE);
randomize_buf(v2, BUF_SIZE);
randomize_buf(v3, BUF_SIZE);
- mul = rnd();
+ mul = (int16_t)rnd();
if (check_func(c->scalarproduct_and_madd_int32,
"scalarproduct_and_madd_int32")) {
diff --git a/tests/checkasm/llviddsp.c b/tests/checkasm/llviddsp.c
index 00ad21a7cc..b75c0ea099 100644
--- a/tests/checkasm/llviddsp.c
+++ b/tests/checkasm/llviddsp.c
@@ -114,7 +114,7 @@ static void check_add_left_pred(LLVidDSPContext *c, int width, int acc, const ch
uint8_t *dst1 = av_mallocz(width);
uint8_t *src0 = av_calloc(width, sizeof(*src0));
uint8_t *src1 = av_calloc(width, sizeof(*src1));
- declare_func(int, uint8_t *dst, uint8_t *src, ptrdiff_t w, int acc);
+ declare_func(int, uint8_t *dst, const uint8_t *src, ptrdiff_t w, int acc);
init_buffer(src0, src1, uint8_t, width);
@@ -143,7 +143,7 @@ static void check_add_left_pred_16(LLVidDSPContext *c, unsigned mask, int width,
uint16_t *dst1 = av_calloc(width, sizeof(*dst1));
uint16_t *src0 = av_calloc(width, sizeof(*src0));
uint16_t *src1 = av_calloc(width, sizeof(*src1));
- declare_func(int, uint16_t *dst, uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
+ declare_func(int, uint16_t *dst, const uint16_t *src, unsigned mask, ptrdiff_t w, unsigned acc);
init_buffer(src0, src1, uint16_t, width);
diff --git a/tests/checkasm/motion.c b/tests/checkasm/motion.c
index 71a0ee3fc0..e7a36bbbda 100644
--- a/tests/checkasm/motion.c
+++ b/tests/checkasm/motion.c
@@ -52,8 +52,8 @@ static void test_motion(const char *name, me_cmp_func test_func)
LOCAL_ALIGNED_16(uint8_t, img2, [WIDTH * HEIGHT]);
declare_func_emms(AV_CPU_FLAG_MMX, int, struct MpegEncContext *c,
- uint8_t *blk1 /* align width (8 or 16) */,
- uint8_t *blk2 /* align 1 */, ptrdiff_t stride,
+ const uint8_t *blk1 /* align width (8 or 16) */,
+ const uint8_t *blk2 /* align 1 */, ptrdiff_t stride,
int h);
if (test_func == NULL) {
diff --git a/tests/checkasm/opusdsp.c b/tests/checkasm/opusdsp.c
index 34ee5a4edc..b9ecc55240 100644
--- a/tests/checkasm/opusdsp.c
+++ b/tests/checkasm/opusdsp.c
@@ -19,6 +19,7 @@
#include "libavutil/mem_internal.h"
#include "libavcodec/opusdsp.h"
+#include "libavcodec/opustab.h"
#include "checkasm.h"
@@ -69,17 +70,17 @@ static void test_deemphasis(void)
LOCAL_ALIGNED(16, float, dst1, [FFALIGN(MAX_SIZE, 4)]);
float coeff0 = (float)rnd() / (UINT_MAX >> 5) - 16.0f, coeff1 = coeff0;
- declare_func_float(float, float *out, float *in, float coeff, int len);
+ declare_func_float(float, float *out, float *in, float coeff, const float *weights, int len);
randomize_float(src, MAX_SIZE);
- coeff0 = call_ref(dst0, src, coeff0, MAX_SIZE);
- coeff1 = call_new(dst1, src, coeff1, MAX_SIZE);
+ coeff0 = call_ref(dst0, src, coeff0, ff_opus_deemph_weights, MAX_SIZE);
+ coeff1 = call_new(dst1, src, coeff1, ff_opus_deemph_weights, MAX_SIZE);
if (!float_near_abs_eps(coeff0, coeff1, EPS) ||
!float_near_abs_eps_array(dst0, dst1, EPS, MAX_SIZE))
fail();
- bench_new(dst1, src, coeff1, MAX_SIZE);
+ bench_new(dst1, src, coeff1, ff_opus_deemph_weights, MAX_SIZE);
}
void checkasm_check_opusdsp(void)
diff --git a/tests/checkasm/rv40dsp.c b/tests/checkasm/rv40dsp.c
new file mode 100644
index 0000000000..a1a873d430
--- /dev/null
+++ b/tests/checkasm/rv40dsp.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024 Institute of Software, Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include "checkasm.h"
+#include "libavcodec/rv40dsp.c"
+#include "libavutil/mem_internal.h"
+
+#define randomize_buffers() \
+ do { \
+ for (int i = 0; i < 16*18*2; i++) \
+ src[i] = rnd() & 0x3; \
+ } while (0)
+
+static void check_chroma_mc(void)
+{
+ RV34DSPContext h;
+ LOCAL_ALIGNED_32(uint8_t, src, [16 * 18 * 2]);
+ LOCAL_ALIGNED_32(uint8_t, dst0, [16 * 18 * 2]);
+ LOCAL_ALIGNED_32(uint8_t, dst1, [16 * 18 * 2]);
+
+ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src,
+ ptrdiff_t stride, int h, int x, int y);
+
+ ff_rv40dsp_init(&h);
+ randomize_buffers();
+ for (int size = 0; size < 2; size++) {
+
+#define CHECK_CHROMA_MC(name) \
+ do { \
+ if (check_func(h.name## _pixels_tab[size], #name "_mc%d", 1 << (3 - size))) { \
+ for (int x = 0; x < 2; x++) { \
+ for (int y = 0; y < 2; y++) { \
+ memcpy(dst0, src, 16 * 18); \
+ memcpy(dst1, src, 16 * 18); \
+ call_ref(dst0, src, 16, 16, x, y); \
+ call_new(dst1, src, 16, 16, x, y); \
+ if (memcmp(dst0, dst1, 16 * 16)) { \
+ fprintf(stderr, #name ": x:%i, y:%i\n", x, y); \
+ fail(); \
+ } \
+ bench_new(dst1, src, 16, 16, x, y); \
+ } \
+ } \
+ } \
+ } while (0)
+
+ CHECK_CHROMA_MC(put_chroma);
+ CHECK_CHROMA_MC(avg_chroma);
+ }
+}
+
+void checkasm_check_rv40dsp(void)
+{
+ check_chroma_mc();
+ report("chroma_mc");
+}
diff --git a/tests/checkasm/svq1enc.c b/tests/checkasm/svq1enc.c
index 1a6f531141..f9abdcbff8 100644
--- a/tests/checkasm/svq1enc.c
+++ b/tests/checkasm/svq1enc.c
@@ -18,7 +18,6 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
#include "libavcodec/svq1encdsp.h"
@@ -26,11 +25,13 @@
#include "checkasm.h"
#define BUF_SIZE 1024
+#define MIN_VAL (-255 - 5 * 127)
+#define MAX_VAL ( 255 + 5 * 128)
#define randomize(buf, len) \
do { \
for (int i = 0; i < len; i++) \
- buf[i] = ((rnd() % 65281) - 32641); \
+ buf[i] = ((rnd() % (MAX_VAL - MIN_VAL + 1)) + MIN_VAL); \
} while (0)
static void test_ssd_int8_vs_int16(SVQ1EncDSPContext *s) {
diff --git a/tests/checkasm/sw_gbrp.c b/tests/checkasm/sw_gbrp.c
index 848e5366ad..b845da32a6 100644
--- a/tests/checkasm/sw_gbrp.c
+++ b/tests/checkasm/sw_gbrp.c
@@ -76,7 +76,7 @@ static void check_output_yuv2gbrp(void)
uint8_t *dst0[4];
uint8_t *dst1[4];
- declare_func(void, void *c, const int16_t *lumFilter,
+ declare_func(void, struct SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrcx, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrcx,
const int16_t **chrVSrcx, int chrFilterSize,
@@ -202,10 +202,11 @@ static void check_input_planar_rgb_to_y(void)
#define LARGEST_INPUT_SIZE 512
#define INPUT_SIZES 6
static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
- uint8_t *src[4];
+ const uint8_t *src[4];
int32_t rgb2yuv[9] = {0};
- declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
+ declare_func(void, uint8_t *dst, const uint8_t *src[4],
+ int w, int32_t *rgb2yuv, void *opaque);
LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
@@ -243,13 +244,13 @@ static void check_input_planar_rgb_to_y(void)
memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
- call_ref(dst0_y, src, dstW, rgb2yuv);
- call_new(dst1_y, src, dstW, rgb2yuv);
+ call_ref(dst0_y, src, dstW, rgb2yuv, NULL);
+ call_new(dst1_y, src, dstW, rgb2yuv, NULL);
if (memcmp(dst0_y, dst1_y, dstW * byte_size))
fail();
- bench_new(dst1_y, src, dstW, rgb2yuv);
+ bench_new(dst1_y, src, dstW, rgb2yuv, NULL);
}
}
@@ -269,11 +270,11 @@ static void check_input_planar_rgb_to_uv(void)
#define LARGEST_INPUT_SIZE 512
#define INPUT_SIZES 6
static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
- uint8_t *src[4];
+ const uint8_t *src[4];
int32_t rgb2yuv[9] = {0};
declare_func(void, uint8_t *dstU, uint8_t *dstV,
- uint8_t *src[4], int w, int32_t *rgb2yuv);
+ const uint8_t *src[4], int w, int32_t *rgb2yuv, void *opaque);
LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
@@ -316,14 +317,14 @@ static void check_input_planar_rgb_to_uv(void)
memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
- call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv);
- call_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
+ call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv, NULL);
+ call_new(dst1_u, dst1_v, src, dstW, rgb2yuv, NULL);
if (memcmp(dst0_u, dst1_u, dstW * byte_size) ||
memcmp(dst0_v, dst1_v, dstW * byte_size))
fail();
- bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv);
+ bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv, NULL);
}
}
}
@@ -342,10 +343,11 @@ static void check_input_planar_rgb_to_a(void)
#define LARGEST_INPUT_SIZE 512
#define INPUT_SIZES 6
static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
- uint8_t *src[4];
+ const uint8_t *src[4];
int32_t rgb2yuv[9] = {0};
- declare_func(void, uint8_t *dst, uint8_t *src[4], int w, int32_t *rgb2yuv);
+ declare_func(void, uint8_t *dst, const uint8_t *src[4],
+ int w, int32_t *rgb2yuv, void *opaque);
LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
@@ -386,12 +388,12 @@ static void check_input_planar_rgb_to_a(void)
memset(dst0_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
memset(dst1_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
- call_ref(dst0_a, src, dstW, rgb2yuv);
- call_new(dst1_a, src, dstW, rgb2yuv);
+ call_ref(dst0_a, src, dstW, rgb2yuv, NULL);
+ call_new(dst1_a, src, dstW, rgb2yuv, NULL);
if (memcmp(dst0_a, dst1_a, dstW * byte_size))
fail();
- bench_new(dst1_a, src, dstW, rgb2yuv);
+ bench_new(dst1_a, src, dstW, rgb2yuv, NULL);
}
}
}
diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c
index 8ad2b51acf..f18f0f8251 100644
--- a/tests/checkasm/vc1dsp.c
+++ b/tests/checkasm/vc1dsp.c
@@ -439,6 +439,40 @@ static void check_unescape(void)
}
}
+static void check_mspel_pixels(void)
+{
+ LOCAL_ALIGNED_16(uint8_t, src0, [32 * 32]);
+ LOCAL_ALIGNED_16(uint8_t, src1, [32 * 32]);
+ LOCAL_ALIGNED_16(uint8_t, dst0, [32 * 32]);
+ LOCAL_ALIGNED_16(uint8_t, dst1, [32 * 32]);
+
+ VC1DSPContext h;
+
+ const test tests[] = {
+ VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[0][0], 16, 16)
+ VC1DSP_SIZED_TEST(put_vc1_mspel_pixels_tab[1][0], 8, 8)
+ VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[0][0], 16, 16)
+ VC1DSP_SIZED_TEST(avg_vc1_mspel_pixels_tab[1][0], 8, 8)
+ };
+
+ ff_vc1dsp_init(&h);
+
+ for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
+ void (*func)(uint8_t *, const uint8_t*, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);
+ if (check_func(func, "vc1dsp.%s", tests[t].name)) {
+ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, const uint8_t*, ptrdiff_t, int);
+ RANDOMIZE_BUFFER8(dst, 32 * 32);
+ RANDOMIZE_BUFFER8(src, 32 * 32);
+ call_ref(dst0, src0, 32, 0);
+ call_new(dst1, src1, 32, 0);
+ if (memcmp(dst0, dst1, 32 * 32)) {
+ fail();
+ }
+ bench_new(dst1, src0, 32, 0);
+ }
+ }
+}
+
void checkasm_check_vc1dsp(void)
{
check_inv_trans_inplace();
@@ -450,4 +484,7 @@ void checkasm_check_vc1dsp(void)
check_unescape();
report("unescape_buffer");
+
+ check_mspel_pixels();
+ report("mspel_pixels");
}
diff --git a/tests/checkasm/vf_blend.c b/tests/checkasm/vf_blend.c
index b5a96ee4bc..c387d36814 100644
--- a/tests/checkasm/vf_blend.c
+++ b/tests/checkasm/vf_blend.c
@@ -68,7 +68,7 @@
const uint8_t *bottom, ptrdiff_t bottom_linesize, \
uint8_t *dst, ptrdiff_t dst_linesize, \
ptrdiff_t width, ptrdiff_t height, \
- struct FilterParams *param, double *values); \
+ struct FilterParams *param, struct SliceParams *sliceparam); \
w = WIDTH / depth; \
\
for (i = 0; i < BUF_UNITS - 1; i++) { \
diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
index fae61b62e4..222e6aa4fa 100644
--- a/tests/checkasm/vf_bwdif.c
+++ b/tests/checkasm/vf_bwdif.c
@@ -40,7 +40,7 @@
const int stride = WIDTH; \
const int mask = (1<<depth)-1; \
\
- declare_func(void, void *dst, void *prev, void *cur, void *next, \
+ declare_func(void, void *dst, const void *prev, const void *cur, const void *next, \
int w, int prefs, int mrefs, int prefs2, int mrefs2, \
int prefs3, int mrefs3, int prefs4, int mrefs4, \
int parity, int clip_max); \
@@ -181,7 +181,7 @@ void checkasm_check_vf_bwdif(void)
for (parity = 0; parity != 2; ++parity) {
if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) {
- declare_func(void, void *dst1, void *prev1, void *cur1, void *next1,
+ declare_func(void, void *dst1, const void *prev1, const void *cur1, const void *next1,
int w, int prefs, int mrefs, int prefs2, int mrefs2,
int parity, int clip_max, int spat);
@@ -225,7 +225,7 @@ void checkasm_check_vf_bwdif(void)
const int stride = WIDTH;
const int mask = (1<<8)-1;
- declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs,
+ declare_func(void, void *dst1, const void *cur1, int w, int prefs, int mrefs,
int prefs3, int mrefs3, int parity, int clip_max);
randomize_buffers( cur0, cur1, mask, 11*WIDTH);
diff --git a/tests/checkasm/vf_colorspace.c b/tests/checkasm/vf_colorspace.c
index 31efa671f9..1b0d202ce6 100644
--- a/tests/checkasm/vf_colorspace.c
+++ b/tests/checkasm/vf_colorspace.c
@@ -53,8 +53,8 @@ static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
static void check_yuv2yuv(void)
{
- declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
- uint8_t *src[3], ptrdiff_t src_stride[3],
+ declare_func(void, uint8_t *dst[3], const ptrdiff_t dst_stride[3],
+ uint8_t *src[3], const ptrdiff_t src_stride[3],
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[2][8]);
ColorSpaceDSPContext dsp;
@@ -122,7 +122,7 @@ static void check_yuv2yuv(void)
static void check_yuv2rgb(void)
{
declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
- uint8_t *src[3], ptrdiff_t src_stride[3],
+ uint8_t *src[3], const ptrdiff_t src_stride[3],
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[8]);
ColorSpaceDSPContext dsp;
@@ -198,7 +198,7 @@ static void check_yuv2rgb(void)
static void check_rgb2yuv(void)
{
- declare_func(void, uint8_t *dst[3], ptrdiff_t dst_stride[3],
+ declare_func(void, uint8_t *dst[3], const ptrdiff_t dst_stride[3],
int16_t *src[3], ptrdiff_t src_stride,
int w, int h, const int16_t coeff[3][3][8],
const int16_t off[8]);
diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c
index 4cd0f8ac4f..6e989819ef 100644
--- a/tests/checkasm/vp8dsp.c
+++ b/tests/checkasm/vp8dsp.c
@@ -269,7 +269,8 @@ static void check_mc(void)
LOCAL_ALIGNED_16(uint8_t, dst1, [16 * 16]);
VP8DSPContext d;
int type, k, dx, dy;
- declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, uint8_t *, ptrdiff_t, int, int, int);
+ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t,
+ const uint8_t *, ptrdiff_t, int, int, int);
ff_vp78dsp_init(&d);
diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
new file mode 100644
index 0000000000..6dd89bfafc
--- /dev/null
+++ b/tests/checkasm/vvc_alf.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2023-2024 Nuo Mi <nuomi2021@gmail.com>
+ * Copyright (c) 2023-2024 Wu Jianhua <toqsxw@outlook.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavcodec/vvc/ctu.h"
+#include "libavcodec/vvc/data.h"
+#include "libavcodec/vvc/dsp.h"
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem_internal.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define SRC_PIXEL_STRIDE (MAX_CTU_SIZE + 2 * ALF_PADDING_SIZE)
+#define DST_PIXEL_STRIDE (SRC_PIXEL_STRIDE + 4)
+#define SRC_BUF_SIZE (SRC_PIXEL_STRIDE * (MAX_CTU_SIZE + 3 * 2) * 2) //+3 * 2 for top and bottom row, *2 for high bit depth
+#define DST_BUF_SIZE (DST_PIXEL_STRIDE * (MAX_CTU_SIZE + 3 * 2) * 2)
+#define LUMA_PARAMS_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * ALF_NUM_COEFF_LUMA)
+
+#define randomize_buffers(buf0, buf1, size) \
+ do { \
+ uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
+ int k; \
+ for (k = 0; k < size; k += 4) { \
+ uint32_t r = rnd() & mask; \
+ AV_WN32A(buf0 + k, r); \
+ AV_WN32A(buf1 + k, r); \
+ } \
+ } while (0)
+
+#define randomize_buffers2(buf, size, filter) \
+ do { \
+ int k; \
+ if (filter) { \
+ for (k = 0; k < size; k++) { \
+ int8_t r = rnd(); \
+ buf[k] = r; \
+ } \
+ } else { \
+ for (k = 0; k < size; k++) { \
+ int r = rnd() % FF_ARRAY_ELEMS(clip_set); \
+ buf[k] = clip_set[r]; \
+ } \
+ } \
+ } while (0)
+
+static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
+{
+ LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
+ int16_t filter[LUMA_PARAMS_SIZE];
+ int16_t clip[LUMA_PARAMS_SIZE];
+
+ const int16_t clip_set[] = {
+ 1 << bit_depth, 1 << (bit_depth - 3), 1 << (bit_depth - 5), 1 << (bit_depth - 7)
+ };
+
+ ptrdiff_t src_stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
+ ptrdiff_t dst_stride = DST_PIXEL_STRIDE * SIZEOF_PIXEL;
+ int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
+
+ declare_func_emms(AV_CPU_FLAG_AVX2, void, uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride,
+ int width, int height, const int16_t *filter, const int16_t *clip, const int vb_pos);
+
+ randomize_buffers(src0, src1, SRC_BUF_SIZE);
+ randomize_buffers2(filter, LUMA_PARAMS_SIZE, 1);
+ randomize_buffers2(clip, LUMA_PARAMS_SIZE, 0);
+
+ for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
+ for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
+ const int ctu_size = MAX_CTU_SIZE;
+ if (check_func(c->alf.filter[LUMA], "vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
+ const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_LUMA;
+ memset(dst0, 0, DST_BUF_SIZE);
+ memset(dst1, 0, DST_BUF_SIZE);
+ call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
+ call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+ for (int i = 0; i < h; i++) {
+ if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
+ fail();
+ }
+ // Bench only square sizes, and ones with dimensions being a power of two.
+ if (w == h && (w & (w - 1)) == 0)
+ bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+ }
+ if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
+ const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
+ memset(dst0, 0, DST_BUF_SIZE);
+ memset(dst1, 0, DST_BUF_SIZE);
+ call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
+ call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+ for (int i = 0; i < h; i++) {
+ if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
+ fail();
+ }
+ if (w == h && (w & (w - 1)) == 0)
+ bench_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
+ }
+ }
+ }
+}
+
+static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
+{
+ LOCAL_ALIGNED_32(int, class_idx0, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, transpose_idx0, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, class_idx1, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, transpose_idx1, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
+ LOCAL_ALIGNED_32(int32_t, alf_gradient_tmp, [ALF_GRADIENT_SIZE * ALF_GRADIENT_SIZE * ALF_NUM_DIR]);
+
+ ptrdiff_t stride = SRC_PIXEL_STRIDE * SIZEOF_PIXEL;
+ int offset = (3 * SRC_PIXEL_STRIDE + 3) * SIZEOF_PIXEL;
+
+ declare_func_emms(AV_CPU_FLAG_AVX2, void, int *class_idx, int *transpose_idx,
+ const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, int *gradient_tmp);
+
+ randomize_buffers(src0, src1, SRC_BUF_SIZE);
+
+ for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
+ for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
+ const int id_size = w * h / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * sizeof(int);
+ const int vb_pos = MAX_CTU_SIZE - ALF_BLOCK_SIZE;
+ if (check_func(c->alf.classify, "vvc_alf_classify_%dx%d_%d", w, h, bit_depth)) {
+ memset(class_idx0, 0, id_size);
+ memset(class_idx1, 0, id_size);
+ memset(transpose_idx0, 0, id_size);
+ memset(transpose_idx1, 0, id_size);
+ call_ref(class_idx0, transpose_idx0, src0 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
+
+ call_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
+
+ if (memcmp(class_idx0, class_idx1, id_size))
+ fail();
+ if (memcmp(transpose_idx0, transpose_idx1, id_size))
+ fail();
+ // Bench only square sizes, and ones with dimensions being a power of two.
+ if (w == h && (w & (w - 1)) == 0)
+ bench_new(class_idx1, transpose_idx1, src1 + offset, stride, w, h, vb_pos, alf_gradient_tmp);
+ }
+ }
+ }
+}
+
+void checkasm_check_vvc_alf(void)
+{
+ int bit_depth;
+ VVCDSPContext h;
+ for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+ ff_vvc_dsp_init(&h, bit_depth);
+ check_alf_filter(&h, bit_depth);
+ }
+ report("alf_filter");
+
+ for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+ ff_vvc_dsp_init(&h, bit_depth);
+ check_alf_classify(&h, bit_depth);
+ }
+ report("alf_classify");
+}
diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 97f57cb401..a5ec7b7869 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -121,7 +121,8 @@ static void check_put_vvc_luma_uni(void)
VVCDSPContext c;
declare_func(void, uint8_t *dst, ptrdiff_t dststride,
- uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
+ const uint8_t *src, ptrdiff_t srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width);
for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vvc_dsp_init(&c, bit_depth);
@@ -133,8 +134,8 @@ static void check_put_vvc_luma_uni(void)
const int idx = av_log2(w) - 1;
const int mx = rnd() % VVC_INTER_LUMA_FACTS;
const int my = rnd() % VVC_INTER_LUMA_FACTS;
- const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
- const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
+ const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][mx];
+ const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][my];
const char *type;
switch ((j << 1) | i) {
@@ -183,8 +184,8 @@ static void check_put_vvc_chroma(void)
const int idx = av_log2(w) - 1;
const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
const int my = rnd() % VVC_INTER_CHROMA_FACTS;
- const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
- const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
+ const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx];
+ const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my];
const char *type;
switch ((j << 1) | i) {
case 0: type = "put_chroma_pixels"; break; // 0 0
@@ -219,7 +220,8 @@ static void check_put_vvc_chroma_uni(void)
VVCDSPContext c;
declare_func(void, uint8_t *dst, ptrdiff_t dststride,
- uint8_t *src, ptrdiff_t srcstride, int height, const int8_t *hf, const int8_t *vf, int width);
+ const uint8_t *src, ptrdiff_t srcstride, int height,
+ const int8_t *hf, const int8_t *vf, int width);
for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
ff_vvc_dsp_init(&c, bit_depth);
@@ -231,8 +233,8 @@ static void check_put_vvc_chroma_uni(void)
const int idx = av_log2(w) - 1;
const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
const int my = rnd() % VVC_INTER_CHROMA_FACTS;
- const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][mx];
- const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_FILTER_TYPES][my];
+ const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx];
+ const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my];
const char *type;
switch ((j << 1) | i) {
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index 9863e4f2d9..6ae0320c60 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
export LC_ALL=C
diff --git a/tests/fate.sh b/tests/fate.sh
index 07908be3a5..4081e865ae 100755
--- a/tests/fate.sh
+++ b/tests/fate.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
config=$1
@@ -30,14 +30,14 @@ lock(){
checkout(){
case "$repo" in
file:*|/*) src="${repo#file:}" ;;
- git:*) git clone --quiet --branch "$branch" "$repo" "$src" ;;
+ git:*|https:*) git clone --quiet --branch "$branch" "$repo" "$src" ;;
esac
}
update()(
cd ${src} || return
case "$repo" in
- git:*) git fetch --quiet --force && git reset --quiet --hard "origin/$branch" ;;
+ git:*|https:*) git fetch --quiet --force && git reset --quiet --hard "origin/$branch" ;;
esac
)
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 3b5b867a97..d021e124d1 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -8,6 +8,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-blockdsp \
fate-checkasm-bswapdsp \
fate-checkasm-exrdsp \
+ fate-checkasm-fdctdsp \
fate-checkasm-fixed_dsp \
fate-checkasm-flacdsp \
fate-checkasm-float_dsp \
@@ -34,6 +35,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-pixblockdsp \
fate-checkasm-sbrdsp \
fate-checkasm-rv34dsp \
+ fate-checkasm-rv40dsp \
fate-checkasm-svq1enc \
fate-checkasm-synth_filter \
fate-checkasm-sw_gbrp \
@@ -57,6 +59,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-vorbisdsp \
fate-checkasm-vp8dsp \
fate-checkasm-vp9dsp \
+ fate-checkasm-vvc_alf \
fate-checkasm-vvc_mc \
$(FATE_CHECKASM): tests/checkasm/checkasm$(EXESUF)
diff --git a/tests/fate/iamf.mak b/tests/fate/iamf.mak
index cf56a67e6f..164fd78bf6 100644
--- a/tests/fate/iamf.mak
+++ b/tests/fate/iamf.mak
@@ -37,8 +37,20 @@ fate-iamf-ambisonic_1: CMD = transcode wav $(SRC) iamf "-auto_conversion_filters
-streamid 0:0 -streamid 1:1 -streamid 2:2 -streamid 3:3 -map [MONO0] -map [MONO1] -map [MONO2] -map [MONO3] -c:a flac -t 1" "-c:a copy -map 0" \
"-show_entries stream_group=index,id,nb_streams,type:stream_group_components:stream_group_stream=index,id:stream_group_stream_disposition"
+FATE_IAMF_SAMPLES-$(call FRAMECRC, IAMF, OPUS) += fate-iamf-5_1-demux
+fate-iamf-5_1-demux: CMD = stream_demux iamf $(TARGET_SAMPLES)/iamf/test_000059.iamf "" \
+ "-c:a copy -frames:a 0 -map 0" \
+ "-show_entries stream_group=index,id,nb_streams,type:stream_group_components:stream_group_stream=index,id:stream_group_stream_disposition"
+
+FATE_IAMF_SAMPLES-$(call REMUX, IAMF, OPUS_DECODER) += fate-iamf-5_1-copy
+fate-iamf-5_1-copy: CMD = stream_remux iamf $(TARGET_SAMPLES)/iamf/test_000059.iamf "" iamf \
+ "-map 0 -stream_group map=0=0:st=0:st=1:st=2:st=3 -stream_group map=0=1:stg=0 -streamid 0:0 -streamid 1:1 -streamid 2:2 -streamid 3:3" "" "-c:a copy -frames:a 0 -map 0" \
+ "-show_entries stream_group=index,id,nb_streams,type:stream_group_components:stream_group_stream=index,id:stream_group_stream_disposition"
+
FATE_IAMF += $(FATE_IAMF-yes)
+FATE_IAMF_SAMPLES += $(FATE_IAMF_SAMPLES-yes)
FATE_FFMPEG_FFPROBE += $(FATE_IAMF)
+FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_IAMF_SAMPLES)
-fate-iamf: $(FATE_IAMF)
+fate-iamf: $(FATE_IAMF) $(FATE_IAMF_SAMPLES)
diff --git a/tests/fate/lavf-container.mak b/tests/fate/lavf-container.mak
index d89174d221..d84117c50f 100644
--- a/tests/fate/lavf-container.mak
+++ b/tests/fate/lavf-container.mak
@@ -74,6 +74,7 @@ FATE_LAVF_CONTAINER_FATE-$(call ALLYES, IVF_DEMUXER AV1_DECODER AV1_PARSER MOV_M
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, IVF_DEMUXER AV1_DECODER AV1_PARSER MATROSKA_MUXER) += av1.mkv
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, EVC_DEMUXER EVC_PARSER MOV_MUXER) += evc.mp4
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, H264_DEMUXER H264_PARSER MOV_MUXER) += h264.mp4
+FATE_LAVF_CONTAINER_FATE-$(call ALLYES, VVC_DEMUXER VVC_PARSER MOV_MUXER) += vvc.mp4
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, MATROSKA_DEMUXER OGG_MUXER) += vp3.ogg
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, MATROSKA_DEMUXER OGV_MUXER) += vp8.ogg
FATE_LAVF_CONTAINER_FATE-$(call ALLYES, MOV_DEMUXER LATM_MUXER) += latm
@@ -91,6 +92,7 @@ fate-lavf-fate-av1.mp4: CMD = lavf_container_fate "av1-test-vectors/av1-1-b8-05-
fate-lavf-fate-av1.mkv: CMD = lavf_container_fate "av1-test-vectors/av1-1-b8-05-mv.ivf" "-c:v av1" "-c:v copy"
fate-lavf-fate-evc.mp4: CMD = lavf_container_fate "evc/akiyo_cif.evc" "" "-c:v copy"
fate-lavf-fate-h264.mp4: CMD = lavf_container_fate "h264/intra_refresh.h264" "" "-c:v copy"
+fate-lavf-fate-vvc.mp4: CMD = lavf_container_fate "vvc-conformance/VPS_A_3.bit" "" "-c:v copy"
fate-lavf-fate-vp3.ogg: CMD = lavf_container_fate "vp3/coeff_level64.mkv" "-idct auto"
fate-lavf-fate-vp8.ogg: CMD = lavf_container_fate "vp8/RRSF49-short.webm" "" "-acodec copy"
fate-lavf-fate-latm: CMD = lavf_container_fate "aac/al04_44.mp4" "" "-acodec copy"
diff --git a/tests/filtergraphs/scale2ref_keep_aspect b/tests/filtergraphs/scale2ref_keep_aspect
index f407460ec7..00b04fc3d1 100644
--- a/tests/filtergraphs/scale2ref_keep_aspect
+++ b/tests/filtergraphs/scale2ref_keep_aspect
@@ -1,5 +1,4 @@
sws_flags=+accurate_rnd+bitexact;
testsrc=size=320x240 [main];
testsrc=size=640x360 [ref];
-[main][ref] scale2ref=iw/4:ow/mdar [main][ref];
-[ref] nullsink
+[main][ref] scale=rw/4:ow/dar [main]
diff --git a/tests/ref/fate/exif-image-tiff b/tests/ref/fate/exif-image-tiff
index 887c039df9..f5ff4dc16c 100644
--- a/tests/ref/fate/exif-image-tiff
+++ b/tests/ref/fate/exif-image-tiff
@@ -20,7 +20,7 @@ crop_left=0
crop_right=0
pix_fmt=rgb24
sample_aspect_ratio=1:1
-pict_type=?
+pict_type=I
interlaced_frame=0
top_field_first=0
repeat_pict=0
diff --git a/tests/ref/fate/iamf-5_1-copy b/tests/ref/fate/iamf-5_1-copy
new file mode 100644
index 0000000000..d3530702ee
--- /dev/null
+++ b/tests/ref/fate/iamf-5_1-copy
@@ -0,0 +1,305 @@
+#extradata 0: 19, 0x379c0490
+#extradata 1: 19, 0x379c0490
+#extradata 2: 19, 0x3792048f
+#extradata 3: 19, 0x3792048f
+#tb 0: 1/48000
+#media_type 0: audio
+#codec_id 0: opus
+#sample_rate 0: 48000
+#channel_layout_name 0: stereo
+#tb 1: 1/48000
+#media_type 1: audio
+#codec_id 1: opus
+#sample_rate 1: 48000
+#channel_layout_name 1: stereo
+#tb 2: 1/48000
+#media_type 2: audio
+#codec_id 2: opus
+#sample_rate 2: 48000
+#channel_layout_name 2: mono
+#tb 3: 1/48000
+#media_type 3: audio
+#codec_id 3: opus
+#sample_rate 3: 48000
+#channel_layout_name 3: mono
+[STREAM_GROUP]
+index=0
+id=0x12c
+nb_streams=4
+type=IAMF Audio Element
+[COMPONENT]
+nb_layers=2
+audio_element_type=0
+default_w=0
+[SUBCOMPONENT]
+channel_layout=stereo
+output_gain_flags=0
+output_gain=0/1
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+channel_layout=5.1
+output_gain_flags=0
+output_gain=0/1
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+name=demixing_info
+nb_subblocks=1
+type=1
+parameter_id=998
+parameter_rate=48000
+duration=960
+constant_subblock_duration=960
+[PIECE]
+subblock_duration=960
+dmixp_mode=1
+[/PIECE]
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+name=recon_gain_info
+nb_subblocks=1
+type=2
+parameter_id=101
+parameter_rate=48000
+duration=960
+constant_subblock_duration=960
+[PIECE]
+subblock_duration=960
+[/PIECE]
+[/SUBCOMPONENT]
+[/COMPONENT]
+[STREAM]
+index=0
+id=0x0
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=0
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=1
+id=0x1
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=2
+id=0x2
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=3
+id=0x3
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[/STREAM_GROUP]
+[STREAM_GROUP]
+index=1
+id=0x2a
+nb_streams=4
+type=IAMF Mix Presentation
+[COMPONENT]
+nb_submixes=1
+[SUBCOMPONENT]
+en-us=test_mix_pres
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+nb_elements=1
+nb_layouts=2
+default_mix_gain=0/256
+[PIECE]
+stream_id=300
+default_mix_gain=0/256
+headphones_rendering_mode=0
+[SUBPIECE]
+en-us=test_sub_mix_0_audio_element_0
+[/SUBPIECE]
+[SUBPIECE]
+name=element_mix_config
+nb_subblocks=0
+type=0
+parameter_id=100
+parameter_rate=48000
+duration=0
+constant_subblock_duration=0
+[/SUBPIECE]
+[/PIECE]
+[PIECE]
+name=output_mix_config
+nb_subblocks=0
+type=0
+parameter_id=100
+parameter_rate=48000
+duration=0
+constant_subblock_duration=0
+[/PIECE]
+[PIECE]
+sound_system=stereo
+integrated_loudness=-5273/256
+digital_peak=-2197/256
+true_peak=0/1
+dialogue_anchored_loudness=0/1
+album_anchored_loudness=0/1
+[/PIECE]
+[PIECE]
+sound_system=5.1
+integrated_loudness=-5886/256
+digital_peak=-2845/256
+true_peak=0/1
+dialogue_anchored_loudness=0/1
+album_anchored_loudness=0/1
+[/PIECE]
+[/SUBCOMPONENT]
+[/COMPONENT]
+[STREAM]
+index=0
+id=0x0
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=0
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=1
+id=0x1
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=2
+id=0x2
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=3
+id=0x3
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[/STREAM_GROUP]
diff --git a/tests/ref/fate/iamf-5_1-demux b/tests/ref/fate/iamf-5_1-demux
new file mode 100644
index 0000000000..d3530702ee
--- /dev/null
+++ b/tests/ref/fate/iamf-5_1-demux
@@ -0,0 +1,305 @@
+#extradata 0: 19, 0x379c0490
+#extradata 1: 19, 0x379c0490
+#extradata 2: 19, 0x3792048f
+#extradata 3: 19, 0x3792048f
+#tb 0: 1/48000
+#media_type 0: audio
+#codec_id 0: opus
+#sample_rate 0: 48000
+#channel_layout_name 0: stereo
+#tb 1: 1/48000
+#media_type 1: audio
+#codec_id 1: opus
+#sample_rate 1: 48000
+#channel_layout_name 1: stereo
+#tb 2: 1/48000
+#media_type 2: audio
+#codec_id 2: opus
+#sample_rate 2: 48000
+#channel_layout_name 2: mono
+#tb 3: 1/48000
+#media_type 3: audio
+#codec_id 3: opus
+#sample_rate 3: 48000
+#channel_layout_name 3: mono
+[STREAM_GROUP]
+index=0
+id=0x12c
+nb_streams=4
+type=IAMF Audio Element
+[COMPONENT]
+nb_layers=2
+audio_element_type=0
+default_w=0
+[SUBCOMPONENT]
+channel_layout=stereo
+output_gain_flags=0
+output_gain=0/1
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+channel_layout=5.1
+output_gain_flags=0
+output_gain=0/1
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+name=demixing_info
+nb_subblocks=1
+type=1
+parameter_id=998
+parameter_rate=48000
+duration=960
+constant_subblock_duration=960
+[PIECE]
+subblock_duration=960
+dmixp_mode=1
+[/PIECE]
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+name=recon_gain_info
+nb_subblocks=1
+type=2
+parameter_id=101
+parameter_rate=48000
+duration=960
+constant_subblock_duration=960
+[PIECE]
+subblock_duration=960
+[/PIECE]
+[/SUBCOMPONENT]
+[/COMPONENT]
+[STREAM]
+index=0
+id=0x0
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=0
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=1
+id=0x1
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=2
+id=0x2
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=3
+id=0x3
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[/STREAM_GROUP]
+[STREAM_GROUP]
+index=1
+id=0x2a
+nb_streams=4
+type=IAMF Mix Presentation
+[COMPONENT]
+nb_submixes=1
+[SUBCOMPONENT]
+en-us=test_mix_pres
+[/SUBCOMPONENT]
+[SUBCOMPONENT]
+nb_elements=1
+nb_layouts=2
+default_mix_gain=0/256
+[PIECE]
+stream_id=300
+default_mix_gain=0/256
+headphones_rendering_mode=0
+[SUBPIECE]
+en-us=test_sub_mix_0_audio_element_0
+[/SUBPIECE]
+[SUBPIECE]
+name=element_mix_config
+nb_subblocks=0
+type=0
+parameter_id=100
+parameter_rate=48000
+duration=0
+constant_subblock_duration=0
+[/SUBPIECE]
+[/PIECE]
+[PIECE]
+name=output_mix_config
+nb_subblocks=0
+type=0
+parameter_id=100
+parameter_rate=48000
+duration=0
+constant_subblock_duration=0
+[/PIECE]
+[PIECE]
+sound_system=stereo
+integrated_loudness=-5273/256
+digital_peak=-2197/256
+true_peak=0/1
+dialogue_anchored_loudness=0/1
+album_anchored_loudness=0/1
+[/PIECE]
+[PIECE]
+sound_system=5.1
+integrated_loudness=-5886/256
+digital_peak=-2845/256
+true_peak=0/1
+dialogue_anchored_loudness=0/1
+album_anchored_loudness=0/1
+[/PIECE]
+[/SUBCOMPONENT]
+[/COMPONENT]
+[STREAM]
+index=0
+id=0x0
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=0
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=1
+id=0x1
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=2
+id=0x2
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[STREAM]
+index=3
+id=0x3
+DISPOSITION:default=0
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:non_diegetic=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=1
+DISPOSITION:still_image=0
+[/STREAM]
+[/STREAM_GROUP]
diff --git a/tests/ref/fate/jv-demux b/tests/ref/fate/jv-demux
index b5d3196cf5..22a9217e23 100644
--- a/tests/ref/fate/jv-demux
+++ b/tests/ref/fate/jv-demux
@@ -11,13 +11,13 @@
0, 0, 0, 1, 6, 0x000a0003
1, 0, 0, 131072, 131072, 0x14c664d6
0, 1, 1, 1, 773, 0x11802a51
-0, 2, 2, 1, 12974, 0xc2e466b7
-0, 3, 3, 1, 12200, 0x3c0eeb31
-0, 4, 4, 1, 13339, 0x91d82488
-0, 5, 5, 1, 13940, 0x064c350a
-0, 6, 6, 1, 14418, 0x078d2dd2
-0, 7, 7, 1, 14539, 0x145167ed
-0, 8, 8, 1, 2552, 0xcf2b1db7, F=0x3
+0, 2, 2, 1, 12974, 0xc2e466b7, F=0x0
+0, 3, 3, 1, 12200, 0x3c0eeb31, F=0x0
+0, 4, 4, 1, 13339, 0x91d82488, F=0x0
+0, 5, 5, 1, 13940, 0x064c350a, F=0x0
+0, 6, 6, 1, 14418, 0x078d2dd2, F=0x0
+0, 7, 7, 1, 14539, 0x145167ed, F=0x0
+0, 8, 8, 1, 2552, 0xcf2b1db7, F=0x2
1, 131072, 131072, 1764, 1764, 0x30be734d
1, 132836, 132836, 1764, 1764, 0xa4c873a7
1, 134600, 134600, 1764, 1764, 0xd5f17443
diff --git a/tests/ref/fate/mov-mp4-pcm b/tests/ref/fate/mov-mp4-pcm
index 6bae8f800b..19a978df95 100644
--- a/tests/ref/fate/mov-mp4-pcm
+++ b/tests/ref/fate/mov-mp4-pcm
@@ -1,4 +1,4 @@
-99ad26b4054794e84bd962a1124cdccf *tests/data/fate/mov-mp4-pcm.mp4
+462668dd69e7ce4fde4934d1d5978531 *tests/data/fate/mov-mp4-pcm.mp4
10587977 tests/data/fate/mov-mp4-pcm.mp4
#tb 0: 1/44100
#media_type 0: audio
diff --git a/tests/ref/fate/movenc b/tests/ref/fate/movenc
index 968a3d27f2..5c12aeb29f 100644
--- a/tests/ref/fate/movenc
+++ b/tests/ref/fate/movenc
@@ -20,7 +20,7 @@ write_data len 828, time nopts, type unknown atom -
write_data len 728, time 999999, type sync atom moof
write_data len 812, time nopts, type unknown atom -
write_data len 148, time nopts, type trailer atom -
-92ce825ff40505ec8676191705adb7e7 4439 ismv
+d2df24d323f4a8896441cd91203ac5f8 4439 ismv
write_data len 36, time nopts, type header atom ftyp
write_data len 1123, time nopts, type header atom -
write_data len 796, time 0, type sync atom moof
@@ -151,3 +151,11 @@ write_data len 900, time 0, type sync atom moof
write_data len 908, time 1000000, type sync atom moof
write_data len 148, time nopts, type trailer atom -
3be575022e446855bca1e45b7942cc0c 3115 empty-moov-neg-cts
+write_data len 28, time nopts, type header atom ftyp
+write_data len 1123, time nopts, type header atom -
+write_data len 70, time 0, type boundary atom emsg
+write_data len 1832, time 0, type sync atom moof
+write_data len 70, time 2000000, type boundary atom emsg
+write_data len 1840, time 2000000, type sync atom moof
+write_data len 148, time nopts, type trailer atom -
+b72c56c795693820b156f452354a51ff 5111 emsg
diff --git a/tests/ref/fate/opt b/tests/ref/fate/opt
index f4fce1bd49..578350bc33 100644
--- a/tests/ref/fate/opt
+++ b/tests/ref/fate/opt
@@ -1,5 +1,6 @@
Testing default values
num=0
+unum=2147483648
toggle=1
string=default
escape=\=,
@@ -14,7 +15,7 @@ color=255 192 203 255
channel_layout=311=311
binary=62 69 6e 0
binary_size=4
-num64=1
+num64=4294967296
flt=0.333333
dbl=0.333333
array_str[0]=str0
@@ -24,7 +25,8 @@ array_dict[0]: k00 v\00
array_dict[0]: k01 v,01
array_dict[1]: k10 v=1:0
TestContext AVOptions:
- -num <int> E.......... set num (from 0 to 100) (default 0)
+ -num <int> E.......... set num (from -1 to 100) (default 0)
+ -unum <unsigned> E.......... set unum (from 0 to 2.14748e+09) (default 2147483648)
-toggle <int> E.......... set toggle (from 0 to 1) (default 1)
-rational <rational> E.......... set rational (from 0 to 10) (default 1/1)
-string <string> E.......... set string (default "default")
@@ -43,7 +45,7 @@ TestContext AVOptions:
-bin <binary> E.......... set binary value
-bin1 <binary> E.......... set binary value
-bin2 <binary> E.......... set binary value
- -num64 <int64> E.......... set num 64bit (from 0 to 100) (default 1)
+ -num64 <int64> E.......... set num 64bit (from -1 to 4.29497e+09) (default 4294967296)
-flt <float> E.......... set float (from 0 to 100) (default 0.333333)
-dbl <double> E.......... set double (from 0 to 100) (default 0.333333)
-bool1 <boolean> E.......... set boolean value (default auto)
@@ -57,6 +59,7 @@ TestContext AVOptions:
Testing av_opt_is_set_to_default()
name: num default:1 error:
+name: unum default:0 error:
name: toggle default:0 error:
name: rational default:0 error:
name: string default:0 error:
@@ -87,6 +90,7 @@ name: array_int default:0 error:
name: array_str default:0 error:
name:array_dict default:0 error:
name: num default:1 error:
+name: unum default:1 error:
name: toggle default:1 error:
name: rational default:1 error:
name: string default:1 error:
@@ -119,6 +123,7 @@ name:array_dict default:1 error:
Testing av_opt_get/av_opt_set()
name: num get: 0 set: OK get: 0 OK
+name: unum get: 2147483648 set: OK get: 2147483648 OK
name: toggle get: 1 set: OK get: 1 OK
name: rational get: 1/1 set: OK get: 1/1 OK
name: string get: default set: OK get: default OK
@@ -134,7 +139,7 @@ name: cl get: hexagonal set: OK get: hexagonal
name: bin get: 62696E00 set: OK get: 62696E00 OK
name: bin1 get: set: OK get: OK
name: bin2 get: set: OK get: OK
-name: num64 get: 1 set: OK get: 1 OK
+name: num64 get: 4294967296 set: OK get: 4294967296 OK
name: flt get: 0.333333 set: OK get: 0.333333 OK
name: dbl get: 0.333333 set: OK get: 0.333333 OK
name: bool1 get: auto set: OK get: auto OK
@@ -150,8 +155,9 @@ array_dict=NULL; nb_array_dict=0
av_opt_get("array_dict") -> NULL
Test av_opt_serialize()
-num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=hexagonal,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false,dict1=,dict2=happy\=\\:-),array_int=,array_str=str0|str\\|1|str\\\\2,array_dict=k00\=v\\\\\\\\00:k01\=v\\\,01\,k10\=v\\\\\=1\\\\:0
+num=0,unum=2147483648,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=hexagonal,bin=62696E00,bin1=,bin2=,num64=4294967296,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false,dict1=,dict2=happy\=\\:-),array_int=,array_str=str0|str\\|1|str\\\\2,array_dict=k00\=v\\\\\\\\00:k01\=v\\\,01\,k10\=v\\\\\=1\\\\:0
Setting entry with key 'num' to value '0'
+Setting entry with key 'unum' to value '2147483648'
Setting entry with key 'toggle' to value '1'
Setting entry with key 'rational' to value '1/1'
Setting entry with key 'string' to value 'default'
@@ -167,7 +173,7 @@ Setting entry with key 'cl' to value 'hexagonal'
Setting entry with key 'bin' to value '62696E00'
Setting entry with key 'bin1' to value ''
Setting entry with key 'bin2' to value ''
-Setting entry with key 'num64' to value '1'
+Setting entry with key 'num64' to value '4294967296'
Setting entry with key 'flt' to value '0.333333'
Setting entry with key 'dbl' to value '0.333333'
Setting entry with key 'bool1' to value 'auto'
@@ -178,7 +184,8 @@ Setting entry with key 'dict2' to value 'happy=\:-)'
Setting entry with key 'array_int' to value ''
Setting entry with key 'array_str' to value 'str0|str\|1|str\\2'
Setting entry with key 'array_dict' to value 'k00=v\\\\00:k01=v\,01,k10=v\\=1\\:0'
-num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=hexagonal,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false,dict1=,dict2=happy\=\\:-),array_int=,array_str=str0|str\\|1|str\\\\2,array_dict=k00\=v\\\\\\\\00:k01\=v\\\,01\,k10\=v\\\\\=1\\\\:0
+num=0,unum=2147483648,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=hexagonal,bin=62696E00,bin1=,bin2=,num64=4294967296,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false,dict1=,dict2=happy\=\\:-),array_int=,array_str=str0|str\\|1|str\\\\2,array_dict=k00\=v\\\\\\\\00:k01\=v\\\,01\,k10\=v\\\\\=1\\\\:0
+child_num=0,flt=0.333333,dbl=0.333333,array_int=
Testing av_set_options_string()
Setting options string ''
@@ -336,6 +343,50 @@ Error 'bin=111'
Setting options string 'bin=ffff'
Setting entry with key 'bin' to value 'ffff'
OK 'bin=ffff'
+Setting options string 'num=bogus'
+Setting entry with key 'num' to value 'bogus'
+Undefined constant or missing '(' in 'bogus'
+Unable to parse option value "bogus"
+Error 'num=bogus'
+Setting options string 'num=44'
+Setting entry with key 'num' to value '44'
+OK 'num=44'
+Setting options string 'num=44.4'
+Setting entry with key 'num' to value '44.4'
+OK 'num=44.4'
+Setting options string 'num=-1'
+Setting entry with key 'num' to value '-1'
+OK 'num=-1'
+Setting options string 'num=-2'
+Setting entry with key 'num' to value '-2'
+Value -2.000000 for parameter 'num' out of range [-1 - 100]
+Error 'num=-2'
+Setting options string 'num=101'
+Setting entry with key 'num' to value '101'
+Value 101.000000 for parameter 'num' out of range [-1 - 100]
+Error 'num=101'
+Setting options string 'unum=bogus'
+Setting entry with key 'unum' to value 'bogus'
+Undefined constant or missing '(' in 'bogus'
+Unable to parse option value "bogus"
+Error 'unum=bogus'
+Setting options string 'unum=44'
+Setting entry with key 'unum' to value '44'
+OK 'unum=44'
+Setting options string 'unum=44.4'
+Setting entry with key 'unum' to value '44.4'
+OK 'unum=44.4'
+Setting options string 'unum=-1'
+Setting entry with key 'unum' to value '-1'
+Value -1.000000 for parameter 'unum' out of range [0 - 2.14748e+09]
+Error 'unum=-1'
+Setting options string 'unum=2147483648'
+Setting entry with key 'unum' to value '2147483648'
+OK 'unum=2147483648'
+Setting options string 'unum=2147483649'
+Setting entry with key 'unum' to value '2147483649'
+Value 2147483649.000000 for parameter 'unum' out of range [0 - 2.14748e+09]
+Error 'unum=2147483649'
Setting options string 'num64=bogus'
Setting entry with key 'num64' to value 'bogus'
Undefined constant or missing '(' in 'bogus'
@@ -349,12 +400,18 @@ Setting entry with key 'num64' to value '44.4'
OK 'num64=44.4'
Setting options string 'num64=-1'
Setting entry with key 'num64' to value '-1'
-Value -1.000000 for parameter 'num64' out of range [0 - 100]
-Error 'num64=-1'
-Setting options string 'num64=101'
-Setting entry with key 'num64' to value '101'
-Value 101.000000 for parameter 'num64' out of range [0 - 100]
-Error 'num64=101'
+OK 'num64=-1'
+Setting options string 'num64=-2'
+Setting entry with key 'num64' to value '-2'
+Value -2.000000 for parameter 'num64' out of range [-1 - 4.29497e+09]
+Error 'num64=-2'
+Setting options string 'num64=4294967296'
+Setting entry with key 'num64' to value '4294967296'
+OK 'num64=4294967296'
+Setting options string 'num64=4294967297'
+Setting entry with key 'num64' to value '4294967297'
+Value 4294967297.000000 for parameter 'num64' out of range [-1 - 4.29497e+09]
+Error 'num64=4294967297'
Setting options string 'flt=bogus'
Setting entry with key 'flt' to value 'bogus'
Undefined constant or missing '(' in 'bogus'
@@ -448,3 +505,10 @@ Setting options string 'a_very_long_option_name_that_will_need_to_be_ellipsized_
Setting 'a_very_long_option_name_that_will_need_to_be_ellipsized_around_here' to value '42'
Option 'a_very_long_option_name_that_will_need_to_be_ellipsized_around_here' not found
Error 'a_very_long_option_name_that_will_need_to_be_ellipsized_around_here=42'
+
+Testing av_opt_find2()
+OK 'num'
+Error 'num64'
+Error 'child_num'
+OK 'child_num'
+Error 'foo'
diff --git a/tests/ref/fate/side_data_array b/tests/ref/fate/side_data_array
index 7d8c684d8f..c1d77b0445 100644
--- a/tests/ref/fate/side_data_array
+++ b/tests/ref/fate/side_data_array
@@ -1,14 +1,14 @@
Initial addition results with duplicates:
-sd 0, Ambient viewing environment
-sd 1, Content light level metadata: MaxCLL: 1
-sd 2, Content light level metadata: MaxCLL: 2
-sd 3, Content light level metadata: MaxCLL: 3
-sd 4, Spherical Mapping
-sd 5, Content light level metadata: MaxCLL: 4
-sd 6, Content light level metadata: MaxCLL: 5
-sd 7, Content light level metadata: MaxCLL: 6
+sd 0 (size 4), Content light level metadata
+sd 1 (size 4), H.26[45] User Data Unregistered SEI message: 1
+sd 2 (size 4), H.26[45] User Data Unregistered SEI message: 2
+sd 3 (size 4), H.26[45] User Data Unregistered SEI message: 3
+sd 4 (size 4), Spherical Mapping
+sd 5 (size 4), H.26[45] User Data Unregistered SEI message: 4
+sd 6 (size 4), H.26[45] User Data Unregistered SEI message: 5
+sd 7 (size 4), H.26[45] User Data Unregistered SEI message: 6
Final state after a single 'no-duplicates' addition:
-sd 0, Ambient viewing environment
-sd 1, Spherical Mapping
-sd 2, Content light level metadata: MaxCLL: 1337
+sd 0 (size 4), Content light level metadata
+sd 1 (size 4), Spherical Mapping
+sd 2 (size 4), H.26[45] User Data Unregistered SEI message: 1337
diff --git a/tests/ref/fate/source b/tests/ref/fate/source
index 7b5f14b4f0..723e2e06c7 100644
--- a/tests/ref/fate/source
+++ b/tests/ref/fate/source
@@ -5,11 +5,14 @@ libavcodec/ilbcdec.c
libavcodec/interplayacm.c
libavcodec/log2_tab.c
libavcodec/reverse.c
+libavcodec/riscv/startcode_rvb.S
+libavcodec/riscv/startcode_rvv.S
libavdevice/file_open.c
libavdevice/reverse.c
libavfilter/af_arnndn.c
libavfilter/file_open.c
libavfilter/log2_tab.c
+libavformat/bitstream.c
libavformat/file_open.c
libavformat/golomb_tab.c
libavformat/log2_tab.c
diff --git a/tests/ref/fate/sub-webvtt b/tests/ref/fate/sub-webvtt
index ea587b327c..fae50607fb 100644
--- a/tests/ref/fate/sub-webvtt
+++ b/tests/ref/fate/sub-webvtt
@@ -21,7 +21,7 @@ Dialogue: 0,0:00:22.00,0:00:24.00,Default,,0,0,0,,at the AMNH.
Dialogue: 0,0:00:24.00,0:00:26.00,Default,,0,0,0,,Thank you for walking down here.
Dialogue: 0,0:00:27.00,0:00:30.00,Default,,0,0,0,,And I want to do a follow-up on the last conversation we did.\Nmultiple lines\Nagain
Dialogue: 0,0:00:30.00,0:00:31.50,Default,,0,0,0,,When we e-mailed—
-Dialogue: 0,0:00:30.50,0:00:32.50,Default,,0,0,0,,Didn't we {\b1}talk {\i1}about\N{\i0} enough{\b0} in that conversation? \{I'm not an ASS comment\}
+Dialogue: 0,0:00:30.50,0:00:32.50,Default,,0,0,0,,Didn't we {\b1}talk {\i1}about\N{\i0} enough{\b0} in that conversation? \{{}I'm not an ASS comment}
Dialogue: 0,0:00:32.00,0:00:35.50,Default,,0,0,0,,No! No no no no; 'cos 'cos obviously 'cos
Dialogue: 0,0:00:32.50,0:00:33.50,Default,,0,0,0,,{\i1}Laughs{\i0}
Dialogue: 0,0:00:35.50,0:00:38.00,Default,,0,0,0,,You know I'm so excited my glasses are falling off here.
diff --git a/tests/ref/fate/sub-webvtt2 b/tests/ref/fate/sub-webvtt2
index 90f78d904b..2925d892a0 100644
--- a/tests/ref/fate/sub-webvtt2
+++ b/tests/ref/fate/sub-webvtt2
@@ -21,6 +21,6 @@ Dialogue: 0,0:00:12.50,0:00:32.50,Default,,0,0,0,,OK, let’s go.
Dialogue: 0,0:00:38.00,0:00:43.00,Default,,0,0,0,,I want to 愛あい love you\NThat's not proper English!
Dialogue: 0,0:00:43.00,0:00:46.00,Default,,0,0,0,,{\i1}キツネ{\i0}じゃない キツネじゃない\N乙女おとめは
Dialogue: 0,0:00:50.00,0:00:55.00,Default,,0,0,0,,Some time ago in a rather distant place....
-Dialogue: 0,0:00:55.00,0:01:00.00,Default,,0,0,0,,Descending: 123456\NAscending: 123456
+Dialogue: 0,0:00:55.00,0:01:00.00,Default,,0,0,0,,Descending: ‏123456‎\NAscending: 123456
Dialogue: 0,0:01:00.00,0:01:05.00,Default,,0,0,0,,>> Never gonna give you up Never gonna let you down\NNever\hgonna\hrun\haround & desert\hyou
Dialogue: 0,0:55:00.00,1:00:00.00,Default,,0,0,0,,Transcrit par Célestes™
diff --git a/tests/ref/lavf-fate/vvc.mp4 b/tests/ref/lavf-fate/vvc.mp4
new file mode 100644
index 0000000000..240715884b
--- /dev/null
+++ b/tests/ref/lavf-fate/vvc.mp4
@@ -0,0 +1,3 @@
+30b856d289220b477b5a0935f60aa793 *tests/data/lavf-fate/lavf.vvc.mp4
+16625 tests/data/lavf-fate/lavf.vvc.mp4
+tests/data/lavf-fate/lavf.vvc.mp4 CRC=0x895790b1
diff --git a/tests/ref/vsynth/vsynth1-mpeg4-thread b/tests/ref/vsynth/vsynth1-mpeg4-thread
index 6b69fb4c12..6b110c49fb 100644
--- a/tests/ref/vsynth/vsynth1-mpeg4-thread
+++ b/tests/ref/vsynth/vsynth1-mpeg4-thread
@@ -1,4 +1,4 @@
-369ace2f9613261af869efd9fbb3c149 *tests/data/fate/vsynth1-mpeg4-thread.avi
-774754 tests/data/fate/vsynth1-mpeg4-thread.avi
-9aa327a244d5179acf7fe64dc1459bff *tests/data/fate/vsynth1-mpeg4-thread.out.rawvideo
+7761391e354266976a9e0155eff983dd *tests/data/fate/vsynth1-mpeg4-thread.avi
+774752 tests/data/fate/vsynth1-mpeg4-thread.avi
+bbdbe9af4f5b106b847595bf3040699f *tests/data/fate/vsynth1-mpeg4-thread.out.rawvideo
stddev: 10.13 PSNR: 28.02 MAXDIFF: 183 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth2-mpeg2-ivlc-qprd b/tests/ref/vsynth/vsynth2-mpeg2-ivlc-qprd
index 16de39edfc..f5bbecfcb2 100644
--- a/tests/ref/vsynth/vsynth2-mpeg2-ivlc-qprd
+++ b/tests/ref/vsynth/vsynth2-mpeg2-ivlc-qprd
@@ -1,4 +1,4 @@
-907a30295ed8323780eee08e606af0ab *tests/data/fate/vsynth2-mpeg2-ivlc-qprd.mpeg2video
-269722 tests/data/fate/vsynth2-mpeg2-ivlc-qprd.mpeg2video
-d2d9793bf8f3427b5cc17a1be78ddd64 *tests/data/fate/vsynth2-mpeg2-ivlc-qprd.out.rawvideo
+f612ea89aa79a7f7b93a8acf332705c4 *tests/data/fate/vsynth2-mpeg2-ivlc-qprd.mpeg2video
+269723 tests/data/fate/vsynth2-mpeg2-ivlc-qprd.mpeg2video
+88e17886e6383755829d7da519fd5e79 *tests/data/fate/vsynth2-mpeg2-ivlc-qprd.out.rawvideo
stddev: 5.54 PSNR: 33.25 MAXDIFF: 94 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth2-mpeg4-adap b/tests/ref/vsynth/vsynth2-mpeg4-adap
index 35b2b6aac9..e058cd1ce3 100644
--- a/tests/ref/vsynth/vsynth2-mpeg4-adap
+++ b/tests/ref/vsynth/vsynth2-mpeg4-adap
@@ -1,4 +1,4 @@
-06a397fe43dab7b6cf56870410fbbbaf *tests/data/fate/vsynth2-mpeg4-adap.avi
-203000 tests/data/fate/vsynth2-mpeg4-adap.avi
-686565d42d8ba5aea790824b04fa0a18 *tests/data/fate/vsynth2-mpeg4-adap.out.rawvideo
-stddev: 4.55 PSNR: 34.95 MAXDIFF: 84 bytes: 7603200/ 7603200
+9465ef120d560537d8fcfb5564782e01 *tests/data/fate/vsynth2-mpeg4-adap.avi
+203004 tests/data/fate/vsynth2-mpeg4-adap.avi
+d7851ab1ca9744f8e618a24193e5ef76 *tests/data/fate/vsynth2-mpeg4-adap.out.rawvideo
+stddev: 4.56 PSNR: 34.95 MAXDIFF: 84 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth2-mpeg4-qprd b/tests/ref/vsynth/vsynth2-mpeg4-qprd
index 0a8786b89a..3face947c2 100644
--- a/tests/ref/vsynth/vsynth2-mpeg4-qprd
+++ b/tests/ref/vsynth/vsynth2-mpeg4-qprd
@@ -1,4 +1,4 @@
-4ddd2fef35854d9b387bbcbda03dc7f0 *tests/data/fate/vsynth2-mpeg4-qprd.avi
-248706 tests/data/fate/vsynth2-mpeg4-qprd.avi
-baa8d0d57a7fb5e393642cb20efed2c2 *tests/data/fate/vsynth2-mpeg4-qprd.out.rawvideo
+33fc3d5507cc8d2c8b63b8f811e62e4c *tests/data/fate/vsynth2-mpeg4-qprd.avi
+248734 tests/data/fate/vsynth2-mpeg4-qprd.avi
+61f8006e8903915056493fb1f05d1b2f *tests/data/fate/vsynth2-mpeg4-qprd.out.rawvideo
stddev: 4.85 PSNR: 34.40 MAXDIFF: 85 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth2-mpeg4-thread b/tests/ref/vsynth/vsynth2-mpeg4-thread
index 49c0ce0241..a1791c49ec 100644
--- a/tests/ref/vsynth/vsynth2-mpeg4-thread
+++ b/tests/ref/vsynth/vsynth2-mpeg4-thread
@@ -1,4 +1,4 @@
-92128f8adc4ac70a66fdddf58e46b923 *tests/data/fate/vsynth2-mpeg4-thread.avi
-268396 tests/data/fate/vsynth2-mpeg4-thread.avi
-f432bd8d897c7c8e286e385b77cedcfa *tests/data/fate/vsynth2-mpeg4-thread.out.rawvideo
+44df605055498a01afb53eaaabdb94b4 *tests/data/fate/vsynth2-mpeg4-thread.avi
+268394 tests/data/fate/vsynth2-mpeg4-thread.avi
+13240eaccc345bf4b45f24d44cfc5ca2 *tests/data/fate/vsynth2-mpeg4-thread.out.rawvideo
stddev: 4.89 PSNR: 34.34 MAXDIFF: 86 bytes: 7603200/ 7603200
diff --git a/tests/ref/vsynth/vsynth_lena-mpeg4-rc b/tests/ref/vsynth/vsynth_lena-mpeg4-rc
index fd6a998046..a21ddc87e3 100644
--- a/tests/ref/vsynth/vsynth_lena-mpeg4-rc
+++ b/tests/ref/vsynth/vsynth_lena-mpeg4-rc
@@ -1,4 +1,4 @@
-396a76466dee56e2714dfa42cebe3d2d *tests/data/fate/vsynth_lena-mpeg4-rc.avi
+3ae5a2590bdd0e80a95bf374b06c553f *tests/data/fate/vsynth_lena-mpeg4-rc.avi
226314 tests/data/fate/vsynth_lena-mpeg4-rc.avi
-6e8b62e8c3bcbfdcc58afb69a0b1c4e3 *tests/data/fate/vsynth_lena-mpeg4-rc.out.rawvideo
+27c8771df4154f2be317465a3d3cbd56 *tests/data/fate/vsynth_lena-mpeg4-rc.out.rawvideo
stddev: 4.23 PSNR: 35.60 MAXDIFF: 85 bytes: 7603200/ 7603200
diff --git a/tools/Makefile b/tools/Makefile
index 72e8e709a8..2a11fa0ae6 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -5,6 +5,9 @@ TOOLS-$(CONFIG_ZLIB) += cws2fws
tools/target_dec_%_fuzzer.o: tools/target_dec_fuzzer.c
$(COMPILE_C) -DFFMPEG_DECODER=$*
+tools/target_enc_%_fuzzer.o: tools/target_enc_fuzzer.c
+ $(COMPILE_C) -DFFMPEG_ENCODER=$*
+
tools/target_bsf_%_fuzzer.o: tools/target_bsf_fuzzer.c
$(COMPILE_C) -DFFMPEG_BSF=$*
diff --git a/tools/target_enc_fuzzer.c b/tools/target_enc_fuzzer.c
new file mode 100644
index 0000000000..0345595ada
--- /dev/null
+++ b/tools/target_enc_fuzzer.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Based on target_dec_fuzzer
+ */
+
+#include "config.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/cpu.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/bytestream.h"
+#include "libavcodec/codec_internal.h"
+#include "libavformat/avformat.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+extern const FFCodec * codec_list[];
+
+static void error(const char *err)
+{
+ fprintf(stderr, "%s", err);
+ exit(1);
+}
+
+static const FFCodec *c = NULL;
+
+// Ensure we don't loop forever
+const uint32_t maxiteration = 8096;
+
+
+static int encode(AVCodecContext *enc_ctx, AVFrame *frame, AVPacket *pkt)
+{
+ int ret;
+
+ ret = avcodec_send_frame(enc_ctx, frame);
+ if (ret < 0)
+ return ret;
+
+ while (ret >= 0) {
+ ret = avcodec_receive_packet(enc_ctx, pkt);
+ if (ret == AVERROR(EAGAIN)) {
+ return 0;
+ } else if (ret < 0) {
+ return ret;
+ }
+
+ av_packet_unref(pkt);
+ }
+ av_assert0(0);
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ uint64_t maxpixels_per_frame = 512 * 512;
+ uint64_t maxpixels;
+
+ const uint8_t *end = data + size;
+ uint32_t it = 0;
+ uint64_t nb_samples = 0;
+ AVDictionary *opts = NULL;
+
+ if (!c) {
+#define ENCODER_SYMBOL0(CODEC) ff_##CODEC##_encoder
+#define ENCODER_SYMBOL(CODEC) ENCODER_SYMBOL0(CODEC)
+ extern FFCodec ENCODER_SYMBOL(FFMPEG_ENCODER);
+ codec_list[0] = &ENCODER_SYMBOL(FFMPEG_ENCODER);
+
+ c = &ENCODER_SYMBOL(FFMPEG_ENCODER);
+ av_log_set_level(AV_LOG_PANIC);
+ }
+
+ if (c->p.type != AVMEDIA_TYPE_VIDEO)
+ return 0;
+
+ maxpixels = maxpixels_per_frame * maxiteration;
+
+ maxpixels_per_frame = FFMIN(maxpixels_per_frame , maxpixels);
+
+ AVCodecContext* ctx = avcodec_alloc_context3(&c->p);
+ if (!ctx)
+ error("Failed memory allocation");
+
+ if (ctx->max_pixels == 0 || ctx->max_pixels > maxpixels_per_frame)
+ ctx->max_pixels = maxpixels_per_frame; //To reduce false positive OOM and hangs
+
+ ctx->pix_fmt = AV_PIX_FMT_YUV420P;
+ if (size > 1024) {
+ GetByteContext gbc;
+ int flags;
+ int64_t flags64;
+
+ size -= 1024;
+ bytestream2_init(&gbc, data + size, 1024);
+ ctx->width = bytestream2_get_le32(&gbc) & 0xFFFF;
+ ctx->height = bytestream2_get_le32(&gbc) & 0xFFFF;
+ ctx->bit_rate = bytestream2_get_le64(&gbc);
+ ctx->gop_size = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+ ctx->max_b_frames = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+ ctx->time_base.num = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+ ctx->time_base.den = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+ ctx->framerate.num = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+ ctx->framerate.den = bytestream2_get_le32(&gbc) & 0x7FFFFFFF;
+
+ flags = bytestream2_get_byte(&gbc);
+ if (flags & 2)
+ ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+
+ if (flags & 0x40)
+ av_force_cpu_flags(0);
+
+ flags64 = bytestream2_get_le64(&gbc);
+
+ if (c->p.pix_fmts) {
+ int npixfmts = 0;
+ while (c->p.pix_fmts[npixfmts++] != AV_PIX_FMT_NONE)
+ ;
+ ctx->pix_fmt = c->p.pix_fmts[bytestream2_get_byte(&gbc) % npixfmts];
+ }
+
+ switch (c->p.id) {
+ case AV_CODEC_ID_FFV1:{
+ int coder = bytestream2_get_byte(&gbc)&3;
+ if (coder == 3) coder = -2;
+ av_dict_set_int(&opts, "coder", coder, 0);
+ av_dict_set_int(&opts, "context", bytestream2_get_byte(&gbc)&1, 0);
+ av_dict_set_int(&opts, "slicecrc", bytestream2_get_byte(&gbc)&1, 0);
+ break;}
+ }
+ }
+ if (ctx->width == 0 || av_image_check_size(ctx->width, ctx->height, 0, ctx))
+ ctx->width = ctx->height = 64;
+
+ int res = avcodec_open2(ctx, &c->p, &opts);
+ if (res < 0) {
+ avcodec_free_context(&ctx);
+ av_dict_free(&opts);
+        return 0; // Failure of avcodec_open2() does not imply that an issue was found
+ }
+
+
+ AVFrame *frame = av_frame_alloc();
+ AVPacket *avpkt = av_packet_alloc();
+ if (!frame || !avpkt)
+ error("Failed memory allocation");
+
+ frame->format = ctx->pix_fmt;
+ frame->width = ctx->width;
+ frame->height = ctx->height;
+
+ while (data < end && it < maxiteration) {
+ res = av_frame_get_buffer(frame, 0);
+ if (res < 0)
+ error("Failed av_frame_get_buffer");
+
+ for (int i=0; i<FF_ARRAY_ELEMS(frame->buf); i++) {
+ if (frame->buf[i]) {
+ int buf_size = FFMIN(end-data, frame->buf[i]->size);
+ memcpy(frame->buf[i]->data, data, buf_size);
+ memset(frame->buf[i]->data + buf_size, 0, frame->buf[i]->size - buf_size);
+ data += buf_size;
+ }
+ }
+
+ frame->pts = nb_samples;
+
+ res = encode(ctx, frame, avpkt);
+ if (res < 0)
+ break;
+ it++;
+ for (int i=0; i<FF_ARRAY_ELEMS(frame->buf); i++)
+ av_buffer_unref(&frame->buf[i]);
+
+ av_packet_unref(avpkt);
+ }
+
+ encode(ctx, NULL, avpkt);
+ av_packet_unref(avpkt);
+
+// fprintf(stderr, "frames encoded: %"PRId64", iterations: %d\n", nb_samples , it);
+
+ av_frame_free(&frame);
+ avcodec_free_context(&ctx);
+ av_packet_free(&avpkt);
+ av_dict_free(&opts);
+ return 0;
+}