Libav 0.7.1
|
00001 /* 00002 * VP8 DSP functions x86-optimized 00003 * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> 00004 * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com> 00005 * 00006 * This file is part of Libav. 00007 * 00008 * Libav is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * Libav is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with Libav; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 */ 00022 00023 #include "libavutil/cpu.h" 00024 #include "libavutil/x86_cpu.h" 00025 #include "libavcodec/vp8dsp.h" 00026 00027 #if HAVE_YASM 00028 00029 /* 00030 * MC functions 00031 */ 00032 extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride, 00033 uint8_t *src, int srcstride, 00034 int height, int mx, int my); 00035 extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride, 00036 uint8_t *src, int srcstride, 00037 int height, int mx, int my); 00038 extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride, 00039 uint8_t *src, int srcstride, 00040 int height, int mx, int my); 00041 extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride, 00042 uint8_t *src, int srcstride, 00043 int height, int mx, int my); 00044 00045 extern void ff_put_vp8_epel8_h4_sse2 (uint8_t *dst, int dststride, 00046 uint8_t *src, int srcstride, 00047 int height, int mx, int my); 00048 extern void ff_put_vp8_epel8_h6_sse2 (uint8_t *dst, int dststride, 00049 uint8_t *src, int srcstride, 00050 int height, int mx, int my); 00051 extern void ff_put_vp8_epel8_v4_sse2 (uint8_t *dst, int dststride, 00052 uint8_t *src, int srcstride, 00053 int height, int mx, int my); 00054 extern void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, int dststride, 00055 uint8_t *src, int srcstride, 00056 int height, int mx, int my); 00057 00058 extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride, 00059 uint8_t *src, int srcstride, 00060 int height, int mx, int my); 00061 extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride, 00062 uint8_t *src, int srcstride, 00063 int height, int mx, int my); 00064 extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride, 00065 uint8_t *src, int srcstride, 00066 int height, int mx, int my); 00067 extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride, 00068 uint8_t *src, int srcstride, 00069 int height, int mx, int my); 00070 extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride, 00071 uint8_t *src, int srcstride, 00072 int height, int mx, int my); 00073 extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride, 00074 uint8_t *src, int srcstride, 00075 int height, int mx, int my); 00076 extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, int dststride, 00077 uint8_t *src, int srcstride, 00078 int height, int mx, int my); 00079 extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, int dststride, 00080 uint8_t *src, int srcstride, 00081 int height, int mx, int my); 00082 00083 extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride, 00084 uint8_t *src, int srcstride, 00085 int height, int mx, int my); 00086 extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride, 00087 uint8_t *src, int srcstride, 00088 int height, int mx, int my); 00089 extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride, 00090 uint8_t *src, int srcstride, 00091 int height, int mx, int my); 00092 extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride, 00093 uint8_t *src, int srcstride, 00094 int height, int mx, int my); 00095 00096 extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride, 00097 uint8_t *src, int srcstride, 00098 int height, int mx, int my); 00099 extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride, 00100 uint8_t *src, int srcstride, 00101 int height, int mx, int my); 00102 extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride, 00103 uint8_t *src, int srcstride, 00104 int height, int mx, int my); 00105 extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride, 00106 uint8_t *src, int srcstride, 00107 int height, int mx, int my); 00108 00109 00110 extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride, 00111 uint8_t *src, int srcstride, 00112 int height, int mx, int my); 00113 extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride, 00114 uint8_t *src, int srcstride, 00115 int height, int mx, int my); 00116 extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride, 00117 uint8_t *src, int srcstride, 00118 int height, int mx, int my); 00119 00120 #define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \ 00121 static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \ 00122 uint8_t *dst, int dststride, uint8_t *src, \ 00123 int srcstride, int height, int mx, int my) \ 00124 { \ 00125 ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ 00126 dst, dststride, src, srcstride, height, mx, my); \ 00127 ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ 00128 dst + 8, dststride, src + 8, srcstride, height, mx, my); \ 00129 } 00130 #define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \ 00131 static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \ 00132 uint8_t *dst, int dststride, uint8_t *src, \ 00133 int srcstride, int height, int mx, int my) \ 00134 { \ 00135 ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ 00136 dst, dststride, src, srcstride, height, mx, my); \ 00137 ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \ 00138 dst + 4, dststride, src + 4, srcstride, height, mx, my); \ 00139 } 00140 00141 TAP_W8 (mmxext, epel, h4) 00142 TAP_W8 (mmxext, epel, h6) 00143 TAP_W16(mmxext, epel, h6) 00144 TAP_W8 (mmxext, epel, v4) 00145 TAP_W8 (mmxext, epel, v6) 00146 TAP_W16(mmxext, epel, v6) 00147 TAP_W8 (mmxext, bilinear, h) 00148 TAP_W16(mmxext, bilinear, h) 00149 TAP_W8 (mmxext, bilinear, v) 00150 TAP_W16(mmxext, bilinear, v) 00151 00152 TAP_W16(sse2, epel, h6) 00153 TAP_W16(sse2, epel, v6) 00154 TAP_W16(sse2, bilinear, h) 00155 TAP_W16(sse2, bilinear, v) 00156 00157 TAP_W16(ssse3, epel, h6) 00158 TAP_W16(ssse3, epel, v6) 00159 TAP_W16(ssse3, bilinear, h) 00160 TAP_W16(ssse3, bilinear, v) 00161 00162 #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ 00163 static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ 00164 uint8_t *dst, int dststride, uint8_t *src, \ 00165 int srcstride, int height, int mx, int my) \ 00166 { \ 00167 DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \ 00168 uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \ 00169 src -= srcstride * (TAPNUMY / 2 - 1); \ 00170 ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \ 00171 tmp, SIZE, src, srcstride, height + TAPNUMY - 1, mx, my); \ 00172 ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \ 00173 dst, dststride, tmpptr, SIZE, height, mx, my); \ 00174 } 00175 00176 #define HVTAPMMX(x, y) \ 00177 HVTAP(mmxext, 8, x, y, 4, 8) \ 00178 HVTAP(mmxext, 8, x, y, 8, 16) 00179 00180 HVTAPMMX(4, 4) 00181 HVTAPMMX(4, 6) 00182 HVTAPMMX(6, 4) 00183 HVTAPMMX(6, 6) 00184 HVTAP(mmxext, 8, 6, 6, 16, 16) 00185 00186 #define HVTAPSSE2(x, y, w) \ 00187 HVTAP(sse2, 16, x, y, w, 16) \ 00188 HVTAP(ssse3, 16, x, y, w, 16) 00189 00190 HVTAPSSE2(4, 4, 8) 00191 HVTAPSSE2(4, 6, 8) 00192 HVTAPSSE2(6, 4, 8) 00193 HVTAPSSE2(6, 6, 8) 00194 HVTAPSSE2(6, 6, 16) 00195 00196 HVTAP(ssse3, 16, 4, 4, 4, 8) 00197 HVTAP(ssse3, 16, 4, 6, 4, 8) 00198 HVTAP(ssse3, 16, 6, 4, 4, 8) 00199 HVTAP(ssse3, 16, 6, 6, 4, 8) 00200 00201 #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ 00202 static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ 00203 uint8_t *dst, int dststride, uint8_t *src, \ 00204 int srcstride, int height, int mx, int my) \ 00205 { \ 00206 DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \ 00207 ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \ 00208 tmp, SIZE, src, srcstride, height + 1, mx, my); \ 00209 ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \ 00210 dst, dststride, tmp, SIZE, height, mx, my); \ 00211 } 00212 00213 HVBILIN(mmxext, 8, 4, 8) 00214 HVBILIN(mmxext, 8, 8, 16) 00215 HVBILIN(mmxext, 8, 16, 16) 00216 HVBILIN(sse2, 8, 8, 16) 00217 HVBILIN(sse2, 8, 16, 16) 00218 HVBILIN(ssse3, 8, 4, 8) 00219 HVBILIN(ssse3, 8, 8, 16) 00220 HVBILIN(ssse3, 8, 16, 16) 00221 00222 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride); 00223 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride); 00224 extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride); 00225 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride); 00226 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride); 00227 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]); 00228 extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]); 00229 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride); 00230 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride); 00231 00232 #define DECLARE_LOOP_FILTER(NAME)\ 00233 extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ 00234 extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\ 00235 extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ 00236 int e, int i, int hvt);\ 00237 extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\ 00238 int e, int i, int hvt);\ 00239 extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\ 00240 int s, int e, int i, int hvt);\ 00241 extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\ 00242 int s, int e, int i, int hvt);\ 00243 extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ 00244 int e, int i, int hvt);\ 00245 extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\ 00246 int e, int i, int hvt);\ 00247 extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ 00248 int s, int e, int i, int hvt);\ 00249 extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\ 00250 int s, int e, int i, int hvt); 00251 00252 DECLARE_LOOP_FILTER(mmx) 00253 DECLARE_LOOP_FILTER(mmxext) 00254 DECLARE_LOOP_FILTER(sse2) 00255 DECLARE_LOOP_FILTER(ssse3) 00256 DECLARE_LOOP_FILTER(sse4) 00257 00258 #endif 00259 00260 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \ 00261 c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \ 00262 c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \ 00263 c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT 00264 00265 #define VP8_MC_FUNC(IDX, SIZE, OPT) \ 00266 c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \ 00267 c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \ 00268 c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \ 00269 c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \ 00270 c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \ 00271 VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) 00272 00273 #define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \ 00274 c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \ 00275 c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \ 00276 c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \ 00277 c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ 00278 c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ 00279 c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \ 00280 c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \ 00281 c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT 00282 00283 00284 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) 00285 { 00286 int mm_flags = av_get_cpu_flags(); 00287 00288 #if HAVE_YASM 00289 if (mm_flags & AV_CPU_FLAG_MMX) { 00290 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx; 00291 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx; 00292 c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; 00293 c->vp8_idct_add = ff_vp8_idct_add_mmx; 00294 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx; 00295 c->put_vp8_epel_pixels_tab[0][0][0] = 00296 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx; 00297 c->put_vp8_epel_pixels_tab[1][0][0] = 00298 c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; 00299 00300 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx; 00301 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx; 00302 00303 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx; 00304 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx; 00305 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx; 00306 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx; 00307 00308 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx; 00309 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx; 00310 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx; 00311 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx; 00312 } 00313 00314 /* note that 4-tap width=16 functions are missing because w=16 00315 * is only used for luma, and luma is always a copy or sixtap. */ 00316 if (mm_flags & AV_CPU_FLAG_MMX2) { 00317 VP8_LUMA_MC_FUNC(0, 16, mmxext); 00318 VP8_MC_FUNC(1, 8, mmxext); 00319 VP8_MC_FUNC(2, 4, mmxext); 00320 VP8_BILINEAR_MC_FUNC(0, 16, mmxext); 00321 VP8_BILINEAR_MC_FUNC(1, 8, mmxext); 00322 VP8_BILINEAR_MC_FUNC(2, 4, mmxext); 00323 00324 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext; 00325 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext; 00326 00327 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext; 00328 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext; 00329 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext; 00330 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext; 00331 00332 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext; 00333 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext; 00334 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext; 00335 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext; 00336 } 00337 00338 if (mm_flags & AV_CPU_FLAG_SSE) { 00339 c->vp8_idct_add = ff_vp8_idct_add_sse; 00340 c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; 00341 c->put_vp8_epel_pixels_tab[0][0][0] = 00342 c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; 00343 } 00344 00345 if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) { 00346 VP8_LUMA_MC_FUNC(0, 16, sse2); 00347 VP8_MC_FUNC(1, 8, sse2); 00348 VP8_BILINEAR_MC_FUNC(0, 16, sse2); 00349 VP8_BILINEAR_MC_FUNC(1, 8, sse2); 00350 00351 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; 00352 00353 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; 00354 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; 00355 00356 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; 00357 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; 00358 } 00359 00360 if (mm_flags & AV_CPU_FLAG_SSE2) { 00361 c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; 00362 00363 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; 00364 00365 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; 00366 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; 00367 00368 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; 00369 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; 00370 } 00371 00372 if (mm_flags & AV_CPU_FLAG_SSSE3) { 00373 VP8_LUMA_MC_FUNC(0, 16, ssse3); 00374 VP8_MC_FUNC(1, 8, ssse3); 00375 VP8_MC_FUNC(2, 4, ssse3); 00376 VP8_BILINEAR_MC_FUNC(0, 16, ssse3); 00377 VP8_BILINEAR_MC_FUNC(1, 8, ssse3); 00378 VP8_BILINEAR_MC_FUNC(2, 4, ssse3); 00379 00380 c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; 00381 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; 00382 00383 c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; 00384 c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; 00385 c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; 00386 c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; 00387 00388 c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; 00389 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; 00390 c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; 00391 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; 00392 } 00393 00394 if (mm_flags & AV_CPU_FLAG_SSE4) { 00395 c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; 00396 00397 c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; 00398 c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; 00399 c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; 00400 } 00401 #endif 00402 }