• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/arm/dsputil_neon.c

Go to the documentation of this file.
00001 /*
00002  * ARM NEON optimised DSP functions
00003  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include <stdint.h>
00023 
00024 #include "libavcodec/avcodec.h"
00025 #include "libavcodec/dsputil.h"
00026 
/*
 * put_pixels / avg_pixels routines.
 *
 * Only prototypes appear in this file; the implementations live elsewhere
 * (presumably NEON assembly -- not visible here), so these signatures must
 * stay in sync with those definitions.
 *
 * ff_dsputil_init_neon() installs them as follows:
 *   - put_pixels_tab[0][0..3]        <- pixels16: plain, x2, y2, xy2
 *   - put_pixels_tab[1][0..3]        <- pixels8:  plain, x2, y2, xy2
 *   - put_no_rnd_pixels_tab[n][1..3] <- the matching *_no_rnd variants
 *   - put_no_rnd_pixels_tab[n][0]    <- the plain ff_put_pixels{16,8}_neon
 *                                       (no dedicated no_rnd variant is
 *                                       declared for slot 0)
 *   - avg_pixels_tab[0][0]           <- ff_avg_pixels16_neon (the only avg
 *                                       routine declared here)
 */
00027 void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int);
00028 void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
00029 void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int);
00030 void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int);
00031 void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int);
00032 void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int);
00033 void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int);
00034 void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int);
00035 void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00036 void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00037 void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00038 void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00039 void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00040 void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
00041 
00042 void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
00043 
/*
 * Clamped block store routines taking a read-only DCTELEM block and a
 * writable byte buffer. ff_dsputil_init_neon() installs them as
 * add_pixels_clamped, put_pixels_clamped and put_signed_pixels_clamped.
 * NOTE(review): the trailing int is presumably the line stride of the byte
 * buffer -- confirm against the DSPContext declaration.
 */
00044 void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
00045 void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
00046 void ff_put_signed_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
00047 
/*
 * H.264 qpel routines, one per mcXY suffix.
 *
 * ff_dsputil_init_neon() stores the qpel16 set in
 * put_h264_qpel_pixels_tab[0] and the qpel8 set in
 * put_h264_qpel_pixels_tab[1]; routine mcXY goes to index X + 4*Y
 * (mc10 -> 1, mc01 -> 4, mc33 -> 15).
 */
00048 void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
00049 void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
00050 void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
00051 void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
00052 void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
00053 void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
00054 void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
00055 void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
00056 void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
00057 void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
00058 void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
00059 void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
00060 void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
00061 void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
00062 void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
00063 void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
00064 
00065 void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
00066 void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
00067 void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
00068 void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
00069 void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
00070 void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
00071 void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
00072 void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
00073 void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
00074 void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
00075 void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
00076 void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
00077 void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
00078 void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
00079 void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
00080 void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
00081 
/*
 * The only avg qpel routine declared in this file; it is installed in
 * avg_h264_qpel_pixels_tab[0][0].
 */
00082 void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
00083 
/*
 * H.264 chroma mc routines. ff_dsputil_init_neon() places the mc8 routine
 * in slot 0 and the mc4 routine in slot 1 of put_h264_chroma_pixels_tab
 * and avg_h264_chroma_pixels_tab respectively.
 * NOTE(review): the meaning of the four int arguments is not visible in
 * this file -- see the DSPContext declaration before changing them.
 */
00084 void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
00085 void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
00086 
00087 void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
00088 void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
00089 
/*
 * H.264 loop filter routines for luma and chroma, in v and h flavours.
 * ff_dsputil_init_neon() installs each one in the DSPContext member of the
 * same name without the ff_ prefix and _neon suffix
 * (e.g. c->h264_v_loop_filter_luma).
 */
00090 void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
00091                                      int beta, int8_t *tc0);
00092 void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
00093                                      int beta, int8_t *tc0);
00094 void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
00095                                        int beta, int8_t *tc0);
00096 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
00097                                        int beta, int8_t *tc0);
00098 
/*
 * H.264 weight routines, one per size suffix. ff_dsputil_init_neon()
 * stores them in weight_h264_pixels_tab[0..7] in the order
 * 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4, 4x2.
 * NOTE(review): the first parameter is spelled "ds" here but "dst" in the
 * biweight prototypes below; this looks like a typo. It is harmless in a
 * prototype, so it is left untouched.
 */
00099 void ff_weight_h264_pixels_16x16_neon(uint8_t *ds, int stride, int log2_den,
00100                                       int weight, int offset);
00101 void ff_weight_h264_pixels_16x8_neon(uint8_t *ds, int stride, int log2_den,
00102                                      int weight, int offset);
00103 void ff_weight_h264_pixels_8x16_neon(uint8_t *ds, int stride, int log2_den,
00104                                      int weight, int offset);
00105 void ff_weight_h264_pixels_8x8_neon(uint8_t *ds, int stride, int log2_den,
00106                                     int weight, int offset);
00107 void ff_weight_h264_pixels_8x4_neon(uint8_t *ds, int stride, int log2_den,
00108                                     int weight, int offset);
00109 void ff_weight_h264_pixels_4x8_neon(uint8_t *ds, int stride, int log2_den,
00110                                     int weight, int offset);
00111 void ff_weight_h264_pixels_4x4_neon(uint8_t *ds, int stride, int log2_den,
00112                                     int weight, int offset);
00113 void ff_weight_h264_pixels_4x2_neon(uint8_t *ds, int stride, int log2_den,
00114                                     int weight, int offset);
00115 
/*
 * H.264 biweight routines taking a dst and a src buffer with one weight
 * each (weightd, weights). ff_dsputil_init_neon() stores them in
 * biweight_h264_pixels_tab[0..7] using the same size order as the weight
 * table above.
 */
00116 void ff_biweight_h264_pixels_16x16_neon(uint8_t *dst, uint8_t *src, int stride,
00117                                         int log2_den, int weightd, int weights,
00118                                         int offset);
00119 void ff_biweight_h264_pixels_16x8_neon(uint8_t *dst, uint8_t *src, int stride,
00120                                        int log2_den, int weightd, int weights,
00121                                        int offset);
00122 void ff_biweight_h264_pixels_8x16_neon(uint8_t *dst, uint8_t *src, int stride,
00123                                        int log2_den, int weightd, int weights,
00124                                        int offset);
00125 void ff_biweight_h264_pixels_8x8_neon(uint8_t *dst, uint8_t *src, int stride,
00126                                       int log2_den, int weightd, int weights,
00127                                       int offset);
00128 void ff_biweight_h264_pixels_8x4_neon(uint8_t *dst, uint8_t *src, int stride,
00129                                       int log2_den, int weightd, int weights,
00130                                       int offset);
00131 void ff_biweight_h264_pixels_4x8_neon(uint8_t *dst, uint8_t *src, int stride,
00132                                       int log2_den, int weightd, int weights,
00133                                       int offset);
00134 void ff_biweight_h264_pixels_4x4_neon(uint8_t *dst, uint8_t *src, int stride,
00135                                       int log2_den, int weightd, int weights,
00136                                       int offset);
00137 void ff_biweight_h264_pixels_4x2_neon(uint8_t *dst, uint8_t *src, int stride,
00138                                       int log2_den, int weightd, int weights,
00139                                       int offset);
00140 
/*
 * H.264 IDCT routines. ff_dsputil_init_neon() installs them as
 * h264_idct_add, h264_idct_dc_add, h264_idct_add16, h264_idct_add16intra
 * and h264_idct_add8.
 * The add16/add16intra variants take a single dst pointer, whereas add8
 * takes an array of destination pointers (uint8_t **dest); all three also
 * receive a block_offset table and a 6*8-entry nnzc array.
 */
00141 void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
00142 void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
00143 void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
00144                              DCTELEM *block, int stride,
00145                              const uint8_t nnzc[6*8]);
00146 void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
00147                                   DCTELEM *block, int stride,
00148                                   const uint8_t nnzc[6*8]);
00149 void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
00150                             DCTELEM *block, int stride,
00151                             const uint8_t nnzc[6*8]);
00152 
/*
 * VP3 loop filter routines. ff_dsputil_init_neon() installs them as
 * vp3_v_loop_filter / vp3_h_loop_filter only when CONFIG_VP3_DECODER or
 * CONFIG_THEORA_DECODER is non-zero.
 */
00153 void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
00154 void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
00155 
/*
 * Float vector routines, installed unconditionally by
 * ff_dsputil_init_neon() as vector_fmul and vector_fmul_window.
 */
00156 void ff_vector_fmul_neon(float *dst, const float *src, int len);
00157 void ff_vector_fmul_window_neon(float *dst, const float *src0,
00158                                 const float *src1, const float *win,
00159                                 float add_bias, int len);
00160 
/*
 * float -> int16 conversion routines. ff_dsputil_init_neon() installs them
 * only when CODEC_FLAG_BITEXACT is NOT set in avctx->flags.
 * NOTE(review): presumably because their results are not bit-identical to
 * the reference implementation -- confirm.
 */
00161 void ff_float_to_int16_neon(int16_t *, const float *, long);
00162 void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
00163 
/*
 * Installed as vorbis_inverse_coupling only when CONFIG_VORBIS_DECODER is
 * non-zero.
 */
00164 void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
00165 
00166 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
00167 {
00168     c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
00169     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
00170     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
00171     c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
00172     c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
00173     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
00174     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
00175     c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
00176 
00177     c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
00178     c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
00179     c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
00180     c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
00181     c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
00182     c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
00183     c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
00184     c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
00185 
00186     c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
00187 
00188     c->add_pixels_clamped = ff_add_pixels_clamped_neon;
00189     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
00190     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
00191 
00192     c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
00193     c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
00194 
00195     c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
00196     c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
00197 
00198     c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
00199     c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
00200     c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
00201     c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
00202     c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
00203     c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
00204     c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
00205     c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
00206     c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
00207     c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
00208     c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
00209     c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
00210     c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
00211     c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
00212     c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
00213     c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
00214 
00215     c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
00216     c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
00217     c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
00218     c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
00219     c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
00220     c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
00221     c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
00222     c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
00223     c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
00224     c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
00225     c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
00226     c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
00227     c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
00228     c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
00229     c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
00230     c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
00231 
00232     c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
00233 
00234     c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
00235     c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
00236     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
00237     c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
00238 
00239     c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16x16_neon;
00240     c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_16x8_neon;
00241     c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels_8x16_neon;
00242     c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels_8x8_neon;
00243     c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels_8x4_neon;
00244     c->weight_h264_pixels_tab[5] = ff_weight_h264_pixels_4x8_neon;
00245     c->weight_h264_pixels_tab[6] = ff_weight_h264_pixels_4x4_neon;
00246     c->weight_h264_pixels_tab[7] = ff_weight_h264_pixels_4x2_neon;
00247 
00248     c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels_16x16_neon;
00249     c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels_16x8_neon;
00250     c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels_8x16_neon;
00251     c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels_8x8_neon;
00252     c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels_8x4_neon;
00253     c->biweight_h264_pixels_tab[5] = ff_biweight_h264_pixels_4x8_neon;
00254     c->biweight_h264_pixels_tab[6] = ff_biweight_h264_pixels_4x4_neon;
00255     c->biweight_h264_pixels_tab[7] = ff_biweight_h264_pixels_4x2_neon;
00256 
00257     c->h264_idct_add = ff_h264_idct_add_neon;
00258     c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
00259     c->h264_idct_add16      = ff_h264_idct_add16_neon;
00260     c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
00261     c->h264_idct_add8       = ff_h264_idct_add8_neon;
00262 
00263     if (CONFIG_VP3_DECODER || CONFIG_THEORA_DECODER) {
00264         c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon;
00265         c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon;
00266     }
00267 
00268     c->vector_fmul = ff_vector_fmul_neon;
00269     c->vector_fmul_window = ff_vector_fmul_window_neon;
00270 
00271     if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
00272         c->float_to_int16 = ff_float_to_int16_neon;
00273         c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
00274     }
00275 
00276     if (CONFIG_VORBIS_DECODER)
00277         c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
00278 }

Generated on Tue Nov 4 2014 12:59:21 for ffmpeg by  doxygen 1.7.1