• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/ppc/dsputil_ppc.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002 Brian Foley
00003  * Copyright (c) 2002 Dieter Shirley
00004  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00023 #include "libavcodec/dsputil.h"
00024 
00025 #include "dsputil_ppc.h"
00026 
00027 #include "dsputil_altivec.h"
00028 
00029 void fdct_altivec(int16_t *block);
00030 void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
00031                   int x16, int y16, int rounder);
00032 void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
00033 void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
00034 
00035 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
00036 
00037 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
00038 void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
00039 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
00040 void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
00041 void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
00042 
00043 int mm_flags = 0;
00044 
/**
 * Report the SIMD capabilities usable on this CPU.
 *
 * @return FF_MM_ALTIVEC when the build has AltiVec enabled and
 *         has_altivec() reports it at run time, 0 otherwise.
 */
int mm_support(void)
{
#if HAVE_ALTIVEC
    if (has_altivec())
        return FF_MM_ALTIVEC;
#endif /* HAVE_ALTIVEC */
    return 0;
}
00055 
#if CONFIG_POWERPC_PERF
/* Performance counter storage, indexed [PMC][function][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/*
 * Human-readable names of the instrumented functions.
 * The list below must match the enum in dsputil_ppc.h.
 * Declared as const char pointers: the entries are string literals and are
 * only ever read (printed with "%s").
 */
static const char *const perfname[] = {
    "ff_fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "fdct_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "hadamard8_diff8x8_altivec",
    "hadamard8_diff16_altivec",
    "avg_pixels8_xy2_altivec",
    "clear_blocks_dcbz32_ppc",
    "clear_blocks_dcbz128_ppc",
    "put_h264_chroma_mc8_altivec",
    "avg_h264_chroma_mc8_altivec",
    "put_h264_qpel16_h_lowpass_altivec",
    "avg_h264_qpel16_h_lowpass_altivec",
    "put_h264_qpel16_v_lowpass_altivec",
    "avg_h264_qpel16_v_lowpass_altivec",
    "put_h264_qpel16_hv_lowpass_altivec",
    "avg_h264_qpel16_hv_lowpass_altivec",
    ""
};
#include <stdio.h>
#endif
00090 
#if CONFIG_POWERPC_PERF
/**
 * Log the collected performance-counter statistics at AV_LOG_INFO level.
 *
 * One entry is printed for every (function, PMC) pair whose sample count is
 * non-zero: minimum, maximum, average and the number of samples.
 */
void powerpc_display_perf_report(void)
{
    int i, j;
    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
    for (i = 0; i < powerpc_perf_total; i++) {
        for (j = 0; j < POWERPC_NUM_PMC_ENABLED; j++) {
            const unsigned long long *stats = perfdata[j][i];

            /* Skip counters that never recorded a sample (also avoids 0/0). */
            if (stats[powerpc_data_num] == 0)
                continue;

            /* perfdata holds unsigned long long; cast explicitly so the
             * arguments match the PRIu64 conversions on every ABI. */
            av_log(NULL, AV_LOG_INFO,
                   " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
                   perfname[i],
                   j + 1,
                   (uint64_t)stats[powerpc_data_min],
                   (uint64_t)stats[powerpc_data_max],
                   (double)stats[powerpc_data_sum] /
                   (double)stats[powerpc_data_num],
                   (uint64_t)stats[powerpc_data_num]);
        }
    }
}
#endif /* CONFIG_POWERPC_PERF */
00112 
00113 /* ***** WARNING ***** WARNING ***** WARNING ***** */
00114 /*
00115 clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
00116 cache line size not equal to 32 bytes.
00117 Fortunately all processor used by Apple up to at least the 7450 (aka second
00118 generation G4) use 32 bytes cache line.
00119 This is due to the use of the 'dcbz' instruction. It simply clear to zero a
00120 single cache line, so you need to know the cache line size to use it !
00121 It's absurd, but it's fast...
00122 
00123 update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
00124 size: 128 bytes. Oups.
00125 The semantic of dcbz was changed, it always clear 32 bytes. so the function
00126 below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
00127 which is defined to clear a cache line (as dcbz before). So we still can
00128 distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
00129 
00130 see <http://developer.apple.com/technotes/tn/tn2087.html>
00131 and <http://developer.apple.com/technotes/tn/tn2086.html>
00132 */
00133 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00134 {
00135 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
00136     register int misal = ((unsigned long)blocks & 0x00000010);
00137     register int i = 0;
00138 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
00139 #if 1
00140     if (misal) {
00141         ((unsigned long*)blocks)[0] = 0L;
00142         ((unsigned long*)blocks)[1] = 0L;
00143         ((unsigned long*)blocks)[2] = 0L;
00144         ((unsigned long*)blocks)[3] = 0L;
00145         i += 16;
00146     }
00147     for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00148         __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00149     }
00150     if (misal) {
00151         ((unsigned long*)blocks)[188] = 0L;
00152         ((unsigned long*)blocks)[189] = 0L;
00153         ((unsigned long*)blocks)[190] = 0L;
00154         ((unsigned long*)blocks)[191] = 0L;
00155         i += 16;
00156     }
00157 #else
00158     memset(blocks, 0, sizeof(DCTELEM)*6*64);
00159 #endif
00160 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
00161 }
00162 
00163 /* same as above, when dcbzl clear a whole 128B cache line
00164    i.e. the PPC970 aka G5 */
00165 #if HAVE_DCBZL
00166 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00167 {
00168 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
00169     register int misal = ((unsigned long)blocks & 0x0000007f);
00170     register int i = 0;
00171 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
00172 #if 1
00173     if (misal) {
00174         // we could probably also optimize this case,
00175         // but there's not much point as the machines
00176         // aren't available yet (2003-06-26)
00177         memset(blocks, 0, sizeof(DCTELEM)*6*64);
00178     }
00179     else
00180         for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00181             __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00182         }
00183 #else
00184     memset(blocks, 0, sizeof(DCTELEM)*6*64);
00185 #endif
00186 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
00187 }
00188 #else
00189 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00190 {
00191     memset(blocks, 0, sizeof(DCTELEM)*6*64);
00192 }
00193 #endif
00194 
#if HAVE_DCBZL
/**
 * Measure how many bytes a single dcbzl instruction sets to zero.
 *
 * update 24/06/2003: dcbz was replaced by dcbzl to get the intended effect
 * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
 * assembler knows about dcbzl.
 *
 * A 1024-byte buffer is filled with 0xFF, dcbzl is issued on its middle and
 * the zero bytes are counted afterwards.
 *
 * @return number of bytes zeroed, or 0 if the scratch buffer could not be
 *         allocated
 */
long check_dcbzl_effect(void)
{
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint ("address base register") keeps the compiler from
     * picking r0 for the base operand.
     * The "memory" clobber is required: the instruction writes to the buffer
     * behind the compiler's back, and without it the compiler may assume
     * every byte still holds 0xFF and optimise the count below to 0. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/** Stub used when dcbzl is unavailable at build time: always reports 0. */
long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00236 
/**
 * Issue a dcbt cache-touch hint for each of h rows.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows; nothing is done when h <= 0 (the previous
 *               do/while form would have run away on such a value)
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *p = mem;

    for (; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
00245 
00246 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00247 {
00248     // Common optimizations whether AltiVec is available or not
00249     c->prefetch = prefetch_ppc;
00250     switch (check_dcbzl_effect()) {
00251         case 32:
00252             c->clear_blocks = clear_blocks_dcbz32_ppc;
00253             break;
00254         case 128:
00255             c->clear_blocks = clear_blocks_dcbz128_ppc;
00256             break;
00257         default:
00258             break;
00259     }
00260 
00261 #if HAVE_ALTIVEC
00262     if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00263 
00264     if (has_altivec()) {
00265         mm_flags |= FF_MM_ALTIVEC;
00266 
00267         dsputil_init_altivec(c, avctx);
00268         if(CONFIG_SNOW_DECODER) snow_init_altivec(c, avctx);
00269         if(CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER)
00270             vc1dsp_init_altivec(c, avctx);
00271         float_init_altivec(c, avctx);
00272         int_init_altivec(c, avctx);
00273         c->gmc1 = gmc1_altivec;
00274 
00275 #if CONFIG_ENCODERS
00276         if (avctx->dct_algo == FF_DCT_AUTO ||
00277             avctx->dct_algo == FF_DCT_ALTIVEC) {
00278             c->fdct = fdct_altivec;
00279         }
00280 #endif //CONFIG_ENCODERS
00281 
00282         if (avctx->lowres==0) {
00283             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00284                 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00285                 c->idct_put = idct_put_altivec;
00286                 c->idct_add = idct_add_altivec;
00287                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00288             }
00289         }
00290 
00291 #if CONFIG_POWERPC_PERF
00292         {
00293             int i, j;
00294             for (i = 0 ; i < powerpc_perf_total ; i++) {
00295                 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
00296                     perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
00297                     perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
00298                     perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
00299                     perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
00300                 }
00301             }
00302         }
00303 #endif /* CONFIG_POWERPC_PERF */
00304     }
00305 #endif /* HAVE_ALTIVEC */
00306 }

Generated on Tue Nov 4 2014 12:59:22 for ffmpeg by  doxygen 1.7.1