/* Libav 0.7.1 */
00001 /* 00002 * BlackFin MPEGVIDEO OPTIMIZATIONS 00003 * 00004 * Copyright (C) 2007 Marc Hoffman <mmh@pleasantst.com> 00005 * 00006 * This file is part of Libav. 00007 * 00008 * Libav is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * Libav is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with Libav; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00021 */ 00022 00023 #include "libavcodec/avcodec.h" 00024 #include "libavcodec/dsputil.h" 00025 #include "libavcodec/mpegvideo.h" 00026 #include "dsputil_bfin.h" 00027 00028 static int dct_quantize_bfin (MpegEncContext *s, 00029 DCTELEM *block, int n, 00030 int qscale, int *overflow) 00031 { 00032 int last_non_zero, q, start_i; 00033 const short *qmat; 00034 short *bias; 00035 const uint8_t *scantable= s->intra_scantable.scantable; 00036 short dc; 00037 int max=0; 00038 00039 PROF("fdct",0); 00040 s->dsp.fdct(block); 00041 EPROF(); 00042 00043 PROF("denoise",1); 00044 if(s->dct_error_sum) 00045 s->denoise_dct(s, block); 00046 EPROF(); 00047 00048 PROF("quant-init",2); 00049 if (s->mb_intra) { 00050 if (!s->h263_aic) { 00051 if (n < 4) 00052 q = s->y_dc_scale; 00053 else 00054 q = s->c_dc_scale; 00055 q = q << 3; 00056 } else 00057 /* For AIC we skip quant/dequant of INTRADC */ 00058 q = 1 << 3; 00059 00060 /* note: block[0] is assumed to be positive */ 00061 dc = block[0] = (block[0] + (q >> 1)) / q; 00062 start_i = 1; 00063 
last_non_zero = 0; 00064 bias = s->q_intra_matrix16[qscale][1]; 00065 qmat = s->q_intra_matrix16[qscale][0]; 00066 00067 } else { 00068 start_i = 0; 00069 last_non_zero = -1; 00070 bias = s->q_inter_matrix16[qscale][1]; 00071 qmat = s->q_inter_matrix16[qscale][0]; 00072 00073 } 00074 EPROF(); 00075 00076 PROF("quantize",4); 00077 00078 /* for(i=start_i; i<64; i++) { */ 00079 /* sign = (block[i]>>15)|1; */ 00080 /* level = ((abs(block[i])+bias[0])*qmat[i])>>16; */ 00081 /* if (level < 0) level = 0; */ 00082 /* max |= level; */ 00083 /* level = level * sign; */ 00084 /* block[i] = level; */ 00085 /* } */ 00086 00087 __asm__ volatile 00088 ("i2=%1;\n\t" 00089 "r1=[%1++]; \n\t" 00090 "r0=r1>>>15 (v); \n\t" 00091 "lsetup (0f,1f) lc0=%3; \n\t" 00092 "0: r0=r0|%4; \n\t" 00093 " r1=abs r1 (v) || r2=[%2++];\n\t" 00094 " r1=r1+|+%5; \n\t" 00095 " r1=max(r1,%6) (v); \n\t" 00096 " r1.h=(a1 =r1.h*r2.h), r1.l=(a0 =r1.l*r2.l) (tfu); \n\t" 00097 " %0=%0|r1; \n\t" 00098 " r0.h=(a1 =r1.h*r0.h), r0.l=(a0 =r1.l*r0.l) (is) || r1=[%1++];\n\t" 00099 "1: r0=r1>>>15 (v) || [i2++]=r0;\n\t" 00100 "r1=%0>>16; \n\t" 00101 "%0=%0|r1; \n\t" 00102 "%0.h=0; \n\t" 00103 : "=&d" (max) 00104 : "b" (block), "b" (qmat), "a" (32), "d" (0x00010001), "d" (bias[0]*0x10001), "d" (0) 00105 : "R0","R1","R2", "I2"); 00106 if (start_i == 1) block[0] = dc; 00107 00108 EPROF(); 00109 00110 00111 PROF("zzscan",5); 00112 00113 __asm__ volatile 00114 ("r0=b[%1--] (x); \n\t" 00115 "lsetup (0f,1f) lc0=%3; \n\t" /* for(i=63; i>=start_i; i--) { */ 00116 "0: p0=r0; \n\t" /* j = scantable[i]; */ 00117 " p0=%2+(p0<<1); \n\t" /* if (block[j]) { */ 00118 " r0=w[p0]; \n\t" /* last_non_zero = i; */ 00119 " cc=r0==0; \n\t" /* break; */ 00120 " if !cc jump 2f; \n\t" /* } */ 00121 "1: r0=b[%1--] (x); \n\t" /* } */ 00122 " %0=%4; \n\t" 00123 " jump 3f; \n\t" 00124 "2: %0=lc0; \n\t" 00125 "3:\n\t" 00126 00127 : "=d" (last_non_zero) 00128 : "a" (scantable+63), "a" (block), "a" (63), "d" (last_non_zero) 00129 : "P0","R0"); 00130 
00131 EPROF(); 00132 00133 *overflow= s->max_qcoeff < max; //overflow might have happened 00134 00135 bfprof(); 00136 00137 /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ 00138 if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) 00139 ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); 00140 00141 return last_non_zero; 00142 } 00143 00144 void MPV_common_init_bfin (MpegEncContext *s) 00145 { 00146 /* s->dct_quantize= dct_quantize_bfin; */ 00147 } 00148