Libav 0.7.1
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
#include "ac3dec.h"
#include "vorbis.h"
#include "png.h"

uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)

const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
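/* Editorial example, not part of the original file: ff_zigzag_direct maps a scan
 * position to the raster index of that coefficient inside an 8x8 block (scan
 * position 0 is the DC coefficient).  A hypothetical helper that reads a
 * raster-ordered block in zig-zag order could look like the sketch below
 * (scan_block_zigzag is illustrative only and does not exist in Libav). */
#if 0
static void scan_block_zigzag(const DCTELEM *block /* raster order */,
                              DCTELEM *out         /* scan order   */)
{
    int i;
    for (i = 0; i < 64; i++)
        out[i] = block[ff_zigzag_direct[i]];  /* out[0] = DC, then AC in zig-zag order */
}
#endif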
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
#if ARCH_PPC
        st->inverse[j] = i;
#endif
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}

static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s +=
sq[(x>>48)&0xff]; 00197 s += sq[(x>>56)&0xff]; 00198 #else 00199 register uint32_t x=*(uint32_t*)pix; 00200 s += sq[x&0xff]; 00201 s += sq[(x>>8)&0xff]; 00202 s += sq[(x>>16)&0xff]; 00203 s += sq[(x>>24)&0xff]; 00204 x=*(uint32_t*)(pix+4); 00205 s += sq[x&0xff]; 00206 s += sq[(x>>8)&0xff]; 00207 s += sq[(x>>16)&0xff]; 00208 s += sq[(x>>24)&0xff]; 00209 #endif 00210 #endif 00211 pix += 8; 00212 } 00213 pix += line_size - 16; 00214 } 00215 return s; 00216 } 00217 00218 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){ 00219 int i; 00220 00221 for(i=0; i+8<=w; i+=8){ 00222 dst[i+0]= av_bswap32(src[i+0]); 00223 dst[i+1]= av_bswap32(src[i+1]); 00224 dst[i+2]= av_bswap32(src[i+2]); 00225 dst[i+3]= av_bswap32(src[i+3]); 00226 dst[i+4]= av_bswap32(src[i+4]); 00227 dst[i+5]= av_bswap32(src[i+5]); 00228 dst[i+6]= av_bswap32(src[i+6]); 00229 dst[i+7]= av_bswap32(src[i+7]); 00230 } 00231 for(;i<w; i++){ 00232 dst[i+0]= av_bswap32(src[i+0]); 00233 } 00234 } 00235 00236 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len) 00237 { 00238 while (len--) 00239 *dst++ = av_bswap16(*src++); 00240 } 00241 00242 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 00243 { 00244 int s, i; 00245 uint32_t *sq = ff_squareTbl + 256; 00246 00247 s = 0; 00248 for (i = 0; i < h; i++) { 00249 s += sq[pix1[0] - pix2[0]]; 00250 s += sq[pix1[1] - pix2[1]]; 00251 s += sq[pix1[2] - pix2[2]]; 00252 s += sq[pix1[3] - pix2[3]]; 00253 pix1 += line_size; 00254 pix2 += line_size; 00255 } 00256 return s; 00257 } 00258 00259 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) 00260 { 00261 int s, i; 00262 uint32_t *sq = ff_squareTbl + 256; 00263 00264 s = 0; 00265 for (i = 0; i < h; i++) { 00266 s += sq[pix1[0] - pix2[0]]; 00267 s += sq[pix1[1] - pix2[1]]; 00268 s += sq[pix1[2] - pix2[2]]; 00269 s += sq[pix1[3] - pix2[3]]; 00270 s += sq[pix1[4] - pix2[4]]; 00271 s += sq[pix1[5] - pix2[5]]; 00272 s += sq[pix1[6] - pix2[6]]; 00273 s += sq[pix1[7] - pix2[7]]; 00274 pix1 += line_size; 00275 pix2 += line_size; 00276 } 00277 return s; 00278 } 00279 00280 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 00281 { 00282 int s, i; 00283 uint32_t *sq = ff_squareTbl + 256; 00284 00285 s = 0; 00286 for (i = 0; i < h; i++) { 00287 s += sq[pix1[ 0] - pix2[ 0]]; 00288 s += sq[pix1[ 1] - pix2[ 1]]; 00289 s += sq[pix1[ 2] - pix2[ 2]]; 00290 s += sq[pix1[ 3] - pix2[ 3]]; 00291 s += sq[pix1[ 4] - pix2[ 4]]; 00292 s += sq[pix1[ 5] - pix2[ 5]]; 00293 s += sq[pix1[ 6] - pix2[ 6]]; 00294 s += sq[pix1[ 7] - pix2[ 7]]; 00295 s += sq[pix1[ 8] - pix2[ 8]]; 00296 s += sq[pix1[ 9] - pix2[ 9]]; 00297 s += sq[pix1[10] - pix2[10]]; 00298 s += sq[pix1[11] - pix2[11]]; 00299 s += sq[pix1[12] - pix2[12]]; 00300 s += sq[pix1[13] - pix2[13]]; 00301 s += sq[pix1[14] - pix2[14]]; 00302 s += sq[pix1[15] - pix2[15]]; 00303 00304 pix1 += line_size; 00305 pix2 += line_size; 00306 } 00307 return s; 00308 } 00309 00310 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) 00311 { 00312 int i; 00313 00314 /* read the pixels */ 00315 for(i=0;i<8;i++) { 00316 block[0] = pixels[0]; 00317 block[1] = pixels[1]; 00318 block[2] = pixels[2]; 00319 block[3] = pixels[3]; 00320 block[4] = pixels[4]; 00321 block[5] = pixels[5]; 00322 block[6] = pixels[6]; 00323 block[7] = pixels[7]; 00324 pixels += line_size; 00325 block += 8; 00326 } 00327 } 00328 00329 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, 00330 const 
uint8_t *s2, int stride){ 00331 int i; 00332 00333 /* read the pixels */ 00334 for(i=0;i<8;i++) { 00335 block[0] = s1[0] - s2[0]; 00336 block[1] = s1[1] - s2[1]; 00337 block[2] = s1[2] - s2[2]; 00338 block[3] = s1[3] - s2[3]; 00339 block[4] = s1[4] - s2[4]; 00340 block[5] = s1[5] - s2[5]; 00341 block[6] = s1[6] - s2[6]; 00342 block[7] = s1[7] - s2[7]; 00343 s1 += stride; 00344 s2 += stride; 00345 block += 8; 00346 } 00347 } 00348 00349 00350 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 00351 int line_size) 00352 { 00353 int i; 00354 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 00355 00356 /* read the pixels */ 00357 for(i=0;i<8;i++) { 00358 pixels[0] = cm[block[0]]; 00359 pixels[1] = cm[block[1]]; 00360 pixels[2] = cm[block[2]]; 00361 pixels[3] = cm[block[3]]; 00362 pixels[4] = cm[block[4]]; 00363 pixels[5] = cm[block[5]]; 00364 pixels[6] = cm[block[6]]; 00365 pixels[7] = cm[block[7]]; 00366 00367 pixels += line_size; 00368 block += 8; 00369 } 00370 } 00371 00372 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 00373 int line_size) 00374 { 00375 int i; 00376 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 00377 00378 /* read the pixels */ 00379 for(i=0;i<4;i++) { 00380 pixels[0] = cm[block[0]]; 00381 pixels[1] = cm[block[1]]; 00382 pixels[2] = cm[block[2]]; 00383 pixels[3] = cm[block[3]]; 00384 00385 pixels += line_size; 00386 block += 8; 00387 } 00388 } 00389 00390 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 00391 int line_size) 00392 { 00393 int i; 00394 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 00395 00396 /* read the pixels */ 00397 for(i=0;i<2;i++) { 00398 pixels[0] = cm[block[0]]; 00399 pixels[1] = cm[block[1]]; 00400 00401 pixels += line_size; 00402 block += 8; 00403 } 00404 } 00405 00406 void ff_put_signed_pixels_clamped_c(const DCTELEM *block, 00407 uint8_t *restrict pixels, 00408 int line_size) 00409 { 00410 int i, j; 00411 00412 for (i = 0; i < 8; i++) { 00413 for (j = 0; j < 8; j++) { 00414 if (*block < -128) 00415 *pixels = 0; 00416 else if (*block > 127) 00417 *pixels = 255; 00418 else 00419 *pixels = (uint8_t)(*block + 128); 00420 block++; 00421 pixels++; 00422 } 00423 pixels += (line_size - 8); 00424 } 00425 } 00426 00427 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels, 00428 int line_size) 00429 { 00430 int i; 00431 00432 /* read the pixels */ 00433 for(i=0;i<8;i++) { 00434 pixels[0] = block[0]; 00435 pixels[1] = block[1]; 00436 pixels[2] = block[2]; 00437 pixels[3] = block[3]; 00438 pixels[4] = block[4]; 00439 pixels[5] = block[5]; 00440 pixels[6] = block[6]; 00441 pixels[7] = block[7]; 00442 00443 pixels += line_size; 00444 block += 8; 00445 } 00446 } 00447 00448 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, 00449 int line_size) 00450 { 00451 int i; 00452 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 00453 00454 /* read the pixels */ 00455 for(i=0;i<8;i++) { 00456 pixels[0] = cm[pixels[0] + block[0]]; 00457 pixels[1] = cm[pixels[1] + block[1]]; 00458 pixels[2] = cm[pixels[2] + block[2]]; 00459 pixels[3] = cm[pixels[3] + block[3]]; 00460 pixels[4] = cm[pixels[4] + block[4]]; 00461 pixels[5] = cm[pixels[5] + block[5]]; 00462 pixels[6] = cm[pixels[6] + block[6]]; 00463 pixels[7] = cm[pixels[7] + block[7]]; 00464 pixels += line_size; 00465 block += 8; 00466 } 00467 } 00468 00469 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, 00470 int line_size) 00471 { 00472 int i; 00473 uint8_t *cm = 
ff_cropTbl + MAX_NEG_CROP; 00474 00475 /* read the pixels */ 00476 for(i=0;i<4;i++) { 00477 pixels[0] = cm[pixels[0] + block[0]]; 00478 pixels[1] = cm[pixels[1] + block[1]]; 00479 pixels[2] = cm[pixels[2] + block[2]]; 00480 pixels[3] = cm[pixels[3] + block[3]]; 00481 pixels += line_size; 00482 block += 8; 00483 } 00484 } 00485 00486 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, 00487 int line_size) 00488 { 00489 int i; 00490 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 00491 00492 /* read the pixels */ 00493 for(i=0;i<2;i++) { 00494 pixels[0] = cm[pixels[0] + block[0]]; 00495 pixels[1] = cm[pixels[1] + block[1]]; 00496 pixels += line_size; 00497 block += 8; 00498 } 00499 } 00500 00501 static int sum_abs_dctelem_c(DCTELEM *block) 00502 { 00503 int sum=0, i; 00504 for(i=0; i<64; i++) 00505 sum+= FFABS(block[i]); 00506 return sum; 00507 } 00508 00509 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h) 00510 { 00511 int i; 00512 00513 for (i = 0; i < h; i++) { 00514 memset(block, value, 16); 00515 block += line_size; 00516 } 00517 } 00518 00519 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h) 00520 { 00521 int i; 00522 00523 for (i = 0; i < h; i++) { 00524 memset(block, value, 8); 00525 block += line_size; 00526 } 00527 } 00528 00529 static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize) 00530 { 00531 int i, j; 00532 uint16_t *dst1 = (uint16_t *) dst; 00533 uint16_t *dst2 = (uint16_t *)(dst + linesize); 00534 00535 for (j = 0; j < 8; j++) { 00536 for (i = 0; i < 8; i++) { 00537 dst1[i] = dst2[i] = src[i] * 0x0101; 00538 } 00539 src += 8; 00540 dst1 += linesize; 00541 dst2 += linesize; 00542 } 00543 } 00544 00545 #define avg2(a,b) ((a+b+1)>>1) 00546 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 00547 00548 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) 00549 { 00550 const int A=(16-x16)*(16-y16); 00551 const int B=( x16)*(16-y16); 00552 const int C=(16-x16)*( y16); 00553 const int D=( x16)*( y16); 00554 int i; 00555 00556 for(i=0; i<h; i++) 00557 { 00558 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; 00559 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; 00560 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; 00561 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; 00562 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; 00563 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; 00564 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; 00565 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; 00566 dst+= stride; 00567 src+= stride; 00568 } 00569 } 00570 00571 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, 00572 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) 00573 { 00574 int y, vx, vy; 00575 const int s= 1<<shift; 00576 00577 width--; 00578 height--; 00579 00580 for(y=0; y<h; y++){ 00581 int x; 00582 00583 vx= ox; 00584 vy= oy; 00585 for(x=0; x<8; x++){ //XXX FIXME optimize 00586 int src_x, src_y, frac_x, frac_y, index; 00587 00588 src_x= vx>>16; 00589 src_y= vy>>16; 00590 frac_x= src_x&(s-1); 00591 frac_y= src_y&(s-1); 00592 src_x>>=shift; 00593 src_y>>=shift; 00594 00595 if((unsigned)src_x < width){ 00596 if((unsigned)src_y < 
height){ 00597 index= src_x + src_y*stride; 00598 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 00599 + src[index +1]* frac_x )*(s-frac_y) 00600 + ( src[index+stride ]*(s-frac_x) 00601 + src[index+stride+1]* frac_x )* frac_y 00602 + r)>>(shift*2); 00603 }else{ 00604 index= src_x + av_clip(src_y, 0, height)*stride; 00605 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) 00606 + src[index +1]* frac_x )*s 00607 + r)>>(shift*2); 00608 } 00609 }else{ 00610 if((unsigned)src_y < height){ 00611 index= av_clip(src_x, 0, width) + src_y*stride; 00612 dst[y*stride + x]= ( ( src[index ]*(s-frac_y) 00613 + src[index+stride ]* frac_y )*s 00614 + r)>>(shift*2); 00615 }else{ 00616 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; 00617 dst[y*stride + x]= src[index ]; 00618 } 00619 } 00620 00621 vx+= dxx; 00622 vy+= dyx; 00623 } 00624 ox += dxy; 00625 oy += dyy; 00626 } 00627 } 00628 00629 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00630 switch(width){ 00631 case 2: put_pixels2_8_c (dst, src, stride, height); break; 00632 case 4: put_pixels4_8_c (dst, src, stride, height); break; 00633 case 8: put_pixels8_8_c (dst, src, stride, height); break; 00634 case 16:put_pixels16_8_c(dst, src, stride, height); break; 00635 } 00636 } 00637 00638 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00639 int i,j; 00640 for (i=0; i < height; i++) { 00641 for (j=0; j < width; j++) { 00642 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; 00643 } 00644 src += stride; 00645 dst += stride; 00646 } 00647 } 00648 00649 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00650 int i,j; 00651 for (i=0; i < height; i++) { 00652 for (j=0; j < width; j++) { 00653 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; 00654 } 00655 src += stride; 00656 dst += stride; 00657 } 00658 } 00659 00660 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00661 int i,j; 00662 for (i=0; i < height; i++) { 00663 for (j=0; j < width; j++) { 00664 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; 00665 } 00666 src += stride; 00667 dst += stride; 00668 } 00669 } 00670 00671 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00672 int i,j; 00673 for (i=0; i < height; i++) { 00674 for (j=0; j < width; j++) { 00675 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; 00676 } 00677 src += stride; 00678 dst += stride; 00679 } 00680 } 00681 00682 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00683 int i,j; 00684 for (i=0; i < height; i++) { 00685 for (j=0; j < width; j++) { 00686 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; 00687 } 00688 src += stride; 00689 dst += stride; 00690 } 00691 } 00692 00693 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00694 int i,j; 00695 for (i=0; i < height; i++) { 00696 for (j=0; j < width; j++) { 00697 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; 00698 } 00699 src += stride; 00700 dst += stride; 00701 } 00702 } 00703 00704 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00705 int i,j; 00706 for (i=0; i < height; i++) { 00707 for 
(j=0; j < width; j++) { 00708 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; 00709 } 00710 src += stride; 00711 dst += stride; 00712 } 00713 } 00714 00715 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00716 int i,j; 00717 for (i=0; i < height; i++) { 00718 for (j=0; j < width; j++) { 00719 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; 00720 } 00721 src += stride; 00722 dst += stride; 00723 } 00724 } 00725 00726 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00727 switch(width){ 00728 case 2: avg_pixels2_8_c (dst, src, stride, height); break; 00729 case 4: avg_pixels4_8_c (dst, src, stride, height); break; 00730 case 8: avg_pixels8_8_c (dst, src, stride, height); break; 00731 case 16:avg_pixels16_8_c(dst, src, stride, height); break; 00732 } 00733 } 00734 00735 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00736 int i,j; 00737 for (i=0; i < height; i++) { 00738 for (j=0; j < width; j++) { 00739 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; 00740 } 00741 src += stride; 00742 dst += stride; 00743 } 00744 } 00745 00746 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00747 int i,j; 00748 for (i=0; i < height; i++) { 00749 for (j=0; j < width; j++) { 00750 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; 00751 } 00752 src += stride; 00753 dst += stride; 00754 } 00755 } 00756 00757 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00758 int i,j; 00759 for (i=0; i < height; i++) { 00760 for (j=0; j < width; j++) { 00761 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; 00762 } 00763 src += stride; 00764 dst += stride; 00765 } 00766 } 00767 00768 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00769 int i,j; 00770 for (i=0; i < height; i++) { 00771 for (j=0; j < width; j++) { 00772 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 00773 } 00774 src += stride; 00775 dst += stride; 00776 } 00777 } 00778 00779 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00780 int i,j; 00781 for (i=0; i < height; i++) { 00782 for (j=0; j < width; j++) { 00783 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 00784 } 00785 src += stride; 00786 dst += stride; 00787 } 00788 } 00789 00790 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00791 int i,j; 00792 for (i=0; i < height; i++) { 00793 for (j=0; j < width; j++) { 00794 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; 00795 } 00796 src += stride; 00797 dst += stride; 00798 } 00799 } 00800 00801 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00802 int i,j; 00803 for (i=0; i < height; i++) { 00804 for (j=0; j < width; j++) { 00805 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 00806 } 00807 src += stride; 00808 dst += stride; 00809 } 
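/* Editorial note, not part of the original file: the put_tpel_* and avg_tpel_*
 * functions above are the "thirdpel" interpolation filters (used by the SVQ3
 * decoder).  The constants are fixed-point approximations: 683/2048 is roughly 1/3
 * and 2731/32768 is roughly 1/12, so for example
 *     (683*(2*a + b + 1)) >> 11                       is about (2*a + b) / 3
 *     (2731*(4*a + 3*b + 3*c + 2*d + 6)) >> 15        is about (4*a + 3*b + 3*c + 2*d) / 12
 * i.e. weighted averages of the neighbouring pixels at one-third-pel offsets,
 * with the "+ 1" / "+ 6" terms providing rounding. */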
00810 } 00811 00812 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ 00813 int i,j; 00814 for (i=0; i < height; i++) { 00815 for (j=0; j < width; j++) { 00816 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; 00817 } 00818 src += stride; 00819 dst += stride; 00820 } 00821 } 00822 #if 0 00823 #define TPEL_WIDTH(width)\ 00824 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00825 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ 00826 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00827 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ 00828 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00829 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ 00830 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00831 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ 00832 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00833 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ 00834 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00835 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ 00836 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00837 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ 00838 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00839 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ 00840 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ 00841 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} 00842 #endif 00843 00844 #define QPEL_MC(r, OPNAME, RND, OP) \ 00845 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 00846 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00847 int i;\ 00848 for(i=0; i<h; i++)\ 00849 {\ 00850 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ 00851 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ 00852 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ 00853 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ 00854 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ 00855 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ 00856 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ 00857 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ 00858 dst+=dstStride;\ 00859 src+=srcStride;\ 00860 }\ 00861 }\ 00862 \ 00863 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 00864 const int w=8;\ 00865 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00866 int i;\ 00867 for(i=0; i<w; i++)\ 00868 {\ 00869 const int src0= src[0*srcStride];\ 00870 const int src1= src[1*srcStride];\ 00871 const int 
src2= src[2*srcStride];\ 00872 const int src3= src[3*srcStride];\ 00873 const int src4= src[4*srcStride];\ 00874 const int src5= src[5*srcStride];\ 00875 const int src6= src[6*srcStride];\ 00876 const int src7= src[7*srcStride];\ 00877 const int src8= src[8*srcStride];\ 00878 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ 00879 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ 00880 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ 00881 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ 00882 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ 00883 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ 00884 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ 00885 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ 00886 dst++;\ 00887 src++;\ 00888 }\ 00889 }\ 00890 \ 00891 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 00892 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00893 int i;\ 00894 \ 00895 for(i=0; i<h; i++)\ 00896 {\ 00897 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ 00898 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ 00899 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ 00900 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ 00901 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ 00902 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ 00903 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ 00904 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ 00905 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ 00906 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ 00907 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ 00908 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ 00909 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ 00910 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ 00911 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ 00912 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ 00913 dst+=dstStride;\ 00914 src+=srcStride;\ 00915 }\ 00916 }\ 00917 \ 00918 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 00919 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ 00920 int i;\ 00921 const int w=16;\ 00922 for(i=0; i<w; i++)\ 00923 {\ 00924 const int src0= src[0*srcStride];\ 00925 const int src1= src[1*srcStride];\ 00926 const int src2= src[2*srcStride];\ 00927 const int src3= src[3*srcStride];\ 00928 const int src4= src[4*srcStride];\ 00929 const int src5= src[5*srcStride];\ 00930 const int src6= 
src[6*srcStride];\ 00931 const int src7= src[7*srcStride];\ 00932 const int src8= src[8*srcStride];\ 00933 const int src9= src[9*srcStride];\ 00934 const int src10= src[10*srcStride];\ 00935 const int src11= src[11*srcStride];\ 00936 const int src12= src[12*srcStride];\ 00937 const int src13= src[13*srcStride];\ 00938 const int src14= src[14*srcStride];\ 00939 const int src15= src[15*srcStride];\ 00940 const int src16= src[16*srcStride];\ 00941 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ 00942 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ 00943 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ 00944 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ 00945 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ 00946 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ 00947 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ 00948 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ 00949 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ 00950 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ 00951 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ 00952 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ 00953 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ 00954 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ 00955 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ 00956 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ 00957 dst++;\ 00958 src++;\ 00959 }\ 00960 }\ 00961 \ 00962 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 00963 uint8_t half[64];\ 00964 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 00965 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\ 00966 }\ 00967 \ 00968 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 00969 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ 00970 }\ 00971 \ 00972 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 00973 uint8_t half[64];\ 00974 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ 00975 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\ 00976 }\ 00977 \ 00978 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 00979 uint8_t full[16*9];\ 00980 uint8_t half[64];\ 00981 copy_block9(full, src, 16, stride, 9);\ 00982 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 00983 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\ 00984 }\ 00985 \ 00986 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 00987 uint8_t full[16*9];\ 00988 copy_block9(full, src, 16, stride, 9);\ 00989 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ 00990 }\ 00991 \ 00992 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 00993 uint8_t full[16*9];\ 00994 uint8_t half[64];\ 00995 copy_block9(full, src, 16, stride, 9);\ 00996 put ## RND 
## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ 00997 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\ 00998 }\ 00999 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01000 uint8_t full[16*9];\ 01001 uint8_t halfH[72];\ 01002 uint8_t halfV[64];\ 01003 uint8_t halfHV[64];\ 01004 copy_block9(full, src, 16, stride, 9);\ 01005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 01007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01008 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 01009 }\ 01010 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 01011 uint8_t full[16*9];\ 01012 uint8_t halfH[72];\ 01013 uint8_t halfHV[64];\ 01014 copy_block9(full, src, 16, stride, 9);\ 01015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 01017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01018 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 01019 }\ 01020 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01021 uint8_t full[16*9];\ 01022 uint8_t halfH[72];\ 01023 uint8_t halfV[64];\ 01024 uint8_t halfHV[64];\ 01025 copy_block9(full, src, 16, stride, 9);\ 01026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 01028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01029 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 01030 }\ 01031 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 01032 uint8_t full[16*9];\ 01033 uint8_t halfH[72];\ 01034 uint8_t halfHV[64];\ 01035 copy_block9(full, src, 16, stride, 9);\ 01036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 01038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01039 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 01040 }\ 01041 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01042 uint8_t full[16*9];\ 01043 uint8_t halfH[72];\ 01044 uint8_t halfV[64];\ 01045 uint8_t halfHV[64];\ 01046 copy_block9(full, src, 16, stride, 9);\ 01047 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01048 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 01049 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01050 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 01051 }\ 01052 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 01053 uint8_t full[16*9];\ 01054 uint8_t halfH[72];\ 01055 uint8_t halfHV[64];\ 01056 copy_block9(full, src, 16, stride, 9);\ 01057 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01058 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 01059 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01060 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 01061 }\ 01062 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01063 uint8_t full[16*9];\ 01064 uint8_t halfH[72];\ 01065 uint8_t halfV[64];\ 01066 uint8_t halfHV[64];\ 01067 copy_block9(full, src, 16, stride, 9);\ 01068 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ 01069 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 
8, 16);\ 01070 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01071 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ 01072 }\ 01073 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 01074 uint8_t full[16*9];\ 01075 uint8_t halfH[72];\ 01076 uint8_t halfHV[64];\ 01077 copy_block9(full, src, 16, stride, 9);\ 01078 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01079 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 01080 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01081 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 01082 }\ 01083 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 01084 uint8_t halfH[72];\ 01085 uint8_t halfHV[64];\ 01086 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 01087 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01088 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ 01089 }\ 01090 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 01091 uint8_t halfH[72];\ 01092 uint8_t halfHV[64];\ 01093 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 01094 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01095 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ 01096 }\ 01097 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01098 uint8_t full[16*9];\ 01099 uint8_t halfH[72];\ 01100 uint8_t halfV[64];\ 01101 uint8_t halfHV[64];\ 01102 copy_block9(full, src, 16, stride, 9);\ 01103 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01104 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ 01105 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01106 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ 01107 }\ 01108 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ 01109 uint8_t full[16*9];\ 01110 uint8_t halfH[72];\ 01111 copy_block9(full, src, 16, stride, 9);\ 01112 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01113 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ 01114 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 01115 }\ 01116 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01117 uint8_t full[16*9];\ 01118 uint8_t halfH[72];\ 01119 uint8_t halfV[64];\ 01120 uint8_t halfHV[64];\ 01121 copy_block9(full, src, 16, stride, 9);\ 01122 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01123 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ 01124 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ 01125 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ 01126 }\ 01127 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 01128 uint8_t full[16*9];\ 01129 uint8_t halfH[72];\ 01130 copy_block9(full, src, 16, stride, 9);\ 01131 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ 01132 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ 01133 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 01134 }\ 01135 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 01136 uint8_t halfH[72];\ 01137 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ 01138 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ 01139 }\ 01140 \ 01141 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ 01142 uint8_t half[256];\ 01143 put ## 
RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 01144 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\ 01145 }\ 01146 \ 01147 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ 01148 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ 01149 }\ 01150 \ 01151 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ 01152 uint8_t half[256];\ 01153 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ 01154 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\ 01155 }\ 01156 \ 01157 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ 01158 uint8_t full[24*17];\ 01159 uint8_t half[256];\ 01160 copy_block17(full, src, 24, stride, 17);\ 01161 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 01162 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\ 01163 }\ 01164 \ 01165 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ 01166 uint8_t full[24*17];\ 01167 copy_block17(full, src, 24, stride, 17);\ 01168 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ 01169 }\ 01170 \ 01171 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ 01172 uint8_t full[24*17];\ 01173 uint8_t half[256];\ 01174 copy_block17(full, src, 24, stride, 17);\ 01175 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ 01176 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\ 01177 }\ 01178 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01179 uint8_t full[24*17];\ 01180 uint8_t halfH[272];\ 01181 uint8_t halfV[256];\ 01182 uint8_t halfHV[256];\ 01183 copy_block17(full, src, 24, stride, 17);\ 01184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 01186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01187 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 01188 }\ 01189 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ 01190 uint8_t full[24*17];\ 01191 uint8_t halfH[272];\ 01192 uint8_t halfHV[256];\ 01193 copy_block17(full, src, 24, stride, 17);\ 01194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 01196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01197 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 01198 }\ 01199 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01200 uint8_t full[24*17];\ 01201 uint8_t halfH[272];\ 01202 uint8_t halfV[256];\ 01203 uint8_t halfHV[256];\ 01204 copy_block17(full, src, 24, stride, 17);\ 01205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 01207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01208 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 01209 }\ 01210 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ 01211 uint8_t full[24*17];\ 01212 uint8_t halfH[272];\ 01213 uint8_t halfHV[256];\ 01214 copy_block17(full, src, 24, stride, 17);\ 01215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 01217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01218 
OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 01219 }\ 01220 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01221 uint8_t full[24*17];\ 01222 uint8_t halfH[272];\ 01223 uint8_t halfV[256];\ 01224 uint8_t halfHV[256];\ 01225 copy_block17(full, src, 24, stride, 17);\ 01226 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01227 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 01228 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01229 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 01230 }\ 01231 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ 01232 uint8_t full[24*17];\ 01233 uint8_t halfH[272];\ 01234 uint8_t halfHV[256];\ 01235 copy_block17(full, src, 24, stride, 17);\ 01236 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01237 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 01238 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01239 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 01240 }\ 01241 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01242 uint8_t full[24*17];\ 01243 uint8_t halfH[272];\ 01244 uint8_t halfV[256];\ 01245 uint8_t halfHV[256];\ 01246 copy_block17(full, src, 24, stride, 17);\ 01247 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ 01248 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 01249 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01250 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ 01251 }\ 01252 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ 01253 uint8_t full[24*17];\ 01254 uint8_t halfH[272];\ 01255 uint8_t halfHV[256];\ 01256 copy_block17(full, src, 24, stride, 17);\ 01257 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01258 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 01259 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01260 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 01261 }\ 01262 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ 01263 uint8_t halfH[272];\ 01264 uint8_t halfHV[256];\ 01265 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 01266 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01267 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ 01268 }\ 01269 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ 01270 uint8_t halfH[272];\ 01271 uint8_t halfHV[256];\ 01272 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 01273 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01274 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ 01275 }\ 01276 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01277 uint8_t full[24*17];\ 01278 uint8_t halfH[272];\ 01279 uint8_t halfV[256];\ 01280 uint8_t halfHV[256];\ 01281 copy_block17(full, src, 24, stride, 17);\ 01282 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01283 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ 01284 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01285 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ 01286 }\ 01287 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ 
01288 uint8_t full[24*17];\ 01289 uint8_t halfH[272];\ 01290 copy_block17(full, src, 24, stride, 17);\ 01291 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01292 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ 01293 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 01294 }\ 01295 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ 01296 uint8_t full[24*17];\ 01297 uint8_t halfH[272];\ 01298 uint8_t halfV[256];\ 01299 uint8_t halfHV[256];\ 01300 copy_block17(full, src, 24, stride, 17);\ 01301 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01302 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ 01303 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ 01304 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ 01305 }\ 01306 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ 01307 uint8_t full[24*17];\ 01308 uint8_t halfH[272];\ 01309 copy_block17(full, src, 24, stride, 17);\ 01310 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ 01311 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ 01312 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 01313 }\ 01314 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ 01315 uint8_t halfH[272];\ 01316 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ 01317 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ 01318 } 01319 01320 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) 01321 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) 01322 #define op_put(a, b) a = cm[((b) + 16)>>5] 01323 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] 01324 01325 QPEL_MC(0, put_ , _ , op_put) 01326 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) 01327 QPEL_MC(0, avg_ , _ , op_avg) 01328 //QPEL_MC(1, avg_no_rnd , _ , op_avg) 01329 #undef op_avg 01330 #undef op_avg_no_rnd 01331 #undef op_put 01332 #undef op_put_no_rnd 01333 01334 #define put_qpel8_mc00_c ff_put_pixels8x8_c 01335 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c 01336 #define put_qpel16_mc00_c ff_put_pixels16x16_c 01337 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c 01338 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c 01339 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c 01340 01341 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ 01342 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 01343 int i; 01344 01345 for(i=0; i<h; i++){ 01346 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; 01347 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; 01348 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; 01349 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; 01350 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; 01351 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; 01352 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; 01353 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; 01354 dst+=dstStride; 01355 src+=srcStride; 01356 } 01357 } 01358 01359 #if CONFIG_RV40_DECODER 01360 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 01361 put_pixels16_xy2_8_c(dst, src, stride, 16); 01362 } 01363 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 01364 avg_pixels16_xy2_8_c(dst, src, stride, 16); 01365 } 01366 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t 
*src, int stride){ 01367 put_pixels8_xy2_8_c(dst, src, stride, 8); 01368 } 01369 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ 01370 avg_pixels8_xy2_8_c(dst, src, stride, 8); 01371 } 01372 #endif /* CONFIG_RV40_DECODER */ 01373 01374 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ 01375 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 01376 int i; 01377 01378 for(i=0; i<w; i++){ 01379 const int src_1= src[ -srcStride]; 01380 const int src0 = src[0 ]; 01381 const int src1 = src[ srcStride]; 01382 const int src2 = src[2*srcStride]; 01383 const int src3 = src[3*srcStride]; 01384 const int src4 = src[4*srcStride]; 01385 const int src5 = src[5*srcStride]; 01386 const int src6 = src[6*srcStride]; 01387 const int src7 = src[7*srcStride]; 01388 const int src8 = src[8*srcStride]; 01389 const int src9 = src[9*srcStride]; 01390 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; 01391 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; 01392 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; 01393 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; 01394 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; 01395 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; 01396 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; 01397 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; 01398 src++; 01399 dst++; 01400 } 01401 } 01402 01403 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ 01404 uint8_t half[64]; 01405 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); 01406 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8); 01407 } 01408 01409 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ 01410 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); 01411 } 01412 01413 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ 01414 uint8_t half[64]; 01415 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); 01416 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8); 01417 } 01418 01419 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ 01420 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); 01421 } 01422 01423 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ 01424 uint8_t halfH[88]; 01425 uint8_t halfV[64]; 01426 uint8_t halfHV[64]; 01427 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); 01428 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); 01429 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); 01430 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); 01431 } 01432 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ 01433 uint8_t halfH[88]; 01434 uint8_t halfV[64]; 01435 uint8_t halfHV[64]; 01436 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); 01437 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); 01438 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); 01439 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); 01440 } 01441 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ 01442 uint8_t halfH[88]; 01443 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); 01444 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); 01445 } 01446 01447 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ 01448 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 01449 int x; 01450 const int strength= ff_h263_loop_filter_strength[qscale]; 01451 01452 for(x=0; x<8; x++){ 01453 int d1, d2, 
ad1; 01454 int p0= src[x-2*stride]; 01455 int p1= src[x-1*stride]; 01456 int p2= src[x+0*stride]; 01457 int p3= src[x+1*stride]; 01458 int d = (p0 - p3 + 4*(p2 - p1)) / 8; 01459 01460 if (d<-2*strength) d1= 0; 01461 else if(d<- strength) d1=-2*strength - d; 01462 else if(d< strength) d1= d; 01463 else if(d< 2*strength) d1= 2*strength - d; 01464 else d1= 0; 01465 01466 p1 += d1; 01467 p2 -= d1; 01468 if(p1&256) p1= ~(p1>>31); 01469 if(p2&256) p2= ~(p2>>31); 01470 01471 src[x-1*stride] = p1; 01472 src[x+0*stride] = p2; 01473 01474 ad1= FFABS(d1)>>1; 01475 01476 d2= av_clip((p0-p3)/4, -ad1, ad1); 01477 01478 src[x-2*stride] = p0 - d2; 01479 src[x+ stride] = p3 + d2; 01480 } 01481 } 01482 } 01483 01484 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ 01485 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 01486 int y; 01487 const int strength= ff_h263_loop_filter_strength[qscale]; 01488 01489 for(y=0; y<8; y++){ 01490 int d1, d2, ad1; 01491 int p0= src[y*stride-2]; 01492 int p1= src[y*stride-1]; 01493 int p2= src[y*stride+0]; 01494 int p3= src[y*stride+1]; 01495 int d = (p0 - p3 + 4*(p2 - p1)) / 8; 01496 01497 if (d<-2*strength) d1= 0; 01498 else if(d<- strength) d1=-2*strength - d; 01499 else if(d< strength) d1= d; 01500 else if(d< 2*strength) d1= 2*strength - d; 01501 else d1= 0; 01502 01503 p1 += d1; 01504 p2 -= d1; 01505 if(p1&256) p1= ~(p1>>31); 01506 if(p2&256) p2= ~(p2>>31); 01507 01508 src[y*stride-1] = p1; 01509 src[y*stride+0] = p2; 01510 01511 ad1= FFABS(d1)>>1; 01512 01513 d2= av_clip((p0-p3)/4, -ad1, ad1); 01514 01515 src[y*stride-2] = p0 - d2; 01516 src[y*stride+1] = p3 + d2; 01517 } 01518 } 01519 } 01520 01521 static void h261_loop_filter_c(uint8_t *src, int stride){ 01522 int x,y,xy,yz; 01523 int temp[64]; 01524 01525 for(x=0; x<8; x++){ 01526 temp[x ] = 4*src[x ]; 01527 temp[x + 7*8] = 4*src[x + 7*stride]; 01528 } 01529 for(y=1; y<7; y++){ 01530 for(x=0; x<8; x++){ 01531 xy = y * stride + x; 01532 yz = y * 8 + x; 01533 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; 01534 } 01535 } 01536 01537 for(y=0; y<8; y++){ 01538 src[ y*stride] = (temp[ y*8] + 2)>>2; 01539 src[7+y*stride] = (temp[7+y*8] + 2)>>2; 01540 for(x=1; x<7; x++){ 01541 xy = y * stride + x; 01542 yz = y * 8 + x; 01543 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; 01544 } 01545 } 01546 } 01547 01548 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01549 { 01550 int s, i; 01551 01552 s = 0; 01553 for(i=0;i<h;i++) { 01554 s += abs(pix1[0] - pix2[0]); 01555 s += abs(pix1[1] - pix2[1]); 01556 s += abs(pix1[2] - pix2[2]); 01557 s += abs(pix1[3] - pix2[3]); 01558 s += abs(pix1[4] - pix2[4]); 01559 s += abs(pix1[5] - pix2[5]); 01560 s += abs(pix1[6] - pix2[6]); 01561 s += abs(pix1[7] - pix2[7]); 01562 s += abs(pix1[8] - pix2[8]); 01563 s += abs(pix1[9] - pix2[9]); 01564 s += abs(pix1[10] - pix2[10]); 01565 s += abs(pix1[11] - pix2[11]); 01566 s += abs(pix1[12] - pix2[12]); 01567 s += abs(pix1[13] - pix2[13]); 01568 s += abs(pix1[14] - pix2[14]); 01569 s += abs(pix1[15] - pix2[15]); 01570 pix1 += line_size; 01571 pix2 += line_size; 01572 } 01573 return s; 01574 } 01575 01576 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01577 { 01578 int s, i; 01579 01580 s = 0; 01581 for(i=0;i<h;i++) { 01582 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 01583 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 01584 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 01585 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 01586 s += 
abs(pix1[4] - avg2(pix2[4], pix2[5])); 01587 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 01588 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 01589 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 01590 s += abs(pix1[8] - avg2(pix2[8], pix2[9])); 01591 s += abs(pix1[9] - avg2(pix2[9], pix2[10])); 01592 s += abs(pix1[10] - avg2(pix2[10], pix2[11])); 01593 s += abs(pix1[11] - avg2(pix2[11], pix2[12])); 01594 s += abs(pix1[12] - avg2(pix2[12], pix2[13])); 01595 s += abs(pix1[13] - avg2(pix2[13], pix2[14])); 01596 s += abs(pix1[14] - avg2(pix2[14], pix2[15])); 01597 s += abs(pix1[15] - avg2(pix2[15], pix2[16])); 01598 pix1 += line_size; 01599 pix2 += line_size; 01600 } 01601 return s; 01602 } 01603 01604 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01605 { 01606 int s, i; 01607 uint8_t *pix3 = pix2 + line_size; 01608 01609 s = 0; 01610 for(i=0;i<h;i++) { 01611 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 01612 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 01613 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 01614 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 01615 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 01616 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 01617 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 01618 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 01619 s += abs(pix1[8] - avg2(pix2[8], pix3[8])); 01620 s += abs(pix1[9] - avg2(pix2[9], pix3[9])); 01621 s += abs(pix1[10] - avg2(pix2[10], pix3[10])); 01622 s += abs(pix1[11] - avg2(pix2[11], pix3[11])); 01623 s += abs(pix1[12] - avg2(pix2[12], pix3[12])); 01624 s += abs(pix1[13] - avg2(pix2[13], pix3[13])); 01625 s += abs(pix1[14] - avg2(pix2[14], pix3[14])); 01626 s += abs(pix1[15] - avg2(pix2[15], pix3[15])); 01627 pix1 += line_size; 01628 pix2 += line_size; 01629 pix3 += line_size; 01630 } 01631 return s; 01632 } 01633 01634 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01635 { 01636 int s, i; 01637 uint8_t *pix3 = pix2 + line_size; 01638 01639 s = 0; 01640 for(i=0;i<h;i++) { 01641 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 01642 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 01643 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 01644 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 01645 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 01646 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 01647 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 01648 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 01649 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); 01650 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); 01651 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); 01652 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); 01653 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); 01654 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); 01655 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); 01656 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); 01657 pix1 += line_size; 01658 pix2 += line_size; 01659 pix3 += line_size; 01660 } 01661 return s; 01662 } 01663 01664 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01665 { 01666 int s, i; 01667 01668 s = 0; 01669 for(i=0;i<h;i++) { 01670 s += abs(pix1[0] - pix2[0]); 01671 s += abs(pix1[1] - pix2[1]); 01672 s += abs(pix1[2] - pix2[2]); 01673 s += abs(pix1[3] 
- pix2[3]); 01674 s += abs(pix1[4] - pix2[4]); 01675 s += abs(pix1[5] - pix2[5]); 01676 s += abs(pix1[6] - pix2[6]); 01677 s += abs(pix1[7] - pix2[7]); 01678 pix1 += line_size; 01679 pix2 += line_size; 01680 } 01681 return s; 01682 } 01683 01684 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01685 { 01686 int s, i; 01687 01688 s = 0; 01689 for(i=0;i<h;i++) { 01690 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); 01691 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); 01692 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); 01693 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); 01694 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); 01695 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); 01696 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); 01697 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); 01698 pix1 += line_size; 01699 pix2 += line_size; 01700 } 01701 return s; 01702 } 01703 01704 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01705 { 01706 int s, i; 01707 uint8_t *pix3 = pix2 + line_size; 01708 01709 s = 0; 01710 for(i=0;i<h;i++) { 01711 s += abs(pix1[0] - avg2(pix2[0], pix3[0])); 01712 s += abs(pix1[1] - avg2(pix2[1], pix3[1])); 01713 s += abs(pix1[2] - avg2(pix2[2], pix3[2])); 01714 s += abs(pix1[3] - avg2(pix2[3], pix3[3])); 01715 s += abs(pix1[4] - avg2(pix2[4], pix3[4])); 01716 s += abs(pix1[5] - avg2(pix2[5], pix3[5])); 01717 s += abs(pix1[6] - avg2(pix2[6], pix3[6])); 01718 s += abs(pix1[7] - avg2(pix2[7], pix3[7])); 01719 pix1 += line_size; 01720 pix2 += line_size; 01721 pix3 += line_size; 01722 } 01723 return s; 01724 } 01725 01726 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) 01727 { 01728 int s, i; 01729 uint8_t *pix3 = pix2 + line_size; 01730 01731 s = 0; 01732 for(i=0;i<h;i++) { 01733 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); 01734 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); 01735 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); 01736 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); 01737 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); 01738 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); 01739 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); 01740 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); 01741 pix1 += line_size; 01742 pix2 += line_size; 01743 pix3 += line_size; 01744 } 01745 return s; 01746 } 01747 01748 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ 01749 MpegEncContext *c = v; 01750 int score1=0; 01751 int score2=0; 01752 int x,y; 01753 01754 for(y=0; y<h; y++){ 01755 for(x=0; x<16; x++){ 01756 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); 01757 } 01758 if(y+1<h){ 01759 for(x=0; x<15; x++){ 01760 score2+= FFABS( s1[x ] - s1[x +stride] 01761 - s1[x+1] + s1[x+1+stride]) 01762 -FFABS( s2[x ] - s2[x +stride] 01763 - s2[x+1] + s2[x+1+stride]); 01764 } 01765 } 01766 s1+= stride; 01767 s2+= stride; 01768 } 01769 01770 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; 01771 else return score1 + FFABS(score2)*8; 01772 } 01773 01774 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ 01775 MpegEncContext *c = v; 01776 int score1=0; 01777 int score2=0; 01778 int x,y; 01779 01780 for(y=0; y<h; y++){ 01781 for(x=0; x<8; x++){ 01782 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); 01783 } 01784 if(y+1<h){ 01785 for(x=0; x<7; x++){ 01786 score2+= FFABS( s1[x ] - s1[x +stride] 01787 - s1[x+1] + s1[x+1+stride]) 01788 -FFABS( s2[x ] 
- s2[x +stride] 01789 - s2[x+1] + s2[x+1+stride]); 01790 } 01791 } 01792 s1+= stride; 01793 s2+= stride; 01794 } 01795 01796 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; 01797 else return score1 + FFABS(score2)*8; 01798 } 01799 01800 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ 01801 int i; 01802 unsigned int sum=0; 01803 01804 for(i=0; i<8*8; i++){ 01805 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); 01806 int w= weight[i]; 01807 b>>= RECON_SHIFT; 01808 assert(-512<b && b<512); 01809 01810 sum += (w*b)*(w*b)>>4; 01811 } 01812 return sum>>2; 01813 } 01814 01815 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ 01816 int i; 01817 01818 for(i=0; i<8*8; i++){ 01819 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); 01820 } 01821 } 01822 01831 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) 01832 { 01833 int i; 01834 DCTELEM temp[64]; 01835 01836 if(last<=0) return; 01837 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations 01838 01839 for(i=0; i<=last; i++){ 01840 const int j= scantable[i]; 01841 temp[j]= block[j]; 01842 block[j]=0; 01843 } 01844 01845 for(i=0; i<=last; i++){ 01846 const int j= scantable[i]; 01847 const int perm_j= permutation[j]; 01848 block[perm_j]= temp[j]; 01849 } 01850 } 01851 01852 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ 01853 return 0; 01854 } 01855 01856 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ 01857 int i; 01858 01859 memset(cmp, 0, sizeof(void*)*6); 01860 01861 for(i=0; i<6; i++){ 01862 switch(type&0xFF){ 01863 case FF_CMP_SAD: 01864 cmp[i]= c->sad[i]; 01865 break; 01866 case FF_CMP_SATD: 01867 cmp[i]= c->hadamard8_diff[i]; 01868 break; 01869 case FF_CMP_SSE: 01870 cmp[i]= c->sse[i]; 01871 break; 01872 case FF_CMP_DCT: 01873 cmp[i]= c->dct_sad[i]; 01874 break; 01875 case FF_CMP_DCT264: 01876 cmp[i]= c->dct264_sad[i]; 01877 break; 01878 case FF_CMP_DCTMAX: 01879 cmp[i]= c->dct_max[i]; 01880 break; 01881 case FF_CMP_PSNR: 01882 cmp[i]= c->quant_psnr[i]; 01883 break; 01884 case FF_CMP_BIT: 01885 cmp[i]= c->bit[i]; 01886 break; 01887 case FF_CMP_RD: 01888 cmp[i]= c->rd[i]; 01889 break; 01890 case FF_CMP_VSAD: 01891 cmp[i]= c->vsad[i]; 01892 break; 01893 case FF_CMP_VSSE: 01894 cmp[i]= c->vsse[i]; 01895 break; 01896 case FF_CMP_ZERO: 01897 cmp[i]= zero_cmp; 01898 break; 01899 case FF_CMP_NSSE: 01900 cmp[i]= c->nsse[i]; 01901 break; 01902 #if CONFIG_DWT 01903 case FF_CMP_W53: 01904 cmp[i]= c->w53[i]; 01905 break; 01906 case FF_CMP_W97: 01907 cmp[i]= c->w97[i]; 01908 break; 01909 #endif 01910 default: 01911 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); 01912 } 01913 } 01914 } 01915 01916 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ 01917 long i; 01918 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ 01919 long a = *(long*)(src+i); 01920 long b = *(long*)(dst+i); 01921 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); 01922 } 01923 for(; i<w; i++) 01924 dst[i+0] += src[i+0]; 01925 } 01926 01927 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 01928 long i; 01929 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ 01930 long a = *(long*)(src1+i); 01931 long b = *(long*)(src2+i); 01932 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); 01933 } 01934 for(; i<w; i++) 01935 
dst[i] = src1[i]+src2[i]; 01936 } 01937 01938 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 01939 long i; 01940 #if !HAVE_FAST_UNALIGNED 01941 if((long)src2 & (sizeof(long)-1)){ 01942 for(i=0; i+7<w; i+=8){ 01943 dst[i+0] = src1[i+0]-src2[i+0]; 01944 dst[i+1] = src1[i+1]-src2[i+1]; 01945 dst[i+2] = src1[i+2]-src2[i+2]; 01946 dst[i+3] = src1[i+3]-src2[i+3]; 01947 dst[i+4] = src1[i+4]-src2[i+4]; 01948 dst[i+5] = src1[i+5]-src2[i+5]; 01949 dst[i+6] = src1[i+6]-src2[i+6]; 01950 dst[i+7] = src1[i+7]-src2[i+7]; 01951 } 01952 }else 01953 #endif 01954 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ 01955 long a = *(long*)(src1+i); 01956 long b = *(long*)(src2+i); 01957 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); 01958 } 01959 for(; i<w; i++) 01960 dst[i+0] = src1[i+0]-src2[i+0]; 01961 } 01962 01963 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){ 01964 int i; 01965 uint8_t l, lt; 01966 01967 l= *left; 01968 lt= *left_top; 01969 01970 for(i=0; i<w; i++){ 01971 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i]; 01972 lt= src1[i]; 01973 dst[i]= l; 01974 } 01975 01976 *left= l; 01977 *left_top= lt; 01978 } 01979 01980 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){ 01981 int i; 01982 uint8_t l, lt; 01983 01984 l= *left; 01985 lt= *left_top; 01986 01987 for(i=0; i<w; i++){ 01988 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF); 01989 lt= src1[i]; 01990 l= src2[i]; 01991 dst[i]= l - pred; 01992 } 01993 01994 *left= l; 01995 *left_top= lt; 01996 } 01997 01998 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){ 01999 int i; 02000 02001 for(i=0; i<w-1; i++){ 02002 acc+= src[i]; 02003 dst[i]= acc; 02004 i++; 02005 acc+= src[i]; 02006 dst[i]= acc; 02007 } 02008 02009 for(; i<w; i++){ 02010 acc+= src[i]; 02011 dst[i]= acc; 02012 } 02013 02014 return acc; 02015 } 02016 02017 #if HAVE_BIGENDIAN 02018 #define B 3 02019 #define G 2 02020 #define R 1 02021 #define A 0 02022 #else 02023 #define B 0 02024 #define G 1 02025 #define R 2 02026 #define A 3 02027 #endif 02028 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){ 02029 int i; 02030 int r,g,b,a; 02031 r= *red; 02032 g= *green; 02033 b= *blue; 02034 a= *alpha; 02035 02036 for(i=0; i<w; i++){ 02037 b+= src[4*i+B]; 02038 g+= src[4*i+G]; 02039 r+= src[4*i+R]; 02040 a+= src[4*i+A]; 02041 02042 dst[4*i+B]= b; 02043 dst[4*i+G]= g; 02044 dst[4*i+R]= r; 02045 dst[4*i+A]= a; 02046 } 02047 02048 *red= r; 02049 *green= g; 02050 *blue= b; 02051 *alpha= a; 02052 } 02053 #undef B 02054 #undef G 02055 #undef R 02056 #undef A 02057 02058 #define BUTTERFLY2(o1,o2,i1,i2) \ 02059 o1= (i1)+(i2);\ 02060 o2= (i1)-(i2); 02061 02062 #define BUTTERFLY1(x,y) \ 02063 {\ 02064 int a,b;\ 02065 a= x;\ 02066 b= y;\ 02067 x= a+b;\ 02068 y= a-b;\ 02069 } 02070 02071 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y))) 02072 02073 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ 02074 int i; 02075 int temp[64]; 02076 int sum=0; 02077 02078 assert(h==8); 02079 02080 for(i=0; i<8; i++){ 02081 //FIXME try pointer walks 02082 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]); 02083 BUTTERFLY2(temp[8*i+2], temp[8*i+3], 
src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]); 02084 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]); 02085 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]); 02086 02087 BUTTERFLY1(temp[8*i+0], temp[8*i+2]); 02088 BUTTERFLY1(temp[8*i+1], temp[8*i+3]); 02089 BUTTERFLY1(temp[8*i+4], temp[8*i+6]); 02090 BUTTERFLY1(temp[8*i+5], temp[8*i+7]); 02091 02092 BUTTERFLY1(temp[8*i+0], temp[8*i+4]); 02093 BUTTERFLY1(temp[8*i+1], temp[8*i+5]); 02094 BUTTERFLY1(temp[8*i+2], temp[8*i+6]); 02095 BUTTERFLY1(temp[8*i+3], temp[8*i+7]); 02096 } 02097 02098 for(i=0; i<8; i++){ 02099 BUTTERFLY1(temp[8*0+i], temp[8*1+i]); 02100 BUTTERFLY1(temp[8*2+i], temp[8*3+i]); 02101 BUTTERFLY1(temp[8*4+i], temp[8*5+i]); 02102 BUTTERFLY1(temp[8*6+i], temp[8*7+i]); 02103 02104 BUTTERFLY1(temp[8*0+i], temp[8*2+i]); 02105 BUTTERFLY1(temp[8*1+i], temp[8*3+i]); 02106 BUTTERFLY1(temp[8*4+i], temp[8*6+i]); 02107 BUTTERFLY1(temp[8*5+i], temp[8*7+i]); 02108 02109 sum += 02110 BUTTERFLYA(temp[8*0+i], temp[8*4+i]) 02111 +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) 02112 +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) 02113 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); 02114 } 02115 return sum; 02116 } 02117 02118 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){ 02119 int i; 02120 int temp[64]; 02121 int sum=0; 02122 02123 assert(h==8); 02124 02125 for(i=0; i<8; i++){ 02126 //FIXME try pointer walks 02127 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]); 02128 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]); 02129 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]); 02130 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]); 02131 02132 BUTTERFLY1(temp[8*i+0], temp[8*i+2]); 02133 BUTTERFLY1(temp[8*i+1], temp[8*i+3]); 02134 BUTTERFLY1(temp[8*i+4], temp[8*i+6]); 02135 BUTTERFLY1(temp[8*i+5], temp[8*i+7]); 02136 02137 BUTTERFLY1(temp[8*i+0], temp[8*i+4]); 02138 BUTTERFLY1(temp[8*i+1], temp[8*i+5]); 02139 BUTTERFLY1(temp[8*i+2], temp[8*i+6]); 02140 BUTTERFLY1(temp[8*i+3], temp[8*i+7]); 02141 } 02142 02143 for(i=0; i<8; i++){ 02144 BUTTERFLY1(temp[8*0+i], temp[8*1+i]); 02145 BUTTERFLY1(temp[8*2+i], temp[8*3+i]); 02146 BUTTERFLY1(temp[8*4+i], temp[8*5+i]); 02147 BUTTERFLY1(temp[8*6+i], temp[8*7+i]); 02148 02149 BUTTERFLY1(temp[8*0+i], temp[8*2+i]); 02150 BUTTERFLY1(temp[8*1+i], temp[8*3+i]); 02151 BUTTERFLY1(temp[8*4+i], temp[8*6+i]); 02152 BUTTERFLY1(temp[8*5+i], temp[8*7+i]); 02153 02154 sum += 02155 BUTTERFLYA(temp[8*0+i], temp[8*4+i]) 02156 +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) 02157 +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) 02158 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); 02159 } 02160 02161 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean 02162 02163 return sum; 02164 } 02165 02166 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02167 MpegEncContext * const s= (MpegEncContext *)c; 02168 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); 02169 02170 assert(h==8); 02171 02172 s->dsp.diff_pixels(temp, src1, src2, stride); 02173 s->dsp.fdct(temp); 02174 return s->dsp.sum_abs_dctelem(temp); 02175 } 02176 02177 #if CONFIG_GPL 02178 #define DCT8_1D {\ 02179 const int s07 = SRC(0) + SRC(7);\ 02180 const int s16 = SRC(1) + SRC(6);\ 02181 const int s25 = SRC(2) + SRC(5);\ 02182 const int s34 = SRC(3) + SRC(4);\ 02183 const int a0 = s07 + s34;\ 02184 const int a1 = s16 + s25;\ 
02185 const int a2 = s07 - s34;\ 02186 const int a3 = s16 - s25;\ 02187 const int d07 = SRC(0) - SRC(7);\ 02188 const int d16 = SRC(1) - SRC(6);\ 02189 const int d25 = SRC(2) - SRC(5);\ 02190 const int d34 = SRC(3) - SRC(4);\ 02191 const int a4 = d16 + d25 + (d07 + (d07>>1));\ 02192 const int a5 = d07 - d34 - (d25 + (d25>>1));\ 02193 const int a6 = d07 + d34 - (d16 + (d16>>1));\ 02194 const int a7 = d16 - d25 + (d34 + (d34>>1));\ 02195 DST(0, a0 + a1 ) ;\ 02196 DST(1, a4 + (a7>>2)) ;\ 02197 DST(2, a2 + (a3>>1)) ;\ 02198 DST(3, a5 + (a6>>2)) ;\ 02199 DST(4, a0 - a1 ) ;\ 02200 DST(5, a6 - (a5>>2)) ;\ 02201 DST(6, (a2>>1) - a3 ) ;\ 02202 DST(7, (a4>>2) - a7 ) ;\ 02203 } 02204 02205 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02206 MpegEncContext * const s= (MpegEncContext *)c; 02207 DCTELEM dct[8][8]; 02208 int i; 02209 int sum=0; 02210 02211 s->dsp.diff_pixels(dct[0], src1, src2, stride); 02212 02213 #define SRC(x) dct[i][x] 02214 #define DST(x,v) dct[i][x]= v 02215 for( i = 0; i < 8; i++ ) 02216 DCT8_1D 02217 #undef SRC 02218 #undef DST 02219 02220 #define SRC(x) dct[x][i] 02221 #define DST(x,v) sum += FFABS(v) 02222 for( i = 0; i < 8; i++ ) 02223 DCT8_1D 02224 #undef SRC 02225 #undef DST 02226 return sum; 02227 } 02228 #endif 02229 02230 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02231 MpegEncContext * const s= (MpegEncContext *)c; 02232 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); 02233 int sum=0, i; 02234 02235 assert(h==8); 02236 02237 s->dsp.diff_pixels(temp, src1, src2, stride); 02238 s->dsp.fdct(temp); 02239 02240 for(i=0; i<64; i++) 02241 sum= FFMAX(sum, FFABS(temp[i])); 02242 02243 return sum; 02244 } 02245 02246 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02247 MpegEncContext * const s= (MpegEncContext *)c; 02248 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); 02249 DCTELEM * const bak = temp+64; 02250 int sum=0, i; 02251 02252 assert(h==8); 02253 s->mb_intra=0; 02254 02255 s->dsp.diff_pixels(temp, src1, src2, stride); 02256 02257 memcpy(bak, temp, 64*sizeof(DCTELEM)); 02258 02259 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 02260 s->dct_unquantize_inter(s, temp, 0, s->qscale); 02261 ff_simple_idct(temp); //FIXME 02262 02263 for(i=0; i<64; i++) 02264 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); 02265 02266 return sum; 02267 } 02268 02269 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02270 MpegEncContext * const s= (MpegEncContext *)c; 02271 const uint8_t *scantable= s->intra_scantable.permutated; 02272 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); 02273 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); 02274 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); 02275 int i, last, run, bits, level, distortion, start_i; 02276 const int esc_length= s->ac_esc_length; 02277 uint8_t * length; 02278 uint8_t * last_length; 02279 02280 assert(h==8); 02281 02282 copy_block8(lsrc1, src1, 8, stride, 8); 02283 copy_block8(lsrc2, src2, 8, stride, 8); 02284 02285 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); 02286 02287 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 02288 02289 bits=0; 02290 02291 if (s->mb_intra) { 02292 start_i = 1; 02293 length = s->intra_ac_vlc_length; 02294 last_length= s->intra_ac_vlc_last_length; 02295 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma 02296 } else { 02297 start_i = 0; 02298 
length = s->inter_ac_vlc_length; 02299 last_length= s->inter_ac_vlc_last_length; 02300 } 02301 02302 if(last>=start_i){ 02303 run=0; 02304 for(i=start_i; i<last; i++){ 02305 int j= scantable[i]; 02306 level= temp[j]; 02307 02308 if(level){ 02309 level+=64; 02310 if((level&(~127)) == 0){ 02311 bits+= length[UNI_AC_ENC_INDEX(run, level)]; 02312 }else 02313 bits+= esc_length; 02314 run=0; 02315 }else 02316 run++; 02317 } 02318 i= scantable[last]; 02319 02320 level= temp[i] + 64; 02321 02322 assert(level - 64); 02323 02324 if((level&(~127)) == 0){ 02325 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; 02326 }else 02327 bits+= esc_length; 02328 02329 } 02330 02331 if(last>=0){ 02332 if(s->mb_intra) 02333 s->dct_unquantize_intra(s, temp, 0, s->qscale); 02334 else 02335 s->dct_unquantize_inter(s, temp, 0, s->qscale); 02336 } 02337 02338 s->dsp.idct_add(lsrc2, 8, temp); 02339 02340 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); 02341 02342 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); 02343 } 02344 02345 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ 02346 MpegEncContext * const s= (MpegEncContext *)c; 02347 const uint8_t *scantable= s->intra_scantable.permutated; 02348 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); 02349 int i, last, run, bits, level, start_i; 02350 const int esc_length= s->ac_esc_length; 02351 uint8_t * length; 02352 uint8_t * last_length; 02353 02354 assert(h==8); 02355 02356 s->dsp.diff_pixels(temp, src1, src2, stride); 02357 02358 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); 02359 02360 bits=0; 02361 02362 if (s->mb_intra) { 02363 start_i = 1; 02364 length = s->intra_ac_vlc_length; 02365 last_length= s->intra_ac_vlc_last_length; 02366 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma 02367 } else { 02368 start_i = 0; 02369 length = s->inter_ac_vlc_length; 02370 last_length= s->inter_ac_vlc_last_length; 02371 } 02372 02373 if(last>=start_i){ 02374 run=0; 02375 for(i=start_i; i<last; i++){ 02376 int j= scantable[i]; 02377 level= temp[j]; 02378 02379 if(level){ 02380 level+=64; 02381 if((level&(~127)) == 0){ 02382 bits+= length[UNI_AC_ENC_INDEX(run, level)]; 02383 }else 02384 bits+= esc_length; 02385 run=0; 02386 }else 02387 run++; 02388 } 02389 i= scantable[last]; 02390 02391 level= temp[i] + 64; 02392 02393 assert(level - 64); 02394 02395 if((level&(~127)) == 0){ 02396 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; 02397 }else 02398 bits+= esc_length; 02399 } 02400 02401 return bits; 02402 } 02403 02404 #define VSAD_INTRA(size) \ 02405 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ 02406 int score=0; \ 02407 int x,y; \ 02408 \ 02409 for(y=1; y<h; y++){ \ 02410 for(x=0; x<size; x+=4){ \ 02411 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \ 02412 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \ 02413 } \ 02414 s+= stride; \ 02415 } \ 02416 \ 02417 return score; \ 02418 } 02419 VSAD_INTRA(8) 02420 VSAD_INTRA(16) 02421 02422 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ 02423 int score=0; 02424 int x,y; 02425 02426 for(y=1; y<h; y++){ 02427 for(x=0; x<16; x++){ 02428 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); 02429 } 02430 s1+= stride; 02431 s2+= stride; 02432 } 02433 02434 return score; 02435 } 02436 02437 #define SQ(a) ((a)*(a)) 02438 #define VSSE_INTRA(size) \ 02439 static int 
vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \ 02440 int score=0; \ 02441 int x,y; \ 02442 \ 02443 for(y=1; y<h; y++){ \ 02444 for(x=0; x<size; x+=4){ \ 02445 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \ 02446 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \ 02447 } \ 02448 s+= stride; \ 02449 } \ 02450 \ 02451 return score; \ 02452 } 02453 VSSE_INTRA(8) 02454 VSSE_INTRA(16) 02455 02456 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ 02457 int score=0; 02458 int x,y; 02459 02460 for(y=1; y<h; y++){ 02461 for(x=0; x<16; x++){ 02462 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); 02463 } 02464 s1+= stride; 02465 s2+= stride; 02466 } 02467 02468 return score; 02469 } 02470 02471 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, 02472 int size){ 02473 int score=0; 02474 int i; 02475 for(i=0; i<size; i++) 02476 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); 02477 return score; 02478 } 02479 02480 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) 02481 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) 02482 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) 02483 #if CONFIG_GPL 02484 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) 02485 #endif 02486 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) 02487 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) 02488 WRAPPER8_16_SQ(rd8x8_c, rd16_c) 02489 WRAPPER8_16_SQ(bit8x8_c, bit16_c) 02490 02491 static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){ 02492 int i; 02493 for(i=0; i<len; i++) 02494 dst[i] = src0[i] * src1[i]; 02495 } 02496 02497 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){ 02498 int i; 02499 src1 += len-1; 02500 for(i=0; i<len; i++) 02501 dst[i] = src0[i] * src1[-i]; 02502 } 02503 02504 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){ 02505 int i; 02506 for(i=0; i<len; i++) 02507 dst[i] = src0[i] * src1[i] + src2[i]; 02508 } 02509 02510 static void vector_fmul_window_c(float *dst, const float *src0, 02511 const float *src1, const float *win, int len) 02512 { 02513 int i,j; 02514 dst += len; 02515 win += len; 02516 src0+= len; 02517 for(i=-len, j=len-1; i<0; i++, j--) { 02518 float s0 = src0[i]; 02519 float s1 = src1[j]; 02520 float wi = win[i]; 02521 float wj = win[j]; 02522 dst[i] = s0*wj - s1*wi; 02523 dst[j] = s0*wi + s1*wj; 02524 } 02525 } 02526 02527 static void vector_fmul_scalar_c(float *dst, const float *src, float mul, 02528 int len) 02529 { 02530 int i; 02531 for (i = 0; i < len; i++) 02532 dst[i] = src[i] * mul; 02533 } 02534 02535 static void vector_fmul_sv_scalar_2_c(float *dst, const float *src, 02536 const float **sv, float mul, int len) 02537 { 02538 int i; 02539 for (i = 0; i < len; i += 2, sv++) { 02540 dst[i ] = src[i ] * sv[0][0] * mul; 02541 dst[i+1] = src[i+1] * sv[0][1] * mul; 02542 } 02543 } 02544 02545 static void vector_fmul_sv_scalar_4_c(float *dst, const float *src, 02546 const float **sv, float mul, int len) 02547 { 02548 int i; 02549 for (i = 0; i < len; i += 4, sv++) { 02550 dst[i ] = src[i ] * sv[0][0] * mul; 02551 dst[i+1] = src[i+1] * sv[0][1] * mul; 02552 dst[i+2] = src[i+2] * sv[0][2] * mul; 02553 dst[i+3] = src[i+3] * sv[0][3] * mul; 02554 } 02555 } 02556 02557 static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul, 02558 int len) 02559 { 02560 int i; 02561 for (i = 0; i < len; i += 2, sv++) { 
02562 dst[i ] = sv[0][0] * mul; 02563 dst[i+1] = sv[0][1] * mul; 02564 } 02565 } 02566 02567 static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul, 02568 int len) 02569 { 02570 int i; 02571 for (i = 0; i < len; i += 4, sv++) { 02572 dst[i ] = sv[0][0] * mul; 02573 dst[i+1] = sv[0][1] * mul; 02574 dst[i+2] = sv[0][2] * mul; 02575 dst[i+3] = sv[0][3] * mul; 02576 } 02577 } 02578 02579 static void butterflies_float_c(float *restrict v1, float *restrict v2, 02580 int len) 02581 { 02582 int i; 02583 for (i = 0; i < len; i++) { 02584 float t = v1[i] - v2[i]; 02585 v1[i] += v2[i]; 02586 v2[i] = t; 02587 } 02588 } 02589 02590 static float scalarproduct_float_c(const float *v1, const float *v2, int len) 02591 { 02592 float p = 0.0; 02593 int i; 02594 02595 for (i = 0; i < len; i++) 02596 p += v1[i] * v2[i]; 02597 02598 return p; 02599 } 02600 02601 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, 02602 uint32_t maxi, uint32_t maxisign) 02603 { 02604 02605 if(a > mini) return mini; 02606 else if((a^(1U<<31)) > maxisign) return maxi; 02607 else return a; 02608 } 02609 02610 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){ 02611 int i; 02612 uint32_t mini = *(uint32_t*)min; 02613 uint32_t maxi = *(uint32_t*)max; 02614 uint32_t maxisign = maxi ^ (1U<<31); 02615 uint32_t *dsti = (uint32_t*)dst; 02616 const uint32_t *srci = (const uint32_t*)src; 02617 for(i=0; i<len; i+=8) { 02618 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); 02619 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); 02620 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); 02621 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign); 02622 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); 02623 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); 02624 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); 02625 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); 02626 } 02627 } 02628 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){ 02629 int i; 02630 if(min < 0 && max > 0) { 02631 vector_clipf_c_opposite_sign(dst, src, &min, &max, len); 02632 } else { 02633 for(i=0; i < len; i+=8) { 02634 dst[i ] = av_clipf(src[i ], min, max); 02635 dst[i + 1] = av_clipf(src[i + 1], min, max); 02636 dst[i + 2] = av_clipf(src[i + 2], min, max); 02637 dst[i + 3] = av_clipf(src[i + 3], min, max); 02638 dst[i + 4] = av_clipf(src[i + 4], min, max); 02639 dst[i + 5] = av_clipf(src[i + 5], min, max); 02640 dst[i + 6] = av_clipf(src[i + 6], min, max); 02641 dst[i + 7] = av_clipf(src[i + 7], min, max); 02642 } 02643 } 02644 } 02645 02646 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift) 02647 { 02648 int res = 0; 02649 02650 while (order--) 02651 res += (*v1++ * *v2++) >> shift; 02652 02653 return res; 02654 } 02655 02656 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul) 02657 { 02658 int res = 0; 02659 while (order--) { 02660 res += *v1 * *v2++; 02661 *v1++ += mul * *v3++; 02662 } 02663 return res; 02664 } 02665 02666 static void apply_window_int16_c(int16_t *output, const int16_t *input, 02667 const int16_t *window, unsigned int len) 02668 { 02669 int i; 02670 int len2 = len >> 1; 02671 02672 for (i = 0; i < len2; i++) { 02673 int16_t w = window[i]; 02674 output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15; 02675 output[len-i-1] = 
(MUL16(input[len-i-1], w) + (1 << 14)) >> 15; 02676 } 02677 } 02678 02679 #define W0 2048 02680 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ 02681 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ 02682 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ 02683 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */ 02684 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ 02685 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ 02686 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ 02687 02688 static void wmv2_idct_row(short * b) 02689 { 02690 int s1,s2; 02691 int a0,a1,a2,a3,a4,a5,a6,a7; 02692 /*step 1*/ 02693 a1 = W1*b[1]+W7*b[7]; 02694 a7 = W7*b[1]-W1*b[7]; 02695 a5 = W5*b[5]+W3*b[3]; 02696 a3 = W3*b[5]-W5*b[3]; 02697 a2 = W2*b[2]+W6*b[6]; 02698 a6 = W6*b[2]-W2*b[6]; 02699 a0 = W0*b[0]+W0*b[4]; 02700 a4 = W0*b[0]-W0*b[4]; 02701 /*step 2*/ 02702 s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7, 02703 s2 = (181*(a1-a5-a7+a3)+128)>>8; 02704 /*step 3*/ 02705 b[0] = (a0+a2+a1+a5 + (1<<7))>>8; 02706 b[1] = (a4+a6 +s1 + (1<<7))>>8; 02707 b[2] = (a4-a6 +s2 + (1<<7))>>8; 02708 b[3] = (a0-a2+a7+a3 + (1<<7))>>8; 02709 b[4] = (a0-a2-a7-a3 + (1<<7))>>8; 02710 b[5] = (a4-a6 -s2 + (1<<7))>>8; 02711 b[6] = (a4+a6 -s1 + (1<<7))>>8; 02712 b[7] = (a0+a2-a1-a5 + (1<<7))>>8; 02713 } 02714 static void wmv2_idct_col(short * b) 02715 { 02716 int s1,s2; 02717 int a0,a1,a2,a3,a4,a5,a6,a7; 02718 /*step 1, with extended precision*/ 02719 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3; 02720 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3; 02721 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3; 02722 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3; 02723 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3; 02724 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3; 02725 a0 = (W0*b[8*0]+W0*b[8*4] )>>3; 02726 a4 = (W0*b[8*0]-W0*b[8*4] )>>3; 02727 /*step 2*/ 02728 s1 = (181*(a1-a5+a7-a3)+128)>>8; 02729 s2 = (181*(a1-a5-a7+a3)+128)>>8; 02730 /*step 3*/ 02731 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14; 02732 b[8*1] = (a4+a6 +s1 + (1<<13))>>14; 02733 b[8*2] = (a4-a6 +s2 + (1<<13))>>14; 02734 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14; 02735 02736 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14; 02737 b[8*5] = (a4-a6 -s2 + (1<<13))>>14; 02738 b[8*6] = (a4+a6 -s1 + (1<<13))>>14; 02739 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14; 02740 } 02741 void ff_wmv2_idct_c(short * block){ 02742 int i; 02743 02744 for(i=0;i<64;i+=8){ 02745 wmv2_idct_row(block+i); 02746 } 02747 for(i=0;i<8;i++){ 02748 wmv2_idct_col(block+i); 02749 } 02750 } 02751 /* XXX: those functions should be suppressed ASAP when all IDCTs are 02752 converted */ 02753 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) 02754 { 02755 ff_wmv2_idct_c(block); 02756 ff_put_pixels_clamped_c(block, dest, line_size); 02757 } 02758 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) 02759 { 02760 ff_wmv2_idct_c(block); 02761 ff_add_pixels_clamped_c(block, dest, line_size); 02762 } 02763 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 02764 { 02765 j_rev_dct (block); 02766 ff_put_pixels_clamped_c(block, dest, line_size); 02767 } 02768 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 02769 { 02770 j_rev_dct (block); 02771 ff_add_pixels_clamped_c(block, dest, line_size); 02772 } 02773 02774 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) 02775 { 02776 j_rev_dct4 (block); 02777 put_pixels_clamped4_c(block, dest, line_size); 02778 } 02779 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) 02780 { 02781 j_rev_dct4 (block); 02782 add_pixels_clamped4_c(block, 
dest, line_size); 02783 } 02784 02785 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) 02786 { 02787 j_rev_dct2 (block); 02788 put_pixels_clamped2_c(block, dest, line_size); 02789 } 02790 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) 02791 { 02792 j_rev_dct2 (block); 02793 add_pixels_clamped2_c(block, dest, line_size); 02794 } 02795 02796 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) 02797 { 02798 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 02799 02800 dest[0] = cm[(block[0] + 4)>>3]; 02801 } 02802 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) 02803 { 02804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 02805 02806 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; 02807 } 02808 02809 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } 02810 02811 /* init static data */ 02812 av_cold void dsputil_static_init(void) 02813 { 02814 int i; 02815 02816 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; 02817 for(i=0;i<MAX_NEG_CROP;i++) { 02818 ff_cropTbl[i] = 0; 02819 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; 02820 } 02821 02822 for(i=0;i<512;i++) { 02823 ff_squareTbl[i] = (i - 256) * (i - 256); 02824 } 02825 02826 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; 02827 } 02828 02829 int ff_check_alignment(void){ 02830 static int did_fail=0; 02831 DECLARE_ALIGNED(16, int, aligned); 02832 02833 if((intptr_t)&aligned & 15){ 02834 if(!did_fail){ 02835 #if HAVE_MMX || HAVE_ALTIVEC 02836 av_log(NULL, AV_LOG_ERROR, 02837 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" 02838 "and may be very slow or crash. This is not a bug in libavcodec,\n" 02839 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" 02840 "Do not report crashes to Libav developers.\n"); 02841 #endif 02842 did_fail=1; 02843 } 02844 return -1; 02845 } 02846 return 0; 02847 } 02848 02849 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) 02850 { 02851 int i; 02852 02853 ff_check_alignment(); 02854 02855 #if CONFIG_ENCODERS 02856 if(avctx->dct_algo==FF_DCT_FASTINT) { 02857 c->fdct = fdct_ifast; 02858 c->fdct248 = fdct_ifast248; 02859 } 02860 else if(avctx->dct_algo==FF_DCT_FAAN) { 02861 c->fdct = ff_faandct; 02862 c->fdct248 = ff_faandct248; 02863 } 02864 else { 02865 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default 02866 c->fdct248 = ff_fdct248_islow; 02867 } 02868 #endif //CONFIG_ENCODERS 02869 02870 if(avctx->lowres==1){ 02871 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){ 02872 c->idct_put= ff_jref_idct4_put; 02873 c->idct_add= ff_jref_idct4_add; 02874 }else{ 02875 if (avctx->codec_id != CODEC_ID_H264) { 02876 c->idct_put= ff_h264_lowres_idct_put_8_c; 02877 c->idct_add= ff_h264_lowres_idct_add_8_c; 02878 } else { 02879 switch (avctx->bits_per_raw_sample) { 02880 case 9: 02881 c->idct_put= ff_h264_lowres_idct_put_9_c; 02882 c->idct_add= ff_h264_lowres_idct_add_9_c; 02883 break; 02884 case 10: 02885 c->idct_put= ff_h264_lowres_idct_put_10_c; 02886 c->idct_add= ff_h264_lowres_idct_add_10_c; 02887 break; 02888 default: 02889 c->idct_put= ff_h264_lowres_idct_put_8_c; 02890 c->idct_add= ff_h264_lowres_idct_add_8_c; 02891 } 02892 } 02893 } 02894 c->idct = j_rev_dct4; 02895 c->idct_permutation_type= FF_NO_IDCT_PERM; 02896 }else if(avctx->lowres==2){ 02897 c->idct_put= ff_jref_idct2_put; 02898 c->idct_add= ff_jref_idct2_add; 02899 c->idct = j_rev_dct2; 02900 
c->idct_permutation_type= FF_NO_IDCT_PERM; 02901 }else if(avctx->lowres==3){ 02902 c->idct_put= ff_jref_idct1_put; 02903 c->idct_add= ff_jref_idct1_add; 02904 c->idct = j_rev_dct1; 02905 c->idct_permutation_type= FF_NO_IDCT_PERM; 02906 }else{ 02907 if(avctx->idct_algo==FF_IDCT_INT){ 02908 c->idct_put= ff_jref_idct_put; 02909 c->idct_add= ff_jref_idct_add; 02910 c->idct = j_rev_dct; 02911 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 02912 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && 02913 avctx->idct_algo==FF_IDCT_VP3){ 02914 c->idct_put= ff_vp3_idct_put_c; 02915 c->idct_add= ff_vp3_idct_add_c; 02916 c->idct = ff_vp3_idct_c; 02917 c->idct_permutation_type= FF_NO_IDCT_PERM; 02918 }else if(avctx->idct_algo==FF_IDCT_WMV2){ 02919 c->idct_put= ff_wmv2_idct_put_c; 02920 c->idct_add= ff_wmv2_idct_add_c; 02921 c->idct = ff_wmv2_idct_c; 02922 c->idct_permutation_type= FF_NO_IDCT_PERM; 02923 }else if(avctx->idct_algo==FF_IDCT_FAAN){ 02924 c->idct_put= ff_faanidct_put; 02925 c->idct_add= ff_faanidct_add; 02926 c->idct = ff_faanidct; 02927 c->idct_permutation_type= FF_NO_IDCT_PERM; 02928 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { 02929 c->idct_put= ff_ea_idct_put_c; 02930 c->idct_permutation_type= FF_NO_IDCT_PERM; 02931 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { 02932 c->idct = ff_bink_idct_c; 02933 c->idct_add = ff_bink_idct_add_c; 02934 c->idct_put = ff_bink_idct_put_c; 02935 c->idct_permutation_type = FF_NO_IDCT_PERM; 02936 }else{ //accurate/default 02937 c->idct_put= ff_simple_idct_put; 02938 c->idct_add= ff_simple_idct_add; 02939 c->idct = ff_simple_idct; 02940 c->idct_permutation_type= FF_NO_IDCT_PERM; 02941 } 02942 } 02943 02944 c->get_pixels = get_pixels_c; 02945 c->diff_pixels = diff_pixels_c; 02946 c->put_pixels_clamped = ff_put_pixels_clamped_c; 02947 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c; 02948 c->put_pixels_nonclamped = put_pixels_nonclamped_c; 02949 c->add_pixels_clamped = ff_add_pixels_clamped_c; 02950 c->sum_abs_dctelem = sum_abs_dctelem_c; 02951 c->gmc1 = gmc1_c; 02952 c->gmc = ff_gmc_c; 02953 c->pix_sum = pix_sum_c; 02954 c->pix_norm1 = pix_norm1_c; 02955 02956 c->fill_block_tab[0] = fill_block16_c; 02957 c->fill_block_tab[1] = fill_block8_c; 02958 c->scale_block = scale_block_c; 02959 02960 /* TODO [0] 16 [1] 8 */ 02961 c->pix_abs[0][0] = pix_abs16_c; 02962 c->pix_abs[0][1] = pix_abs16_x2_c; 02963 c->pix_abs[0][2] = pix_abs16_y2_c; 02964 c->pix_abs[0][3] = pix_abs16_xy2_c; 02965 c->pix_abs[1][0] = pix_abs8_c; 02966 c->pix_abs[1][1] = pix_abs8_x2_c; 02967 c->pix_abs[1][2] = pix_abs8_y2_c; 02968 c->pix_abs[1][3] = pix_abs8_xy2_c; 02969 02970 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; 02971 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; 02972 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; 02973 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; 02974 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; 02975 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; 02976 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; 02977 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; 02978 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; 02979 02980 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; 02981 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; 02982 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; 02983 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; 02984 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; 02985 c->avg_tpel_pixels_tab[ 
6] = avg_tpel_pixels_mc21_c; 02986 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; 02987 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; 02988 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; 02989 02990 #define dspfunc(PFX, IDX, NUM) \ 02991 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ 02992 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ 02993 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ 02994 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ 02995 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ 02996 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ 02997 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ 02998 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ 02999 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ 03000 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ 03001 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ 03002 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ 03003 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ 03004 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ 03005 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ 03006 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c 03007 03008 dspfunc(put_qpel, 0, 16); 03009 dspfunc(put_no_rnd_qpel, 0, 16); 03010 03011 dspfunc(avg_qpel, 0, 16); 03012 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ 03013 03014 dspfunc(put_qpel, 1, 8); 03015 dspfunc(put_no_rnd_qpel, 1, 8); 03016 03017 dspfunc(avg_qpel, 1, 8); 03018 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ 03019 03020 #undef dspfunc 03021 03022 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER 03023 ff_mlp_init(c, avctx); 03024 #endif 03025 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER 03026 ff_intrax8dsp_init(c,avctx); 03027 #endif 03028 #if CONFIG_RV30_DECODER 03029 ff_rv30dsp_init(c,avctx); 03030 #endif 03031 #if CONFIG_RV40_DECODER 03032 ff_rv40dsp_init(c,avctx); 03033 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; 03034 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; 03035 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; 03036 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; 03037 #endif 03038 03039 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c; 03040 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; 03041 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; 03042 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; 03043 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; 03044 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; 03045 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; 03046 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; 03047 03048 #define SET_CMP_FUNC(name) \ 03049 c->name[0]= name ## 16_c;\ 03050 c->name[1]= name ## 8x8_c; 03051 03052 SET_CMP_FUNC(hadamard8_diff) 03053 c->hadamard8_diff[4]= hadamard8_intra16_c; 03054 c->hadamard8_diff[5]= hadamard8_intra8x8_c; 03055 SET_CMP_FUNC(dct_sad) 03056 SET_CMP_FUNC(dct_max) 03057 #if CONFIG_GPL 03058 SET_CMP_FUNC(dct264_sad) 03059 #endif 03060 c->sad[0]= pix_abs16_c; 03061 c->sad[1]= pix_abs8_c; 03062 c->sse[0]= sse16_c; 03063 c->sse[1]= sse8_c; 03064 c->sse[2]= sse4_c; 03065 SET_CMP_FUNC(quant_psnr) 03066 SET_CMP_FUNC(rd) 03067 SET_CMP_FUNC(bit) 03068 c->vsad[0]= vsad16_c; 03069 c->vsad[4]= vsad_intra16_c; 03070 c->vsad[5]= vsad_intra8_c; 03071 c->vsse[0]= vsse16_c; 03072 c->vsse[4]= vsse_intra16_c; 03073 c->vsse[5]= vsse_intra8_c; 03074 c->nsse[0]= nsse16_c; 03075 c->nsse[1]= nsse8_c; 03076 #if CONFIG_DWT 03077 ff_dsputil_init_dwt(c); 03078 #endif 
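    /* Note (added commentary, not in the original source): at this point all
     * of the comparison-function tables (sad, sse, hadamard8_diff, dct_sad,
     * vsad, vsse, nsse, ...) hold their C fallbacks.  ff_set_cmp() above maps
     * an FF_CMP_* constant onto one of these tables; a typical caller
     * (illustrative only) does something like
     *
     *     ff_set_cmp(&s->dsp, s->dsp.me_cmp, avctx->me_cmp);
     *
     * Index [0] of a table is the 16x16 variant and [1] the 8x8 one;
     * indices [4]/[5] hold the 16x16/8x8 intra variants where they exist
     * (hadamard8_diff, vsad, vsse). */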
03079 03080 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; 03081 03082 c->add_bytes= add_bytes_c; 03083 c->add_bytes_l2= add_bytes_l2_c; 03084 c->diff_bytes= diff_bytes_c; 03085 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; 03086 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; 03087 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; 03088 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; 03089 c->bswap_buf= bswap_buf; 03090 c->bswap16_buf = bswap16_buf; 03091 #if CONFIG_PNG_DECODER 03092 c->add_png_paeth_prediction= ff_add_png_paeth_prediction; 03093 #endif 03094 03095 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 03096 c->h263_h_loop_filter= h263_h_loop_filter_c; 03097 c->h263_v_loop_filter= h263_v_loop_filter_c; 03098 } 03099 03100 if (CONFIG_VP3_DECODER) { 03101 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; 03102 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; 03103 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; 03104 } 03105 03106 c->h261_loop_filter= h261_loop_filter_c; 03107 03108 c->try_8x8basis= try_8x8basis_c; 03109 c->add_8x8basis= add_8x8basis_c; 03110 03111 #if CONFIG_VORBIS_DECODER 03112 c->vorbis_inverse_coupling = vorbis_inverse_coupling; 03113 #endif 03114 #if CONFIG_AC3_DECODER 03115 c->ac3_downmix = ff_ac3_downmix_c; 03116 #endif 03117 c->vector_fmul = vector_fmul_c; 03118 c->vector_fmul_reverse = vector_fmul_reverse_c; 03119 c->vector_fmul_add = vector_fmul_add_c; 03120 c->vector_fmul_window = vector_fmul_window_c; 03121 c->vector_clipf = vector_clipf_c; 03122 c->scalarproduct_int16 = scalarproduct_int16_c; 03123 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; 03124 c->apply_window_int16 = apply_window_int16_c; 03125 c->scalarproduct_float = scalarproduct_float_c; 03126 c->butterflies_float = butterflies_float_c; 03127 c->vector_fmul_scalar = vector_fmul_scalar_c; 03128 03129 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c; 03130 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c; 03131 03132 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c; 03133 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c; 03134 03135 c->shrink[0]= av_image_copy_plane; 03136 c->shrink[1]= ff_shrink22; 03137 c->shrink[2]= ff_shrink44; 03138 c->shrink[3]= ff_shrink88; 03139 03140 c->prefetch= just_return; 03141 03142 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); 03143 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); 03144 03145 #undef FUNC 03146 #undef FUNCC 03147 #define FUNC(f, depth) f ## _ ## depth 03148 #define FUNCC(f, depth) f ## _ ## depth ## _c 03149 03150 #define dspfunc1(PFX, IDX, NUM, depth)\ 03151 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\ 03152 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\ 03153 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\ 03154 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth) 03155 03156 #define dspfunc2(PFX, IDX, NUM, depth)\ 03157 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\ 03158 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\ 03159 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\ 03160 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\ 03161 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\ 03162 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\ 03163 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## 
_mc21, depth);\ 03164 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\ 03165 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\ 03166 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\ 03167 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\ 03168 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\ 03169 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\ 03170 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\ 03171 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\ 03172 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) 03173 03174 03175 #define BIT_DEPTH_FUNCS(depth)\ 03176 c->draw_edges = FUNCC(draw_edges , depth);\ 03177 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ 03178 c->clear_block = FUNCC(clear_block , depth);\ 03179 c->clear_blocks = FUNCC(clear_blocks , depth);\ 03180 c->add_pixels8 = FUNCC(add_pixels8 , depth);\ 03181 c->add_pixels4 = FUNCC(add_pixels4 , depth);\ 03182 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ 03183 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ 03184 \ 03185 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ 03186 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ 03187 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\ 03188 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\ 03189 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\ 03190 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\ 03191 \ 03192 dspfunc1(put , 0, 16, depth);\ 03193 dspfunc1(put , 1, 8, depth);\ 03194 dspfunc1(put , 2, 4, depth);\ 03195 dspfunc1(put , 3, 2, depth);\ 03196 dspfunc1(put_no_rnd, 0, 16, depth);\ 03197 dspfunc1(put_no_rnd, 1, 8, depth);\ 03198 dspfunc1(avg , 0, 16, depth);\ 03199 dspfunc1(avg , 1, 8, depth);\ 03200 dspfunc1(avg , 2, 4, depth);\ 03201 dspfunc1(avg , 3, 2, depth);\ 03202 dspfunc1(avg_no_rnd, 0, 16, depth);\ 03203 dspfunc1(avg_no_rnd, 1, 8, depth);\ 03204 \ 03205 dspfunc2(put_h264_qpel, 0, 16, depth);\ 03206 dspfunc2(put_h264_qpel, 1, 8, depth);\ 03207 dspfunc2(put_h264_qpel, 2, 4, depth);\ 03208 dspfunc2(put_h264_qpel, 3, 2, depth);\ 03209 dspfunc2(avg_h264_qpel, 0, 16, depth);\ 03210 dspfunc2(avg_h264_qpel, 1, 8, depth);\ 03211 dspfunc2(avg_h264_qpel, 2, 4, depth); 03212 03213 if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) { 03214 BIT_DEPTH_FUNCS(8) 03215 } else { 03216 switch (avctx->bits_per_raw_sample) { 03217 case 9: 03218 BIT_DEPTH_FUNCS(9) 03219 break; 03220 case 10: 03221 BIT_DEPTH_FUNCS(10) 03222 break; 03223 default: 03224 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); 03225 BIT_DEPTH_FUNCS(8) 03226 break; 03227 } 03228 } 03229 03230 03231 if (HAVE_MMX) dsputil_init_mmx (c, avctx); 03232 if (ARCH_ARM) dsputil_init_arm (c, avctx); 03233 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); 03234 if (HAVE_VIS) dsputil_init_vis (c, avctx); 03235 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); 03236 if (ARCH_PPC) dsputil_init_ppc (c, avctx); 03237 if (HAVE_MMI) dsputil_init_mmi (c, avctx); 03238 if (ARCH_SH4) dsputil_init_sh4 (c, avctx); 03239 if (ARCH_BFIN) dsputil_init_bfin (c, avctx); 03240 03241 for(i=0; i<64; i++){ 03242 if(!c->put_2tap_qpel_pixels_tab[0][i]) 03243 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; 03244 
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    /* Build the coefficient permutation that adapts scan tables (via
     * ff_init_scantable()) to the input layout expected by the selected IDCT. */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
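/* ---------------------------------------------------------------------
 * Illustrative usage sketch (added commentary, not part of the original
 * file): how a caller might initialize a DSPContext and invoke one of
 * the C comparison functions wired up in dsputil_init() above.  The
 * helper name and the stride/height values are hypothetical; the first
 * argument of an me_cmp_func is an optional MpegEncContext and may be
 * NULL for the plain C implementations such as pix_abs16_c().
 * ------------------------------------------------------------------- */
#if 0
static int example_sad16x16(AVCodecContext *avctx,
                            uint8_t *cur, uint8_t *ref, int stride)
{
    DSPContext dsp;

    /* Fills every function-pointer table with the C fallbacks and lets
     * the architecture-specific initializers override them. */
    dsputil_init(&dsp, avctx);

    /* pix_abs[0][0] is the full-pel 16x16 SAD (pix_abs16_c in plain C);
     * [0][1..3] are the half-pel x2/y2/xy2 variants. */
    return dsp.pix_abs[0][0](NULL, cur, ref, stride, 16);
}
#endif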