Libav 0.7.1
|
00001 /* 00002 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 00003 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at> 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00028 #include "mathops.h" 00029 #include "high_bit_depth.h" 00030 00031 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){ 00032 pixel *src = (pixel*)_src; 00033 int stride = _stride/sizeof(pixel); 00034 const pixel4 a= AV_RN4PA(src-stride); 00035 00036 AV_WN4PA(src+0*stride, a); 00037 AV_WN4PA(src+1*stride, a); 00038 AV_WN4PA(src+2*stride, a); 00039 AV_WN4PA(src+3*stride, a); 00040 } 00041 00042 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){ 00043 pixel *src = (pixel*)_src; 00044 int stride = _stride/sizeof(pixel); 00045 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride])); 00046 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride])); 00047 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride])); 00048 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride])); 00049 } 00050 00051 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00052 pixel *src = (pixel*)_src; 00053 int stride = _stride/sizeof(pixel); 00054 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] 00055 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; 00056 const pixel4 a = PIXEL_SPLAT_X4(dc); 00057 00058 AV_WN4PA(src+0*stride, a); 00059 AV_WN4PA(src+1*stride, a); 00060 AV_WN4PA(src+2*stride, a); 00061 AV_WN4PA(src+3*stride, a); 00062 } 00063 00064 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00065 pixel *src = (pixel*)_src; 00066 int stride = _stride/sizeof(pixel); 00067 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; 00068 const pixel4 a = PIXEL_SPLAT_X4(dc); 00069 00070 AV_WN4PA(src+0*stride, a); 00071 AV_WN4PA(src+1*stride, a); 00072 AV_WN4PA(src+2*stride, a); 00073 AV_WN4PA(src+3*stride, a); 00074 } 00075 00076 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00077 pixel *src = (pixel*)_src; 00078 int stride = _stride/sizeof(pixel); 00079 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; 00080 const pixel4 a = PIXEL_SPLAT_X4(dc); 00081 00082 AV_WN4PA(src+0*stride, a); 00083 AV_WN4PA(src+1*stride, a); 00084 AV_WN4PA(src+2*stride, a); 00085 AV_WN4PA(src+3*stride, a); 00086 } 00087 00088 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00089 pixel *src = (pixel*)_src; 00090 int stride = _stride/sizeof(pixel); 00091 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); 00092 00093 AV_WN4PA(src+0*stride, a); 00094 AV_WN4PA(src+1*stride, a); 00095 AV_WN4PA(src+2*stride, a); 00096 AV_WN4PA(src+3*stride, a); 00097 } 00098 00099 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00100 pixel *src = (pixel*)_src; 00101 int stride = _stride/sizeof(pixel); 00102 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); 00103 00104 AV_WN4PA(src+0*stride, a); 00105 AV_WN4PA(src+1*stride, a); 00106 AV_WN4PA(src+2*stride, a); 00107 AV_WN4PA(src+3*stride, a); 00108 } 00109 00110 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){ 00111 pixel *src = (pixel*)_src; 00112 int stride = _stride/sizeof(pixel); 00113 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); 00114 00115 AV_WN4PA(src+0*stride, a); 00116 AV_WN4PA(src+1*stride, a); 00117 AV_WN4PA(src+2*stride, a); 00118 AV_WN4PA(src+3*stride, a); 00119 } 00120 00121 00122 #define LOAD_TOP_RIGHT_EDGE\ 00123 const unsigned av_unused t4 = topright[0];\ 00124 const unsigned av_unused t5 = topright[1];\ 00125 const unsigned av_unused t6 = topright[2];\ 00126 const unsigned av_unused t7 = topright[3];\ 00127 00128 #define LOAD_DOWN_LEFT_EDGE\ 00129 const unsigned av_unused l4 = src[-1+4*stride];\ 00130 const unsigned av_unused l5 = src[-1+5*stride];\ 00131 const unsigned av_unused l6 = src[-1+6*stride];\ 00132 const unsigned av_unused l7 = src[-1+7*stride];\ 00133 00134 #define LOAD_LEFT_EDGE\ 00135 const unsigned av_unused l0 = src[-1+0*stride];\ 00136 const unsigned av_unused l1 = src[-1+1*stride];\ 00137 const unsigned av_unused l2 = src[-1+2*stride];\ 00138 const unsigned av_unused l3 = src[-1+3*stride];\ 00139 00140 #define LOAD_TOP_EDGE\ 00141 const unsigned av_unused t0 = src[ 0-1*stride];\ 00142 const unsigned av_unused t1 = src[ 1-1*stride];\ 00143 const unsigned av_unused t2 = src[ 2-1*stride];\ 00144 const unsigned av_unused t3 = src[ 3-1*stride];\ 00145 00146 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){ 00147 pixel *src = (pixel*)_src; 00148 int stride = _stride/sizeof(pixel); 00149 const int lt= src[-1-1*stride]; 00150 LOAD_TOP_EDGE 00151 LOAD_LEFT_EDGE 00152 00153 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 00154 src[0+2*stride]= 00155 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 00156 src[0+1*stride]= 00157 src[1+2*stride]= 00158 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 00159 src[0+0*stride]= 00160 src[1+1*stride]= 00161 src[2+2*stride]= 00162 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 00163 src[1+0*stride]= 00164 src[2+1*stride]= 00165 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; 00166 src[2+0*stride]= 00167 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 00168 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; 00169 } 00170 00171 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ 00172 pixel *src = (pixel*)_src; 00173 const pixel *topright = (const pixel*)_topright; 00174 int stride = _stride/sizeof(pixel); 00175 LOAD_TOP_EDGE 00176 LOAD_TOP_RIGHT_EDGE 00177 // LOAD_LEFT_EDGE 00178 00179 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; 00180 src[1+0*stride]= 00181 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; 00182 src[2+0*stride]= 00183 src[1+1*stride]= 00184 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; 00185 src[3+0*stride]= 00186 src[2+1*stride]= 00187 src[1+2*stride]= 00188 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; 00189 src[3+1*stride]= 00190 src[2+2*stride]= 00191 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; 00192 src[3+2*stride]= 00193 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; 00194 src[3+3*stride]=(t6 + 3*t7 + 2)>>2; 00195 } 00196 00197 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){ 00198 pixel *src = (pixel*)_src; 00199 int stride = _stride/sizeof(pixel); 00200 const int lt= src[-1-1*stride]; 00201 LOAD_TOP_EDGE 00202 LOAD_LEFT_EDGE 00203 00204 src[0+0*stride]= 00205 src[1+2*stride]=(lt + t0 + 1)>>1; 00206 src[1+0*stride]= 00207 src[2+2*stride]=(t0 + t1 + 1)>>1; 00208 src[2+0*stride]= 00209 src[3+2*stride]=(t1 + t2 + 1)>>1; 00210 src[3+0*stride]=(t2 + t3 + 1)>>1; 00211 src[0+1*stride]= 00212 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 00213 src[1+1*stride]= 00214 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; 00215 src[2+1*stride]= 00216 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; 00217 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; 00218 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 00219 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 00220 } 00221 00222 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){ 00223 pixel *src = (pixel*)_src; 00224 const pixel *topright = (const pixel*)_topright; 00225 int stride = _stride/sizeof(pixel); 00226 LOAD_TOP_EDGE 00227 LOAD_TOP_RIGHT_EDGE 00228 00229 src[0+0*stride]=(t0 + t1 + 1)>>1; 00230 src[1+0*stride]= 00231 src[0+2*stride]=(t1 + t2 + 1)>>1; 00232 src[2+0*stride]= 00233 src[1+2*stride]=(t2 + t3 + 1)>>1; 00234 src[3+0*stride]= 00235 src[2+2*stride]=(t3 + t4+ 1)>>1; 00236 src[3+2*stride]=(t4 + t5+ 1)>>1; 00237 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; 00238 src[1+1*stride]= 00239 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; 00240 src[2+1*stride]= 00241 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; 00242 src[3+1*stride]= 00243 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; 00244 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; 00245 } 00246 00247 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){ 00248 pixel *src = (pixel*)_src; 00249 int stride = _stride/sizeof(pixel); 00250 LOAD_LEFT_EDGE 00251 00252 src[0+0*stride]=(l0 + l1 + 1)>>1; 00253 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; 00254 src[2+0*stride]= 00255 src[0+1*stride]=(l1 + l2 + 1)>>1; 00256 src[3+0*stride]= 00257 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; 00258 src[2+1*stride]= 00259 src[0+2*stride]=(l2 + l3 + 1)>>1; 00260 src[3+1*stride]= 00261 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; 00262 src[3+2*stride]= 00263 src[1+3*stride]= 00264 src[0+3*stride]= 00265 src[2+2*stride]= 00266 src[2+3*stride]= 00267 src[3+3*stride]=l3; 00268 } 00269 00270 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){ 00271 pixel *src = (pixel*)_src; 00272 int stride = _stride/sizeof(pixel); 00273 const int lt= src[-1-1*stride]; 00274 LOAD_TOP_EDGE 00275 LOAD_LEFT_EDGE 00276 00277 src[0+0*stride]= 00278 src[2+1*stride]=(lt + l0 + 1)>>1; 00279 src[1+0*stride]= 00280 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; 00281 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; 00282 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; 00283 src[0+1*stride]= 00284 src[2+2*stride]=(l0 + l1 + 1)>>1; 00285 src[1+1*stride]= 00286 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; 00287 src[0+2*stride]= 00288 src[2+3*stride]=(l1 + l2+ 1)>>1; 00289 src[1+2*stride]= 00290 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; 00291 src[0+3*stride]=(l2 + l3 + 1)>>1; 00292 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; 00293 } 00294 00295 static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){ 00296 int i; 00297 pixel *src = (pixel*)_src; 00298 int stride = _stride/sizeof(pixel); 00299 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0); 00300 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1); 00301 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2); 00302 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3); 00303 00304 for(i=0; i<16; i++){ 00305 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 00306 AV_WN4PA(((pixel4*)(src+i*stride))+1, b); 00307 AV_WN4PA(((pixel4*)(src+i*stride))+2, c); 00308 AV_WN4PA(((pixel4*)(src+i*stride))+3, d); 00309 } 00310 } 00311 00312 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){ 00313 int i; 00314 pixel *src = (pixel*)_src; 00315 stride /= sizeof(pixel); 00316 00317 for(i=0; i<16; i++){ 00318 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); 00319 00320 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 00321 AV_WN4PA(((pixel4*)(src+i*stride))+1, a); 00322 AV_WN4PA(((pixel4*)(src+i*stride))+2, a); 00323 AV_WN4PA(((pixel4*)(src+i*stride))+3, a); 00324 } 00325 } 00326 00327 #define PREDICT_16x16_DC(v)\ 00328 for(i=0; i<16; i++){\ 00329 AV_WN4PA(src+ 0, v);\ 00330 AV_WN4PA(src+ 4, v);\ 00331 AV_WN4PA(src+ 8, v);\ 00332 AV_WN4PA(src+12, v);\ 00333 src += stride;\ 00334 } 00335 00336 static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){ 00337 int i, dc=0; 00338 pixel *src = (pixel*)_src; 00339 pixel4 dcsplat; 00340 stride /= sizeof(pixel); 00341 00342 for(i=0;i<16; i++){ 00343 dc+= src[-1+i*stride]; 00344 } 00345 00346 for(i=0;i<16; i++){ 00347 dc+= src[i-stride]; 00348 } 00349 00350 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); 00351 PREDICT_16x16_DC(dcsplat); 00352 } 00353 00354 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){ 00355 int i, dc=0; 00356 pixel *src = (pixel*)_src; 00357 pixel4 dcsplat; 00358 stride /= sizeof(pixel); 00359 00360 for(i=0;i<16; i++){ 00361 dc+= src[-1+i*stride]; 00362 } 00363 00364 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); 00365 PREDICT_16x16_DC(dcsplat); 00366 } 00367 00368 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){ 00369 int i, dc=0; 00370 pixel *src = (pixel*)_src; 00371 pixel4 dcsplat; 00372 stride /= sizeof(pixel); 00373 00374 for(i=0;i<16; i++){ 00375 dc+= src[i-stride]; 00376 } 00377 00378 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); 00379 PREDICT_16x16_DC(dcsplat); 00380 } 00381 00382 #define PRED16x16_X(n, v) \ 00383 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\ 00384 int i;\ 00385 pixel *src = (pixel*)_src;\ 00386 stride /= sizeof(pixel);\ 00387 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\ 00388 } 00389 00390 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1); 00391 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0); 00392 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1); 00393 00394 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){ 00395 int i, j, k; 00396 int a; 00397 INIT_CLIP 00398 pixel *src = (pixel*)_src; 00399 int stride = _stride/sizeof(pixel); 00400 const pixel * const src0 = src +7-stride; 00401 const pixel * src1 = src +8*stride-1; 00402 const pixel * src2 = src1-2*stride; // == src+6*stride-1; 00403 int H = src0[1] - src0[-1]; 00404 int V = src1[0] - src2[ 0]; 00405 for(k=2; k<=8; ++k) { 00406 src1 += stride; src2 -= stride; 00407 H += k*(src0[k] - src0[-k]); 00408 V += k*(src1[0] - src2[ 0]); 00409 } 00410 if(svq3){ 00411 H = ( 5*(H/4) ) / 16; 00412 V = ( 5*(V/4) ) / 16; 00413 00414 /* required for 100% accuracy */ 00415 i = H; H = V; V = i; 00416 }else if(rv40){ 00417 H = ( H + (H>>2) ) >> 4; 00418 V = ( V + (V>>2) ) >> 4; 00419 }else{ 00420 H = ( 5*H+32 ) >> 6; 00421 V = ( 5*V+32 ) >> 6; 00422 } 00423 00424 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); 00425 for(j=16; j>0; --j) { 00426 int b = a; 00427 a += V; 00428 for(i=-16; i<0; i+=4) { 00429 src[16+i] = CLIP((b ) >> 5); 00430 src[17+i] = CLIP((b+ H) >> 5); 00431 src[18+i] = CLIP((b+2*H) >> 5); 00432 src[19+i] = CLIP((b+3*H) >> 5); 00433 b += 4*H; 00434 } 00435 src += stride; 00436 } 00437 } 00438 00439 static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){ 00440 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0); 00441 } 00442 00443 static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ 00444 int i; 00445 pixel *src = (pixel*)_src; 00446 int stride = _stride/sizeof(pixel); 00447 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); 00448 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); 00449 00450 for(i=0; i<8; i++){ 00451 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 00452 AV_WN4PA(((pixel4*)(src+i*stride))+1, b); 00453 } 00454 } 00455 00456 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ 00457 int i; 00458 pixel *src = (pixel*)_src; 00459 stride /= sizeof(pixel); 00460 00461 for(i=0; i<8; i++){ 00462 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); 00463 AV_WN4PA(((pixel4*)(src+i*stride))+0, a); 00464 AV_WN4PA(((pixel4*)(src+i*stride))+1, a); 00465 } 00466 } 00467 00468 #define PRED8x8_X(n, v)\ 00469 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ 00470 int i;\ 00471 const pixel4 a = PIXEL_SPLAT_X4(v);\ 00472 pixel *src = (pixel*)_src;\ 00473 stride /= sizeof(pixel);\ 00474 for(i=0; i<8; i++){\ 00475 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\ 00476 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\ 00477 }\ 00478 } 00479 00480 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); 00481 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); 00482 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); 00483 00484 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ 00485 int i; 00486 int dc0, dc2; 00487 pixel4 dc0splat, dc2splat; 00488 pixel *src = (pixel*)_src; 00489 stride /= sizeof(pixel); 00490 00491 dc0=dc2=0; 00492 for(i=0;i<4; i++){ 00493 dc0+= src[-1+i*stride]; 00494 dc2+= src[-1+(i+4)*stride]; 00495 } 00496 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); 00497 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); 00498 00499 for(i=0; i<4; i++){ 00500 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 00501 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat); 00502 } 00503 for(i=4; i<8; i++){ 00504 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); 00505 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat); 00506 } 00507 } 00508 00509 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ 00510 int i; 00511 int dc0, dc1; 00512 pixel4 dc0splat, dc1splat; 00513 pixel *src = (pixel*)_src; 00514 stride /= sizeof(pixel); 00515 00516 dc0=dc1=0; 00517 for(i=0;i<4; i++){ 00518 dc0+= src[i-stride]; 00519 dc1+= src[4+i-stride]; 00520 } 00521 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); 00522 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 00523 00524 for(i=0; i<4; i++){ 00525 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 00526 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 00527 } 00528 for(i=4; i<8; i++){ 00529 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 00530 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 00531 } 00532 } 00533 00534 static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ 00535 int i; 00536 int dc0, dc1, dc2; 00537 pixel4 dc0splat, dc1splat, dc2splat, dc3splat; 00538 pixel *src = (pixel*)_src; 00539 stride /= sizeof(pixel); 00540 00541 dc0=dc1=dc2=0; 00542 for(i=0;i<4; i++){ 00543 dc0+= src[-1+i*stride] + src[i-stride]; 00544 dc1+= src[4+i-stride]; 00545 dc2+= src[-1+(i+4)*stride]; 00546 } 00547 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); 00548 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); 00549 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); 00550 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); 00551 00552 for(i=0; i<4; i++){ 00553 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); 00554 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); 00555 } 00556 for(i=4; i<8; i++){ 00557 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); 00558 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); 00559 } 00560 } 00561 00562 //the following 4 function should not be optimized! 00563 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ 00564 FUNCC(pred8x8_top_dc)(src, stride); 00565 FUNCC(pred4x4_dc)(src, NULL, stride); 00566 } 00567 00568 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){ 00569 FUNCC(pred8x8_dc)(src, stride); 00570 FUNCC(pred4x4_top_dc)(src, NULL, stride); 00571 } 00572 00573 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){ 00574 FUNCC(pred8x8_left_dc)(src, stride); 00575 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride); 00576 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); 00577 } 00578 00579 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){ 00580 FUNCC(pred8x8_left_dc)(src, stride); 00581 FUNCC(pred4x4_128_dc)(src , NULL, stride); 00582 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); 00583 } 00584 00585 static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ 00586 int j, k; 00587 int a; 00588 INIT_CLIP 00589 pixel *src = (pixel*)_src; 00590 int stride = _stride/sizeof(pixel); 00591 const pixel * const src0 = src +3-stride; 00592 const pixel * src1 = src +4*stride-1; 00593 const pixel * src2 = src1-2*stride; // == src+2*stride-1; 00594 int H = src0[1] - src0[-1]; 00595 int V = src1[0] - src2[ 0]; 00596 for(k=2; k<=4; ++k) { 00597 src1 += stride; src2 -= stride; 00598 H += k*(src0[k] - src0[-k]); 00599 V += k*(src1[0] - src2[ 0]); 00600 } 00601 H = ( 17*H+16 ) >> 5; 00602 V = ( 17*V+16 ) >> 5; 00603 00604 a = 16*(src1[0] + src2[8]+1) - 3*(V+H); 00605 for(j=8; j>0; --j) { 00606 int b = a; 00607 a += V; 00608 src[0] = CLIP((b ) >> 5); 00609 src[1] = CLIP((b+ H) >> 5); 00610 src[2] = CLIP((b+2*H) >> 5); 00611 src[3] = CLIP((b+3*H) >> 5); 00612 src[4] = CLIP((b+4*H) >> 5); 00613 src[5] = CLIP((b+5*H) >> 5); 00614 src[6] = CLIP((b+6*H) >> 5); 00615 src[7] = CLIP((b+7*H) >> 5); 00616 src += stride; 00617 } 00618 } 00619 00620 #define SRC(x,y) src[(x)+(y)*stride] 00621 #define PL(y) \ 00622 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; 00623 #define PREDICT_8x8_LOAD_LEFT \ 00624 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ 00625 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ 00626 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ 00627 const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 00628 00629 #define PT(x) \ 00630 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 00631 #define PREDICT_8x8_LOAD_TOP \ 00632 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ 00633 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ 00634 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ 00635 const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ 00636 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 00637 00638 #define PTR(x) \ 00639 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; 00640 #define PREDICT_8x8_LOAD_TOPRIGHT \ 00641 int t8, t9, t10, t11, t12, t13, t14, t15; \ 00642 if(has_topright) { \ 00643 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ 00644 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ 00645 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); 00646 00647 #define PREDICT_8x8_LOAD_TOPLEFT \ 00648 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 00649 00650 #define PREDICT_8x8_DC(v) \ 00651 int y; \ 00652 for( y = 0; y < 8; y++ ) { \ 00653 AV_WN4PA(((pixel4*)src)+0, v); \ 00654 AV_WN4PA(((pixel4*)src)+1, v); \ 00655 src += stride; \ 00656 } 00657 00658 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00659 { 00660 pixel *src = (pixel*)_src; 00661 int stride = _stride/sizeof(pixel); 00662 00663 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); 00664 } 00665 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00666 { 00667 pixel *src = (pixel*)_src; 00668 int stride = _stride/sizeof(pixel); 00669 00670 PREDICT_8x8_LOAD_LEFT; 00671 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3); 00672 PREDICT_8x8_DC(dc); 00673 } 00674 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00675 { 00676 pixel *src = (pixel*)_src; 00677 int stride = _stride/sizeof(pixel); 00678 00679 PREDICT_8x8_LOAD_TOP; 00680 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3); 00681 PREDICT_8x8_DC(dc); 00682 } 00683 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00684 { 00685 pixel *src = (pixel*)_src; 00686 int stride = _stride/sizeof(pixel); 00687 00688 PREDICT_8x8_LOAD_LEFT; 00689 PREDICT_8x8_LOAD_TOP; 00690 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7 00691 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4); 00692 PREDICT_8x8_DC(dc); 00693 } 00694 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00695 { 00696 pixel *src = (pixel*)_src; 00697 int stride = _stride/sizeof(pixel); 00698 pixel4 a; 00699 00700 PREDICT_8x8_LOAD_LEFT; 00701 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \ 00702 AV_WN4PA(src+y*stride, a); \ 00703 AV_WN4PA(src+y*stride+4, a); 00704 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); 00705 #undef ROW 00706 } 00707 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00708 { 00709 int y; 00710 pixel *src = (pixel*)_src; 00711 int stride = _stride/sizeof(pixel); 00712 pixel4 a, b; 00713 00714 PREDICT_8x8_LOAD_TOP; 00715 src[0] = t0; 00716 src[1] = t1; 00717 src[2] = t2; 00718 src[3] = t3; 00719 src[4] = t4; 00720 src[5] = t5; 00721 src[6] = t6; 00722 src[7] = t7; 00723 a = AV_RN4PA(((pixel4*)src)+0); 00724 b = AV_RN4PA(((pixel4*)src)+1); 00725 for( y = 1; y < 8; y++ ) { 00726 AV_WN4PA(((pixel4*)(src+y*stride))+0, a); 00727 AV_WN4PA(((pixel4*)(src+y*stride))+1, b); 00728 } 00729 } 00730 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00731 { 00732 pixel *src = (pixel*)_src; 00733 int stride = _stride/sizeof(pixel); 00734 PREDICT_8x8_LOAD_TOP; 00735 PREDICT_8x8_LOAD_TOPRIGHT; 00736 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; 00737 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; 00738 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; 00739 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; 00740 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; 00741 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; 00742 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; 00743 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; 00744 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; 00745 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; 00746 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; 00747 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; 00748 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; 00749 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; 00750 SRC(7,7)= (t14 + 3*t15 + 2) >> 2; 00751 } 00752 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00753 { 00754 pixel *src = (pixel*)_src; 00755 int stride = _stride/sizeof(pixel); 00756 PREDICT_8x8_LOAD_TOP; 00757 PREDICT_8x8_LOAD_LEFT; 00758 PREDICT_8x8_LOAD_TOPLEFT; 00759 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; 00760 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; 00761 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; 00762 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; 00763 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; 00764 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; 00765 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; 00766 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; 00767 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; 00768 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; 00769 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; 00770 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; 00771 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; 00772 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; 00773 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; 00774 } 00775 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00776 { 00777 pixel *src = (pixel*)_src; 00778 int stride = _stride/sizeof(pixel); 00779 PREDICT_8x8_LOAD_TOP; 00780 PREDICT_8x8_LOAD_LEFT; 00781 PREDICT_8x8_LOAD_TOPLEFT; 00782 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; 00783 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; 00784 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; 00785 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; 00786 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; 00787 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; 00788 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; 00789 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; 00790 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; 00791 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; 00792 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; 00793 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; 00794 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; 00795 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; 00796 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; 00797 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; 00798 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; 00799 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; 00800 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; 00801 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; 00802 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; 00803 SRC(7,0)= (t6 + t7 + 1) >> 1; 00804 } 00805 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00806 { 00807 pixel *src = (pixel*)_src; 00808 int stride = _stride/sizeof(pixel); 00809 PREDICT_8x8_LOAD_TOP; 00810 PREDICT_8x8_LOAD_LEFT; 00811 PREDICT_8x8_LOAD_TOPLEFT; 00812 SRC(0,7)= (l6 + l7 + 1) >> 1; 00813 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; 00814 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; 00815 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; 00816 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; 00817 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; 00818 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; 00819 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; 00820 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; 00821 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; 00822 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; 00823 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; 00824 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; 00825 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; 00826 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; 00827 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; 00828 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; 00829 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; 00830 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; 00831 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; 00832 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; 00833 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; 00834 } 00835 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00836 { 00837 pixel *src = (pixel*)_src; 00838 int stride = _stride/sizeof(pixel); 00839 PREDICT_8x8_LOAD_TOP; 00840 PREDICT_8x8_LOAD_TOPRIGHT; 00841 SRC(0,0)= (t0 + t1 + 1) >> 1; 00842 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; 00843 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; 00844 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; 00845 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; 00846 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; 00847 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; 00848 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; 00849 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; 00850 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; 00851 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; 00852 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; 00853 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; 00854 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; 00855 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; 00856 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; 00857 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; 00858 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; 00859 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; 00860 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; 00861 SRC(7,6)= (t10 + t11 + 1) >> 1; 00862 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; 00863 } 00864 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride) 00865 { 00866 pixel *src = (pixel*)_src; 00867 int stride = _stride/sizeof(pixel); 00868 PREDICT_8x8_LOAD_LEFT; 00869 SRC(0,0)= (l0 + l1 + 1) >> 1; 00870 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; 00871 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; 00872 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; 00873 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; 00874 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; 00875 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; 00876 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; 00877 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; 00878 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; 00879 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; 00880 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; 00881 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; 00882 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; 00883 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= 00884 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= 00885 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= 00886 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; 00887 } 00888 #undef PREDICT_8x8_LOAD_LEFT 00889 #undef PREDICT_8x8_LOAD_TOP 00890 #undef PREDICT_8x8_LOAD_TOPLEFT 00891 #undef PREDICT_8x8_LOAD_TOPRIGHT 00892 #undef PREDICT_8x8_DC 00893 #undef PTR 00894 #undef PT 00895 #undef PL 00896 #undef SRC 00897 00898 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 00899 int i; 00900 pixel *pix = (pixel*)_pix; 00901 const dctcoef *block = (const dctcoef*)_block; 00902 stride /= sizeof(pixel); 00903 pix -= stride; 00904 for(i=0; i<4; i++){ 00905 pixel v = pix[0]; 00906 pix[1*stride]= v += block[0]; 00907 pix[2*stride]= v += block[4]; 00908 pix[3*stride]= v += block[8]; 00909 pix[4*stride]= v + block[12]; 00910 pix++; 00911 block++; 00912 } 00913 } 00914 00915 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 00916 int i; 00917 pixel *pix = (pixel*)_pix; 00918 const dctcoef *block = (const dctcoef*)_block; 00919 stride /= sizeof(pixel); 00920 for(i=0; i<4; i++){ 00921 pixel v = pix[-1]; 00922 pix[0]= v += block[0]; 00923 pix[1]= v += block[1]; 00924 pix[2]= v += block[2]; 00925 pix[3]= v + block[3]; 00926 pix+= stride; 00927 block+= 4; 00928 } 00929 } 00930 00931 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 00932 int i; 00933 pixel *pix = (pixel*)_pix; 00934 const dctcoef *block = (const dctcoef*)_block; 00935 stride /= sizeof(pixel); 00936 pix -= stride; 00937 for(i=0; i<8; i++){ 00938 pixel v = pix[0]; 00939 pix[1*stride]= v += block[0]; 00940 pix[2*stride]= v += block[8]; 00941 pix[3*stride]= v += block[16]; 00942 pix[4*stride]= v += block[24]; 00943 pix[5*stride]= v += block[32]; 00944 pix[6*stride]= v += block[40]; 00945 pix[7*stride]= v += block[48]; 00946 pix[8*stride]= v + block[56]; 00947 pix++; 00948 block++; 00949 } 00950 } 00951 00952 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){ 00953 int i; 00954 pixel *pix = (pixel*)_pix; 00955 const dctcoef *block = (const dctcoef*)_block; 00956 stride /= sizeof(pixel); 00957 for(i=0; i<8; i++){ 00958 pixel v = pix[-1]; 00959 pix[0]= v += block[0]; 00960 pix[1]= v += block[1]; 00961 pix[2]= v += block[2]; 00962 pix[3]= v += block[3]; 00963 pix[4]= v += block[4]; 00964 pix[5]= v += block[5]; 00965 pix[6]= v += block[6]; 00966 pix[7]= v + block[7]; 00967 pix+= stride; 00968 block+= 8; 00969 } 00970 } 00971 00972 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 00973 int i; 00974 for(i=0; i<16; i++) 00975 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 00976 } 00977 00978 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 00979 int i; 00980 for(i=0; i<16; i++) 00981 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 00982 } 00983 00984 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 00985 int i; 00986 for(i=0; i<4; i++) 00987 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 00988 } 00989 00990 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){ 00991 int i; 00992 for(i=0; i<4; i++) 00993 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); 00994 }