/* Libav 0.7.1 */
00001 /* 00002 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at> 00003 * 00004 * This file is part of Libav. 00005 * 00006 * Libav is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * Libav is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License along 00017 * with Libav; if not, write to the Free Software Foundation, Inc., 00018 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 00019 */ 00020 00021 #ifdef COMPILE_TEMPLATE_SSE 00022 #define MM "%%xmm" 00023 #define MOV "movq" 00024 #define MOVQ "movdqa" 00025 #define MOVQU "movdqu" 00026 #define STEP 8 00027 #define LOAD(mem,dst) \ 00028 MOV" "mem", "dst" \n\t"\ 00029 "punpcklbw "MM"7, "dst" \n\t" 00030 #define PSRL1(reg) "psrldq $1, "reg" \n\t" 00031 #define PSRL2(reg) "psrldq $2, "reg" \n\t" 00032 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\ 00033 "psrldq $2, "src" \n\t" 00034 #else 00035 #define MM "%%mm" 00036 #define MOV "movd" 00037 #define MOVQ "movq" 00038 #define MOVQU "movq" 00039 #define STEP 4 00040 #define LOAD(mem,dst) \ 00041 MOV" "mem", "dst" \n\t"\ 00042 "punpcklbw "MM"7, "dst" \n\t" 00043 #define PSRL1(reg) "psrlq $8, "reg" \n\t" 00044 #define PSRL2(reg) "psrlq $16, "reg" \n\t" 00045 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t" 00046 #endif 00047 00048 #ifdef COMPILE_TEMPLATE_SSSE3 00049 #define PABS(tmp,dst) \ 00050 "pabsw "dst", "dst" \n\t" 00051 #else 00052 #define PABS(tmp,dst) \ 00053 "pxor "tmp", "tmp" \n\t"\ 00054 "psubw "dst", "tmp" \n\t"\ 00055 "pmaxsw "tmp", "dst" \n\t" 00056 
#endif 00057 00058 #define CHECK(pj,mj) \ 00059 MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\ 00060 MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\ 00061 MOVQ" "MM"2, "MM"4 \n\t"\ 00062 MOVQ" "MM"2, "MM"5 \n\t"\ 00063 "pxor "MM"3, "MM"4 \n\t"\ 00064 "pavgb "MM"3, "MM"5 \n\t"\ 00065 "pand "MANGLE(pb_1)", "MM"4 \n\t"\ 00066 "psubusb "MM"4, "MM"5 \n\t"\ 00067 PSRL1(MM"5") \ 00068 "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\ 00069 MOVQ" "MM"2, "MM"4 \n\t"\ 00070 "psubusb "MM"3, "MM"2 \n\t"\ 00071 "psubusb "MM"4, "MM"3 \n\t"\ 00072 "pmaxub "MM"3, "MM"2 \n\t"\ 00073 MOVQ" "MM"2, "MM"3 \n\t"\ 00074 MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\ 00075 PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\ 00076 PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\ 00077 "punpcklbw "MM"7, "MM"2 \n\t"\ 00078 "punpcklbw "MM"7, "MM"3 \n\t"\ 00079 "punpcklbw "MM"7, "MM"4 \n\t"\ 00080 "paddw "MM"3, "MM"2 \n\t"\ 00081 "paddw "MM"4, "MM"2 \n\t" /* score */ 00082 00083 #define CHECK1 \ 00084 MOVQ" "MM"0, "MM"3 \n\t"\ 00085 "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\ 00086 "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\ 00087 MOVQ" "MM"3, "MM"6 \n\t"\ 00088 "pand "MM"3, "MM"5 \n\t"\ 00089 "pandn "MM"1, "MM"3 \n\t"\ 00090 "por "MM"5, "MM"3 \n\t"\ 00091 MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */ 00092 00093 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\ 00094 hurts both quality and speed, but matches the C version. 
*/\ 00095 "paddw "MANGLE(pw_1)", "MM"6 \n\t"\ 00096 "psllw $14, "MM"6 \n\t"\ 00097 "paddsw "MM"6, "MM"2 \n\t"\ 00098 MOVQ" "MM"0, "MM"3 \n\t"\ 00099 "pcmpgtw "MM"2, "MM"3 \n\t"\ 00100 "pminsw "MM"2, "MM"0 \n\t"\ 00101 "pand "MM"3, "MM"5 \n\t"\ 00102 "pandn "MM"1, "MM"3 \n\t"\ 00103 "por "MM"5, "MM"3 \n\t"\ 00104 MOVQ" "MM"3, "MM"1 \n\t" 00105 00106 void RENAME(ff_yadif_filter_line)(uint8_t *dst, 00107 uint8_t *prev, uint8_t *cur, uint8_t *next, 00108 int w, int prefs, int mrefs, int parity, int mode) 00109 { 00110 DECLARE_ALIGNED(16, uint8_t, tmp0[16]); 00111 DECLARE_ALIGNED(16, uint8_t, tmp1[16]); 00112 DECLARE_ALIGNED(16, uint8_t, tmp2[16]); 00113 DECLARE_ALIGNED(16, uint8_t, tmp3[16]); 00114 int x; 00115 00116 #define FILTER\ 00117 for(x=0; x<w; x+=STEP){\ 00118 __asm__ volatile(\ 00119 "pxor "MM"7, "MM"7 \n\t"\ 00120 LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\ 00121 LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\ 00122 LOAD("(%["prev2"])", MM"2") /* prev2[x] */\ 00123 LOAD("(%["next2"])", MM"3") /* next2[x] */\ 00124 MOVQ" "MM"3, "MM"4 \n\t"\ 00125 "paddw "MM"2, "MM"3 \n\t"\ 00126 "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\ 00127 MOVQ" "MM"0, %[tmp0] \n\t" /* c */\ 00128 MOVQ" "MM"3, %[tmp1] \n\t" /* d */\ 00129 MOVQ" "MM"1, %[tmp2] \n\t" /* e */\ 00130 "psubw "MM"4, "MM"2 \n\t"\ 00131 PABS( MM"4", MM"2") /* temporal_diff0 */\ 00132 LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\ 00133 LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\ 00134 "psubw "MM"0, "MM"3 \n\t"\ 00135 "psubw "MM"1, "MM"4 \n\t"\ 00136 PABS( MM"5", MM"3")\ 00137 PABS( MM"5", MM"4")\ 00138 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\ 00139 "psrlw $1, "MM"2 \n\t"\ 00140 "psrlw $1, "MM"3 \n\t"\ 00141 "pmaxsw "MM"3, "MM"2 \n\t"\ 00142 LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\ 00143 LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\ 00144 "psubw "MM"0, "MM"3 \n\t"\ 00145 "psubw "MM"1, "MM"4 \n\t"\ 00146 PABS( MM"5", MM"3")\ 00147 
PABS( MM"5", MM"4")\ 00148 "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\ 00149 "psrlw $1, "MM"3 \n\t"\ 00150 "pmaxsw "MM"3, "MM"2 \n\t"\ 00151 MOVQ" "MM"2, %[tmp3] \n\t" /* diff */\ 00152 \ 00153 "paddw "MM"0, "MM"1 \n\t"\ 00154 "paddw "MM"0, "MM"0 \n\t"\ 00155 "psubw "MM"1, "MM"0 \n\t"\ 00156 "psrlw $1, "MM"1 \n\t" /* spatial_pred */\ 00157 PABS( MM"2", MM"0") /* ABS(c-e) */\ 00158 \ 00159 MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\ 00160 MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\ 00161 MOVQ" "MM"2, "MM"4 \n\t"\ 00162 "psubusb "MM"3, "MM"2 \n\t"\ 00163 "psubusb "MM"4, "MM"3 \n\t"\ 00164 "pmaxub "MM"3, "MM"2 \n\t"\ 00165 PSHUF(MM"3", MM"2") \ 00166 "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\ 00167 "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\ 00168 "paddw "MM"2, "MM"0 \n\t"\ 00169 "paddw "MM"3, "MM"0 \n\t"\ 00170 "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\ 00171 \ 00172 CHECK(-2,0)\ 00173 CHECK1\ 00174 CHECK(-3,1)\ 00175 CHECK2\ 00176 CHECK(0,-2)\ 00177 CHECK1\ 00178 CHECK(1,-3)\ 00179 CHECK2\ 00180 \ 00181 /* if(p->mode<2) ... 
*/\ 00182 MOVQ" %[tmp3], "MM"6 \n\t" /* diff */\ 00183 "cmpl $2, %[mode] \n\t"\ 00184 "jge 1f \n\t"\ 00185 LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\ 00186 LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\ 00187 LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\ 00188 LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\ 00189 "paddw "MM"4, "MM"2 \n\t"\ 00190 "paddw "MM"5, "MM"3 \n\t"\ 00191 "psrlw $1, "MM"2 \n\t" /* b */\ 00192 "psrlw $1, "MM"3 \n\t" /* f */\ 00193 MOVQ" %[tmp0], "MM"4 \n\t" /* c */\ 00194 MOVQ" %[tmp1], "MM"5 \n\t" /* d */\ 00195 MOVQ" %[tmp2], "MM"7 \n\t" /* e */\ 00196 "psubw "MM"4, "MM"2 \n\t" /* b-c */\ 00197 "psubw "MM"7, "MM"3 \n\t" /* f-e */\ 00198 MOVQ" "MM"5, "MM"0 \n\t"\ 00199 "psubw "MM"4, "MM"5 \n\t" /* d-c */\ 00200 "psubw "MM"7, "MM"0 \n\t" /* d-e */\ 00201 MOVQ" "MM"2, "MM"4 \n\t"\ 00202 "pminsw "MM"3, "MM"2 \n\t"\ 00203 "pmaxsw "MM"4, "MM"3 \n\t"\ 00204 "pmaxsw "MM"5, "MM"2 \n\t"\ 00205 "pminsw "MM"5, "MM"3 \n\t"\ 00206 "pmaxsw "MM"0, "MM"2 \n\t" /* max */\ 00207 "pminsw "MM"0, "MM"3 \n\t" /* min */\ 00208 "pxor "MM"4, "MM"4 \n\t"\ 00209 "pmaxsw "MM"3, "MM"6 \n\t"\ 00210 "psubw "MM"2, "MM"4 \n\t" /* -max */\ 00211 "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\ 00212 "1: \n\t"\ 00213 \ 00214 MOVQ" %[tmp1], "MM"2 \n\t" /* d */\ 00215 MOVQ" "MM"2, "MM"3 \n\t"\ 00216 "psubw "MM"6, "MM"2 \n\t" /* d-diff */\ 00217 "paddw "MM"6, "MM"3 \n\t" /* d+diff */\ 00218 "pmaxsw "MM"2, "MM"1 \n\t"\ 00219 "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\ 00220 "packuswb "MM"1, "MM"1 \n\t"\ 00221 \ 00222 :[tmp0]"=m"(tmp0),\ 00223 [tmp1]"=m"(tmp1),\ 00224 [tmp2]"=m"(tmp2),\ 00225 [tmp3]"=m"(tmp3)\ 00226 :[prev] "r"(prev),\ 00227 [cur] "r"(cur),\ 00228 [next] "r"(next),\ 00229 [prefs]"r"((x86_reg)prefs),\ 00230 [mrefs]"r"((x86_reg)mrefs),\ 00231 [mode] "g"(mode)\ 00232 );\ 00233 __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\ 00234 dst += STEP;\ 00235 prev+= 
STEP;\ 00236 cur += STEP;\ 00237 next+= STEP;\ 00238 } 00239 00240 if (parity) { 00241 #define prev2 "prev" 00242 #define next2 "cur" 00243 FILTER 00244 #undef prev2 00245 #undef next2 00246 } else { 00247 #define prev2 "cur" 00248 #define next2 "next" 00249 FILTER 00250 #undef prev2 00251 #undef next2 00252 } 00253 } 00254 #undef STEP 00255 #undef MM 00256 #undef MOV 00257 #undef MOVQ 00258 #undef MOVQU 00259 #undef PSHUF 00260 #undef PSRL1 00261 #undef PSRL2 00262 #undef LOAD 00263 #undef PABS 00264 #undef CHECK 00265 #undef CHECK1 00266 #undef CHECK2 00267 #undef FILTER 00268