Libav
|
/*
 * Copyright (c) 2008 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * H.264 chroma motion compensation for an 8-pixel-wide block (SSSE3: uses
 * pmaddubsw).
 *
 * (x, y) is the fractional-sample MV offset, each in [0, 8).  The bilinear
 * weight applied to a pixel pair (a, b) is (8-t)*a + t*b.  The packing trick
 * used throughout: 255*t + 8 == (t << 8) + (8 - t), so movd of that value
 * yields the byte pair {8-t, t}, which pmaddubsw multiplies against source
 * bytes interleaved by punpcklbw — one pmaddubsw computes the whole weighted
 * sum per output pixel.
 *
 * dst:    destination, 8-byte aligned
 * src:    source, unaligned
 * stride: byte stride shared by dst and src
 * h:      row count; each asm loop iteration handles 2 rows, so h is
 *         presumably even — TODO confirm against callers
 * rnd:    selects the rounding bias (ff_pw_4/ff_pw_32 = full rounding,
 *         ff_pw_3/ff_pw_28 = reduced) before the final shift
 *
 * NOTE(review): AVG_OP() and H264_CHROMA_MC8_MV0 are macros defined outside
 * this template; AVG_OP presumably expands to its argument for the "avg"
 * instantiation and to nothing for "put" — verify in the including file.
 */
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
{
    if(y==0 && x==0) {
        /* no filter needed */
        H264_CHROMA_MC8_MV0(dst, src, stride, h);
        return;
    }

    assert(x<8 && y<8 && x>=0 && y>=0);

    if(y==0 || x==0)
    {
        /* 1 dimensional filter only */
        /* Exactly one of x, y is nonzero here, so t = x+y is that offset.
         * xmm7 = byte weights {8-t, t} broadcast across the register,
         * xmm6 = word rounding bias (4 or 3), both replicated to 128 bits. */
        __asm__ volatile(
            "movd %0, %%xmm7 \n\t"
            "movq %1, %%xmm6 \n\t"
            "pshuflw $0, %%xmm7, %%xmm7 \n\t"
            "movlhps %%xmm6, %%xmm6 \n\t"
            "movlhps %%xmm7, %%xmm7 \n\t"
            :: "r"(255*(x+y)+8), "m"(*(rnd?&ff_pw_4:&ff_pw_3))
        );

        if(x) {
            /* Horizontal filter: interleave src[i] with src[i+1], weight with
             * pmaddubsw, add bias, >>3, and pack two rows into one xmm store
             * (low 8 bytes to row 0, high 8 bytes via movhps to row 1). */
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq 1(%1), %%xmm1 \n\t"
                "movq (%1,%3), %%xmm2 \n\t"
                "movq 1(%1,%3), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
                AVG_OP("movq (%0), %%xmm4 \n\t")
                AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
                AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        } else {
            /* Vertical filter: three consecutive rows are loaded; row0 is
             * interleaved with row1 and row1 with row2, so two output rows
             * are produced per iteration while src advances only 2 rows
             * (the middle row is reused next iteration via the reload). */
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq (%1,%3), %%xmm1 \n\t"
                "movdqa %%xmm1, %%xmm2 \n\t"
                "movq (%1,%3,2), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
                AVG_OP("movq (%0), %%xmm4 \n\t")
                AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
                AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        }
        return;
    }

    /* general case, bilinear */
    /* (x*255+8)*(8-y) packs the byte pair {(8-x)(8-y), x(8-y)} (upper-row
     * weights, max 49 so they fit a signed byte since x,y > 0 here), and
     * (x*255+8)*y packs {(8-x)y, xy} (lower-row weights).
     * xmm7 = upper-row weights, xmm6 = lower-row weights, xmm5 = bias
     * (32 or 28) applied before the >>6. */
    __asm__ volatile(
        "movd %0, %%xmm7 \n\t"
        "movd %1, %%xmm6 \n\t"
        "movdqa %2, %%xmm5 \n\t"
        "pshuflw $0, %%xmm7, %%xmm7 \n\t"
        "pshuflw $0, %%xmm6, %%xmm6 \n\t"
        "movlhps %%xmm7, %%xmm7 \n\t"
        "movlhps %%xmm6, %%xmm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
    );

    /* xmm0 carries the previous row, already horizontally interleaved
     * (src[i] ⊗ src[i+1]); each iteration filters it against two new rows
     * and hands the last interleaved row back to xmm0 for the next pass. */
    __asm__ volatile(
        "movq (%1), %%xmm0 \n\t"
        "movq 1(%1), %%xmm1 \n\t"
        "punpcklbw %%xmm1, %%xmm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movq (%1), %%xmm1 \n\t"
        "movq 1(%1), %%xmm2 \n\t"
        "movq (%1,%3), %%xmm3 \n\t"
        "movq 1(%1,%3), %%xmm4 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "punpcklbw %%xmm2, %%xmm1 \n\t"
        "punpcklbw %%xmm4, %%xmm3 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm3, %%xmm4 \n\t"
        "pmaddubsw %%xmm7, %%xmm0 \n\t"
        "pmaddubsw %%xmm6, %%xmm1 \n\t"
        "pmaddubsw %%xmm7, %%xmm2 \n\t"
        "pmaddubsw %%xmm6, %%xmm3 \n\t"
        "paddw %%xmm5, %%xmm0 \n\t"
        "paddw %%xmm5, %%xmm2 \n\t"
        "paddw %%xmm0, %%xmm1 \n\t"
        "paddw %%xmm2, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm0 \n\t"
        "psrlw $6, %%xmm1 \n\t"
        "psrlw $6, %%xmm3 \n\t"
        AVG_OP("movq (%0), %%xmm2 \n\t")
        AVG_OP("movhps (%0,%3), %%xmm2 \n\t")
        "packuswb %%xmm3, %%xmm1 \n\t"
        AVG_OP("pavgb %%xmm2, %%xmm1 \n\t")
        "movq %%xmm1, (%0)\n\t"
        "movhps %%xmm1, (%0,%3)\n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}

/*
 * H.264 chroma motion compensation for a 4-pixel-wide block (SSSE3 on MMX
 * registers).
 *
 * Same bilinear scheme and 255*t+8 weight-packing trick as the MC8 version
 * above, but 4 pixels fit the 64-bit MMX register file (pshufw/pmaddubsw on
 * mm regs).  Unlike MC8 there is no rnd parameter: the bias is always
 * ff_pw_32, and no zero-MV / 1-D fast paths exist — the full bilinear filter
 * runs for every (x, y).  When x or y is 0 the corresponding packed byte
 * weight is simply 0, so the result is still correct.
 *
 * dst:    destination, 4-byte aligned
 * src:    source, unaligned
 * stride: byte stride shared by dst and src
 * h:      row count; 2 rows per loop iteration
 *
 * NOTE(review): this clobbers MMX state; the caller is presumably
 * responsible for emms — confirm in the surrounding dsputil glue.
 */
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    /* mm7 = upper-row byte weights, mm6 = lower-row byte weights,
     * mm5 = word rounding bias 32 (applied before the >>6). */
    __asm__ volatile(
        "movd %0, %%mm7 \n\t"
        "movd %1, %%mm6 \n\t"
        "movq %2, %%mm5 \n\t"
        "pshufw $0, %%mm7, %%mm7 \n\t"
        "pshufw $0, %%mm6, %%mm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
    );

    /* mm0 carries the previous row, horizontally interleaved; mm4 hands the
     * last new interleaved row back to mm0 each iteration (2 rows/pass). */
    __asm__ volatile(
        "movd (%1), %%mm0 \n\t"
        "punpcklbw 1(%1), %%mm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movd (%1), %%mm1 \n\t"
        "movd (%1,%3), %%mm3 \n\t"
        "punpcklbw 1(%1), %%mm1 \n\t"
        "punpcklbw 1(%1,%3), %%mm3 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "movq %%mm1, %%mm2 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pmaddubsw %%mm7, %%mm0 \n\t"
        "pmaddubsw %%mm6, %%mm1 \n\t"
        "pmaddubsw %%mm7, %%mm2 \n\t"
        "pmaddubsw %%mm6, %%mm3 \n\t"
        "paddw %%mm5, %%mm0 \n\t"
        "paddw %%mm5, %%mm2 \n\t"
        "paddw %%mm0, %%mm1 \n\t"
        "paddw %%mm2, %%mm3 \n\t"
        "movq %%mm4, %%mm0 \n\t"
        "psrlw $6, %%mm1 \n\t"
        "psrlw $6, %%mm3 \n\t"
        "packuswb %%mm1, %%mm1 \n\t"
        "packuswb %%mm3, %%mm3 \n\t"
        AVG_OP("pavgb (%0), %%mm1 \n\t")
        AVG_OP("pavgb (%0,%3), %%mm3 \n\t")
        "movd %%mm1, (%0)\n\t"
        "movd %%mm3, (%0,%3)\n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}