Main Page | Class List | File List | Class Members | File Members

filters_sse.h

Go to the documentation of this file.
/* Copyright (C) 2002 Jean-Marc Valin
   File: filters.c
   Various analysis/synthesis filters

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdint.h>   /* uintptr_t, used to align the scratch buffers */

/* The assembly below addresses coefficients 1..10 and state entries 0..9 with
   fixed byte offsets, so it always evaluates a 10-tap filter.  Shorter
   filters work because the scratch copies are zero-padded. */
enum {
   FILTERS_SSE_MAX_ORD = 10,   /* taps handled by the fixed-offset kernels   */
   FILTERS_SSE_SCRATCH = 20    /* floats per scratch array, alignment slack  */
};

/* Returns a pointer p inside buf (which must hold FILTERS_SSE_SCRATCH floats)
   such that p+1 sits on a 16-byte boundary.  The kernels load p[1..4] and
   p[5..8] with movaps, which needs a 16-byte aligned address.  The address is
   manipulated as uintptr_t so that it is not truncated on targets where a
   pointer is wider than int. */
static float *filters_sse_align(float *buf)
{
   uintptr_t addr = (uintptr_t)(buf + 4);

   addr &= ~(uintptr_t)15;
   return (float *)addr - 1;
}

/* Filters one sample through the combined numerator/denominator filter:

      *y     = *x + mem[0]
      mem[j] = mem[j+1] + num[j+1] * *x - den[j+1] * *y      for j = 0..8
      mem[9] =            num[10]  * *x - den[10]  * *y

   num[0] and den[0] are never read.  num, den and mem must come from
   filters_sse_align().  *x is loaded before *y is stored. */
static void filters_sse_filter_step(float *mem, const float *x, float *y,
                                    const float *num, const float *den)
{
   __asm__ __volatile__
   (
   /* xmm0 = x broadcast to all lanes, xmm1 = y = x + mem[0] broadcast */
   "\tmovss (%1), %%xmm0\n"
   "\tmovss (%0), %%xmm1\n"
   "\taddss %%xmm0, %%xmm1\n"
   "\tmovss %%xmm1, (%2)\n"
   "\tshufps $0x00, %%xmm0, %%xmm0\n"
   "\tshufps $0x00, %%xmm1, %%xmm1\n"

   /* mem[0..7] = mem[1..8] + num[1..8]*x - den[1..8]*y, four lanes at a time */
   "\tmovaps 4(%3),  %%xmm2\n"
   "\tmovaps 4(%4),  %%xmm3\n"
   "\tmulps  %%xmm0, %%xmm2\n"
   "\tmulps  %%xmm1, %%xmm3\n"
   "\tmovaps 20(%3), %%xmm4\n"
   "\tmulps  %%xmm0, %%xmm4\n"
   "\taddps  4(%0),  %%xmm2\n"
   "\tmovaps 20(%4), %%xmm5\n"
   "\tmulps  %%xmm1, %%xmm5\n"
   "\taddps  20(%0), %%xmm4\n"
   "\tsubps  %%xmm3, %%xmm2\n"
   "\tmovups %%xmm2, (%0)\n"
   "\tsubps  %%xmm5, %%xmm4\n"
   "\tmovups %%xmm4, 16(%0)\n"

   /* scalar tail: mem[8] uses the old mem[9]; mem[9] has no predecessor */
   "\tmovss  36(%3), %%xmm2\n"
   "\tmulss  %%xmm0, %%xmm2\n"
   "\tmovss  36(%4), %%xmm3\n"
   "\tmulss  %%xmm1, %%xmm3\n"
   "\taddss  36(%0), %%xmm2\n"
   "\tmovss  40(%3), %%xmm4\n"
   "\tmulss  %%xmm0, %%xmm4\n"
   "\tmovss  40(%4), %%xmm5\n"
   "\tmulss  %%xmm1, %%xmm5\n"
   "\tsubss  %%xmm3, %%xmm2\n"
   "\tmovss  %%xmm2, 32(%0) \n"
   "\tsubss  %%xmm5, %%xmm4\n"
   "\tmovss  %%xmm4, 36(%0)\n"

   : : "r" (mem), "r" (x), "r" (y), "r" (num), "r" (den)
   /* Every register written above is listed so the compiler does not keep
      live values in it across the statement. */
   : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" );
}

/* Filters one sample through the denominator-only filter:

      *y     = *x + mem[0]
      mem[j] = mem[j+1] - den[j+1] * *y      for j = 0..8
      mem[9] =          - den[10]  * *y

   den[0] is never read.  den and mem must come from filters_sse_align(). */
static void filters_sse_iir_step(float *mem, const float *x, float *y,
                                 const float *den)
{
   __asm__ __volatile__
   (
   /* xmm1 = y = x + mem[0], broadcast to all lanes */
   "\tmovss (%1), %%xmm0\n"
   "\tmovss (%0), %%xmm1\n"
   "\taddss %%xmm0, %%xmm1\n"
   "\tmovss %%xmm1, (%2)\n"
   "\tshufps $0x00, %%xmm0, %%xmm0\n"
   "\tshufps $0x00, %%xmm1, %%xmm1\n"

   /* den[1..10] * y */
   "\tmovaps 4(%3),  %%xmm2\n"
   "\tmovaps 20(%3), %%xmm3\n"
   "\tmulps  %%xmm1, %%xmm2\n"
   "\tmulps  %%xmm1, %%xmm3\n"
   "\tmovss  36(%3), %%xmm4\n"
   "\tmovss  40(%3), %%xmm5\n"
   "\tmulss  %%xmm1, %%xmm4\n"
   "\tmulss  %%xmm1, %%xmm5\n"

   /* mem[0..7] = mem[1..8] - den[1..8]*y */
   "\tmovaps 4(%0),  %%xmm6\n"
   "\tsubps  %%xmm2, %%xmm6\n"
   "\tmovups %%xmm6, (%0)\n"
   "\tmovaps 20(%0), %%xmm7\n"
   "\tsubps  %%xmm3, %%xmm7\n"
   "\tmovups %%xmm7, 16(%0)\n"

   /* mem[8] = mem[9] - den[9]*y ; mem[9] = 0 - den[10]*y */
   "\tmovss  36(%0), %%xmm7\n"
   "\tsubss  %%xmm4, %%xmm7\n"
   "\tmovss  %%xmm7, 32(%0) \n"
   "\txorps  %%xmm2, %%xmm2\n"
   "\tsubss  %%xmm5, %%xmm2\n"
   "\tmovss  %%xmm2, 36(%0)\n"

   : : "r" (mem), "r" (x), "r" (y), "r" (den)
   : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" );
}

/* Filters N samples of x into y through a filter with numerator _num and
   denominator _den, carrying the state in _mem.

   x     input samples, N floats
   _num  numerator coefficients, ord+1 floats (entry 0 is not used)
   _den  denominator coefficients, ord+1 floats (entry 0 is not used)
   y     output samples, N floats
   N     number of samples; any value is accepted and nothing beyond N
         samples is read from x or written to y
   ord   filter order; only the first 10 taps are evaluated
   _mem  filter state, ord floats, updated on return

   Coefficients and state are copied into aligned, zero-padded scratch
   arrays, the per-sample kernel is run, and the state is copied back. */
void filter_mem2(float *x, float *_num, float *_den, float *y, int N, int ord, float *_mem)
{
   float num_store[FILTERS_SSE_SCRATCH];
   float den_store[FILTERS_SSE_SCRATCH];
   float mem_store[FILTERS_SSE_SCRATCH];
   float *num = filters_sse_align(num_store);
   float *den = filters_sse_align(den_store);
   float *mem = filters_sse_align(mem_store);
   /* Entries past FILTERS_SSE_MAX_ORD are never touched by the kernel, so
      they are not copied; this also keeps a large ord from overrunning the
      scratch arrays. */
   int taps = ord < FILTERS_SSE_MAX_ORD ? ord : FILTERS_SSE_MAX_ORD;
   int i;

   for (i = 0; i <= FILTERS_SSE_MAX_ORD; i++)
      num[i] = den[i] = 0;
   for (i = 0; i < FILTERS_SSE_MAX_ORD; i++)
      mem[i] = 0;

   for (i = 0; i < taps + 1; i++)
   {
      num[i] = _num[i];
      den[i] = _den[i];
   }
   for (i = 0; i < taps; i++)
      mem[i] = _mem[i];

   for (i = 0; i < N; i++)
      filters_sse_filter_step(mem, x + i, y + i, num, den);

   for (i = 0; i < taps; i++)
      _mem[i] = mem[i];
}


/* Filters N samples of x into y through a denominator-only filter _den,
   carrying the state in _mem.

   x     input samples, N floats
   _den  denominator coefficients, ord+1 floats (entry 0 is not used)
   y     output samples, N floats
   N     number of samples
   ord   filter order; only the first 10 taps are evaluated
   _mem  filter state, ord floats, updated on return */
void iir_mem2(float *x, float *_den, float *y, int N, int ord, float *_mem)
{
   float den_store[FILTERS_SSE_SCRATCH];
   float mem_store[FILTERS_SSE_SCRATCH];
   float *den = filters_sse_align(den_store);
   float *mem = filters_sse_align(mem_store);
   /* See filter_mem2: entries past the kernel's 10 taps are not copied. */
   int taps = ord < FILTERS_SSE_MAX_ORD ? ord : FILTERS_SSE_MAX_ORD;
   int i;

   for (i = 0; i <= FILTERS_SSE_MAX_ORD; i++)
      den[i] = 0;
   for (i = 0; i < FILTERS_SSE_MAX_ORD; i++)
      mem[i] = 0;

   for (i = 0; i < taps + 1; i++)
      den[i] = _den[i];
   for (i = 0; i < taps; i++)
      mem[i] = _mem[i];

   for (i = 0; i < N; i++)
      filters_sse_iir_step(mem, x + i, y + i, den);

   for (i = 0; i < taps; i++)
      _mem[i] = mem[i];
}

Generated on Mon Oct 11 15:42:53 2004 for speex by doxygen 1.3.7