00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <stdio.h>
00036
00037 #define OVERRIDE_NORMALIZE16
00038 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
00039 {
00040 spx_sig_t max_val=1;
00041 int sig_shift;
00042
00043 __asm__
00044 (
00045 "%0 = 0;\n\t"
00046 "I0 = %1;\n\t"
00047 "L0 = 0;\n\t"
00048 "R1 = [I0++];\n\t"
00049 "LOOP norm_max%= LC0 = %2;\n\t"
00050 "LOOP_BEGIN norm_max%=;\n\t"
00051 "R2 = ABS R1 || R1 = [I0++];\n\t"
00052 "%0 = MAX(%0, R2);\n\t"
00053 "LOOP_END norm_max%=;\n\t"
00054 : "=&d" (max_val)
00055 : "a" (x), "a" (len)
00056 : "R1", "R2"
00057 );
00058
00059 sig_shift=0;
00060 while (max_val>max_scale)
00061 {
00062 sig_shift++;
00063 max_val >>= 1;
00064 }
00065
00066 __asm__ __volatile__
00067 (
00068 "I0 = %0;\n\t"
00069 "L0 = 0;\n\t"
00070 "I1 = %1;\n\t"
00071 "L1 = 0;\n\t"
00072 "R0 = [I0++];\n\t"
00073 "LOOP norm_shift%= LC0 = %3 >> 1;\n\t"
00074 "LOOP_BEGIN norm_shift%=;\n\t"
00075 "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t"
00076 "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t"
00077 "R3 = PACK(R3.L, R1.L);\n\t"
00078 "[I1++] = R3;\n\t"
00079 "LOOP_END norm_shift%=;\n\t"
00080 : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len)
00081 : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory"
00082 );
00083 return sig_shift;
00084 }
00085
00086 #define OVERRIDE_FILTER_MEM2
00087 void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00088 {
00089 spx_word32_t xy2[N+1];
00090 spx_word32_t *xy = xy2+1;
00091 spx_word32_t numden_a[2*ord+2];
00092 spx_word16_t *numden = (spx_word16_t*) numden_a;
00093 int i;
00094 for (i=0;i<ord;i++)
00095 {
00096 numden[2*i] = num[i];
00097 numden[2*i+1] = den[i];
00098 }
00099 __asm__ __volatile__
00100 (
00101
00102 "R0 = %5;\n\t"
00103
00104 "P0 = %3;\n\t"
00105 "I0 = P0;\n\t"
00106 "B0 = P0;\n\t"
00107 "L0 = 0;\n\t"
00108
00109 "P2 = %0;\n\t"
00110 "I2 = P2;\n\t"
00111 "L2 = 0;\n\t"
00112
00113 "P4 = %6;\n\t"
00114 "P0 = %1;\n\t"
00115 "P1 = %2;\n\t"
00116
00117
00118 "R1 = [P4++];\n\t"
00119 "R1 <<= 1;\n\t"
00120 "R2 = [P0++];\n\t"
00121 "R1 = R1 + R2;\n\t"
00122 "[P1++] = R1;\n\t"
00123 "R1 <<= 2;\n\t"
00124 "R2 <<= 2;\n\t"
00125 "R2 = PACK(R1.H, R2.H);\n\t"
00126 "[P2] = R2;\n\t"
00127
00128
00129 "R0 += -1;\n\t"
00130 "R3 = 0;\n\t"
00131 "LC0 = R0;\n\t"
00132 "LOOP filter_start%= LC0;\n\t"
00133 "LOOP_BEGIN filter_start%=;\n\t"
00134 "R3 += 1;\n\t"
00135 "LC1 = R3;\n\t"
00136
00137 "R1 = [P4++];\n\t"
00138 "A1 = R1;\n\t"
00139 "A0 = 0;\n\t"
00140 "I0 = B0;\n\t"
00141 "I2 = P2;\n\t"
00142 "P2 += 4;\n\t"
00143 "R4 = [I0++] || R5 = [I2--];\n\t"
00144 "LOOP filter_start_inner%= LC1;\n\t"
00145 "LOOP_BEGIN filter_start_inner%=;\n\t"
00146 "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00147 "LOOP_END filter_start_inner%=;\n\t"
00148 "A0 += A1;\n\t"
00149 "R4 = A0;\n\t"
00150 "R4 <<= 1;\n\t"
00151 "R2 = [P0++];\n\t"
00152 "R4 = R4 + R2;\n\t"
00153 "[P1++] = R4;\n\t"
00154 "R4 <<= 2;\n\t"
00155 "R2 <<= 2;\n\t"
00156 "R2 = PACK(R4.H, R2.H);\n\t"
00157 "[P2] = R2;\n\t"
00158
00159 "LOOP_END filter_start%=;\n\t"
00160
00161
00162 "R0 = %5;\n\t"
00163 "R0 <<= 1;\n\t"
00164 "I0 = B0;\n\t"
00165 "R0 <<= 1;\n\t"
00166 "L0 = R0;\n\t"
00167
00168 "R0 = %5;\n\t"
00169 "R2 = %4;\n\t"
00170 "R2 = R2 - R0;\n\t"
00171 "R4 = [I0++];\n\t"
00172 "LC0 = R2;\n\t"
00173 "P3 = R0;\n\t"
00174 "R0 <<= 2;\n\t"
00175 "R0 += 8;\n\t"
00176 "I2 = P2;\n\t"
00177 "M0 = R0;\n\t"
00178 "A0 = A1 = 0;\n\t"
00179 "R5 = [I2--];\n\t"
00180 "LOOP filter_mid%= LC0;\n\t"
00181 "LOOP_BEGIN filter_mid%=;\n\t"
00182 "LOOP filter_mid_inner%= LC1=P3;\n\t"
00183 "LOOP_BEGIN filter_mid_inner%=;\n\t"
00184 "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00185 "LOOP_END filter_mid_inner%=;\n\t"
00186 "R0 = (A0 += A1) || I2 += M0;\n\t"
00187 "R0 = R0 << 1 || R5 = [P0++];\n\t"
00188 "R0 = R0 + R5;\n\t"
00189 "R0 = R0 << 2 || [P1++] = R0;\n\t"
00190 "R5 = R5 << 2;\n\t"
00191 "R5 = PACK(R0.H, R5.H);\n\t"
00192 "A0 = A1 = 0 || [I2--] = R5\n\t"
00193 "LOOP_END filter_mid%=;\n\t"
00194 "I2 += 4;\n\t"
00195 "P2 = I2;\n\t"
00196
00197 "P4 = %6;\n\t"
00198 "R0 = %5;\n\t"
00199 "LC0 = R0;\n\t"
00200 "P0 = B0;\n\t"
00201 "A0 = A1 = 0;\n\t"
00202 "LOOP mem_update%= LC0;\n\t"
00203 "LOOP_BEGIN mem_update%=;\n\t"
00204 "I2 = P2;\n\t"
00205 "I0 = P0;\n\t"
00206 "P0 += 4;\n\t"
00207 "R0 = LC0;\n\t"
00208 "LC1 = R0;\n\t"
00209 "R5 = [I2--] || R4 = [I0++];\n\t"
00210 "LOOP mem_accum%= LC1;\n\t"
00211 "LOOP_BEGIN mem_accum%=;\n\t"
00212 "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00213 "LOOP_END mem_accum%=;\n\t"
00214 "R0 = (A0 += A1);\n\t"
00215 "A0 = A1 = 0 || [P4++] = R0;\n\t"
00216 "LOOP_END mem_update%=;\n\t"
00217 "L0 = 0;\n\t"
00218 : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
00219 : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
00220 );
00221
00222 }
00223
00224
00225
00226
00227 #define OVERRIDE_IIR_MEM2
00228 void iir_mem2(const spx_sig_t *_x, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00229 {
00230 spx_word16_t y[N+2];
00231 spx_word16_t *yy;
00232 yy = y+2;
00233 __asm__ __volatile__
00234 (
00235
00236 "R0 = %5;\n\t"
00237
00238 "P1 = %3;\n\t"
00239 "I1 = P1;\n\t"
00240 "B1 = P1;\n\t"
00241 "L1 = 0;\n\t"
00242
00243 "P3 = %0;\n\t"
00244 "I3 = P3;\n\t"
00245 "L3 = 0;\n\t"
00246
00247 "P4 = %6;\n\t"
00248 "P0 = %1;\n\t"
00249 "P1 = %2;\n\t"
00250
00251
00252 "R1 = [P4++];\n\t"
00253 "R1 <<= 1;\n\t"
00254 "R2 = [P0++];\n\t"
00255 "R1 = R1 + R2;\n\t"
00256 "[P1++] = R1;\n\t"
00257 "R1 <<= 2;\n\t"
00258 "W[P3] = R1.H;\n\t"
00259 "R2 <<= 2;\n\t"
00260
00261
00262 "R0 += -1;\n\t"
00263 "R3 = 0;\n\t"
00264 "LC0 = R0;\n\t"
00265 "LOOP filter_start%= LC0;\n\t"
00266 "LOOP_BEGIN filter_start%=;\n\t"
00267 "R3 += 1;\n\t"
00268 "LC1 = R3;\n\t"
00269
00270 "R1 = [P4++];\n\t"
00271 "A1 = R1;\n\t"
00272 "I1 = B1;\n\t"
00273 "I3 = P3;\n\t"
00274 "P3 += 2;\n\t"
00275 "LOOP filter_start_inner%= LC1;\n\t"
00276 "LOOP_BEGIN filter_start_inner%=;\n\t"
00277 "R4.L = W[I1++];\n\t"
00278 "R5.L = W[I3--];\n\t"
00279 "A1 -= R4.L*R5.L (IS);\n\t"
00280 "LOOP_END filter_start_inner%=;\n\t"
00281
00282 "R1 = A1;\n\t"
00283 "R1 <<= 1;\n\t"
00284 "R2 = [P0++];\n\t"
00285 "R1 = R1 + R2;\n\t"
00286 "[P1++] = R1;\n\t"
00287 "R1 <<= 2;\n\t"
00288 "W[P3] = R1.H;\n\t"
00289 "R2 <<= 2;\n\t"
00290 "LOOP_END filter_start%=;\n\t"
00291
00292
00293 "R0 = %5;\n\t"
00294 "R0 <<= 1;\n\t"
00295 "I1 = B1;\n\t"
00296 "L1 = R0;\n\t"
00297
00298 "R0 = %5;\n\t"
00299 "R2 = %4;\n\t"
00300 "R2 = R2 - R0;\n\t"
00301 "R4.L = W[I1++];\n\t"
00302 "LC0 = R2;\n\t"
00303 "LOOP filter_mid%= LC0;\n\t"
00304 "LOOP_BEGIN filter_mid%=;\n\t"
00305 "LC1 = R0;\n\t"
00306 "A1 = 0;\n\t"
00307 "I3 = P3;\n\t"
00308 "P3 += 2;\n\t"
00309 "R5.L = W[I3--];\n\t"
00310 "LOOP filter_mid_inner%= LC1;\n\t"
00311 "LOOP_BEGIN filter_mid_inner%=;\n\t"
00312 "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
00313 "LOOP_END filter_mid_inner%=;\n\t"
00314 "R1 = A1;\n\t"
00315 "R1 = R1 << 1 || R2 = [P0++];\n\t"
00316 "R1 = R1 + R2;\n\t"
00317 "R1 = R1 << 2 || [P1++] = R1;\n\t"
00318 "W[P3] = R1.H;\n\t"
00319 "LOOP_END filter_mid%=;\n\t"
00320
00321
00322 "P4 = %6;\n\t"
00323 "R0 = %5;\n\t"
00324 "LC0 = R0;\n\t"
00325 "P1 = B1;\n\t"
00326 "LOOP mem_update%= LC0;\n\t"
00327 "LOOP_BEGIN mem_update%=;\n\t"
00328 "A0 = 0;\n\t"
00329 "I3 = P3;\n\t"
00330 "I1 = P1;\n\t"
00331 "P1 += 2;\n\t"
00332 "R0 = LC0;\n\t"
00333 "LC1=R0;\n\t"
00334 "R5.L = W[I3--] || R4.L = W[I1++];\n\t"
00335 "LOOP mem_accum%= LC1;\n\t"
00336 "LOOP_BEGIN mem_accum%=;\n\t"
00337 "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
00338 "LOOP_END mem_accum%=;\n\t"
00339 "R0 = A0;\n\t"
00340 "[P4++] = R0;\n\t"
00341 "LOOP_END mem_update%=;\n\t"
00342 "L1 = 0;\n\t"
00343 : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem)
00344 : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
00345 );
00346
00347 }
00348
00349 #define OVERRIDE_FIR_MEM2
00350 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
00351 {
00352 int i;
00353 spx_coef_t den2[12];
00354 spx_coef_t *den;
00355 den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc);
00356 for (i=0;i<10;i++)
00357 den[i] = 0;
00358 filter_mem2(x, num, den, y, N, ord, mem);
00359 }
00360
00361
00362 #define OVERRIDE_COMPUTE_IMPULSE_RESPONSE
00363 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
00364 {
00365 int i;
00366 VARDECL(spx_word16_t *ytmp);
00367 ALLOC(ytmp, N, spx_word16_t);
00368 spx_word16_t *ytmp2 = ytmp;
00369 y[0] = LPC_SCALING;
00370 for (i=0;i<ord;i++)
00371 y[i+1] = awk1[i];
00372 i++;
00373 for (;i<N;i++)
00374 y[i] = 0;
00375
00376 N-=1;
00377 __asm__ __volatile__
00378 (
00379 "I0 = %0;\n\t"
00380 "I1 = %1;\n\t"
00381 "L0 = 0;\n\t"
00382 "L1 = 0;\n\t"
00383 "L2 = 0;\n\t"
00384 "L3 = 0;\n\t"
00385 "R0 = 1;\n\t"
00386 "R0 <<= 13;\n\t"
00387 "W[I0] = R0.L;\n\t"
00388 "R0 <<= 1;\n\t"
00389 "W[I1] = R0.L;\n\t"
00390 "R0 = %5;\n\t"
00391 "LC0 = R0;\n\t"
00392 "R2 = 0;\n\t"
00393 "LOOP samples%= LC0;\n\t"
00394 "LOOP_BEGIN samples%=;\n\t"
00395 "R2 += 1;\n\t"
00396 "R2 = MIN(R2, %4);\n\t"
00397 "I0 = %0;\n\t"
00398 "I1 = %1;\n\t"
00399 "I2 = %2;\n\t"
00400 "I3 = %3;\n\t"
00401 "%0 += 2;\n\t"
00402 "%1 += 2;\n\t"
00403 "A0 = A1 = 0;\n\t"
00404 "R0.L = W[I0--] || R1.L = W[I2++];\n\t"
00405 "LC1 = R2;\n\t"
00406 "LOOP filter%= LC1;\n\t"
00407 "LOOP_BEGIN filter%=;\n\t"
00408 "A0 -= R0.L*R1.L (IS) || R0.L = W[I1--] || R1.L = W[I3++];\n\t"
00409 "A1 -= R0.L*R1.L (IS) || R0.L = W[I0--] || R1.L = W[I2++];\n\t"
00410 "LOOP_END filter%=;\n\t"
00411 "R0 = A0, R1 = A1;\n\t"
00412 "R3 = W[%1] (X);\n\t"
00413 "R3 <<= 13;\n\t"
00414 "R0 = R0 + R3;\n\t"
00415 "R3 = R0 >>> 13;\n\t"
00416 "W[%0] = R3.L;\n\t"
00417 "R0 <<= 1;\n\t"
00418 "R1 = R1 + R0;\n\t"
00419 "R1 >>>= 13;\n\t"
00420 "W[%1] = R1.L;\n\t"
00421 "LOOP_END samples%=;\n\t"
00422 : "=a" (ytmp2), "=a" (y)
00423 : "a" (awk2), "a" (ak), "d" (ord), "m" (N), "0" (ytmp2), "1" (y)
00424 : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3", "A0", "A1"
00425 );
00426 }
00427
00428
00429
00430 #if 0
00431 #define min(a,b) ((a)<(b) ? (a):(b))
00432
00433 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
00434 {
00435 int i,j;
00436 VARDECL(spx_word16_t *ytmp);
00437 ALLOC(ytmp, N, spx_word16_t);
00438
00439 y[0] = LPC_SCALING;
00440 for (i=0;i<ord;i++)
00441 y[i+1] = awk1[i];
00442 i++;
00443 for (;i<N;i++)
00444 y[i] = 0;
00445
00446 for (i=0;i<N;i++)
00447 {
00448 spx_word32_t yi = SHL32(EXTEND32(y[i]),LPC_SHIFT);
00449 spx_word32_t yi2 = 0;
00450 for (j=0;j<min(i,ord);j++)
00451 {
00452 yi = MAC16_16(yi, awk2[j], -ytmp[i-j-1]);
00453 yi2 = MAC16_16(yi2, ak[j], -y[i-j-1]);
00454 }
00455 ytmp[i] = EXTRACT16(SHR32(yi,LPC_SHIFT));
00456 yi2 = ADD32(yi2,SHL32(yi,1));
00457 y[i] = EXTRACT16(SHR32(yi2,LPC_SHIFT));
00458 }
00459
00460 }
00461
00462
00463 void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00464 {
00465 int i,j;
00466 spx_word16_t xi,yi,nyi;
00467 spx_word16_t x[N],y[N];
00468 spx_word16_t *xx, *yy;
00469 xx = x;
00470 yy = y;
00471 for (i=0;i<N;i++)
00472 {
00473 x[i] = EXTRACT16(SHR32(_x[i],SIG_SHIFT));
00474 }
00475
00476 for (i=0;i<ord;i++)
00477 {
00478 spx_word32_t yi = mem[i];
00479 for (j=0;j<i;j++)
00480 {
00481 yi = MAC16_16(yi, num[j], x[i-j-1]);
00482 yi = MAC16_16(yi, den[j], -y[i-j-1]);
00483 }
00484 _y[i] = ADD32(_x[i],SHL32(yi,1));
00485 y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
00486 }
00487 for (i=ord;i<N;i++)
00488 {
00489 spx_word32_t yi = 0;
00490 for (j=0;j<ord;j++)
00491 {
00492 yi = MAC16_16(yi, num[j], x[i-j-1]);
00493 yi = MAC16_16(yi, den[j], -y[i-j-1]);
00494 }
00495 _y[i] = ADD32(_x[i],SHL32(yi,1));
00496 y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
00497 }
00498
00499 for (i=0;i<ord;i++)
00500 {
00501 spx_mem_t m = 0;
00502 for (j=0;j<ord-i;j++)
00503 {
00504 m = MAC16_16(m, x[N-1-j], num[j+i]);
00505 m = MAC16_16(m, -y[N-1-j], den[j+i]);
00506 }
00507 mem[i] = m;
00508 }
00509 }
00510 #endif