filters_bfin.h

Go to the documentation of this file.
00001 /* Copyright (C) 2005 Analog Devices */
00006 /*
00007    Redistribution and use in source and binary forms, with or without
00008    modification, are permitted provided that the following conditions
00009    are met:
00010    
00011    - Redistributions of source code must retain the above copyright
00012    notice, this list of conditions and the following disclaimer.
00013    
00014    - Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017    
00018    - Neither the name of the Xiph.org Foundation nor the names of its
00019    contributors may be used to endorse or promote products derived from
00020    this software without specific prior written permission.
00021    
00022    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00025    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00026    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00027    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00028    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00029    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00030    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00031    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00032    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */
00034 
00035 #include <stdio.h>
00036 
00037 #define OVERRIDE_NORMALIZE16
00038 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
00039 {
00040    spx_sig_t max_val=1;
00041    int sig_shift;
00042 
00043    __asm__ 
00044    (
00045    "%0 = 0;\n\t"
00046    "I0 = %1;\n\t"
00047    "L0 = 0;\n\t"
00048    "R1 = [I0++];\n\t"
00049    "LOOP norm_max%= LC0 = %2;\n\t"
00050    "LOOP_BEGIN norm_max%=;\n\t"
00051       "R2 = ABS R1 || R1 = [I0++];\n\t"
00052       "%0 = MAX(%0, R2);\n\t"
00053    "LOOP_END norm_max%=;\n\t"
00054    : "=&d" (max_val)
00055    : "a" (x), "a" (len)
00056    : "R1", "R2"
00057    );
00058 
00059    sig_shift=0;
00060    while (max_val>max_scale)
00061    {
00062       sig_shift++;
00063       max_val >>= 1;
00064    }
00065 
00066    __asm__ __volatile__ 
00067    (
00068    "I0 = %0;\n\t"
00069    "L0 = 0;\n\t"
00070    "I1 = %1;\n\t"
00071    "L1 = 0;\n\t"
00072    "R0 = [I0++];\n\t"
00073    "LOOP norm_shift%= LC0 = %3 >> 1;\n\t"
00074    "LOOP_BEGIN norm_shift%=;\n\t"
00075       "R1 = ASHIFT R0 by %2.L || R2 = [I0++];\n\t"
00076       "R3 = ASHIFT R2 by %2.L || R0 = [I0++];\n\t"
00077       "R3 = PACK(R3.L, R1.L);\n\t"
00078       "[I1++] = R3;\n\t"
00079    "LOOP_END norm_shift%=;\n\t"
00080    : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len)
00081    : "I0", "L0", "I1", "L1", "R0", "R1", "R2", "R3", "memory"
00082    );
00083    return sig_shift;
00084 }
00085 
00086 #define OVERRIDE_FILTER_MEM2
      /*
       * Blackfin inline-assembly override of filter_mem2(): a filter with a
       * numerator (num), a denominator (den) and ord words of state (mem).
       *
       * For every sample i in [0, N) the assembly computes
       *    _y[i] = _x[i] + (acc << 1)
       * where acc starts from mem[i] for i < ord (0 afterwards) and accumulates
       *    + num[j] * xs[i-1-j]  -  den[j] * ys[i-1-j]
       * over the taps that are available.  xs/ys are the upper 16 bits of
       * (_x << 2) and (_y << 2).  After the last sample, mem[0..ord-1] is
       * recomputed from the tail of the signal.  A plain C version of the same
       * computation is kept under "#if 0" at the end of this file.
       *
       *   _x   input samples (32-bit)          _y   output samples (32-bit)
       *   num  numerator coefficients (ord)    den  denominator coefficients (ord)
       *   N    number of samples               ord  filter order
       *   mem  filter state, read at entry and rewritten before returning
       */
00087 void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00088 {
         /* xy[i] holds the 16-bit x and y history of sample i packed into one
            32-bit word.  One extra word sits in front of xy[0] because the
            pipelined loads below fetch xy[-1] (the fetched value is not used). */
00089    spx_word32_t xy2[N+1];
00090    spx_word32_t *xy = xy2+1;
         /* num[] and den[] interleaved as 16-bit pairs so that one 32-bit load
            brings in both coefficients of a tap.  The MACs below pair R4.L with
            the x half and R4.H with the y half, i.e. num is expected in the low
            half of each word (assumes little-endian halfword order -- confirm). */
00091    spx_word32_t numden_a[2*ord+2];
00092    spx_word16_t *numden = (spx_word16_t*) numden_a;
00093    int i;
00094    for (i=0;i<ord;i++)
00095    {
00096       numden[2*i] = num[i];
00097       numden[2*i+1] = den[i];
00098    }
00099    __asm__ __volatile__
00100    (
00101    /* Register setup */
00102    "R0 = %5;\n\t"      /*ord */
00103    
         /* I0/B0 = numden, linear addressing for now (L0 = 0) */
00104    "P0 = %3;\n\t"
00105    "I0 = P0;\n\t"
00106    "B0 = P0;\n\t"
00107    "L0 = 0;\n\t"
00108       
         /* P2/I2 = xy history */
00109    "P2 = %0;\n\t"
00110    "I2 = P2;\n\t"
00111    "L2 = 0;\n\t"
00112    
         /* P4 = mem, P0 = _x (P0 is reused), P1 = _y */
00113    "P4 = %6;\n\t"
00114    "P0 = %1;\n\t"
00115    "P1 = %2;\n\t"
00116    
00117    /* First sample */
         /* _y[0] = (mem[0] << 1) + _x[0]; then xy[0] is built from the high
            halves of (_y[0] << 2) and (_x[0] << 2). */
00118    "R1 = [P4++];\n\t"
00119    "R1 <<= 1;\n\t"
00120    "R2 = [P0++];\n\t"
00121    "R1 = R1 + R2;\n\t"
00122    "[P1++] = R1;\n\t"
00123    "R1 <<= 2;\n\t"
00124    "R2 <<= 2;\n\t"
00125    "R2 = PACK(R1.H, R2.H);\n\t"
00126    "[P2] = R2;\n\t"
00127                
00128    /* Samples 1 to ord-1 (using memory) */
         /* Outer loop runs ord-1 times.  R3 counts 1, 2, ... and is the number
            of taps (inner trip count) available for the current sample. */
00129    "R0 += -1;\n\t"
00130    "R3 = 0;\n\t"
00131    "LC0 = R0;\n\t"
00132    "LOOP filter_start%= LC0;\n\t"
00133    "LOOP_BEGIN filter_start%=;\n\t"
00134       "R3 += 1;\n\t"
00135       "LC1 = R3;\n\t"
00136       
            /* A1 is seeded with mem[i], A0 with 0.  I0 walks the coefficients
               forward from the start, I2 walks the history backward from the
               previous sample; P2 moves on to the slot of the current sample. */
00137       "R1 = [P4++];\n\t"
00138       "A1 = R1;\n\t"
00139       "A0 = 0;\n\t"
00140       "I0 = B0;\n\t"
00141       "I2 = P2;\n\t"
00142       "P2 += 4;\n\t"
00143       "R4 = [I0++] || R5 = [I2--];\n\t"
00144       "LOOP filter_start_inner%= LC1;\n\t"
00145       "LOOP_BEGIN filter_start_inner%=;\n\t"
               /* two MACs per tap, with the loads for the next tap in parallel */
00146          "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00147       "LOOP_END filter_start_inner%=;\n\t"
            /* _y[i] = _x[i] + ((A0 + A1) << 1), then pack the new history word */
00148       "A0 += A1;\n\t"
00149       "R4 = A0;\n\t"
00150       "R4 <<= 1;\n\t"
00151       "R2 = [P0++];\n\t"
00152       "R4 = R4 + R2;\n\t"
00153       "[P1++] = R4;\n\t"
00154       "R4 <<= 2;\n\t"
00155       "R2 <<= 2;\n\t"
00156       "R2 = PACK(R4.H, R2.H);\n\t"
00157       "[P2] = R2;\n\t"
00158 
00159    "LOOP_END filter_start%=;\n\t"
00160 
00161    /* Samples ord to N*/   
         /* L0 = 4*ord turns I0 into a circular pointer over the ord coefficient
            words starting at B0, so it is back at the first tap after every
            inner loop without being reloaded. */
00162    "R0 = %5;\n\t"
00163    "R0 <<= 1;\n\t"
00164    "I0 = B0;\n\t"
00165    "R0 <<= 1;\n\t"   
00166    "L0 = R0;\n\t"
00167    
         /* LC0 = N - ord outer iterations, P3 = ord inner iterations.
            M0 = 4*ord + 8: each inner loop leaves I2 ord+2 words below the slot
            of the sample being produced, and "I2 += M0" brings it back there. */
00168    "R0 = %5;\n\t"
00169    "R2 = %4;\n\t"
00170    "R2 = R2 - R0;\n\t"
00171    "R4 = [I0++];\n\t"
00172    "LC0 = R2;\n\t"
00173    "P3 = R0;\n\t"
00174    "R0 <<= 2;\n\t"
00175    "R0 += 8;\n\t"
00176    "I2 = P2;\n\t"
00177    "M0 = R0;\n\t"
00178    "A0 = A1 = 0;\n\t"
00179    "R5 = [I2--];\n\t"
00180    "LOOP filter_mid%= LC0;\n\t"
00181    "LOOP_BEGIN filter_mid%=;\n\t"
00182       "LOOP filter_mid_inner%= LC1=P3;\n\t"
00183       "LOOP_BEGIN filter_mid_inner%=;\n\t"
00184          "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00185       "LOOP_END filter_mid_inner%=;\n\t"
            /* _y[i] = _x[i] + ((A0 + A1) << 1).  The packed history word stays
               in R5, where the next iteration's first MAC expects it, and is
               also stored to xy[i]. */
00186       "R0 = (A0 += A1) || I2 += M0;\n\t"
00187       "R0 = R0 << 1 || R5 = [P0++];\n\t"
00188       "R0 = R0 + R5;\n\t"
00189       "R0 = R0 << 2 || [P1++] = R0;\n\t"
00190       "R5 = R5 << 2;\n\t"
00191       "R5 = PACK(R0.H, R5.H);\n\t"
            /* NOTE(review): the next line has no ';' before the newline, unlike
               every other instruction here -- confirm the assembler accepts it. */
00192       "A0 = A1 = 0 || [I2--] = R5\n\t"
00193       "LOOP_END filter_mid%=;\n\t"
         /* undo the last post-decrement: P2 = most recent history word */
00194    "I2 += 4;\n\t"
00195    "P2 = I2;\n\t"
00196    /* Update memory */
         /* ord outer iterations.  Each one starts one coefficient word further
            into numden (P0) and always starts from the newest history word (P2).
            The inner trip count is the current value of LC0. */
00197    "P4 = %6;\n\t"
00198    "R0 = %5;\n\t"
00199    "LC0 = R0;\n\t"
00200    "P0 = B0;\n\t"
00201    "A0 = A1 = 0;\n\t"
00202    "LOOP mem_update%= LC0;\n\t"
00203    "LOOP_BEGIN mem_update%=;\n\t"
00204       "I2 = P2;\n\t"
00205       "I0 = P0;\n\t"
00206       "P0 += 4;\n\t"
00207       "R0 = LC0;\n\t"
00208       "LC1 = R0;\n\t"
00209       "R5 = [I2--] || R4 = [I0++];\n\t"
00210       "LOOP mem_accum%= LC1;\n\t"
00211       "LOOP_BEGIN mem_accum%=;\n\t"
00212          "A0 += R4.L*R5.L (IS), A1 -= R4.H*R5.H (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
00213       "LOOP_END mem_accum%=;\n\t"
            /* mem[i] = A0 + A1, then clear both accumulators */
00214       "R0 = (A0 += A1);\n\t"
00215       "A0 = A1 = 0 || [P4++] = R0;\n\t"
00216    "LOOP_END mem_update%=;\n\t"
         /* leave I0 in linear mode again */
00217    "L0 = 0;\n\t"
         /* All operands are passed as memory ("m") and loaded by the asm itself. */
00218    : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
00219    : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
00220    );
00221 
00222 }
00223 
00224 
00225 
00226 
00227 #define OVERRIDE_IIR_MEM2
      /*
       * Blackfin inline-assembly override of iir_mem2(): filter with a
       * denominator only (den) and ord words of state (mem).
       *
       * For every sample i in [0, N) the assembly computes
       *    _y[i] = _x[i] + (acc << 1)
       * where acc starts from mem[i] for i < ord (0 afterwards) and accumulates
       *    - den[j] * ys[i-1-j]
       * over the taps that are available; ys is the upper 16 bits of (_y << 2).
       * After the last sample, mem[0..ord-1] is recomputed from the last
       * outputs.
       *
       *   _x   input samples (32-bit)     _y   output samples (32-bit)
       *   den  denominator coefficients, read as 16-bit words
       *   N    number of samples          ord  filter order
       *   mem  filter state, read at entry and rewritten before returning
       */
00228 void iir_mem2(const spx_sig_t *_x, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00229 {
         /* 16-bit output history.  yy starts two entries into the array, which
            keeps the pipelined look-behind loads (yy[-1]) inside the buffer. */
00230    spx_word16_t y[N+2];
00231    spx_word16_t *yy;
00232    yy = y+2;
00233    __asm__ __volatile__
00234    (
00235    /* Register setup */
00236    "R0 = %5;\n\t"      /*ord */
00237    
         /* I1/B1 = den, linear addressing for now (L1 = 0) */
00238    "P1 = %3;\n\t"
00239    "I1 = P1;\n\t"
00240    "B1 = P1;\n\t"
00241    "L1 = 0;\n\t"
00242    
         /* P3/I3 = yy history */
00243    "P3 = %0;\n\t"
00244    "I3 = P3;\n\t"
00245    "L3 = 0;\n\t"
00246    
         /* P4 = mem, P0 = _x, P1 = _y (P1 is reused) */
00247    "P4 = %6;\n\t"
00248    "P0 = %1;\n\t"
00249    "P1 = %2;\n\t"
00250    
00251    /* First sample */
         /* _y[0] = (mem[0] << 1) + _x[0]; yy[0] = high half of (_y[0] << 2).
            The final "R2 <<= 2" has no effect on the result: R2 is reloaded
            before it is read again. */
00252    "R1 = [P4++];\n\t"
00253    "R1 <<= 1;\n\t"
00254    "R2 = [P0++];\n\t"
00255    "R1 = R1 + R2;\n\t"
00256    "[P1++] = R1;\n\t"
00257    "R1 <<= 2;\n\t"
00258    "W[P3] = R1.H;\n\t"
00259    "R2 <<= 2;\n\t"
00260 
00261    /* Samples 1 to ord-1 (using memory) */
         /* Outer loop runs ord-1 times.  R3 counts 1, 2, ... and is the number
            of taps (inner trip count) available for the current sample. */
00262    "R0 += -1;\n\t"
00263    "R3 = 0;\n\t"
00264    "LC0 = R0;\n\t"
00265    "LOOP filter_start%= LC0;\n\t"
00266    "LOOP_BEGIN filter_start%=;\n\t"
00267       "R3 += 1;\n\t"
00268       "LC1 = R3;\n\t"
00269       
            /* A1 is seeded with mem[i].  I1 walks den forward from the start,
               I3 walks the history backward from the previous sample; P3 moves
               on to the slot of the current sample. */
00270       "R1 = [P4++];\n\t"
00271       "A1 = R1;\n\t"
00272       "I1 = B1;\n\t"
00273       "I3 = P3;\n\t"
00274       "P3 += 2;\n\t"
00275       "LOOP filter_start_inner%= LC1;\n\t"
00276       "LOOP_BEGIN filter_start_inner%=;\n\t"
00277          "R4.L = W[I1++];\n\t"
00278          "R5.L = W[I3--];\n\t"
00279          "A1 -= R4.L*R5.L (IS);\n\t"
00280       "LOOP_END filter_start_inner%=;\n\t"
00281    
            /* _y[i] = _x[i] + (A1 << 1); yy[i] = high half of (_y[i] << 2) */
00282       "R1 = A1;\n\t"
00283       "R1 <<= 1;\n\t"
00284       "R2 = [P0++];\n\t"
00285       "R1 = R1 + R2;\n\t"
00286       "[P1++] = R1;\n\t"
00287       "R1 <<= 2;\n\t"
00288       "W[P3] = R1.H;\n\t"
00289       "R2 <<= 2;\n\t"
00290    "LOOP_END filter_start%=;\n\t"
00291 
00292    /* Samples ord to N*/   
         /* L1 = 2*ord turns I1 into a circular pointer over the ord 16-bit
            coefficients starting at B1, so it is back at the first tap after
            every inner loop without being reloaded. */
00293    "R0 = %5;\n\t"
00294    "R0 <<= 1;\n\t"
00295    "I1 = B1;\n\t"
00296    "L1 = R0;\n\t"
00297    
         /* LC0 = N - ord outer iterations; R0 = ord is the inner trip count.
            R4.L is preloaded with the first coefficient. */
00298    "R0 = %5;\n\t"
00299    "R2 = %4;\n\t"
00300    "R2 = R2 - R0;\n\t"
00301    "R4.L = W[I1++];\n\t"
00302    "LC0 = R2;\n\t"
00303    "LOOP filter_mid%= LC0;\n\t"
00304    "LOOP_BEGIN filter_mid%=;\n\t"
00305       "LC1 = R0;\n\t"
00306       "A1 = 0;\n\t"
00307       "I3 = P3;\n\t"
00308       "P3 += 2;\n\t"
00309       "R5.L = W[I3--];\n\t"
00310       "LOOP filter_mid_inner%= LC1;\n\t"
00311       "LOOP_BEGIN filter_mid_inner%=;\n\t"
               /* one MAC per tap, with the loads for the next tap in parallel */
00312          "A1 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
00313       "LOOP_END filter_mid_inner%=;\n\t"
            /* _y[i] = _x[i] + (A1 << 1); yy[i] = high half of (_y[i] << 2) */
00314       "R1 = A1;\n\t"
00315       "R1 = R1 << 1 || R2 = [P0++];\n\t"
00316       "R1 = R1 + R2;\n\t"
00317       "R1 = R1 << 2 || [P1++] = R1;\n\t"
00318       "W[P3] = R1.H;\n\t"
00319    "LOOP_END filter_mid%=;\n\t"
00320      
00321    /* Update memory */
         /* ord outer iterations.  Each one starts one coefficient further into
            den (P1, reused here as the den cursor) and always starts from the
            newest history entry (P3).  The inner trip count is the current
            value of LC0. */
00322    "P4 = %6;\n\t"
00323    "R0 = %5;\n\t"
00324    "LC0 = R0;\n\t"
00325    "P1 = B1;\n\t"
00326    "LOOP mem_update%= LC0;\n\t"
00327    "LOOP_BEGIN mem_update%=;\n\t"
00328       "A0 = 0;\n\t"
00329       "I3 = P3;\n\t"
00330       "I1 = P1;\n\t"
00331       "P1 += 2;\n\t"
00332       "R0 = LC0;\n\t"
00333       "LC1=R0;\n\t"
00334       "R5.L = W[I3--] || R4.L = W[I1++];\n\t"
00335       "LOOP mem_accum%= LC1;\n\t"
00336       "LOOP_BEGIN mem_accum%=;\n\t"
00337          "A0 -= R4.L*R5.L (IS) || R4.L = W[I1++] || R5.L = W[I3--];\n\t"
00338       "LOOP_END mem_accum%=;\n\t"
            /* mem[i] = A0 */
00339       "R0 = A0;\n\t"
00340       "[P4++] = R0;\n\t"
00341    "LOOP_END mem_update%=;\n\t"
         /* leave I1 in linear mode again */
00342    "L1 = 0;\n\t"
         /* All operands are passed as memory ("m") and loaded by the asm itself.
            "P2" is listed as clobbered although no instruction above names it. */
00343    : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem)
00344    : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
00345    );
00346 
00347 }
00348 
00349 #define OVERRIDE_FIR_MEM2
00350 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
00351 {
00352    int i;
00353    spx_coef_t den2[12];
00354    spx_coef_t *den;
00355    den = (spx_coef_t*)((((int)den2)+4)&0xfffffffc);
00356    for (i=0;i<10;i++)
00357       den[i] = 0;
00358    filter_mem2(x, num, den, y, N, ord, mem);
00359 }
00360 
00361 
00362 #define OVERRIDE_COMPUTE_IMPULSE_RESPONSE
00363 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
00364 {
00365    int i;
00366    VARDECL(spx_word16_t *ytmp);
00367    ALLOC(ytmp, N, spx_word16_t);
00368    spx_word16_t *ytmp2 = ytmp;
00369    y[0] = LPC_SCALING;
00370    for (i=0;i<ord;i++)
00371       y[i+1] = awk1[i];
00372    i++;
00373    for (;i<N;i++)
00374       y[i] = 0;
00375 
00376    N-=1;
00377    __asm__ __volatile__
00378    (
00379          "I0 = %0;\n\t"
00380          "I1 = %1;\n\t"
00381          "L0 = 0;\n\t"
00382          "L1 = 0;\n\t"
00383          "L2 = 0;\n\t"
00384          "L3 = 0;\n\t"
00385          "R0 = 1;\n\t"
00386          "R0 <<= 13;\n\t"
00387          "W[I0] = R0.L;\n\t"
00388          "R0 <<= 1;\n\t"
00389          "W[I1] = R0.L;\n\t"
00390          "R0 = %5;\n\t"
00391          "LC0 = R0;\n\t"
00392          "R2 = 0;\n\t"
00393          "LOOP samples%= LC0;\n\t"
00394          "LOOP_BEGIN samples%=;\n\t"
00395             "R2 += 1;\n\t"
00396             "R2 = MIN(R2, %4);\n\t"
00397             "I0 = %0;\n\t"
00398             "I1 = %1;\n\t"
00399             "I2 = %2;\n\t"
00400             "I3 = %3;\n\t"
00401             "%0 += 2;\n\t"
00402             "%1 += 2;\n\t"
00403             "A0 = A1 = 0;\n\t"
00404             "R0.L = W[I0--] || R1.L = W[I2++];\n\t"
00405             "LC1 = R2;\n\t"
00406             "LOOP filter%= LC1;\n\t"
00407             "LOOP_BEGIN filter%=;\n\t"
00408                "A0 -= R0.L*R1.L (IS) || R0.L = W[I1--] || R1.L = W[I3++];\n\t"
00409                "A1 -= R0.L*R1.L (IS) || R0.L = W[I0--] || R1.L = W[I2++];\n\t"
00410             "LOOP_END filter%=;\n\t"
00411             "R0 = A0, R1 = A1;\n\t"
00412             "R3 = W[%1] (X);\n\t"
00413             "R3 <<= 13;\n\t"
00414             "R0 = R0 + R3;\n\t"
00415             "R3 = R0 >>> 13;\n\t"
00416             "W[%0] = R3.L;\n\t"
00417             "R0 <<= 1;\n\t"
00418             "R1 = R1 + R0;\n\t"
00419             "R1 >>>= 13;\n\t"
00420             "W[%1] = R1.L;\n\t"
00421          "LOOP_END samples%=;\n\t"
00422    : "=a" (ytmp2), "=a" (y)
00423    : "a" (awk2), "a" (ak), "d" (ord), "m" (N), "0" (ytmp2), "1" (y)
00424    : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3", "A0", "A1"
00425    );
00426 }
00427 
00428 
00429 
00430 #if 0 /* Equivalent C function for filter_mem2 and compute_impulse_response */
00431 #define min(a,b) ((a)<(b) ? (a):(b))
00432 
00433 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
00434 {
00435    int i,j;
00436    VARDECL(spx_word16_t *ytmp);
00437    ALLOC(ytmp, N, spx_word16_t);
00438    
00439    y[0] = LPC_SCALING;
00440    for (i=0;i<ord;i++)
00441       y[i+1] = awk1[i];
00442    i++;
00443    for (;i<N;i++)
00444       y[i] = 0;
00445 
00446    for (i=0;i<N;i++)
00447    {
00448       spx_word32_t yi = SHL32(EXTEND32(y[i]),LPC_SHIFT);
00449       spx_word32_t yi2 = 0;
00450       for (j=0;j<min(i,ord);j++)
00451       {
00452          yi = MAC16_16(yi, awk2[j], -ytmp[i-j-1]);
00453          yi2 = MAC16_16(yi2, ak[j], -y[i-j-1]);
00454       }
00455       ytmp[i] = EXTRACT16(SHR32(yi,LPC_SHIFT));
00456       yi2 = ADD32(yi2,SHL32(yi,1));
00457       y[i] = EXTRACT16(SHR32(yi2,LPC_SHIFT));
00458    }
00459 
00460 }
00461 
00462 
00463 void filter_mem2(const spx_sig_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
00464 {
00465    int i,j;
00466    spx_word16_t xi,yi,nyi;
00467    spx_word16_t x[N],y[N];
00468    spx_word16_t *xx, *yy;
00469    xx = x;
00470    yy = y;
00471    for (i=0;i<N;i++)
00472    {
00473       x[i] = EXTRACT16(SHR32(_x[i],SIG_SHIFT));
00474    }
00475    
00476    for (i=0;i<ord;i++)
00477    {
00478       spx_word32_t yi = mem[i];
00479       for (j=0;j<i;j++)
00480       {
00481          yi = MAC16_16(yi, num[j], x[i-j-1]);
00482          yi = MAC16_16(yi, den[j], -y[i-j-1]);
00483       }
00484       _y[i] = ADD32(_x[i],SHL32(yi,1));
00485       y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
00486    }
00487    for (i=ord;i<N;i++)
00488    {
00489       spx_word32_t yi = 0;
00490       for (j=0;j<ord;j++)
00491       {
00492          yi = MAC16_16(yi, num[j], x[i-j-1]);
00493          yi = MAC16_16(yi, den[j], -y[i-j-1]);
00494       }
00495       _y[i] = ADD32(_x[i],SHL32(yi,1));
00496       y[i] = EXTRACT16(SHR32(_y[i],SIG_SHIFT));
00497    }
00498 
00499    for (i=0;i<ord;i++)
00500    {
00501       spx_mem_t m = 0;
00502       for (j=0;j<ord-i;j++)
00503       {
00504          m = MAC16_16(m, x[N-1-j], num[j+i]);
00505          m = MAC16_16(m, -y[N-1-j], den[j+i]);
00506       }
00507       mem[i] = m;
00508    }
00509 }
00510 #endif

Generated on Fri Dec 9 04:54:40 2005 for speex by  doxygen 1.4.5