cb_search_sse.h

Go to the documentation of this file.
00001 /* Copyright (C) 2004 Jean-Marc Valin */
00006 /*
00007    Redistribution and use in source and binary forms, with or without
00008    modification, are permitted provided that the following conditions
00009    are met:
00010    
00011    - Redistributions of source code must retain the above copyright
00012    notice, this list of conditions and the following disclaimer.
00013    
00014    - Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017    
00018    - Neither the name of the Xiph.org Foundation nor the names of its
00019    contributors may be used to endorse or promote products derived from
00020    this software without specific prior written permission.
00021    
00022    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00025    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00026    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00027    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00028    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00029    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00030    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00031    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00032    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */
00034 
00035 #include <xmmintrin.h>
00036 
/* Unpack the four single-precision lanes of U into separate floats.
 *
 * Lanes are written lowest first: lane 0 goes to *Z, lane 1 to *Y, lane 2 to
 * *X and lane 3 to *W, i.e. the same argument order _mm_setr_ps() uses to
 * build a vector.  All four destination pointers must be valid.
 */
static inline void _spx_mm_getr_ps (__m128 U, float *Z, float *Y, float *X, float *W)
{
  float lanes[4];

  /* Unaligned store: an ordinary automatic float array is not guaranteed to
     be 16-byte aligned. */
  _mm_storeu_ps(lanes, U);

  *Z = lanes[0];
  *Y = lanes[1];
  *X = lanes[2];
  *W = lanes[3];
}
00052 
00053 #define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
00054 static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
00055 {
00056    int i, j, k;
00057    __m128 resj, EE;
00058    VARDECL(__m128 *r);
00059    VARDECL(__m128 *shape);
00060    ALLOC(r, subvect_size, __m128);
00061    ALLOC(shape, subvect_size, __m128);
00062    for(j=0;j<subvect_size;j++)
00063       r[j] = _mm_load_ps1(_r+j);
00064    for (i=0;i<shape_cb_size;i+=4)
00065    {
00066       float *_res = resp+i*subvect_size;
00067       const signed char *_shape = shape_cb+i*subvect_size;
00068       EE = _mm_setzero_ps();
00069       for(j=0;j<subvect_size;j++)
00070       {
00071          shape[j] = _mm_setr_ps(0.03125*_shape[j], 0.03125*_shape[subvect_size+j], 0.03125*_shape[2*subvect_size+j], 0.03125*_shape[3*subvect_size+j]);
00072       }
00073       for(j=0;j<subvect_size;j++)
00074       {
00075          resj = _mm_setzero_ps();
00076          for (k=0;k<=j;k++)
00077             resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k]));
00078          _spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j);
00079          *resp2++ = resj;
00080          EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj));
00081       }
00082       E[i>>2] = EE;
00083    }
00084 }

Generated on Mon Jun 19 22:40:58 2006 for speex by  doxygen 1.4.6