00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <xmmintrin.h>
00036
/* Unpack the four single-precision lanes of U into four separate floats.
 *
 * Lane 0 (the lowest element) is written to *__Z, lane 1 to *__Y,
 * lane 2 to *__X and lane 3 to *__W -- i.e. the same order in which
 * _mm_setr_ps() takes its arguments.  The writes happen in that order.
 */
static inline void _spx_mm_getr_ps (__m128 U, float *__Z, float *__Y, float *__X, float *__W)
{
   float lanes[4];

   /* Unaligned store: a plain float array has no 16-byte alignment guarantee. */
   _mm_storeu_ps(lanes, U);

   *__Z = lanes[0];
   *__Y = lanes[1];
   *__X = lanes[2];
   *__W = lanes[3];
}
00052
00053 #define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
00054 static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
00055 {
00056 int i, j, k;
00057 __m128 resj, EE;
00058 VARDECL(__m128 *r);
00059 VARDECL(__m128 *shape);
00060 ALLOC(r, subvect_size, __m128);
00061 ALLOC(shape, subvect_size, __m128);
00062 for(j=0;j<subvect_size;j++)
00063 r[j] = _mm_load_ps1(_r+j);
00064 for (i=0;i<shape_cb_size;i+=4)
00065 {
00066 float *_res = resp+i*subvect_size;
00067 const signed char *_shape = shape_cb+i*subvect_size;
00068 EE = _mm_setzero_ps();
00069 for(j=0;j<subvect_size;j++)
00070 {
00071 shape[j] = _mm_setr_ps(0.03125*_shape[j], 0.03125*_shape[subvect_size+j], 0.03125*_shape[2*subvect_size+j], 0.03125*_shape[3*subvect_size+j]);
00072 }
00073 for(j=0;j<subvect_size;j++)
00074 {
00075 resj = _mm_setzero_ps();
00076 for (k=0;k<=j;k++)
00077 resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k]));
00078 _spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j);
00079 *resp2++ = resj;
00080 EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj));
00081 }
00082 E[i>>2] = EE;
00083 }
00084 }