00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/Mathematics.h"
00014
00015 #include <string.h>
00016 #include <stdlib.h>
00017
00018 #ifdef HAVE_LAPACK
00019 #include "lib/lapack.h"
00020
00021 #include "lib/common.h"
00022 #include "preproc/PCACut.h"
00023 #include "preproc/SimplePreProc.h"
00024 #include "features/Features.h"
00025 #include "features/RealFeatures.h"
00026 #include "lib/io.h"
00027
00028 CPCACut::CPCACut(INT do_whitening_, double thresh_)
00029 : CSimplePreProc<DREAL>("PCACut", "PCAC"), T(NULL), num_dim(0), mean(NULL),
00030 initialized(false), do_whitening(do_whitening_), thresh(thresh_)
00031 {
00032 }
00033
00034 CPCACut::~CPCACut()
00035 {
00036 delete[] T;
00037 delete[] mean;
00038 }
00039
00041 bool CPCACut::init(CFeatures* f)
00042 {
00043 if (!initialized)
00044 {
00045 ASSERT(f->get_feature_class()==C_SIMPLE);
00046 ASSERT(f->get_feature_type()==F_DREAL);
00047
00048 SG_INFO("calling CPCACut::init\n") ;
00049 INT num_vectors=((CRealFeatures*)f)->get_num_vectors() ;
00050 INT num_features=((CRealFeatures*)f)->get_num_features() ;
00051 SG_INFO("num_examples: %ld num_features: %ld \n", num_vectors, num_features);
00052 delete[] mean ;
00053 mean=new double[num_features+1] ;
00054
00055 INT i,j;
00056
00058
00059
00060 for (j=0; j<num_features; j++)
00061 {
00062 mean[j]=0 ;
00063 }
00064
00065
00066 for (i=0; i<num_vectors; i++)
00067 {
00068 INT len;
00069 bool free;
00070 DREAL* vec=((CRealFeatures*) f)->get_feature_vector(i, len, free);
00071 for (j=0; j<num_features; j++)
00072 {
00073 mean[j]+= vec[j];
00074 }
00075 ((CRealFeatures*) f)->free_feature_vector(vec, i, free);
00076 }
00077
00078
00079 for (j=0; j<num_features; j++)
00080 mean[j]/=num_vectors;
00081
00082 SG_DONE();
00083 SG_DEBUG("Computing covariance matrix... of size %.2f M\n", num_features*num_features/1024.0/1024.0);
00084 double *cov=new double[num_features*num_features];
00085
00086 for (j=0; j<num_features*num_features; j++)
00087 cov[j]=0.0 ;
00088
00089 for (i=0; i<num_vectors; i++)
00090 {
00091 if (!(i % (num_vectors/10+1)))
00092 SG_PROGRESS(i, 0, num_vectors);
00093
00094 INT len;
00095 bool free;
00096
00097 DREAL* vec=((CRealFeatures*) f)->get_feature_vector(i, len, free) ;
00098
00099 for (INT jj=0; jj<num_features; jj++)
00100 vec[jj]-=mean[jj] ;
00101
00103 cblas_dger(CblasColMajor, num_features,num_features, 1.0, vec, 1,
00104 vec, 1, cov, (int)num_features) ;
00105
00106
00107
00108
00109
00110 ((CRealFeatures*) f)->free_feature_vector(vec, i, free) ;
00111 }
00112
00113 SG_DONE();
00114
00115 for (i=0; i<num_features; i++)
00116 for (j=0; j<num_features; j++)
00117 cov[i*num_features+j]/=num_vectors ;
00118
00119 SG_DONE();
00120
00121 SG_INFO("Computing Eigenvalues ... ") ;
00122 char V='V';
00123 char U='U';
00124
00125
00126
00127
00128
00129
00130
00131
00132 int info;
00133 int ord= (int) num_features;
00134 int lda= (int) num_features;
00135 double* eigenvalues=new double[num_features] ;
00136
00137
00138 for (i=0; i<num_features; i++)
00139 eigenvalues[i]=0;
00140
00141
00142 wrap_dsyev(V, U, ord, cov, lda, eigenvalues, &info);
00143
00144 num_dim=0;
00145 for (i=0; i<num_features; i++)
00146 {
00147
00148 if (eigenvalues[i]>thresh)
00149 num_dim++ ;
00150 } ;
00151
00152 SG_INFO("Done\nReducing from %i to %i features..", num_features, num_dim) ;
00153
00154 delete[] T;
00155 T=new DREAL[num_dim*num_features];
00156 num_old_dim=num_features;
00157
00158 if (do_whitening)
00159 {
00160 INT offs=0 ;
00161 for (i=0; i<num_features; i++)
00162 {
00163 if (eigenvalues[i]>1e-6)
00164 {
00165 for (INT jj=0; jj<num_features; jj++)
00166 T[offs+jj*num_dim]=cov[num_features*i+jj]/sqrt(eigenvalues[i]) ;
00167 offs++ ;
00168 } ;
00169 }
00170 } ;
00171
00172 delete[] eigenvalues;
00173 delete[] cov;
00174 initialized=true;
00175 SG_INFO("Done\n") ;
00176 return true ;
00177 }
00178 return
00179 false;
00180 }
00181
00183 void CPCACut::cleanup()
00184 {
00185 delete[] T ;
00186 T=NULL ;
00187 }
00188
00192 DREAL* CPCACut::apply_to_feature_matrix(CFeatures* f)
00193 {
00194 INT num_vectors=0;
00195 INT num_features=0;
00196
00197 DREAL* m=((CRealFeatures*) f)->get_feature_matrix(num_features, num_vectors);
00198 SG_INFO("get Feature matrix: %ix%i\n", num_vectors, num_features) ;
00199
00200 if (m)
00201 {
00202 SG_INFO("Preprocessing feature matrix\n");
00203 DREAL* res= new DREAL[num_dim];
00204 double* sub_mean= new double[num_features];
00205
00206 for (INT vec=0; vec<num_vectors; vec++)
00207 {
00208 INT i;
00209
00210 for (i=0; i<num_features; i++)
00211 sub_mean[i]=m[num_features*vec+i]-mean[i] ;
00212
00213 cblas_dgemv(CblasColMajor, CblasNoTrans, num_dim, num_features, 1.0,
00214 T, num_dim, sub_mean, 1, 0, res, 1);
00215
00216 DREAL* m_transformed=&m[num_dim*vec];
00217 for (i=0; i<num_dim; i++)
00218 m_transformed[i]=m[i];
00219 }
00220 delete[] res;
00221 delete[] sub_mean;
00222
00223 ((CRealFeatures*) f)->set_num_features(num_dim);
00224 ((CRealFeatures*) f)->get_feature_matrix(num_features, num_vectors);
00225 SG_INFO("new Feature matrix: %ix%i\n", num_vectors, num_features);
00226 }
00227
00228 return m;
00229 }
00230
00233 DREAL* CPCACut::apply_to_feature_vector(DREAL* f, INT &len)
00234 {
00235 DREAL *ret=new DREAL[num_dim];
00236 DREAL *sub_mean=new DREAL[len];
00237 for (INT i=0; i<len; i++)
00238 sub_mean[i]=f[i]-mean[i];
00239
00240 cblas_dgemv(CblasColMajor, CblasNoTrans, num_dim, len, 1.0 , T, num_dim, sub_mean, 1, 0, ret, 1) ;
00241
00242
00243
00244
00245
00246
00247
00248 delete[] sub_mean ;
00249 len=num_dim ;
00250
00251 return ret;
00252 }
00253
00255 bool CPCACut::load_init_data(FILE* src)
00256 {
00257 ASSERT(fread(&num_dim, sizeof(int), 1, src)==1);
00258 ASSERT(fread(&num_old_dim, sizeof(int), 1, src)==1);
00259 delete[] mean;
00260 delete[] T;
00261 mean=new double[num_dim];
00262 T=new double[num_dim*num_old_dim];
00263 ASSERT (mean!=NULL && T!=NULL);
00264 ASSERT(fread(mean, sizeof(double), num_old_dim, src)==(UINT) num_old_dim);
00265 ASSERT(fread(T, sizeof(double), num_dim*num_old_dim, src)==(UINT) num_old_dim*num_dim);
00266 return true;
00267 }
00268
00270 bool CPCACut::save_init_data(FILE* dst)
00271 {
00272 ASSERT(fwrite(&num_dim, sizeof(int), 1, dst)==1);
00273 ASSERT(fwrite(&num_old_dim, sizeof(int), 1, dst)==1);
00274 ASSERT(fwrite(mean, sizeof(double), num_old_dim, dst)==(UINT) num_old_dim);
00275 ASSERT(fwrite(T, sizeof(double), num_dim*num_old_dim, dst)==(UINT) num_old_dim*num_dim);
00276 return true;
00277 }
00278 #endif // HAVE_LAPACK