PruneVarSubMean.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "preproc/PruneVarSubMean.h"
00013 #include "preproc/SimplePreProc.h"
00014 #include "features/Features.h"
00015 #include "features/RealFeatures.h"
00016 #include "lib/io.h"
00017 #include "lib/Mathematics.h"
00018
00019 CPruneVarSubMean::CPruneVarSubMean(bool divide)
00020 : CSimplePreProc<DREAL>("PruneVarSubMean","PVSM"), idx(NULL), mean(NULL),
00021 std(NULL), num_idx(0), divide_by_std(divide), initialized(false)
00022 {
00023 }
00024
00025 CPruneVarSubMean::~CPruneVarSubMean()
00026 {
00027 cleanup();
00028 }
00029
00031 bool CPruneVarSubMean::init(CFeatures* p_f)
00032 {
00033 if (!initialized)
00034 {
00035 ASSERT(p_f->get_feature_class()==C_SIMPLE);
00036 ASSERT(p_f->get_feature_type()==F_DREAL);
00037
00038 CRealFeatures *f=(CRealFeatures*) p_f;
00039 INT num_examples=f->get_num_vectors();
00040 INT num_features=((CRealFeatures*)f)->get_num_features();
00041
00042 delete[] mean;
00043 delete[] idx;
00044 delete[] std;
00045 mean=NULL;
00046 idx=NULL;
00047 std=NULL;
00048
00049 mean=new double[num_features];
00050 double* var=new double[num_features];
00051 INT i,j;
00052
00053 for (i=0; i<num_features; i++)
00054 {
00055 mean[i]=0;
00056 var[i]=0 ;
00057 }
00058
00059
00060 for (i=0; i<num_examples; i++)
00061 {
00062 INT len ; bool free ;
00063 DREAL* feature=f->get_feature_vector(i, len, free) ;
00064
00065 for (j=0; j<len; j++)
00066 mean[j]+=feature[j];
00067
00068 f->free_feature_vector(feature, i, free) ;
00069 }
00070
00071 for (j=0; j<num_features; j++)
00072 mean[j]/=num_examples ;
00073
00074
00075 for (i=0; i<num_examples; i++)
00076 {
00077 INT len ; bool free ;
00078 DREAL* feature=f->get_feature_vector(i, len, free) ;
00079
00080 for (j=0; j<num_features; j++)
00081 var[j]+=(mean[j]-feature[j])*(mean[j]-feature[j]) ;
00082
00083 f->free_feature_vector(feature, i, free) ;
00084 }
00085
00086 INT num_ok=0;
00087 INT* idx_ok=new int[num_features];
00088
00089 for (j=0; j<num_features; j++)
00090 {
00091 var[j]/=num_examples ;
00092
00093 if (var[j]>=1e-14)
00094 {
00095 idx_ok[num_ok]=j ;
00096 num_ok++ ;
00097 }
00098 }
00099
00100 SG_INFO( "Reducing number of features from %i to %i\n", num_features, num_ok) ;
00101
00102 delete[] idx ;
00103 idx=new int[num_ok];
00104 DREAL* new_mean=new DREAL[num_ok];
00105 std=new DREAL[num_ok];
00106
00107 for (j=0; j<num_ok; j++)
00108 {
00109 idx[j]=idx_ok[j] ;
00110 new_mean[j]=mean[idx_ok[j]];
00111 std[j]=sqrt(var[idx_ok[j]]);
00112 }
00113 num_idx=num_ok ;
00114 delete[] idx_ok ;
00115 delete[] mean;
00116 delete[] var;
00117 mean=new_mean;
00118
00119 initialized=true;
00120 return true ;
00121 }
00122 else
00123 return false;
00124 }
00125
00127 void CPruneVarSubMean::cleanup()
00128 {
00129 delete[] idx;
00130 idx=NULL;
00131 delete[] mean;
00132 mean=NULL;
00133 delete[] std;
00134 std=NULL;
00135 }
00136
00140 DREAL* CPruneVarSubMean::apply_to_feature_matrix(CFeatures* f)
00141 {
00142 ASSERT(initialized);
00143
00144 INT num_vectors=0;
00145 INT num_features=0;
00146 DREAL* m=((CRealFeatures*) f)->get_feature_matrix(num_features, num_vectors);
00147
00148 SG_INFO( "get Feature matrix: %ix%i\n", num_vectors, num_features);
00149 SG_INFO( "Preprocessing feature matrix\n");
00150 for (INT vec=0; vec<num_vectors; vec++)
00151 {
00152 DREAL* v_src=&m[num_features*vec];
00153 DREAL* v_dst=&m[num_idx*vec];
00154
00155 if (divide_by_std)
00156 {
00157 for (INT feat=0; feat<num_idx; feat++)
00158 v_dst[feat]=(v_src[idx[feat]]-mean[feat])/std[feat];
00159 }
00160 else
00161 {
00162 for (INT feat=0; feat<num_idx; feat++)
00163 v_dst[feat]=(v_src[idx[feat]]-mean[feat]);
00164 }
00165 }
00166
00167 ((CRealFeatures*) f)->set_num_features(num_idx);
00168 ((CRealFeatures*) f)->get_feature_matrix(num_features, num_vectors);
00169 SG_INFO( "new Feature matrix: %ix%i\n", num_vectors, num_features);
00170
00171 return m;
00172 }
00173
00176 DREAL* CPruneVarSubMean::apply_to_feature_vector(DREAL* f, INT &len)
00177 {
00178 DREAL* ret=NULL;
00179
00180 if (initialized)
00181 {
00182 ret=new DREAL[num_idx] ;
00183
00184 if (divide_by_std)
00185 {
00186 for (INT i=0; i<num_idx; i++)
00187 ret[i]=(f[idx[i]]-mean[i])/std[i];
00188 }
00189 else
00190 {
00191 for (INT i=0; i<num_idx; i++)
00192 ret[i]=(f[idx[i]]-mean[i]);
00193 }
00194 len=num_idx ;
00195 }
00196 else
00197 {
00198 ret=new DREAL[len] ;
00199 for (INT i=0; i<len; i++)
00200 ret[i]=f[i];
00201 }
00202
00203 return ret;
00204 }
00205
00207 bool CPruneVarSubMean::load_init_data(FILE* src)
00208 {
00209 bool result=false;
00210 INT divide=0;
00211
00212 ASSERT(fread(÷, sizeof(int), 1, src)==1);
00213 ASSERT(fread(&num_idx, sizeof(int), 1, src)==1);
00214 SG_INFO( "divide:%d num_idx:%d\n", divide, num_idx);
00215 delete[] mean;
00216 delete[] idx;
00217 delete[] std;
00218 idx=new int[num_idx];
00219 mean=new DREAL[num_idx];
00220 std=new DREAL[num_idx];
00221 ASSERT (mean!=NULL && idx!=NULL && std!=NULL);
00222 ASSERT(fread(idx, sizeof(int), num_idx, src)==(UINT) num_idx);
00223 ASSERT(fread(mean, sizeof(DREAL), num_idx, src)==(UINT) num_idx);
00224 ASSERT(fread(std, sizeof(DREAL), num_idx, src)==(UINT) num_idx);
00225
00226 result=true;
00227 divide_by_std=(divide==1);
00228 initialized=true;
00229 return result;
00230 }
00231
00233 bool CPruneVarSubMean::save_init_data(FILE* dst)
00234 {
00235 return false;
00236 }