00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "features/TOPFeatures.h"
00013 #include "lib/io.h"
00014 #include "lib/Mathematics.h"
00015
00016 CTOPFeatures::CTOPFeatures(INT size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00017 : CRealFeatures(size), neglinear(neglin), poslinear(poslin)
00018 {
00019 memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00020 memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00021 set_models(p,n);
00022 }
00023
00024 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00025 : CRealFeatures(orig), pos(orig.pos), neg(orig.neg), neglinear(orig.neglinear),
00026 poslinear(orig.poslinear)
00027 {
00028 }
00029
00030 CTOPFeatures::~CTOPFeatures()
00031 {
00032 delete[] pos_relevant_indizes.idx_p;
00033 delete[] pos_relevant_indizes.idx_q;
00034 delete[] pos_relevant_indizes.idx_a_cols;
00035 delete[] pos_relevant_indizes.idx_a_rows;
00036 delete[] pos_relevant_indizes.idx_b_cols;
00037 delete[] pos_relevant_indizes.idx_b_rows;
00038
00039 delete[] neg_relevant_indizes.idx_p;
00040 delete[] neg_relevant_indizes.idx_q;
00041 delete[] neg_relevant_indizes.idx_a_cols;
00042 delete[] neg_relevant_indizes.idx_a_rows;
00043 delete[] neg_relevant_indizes.idx_b_cols;
00044 delete[] neg_relevant_indizes.idx_b_rows;
00045
00046 SG_UNREF(pos);
00047 SG_UNREF(neg);
00048 }
00049
00050 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00051 {
00052 ASSERT(p && n);
00053 SG_REF(p);
00054 SG_REF(n);
00055
00056 pos=p;
00057 neg=n;
00058 set_num_vectors(0);
00059
00060 delete[] feature_matrix ;
00061 feature_matrix=NULL ;
00062
00063
00064 if (pos && pos->get_observations())
00065 set_num_vectors(pos->get_observations()->get_num_vectors());
00066
00067 compute_relevant_indizes(p, &pos_relevant_indizes);
00068 compute_relevant_indizes(n, &neg_relevant_indizes);
00069 num_features=compute_num_features();
00070
00071 SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00072 }
00073
00074 DREAL* CTOPFeatures::compute_feature_vector(INT num, INT &len, DREAL* target)
00075 {
00076 DREAL* featurevector=target;
00077
00078 if (!featurevector)
00079 featurevector=new DREAL[get_num_features()];
00080
00081 if (!featurevector)
00082 return NULL;
00083
00084 compute_feature_vector(featurevector, num, len);
00085
00086 return featurevector;
00087 }
00088
00089 void CTOPFeatures::compute_feature_vector(DREAL* featurevector, INT num, INT& len)
00090 {
00091 INT i,j,p=0,x=num;
00092 INT idx=0;
00093
00094 double posx=(poslinear) ? (pos->linear_model_probability(x)) : (pos->model_probability(x));
00095 double negx=(neglinear) ? (neg->linear_model_probability(x)) : (neg->model_probability(x));
00096
00097 len=get_num_features();
00098
00099 featurevector[p++]=(posx-negx);
00100
00101
00102 if (poslinear)
00103 {
00104 for (i=0; i<pos->get_N(); i++)
00105 {
00106 for (j=0; j<pos->get_M(); j++)
00107 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00108 }
00109 }
00110 else
00111 {
00112 for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00113 featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00114
00115 for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00116 featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00117
00118 for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00119 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00120
00121 for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00122 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136 }
00137
00138
00139 if (neglinear)
00140 {
00141 for (i=0; i<neg->get_N(); i++)
00142 {
00143 for (j=0; j<neg->get_M(); j++)
00144 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00145 }
00146 }
00147 else
00148 {
00149 for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00150 featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00151
00152 for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00153 featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00154
00155 for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00156 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00157
00158 for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00159 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172 }
00173 }
00174
00175 DREAL* CTOPFeatures::set_feature_matrix()
00176 {
00177 INT len=0;
00178
00179 num_features=get_num_features();
00180 ASSERT(num_features);
00181 ASSERT(pos);
00182 ASSERT(pos->get_observations());
00183
00184 num_vectors=pos->get_observations()->get_num_vectors();
00185 SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00186 delete[] feature_matrix;
00187 feature_matrix=new DREAL[num_features*num_vectors];
00188 if (!feature_matrix)
00189 {
00190 SG_ERROR( "allocation not successful!");
00191 return NULL ;
00192 } ;
00193
00194 SG_INFO( "calculating top feature matrix\n");
00195
00196 for (INT x=0; x<num_vectors; x++)
00197 {
00198 if (!(x % (num_vectors/10+1)))
00199 SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00200 else if (!(x % (num_vectors/200+1)))
00201 SG_DEBUG( ".");
00202
00203 compute_feature_vector(&feature_matrix[x*num_features], x, len);
00204 }
00205
00206 SG_DONE();
00207
00208 num_vectors=get_num_vectors() ;
00209 num_features=get_num_features() ;
00210
00211 return feature_matrix;
00212 }
00213
00214 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00215 {
00216 INT i=0;
00217 INT j=0;
00218
00219 hmm_idx->num_p=0;
00220 hmm_idx->num_q=0;
00221 hmm_idx->num_a=0;
00222 hmm_idx->num_b=0;
00223
00224 for (i=0; i<hmm->get_N(); i++)
00225 {
00226 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00227 hmm_idx->num_p++;
00228
00229 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00230 hmm_idx->num_q++;
00231
00232 for (j=0; j<hmm->get_N(); j++)
00233 {
00234 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00235 hmm_idx->num_a++;
00236 }
00237
00238 for (j=0; j<pos->get_M(); j++)
00239 {
00240 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00241 hmm_idx->num_b++;
00242 }
00243 }
00244
00245 if (hmm_idx->num_p > 0)
00246 {
00247 hmm_idx->idx_p=new INT[hmm_idx->num_p];
00248 ASSERT(hmm_idx->idx_p);
00249 }
00250
00251 if (hmm_idx->num_q > 0)
00252 {
00253 hmm_idx->idx_q=new INT[hmm_idx->num_q];
00254 ASSERT(hmm_idx->idx_q);
00255 }
00256
00257 if (hmm_idx->num_a > 0)
00258 {
00259 hmm_idx->idx_a_rows=new INT[hmm_idx->num_a];
00260 hmm_idx->idx_a_cols=new INT[hmm_idx->num_a];
00261 ASSERT(hmm_idx->idx_a_rows);
00262 ASSERT(hmm_idx->idx_a_cols);
00263 }
00264
00265 if (hmm_idx->num_b > 0)
00266 {
00267 hmm_idx->idx_b_rows=new INT[hmm_idx->num_b];
00268 hmm_idx->idx_b_cols=new INT[hmm_idx->num_b];
00269 ASSERT(hmm_idx->idx_b_rows);
00270 ASSERT(hmm_idx->idx_b_cols);
00271 }
00272
00273
00274 INT idx_p=0;
00275 INT idx_q=0;
00276 INT idx_a=0;
00277 INT idx_b=0;
00278
00279 for (i=0; i<hmm->get_N(); i++)
00280 {
00281 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00282 {
00283 ASSERT(idx_p < hmm_idx->num_p);
00284 hmm_idx->idx_p[idx_p++]=i;
00285 }
00286
00287 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00288 {
00289 ASSERT(idx_q < hmm_idx->num_q);
00290 hmm_idx->idx_q[idx_q++]=i;
00291 }
00292
00293 for (j=0; j<hmm->get_N(); j++)
00294 {
00295 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00296 {
00297 ASSERT(idx_a < hmm_idx->num_a);
00298 hmm_idx->idx_a_rows[idx_a]=i;
00299 hmm_idx->idx_a_cols[idx_a++]=j;
00300 }
00301 }
00302
00303 for (j=0; j<pos->get_M(); j++)
00304 {
00305 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00306 {
00307 ASSERT(idx_b < hmm_idx->num_b);
00308 hmm_idx->idx_b_rows[idx_b]=i;
00309 hmm_idx->idx_b_cols[idx_b++]=j;
00310 }
00311 }
00312 }
00313
00314 return true;
00315 }
00316
00317 INT CTOPFeatures::compute_num_features()
00318 {
00319 INT num=0;
00320
00321 if (pos && neg)
00322 {
00323 num+=1;
00324
00325 if (poslinear)
00326 num+=pos->get_N()*pos->get_M();
00327 else
00328 {
00329 num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00330 }
00331
00332 if (neglinear)
00333 num+=neg->get_N()*neg->get_M();
00334 else
00335 {
00336 num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00337 }
00338
00339
00340
00341
00342 }
00343 return num;
00344 }