TOPFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/TOPFeatures.h"
00013 #include "lib/io.h"
00014 #include "lib/Mathematics.h"
00015 
00016 CTOPFeatures::CTOPFeatures(
00017     int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00018 : CRealFeatures(size), neglinear(neglin), poslinear(poslin)
00019 {
00020     memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00021     memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00022     set_models(p,n);
00023 }
00024 
00025 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00026 : CRealFeatures(orig), pos(orig.pos), neg(orig.neg), neglinear(orig.neglinear),
00027     poslinear(orig.poslinear)
00028 {
00029 }
00030 
00031 CTOPFeatures::~CTOPFeatures()
00032 {
00033     delete[] pos_relevant_indizes.idx_p;
00034     delete[] pos_relevant_indizes.idx_q;
00035     delete[] pos_relevant_indizes.idx_a_cols;
00036     delete[] pos_relevant_indizes.idx_a_rows;
00037     delete[] pos_relevant_indizes.idx_b_cols;
00038     delete[] pos_relevant_indizes.idx_b_rows;
00039 
00040     delete[] neg_relevant_indizes.idx_p;
00041     delete[] neg_relevant_indizes.idx_q;
00042     delete[] neg_relevant_indizes.idx_a_cols;
00043     delete[] neg_relevant_indizes.idx_a_rows;
00044     delete[] neg_relevant_indizes.idx_b_cols;
00045     delete[] neg_relevant_indizes.idx_b_rows;
00046 
00047     SG_UNREF(pos);
00048     SG_UNREF(neg);
00049 }
00050 
00051 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00052 {
00053     ASSERT(p && n);
00054     SG_REF(p);
00055     SG_REF(n);
00056 
00057     pos=p; 
00058     neg=n;
00059     set_num_vectors(0);
00060 
00061     delete[] feature_matrix  ;
00062     feature_matrix=NULL ;
00063 
00064 
00065     if (pos && pos->get_observations())
00066         set_num_vectors(pos->get_observations()->get_num_vectors());
00067 
00068     compute_relevant_indizes(p, &pos_relevant_indizes);
00069     compute_relevant_indizes(n, &neg_relevant_indizes);
00070     num_features=compute_num_features();
00071 
00072     SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00073 }
00074 
00075 float64_t* CTOPFeatures::compute_feature_vector(
00076     int32_t num, int32_t &len, float64_t* target)
00077 {
00078     float64_t* featurevector=target;
00079 
00080     if (!featurevector) 
00081         featurevector=new float64_t[get_num_features()];
00082 
00083     if (!featurevector)
00084         return NULL;
00085 
00086     compute_feature_vector(featurevector, num, len);
00087 
00088     return featurevector;
00089 }
00090 
00091 void CTOPFeatures::compute_feature_vector(
00092     float64_t* featurevector, int32_t num, int32_t& len)
00093 {
00094     int32_t i,j,p=0,x=num;
00095     int32_t idx=0;
00096 
00097     float64_t posx=(poslinear) ?
00098         (pos->linear_model_probability(x)) : (pos->model_probability(x));
00099     float64_t negx=(neglinear) ?
00100         (neg->linear_model_probability(x)) : (neg->model_probability(x));
00101 
00102     len=get_num_features();
00103 
00104     featurevector[p++]=(posx-negx);
00105 
00106     //first do positive model
00107     if (poslinear)
00108     {
00109         for (i=0; i<pos->get_N(); i++)
00110         {
00111             for (j=0; j<pos->get_M(); j++)
00112                 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00113         }
00114     }
00115     else
00116     {
00117         for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00118             featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00119 
00120         for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00121             featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00122 
00123         for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00124                 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00125 
00126         for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00127                 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00128 
00129 
00130         //for (i=0; i<pos->get_N(); i++)
00131         //{
00132         //  featurevector[p++]=exp(pos->model_derivative_p(i, x)-posx);
00133         //  featurevector[p++]=exp(pos->model_derivative_q(i, x)-posx);
00134 
00135         //  for (j=0; j<pos->get_N(); j++)
00136         //      featurevector[p++]=exp(pos->model_derivative_a(i, j, x)-posx);
00137 
00138         //  for (j=0; j<pos->get_M(); j++)
00139         //      featurevector[p++]=exp(pos->model_derivative_b(i, j, x)-posx);
00140         //}
00141     }
00142 
00143     //then do negative
00144     if (neglinear)
00145     {
00146         for (i=0; i<neg->get_N(); i++)
00147         {
00148             for (j=0; j<neg->get_M(); j++)
00149                 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00150         }
00151     }
00152     else
00153     {
00154         for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00155             featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00156 
00157         for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00158             featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00159 
00160         for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00161                 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00162 
00163         for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00164                 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00165 
00166         //for (i=0; i<neg->get_N(); i++)
00167         //{
00168         //  featurevector[p++]= - exp(neg->model_derivative_p(i, x)-negx);
00169         //  featurevector[p++]= - exp(neg->model_derivative_q(i, x)-negx);
00170 
00171         //  for (j=0; j<neg->get_N(); j++)
00172         //      featurevector[p++]= - exp(neg->model_derivative_a(i, j, x)-negx);
00173 
00174         //  for (j=0; j<neg->get_M(); j++)
00175         //      featurevector[p++]= - exp(neg->model_derivative_b(i, j, x)-negx);
00176         //}
00177     }
00178 }
00179 
00180 float64_t* CTOPFeatures::set_feature_matrix()
00181 {
00182     int32_t len=0;
00183 
00184     num_features=get_num_features();
00185     ASSERT(num_features);
00186     ASSERT(pos);
00187     ASSERT(pos->get_observations());
00188 
00189     num_vectors=pos->get_observations()->get_num_vectors();
00190     SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00191     delete[] feature_matrix;
00192     feature_matrix=new float64_t[num_features*num_vectors];
00193     if (!feature_matrix)
00194     {
00195       SG_ERROR( "allocation not successful!");
00196         return NULL ;
00197     } ;
00198 
00199     SG_INFO( "calculating top feature matrix\n");
00200 
00201     for (int32_t x=0; x<num_vectors; x++)
00202     {
00203         if (!(x % (num_vectors/10+1)))
00204             SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00205         else if (!(x % (num_vectors/200+1)))
00206             SG_DEBUG( ".");
00207 
00208         compute_feature_vector(&feature_matrix[x*num_features], x, len);
00209     }
00210 
00211     SG_DONE();
00212 
00213     num_vectors=get_num_vectors() ;
00214     num_features=get_num_features() ;
00215 
00216     return feature_matrix;
00217 }
00218 
00219 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00220 {
00221     int32_t i=0;
00222     int32_t j=0;
00223 
00224     hmm_idx->num_p=0;
00225     hmm_idx->num_q=0;
00226     hmm_idx->num_a=0;
00227     hmm_idx->num_b=0;
00228 
00229     for (i=0; i<hmm->get_N(); i++)
00230     {
00231         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00232             hmm_idx->num_p++;
00233 
00234         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00235             hmm_idx->num_q++;
00236 
00237         for (j=0; j<hmm->get_N(); j++)
00238         {
00239             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00240                 hmm_idx->num_a++;
00241         }
00242 
00243         for (j=0; j<pos->get_M(); j++)
00244         {
00245             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00246                 hmm_idx->num_b++;
00247         }
00248     }
00249 
00250     if (hmm_idx->num_p > 0)
00251     {
00252         hmm_idx->idx_p=new int32_t[hmm_idx->num_p];
00253         ASSERT(hmm_idx->idx_p);
00254     }
00255 
00256     if (hmm_idx->num_q > 0)
00257     {
00258         hmm_idx->idx_q=new int32_t[hmm_idx->num_q];
00259         ASSERT(hmm_idx->idx_q);
00260     }
00261 
00262     if (hmm_idx->num_a > 0)
00263     {
00264         hmm_idx->idx_a_rows=new int32_t[hmm_idx->num_a];
00265         hmm_idx->idx_a_cols=new int32_t[hmm_idx->num_a];
00266         ASSERT(hmm_idx->idx_a_rows);
00267         ASSERT(hmm_idx->idx_a_cols);
00268     }
00269 
00270     if (hmm_idx->num_b > 0)
00271     {
00272         hmm_idx->idx_b_rows=new int32_t[hmm_idx->num_b];
00273         hmm_idx->idx_b_cols=new int32_t[hmm_idx->num_b];
00274         ASSERT(hmm_idx->idx_b_rows);
00275         ASSERT(hmm_idx->idx_b_cols);
00276     }
00277 
00278 
00279     int32_t idx_p=0;
00280     int32_t idx_q=0;
00281     int32_t idx_a=0;
00282     int32_t idx_b=0;
00283 
00284     for (i=0; i<hmm->get_N(); i++)
00285     {
00286         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00287         {
00288             ASSERT(idx_p < hmm_idx->num_p);
00289             hmm_idx->idx_p[idx_p++]=i;
00290         }
00291         
00292         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00293         {
00294             ASSERT(idx_q < hmm_idx->num_q);
00295             hmm_idx->idx_q[idx_q++]=i;
00296         }
00297 
00298         for (j=0; j<hmm->get_N(); j++)
00299         {
00300             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00301             {
00302                 ASSERT(idx_a < hmm_idx->num_a);
00303                 hmm_idx->idx_a_rows[idx_a]=i;
00304                 hmm_idx->idx_a_cols[idx_a++]=j;
00305             }
00306         }
00307 
00308         for (j=0; j<pos->get_M(); j++)
00309         {
00310             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00311             {
00312                 ASSERT(idx_b < hmm_idx->num_b);
00313                 hmm_idx->idx_b_rows[idx_b]=i;
00314                 hmm_idx->idx_b_cols[idx_b++]=j;
00315             }
00316         }
00317     }
00318 
00319     return true;
00320 }
00321 
00322 int32_t CTOPFeatures::compute_num_features()
00323 {
00324     int32_t num=0;
00325 
00326     if (pos && neg)
00327     {
00328         num+=1; //zeroth- component
00329 
00330         if (poslinear)
00331             num+=pos->get_N()*pos->get_M();
00332         else
00333         {
00334             num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00335         }
00336 
00337         if (neglinear)
00338             num+=neg->get_N()*neg->get_M();
00339         else
00340         {
00341             num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00342         }
00343 
00344         //num+=1; //zeroth- component
00345         //num+= (poslinear) ? (pos->get_N()*pos->get_M()) : (pos->get_N()*(1+pos->get_N()+1+pos->get_M()));
00346         //num+= (neglinear) ? (neg->get_N()*neg->get_M()) : (neg->get_N()*(1+neg->get_N()+1+neg->get_M()));
00347     }
00348     return num;
00349 }

SHOGUN Machine Learning Toolbox - Documentation