00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "features/FKFeatures.h"
00013 #include "features/StringFeatures.h"
00014 #include "lib/io.h"
00015
00016 CFKFeatures::CFKFeatures(int32_t size, CHMM* p, CHMM* n)
00017 : CRealFeatures(size)
00018 {
00019 pos_prob=NULL;
00020 neg_prob=NULL;
00021 weight_a=-1;
00022 set_models(p,n);
00023 }
00024
00025 CFKFeatures::CFKFeatures(const CFKFeatures &orig)
00026 : CRealFeatures(orig), pos(orig.pos), neg(orig.neg), weight_a(orig.weight_a)
00027 {
00028 }
00029
00030 CFKFeatures::~CFKFeatures()
00031 {
00032 SG_UNREF(pos);
00033 SG_UNREF(neg);
00034 }
00035
00036 float64_t CFKFeatures::deriv_a(float64_t a, int32_t dimension)
00037 {
00038 CStringFeatures<uint16_t> *Obs=pos->get_observations() ;
00039 float64_t deriv=0.0 ;
00040 int32_t i=dimension ;
00041
00042 if (dimension==-1)
00043 {
00044 for (i=0; i<Obs->get_num_vectors(); i++)
00045 {
00046
00047
00048 float64_t pp=(pos_prob) ? pos_prob[i] : pos->model_probability(i);
00049 float64_t pn=(neg_prob) ? neg_prob[i] : neg->model_probability(i);
00050 float64_t sub=pp ;
00051 if (pn>pp) sub=pn ;
00052 pp-=sub ;
00053 pn-=sub ;
00054 pp=exp(pp) ;
00055 pn=exp(pn) ;
00056 float64_t p=a*pp+(1-a)*pn ;
00057 deriv+=(pp-pn)/p ;
00058
00059
00060
00061
00062
00063
00064
00065 } ;
00066 } else
00067 {
00068 float64_t pp=pos->model_probability(i) ;
00069 float64_t pn=neg->model_probability(i) ;
00070 float64_t sub=pp ;
00071 if (pn>pp) sub=pn ;
00072 pp-=sub ;
00073 pn-=sub ;
00074 pp=exp(pp) ;
00075 pn=exp(pn) ;
00076 float64_t p=a*pp+(1-a)*pn ;
00077 deriv+=(pp-pn)/p ;
00078 } ;
00079
00080 return deriv ;
00081 }
00082
00083
00084 float64_t CFKFeatures::set_opt_a(float64_t a)
00085 {
00086 if (a==-1)
00087 {
00088 SG_INFO( "estimating a.\n");
00089 pos_prob=new float64_t[pos->get_observations()->get_num_vectors()];
00090 neg_prob=new float64_t[pos->get_observations()->get_num_vectors()];
00091 for (int32_t i=0; i<pos->get_observations()->get_num_vectors(); i++)
00092 {
00093 pos_prob[i]=pos->model_probability(i) ;
00094 neg_prob[i]=neg->model_probability(i) ;
00095 }
00096
00097 float64_t la=0;
00098 float64_t ua=1;
00099 a=(la+ua)/2;
00100 while (CMath::abs(ua-la)>1e-6)
00101 {
00102 float64_t da=deriv_a(a);
00103 if (da>0)
00104 la=a;
00105 if (da<=0)
00106 ua=a;
00107 a=(la+ua)/2;
00108 SG_INFO( "opt_a: a=%1.3e deriv=%1.3e la=%1.3e ua=%1.3e\n", a, da, la ,ua);
00109 }
00110 delete[] pos_prob;
00111 delete[] neg_prob;
00112 pos_prob=NULL;
00113 neg_prob=NULL;
00114 }
00115
00116 weight_a=a;
00117 SG_INFO( "setting opt_a: %g\n", a);
00118 return a;
00119 }
00120
00121 void CFKFeatures::set_models(CHMM* p, CHMM* n)
00122 {
00123 ASSERT(p && n);
00124 SG_REF(p);
00125 SG_REF(n);
00126
00127 pos=p;
00128 neg=n;
00129 set_num_vectors(0);
00130
00131 free_feature_matrix();
00132
00133 SG_INFO( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i]\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M()) ;
00134
00135 if (pos && pos->get_observations())
00136 set_num_vectors(pos->get_observations()->get_num_vectors());
00137 if (pos && neg)
00138 num_features=1+pos->get_N()*(1+pos->get_N()+1+pos->get_M()) + neg->get_N()*(1+neg->get_N()+1+neg->get_M()) ;
00139 }
00140
00141 float64_t* CFKFeatures::compute_feature_vector(
00142 int32_t num, int32_t &len, float64_t* target)
00143 {
00144 float64_t* featurevector=target;
00145
00146 if (!featurevector)
00147 featurevector=new float64_t[
00148 1+
00149 pos->get_N()*(1+pos->get_N()+1+pos->get_M())+
00150 neg->get_N()*(1+neg->get_N()+1+neg->get_M())
00151 ];
00152
00153 if (!featurevector)
00154 return NULL;
00155
00156 compute_feature_vector(featurevector, num, len);
00157
00158 return featurevector;
00159 }
00160
00161 void CFKFeatures::compute_feature_vector(
00162 float64_t* featurevector, int32_t num, int32_t& len)
00163 {
00164 int32_t i,j,p=0,x=num;
00165
00166 float64_t posx=pos->model_probability(x);
00167 float64_t negx=neg->model_probability(x);
00168
00169 len=1+pos->get_N()*(1+pos->get_N()+1+pos->get_M()) + neg->get_N()*(1+neg->get_N()+1+neg->get_M());
00170
00171 featurevector[p++] = deriv_a(weight_a, x);
00172 float64_t px=CMath::logarithmic_sum(
00173 posx+log(weight_a),negx+log(1-weight_a));
00174
00175
00176 for (i=0; i<pos->get_N(); i++)
00177 {
00178 featurevector[p++]=weight_a*exp(pos->model_derivative_p(i, x)-px);
00179 featurevector[p++]=weight_a*exp(pos->model_derivative_q(i, x)-px);
00180
00181 for (j=0; j<pos->get_N(); j++) {
00182 featurevector[p++]=weight_a*exp(pos->model_derivative_a(i, j, x)-px);
00183 }
00184
00185 for (j=0; j<pos->get_M(); j++) {
00186 featurevector[p++]=weight_a*exp(pos->model_derivative_b(i, j, x)-px);
00187 }
00188
00189 }
00190
00191
00192 for (i=0; i<neg->get_N(); i++)
00193 {
00194 featurevector[p++]= (1-weight_a)*exp(neg->model_derivative_p(i, x)-px);
00195 featurevector[p++]= (1-weight_a)* exp(neg->model_derivative_q(i, x)-px);
00196
00197 for (j=0; j<neg->get_N(); j++) {
00198 featurevector[p++]= (1-weight_a)*exp(neg->model_derivative_a(i, j, x)-px);
00199 }
00200
00201 for (j=0; j<neg->get_M(); j++) {
00202 featurevector[p++]= (1-weight_a)*exp(neg->model_derivative_b(i, j, x)-px);
00203 }
00204 }
00205 }
00206
00207 float64_t* CFKFeatures::set_feature_matrix()
00208 {
00209 ASSERT(pos);
00210 ASSERT(pos->get_observations());
00211 ASSERT(neg);
00212 ASSERT(neg->get_observations());
00213
00214 int32_t len=0;
00215 num_features=1+ pos->get_N()*(1+pos->get_N()+1+pos->get_M()) + neg->get_N()*(1+neg->get_N()+1+neg->get_M());
00216
00217 num_vectors=pos->get_observations()->get_num_vectors();
00218 ASSERT(num_vectors);
00219
00220 SG_INFO( "allocating FK feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00221 free_feature_matrix();
00222 feature_matrix=new float64_t[num_features*num_vectors];
00223
00224 SG_INFO( "calculating FK feature matrix\n");
00225
00226 for (int32_t x=0; x<num_vectors; x++)
00227 {
00228 if (!(x % (num_vectors/10+1)))
00229 SG_DEBUG("%02d%%.", (int) (100.0*x/num_vectors));
00230 else if (!(x % (num_vectors/200+1)))
00231 SG_DEBUG(".");
00232
00233 compute_feature_vector(&feature_matrix[x*num_features], x, len);
00234 }
00235
00236 SG_DONE();
00237
00238 num_vectors=get_num_vectors() ;
00239 num_features=get_num_features() ;
00240
00241 return feature_matrix;
00242 }