LinearHMM.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "distributions/hmm/LinearHMM.h"
00013 #include "lib/common.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016
00017 CLinearHMM::CLinearHMM(CStringFeatures<WORD>* f)
00018 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00019 {
00020 features=f;
00021 sequence_length = f->get_vector_length(0);
00022 num_symbols = (INT) f->get_num_symbols();
00023 num_params = sequence_length*num_symbols;
00024 }
00025
00026 CLinearHMM::CLinearHMM(INT p_num_features, INT p_num_symbols)
00027 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00028 {
00029 sequence_length = p_num_features;
00030 num_symbols = p_num_symbols;
00031 num_params = sequence_length*num_symbols;
00032 }
00033
00034 CLinearHMM::~CLinearHMM()
00035 {
00036 delete[] transition_probs;
00037 delete[] log_transition_probs;
00038 }
00039
00040 bool CLinearHMM::train()
00041 {
00042 delete[] transition_probs;
00043 delete[] log_transition_probs;
00044 INT* int_transition_probs=new INT[num_params];
00045
00046 INT vec;
00047 INT i;
00048
00049 for (i=0; i< num_params; i++)
00050 int_transition_probs[i]=0;
00051
00052 for (vec=0; vec<features->get_num_vectors(); vec++)
00053 {
00054 INT len;
00055
00056 WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(vec, len);
00057
00058
00059 for (INT feat=0; feat<len ; feat++)
00060 int_transition_probs[feat*num_symbols+vector[feat]]++;
00061 }
00062
00063
00064 transition_probs=new DREAL[num_params];
00065 log_transition_probs=new DREAL[num_params];
00066
00067 for (i=0;i<sequence_length;i++)
00068 {
00069 for (INT j=0; j<num_symbols; j++)
00070 {
00071 DREAL sum=0;
00072 INT offs=i*num_symbols+((CStringFeatures<WORD> *) features)->get_masked_symbols((WORD)j,(BYTE) 254);
00073 INT original_num_symbols=(INT) ((CStringFeatures<WORD> *) features)->get_original_num_symbols();
00074
00075 for (INT k=0; k<original_num_symbols; k++)
00076 sum+=int_transition_probs[offs+k];
00077
00078 transition_probs[i*num_symbols+j]=(int_transition_probs[i*num_symbols+j]+pseudo_count)/(sum+((CStringFeatures<WORD> *) features)->get_original_num_symbols()*pseudo_count);
00079 log_transition_probs[i*num_symbols+j]=log(transition_probs[i*num_symbols+j]);
00080 }
00081 }
00082
00083 delete[] int_transition_probs;
00084 return true;
00085 }
00086
00087 bool CLinearHMM::train(const INT* indizes, INT num_indizes, DREAL pseudo)
00088 {
00089 delete[] transition_probs;
00090 delete[] log_transition_probs;
00091 INT* int_transition_probs=new INT[num_params];
00092 INT vec;
00093 INT i;
00094
00095 for (i=0; i< num_params; i++)
00096 int_transition_probs[i]=0;
00097
00098 for (vec=0; vec<num_indizes; vec++)
00099 {
00100 INT len;
00101
00102 ASSERT(indizes[vec]>=0 && indizes[vec]<((CStringFeatures<WORD>*) features)->get_num_vectors());
00103 WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(indizes[vec], len);
00104
00105
00106
00107 for (INT feat=0; feat<len ; feat++)
00108 int_transition_probs[feat*num_symbols+vector[feat]]++;
00109 }
00110
00111
00112 transition_probs=new DREAL[num_params];
00113 log_transition_probs=new DREAL[num_params];
00114
00115 for (i=0;i<sequence_length;i++)
00116 {
00117 for (INT j=0; j<num_symbols; j++)
00118 {
00119 DREAL sum=0;
00120 INT original_num_symbols= (INT) ((CStringFeatures<WORD> *) features)->get_original_num_symbols();
00121 for (INT k=0; k<original_num_symbols; k++)
00122 {
00123 sum+=int_transition_probs[i*num_symbols+
00124 ((CStringFeatures<WORD>*) features)->
00125 get_masked_symbols((WORD)j,(BYTE) 254)+k];
00126 }
00127
00128 transition_probs[i*num_symbols+j]=(int_transition_probs[i*num_symbols+j]+pseudo)/(sum+((CStringFeatures<WORD>*) features)->get_original_num_symbols()*pseudo);
00129 log_transition_probs[i*num_symbols+j]=log(transition_probs[i*num_symbols+j]);
00130 }
00131 }
00132
00133 delete[] int_transition_probs;
00134 return true;
00135 }
00136
00137 DREAL CLinearHMM::get_log_likelihood_example(WORD* vector, INT len)
00138 {
00139 DREAL result=log_transition_probs[vector[0]];
00140
00141 for (INT i=1; i<len; i++)
00142 result+=log_transition_probs[i*num_symbols+vector[i]];
00143
00144 return result;
00145 }
00146
00147 DREAL CLinearHMM::get_log_likelihood_example(INT num_example)
00148 {
00149 INT len;
00150 WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00151 DREAL result=log_transition_probs[vector[0]];
00152
00153 for (INT i=1; i<len; i++)
00154 result+=log_transition_probs[i*num_symbols+vector[i]];
00155
00156 return result;
00157 }
00158
00159 DREAL CLinearHMM::get_likelihood_example(WORD* vector, INT len)
00160 {
00161 DREAL result=transition_probs[vector[0]];
00162
00163 for (INT i=1; i<len; i++)
00164 result*=transition_probs[i*num_symbols+vector[i]];
00165
00166 return result;
00167 }
00168
00169 DREAL CLinearHMM::get_log_derivative(INT num_param, INT num_example)
00170 {
00171 INT len;
00172 WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00173 DREAL result=0;
00174 INT position=num_param/num_symbols;
00175 ASSERT(position>=0 && position<len);
00176 WORD sym=(WORD) (num_param-position*num_symbols);
00177
00178 if (vector[position]==sym && transition_probs[num_param]!=0)
00179 result=1.0/transition_probs[num_param];
00180
00181 return result;
00182 }
00183
00184 void CLinearHMM::get_transition_probs(DREAL** dst, INT* num)
00185 {
00186 *num=num_params;
00187 size_t sz=sizeof(*transition_probs)*(*num);
00188 *dst=(DREAL*) malloc(sz);
00189 ASSERT(dst);
00190
00191 memcpy(*dst, transition_probs, sz);
00192 }
00193
00194 bool CLinearHMM::set_transition_probs(const DREAL* src, INT num)
00195 {
00196 if (num!=-1)
00197 ASSERT(num==num_params);
00198
00199 if (!log_transition_probs)
00200 log_transition_probs=new DREAL[num_params];
00201
00202 if (!transition_probs)
00203 transition_probs=new DREAL[num_params];
00204
00205 for (INT i=0; i<num_params; i++)
00206 {
00207 transition_probs[i]=src[i];
00208 log_transition_probs[i]=log(transition_probs[i]);
00209 }
00210
00211 return true;
00212 }
00213
00214 void CLinearHMM::get_log_transition_probs(DREAL** dst, INT* num)
00215 {
00216 *num=num_params;
00217 size_t sz=sizeof(*log_transition_probs)*(*num);
00218 *dst=(DREAL*) malloc(sz);
00219 ASSERT(dst);
00220
00221 memcpy(*dst, log_transition_probs, sz);
00222 }
00223
00224 bool CLinearHMM::set_log_transition_probs(const DREAL* src, INT num)
00225 {
00226 if (num!=-1)
00227 ASSERT(num==num_params);
00228
00229 if (!log_transition_probs)
00230 log_transition_probs=new DREAL[num_params];
00231
00232 if (!transition_probs)
00233 transition_probs=new DREAL[num_params];
00234
00235 for (INT i=0; i< num_params; i++)
00236 {
00237 log_transition_probs[i]=src[i];
00238 transition_probs[i]=exp(log_transition_probs[i]);
00239 }
00240
00241 return true;
00242 }
00243
00244
00245
00246