00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016 #ifndef __CHMM_H__
00017 #define __CHMM_H__
00018
00019 #include "lib/Mathematics.h"
00020 #include "lib/common.h"
00021 #include "lib/io.h"
00022 #include "lib/config.h"
00023 #include "features/StringFeatures.h"
00024 #include "distributions/Distribution.h"
00025
00026 #include <stdio.h>
00027
00028 #ifdef USE_HMMPARALLEL
00029 #define USE_HMMPARALLEL_STRUCTURES 1
00030 #endif
00031
00032 class CHMM;
00035
00037 typedef DREAL T_ALPHA_BETA_TABLE;
00038
00040 struct T_ALPHA_BETA
00041 {
00043 INT dimension;
00044
00046 T_ALPHA_BETA_TABLE* table;
00047
00049 bool updated;
00050
00052 DREAL sum;
00053 };
00054
00059 #ifdef USE_BIGSTATES
00060 typedef WORD T_STATES ;
00061 #else
00062 typedef BYTE T_STATES ;
00063 #endif
00064 typedef T_STATES* P_STATES ;
00065
00067
00068 enum BaumWelchViterbiType
00069 {
00070 BW_NORMAL,
00071 BW_TRANS,
00072 BW_DEFINED,
00073 VIT_NORMAL,
00074 VIT_DEFINED
00075 };
00076
00077
00079 class CModel
00080 {
00081 public:
00083 CModel();
00084
00086 virtual ~CModel();
00087
00089 inline void sort_learn_a()
00090 {
00091 CMath::sort(learn_a,2) ;
00092 }
00093
00095 inline void sort_learn_b()
00096 {
00097 CMath::sort(learn_b,2) ;
00098 }
00099
00104
00105 inline INT get_learn_a(INT line, INT column) const
00106 {
00107 return learn_a[line*2 + column];
00108 }
00109
00111 inline INT get_learn_b(INT line, INT column) const
00112 {
00113 return learn_b[line*2 + column];
00114 }
00115
00117 inline INT get_learn_p(INT offset) const
00118 {
00119 return learn_p[offset];
00120 }
00121
00123 inline INT get_learn_q(INT offset) const
00124 {
00125 return learn_q[offset];
00126 }
00127
00129 inline INT get_const_a(INT line, INT column) const
00130 {
00131 return const_a[line*2 + column];
00132 }
00133
00135 inline INT get_const_b(INT line, INT column) const
00136 {
00137 return const_b[line*2 + column];
00138 }
00139
00141 inline INT get_const_p(INT offset) const
00142 {
00143 return const_p[offset];
00144 }
00145
00147 inline INT get_const_q(INT offset) const
00148 {
00149 return const_q[offset];
00150 }
00151
00153 inline DREAL get_const_a_val(INT line) const
00154 {
00155 return const_a_val[line];
00156 }
00157
00159 inline DREAL get_const_b_val(INT line) const
00160 {
00161 return const_b_val[line];
00162 }
00163
00165 inline DREAL get_const_p_val(INT offset) const
00166 {
00167 return const_p_val[offset];
00168 }
00169
00171 inline DREAL get_const_q_val(INT offset) const
00172 {
00173 return const_q_val[offset];
00174 }
00175 #ifdef FIX_POS
00177 inline CHAR get_fix_pos_state(INT pos, T_STATES state, T_STATES num_states)
00178 {
00179 #ifdef HMM_DEBUG
00180 if ((pos<0)||(pos*num_states+state>65336))
00181 SG_DEBUG("index out of range in get_fix_pos_state(%i,%i,%i) \n", pos,state,num_states) ;
00182 #endif
00183 return fix_pos_state[pos*num_states+state] ;
00184 }
00185 #endif
00186
00187
00192
00193 inline void set_learn_a(INT offset, INT value)
00194 {
00195 learn_a[offset]=value;
00196 }
00197
00199 inline void set_learn_b(INT offset, INT value)
00200 {
00201 learn_b[offset]=value;
00202 }
00203
00205 inline void set_learn_p(INT offset, INT value)
00206 {
00207 learn_p[offset]=value;
00208 }
00209
00211 inline void set_learn_q(INT offset, INT value)
00212 {
00213 learn_q[offset]=value;
00214 }
00215
00217 inline void set_const_a(INT offset, INT value)
00218 {
00219 const_a[offset]=value;
00220 }
00221
00223 inline void set_const_b(INT offset, INT value)
00224 {
00225 const_b[offset]=value;
00226 }
00227
00229 inline void set_const_p(INT offset, INT value)
00230 {
00231 const_p[offset]=value;
00232 }
00233
00235 inline void set_const_q(INT offset, INT value)
00236 {
00237 const_q[offset]=value;
00238 }
00239
00241 inline void set_const_a_val(INT offset, DREAL value)
00242 {
00243 const_a_val[offset]=value;
00244 }
00245
00247 inline void set_const_b_val(INT offset, DREAL value)
00248 {
00249 const_b_val[offset]=value;
00250 }
00251
00253 inline void set_const_p_val(INT offset, DREAL value)
00254 {
00255 const_p_val[offset]=value;
00256 }
00257
00259 inline void set_const_q_val(INT offset, DREAL value)
00260 {
00261 const_q_val[offset]=value;
00262 }
00263 #ifdef FIX_POS
00265 inline void set_fix_pos_state(INT pos, T_STATES state, T_STATES num_states, CHAR value)
00266 {
00267 #ifdef HMM_DEBUG
00268 if ((pos<0)||(pos*num_states+state>65336))
00269 SG_DEBUG("index out of range in set_fix_pos_state(%i,%i,%i,%i) [%i]\n", pos,state,num_states,(int)value, pos*num_states+state) ;
00270 #endif
00271 fix_pos_state[pos*num_states+state]=value;
00272 if (value==FIX_ALLOWED)
00273 for (INT i=0; i<num_states; i++)
00274 if (get_fix_pos_state(pos,i,num_states)==FIX_DEFAULT)
00275 set_fix_pos_state(pos,i,num_states,FIX_DISALLOWED) ;
00276 }
00278
00280 const static CHAR FIX_DISALLOWED ;
00281
00283 const static CHAR FIX_ALLOWED ;
00284
00286 const static CHAR FIX_DEFAULT ;
00287
00289 const static DREAL DISALLOWED_PENALTY ;
00290 #endif
00291 protected:
00298
00299 INT* learn_a;
00300
00302 INT* learn_b;
00303
00305 INT* learn_p;
00306
00308 INT* learn_q;
00310
00317
00318 INT* const_a;
00319
00321 INT* const_b;
00322
00324 INT* const_p;
00325
00327 INT* const_q;
00328
00329
00331 DREAL* const_a_val;
00332
00334 DREAL* const_b_val;
00335
00337 DREAL* const_p_val;
00338
00340 DREAL* const_q_val;
00341
00342 #ifdef FIX_POS
00343
00346 CHAR* fix_pos_state;
00347 #endif
00348
00349 };
00350
00351
00358 class CHMM : public CDistribution
00359 {
00360 private:
00361
00362 T_STATES trans_list_len ;
00363 T_STATES **trans_list_forward ;
00364 T_STATES *trans_list_forward_cnt ;
00365 DREAL **trans_list_forward_val ;
00366 T_STATES **trans_list_backward ;
00367 T_STATES *trans_list_backward_cnt ;
00368 bool mem_initialized ;
00369
00370 #ifdef USE_HMMPARALLEL_STRUCTURES
00371
00373 struct S_MODEL_PROB_THREAD_PARAM
00374 {
00375 CHMM * hmm;
00376 INT dim_start;
00377 INT dim_stop;
00378
00379 DREAL prob_sum;
00380 };
00381
00383 struct S_BW_THREAD_PARAM
00384 {
00385 CHMM * hmm;
00386 INT dim ;
00387 INT dim_start;
00388 INT dim_stop;
00389
00390 DREAL ret;
00391 DREAL prob;
00392
00393 DREAL* p_buf;
00394 DREAL* q_buf;
00395 DREAL* a_buf;
00396 DREAL* b_buf;
00397 };
00398
00399 inline T_ALPHA_BETA & ALPHA_CACHE(INT dim) {
00400 return alpha_cache[dim%parallel.get_num_threads()] ; } ;
00401 inline T_ALPHA_BETA & BETA_CACHE(INT dim) {
00402 return beta_cache[dim%parallel.get_num_threads()] ; } ;
00403 #ifdef USE_LOGSUMARRAY
00404 inline DREAL* ARRAYS(INT dim) {
00405 return arrayS[dim%parallel.get_num_threads()] ; } ;
00406 #endif
00407 inline DREAL* ARRAYN1(INT dim) {
00408 return arrayN1[dim%parallel.get_num_threads()] ; } ;
00409 inline DREAL* ARRAYN2(INT dim) {
00410 return arrayN2[dim%parallel.get_num_threads()] ; } ;
00411 inline T_STATES* STATES_PER_OBSERVATION_PSI(INT dim) {
00412 return states_per_observation_psi[dim%parallel.get_num_threads()] ; } ;
00413 inline const T_STATES* STATES_PER_OBSERVATION_PSI(INT dim) const {
00414 return states_per_observation_psi[dim%parallel.get_num_threads()] ; } ;
00415 inline T_STATES* PATH(INT dim) {
00416 return path[dim%parallel.get_num_threads()] ; } ;
00417 inline bool & PATH_PROB_UPDATED(INT dim) {
00418 return path_prob_updated[dim%parallel.get_num_threads()] ; } ;
00419 inline INT & PATH_PROB_DIMENSION(INT dim) {
00420 return path_prob_dimension[dim%parallel.get_num_threads()] ; } ;
00421 #else
00422 inline T_ALPHA_BETA & ALPHA_CACHE(INT ) {
00423 return alpha_cache ; } ;
00424 inline T_ALPHA_BETA & BETA_CACHE(INT ) {
00425 return beta_cache ; } ;
00426 #ifdef USE_LOGSUMARRAY
00427 inline DREAL* ARRAYS(INT dim) {
00428 return arrayS ; } ;
00429 #endif
00430 inline DREAL* ARRAYN1(INT ) {
00431 return arrayN1 ; } ;
00432 inline DREAL* ARRAYN2(INT ) {
00433 return arrayN2 ; } ;
00434 inline T_STATES* STATES_PER_OBSERVATION_PSI(INT ) {
00435 return states_per_observation_psi ; } ;
00436 inline const T_STATES* STATES_PER_OBSERVATION_PSI(INT ) const {
00437 return states_per_observation_psi ; } ;
00438 inline T_STATES* PATH(INT ) {
00439 return path ; } ;
00440 inline bool & PATH_PROB_UPDATED(INT ) {
00441 return path_prob_updated ; } ;
00442 inline INT & PATH_PROB_DIMENSION(INT ) {
00443 return path_prob_dimension ; } ;
00444 #endif
00445
00450 bool converge(DREAL x, DREAL y);
00451
00456 void switch_model(CHMM** m1, CHMM** m2);
00457
00463 public:
00474 CHMM(INT N, INT M, CModel* model, DREAL PSEUDO);
00475 CHMM(CStringFeatures<WORD>* obs, INT N, INT M, DREAL PSEUDO);
00476 CHMM(INT N, double* p, double* q, double* a) ;
00477 CHMM(INT N, double* p, double* q, int num_trans, double* a_trans) ;
00478
00483 CHMM(FILE* model_file, DREAL PSEUDO);
00484
00486 CHMM(CHMM* h);
00487
00489 virtual ~CHMM();
00490
00491 virtual inline bool train() { return false; }
00492 virtual inline INT get_num_model_parameters() { return N*(N+M+2); }
00493 virtual DREAL get_log_model_parameter(INT num_param);
00494 virtual DREAL get_log_derivative(INT num_param, INT num_example);
00495 virtual DREAL get_log_likelihood_example(INT num_example)
00496 {
00497 return model_probability(num_example);
00498 }
00499
00505 bool initialize(CModel* model, DREAL PSEUDO, FILE* model_file=NULL);
00507
00509 bool alloc_state_dependend_arrays();
00510
00512 void free_state_dependend_arrays();
00513
00525 DREAL forward_comp(INT time, INT state, INT dimension);
00526 DREAL forward_comp_old(INT time, INT state, INT dimension);
00527
00535 DREAL backward_comp(INT time, INT state, INT dimension);
00536 DREAL backward_comp_old(INT time, INT state, INT dimension);
00537
00538 #ifndef NOVIT
00539
00543 DREAL best_path(INT dimension);
00544 inline WORD get_best_path_state(INT dim, INT t)
00545 {
00546 ASSERT(PATH(dim));
00547 return PATH(dim)[t];
00548 }
00549 #endif
00550
00553 DREAL model_probability_comp() ;
00554
00556 inline DREAL model_probability(INT dimension=-1)
00557 {
00558
00559 if (dimension==-1)
00560 {
00561 if (mod_prob_updated)
00562 return mod_prob/p_observations->get_num_vectors();
00563 else
00564 return model_probability_comp()/p_observations->get_num_vectors();
00565 }
00566 else
00567 return forward(p_observations->get_vector_length(dimension), 0, dimension);
00568 }
00569
00575 inline DREAL linear_model_probability(INT dimension)
00576 {
00577 DREAL lik=0;
00578 INT len=0;
00579 WORD* o=p_observations->get_feature_vector(dimension, len);
00580 DREAL* obs_b=observation_matrix_b;
00581
00582 ASSERT(N==len);
00583
00584 for (INT i=0; i<N; i++)
00585 {
00586 lik+=obs_b[*o++];
00587 obs_b+=M;
00588 }
00589 return lik;
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599 }
00600
00602
00605 inline bool set_iterations(INT num) { iterations=num; return true; }
00606 inline INT get_iterations() { return iterations; }
00607 inline bool set_epsilon (DREAL eps) { epsilon=eps; return true; }
00608 inline DREAL get_epsilon() { return epsilon; }
00609
00613 bool baum_welch_viterbi_train(BaumWelchViterbiType type);
00614
00621 void estimate_model_baum_welch(CHMM* train);
00622 void estimate_model_baum_welch_old(CHMM* train);
00623 void estimate_model_baum_welch_trans(CHMM* train);
00624
00625 #ifdef USE_HMMPARALLEL_STRUCTURES
00626 void ab_buf_comp(DREAL* p_buf, DREAL* q_buf, DREAL* a_buf, DREAL* b_buf, INT dim) ;
00627 #endif
00628
00632 void estimate_model_baum_welch_defined(CHMM* train);
00633
00637 void estimate_model_viterbi(CHMM* train);
00638
00642 void estimate_model_viterbi_defined(CHMM* train);
00643
00645
00647 bool linear_train(bool right_align=false);
00648
00650 bool permutation_entropy(INT window_width, INT sequence_number);
00651
00658 void output_model(bool verbose=false);
00659
00661 void output_model_defined(bool verbose=false);
00663
00664
00667
00669 void normalize(bool keep_dead_states=false);
00670
00674 void add_states(INT num_states, DREAL default_val=0);
00675
00681 bool append_model(CHMM* append_model, DREAL* cur_out, DREAL* app_out);
00682
00686 bool append_model(CHMM* append_model);
00687
00689 void chop(DREAL value);
00690
00692 void convert_to_log();
00693
00695 void init_model_random();
00696
00702 void init_model_defined();
00703
00705 void clear_model();
00706
00708 void clear_model_defined();
00709
00711 void copy_model(CHMM* l);
00712
00717 void invalidate_model();
00718
00722 inline bool get_status() const
00723 {
00724 return status;
00725 }
00726
00728 inline DREAL get_pseudo() const
00729 {
00730 return PSEUDO ;
00731 }
00732
00734 inline void set_pseudo(DREAL pseudo)
00735 {
00736 PSEUDO=pseudo ;
00737 }
00738
00739 #ifdef USE_HMMPARALLEL_STRUCTURES
00740
00741 static void* bw_dim_prefetch(void * params);
00742 #ifndef NOVIT
00743 static void* vit_dim_prefetch(void * params);
00744 #endif
00745
00749 DREAL prefetch(INT dim, bool bw, DREAL* p_buff=NULL, DREAL* q_buf=NULL, DREAL* a_buf=NULL, DREAL* b_buf=NULL) ;
00750 #endif
00751
00752 #ifdef FIX_POS
00753
00756 inline bool set_fix_pos_state(INT pos, T_STATES state, CHAR value)
00757 {
00758 if (!model)
00759 return false ;
00760 model->set_fix_pos_state(pos, state, N, value) ;
00761 return true ;
00762 } ;
00763 #endif
00764
00765
00774 void set_observations(CStringFeatures<WORD>* obs, CHMM* lambda=NULL);
00775
00779 void set_observation_nocache(CStringFeatures<WORD>* obs);
00780
00782 inline CStringFeatures<WORD>* get_observations()
00783 {
00784 return p_observations;
00785 }
00787
00855 bool load_definitions(FILE* file, bool verbose, bool initialize=true);
00856
00892 bool load_model(FILE* file);
00893
00897 bool save_model(FILE* file);
00898
00902 bool save_model_derivatives(FILE* file);
00903
00907 bool save_model_derivatives_bin(FILE* file);
00908
00912 bool save_model_bin(FILE* file);
00913
00915 bool check_model_derivatives() ;
00916 bool check_model_derivatives_combined() ;
00917 #ifndef NOVIT
00918
00923 T_STATES* get_path(INT dim, DREAL& prob);
00924
00928 bool save_path(FILE* file);
00929
00933 bool save_path_derivatives(FILE* file);
00934
00938 bool save_path_derivatives_bin(FILE* file);
00939
00940 #ifdef USE_HMMDEBUG
00942 bool check_path_derivatives() ;
00943 #endif //USE_HMMDEBUG
00944 #endif //NOVIT
00945
00949 bool save_likelihood_bin(FILE* file);
00950
00954 bool save_likelihood(FILE* file);
00956
00962
00964 inline T_STATES get_N() const { return N ; }
00965
00967 inline INT get_M() const { return M ; }
00968
00973 inline void set_q(T_STATES offset, DREAL value)
00974 {
00975 #ifdef HMM_DEBUG
00976 if (offset>=N)
00977 SG_DEBUG("index out of range in set_q(%i,%e) [%i]\n", offset,value,N) ;
00978 #endif
00979 end_state_distribution_q[offset]=value;
00980 }
00981
00986 inline void set_p(T_STATES offset, DREAL value)
00987 {
00988 #ifdef HMM_DEBUG
00989 if (offset>=N)
00990 SG_DEBUG("index out of range in set_p(%i,.) [%i]\n", offset,N) ;
00991 #endif
00992 initial_state_distribution_p[offset]=value;
00993 }
00994
01000 inline void set_A(T_STATES line_, T_STATES column, DREAL value)
01001 {
01002 #ifdef HMM_DEBUG
01003 if ((line_>N)||(column>N))
01004 SG_DEBUG("index out of range in set_A(%i,%i,.) [%i,%i]\n",line_,column,N,N) ;
01005 #endif
01006 transition_matrix_A[line_+column*N]=value;
01007 }
01008
01014 inline void set_a(T_STATES line_, T_STATES column, DREAL value)
01015 {
01016 #ifdef HMM_DEBUG
01017 if ((line_>N)||(column>N))
01018 SG_DEBUG("index out of range in set_a(%i,%i,.) [%i,%i]\n",line_,column,N,N) ;
01019 #endif
01020 transition_matrix_a[line_+column*N]=value;
01021 }
01022
01028 inline void set_B(T_STATES line_, WORD column, DREAL value)
01029 {
01030 #ifdef HMM_DEBUG
01031 if ((line_>=N)||(column>=M))
01032 SG_DEBUG("index out of range in set_B(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01033 #endif
01034 observation_matrix_B[line_*M+column]=value;
01035 }
01036
01042 inline void set_b(T_STATES line_, WORD column, DREAL value)
01043 {
01044 #ifdef HMM_DEBUG
01045 if ((line_>=N)||(column>=M))
01046 SG_DEBUG("index out of range in set_b(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01047 #endif
01048 observation_matrix_b[line_*M+column]=value;
01049 }
01050
01051 #ifndef NOVIT
01052
01058 inline void set_psi(INT time, T_STATES state, T_STATES value, INT dimension)
01059 {
01060 #ifdef HMM_DEBUG
01061 if ((time>=p_observations->get_max_vector_length())||(state>N))
01062 SG_DEBUG("index out of range in set_psi(%i,%i,.) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N) ;
01063 #endif
01064 STATES_PER_OBSERVATION_PSI(dimension)[time*N+state]=value;
01065 }
01066 #endif // NOVIT
01067
01072 inline DREAL get_q(T_STATES offset) const
01073 {
01074 #ifdef HMM_DEBUG
01075 if (offset>=N)
01076 SG_DEBUG("index out of range in %e=get_q(%i) [%i]\n", end_state_distribution_q[offset],offset,N) ;
01077 #endif
01078 return end_state_distribution_q[offset];
01079 }
01080
01085 inline DREAL get_p(T_STATES offset) const
01086 {
01087 #ifdef HMM_DEBUG
01088 if (offset>=N)
01089 SG_DEBUG("index out of range in get_p(%i,.) [%i]\n", offset,N) ;
01090 #endif
01091 return initial_state_distribution_p[offset];
01092 }
01093
01099 inline DREAL get_A(T_STATES line_, T_STATES column) const
01100 {
01101 #ifdef HMM_DEBUG
01102 if ((line_>N)||(column>N))
01103 SG_DEBUG("index out of range in get_A(%i,%i) [%i,%i]\n",line_,column,N,N) ;
01104 #endif
01105 return transition_matrix_A[line_+column*N];
01106 }
01107
01113 inline DREAL get_a(T_STATES line_, T_STATES column) const
01114 {
01115 #ifdef HMM_DEBUG
01116 if ((line_>N)||(column>N))
01117 SG_DEBUG("index out of range in get_a(%i,%i) [%i,%i]\n",line_,column,N,N) ;
01118 #endif
01119 return transition_matrix_a[line_+column*N];
01120 }
01121
01127 inline DREAL get_B(T_STATES line_, WORD column) const
01128 {
01129 #ifdef HMM_DEBUG
01130 if ((line_>=N)||(column>=M))
01131 SG_DEBUG("index out of range in get_B(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01132 #endif
01133 return observation_matrix_B[line_*M+column];
01134 }
01135
01141 inline DREAL get_b(T_STATES line_, WORD column) const
01142 {
01143 #ifdef HMM_DEBUG
01144 if ((line_>=N)||(column>=M))
01145 SG_DEBUG("index out of range in get_b(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01146 #endif
01147
01148 return observation_matrix_b[line_*M+column];
01149 }
01150
01151 #ifndef NOVIT
01152
01158 inline T_STATES get_psi(INT time, T_STATES state, INT dimension) const
01159 {
01160 #ifdef HMM_DEBUG
01161 if ((time>=p_observations->get_max_vector_length())||(state>N))
01162 SG_DEBUG("index out of range in get_psi(%i,%i) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N) ;
01163 #endif
01164 return STATES_PER_OBSERVATION_PSI(dimension)[time*N+state];
01165 }
01166 #endif //NOVIT
01167
01168 protected:
01173
01174 INT M;
01175
01177 INT N;
01178
01180 DREAL PSEUDO;
01181
01182
01183 INT line;
01184
01186 CStringFeatures<WORD>* p_observations;
01187
01188
01189 CModel* model;
01190
01192 DREAL* transition_matrix_A;
01193
01195 DREAL* observation_matrix_B;
01196
01198 DREAL* transition_matrix_a;
01199
01201 DREAL* initial_state_distribution_p;
01202
01204 DREAL* end_state_distribution_q;
01205
01207 DREAL* observation_matrix_b;
01208
01210 INT iterations;
01211 INT iteration_count;
01212
01214 DREAL epsilon;
01215 INT conv_it;
01216
01217 #ifndef NOVIT
01219 DREAL all_pat_prob;
01220
01222 DREAL pat_prob;
01223 #endif // NOVIT
01225 DREAL mod_prob;
01226
01228 bool mod_prob_updated;
01229 #ifndef NOVIT
01231 bool all_path_prob_updated;
01232
01234 INT path_deriv_dimension;
01235
01237 bool path_deriv_updated;
01238 #endif // NOVIT
01239
01240
01241 bool loglikelihood;
01242
01243
01244 bool status;
01245
01246
01247 bool reused_caches;
01249
01250 #ifdef USE_HMMPARALLEL_STRUCTURES
01251
01252 DREAL** arrayN1 ;
01254 DREAL** arrayN2 ;
01255 #else //USE_HMMPARALLEL_STRUCTURES
01256
01257 DREAL* arrayN1;
01259 DREAL* arrayN2;
01260 #endif //USE_HMMPARALLEL_STRUCTURES
01261
01262 #ifdef USE_LOGSUMARRAY
01263 #ifdef USE_HMMPARALLEL_STRUCTURES
01264
01265 DREAL** arrayS ;
01266 #else
01267
01268 DREAL* arrayS;
01269 #endif // USE_HMMPARALLEL_STRUCTURES
01270 #endif // USE_LOGSUMARRAY
01271
01272 #ifdef USE_HMMPARALLEL_STRUCTURES
01273
01275 T_ALPHA_BETA* alpha_cache ;
01277 T_ALPHA_BETA* beta_cache ;
01278
01279 #ifndef NOVIT
01281 T_STATES** states_per_observation_psi ;
01282
01284 T_STATES** path ;
01285
01287 bool* path_prob_updated ;
01288
01290 INT* path_prob_dimension ;
01291 #endif //NOVIT
01292
01293 #else //USE_HMMPARALLEL_STRUCTURES
01295 T_ALPHA_BETA alpha_cache;
01297 T_ALPHA_BETA beta_cache;
01298
01299 #ifndef NOVIT
01301 T_STATES* states_per_observation_psi;
01302
01304 T_STATES* path;
01305
01307 bool path_prob_updated;
01308
01310 INT path_prob_dimension;
01311 #endif // NOVIT
01312
01313 #endif //USE_HMMPARALLEL_STRUCTURES
01314
01315
01317 static const INT GOTN;
01319 static const INT GOTM;
01321 static const INT GOTO;
01323 static const INT GOTa;
01325 static const INT GOTb;
01327 static const INT GOTp;
01329 static const INT GOTq;
01330
01332 static const INT GOTlearn_a;
01334 static const INT GOTlearn_b;
01336 static const INT GOTlearn_p;
01338 static const INT GOTlearn_q;
01340 static const INT GOTconst_a;
01342 static const INT GOTconst_b;
01344 static const INT GOTconst_p;
01346 static const INT GOTconst_q;
01347
01348 public:
01353
01355 inline DREAL state_probability(INT time, INT state, INT dimension)
01356 {
01357 return forward(time, state, dimension) + backward(time, state, dimension) - model_probability(dimension);
01358 }
01359
01361 inline DREAL transition_probability(INT time, INT state_i, INT state_j, INT dimension)
01362 {
01363 return forward(time, state_i, dimension) +
01364 backward(time+1, state_j, dimension) +
01365 get_a(state_i,state_j) + get_b(state_j,p_observations->get_feature(dimension ,time+1)) - model_probability(dimension);
01366 }
01367
01374
01377 inline DREAL linear_model_derivative(T_STATES i, WORD j, INT dimension)
01378 {
01379 DREAL der=0;
01380
01381 for (INT k=0; k<N; k++)
01382 {
01383 if (k!=i || p_observations->get_feature(dimension, k) != j)
01384 der+=get_b(k, p_observations->get_feature(dimension, k));
01385 }
01386
01387 return der;
01388 }
01389
01393 inline DREAL model_derivative_p(T_STATES i, INT dimension)
01394 {
01395 return backward(0,i,dimension)+get_b(i, p_observations->get_feature(dimension, 0));
01396 }
01397
01401 inline DREAL model_derivative_q(T_STATES i, INT dimension)
01402 {
01403 return forward(p_observations->get_vector_length(dimension)-1,i,dimension) ;
01404 }
01405
01407 inline DREAL model_derivative_a(T_STATES i, T_STATES j, INT dimension)
01408 {
01409 DREAL sum=-CMath::INFTY;
01410 for (INT t=0; t<p_observations->get_vector_length(dimension)-1; t++)
01411 sum= CMath::logarithmic_sum(sum, forward(t, i, dimension) + backward(t+1, j, dimension) + get_b(j, p_observations->get_feature(dimension,t+1)));
01412
01413 return sum;
01414 }
01415
01416
01418 inline DREAL model_derivative_b(T_STATES i, WORD j, INT dimension)
01419 {
01420 DREAL sum=-CMath::INFTY;
01421 for (INT t=0; t<p_observations->get_vector_length(dimension); t++)
01422 {
01423 if (p_observations->get_feature(dimension,t)==j)
01424 sum= CMath::logarithmic_sum(sum, forward(t,i,dimension)+backward(t,i,dimension)-get_b(i,p_observations->get_feature(dimension,t)));
01425 }
01426
01427
01428 return sum;
01429 }
01431
01432 #ifndef NOVIT
01433
01439
01441 inline DREAL path_derivative_p(T_STATES i, INT dimension)
01442 {
01443 best_path(dimension);
01444 return (i==PATH(dimension)[0]) ? (exp(-get_p(PATH(dimension)[0]))) : (0) ;
01445 }
01446
01448 inline DREAL path_derivative_q(T_STATES i, INT dimension)
01449 {
01450 best_path(dimension);
01451 return (i==PATH(dimension)[p_observations->get_vector_length(dimension)-1]) ? (exp(-get_q(PATH(dimension)[p_observations->get_vector_length(dimension)-1]))) : 0 ;
01452 }
01453
01455 inline DREAL path_derivative_a(T_STATES i, T_STATES j, INT dimension)
01456 {
01457 prepare_path_derivative(dimension) ;
01458 return (get_A(i,j)==0) ? (0) : (get_A(i,j)*exp(-get_a(i,j))) ;
01459 }
01460
01462 inline DREAL path_derivative_b(T_STATES i, WORD j, INT dimension)
01463 {
01464 prepare_path_derivative(dimension) ;
01465 return (get_B(i,j)==0) ? (0) : (get_B(i,j)*exp(-get_b(i,j))) ;
01466 }
01467
01469
01470
01471 protected:
01476
01477 bool get_numbuffer(FILE* file, CHAR* buffer, INT length);
01478
01480 void open_bracket(FILE* file);
01481
01483 void close_bracket(FILE* file);
01484
01486 bool comma_or_space(FILE* file);
01487
01489 inline void error(INT p_line, const CHAR* str)
01490 {
01491 if (p_line)
01492 SG_ERROR( "error in line %d %s\n", p_line, str);
01493 else
01494 SG_ERROR( "error %s\n", str);
01495 }
01497
01499 inline void prepare_path_derivative(INT dim)
01500 {
01501 if (path_deriv_updated && (path_deriv_dimension==dim))
01502 return ;
01503 INT i,j,t ;
01504 best_path(dim);
01505
01506 for (i=0; i<N; i++)
01507 {
01508 for (j=0; j<N; j++)
01509 set_A(i,j, 0);
01510 for (j=0; j<M; j++)
01511 set_B(i,j, 0);
01512 }
01513
01514
01515 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01516 {
01517 set_A(PATH(dim)[t], PATH(dim)[t+1], get_A(PATH(dim)[t], PATH(dim)[t+1])+1);
01518 set_B(PATH(dim)[t], p_observations->get_feature(dim,t), get_B(PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01519 }
01520 set_B(PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1);
01521 path_deriv_dimension=dim ;
01522 path_deriv_updated=true ;
01523 } ;
01524 #endif // NOVIT
01525
01526
01528 inline DREAL forward(INT time, INT state, INT dimension)
01529 {
01530 if (time<1)
01531 time=0;
01532
01533 if (ALPHA_CACHE(dimension).table && (dimension==ALPHA_CACHE(dimension).dimension) && ALPHA_CACHE(dimension).updated)
01534 {
01535 if (time<p_observations->get_vector_length(dimension))
01536 return ALPHA_CACHE(dimension).table[time*N+state];
01537 else
01538 return ALPHA_CACHE(dimension).sum;
01539 }
01540 else
01541 {
01542
01543 return forward_comp(time, state, dimension) ;
01544 } ;
01545 } ;
01546
01548 inline DREAL backward(INT time, INT state, INT dimension)
01549 {
01550 if (BETA_CACHE(dimension).table && (dimension==BETA_CACHE(dimension).dimension) && (BETA_CACHE(dimension).updated))
01551 {
01552 if (time<0)
01553 return BETA_CACHE(dimension).sum;
01554 if (time<p_observations->get_vector_length(dimension))
01555 return BETA_CACHE(dimension).table[time*N+state];
01556 else
01557 return -CMath::INFTY;
01558 }
01559 else
01560 {
01561
01562 return backward_comp(time, state, dimension) ;
01563 } ;
01564 } ;
01565
01566 };
01567 #endif