00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef __CHMM_H__
00013 #define __CHMM_H__
00014
00015 #include "lib/Mathematics.h"
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/config.h"
00019 #include "features/StringFeatures.h"
00020 #include "distributions/Distribution.h"
00021
00022 #include <stdio.h>
00023
00024 #ifdef USE_HMMPARALLEL
00025 #define USE_HMMPARALLEL_STRUCTURES 1
00026 #endif
00027
00028 class CHMM;
00031
00033 typedef float64_t T_ALPHA_BETA_TABLE;
00034
00036 struct T_ALPHA_BETA
00037 {
00039 int32_t dimension;
00040
00042 T_ALPHA_BETA_TABLE* table;
00043
00045 bool updated;
00046
00048 float64_t sum;
00049 };
00050
00055 #ifdef USE_BIGSTATES
00056 typedef uint16_t T_STATES ;
00057 #else
00058 typedef uint8_t T_STATES ;
00059 #endif
00060 typedef T_STATES* P_STATES ;
00061
00063
00064 enum BaumWelchViterbiType
00065 {
00066 BW_NORMAL,
00067 BW_TRANS,
00068 BW_DEFINED,
00069 VIT_NORMAL,
00070 VIT_DEFINED
00071 };
00072
00073
00075 class CModel
00076 {
00077 public:
00079 CModel();
00080
00082 virtual ~CModel();
00083
00085 inline void sort_learn_a()
00086 {
00087 CMath::sort(learn_a,2) ;
00088 }
00089
00091 inline void sort_learn_b()
00092 {
00093 CMath::sort(learn_b,2) ;
00094 }
00095
00100
00101 inline int32_t get_learn_a(int32_t line, int32_t column) const
00102 {
00103 return learn_a[line*2 + column];
00104 }
00105
00107 inline int32_t get_learn_b(int32_t line, int32_t column) const
00108 {
00109 return learn_b[line*2 + column];
00110 }
00111
00113 inline int32_t get_learn_p(int32_t offset) const
00114 {
00115 return learn_p[offset];
00116 }
00117
00119 inline int32_t get_learn_q(int32_t offset) const
00120 {
00121 return learn_q[offset];
00122 }
00123
00125 inline int32_t get_const_a(int32_t line, int32_t column) const
00126 {
00127 return const_a[line*2 + column];
00128 }
00129
00131 inline int32_t get_const_b(int32_t line, int32_t column) const
00132 {
00133 return const_b[line*2 + column];
00134 }
00135
00137 inline int32_t get_const_p(int32_t offset) const
00138 {
00139 return const_p[offset];
00140 }
00141
00143 inline int32_t get_const_q(int32_t offset) const
00144 {
00145 return const_q[offset];
00146 }
00147
00149 inline float64_t get_const_a_val(int32_t line) const
00150 {
00151 return const_a_val[line];
00152 }
00153
00155 inline float64_t get_const_b_val(int32_t line) const
00156 {
00157 return const_b_val[line];
00158 }
00159
00161 inline float64_t get_const_p_val(int32_t offset) const
00162 {
00163 return const_p_val[offset];
00164 }
00165
00167 inline float64_t get_const_q_val(int32_t offset) const
00168 {
00169 return const_q_val[offset];
00170 }
00171 #ifdef FIX_POS
00173 inline char get_fix_pos_state(int32_t pos, T_STATES state, T_STATES num_states)
00174 {
00175 #ifdef HMM_DEBUG
00176 if ((pos<0)||(pos*num_states+state>65336))
00177 SG_DEBUG("index out of range in get_fix_pos_state(%i,%i,%i) \n", pos,state,num_states) ;
00178 #endif
00179 return fix_pos_state[pos*num_states+state] ;
00180 }
00181 #endif
00182
00183
00188
00189 inline void set_learn_a(int32_t offset, int32_t value)
00190 {
00191 learn_a[offset]=value;
00192 }
00193
00195 inline void set_learn_b(int32_t offset, int32_t value)
00196 {
00197 learn_b[offset]=value;
00198 }
00199
00201 inline void set_learn_p(int32_t offset, int32_t value)
00202 {
00203 learn_p[offset]=value;
00204 }
00205
00207 inline void set_learn_q(int32_t offset, int32_t value)
00208 {
00209 learn_q[offset]=value;
00210 }
00211
00213 inline void set_const_a(int32_t offset, int32_t value)
00214 {
00215 const_a[offset]=value;
00216 }
00217
00219 inline void set_const_b(int32_t offset, int32_t value)
00220 {
00221 const_b[offset]=value;
00222 }
00223
00225 inline void set_const_p(int32_t offset, int32_t value)
00226 {
00227 const_p[offset]=value;
00228 }
00229
00231 inline void set_const_q(int32_t offset, int32_t value)
00232 {
00233 const_q[offset]=value;
00234 }
00235
00237 inline void set_const_a_val(int32_t offset, float64_t value)
00238 {
00239 const_a_val[offset]=value;
00240 }
00241
00243 inline void set_const_b_val(int32_t offset, float64_t value)
00244 {
00245 const_b_val[offset]=value;
00246 }
00247
00249 inline void set_const_p_val(int32_t offset, float64_t value)
00250 {
00251 const_p_val[offset]=value;
00252 }
00253
00255 inline void set_const_q_val(int32_t offset, float64_t value)
00256 {
00257 const_q_val[offset]=value;
00258 }
00259 #ifdef FIX_POS
00261 inline void set_fix_pos_state(
00262 int32_t pos, T_STATES state, T_STATES num_states, char value)
00263 {
00264 #ifdef HMM_DEBUG
00265 if ((pos<0)||(pos*num_states+state>65336))
00266 SG_DEBUG("index out of range in set_fix_pos_state(%i,%i,%i,%i) [%i]\n", pos,state,num_states,(int)value, pos*num_states+state) ;
00267 #endif
00268 fix_pos_state[pos*num_states+state]=value;
00269 if (value==FIX_ALLOWED)
00270 for (int32_t i=0; i<num_states; i++)
00271 if (get_fix_pos_state(pos,i,num_states)==FIX_DEFAULT)
00272 set_fix_pos_state(pos,i,num_states,FIX_DISALLOWED) ;
00273 }
00275
00277 const static char FIX_DISALLOWED ;
00278
00280 const static char FIX_ALLOWED ;
00281
00283 const static char FIX_DEFAULT ;
00284
00286 const static float64_t DISALLOWED_PENALTY ;
00287 #endif
00288 protected:
00295
00296 int32_t* learn_a;
00297
00299 int32_t* learn_b;
00300
00302 int32_t* learn_p;
00303
00305 int32_t* learn_q;
00307
00314
00315 int32_t* const_a;
00316
00318 int32_t* const_b;
00319
00321 int32_t* const_p;
00322
00324 int32_t* const_q;
00325
00326
00328 float64_t* const_a_val;
00329
00331 float64_t* const_b_val;
00332
00334 float64_t* const_p_val;
00335
00337 float64_t* const_q_val;
00338
00339 #ifdef FIX_POS
00340
00343 char* fix_pos_state;
00344 #endif
00345
00346 };
00347
00348
00359 class CHMM : public CDistribution
00360 {
00361 private:
00362
00363 T_STATES trans_list_len ;
00364 T_STATES **trans_list_forward ;
00365 T_STATES *trans_list_forward_cnt ;
00366 float64_t **trans_list_forward_val ;
00367 T_STATES **trans_list_backward ;
00368 T_STATES *trans_list_backward_cnt ;
00369 bool mem_initialized ;
00370
00371 #ifdef USE_HMMPARALLEL_STRUCTURES
00372
00374 struct S_MODEL_PROB_THREAD_PARAM
00375 {
00376 CHMM * hmm;
00377 int32_t dim_start;
00378 int32_t dim_stop;
00379
00380 float64_t prob_sum;
00381 };
00382
00384 struct S_BW_THREAD_PARAM
00385 {
00386 CHMM * hmm;
00387 int32_t dim ;
00388 int32_t dim_start;
00389 int32_t dim_stop;
00390
00391 float64_t ret;
00392 float64_t prob;
00393
00394 float64_t* p_buf;
00395 float64_t* q_buf;
00396 float64_t* a_buf;
00397 float64_t* b_buf;
00398 };
00399
00400 inline T_ALPHA_BETA & ALPHA_CACHE(int32_t dim) {
00401 return alpha_cache[dim%parallel.get_num_threads()] ; } ;
00402 inline T_ALPHA_BETA & BETA_CACHE(int32_t dim) {
00403 return beta_cache[dim%parallel.get_num_threads()] ; } ;
00404 #ifdef USE_LOGSUMARRAY
00405 inline float64_t* ARRAYS(int32_t dim) {
00406 return arrayS[dim%parallel.get_num_threads()] ; } ;
00407 #endif
00408 inline float64_t* ARRAYN1(int32_t dim) {
00409 return arrayN1[dim%parallel.get_num_threads()] ; } ;
00410 inline float64_t* ARRAYN2(int32_t dim) {
00411 return arrayN2[dim%parallel.get_num_threads()] ; } ;
00412 inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) {
00413 return states_per_observation_psi[dim%parallel.get_num_threads()] ; } ;
00414 inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) const {
00415 return states_per_observation_psi[dim%parallel.get_num_threads()] ; } ;
00416 inline T_STATES* PATH(int32_t dim) {
00417 return path[dim%parallel.get_num_threads()] ; } ;
00418 inline bool & PATH_PROB_UPDATED(int32_t dim) {
00419 return path_prob_updated[dim%parallel.get_num_threads()] ; } ;
00420 inline int32_t & PATH_PROB_DIMENSION(int32_t dim) {
00421 return path_prob_dimension[dim%parallel.get_num_threads()] ; } ;
00422 #else
00423 inline T_ALPHA_BETA & ALPHA_CACHE(int32_t ) {
00424 return alpha_cache ; } ;
00425 inline T_ALPHA_BETA & BETA_CACHE(int32_t ) {
00426 return beta_cache ; } ;
00427 #ifdef USE_LOGSUMARRAY
00428 inline float64_t* ARRAYS(int32_t dim) {
00429 return arrayS ; } ;
00430 #endif
00431 inline float64_t* ARRAYN1(int32_t ) {
00432 return arrayN1 ; } ;
00433 inline float64_t* ARRAYN2(int32_t ) {
00434 return arrayN2 ; } ;
00435 inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t ) {
00436 return states_per_observation_psi ; } ;
00437 inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t ) const {
00438 return states_per_observation_psi ; } ;
00439 inline T_STATES* PATH(int32_t ) {
00440 return path ; } ;
00441 inline bool & PATH_PROB_UPDATED(int32_t ) {
00442 return path_prob_updated ; } ;
00443 inline int32_t & PATH_PROB_DIMENSION(int32_t ) {
00444 return path_prob_dimension ; } ;
00445 #endif
00446
00451 bool converge(float64_t x, float64_t y);
00452
00457 void switch_model(CHMM** m1, CHMM** m2);
00458
00464 public:
00475 CHMM(
00476 int32_t N, int32_t M, CModel* model, float64_t PSEUDO);
00477 CHMM(
00478 CStringFeatures<uint16_t>* obs, int32_t N, int32_t M,
00479 float64_t PSEUDO);
00480 CHMM(
00481 int32_t N, float64_t* p, float64_t* q, float64_t* a);
00482 CHMM(
00483 int32_t N, float64_t* p, float64_t* q, int32_t num_trans,
00484 float64_t* a_trans);
00485
00490 CHMM(FILE* model_file, float64_t PSEUDO);
00491
00493 CHMM(CHMM* h);
00494
00496 virtual ~CHMM();
00497
00498 virtual inline bool train() { return false; }
00499 virtual inline int32_t get_num_model_parameters() { return N*(N+M+2); }
00500 virtual float64_t get_log_model_parameter(int32_t num_param);
00501 virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example);
00502 virtual float64_t get_log_likelihood_example(int32_t num_example)
00503 {
00504 return model_probability(num_example);
00505 }
00506
00512 bool initialize(CModel* model, float64_t PSEUDO, FILE* model_file=NULL);
00514
00516 bool alloc_state_dependend_arrays();
00517
00519 void free_state_dependend_arrays();
00520
00532 float64_t forward_comp(int32_t time, int32_t state, int32_t dimension);
00533 float64_t forward_comp_old(
00534 int32_t time, int32_t state, int32_t dimension);
00535
00543 float64_t backward_comp(int32_t time, int32_t state, int32_t dimension);
00544 float64_t backward_comp_old(
00545 int32_t time, int32_t state, int32_t dimension);
00546
00547 #ifndef NOVIT
00548
00552 float64_t best_path(int32_t dimension);
00553 inline uint16_t get_best_path_state(int32_t dim, int32_t t)
00554 {
00555 ASSERT(PATH(dim));
00556 return PATH(dim)[t];
00557 }
00558 #endif
00559
00562 float64_t model_probability_comp() ;
00563
00565 inline float64_t model_probability(int32_t dimension=-1)
00566 {
00567
00568 if (dimension==-1)
00569 {
00570 if (mod_prob_updated)
00571 return mod_prob/p_observations->get_num_vectors();
00572 else
00573 return model_probability_comp()/p_observations->get_num_vectors();
00574 }
00575 else
00576 return forward(p_observations->get_vector_length(dimension), 0, dimension);
00577 }
00578
00584 inline float64_t linear_model_probability(int32_t dimension)
00585 {
00586 float64_t lik=0;
00587 int32_t len=0;
00588 uint16_t* o=p_observations->get_feature_vector(dimension, len);
00589 float64_t* obs_b=observation_matrix_b;
00590
00591 ASSERT(N==len);
00592
00593 for (int32_t i=0; i<N; i++)
00594 {
00595 lik+=obs_b[*o++];
00596 obs_b+=M;
00597 }
00598 return lik;
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608 }
00609
00611
00614 inline bool set_iterations(int32_t num) { iterations=num; return true; }
00615 inline int32_t get_iterations() { return iterations; }
00616 inline bool set_epsilon (float64_t eps) { epsilon=eps; return true; }
00617 inline float64_t get_epsilon() { return epsilon; }
00618
00622 bool baum_welch_viterbi_train(BaumWelchViterbiType type);
00623
00630 void estimate_model_baum_welch(CHMM* train);
00631 void estimate_model_baum_welch_old(CHMM* train);
00632 void estimate_model_baum_welch_trans(CHMM* train);
00633
00634 #ifdef USE_HMMPARALLEL_STRUCTURES
00635 void ab_buf_comp(
00636 float64_t* p_buf, float64_t* q_buf, float64_t* a_buf,
00637 float64_t* b_buf, int32_t dim) ;
00638 #endif
00639
00643 void estimate_model_baum_welch_defined(CHMM* train);
00644
00648 void estimate_model_viterbi(CHMM* train);
00649
00653 void estimate_model_viterbi_defined(CHMM* train);
00654
00656
00658 bool linear_train(bool right_align=false);
00659
00661 bool permutation_entropy(int32_t window_width, int32_t sequence_number);
00662
00669 void output_model(bool verbose=false);
00670
00672 void output_model_defined(bool verbose=false);
00674
00675
00678
00680 void normalize(bool keep_dead_states=false);
00681
00685 void add_states(int32_t num_states, float64_t default_val=0);
00686
00692 bool append_model(
00693 CHMM* append_model, float64_t* cur_out, float64_t* app_out);
00694
00698 bool append_model(CHMM* append_model);
00699
00701 void chop(float64_t value);
00702
00704 void convert_to_log();
00705
00707 void init_model_random();
00708
00714 void init_model_defined();
00715
00717 void clear_model();
00718
00720 void clear_model_defined();
00721
00723 void copy_model(CHMM* l);
00724
00729 void invalidate_model();
00730
00734 inline bool get_status() const
00735 {
00736 return status;
00737 }
00738
00740 inline float64_t get_pseudo() const
00741 {
00742 return PSEUDO ;
00743 }
00744
00746 inline void set_pseudo(float64_t pseudo)
00747 {
00748 PSEUDO=pseudo ;
00749 }
00750
00751 #ifdef USE_HMMPARALLEL_STRUCTURES
00752
00753 static void* bw_dim_prefetch(void * params);
00754 #ifndef NOVIT
00755 static void* vit_dim_prefetch(void * params);
00756 #endif
00757
00761 float64_t prefetch(
00762 int32_t dim, bool bw, float64_t* p_buff=NULL,
00763 float64_t* q_buf=NULL, float64_t* a_buf=NULL,
00764 float64_t* b_buf=NULL) ;
00765 #endif
00766
00767 #ifdef FIX_POS
00768
00771 inline bool set_fix_pos_state(int32_t pos, T_STATES state, char value)
00772 {
00773 if (!model)
00774 return false ;
00775 model->set_fix_pos_state(pos, state, N, value) ;
00776 return true ;
00777 } ;
00778 #endif
00779
00780
00789 void set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda=NULL);
00790
00794 void set_observation_nocache(CStringFeatures<uint16_t>* obs);
00795
00797 inline CStringFeatures<uint16_t>* get_observations()
00798 {
00799 return p_observations;
00800 }
00802
00870 bool load_definitions(FILE* file, bool verbose, bool initialize=true);
00871
00907 bool load_model(FILE* file);
00908
00912 bool save_model(FILE* file);
00913
00917 bool save_model_derivatives(FILE* file);
00918
00922 bool save_model_derivatives_bin(FILE* file);
00923
00927 bool save_model_bin(FILE* file);
00928
00930 bool check_model_derivatives() ;
00931 bool check_model_derivatives_combined() ;
00932 #ifndef NOVIT
00933
00938 T_STATES* get_path(int32_t dim, float64_t& prob);
00939
00943 bool save_path(FILE* file);
00944
00948 bool save_path_derivatives(FILE* file);
00949
00953 bool save_path_derivatives_bin(FILE* file);
00954
00955 #ifdef USE_HMMDEBUG
00957 bool check_path_derivatives() ;
00958 #endif //USE_HMMDEBUG
00959 #endif //NOVIT
00960
00964 bool save_likelihood_bin(FILE* file);
00965
00969 bool save_likelihood(FILE* file);
00971
00977
00979 inline T_STATES get_N() const { return N ; }
00980
00982 inline int32_t get_M() const { return M ; }
00983
00988 inline void set_q(T_STATES offset, float64_t value)
00989 {
00990 #ifdef HMM_DEBUG
00991 if (offset>=N)
00992 SG_DEBUG("index out of range in set_q(%i,%e) [%i]\n", offset,value,N) ;
00993 #endif
00994 end_state_distribution_q[offset]=value;
00995 }
00996
01001 inline void set_p(T_STATES offset, float64_t value)
01002 {
01003 #ifdef HMM_DEBUG
01004 if (offset>=N)
01005 SG_DEBUG("index out of range in set_p(%i,.) [%i]\n", offset,N) ;
01006 #endif
01007 initial_state_distribution_p[offset]=value;
01008 }
01009
01015 inline void set_A(T_STATES line_, T_STATES column, float64_t value)
01016 {
01017 #ifdef HMM_DEBUG
01018 if ((line_>N)||(column>N))
01019 SG_DEBUG("index out of range in set_A(%i,%i,.) [%i,%i]\n",line_,column,N,N) ;
01020 #endif
01021 transition_matrix_A[line_+column*N]=value;
01022 }
01023
01029 inline void set_a(T_STATES line_, T_STATES column, float64_t value)
01030 {
01031 #ifdef HMM_DEBUG
01032 if ((line_>N)||(column>N))
01033 SG_DEBUG("index out of range in set_a(%i,%i,.) [%i,%i]\n",line_,column,N,N) ;
01034 #endif
01035 transition_matrix_a[line_+column*N]=value;
01036 }
01037
01043 inline void set_B(T_STATES line_, uint16_t column, float64_t value)
01044 {
01045 #ifdef HMM_DEBUG
01046 if ((line_>=N)||(column>=M))
01047 SG_DEBUG("index out of range in set_B(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01048 #endif
01049 observation_matrix_B[line_*M+column]=value;
01050 }
01051
01057 inline void set_b(T_STATES line_, uint16_t column, float64_t value)
01058 {
01059 #ifdef HMM_DEBUG
01060 if ((line_>=N)||(column>=M))
01061 SG_DEBUG("index out of range in set_b(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01062 #endif
01063 observation_matrix_b[line_*M+column]=value;
01064 }
01065
01066 #ifndef NOVIT
01067
01073 inline void set_psi(
01074 int32_t time, T_STATES state, T_STATES value, int32_t dimension)
01075 {
01076 #ifdef HMM_DEBUG
01077 if ((time>=p_observations->get_max_vector_length())||(state>N))
01078 SG_DEBUG("index out of range in set_psi(%i,%i,.) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N) ;
01079 #endif
01080 STATES_PER_OBSERVATION_PSI(dimension)[time*N+state]=value;
01081 }
01082 #endif // NOVIT
01083
01088 inline float64_t get_q(T_STATES offset) const
01089 {
01090 #ifdef HMM_DEBUG
01091 if (offset>=N)
01092 SG_DEBUG("index out of range in %e=get_q(%i) [%i]\n", end_state_distribution_q[offset],offset,N) ;
01093 #endif
01094 return end_state_distribution_q[offset];
01095 }
01096
01101 inline float64_t get_p(T_STATES offset) const
01102 {
01103 #ifdef HMM_DEBUG
01104 if (offset>=N)
01105 SG_DEBUG("index out of range in get_p(%i,.) [%i]\n", offset,N) ;
01106 #endif
01107 return initial_state_distribution_p[offset];
01108 }
01109
01115 inline float64_t get_A(T_STATES line_, T_STATES column) const
01116 {
01117 #ifdef HMM_DEBUG
01118 if ((line_>N)||(column>N))
01119 SG_DEBUG("index out of range in get_A(%i,%i) [%i,%i]\n",line_,column,N,N) ;
01120 #endif
01121 return transition_matrix_A[line_+column*N];
01122 }
01123
01129 inline float64_t get_a(T_STATES line_, T_STATES column) const
01130 {
01131 #ifdef HMM_DEBUG
01132 if ((line_>N)||(column>N))
01133 SG_DEBUG("index out of range in get_a(%i,%i) [%i,%i]\n",line_,column,N,N) ;
01134 #endif
01135 return transition_matrix_a[line_+column*N];
01136 }
01137
01143 inline float64_t get_B(T_STATES line_, uint16_t column) const
01144 {
01145 #ifdef HMM_DEBUG
01146 if ((line_>=N)||(column>=M))
01147 SG_DEBUG("index out of range in get_B(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01148 #endif
01149 return observation_matrix_B[line_*M+column];
01150 }
01151
01157 inline float64_t get_b(T_STATES line_, uint16_t column) const
01158 {
01159 #ifdef HMM_DEBUG
01160 if ((line_>=N)||(column>=M))
01161 SG_DEBUG("index out of range in get_b(%i,%i) [%i,%i]\n", line_, column,N,M) ;
01162 #endif
01163
01164 return observation_matrix_b[line_*M+column];
01165 }
01166
01167 #ifndef NOVIT
01168
01174 inline T_STATES get_psi(
01175 int32_t time, T_STATES state, int32_t dimension) const
01176 {
01177 #ifdef HMM_DEBUG
01178 if ((time>=p_observations->get_max_vector_length())||(state>N))
01179 SG_DEBUG("index out of range in get_psi(%i,%i) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N) ;
01180 #endif
01181 return STATES_PER_OBSERVATION_PSI(dimension)[time*N+state];
01182 }
01183 #endif //NOVIT
01184
01185 protected:
01190
01191 int32_t M;
01192
01194 int32_t N;
01195
01197 float64_t PSEUDO;
01198
01199
01200 int32_t line;
01201
01203 CStringFeatures<uint16_t>* p_observations;
01204
01205
01206 CModel* model;
01207
01209 float64_t* transition_matrix_A;
01210
01212 float64_t* observation_matrix_B;
01213
01215 float64_t* transition_matrix_a;
01216
01218 float64_t* initial_state_distribution_p;
01219
01221 float64_t* end_state_distribution_q;
01222
01224 float64_t* observation_matrix_b;
01225
01227 int32_t iterations;
01228 int32_t iteration_count;
01229
01231 float64_t epsilon;
01232 int32_t conv_it;
01233
01234 #ifndef NOVIT
01236 float64_t all_pat_prob;
01237
01239 float64_t pat_prob;
01240 #endif // NOVIT
01242 float64_t mod_prob;
01243
01245 bool mod_prob_updated;
01246 #ifndef NOVIT
01248 bool all_path_prob_updated;
01249
01251 int32_t path_deriv_dimension;
01252
01254 bool path_deriv_updated;
01255 #endif // NOVIT
01256
01257
01258 bool loglikelihood;
01259
01260
01261 bool status;
01262
01263
01264 bool reused_caches;
01266
01267 #ifdef USE_HMMPARALLEL_STRUCTURES
01268
01269 float64_t** arrayN1 ;
01271 float64_t** arrayN2 ;
01272 #else //USE_HMMPARALLEL_STRUCTURES
01273
01274 float64_t* arrayN1;
01276 float64_t* arrayN2;
01277 #endif //USE_HMMPARALLEL_STRUCTURES
01278
01279 #ifdef USE_LOGSUMARRAY
01280 #ifdef USE_HMMPARALLEL_STRUCTURES
01281
01282 float64_t** arrayS ;
01283 #else
01284
01285 float64_t* arrayS;
01286 #endif // USE_HMMPARALLEL_STRUCTURES
01287 #endif // USE_LOGSUMARRAY
01288
01289 #ifdef USE_HMMPARALLEL_STRUCTURES
01290
01292 T_ALPHA_BETA* alpha_cache ;
01294 T_ALPHA_BETA* beta_cache ;
01295
01296 #ifndef NOVIT
01298 T_STATES** states_per_observation_psi ;
01299
01301 T_STATES** path ;
01302
01304 bool* path_prob_updated ;
01305
01307 int32_t* path_prob_dimension ;
01308 #endif //NOVIT
01309
01310 #else //USE_HMMPARALLEL_STRUCTURES
01312 T_ALPHA_BETA alpha_cache;
01314 T_ALPHA_BETA beta_cache;
01315
01316 #ifndef NOVIT
01318 T_STATES* states_per_observation_psi;
01319
01321 T_STATES* path;
01322
01324 bool path_prob_updated;
01325
01327 int32_t path_prob_dimension;
01328 #endif // NOVIT
01329
01330 #endif //USE_HMMPARALLEL_STRUCTURES
01331
01332
01334 static const int32_t GOTN;
01336 static const int32_t GOTM;
01338 static const int32_t GOTO;
01340 static const int32_t GOTa;
01342 static const int32_t GOTb;
01344 static const int32_t GOTp;
01346 static const int32_t GOTq;
01347
01349 static const int32_t GOTlearn_a;
01351 static const int32_t GOTlearn_b;
01353 static const int32_t GOTlearn_p;
01355 static const int32_t GOTlearn_q;
01357 static const int32_t GOTconst_a;
01359 static const int32_t GOTconst_b;
01361 static const int32_t GOTconst_p;
01363 static const int32_t GOTconst_q;
01364
01365 public:
01370
01372 inline float64_t state_probability(
01373 int32_t time, int32_t state, int32_t dimension)
01374 {
01375 return forward(time, state, dimension) + backward(time, state, dimension) - model_probability(dimension);
01376 }
01377
01379 inline float64_t transition_probability(
01380 int32_t time, int32_t state_i, int32_t state_j, int32_t dimension)
01381 {
01382 return forward(time, state_i, dimension) +
01383 backward(time+1, state_j, dimension) +
01384 get_a(state_i,state_j) + get_b(state_j,p_observations->get_feature(dimension ,time+1)) - model_probability(dimension);
01385 }
01386
01393
01396 inline float64_t linear_model_derivative(
01397 T_STATES i, uint16_t j, int32_t dimension)
01398 {
01399 float64_t der=0;
01400
01401 for (int32_t k=0; k<N; k++)
01402 {
01403 if (k!=i || p_observations->get_feature(dimension, k) != j)
01404 der+=get_b(k, p_observations->get_feature(dimension, k));
01405 }
01406
01407 return der;
01408 }
01409
01413 inline float64_t model_derivative_p(T_STATES i, int32_t dimension)
01414 {
01415 return backward(0,i,dimension)+get_b(i, p_observations->get_feature(dimension, 0));
01416 }
01417
01421 inline float64_t model_derivative_q(T_STATES i, int32_t dimension)
01422 {
01423 return forward(p_observations->get_vector_length(dimension)-1,i,dimension) ;
01424 }
01425
01427 inline float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
01428 {
01429 float64_t sum=-CMath::INFTY;
01430 for (int32_t t=0; t<p_observations->get_vector_length(dimension)-1; t++)
01431 sum= CMath::logarithmic_sum(sum, forward(t, i, dimension) + backward(t+1, j, dimension) + get_b(j, p_observations->get_feature(dimension,t+1)));
01432
01433 return sum;
01434 }
01435
01436
01438 inline float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
01439 {
01440 float64_t sum=-CMath::INFTY;
01441 for (int32_t t=0; t<p_observations->get_vector_length(dimension); t++)
01442 {
01443 if (p_observations->get_feature(dimension,t)==j)
01444 sum= CMath::logarithmic_sum(sum, forward(t,i,dimension)+backward(t,i,dimension)-get_b(i,p_observations->get_feature(dimension,t)));
01445 }
01446
01447
01448 return sum;
01449 }
01451
01452 #ifndef NOVIT
01453
01459
01461 inline float64_t path_derivative_p(T_STATES i, int32_t dimension)
01462 {
01463 best_path(dimension);
01464 return (i==PATH(dimension)[0]) ? (exp(-get_p(PATH(dimension)[0]))) : (0) ;
01465 }
01466
01468 inline float64_t path_derivative_q(T_STATES i, int32_t dimension)
01469 {
01470 best_path(dimension);
01471 return (i==PATH(dimension)[p_observations->get_vector_length(dimension)-1]) ? (exp(-get_q(PATH(dimension)[p_observations->get_vector_length(dimension)-1]))) : 0 ;
01472 }
01473
01475 inline float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
01476 {
01477 prepare_path_derivative(dimension) ;
01478 return (get_A(i,j)==0) ? (0) : (get_A(i,j)*exp(-get_a(i,j))) ;
01479 }
01480
01482 inline float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
01483 {
01484 prepare_path_derivative(dimension) ;
01485 return (get_B(i,j)==0) ? (0) : (get_B(i,j)*exp(-get_b(i,j))) ;
01486 }
01487
01489
01490
01491 protected:
01496
01497 bool get_numbuffer(FILE* file, char* buffer, int32_t length);
01498
01500 void open_bracket(FILE* file);
01501
01503 void close_bracket(FILE* file);
01504
01506 bool comma_or_space(FILE* file);
01507
01509 inline void error(int32_t p_line, const char* str)
01510 {
01511 if (p_line)
01512 SG_ERROR( "error in line %d %s\n", p_line, str);
01513 else
01514 SG_ERROR( "error %s\n", str);
01515 }
01517
01519 inline void prepare_path_derivative(int32_t dim)
01520 {
01521 if (path_deriv_updated && (path_deriv_dimension==dim))
01522 return ;
01523 int32_t i,j,t ;
01524 best_path(dim);
01525
01526 for (i=0; i<N; i++)
01527 {
01528 for (j=0; j<N; j++)
01529 set_A(i,j, 0);
01530 for (j=0; j<M; j++)
01531 set_B(i,j, 0);
01532 }
01533
01534
01535 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01536 {
01537 set_A(PATH(dim)[t], PATH(dim)[t+1], get_A(PATH(dim)[t], PATH(dim)[t+1])+1);
01538 set_B(PATH(dim)[t], p_observations->get_feature(dim,t), get_B(PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01539 }
01540 set_B(PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1);
01541 path_deriv_dimension=dim ;
01542 path_deriv_updated=true ;
01543 } ;
01544 #endif // NOVIT
01545
01546
01548 inline float64_t forward(int32_t time, int32_t state, int32_t dimension)
01549 {
01550 if (time<1)
01551 time=0;
01552
01553 if (ALPHA_CACHE(dimension).table && (dimension==ALPHA_CACHE(dimension).dimension) && ALPHA_CACHE(dimension).updated)
01554 {
01555 if (time<p_observations->get_vector_length(dimension))
01556 return ALPHA_CACHE(dimension).table[time*N+state];
01557 else
01558 return ALPHA_CACHE(dimension).sum;
01559 }
01560 else
01561 {
01562
01563 return forward_comp(time, state, dimension) ;
01564 } ;
01565 } ;
01566
01568 inline float64_t backward(int32_t time, int32_t state, int32_t dimension)
01569 {
01570 if (BETA_CACHE(dimension).table && (dimension==BETA_CACHE(dimension).dimension) && (BETA_CACHE(dimension).updated))
01571 {
01572 if (time<0)
01573 return BETA_CACHE(dimension).sum;
01574 if (time<p_observations->get_vector_length(dimension))
01575 return BETA_CACHE(dimension).table[time*N+state];
01576 else
01577 return -CMath::INFTY;
01578 }
01579 else
01580 {
01581
01582 return backward_comp(time, state, dimension) ;
01583 } ;
01584 } ;
01585
01586 };
01587 #endif