00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016 #include "distributions/hmm/HMM.h"
00017 #include "lib/Mathematics.h"
00018 #include "lib/io.h"
00019 #include "lib/config.h"
00020 #include "base/Parallel.h"
00021 #include "features/StringFeatures.h"
00022 #include "features/CharFeatures.h"
00023 #include "features/Alphabet.h"
00024
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029
// Initial value for a model parameter: log of the caller-supplied
// default_value, or of a random number in [MIN_RAND, MAX_RAND] when the
// default is 0 (expects default_value/MIN_RAND/MAX_RAND in scope at use site).
#define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
// Fixed capacity of the CModel learn_*/const_* tables below.
// NOTE(review): 65336 looks like a typo for 65536 (2^16) — confirm intent
// before relying on the exact capacity.
#define ARRAY_SIZE 65336

#ifdef SUNOS
extern "C" int finite(double);	// not declared by SunOS math headers
#endif
00036
00038
00040
// Bit flags recording which sections of a model/definition file have been
// seen while parsing. The first group covers the basic model sections
// (N, M, O, a, b, p, q); the second covers the learn_*/const_* sections.
// NOTE(review): the parser consuming these flags is outside this chunk.
const int32_t CHMM::GOTN= (1<<1);
const int32_t CHMM::GOTM= (1<<2);
const int32_t CHMM::GOTO= (1<<3);
const int32_t CHMM::GOTa= (1<<4);
const int32_t CHMM::GOTb= (1<<5);
const int32_t CHMM::GOTp= (1<<6);
const int32_t CHMM::GOTq= (1<<7);

const int32_t CHMM::GOTlearn_a= (1<<1);
const int32_t CHMM::GOTlearn_b= (1<<2);
const int32_t CHMM::GOTlearn_p= (1<<3);
const int32_t CHMM::GOTlearn_q= (1<<4);
const int32_t CHMM::GOTconst_a= (1<<5);
const int32_t CHMM::GOTconst_b= (1<<6);
const int32_t CHMM::GOTconst_p= (1<<7);
const int32_t CHMM::GOTconst_q= (1<<8);
00057
// States of the finite-state scanner that reads HMM model/definition files.
// Names mirror the file sections (N, M, a, b, p, q and their learn_/const_
// variants); the scanner itself is not part of this chunk.
enum E_STATE
{
	INITIAL,	// nothing parsed yet
	ARRAYs,
	GET_N,		// reading number of states
	GET_M,		// reading number of observation symbols
	GET_a,		// reading transition matrix section
	GET_b,		// reading emission matrix section
	GET_p,		// reading initial-state distribution section
	GET_q,		// reading end-state distribution section
	GET_learn_a,
	GET_learn_b,
	GET_learn_p,
	GET_learn_q,
	GET_const_a,
	GET_const_b,
	GET_const_p,
	GET_const_q,
	COMMENT,	// inside a comment
	END			// parsing finished
};
00079
00080
#ifdef FIX_POS
// Per-position state-fixing codes used by fix_pos_state[]: a position may
// disallow a state, allow it, or be unconstrained (default). States that are
// FIX_DISALLOWED at a position receive DISALLOWED_PENALTY (≈ -infinity) in
// the Viterbi recursion (see best_path()).
const char CModel::FIX_DISALLOWED=0 ;
const char CModel::FIX_ALLOWED=1 ;
const char CModel::FIX_DEFAULT=-1 ;
const float64_t CModel::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
#endif
00087
00088 CModel::CModel()
00089 {
00090 const_a=new int[ARRAY_SIZE];
00091 const_b=new int[ARRAY_SIZE];
00092 const_p=new int[ARRAY_SIZE];
00093 const_q=new int[ARRAY_SIZE];
00094 const_a_val=new float64_t[ARRAY_SIZE];
00095 const_b_val=new float64_t[ARRAY_SIZE];
00096 const_p_val=new float64_t[ARRAY_SIZE];
00097 const_q_val=new float64_t[ARRAY_SIZE];
00098
00099
00100 learn_a=new int[ARRAY_SIZE];
00101 learn_b=new int[ARRAY_SIZE];
00102 learn_p=new int[ARRAY_SIZE];
00103 learn_q=new int[ARRAY_SIZE];
00104
00105 #ifdef FIX_POS
00106 fix_pos_state = new char[ARRAY_SIZE];
00107 #endif
00108 for (int32_t i=0; i<ARRAY_SIZE; i++)
00109 {
00110 const_a[i]=-1 ;
00111 const_b[i]=-1 ;
00112 const_p[i]=-1 ;
00113 const_q[i]=-1 ;
00114 const_a_val[i]=1.0 ;
00115 const_b_val[i]=1.0 ;
00116 const_p_val[i]=1.0 ;
00117 const_q_val[i]=1.0 ;
00118 learn_a[i]=-1 ;
00119 learn_b[i]=-1 ;
00120 learn_p[i]=-1 ;
00121 learn_q[i]=-1 ;
00122 #ifdef FIX_POS
00123 fix_pos_state[i] = FIX_DEFAULT ;
00124 #endif
00125 } ;
00126 }
00127
00128 CModel::~CModel()
00129 {
00130 delete[] const_a;
00131 delete[] const_b;
00132 delete[] const_p;
00133 delete[] const_q;
00134 delete[] const_a_val;
00135 delete[] const_b_val;
00136 delete[] const_p_val;
00137 delete[] const_q_val;
00138
00139 delete[] learn_a;
00140 delete[] learn_b;
00141 delete[] learn_p;
00142 delete[] learn_q;
00143
00144 #ifdef FIX_POS
00145 delete[] fix_pos_state;
00146 #endif
00147
00148 }
00149
// Copy-construct an HMM: allocate with h's dimensions and pseudo count,
// then copy h's parameters (copy_model) and share its observations.
CHMM::CHMM(CHMM* h)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;

	// dimensions must be set before initialize() allocates the arrays
	this->N=h->get_N();
	this->M=h->get_M();
	status=initialize(NULL, h->get_pseudo());
	this->copy_model(h);
	set_observations(h->get_observations());
}
00161
// Construct an HMM with p_N states and p_M observation symbols; parameters
// are allocated (and randomly initialized) by initialize(). p_model, when
// non-NULL, supplies learn/const constraints; p_PSEUDO is the pseudo count.
CHMM::CHMM(int32_t p_N, int32_t p_M, CModel* p_model, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=p_M;
	model=NULL ;

	SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;

	status=initialize(p_model, p_PSEUDO);
}
00173
00174 CHMM::CHMM(
00175 CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00176 float64_t p_PSEUDO)
00177 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00178 {
00179 this->N=p_N;
00180 this->M=p_M;
00181 model=NULL ;
00182
00183 SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;
00184
00185 initialize(model, p_PSEUDO);
00186 set_observations(obs);
00187 }
00188
// Construct a bare HMM around externally-owned parameter arrays: initial
// distribution p, end distribution q and dense transition matrix a.
// No emission matrix is attached (M=0, observation_matrix_b stays NULL,
// which also keeps free_state_dependend_arrays() from deleting the
// externally-owned arrays).
CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=0;
	model=NULL ;

	// no sparse transition lists for this variant
	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — the PSEUDO member is never given a
	// real value in this constructor; confirm whether a default was intended.
	this->PSEUDO= PSEUDO;
	// NOTE(review): no-op; model was already set to NULL above.
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#ifndef NOVIT
	this->states_per_observation_psi=NULL ;
	this->path=NULL;
#endif //NOVIT
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// adopt the caller's arrays without copying; ownership stays external
	transition_matrix_a=a ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00236
// Construct a sparse HMM from an explicit transition list.
// a_trans is a num_trans x 3 matrix in column-major layout:
//   a_trans[i]             = target state (to)
//   a_trans[i+num_trans]   = source state (from)
//   a_trans[i+2*num_trans] = transition value
// Rows must be sorted by the source-state column (asserted below). Builds
// the per-state forward transition lists; p and q are adopted without copy
// and remain externally owned (observation_matrix_b stays NULL so
// free_state_dependend_arrays() won't delete them).
CHMM::CHMM(
	int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
	float64_t* a_trans)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	model=NULL ;

	this->N=p_N;
	this->M=0;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO member keeps whatever value it
	// had; confirm whether a default was intended.
	this->PSEUDO= PSEUDO;
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#ifndef NOVIT
	this->states_per_observation_psi=NULL ;
	this->path=NULL;
#endif //NOVIT
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	trans_list_forward_cnt=NULL ;
	trans_list_len = N ;
	trans_list_forward = new T_STATES*[N] ;
	trans_list_forward_val = new float64_t*[N] ;
	trans_list_forward_cnt = new T_STATES[N] ;

	// first pass: count the transitions leaving each state j (relying on the
	// sorted 'from' column) and allocate the per-state arrays
	int32_t start_idx=0;
	for (int32_t j=0; j<N; j++)
	{
		int32_t old_start_idx=start_idx;

		while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
		{
			start_idx++;

			// verify sortedness of the 'from' column as we advance
			if (start_idx>1 && start_idx<num_trans)
				ASSERT(a_trans[start_idx+num_trans-1]<=
					a_trans[start_idx+num_trans]);
		}

		// same check repeated after the loop (harmless duplication)
		if (start_idx>1 && start_idx<num_trans)
			ASSERT(a_trans[start_idx+num_trans-1]<=
				a_trans[start_idx+num_trans]);

		int32_t len=start_idx-old_start_idx;
		ASSERT(len>=0);

		trans_list_forward_cnt[j] = 0 ;

		if (len>0)
		{
			trans_list_forward[j] = new T_STATES[len] ;
			trans_list_forward_val[j] = new float64_t[len] ;
		}
		else
		{
			trans_list_forward[j] = NULL;
			trans_list_forward_val[j] = NULL;
		}
	}

	// second pass: fill the per-state lists
	for (int32_t i=0; i<num_trans; i++)
	{
		int32_t from = (int32_t)a_trans[i+num_trans] ;
		int32_t to = (int32_t)a_trans[i] ;
		float64_t val = a_trans[i+num_trans*2] ;

		ASSERT(from>=0 && from<N);
		ASSERT(to>=0 && to<N);

		trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
		trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
		trans_list_forward_cnt[from]++ ;
	} ;

	// adopt the caller's distributions; no dense matrices for this variant
	transition_matrix_a=NULL ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00343
00344
// Construct an HMM from a model file; initialize() loads the file, which is
// presumably where N and M get set — confirm against load_model().
CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;

	status=initialize(NULL, p_PSEUDO, model_file);
}
00352
// Destructor: release observations, sparse transition lists, the
// state-dependent arrays and — unless the caches are shared with another
// HMM (reused_caches) — the alpha/beta caches and Viterbi buffers.
CHMM::~CHMM()
{
	SG_UNREF(p_observations);

	// sparse transition lists (only allocated by the sparse constructor)
	if (trans_list_forward_cnt)
		delete[] trans_list_forward_cnt ;
	if (trans_list_backward_cnt)
		delete[] trans_list_backward_cnt ;
	if (trans_list_forward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward[i])
				delete[] trans_list_forward[i] ;
		delete[] trans_list_forward ;
	}
	if (trans_list_forward_val)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward_val[i])
				delete[] trans_list_forward_val[i] ;
		delete[] trans_list_forward_val ;
	}
	if (trans_list_backward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_backward[i])
				delete[] trans_list_backward[i] ;
		delete[] trans_list_backward ;
	} ;

	free_state_dependend_arrays();

	// alpha/beta caches are only owned by us when not reused from elsewhere
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		for (int32_t i=0; i<parallel.get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
		}

		delete[] alpha_cache;
		delete[] beta_cache;
		alpha_cache=NULL;
		beta_cache=NULL;
#else // USE_HMMPARALLEL_STRUCTURES
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		alpha_cache.table=NULL;
		beta_cache.table=NULL;
#endif // USE_HMMPARALLEL_STRUCTURES

#ifndef NOVIT
		delete[] states_per_observation_psi;
		states_per_observation_psi=NULL;
#endif // NOVIT
	}

#ifdef USE_LOGSUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		for (int32_t i=0; i<parallel.get_num_threads(); i++)
			delete[] arrayS[i];
		delete[] arrayS ;
	} ;
#else //USE_HMMPARALLEL_STRUCTURES
	delete[] arrayS;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_LOGSUMARRAY

	// Viterbi path buffers
	if (!reused_caches)
	{
#ifndef NOVIT
#ifdef USE_HMMPARALLEL_STRUCTURES
		delete[] path_prob_updated ;
		delete[] path_prob_dimension ;
		for (int32_t i=0; i<parallel.get_num_threads(); i++)
			delete[] path[i] ;
#endif //USE_HMMPARALLEL_STRUCTURES
		delete[] path;
#endif
	}
}
00438
00439 bool CHMM::alloc_state_dependend_arrays()
00440 {
00441
00442 if (!transition_matrix_a && !observation_matrix_b &&
00443 !initial_state_distribution_p && !end_state_distribution_q)
00444 {
00445 transition_matrix_a=new float64_t[N*N];
00446 observation_matrix_b=new float64_t[N*M];
00447 initial_state_distribution_p=new float64_t[N];
00448 end_state_distribution_q=new float64_t[N];
00449 init_model_random();
00450 convert_to_log();
00451 }
00452
00453 #ifdef USE_HMMPARALLEL_STRUCTURES
00454 for (int32_t i=0; i<parallel.get_num_threads(); i++)
00455 {
00456 arrayN1[i]=new float64_t[N];
00457 arrayN2[i]=new float64_t[N];
00458 }
00459 #else //USE_HMMPARALLEL_STRUCTURES
00460 arrayN1=new float64_t[N];
00461 arrayN2=new float64_t[N];
00462 #endif //USE_HMMPARALLEL_STRUCTURES
00463
00464 #ifdef LOG_SUMARRAY
00465 #ifdef USE_HMMPARALLEL_STRUCTURES
00466 for (int32_t i=0; i<parallel.get_num_threads(); i++)
00467 arrayS[i]=new float64_t[(int32_t)(this->N/2+1)];
00468 #else //USE_HMMPARALLEL_STRUCTURES
00469 arrayS=new float64_t[(int32_t)(this->N/2+1)];
00470 #endif //USE_HMMPARALLEL_STRUCTURES
00471 #endif //LOG_SUMARRAY
00472 transition_matrix_A=new float64_t[this->N*this->N];
00473 observation_matrix_B=new float64_t[this->N*this->M];
00474
00475 if (p_observations)
00476 {
00477 #ifdef USE_HMMPARALLEL_STRUCTURES
00478 if (alpha_cache[0].table!=NULL)
00479 #else //USE_HMMPARALLEL_STRUCTURES
00480 if (alpha_cache.table!=NULL)
00481 #endif //USE_HMMPARALLEL_STRUCTURES
00482 set_observations(p_observations);
00483 else
00484 set_observation_nocache(p_observations);
00485 }
00486
00487 this->invalidate_model();
00488
00489 return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00490 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
00491 (initial_state_distribution_p != NULL) &&
00492 (end_state_distribution_q != NULL));
00493 }
00494
// Free all arrays whose size depends on N and M and reset the pointers.
void CHMM::free_state_dependend_arrays()
{
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel.get_num_threads(); i++)
	{
		delete[] arrayN1[i];
		delete[] arrayN2[i];

		arrayN1[i]=NULL;
		arrayN2[i]=NULL;
	}
#else
	delete[] arrayN1;
	delete[] arrayN2;
	arrayN1=NULL;
	arrayN2=NULL;
#endif
	// The six parameter arrays are only owned by this object when they were
	// allocated internally; the constructors that adopt external p/q/a leave
	// observation_matrix_b==NULL, hence this guard keeps externally-owned
	// arrays from being deleted.
	if (observation_matrix_b)
	{
		delete[] transition_matrix_A;
		delete[] observation_matrix_B;
		delete[] transition_matrix_a;
		delete[] observation_matrix_b;
		delete[] initial_state_distribution_p;
		delete[] end_state_distribution_q;
	} ;

	transition_matrix_A=NULL;
	observation_matrix_B=NULL;
	transition_matrix_a=NULL;
	observation_matrix_b=NULL;
	initial_state_distribution_p=NULL;
	end_state_distribution_q=NULL;
}
00529
00530 bool CHMM::initialize(CModel* m, float64_t pseudo, FILE* modelfile)
00531 {
00532
00533 bool files_ok=true;
00534
00535 trans_list_forward = NULL ;
00536 trans_list_forward_cnt = NULL ;
00537 trans_list_forward_val = NULL ;
00538 trans_list_backward = NULL ;
00539 trans_list_backward_cnt = NULL ;
00540 trans_list_len = 0 ;
00541 mem_initialized = false ;
00542
00543 this->transition_matrix_a=NULL;
00544 this->observation_matrix_b=NULL;
00545 this->initial_state_distribution_p=NULL;
00546 this->end_state_distribution_q=NULL;
00547 this->PSEUDO= pseudo;
00548 this->model= m;
00549 this->p_observations=NULL;
00550 this->reused_caches=false;
00551
00552 #ifdef USE_HMMPARALLEL_STRUCTURES
00553 alpha_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00554 beta_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00555 states_per_observation_psi=new P_STATES[parallel.get_num_threads()] ;
00556
00557 for (int32_t i=0; i<parallel.get_num_threads(); i++)
00558 {
00559 this->alpha_cache[i].table=NULL;
00560 this->beta_cache[i].table=NULL;
00561 this->alpha_cache[i].dimension=0;
00562 this->beta_cache[i].dimension=0;
00563 #ifndef NOVIT
00564 this->states_per_observation_psi[i]=NULL ;
00565 #endif // NOVIT
00566 }
00567
00568 #else // USE_HMMPARALLEL_STRUCTURES
00569 this->alpha_cache.table=NULL;
00570 this->beta_cache.table=NULL;
00571 this->alpha_cache.dimension=0;
00572 this->beta_cache.dimension=0;
00573 #ifndef NOVIT
00574 this->states_per_observation_psi=NULL ;
00575 #endif //NOVIT
00576
00577 #endif //USE_HMMPARALLEL_STRUCTURES
00578
00579
00580 if (modelfile)
00581 files_ok= files_ok && load_model(modelfile);
00582
00583 #ifdef USE_HMMPARALLEL_STRUCTURES
00584 path_prob_updated=new bool[parallel.get_num_threads()];
00585 path_prob_dimension=new int[parallel.get_num_threads()];
00586
00587 path=new P_STATES[parallel.get_num_threads()];
00588
00589 for (int32_t i=0; i<parallel.get_num_threads(); i++)
00590 {
00591 #ifndef NOVIT
00592 this->path[i]=NULL;
00593 #endif // NOVIT
00594 }
00595 #else // USE_HMMPARALLEL_STRUCTURES
00596 #ifndef NOVIT
00597 this->path=NULL;
00598 #endif //NOVIT
00599
00600 #endif //USE_HMMPARALLEL_STRUCTURES
00601
00602 #ifdef USE_HMMPARALLEL_STRUCTURES
00603 arrayN1=new P_float64_t[parallel.get_num_threads()];
00604 arrayN2=new P_float64_t[parallel.get_num_threads()];
00605 #endif //USE_HMMPARALLEL_STRUCTURES
00606
00607 #ifdef LOG_SUMARRAY
00608 #ifdef USE_HMMPARALLEL_STRUCTURES
00609 arrayS=new P_float64_t[parallel.get_num_threads()] ;
00610 #endif // USE_HMMPARALLEL_STRUCTURES
00611 #endif //LOG_SUMARRAY
00612
00613 alloc_state_dependend_arrays();
00614
00615 this->loglikelihood=false;
00616 mem_initialized = true ;
00617 this->invalidate_model();
00618
00619 return ((files_ok) &&
00620 (transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00621 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00622 (end_state_distribution_q != NULL));
00623 }
00624
00625
00626
00627
00628
00629
// Forward algorithm (sparse-transition variant): computes the log of the
// forward variable alpha_time(state) for observation sequence `dimension`,
// summing only over the precomputed trans_list_forward lists. For
// time >= sequence length, returns the total log-likelihood (alpha summed
// against q). When an alpha cache exists for this dimension, the whole
// trellis is computed and cached.
float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;


	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: fill the full table, rows advance by N per step
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: ping-pong between the two N-sized scratch buffers
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p_i + b_i(O_0)  (log domain)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;


		// induction over t, summing only incoming transitions listed for j
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				register int32_t i, num = trans_list_forward_cnt[j] ;
				float64_t sum=-CMath::INFTY;
				for (i=0; i < num; i++)
				{
					int32_t ii = trans_list_forward[j][i] ;
					sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
				} ;

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap scratch buffers
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next cached row
				alpha=alpha_new;
				alpha_new+=N;
			}
		}


		if (time<p_observations->get_vector_length(dimension))
		{
			// mid-sequence query: one more induction step for `state` only
			register int32_t i, num=trans_list_forward_cnt[state];
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<num; i++)
			{
				int32_t ii = trans_list_forward[state][i] ;
				sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
			} ;

			return sum + get_b(state, p_observations->get_feature(dimension,time));
		}
		else
		{
			// termination: sum alpha_T(i) + q_i over all states

			register int32_t i ;
			float64_t sum ;
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid and answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00729
00730
00731
00732
00733
// Forward algorithm (dense variant): same contract as forward_comp() but
// sums over all N predecessor states instead of the sparse transition
// lists. With USE_LOGSUMARRAY, pairwise partial sums are collected in
// ARRAYS(dimension) and reduced via logarithmic_sum_array.
float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: fill the whole trellis, rows advance by N
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: ping-pong between the two N-sized scratch buffers
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p_i + b_i(O_0)  (log domain)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;


		// induction over t
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{

			for (int32_t j=0; j<N; j++)
			{
				register int32_t i ;
#ifdef USE_LOGSUMARRAY
				// pairwise partial sums, reduced by logarithmic_sum_array;
				// odd N handles the last state separately
				for (i=0; i<(N>>1); i++)
					ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
							alpha[(i<<1)+1] + get_a((i<<1)+1,j));
				if (N%2==1)
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
						CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
								CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (i=0; i<N; i++)
					sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
#endif //USE_LOGSUMARRAY
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap scratch buffers
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next cached row
				alpha=alpha_new;
				alpha_new+=N;
			}
		}


		if (time<p_observations->get_vector_length(dimension))
		{
			// mid-sequence query: one more induction step for `state` only
			register int32_t i;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
						alpha[(i<<1)+1] + get_a((i<<1)+1,state));
			if (N%2==1)
				return get_b(state, p_observations->get_feature(dimension,time))+
					CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));

			return sum + get_b(state, p_observations->get_feature(dimension,time));
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// termination: sum alpha_T(i) + q_i over all states

			register int32_t i ;
			float64_t sum ;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
						alpha[(i<<1)+1] + get_q((i<<1)+1));
			if (N%2==1)
				sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
#endif //USE_LOGSUMARRAY

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid and answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00861
00862
00863
00864
00865
// Backward algorithm (sparse-transition variant): computes the log backward
// variable beta_time(state) for sequence `dimension`, iterating from the end
// of the sequence and summing only over trans_list_backward. For time < 0
// the full likelihood (beta_0 combined with p and b) is returned; a beta
// cache, when present, stores the whole trellis.
float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cache available: fill the table back-to-front, rows recede by N
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// no cache: ping-pong between the two N-sized scratch buffers
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	// initialization: beta_{T-1}(state) = q_state
	if (time>=p_observations->get_vector_length(dimension)-1)


		return get_q(state);
	else
	{

		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);


		// induction backwards over t, summing only outgoing transitions
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j, num=trans_list_backward_cnt[i] ;
				float64_t sum=-CMath::INFTY;
				for (j=0; j<num; j++)
				{
					int32_t jj = trans_list_backward[i][j] ;
					sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
				} ;
				beta_new[i]=sum;
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap scratch buffers
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// recede to the previous cached row
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// mid-sequence query: one more step for `state` only
			register int32_t j, num=trans_list_backward_cnt[state] ;
			float64_t sum=-CMath::INFTY;
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[state][j] ;
				sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
			} ;
			return sum;
		}
		else
		{
			// termination: combine beta_0 with p and b(O_0)
			if (BETA_CACHE(dimension).table)
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
00963
00964
// Backward algorithm (dense variant): same contract as backward_comp() but
// sums over all N successor states instead of the sparse lists. With
// USE_LOGSUMARRAY, pairwise partial sums are collected in ARRAYS(dimension)
// and reduced via logarithmic_sum_array.
float64_t CHMM::backward_comp_old(
	int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cache available: fill the table back-to-front, rows recede by N
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// no cache: ping-pong between the two N-sized scratch buffers
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	// initialization: beta_{T-1}(state) = q_state
	if (time>=p_observations->get_vector_length(dimension)-1)


		return get_q(state);
	else
	{

		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);


		// induction backwards over t
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j ;
#ifdef USE_LOGSUMARRAY
				// pairwise partial sums; odd N handles the last state extra
				for (j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(
							get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
							get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
				if (N%2==1)
					beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);

				beta_new[i]=sum;
#endif //USE_LOGSUMARRAY
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap scratch buffers
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// recede to the previous cached row
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// mid-sequence query: one more step for `state` only
			register int32_t j ;
#ifdef USE_LOGSUMARRAY
			for (j=0; j<(N>>1); j++)
				ARRAYS(dimension)[j]=CMath::logarithmic_sum(
						get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
						get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
			if (N%2==1)
				return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			float64_t sum=-CMath::INFTY;
			for (j=0; j<N; j++)
				sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);

			return sum;
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// termination: combine beta_0 with p and b(O_0)
			if (BETA_CACHE(dimension).table)
			{
#ifdef USE_LOGSUMARRAY//AAA
				for (int32_t j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
							get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
				if (N%2==1)
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
#endif //USE_LOGSUMARRAY
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
01094
01095 #ifndef NOVIT
01096
01097
float64_t CHMM::best_path(int32_t dimension)
{
	// Viterbi decoding for observation vector `dimension`.
	// Returns the log-probability of the single best state path and stores the
	// path itself in PATH(dimension). dimension==-1 computes (and caches) the
	// average best-path log-likelihood over all observation vectors. Returns -1
	// when no observations are attached, the psi backtracking table is missing,
	// or the requested feature vector is unavailable.
	if (!p_observations)
		return -1;

	if (dimension==-1)
	{
		// Aggregate mode: average best_path over every vector, cached in all_pat_prob.
		if (!all_path_prob_updated)
		{
			SG_INFO( "computing full viterbi likelihood\n") ;
			float64_t sum = 0 ;
			for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
				sum+=best_path(i) ;
			sum /= p_observations->get_num_vectors() ;
			all_pat_prob=sum ;
			all_path_prob_updated=true ;
			return sum ;
		} else
			return all_pat_prob ;
	} ;

	// Backtracking storage (psi) must exist for this vector.
	if (!STATES_PER_OBSERVATION_PSI(dimension))
		return -1 ;

	int32_t len=0;
	if (!p_observations->get_feature_vector(dimension,len))
		return -1;

	// Serve the cached result when this dimension's path was already computed.
	if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
		return pat_prob;
	else
	{
		// delta holds the best log-score of any path ending in each state at the
		// current time step; delta_new is the buffer for the next step. The two
		// are swapped each iteration instead of being copied.
		register float64_t* delta= ARRAYN2(dimension);
		register float64_t* delta_new= ARRAYN1(dimension);

		// Initialization (t=0): start-probability plus emission of first symbol.
		{
			for (register int32_t i=0; i<N; i++)
			{
				delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
				set_psi(0, i, 0, dimension);
			}
		}

#ifdef USE_PATHDEBUG
		float64_t worst=-CMath::INFTY/4 ;
#endif

		// Recursion: for every time step t and target state j pick the
		// predecessor i maximizing delta[i] + a(i->j); record argmax in psi.
		for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
		{
			register float64_t* dummy;
			register int32_t NN=N ;
			for (register int32_t j=0; j<NN; j++)
			{
				// matrix_a points at the block of transition scores feeding state j
				// so the inner loop is a contiguous scan (assumes the layout used
				// by get_a for transition_matrix_a — TODO confirm stride).
				register float64_t * matrix_a=&transition_matrix_a[j*N] ;
				register float64_t maxj=delta[0] + matrix_a[0];
				register int32_t argmax=0;

				for (register int32_t i=1; i<NN; i++)
				{
					register float64_t temp = delta[i] + matrix_a[i];

					if (temp>maxj)
					{
						maxj=temp;
						argmax=i;
					}
				}
#ifdef FIX_POS
				// Position-dependent state constraints: penalize disallowed states.
				if ((!model) || (model->get_fix_pos_state(t,j,NN)!=CModel::FIX_DISALLOWED))
#endif
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
#ifdef FIX_POS
				else
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + CModel::DISALLOWED_PENALTY;
#endif
				set_psi(t, j, argmax, dimension);
			}

#ifdef USE_PATHDEBUG
			// Debug aid: report whenever the best reachable score degrades badly.
			float64_t best=log(0) ;
			for (int32_t jj=0; jj<N; jj++)
				if (delta_new[jj]>best)
					best=delta_new[jj] ;

			if (best<-CMath::INFTY/2)
			{
				SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
				worst=best ;
			} ;
#endif

			// Swap buffers: delta_new becomes the current delta for the next step.
			dummy=delta;
			delta=delta_new;
			delta_new=dummy;
		}

		// Termination: add end-state probability q and pick the best final state.
		{
			register float64_t maxj=delta[0]+get_q(0);
			register int32_t argmax=0;

			for (register int32_t i=1; i<N; i++)
			{
				register float64_t temp=delta[i]+get_q(i);

				if (temp>maxj)
				{
					maxj=temp;
					argmax=i;
				}
			}
			pat_prob=maxj;
			PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
		} ;

		// Backtracking: follow psi from the final state to reconstruct the path.
		{
			for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
			{
				PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
			}
		}
		PATH_PROB_UPDATED(dimension)=true;
		PATH_PROB_DIMENSION(dimension)=dimension;
		return pat_prob ;
	}
}
01225
01226
01227 #endif // NOVIT
01228
01229 #ifndef USE_HMMPARALLEL
01230 float64_t CHMM::model_probability_comp()
01231 {
01232
01233 mod_prob=0 ;
01234 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01235 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01236
01237 mod_prob_updated=true;
01238 return mod_prob;
01239 }
01240
01241 #else
01242
01243 float64_t CHMM::model_probability_comp()
01244 {
01245 pthread_t *threads=new pthread_t[parallel.get_num_threads()];
01246 S_MODEL_PROB_PARAM *params=new S_MODEL_PROB_PARAM[parallel.get_num_threads()];
01247
01248 SG_INFO( "computing full model probablity\n");
01249 mod_prob=0;
01250
01251 for (int32_t cpu=0; cpu<parallel.get_num_threads(); cpu++)
01252 {
01253 params[cpu].hmm=this ;
01254 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel.get_num_threads();
01255 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel.get_num_threads();
01256 #ifdef SUNOS
01257 thr_create(NULL,0,bw_dim_prefetch, (void*)¶ms[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]);
01258 #else // SUNOS
01259 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01260 #endif // SUNOS
01261 }
01262
01263 for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01264 {
01265 void* ret;
01266 pthread_join(threads[cpu], &ret);
01267 mod_prob+=(float64_t) ret;
01268 }
01269
01270 delete[] threads;
01271 delete[] params;
01272
01273 mod_prob_updated=true;
01274 return mod_prob;
01275 }
01276
void* CHMM::bw_dim_prefetch(void * params)
{
	// pthread entry point for the Baum-Welch prefetch: unpacks the
	// S_THREAD_PARAM argument, runs prefetch() in forward/backward mode
	// (bw=true) for the single observation index params->dim, and stores the
	// resulting log-probability in params->ret.
	// NOTE(review): this function is defined twice in this translation unit
	// (an identical copy appears further down); under USE_HMMPARALLEL that is
	// a redefinition error — one of the two copies should be removed.
	CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
	int32_t dim=((S_THREAD_PARAM*)params)->dim ;
	float64_t*p_buf=((S_THREAD_PARAM*)params)->p_buf ;
	float64_t*q_buf=((S_THREAD_PARAM*)params)->q_buf ;
	float64_t*a_buf=((S_THREAD_PARAM*)params)->a_buf ;
	float64_t*b_buf=((S_THREAD_PARAM*)params)->b_buf ;
	((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, true, p_buf, q_buf, a_buf, b_buf) ;
	return NULL ;
}
01288
01289 float64_t CHMM::model_prob_thread(void* params)
01290 {
01291 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01292 int32_t dim_start=((S_THREAD_PARAM*)params)->dim_start;
01293 int32_t dim_stop=((S_THREAD_PARAM*)params)->dim_stop;
01294
01295 float64_t prob=0;
01296 for (int32_t dim=dim_start; dim<dim_stop; dim++)
01297 hmm->model_probability(dim);
01298
01299 ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01300 return modprob ;
01301 } ;
01302
void* CHMM::bw_dim_prefetch(void * params)
{
	// pthread entry point for the Baum-Welch prefetch (forward/backward mode);
	// see the identical definition earlier in this file.
	// NOTE(review): this is a verbatim duplicate of the earlier
	// bw_dim_prefetch definition — a redefinition error when USE_HMMPARALLEL
	// is enabled; this copy should be deleted.
	CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
	int32_t dim=((S_THREAD_PARAM*)params)->dim ;
	float64_t*p_buf=((S_THREAD_PARAM*)params)->p_buf ;
	float64_t*q_buf=((S_THREAD_PARAM*)params)->q_buf ;
	float64_t*a_buf=((S_THREAD_PARAM*)params)->a_buf ;
	float64_t*b_buf=((S_THREAD_PARAM*)params)->b_buf ;
	((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, true, p_buf, q_buf, a_buf, b_buf) ;
	return NULL ;
}
01314
01315 #ifndef NOVIT
01316 void* CHMM::vit_dim_prefetch(void * params)
01317 {
01318 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01319 int32_t dim=((S_THREAD_PARAM*)params)->dim ;
01320 ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, false) ;
01321 return NULL ;
01322 } ;
01323 #endif // NOVIT
01324
01325 float64_t CHMM::prefetch(
01326 int32_t dim, bool bw, float64_t* p_buf, float64_t* q_buf, float64_t* a_buf,
01327 float64_t* b_buf)
01328 {
01329 if (bw)
01330 {
01331 forward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01332 backward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01333 float64_t modprob=model_probability(dim) ;
01334 ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01335 return modprob ;
01336 }
01337 #ifndef NOVIT
01338 else
01339 return best_path(dim) ;
01340 #endif // NOVIT
01341 } ;
01342 #endif //USE_HMMPARALLEL
01343
01344
01345 #ifdef USE_HMMPARALLEL
01346
01347 void CHMM::ab_buf_comp(
01348 float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
01349 int32_t dim)
01350 {
01351 int32_t i,j,t ;
01352 float64_t a_sum;
01353 float64_t b_sum;
01354 float64_t prob=0;
01355
01356 float64_t dimmodprob=model_probability(dim);
01357
01358 for (i=0; i<N; i++)
01359 {
01360
01361 p_buf[i]=CMath::logarithmic_sum(get_p(i), get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob);
01362 q_buf[i]=CMath::logarithmic_sum(get_q(i), forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob);
01363
01364
01365 for (j=0; j<N; j++)
01366 {
01367 a_sum=-CMath::INFTY;
01368
01369 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01370 {
01371 a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
01372 get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
01373 }
01374 a_buf[N*i+j]=a_sum-dimmodprob ;
01375 }
01376
01377
01378 for (j=0; j<M; j++)
01379 {
01380 b_sum=CMath::ALMOST_NEG_INFTY;
01381
01382 for (t=0; t<p_observations->get_vector_length(dim); t++)
01383 {
01384 if (p_observations->get_feature(dim,t)==j)
01385 b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
01386 }
01387
01388 b_buf[M*i+j]=b_sum-dimmodprob ;
01389 }
01390 }
01391 }
01392
01393
void CHMM::estimate_model_baum_welch(CHMM* train)
{
	// Parallel Baum-Welch re-estimation: `this` receives the parameters
	// re-estimated from model `train` on the attached observations. Each
	// worker thread accumulates the sufficient statistics of a contiguous
	// range of observation vectors into its own buffers; the per-thread
	// results are merged (log-domain sums) after joining.
	int32_t i,j,t,cpu;
	float64_t a_sum, b_sum; // NOTE(review): t, a_sum and b_sum are unused here
	float64_t fullmodprob=0;

	// Initialize the target model: parameters that are "alive" in `train`
	// (above ALMOST_NEG_INFTY) restart from the PSEUDO count; effectively
	// -inf parameters are copied through so they stay disabled.
	for (i=0; i<N; i++)
	{
		if (train->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,train->get_p(i));
		if (train->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,train->get_q(i));

		for (j=0; j<N; j++)
			if (train->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,train->get_a(i,j));
		for (j=0; j<M; j++)
			if (train->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,train->get_b(i,j));
	}

	pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
	S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;

	// Per-thread statistic buffers (log-domain accumulators).
	for (i=0; i<parallel.get_num_threads(); i++)
	{
		params[i].p_buf=new float64_t[N];
		params[i].q_buf=new float64_t[N];
		params[i].a_buf=new float64_t[N*N];
		params[i].b_buf=new float64_t[N*M];
	} ;

	// Launch one worker per thread over an equal share of the vectors.
	// NOTE(review): bw_dim_prefetch reads params->dim (a single index) while
	// dim_start/dim_stop ranges are set up here — verify the intended thread
	// entry point (cf. model_prob_thread).
	for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
	{
		params[cpu].hmm=train;
		params[cpu].dim_start=p_observations->get_num_vectors()*cpu / parallel.get_num_threads();
		params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1) / parallel.get_num_threads();

#ifdef SUNOS
		thr_create(NULL,0, bw_dim_prefetch, (void*)&params[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]) ;
#else // SUNOS
		pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)&params[cpu]) ;
#endif
	}

	// Join each worker and merge its statistics into this model (log-sum).
	for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
	{
		void* ret;
		pthread_join(threads[cpu], &ret) ;

		for (i=0; i<N; i++)
		{
			set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
			set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));

			for (j=0; j<N; j++)
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));

			for (j=0; j<M; j++)
				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
		}

		fullmodprob+=params[cpu].prob;
	}

	// Release the per-thread buffers.
	for (i=0; i<parallel.get_num_threads(); i++)
	{
		delete[] params[i].p_buf;
		delete[] params[i].q_buf;
		delete[] params[i].a_buf;
		delete[] params[i].b_buf;
	}

	delete[] threads ;
	delete[] params ;

	// Publish the data log-likelihood measured under the old model.
	train->mod_prob=fullmodprob;
	train->mod_prob_updated=true ;

	// Turn the accumulated (pseudo-)counts into proper log-distributions.
	normalize();
	invalidate_model();
}
01491
01492 #else // USE_HMMPARALLEL
01493
01494 #if !defined(NEGATIVE_MODEL_HACK) && !defined(NEGATIVE_MODEL_HACK_DON)
01495
01496
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	// Single-threaded Baum-Welch re-estimation: `this` receives the parameters
	// re-estimated from model `estimate` on the attached observations. All
	// accumulation is done in log-space via CMath::logarithmic_sum. Uses the
	// precomputed backward transition lists so only transitions that actually
	// exist (above -inf) are updated.
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;
	float64_t dimmodprob=0;
	float64_t fullmodprob=0;

	// Initialize: learnable parameters (above ALMOST_NEG_INFTY in `estimate`)
	// restart from the PSEUDO count; disabled (-inf) ones are copied through.
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// Accumulate expected counts over every observation vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// Start/end statistics: posterior of being in state i at t=0
			// resp. t=T-1, normalized by this vector's model probability.
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// Only iterate the transitions listed as reachable from state i.
			int32_t num = trans_list_backward_cnt[i] ;

			// Transition statistic: sum over t of
			// alpha_t(i) + a(i,jj) + b_jj(o_{t+1}) + beta_{t+1}(jj).
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}

			// Emission statistic: occupancy mass on time steps emitting symbol j.
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Turn the accumulated (pseudo-)counts into proper log-distributions.
	normalize();
	invalidate_model();
}
01581
01582
01583
void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
{
	// Baum-Welch variant that re-estimates only start (p), end (q) and
	// transition (a) parameters; emission probabilities (b) are copied from
	// `estimate` unchanged. Otherwise identical in structure to
	// estimate_model_baum_welch.
	int32_t i,j,t,dim;
	float64_t a_sum;
	float64_t dimmodprob=0;
	float64_t fullmodprob=0;

	// Initialize: learnable p/q/a restart from PSEUDO, disabled ones copied;
	// emissions are taken over verbatim (not re-estimated here).
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// Accumulate expected start/end/transition counts over every vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// Only iterate the transitions listed as reachable from state i.
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}
		}
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Turn the accumulated (pseudo-)counts into proper log-distributions.
	normalize();
	invalidate_model();
}
01650
01651
01652
void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
{
	// Legacy dense Baum-Welch re-estimation: identical to
	// estimate_model_baum_welch but iterates ALL N target states per source
	// state instead of using the sparse trans_list_backward lists (O(N^2 T)
	// regardless of the transition structure).
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;
	float64_t dimmodprob=0;
	float64_t fullmodprob=0;

	// Initialize: learnable parameters restart from PSEUDO, disabled (-inf)
	// ones are copied through.
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// Accumulate expected counts over every observation vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// Start/end statistics (posterior at t=0 / t=T-1).
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// Dense transition statistic over all N target states.
			for (j=0; j<N; j++)
			{
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
				}
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
			}

			// Emission statistic: occupancy mass on time steps emitting symbol j.
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Turn the accumulated (pseudo-)counts into proper log-distributions.
	normalize();
	invalidate_model();
}
01734
01735
01736 #elif defined(NEGATIVE_MODEL_HACK)
01737
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	// NEGATIVE_MODEL_HACK variant of Baum-Welch: start/end probabilities and
	// the emissions of the first 25 states are randomized on first use and
	// re-estimated; transitions and the remaining emissions are frozen
	// (copied from `estimate`). The hard-coded indices 25/49 presumably
	// encode a fixed negative-model topology — TODO confirm against the
	// experiment this hack was written for.
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum; // NOTE(review): a_sum is unused in this variant
	float64_t dimmodprob=0;
	float64_t fullmodprob=0;

	const float64_t MIN_RAND=23e-3;

	SG_DEBUG( "M:%d\n",M);

	// One-shot randomization: get_p(0) close to 0 (log-prob ~ prob 1) is used
	// as the "still at its initial value" marker.
	if (estimate->get_p(0)>-0.00001)
	{
		for (i=0; i<N; i++)
		{
			if (i==25)
				estimate->set_p(i,-CMath::INFTY);
			else
				estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));

			if (i<49)
				estimate->set_q(i, -CMath::INFTY);
			else
				estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));

			if (i<25)
			{
				for (j=0; j<M; j++)
					estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
			}
		}
	}

	// Re-assert the disabled entries every call (they must stay -inf even
	// after normalization of the randomized model).
	for (i=0; i<N; i++)
	{
		if (i==25)
			estimate->set_p(i,-CMath::INFTY);

		if (i<49)
			estimate->set_q(i, -CMath::INFTY);

	}
	estimate->invalidate_model();
	estimate->normalize();

	// Initialize the target model: p/q and the first 25 states' emissions
	// restart from PSEUDO; transitions and remaining emissions are frozen.
	for (i=0; i<N; i++)
	{
		set_p(i,log(PSEUDO));

		set_q(i,log(PSEUDO));

		for (j=0; j<N; j++)
			set_a(i,j, estimate->get_a(i,j));

		if (i<25)
		{
			for (j=0; j<M; j++)
				set_b(i,j, log(PSEUDO));
		}
		else
		{
			for (j=0; j<M; j++)
				set_b(i,j, estimate->get_b(i,j));
		}
	}

	// Accumulate expected counts over every observation vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		// Start/end statistics for all states.
		for (i=0; i<N; i++)
		{
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
		}

		// Emission statistics only for the first 25 (learnable) states.
		for (i=0; i<25; i++)
		{
			for (j=0; j<M; j++)
			{
				// NOTE(review): initialized with CMath::NEG_INFTY while the
				// sibling variants use -CMath::INFTY — confirm this member
				// exists and has the intended value.
				b_sum=CMath::NEG_INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Turn the accumulated (pseudo-)counts into proper log-distributions.
	normalize();
	invalidate_model();
}
01847 #else
01848
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	// NEGATIVE_MODEL_HACK_DON variant of Baum-Welch: re-estimates p/q and the
	// emissions of the LAST 25 states; transitions and the other emissions
	// are frozen (copied from `estimate`). The hard-coded offsets N-25/N-50
	// presumably encode a fixed experimental topology — TODO confirm.
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum; // NOTE(review): a_sum is unused in this variant
	float64_t dimmodprob=0;
	float64_t fullmodprob=0;

	const float64_t MIN_RAND=23e-3;
	// One-shot guard: randomize only on the very first call that still sees
	// an un-initialized model (NOTE(review): function-local static — not
	// thread-safe and shared across all CHMM instances).
	static bool bla=true;

	if ((bla) && estimate->get_q(N-1)>-0.00001)
	{
		bla=false;
		for (i=0; i<N; i++)
		{
			if (i<=N-50)
				estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));
			else
				estimate->set_p(i, -1000);

			if ( i==N-25-1)
				estimate->set_q(i,-10000);
			else
				estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));
			SG_DEBUG( "q[%d]=%lf\n", i, estimate->get_q(i));

			if (i>=N-25)
			{
				for (j=0; j<M; j++)
					estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
			}
		}
		estimate->invalidate_model();
		estimate->normalize(true);
	}

	// Initialize the target model: p/q and the last 25 states' emissions
	// restart from PSEUDO; transitions and the other emissions are frozen.
	for (i=0; i<N; i++)
	{
		set_p(i,log(PSEUDO));
		set_q(i,log(PSEUDO));

		for (j=0; j<N; j++)
			set_a(i,j, estimate->get_a(i,j));

		if (i>=N-25)
		{
			for (j=0; j<M; j++)
				set_b(i,j, log(PSEUDO));
		}
		else
		{
			for (j=0; j<M; j++)
				set_b(i,j, estimate->get_b(i,j));

			// Two specific states get very unlikely emissions hard-disabled.
			if (i==N-25-1-24 || i==N-25-1-23)
			{
				for (j=0; j<M; j++)
				{
					if (estimate->get_b(i,j)<-10)
						set_b(i,j, -CMath::INFTY);
				}
			}
		}
	}

	// Accumulate expected counts over every observation vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// Start statistic only for the first N-50+1 states; the rest are
			// pinned to the large negative constant used at initialization.
			if (i<=N-50)
				set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			else
				set_p(i, -1000);

			if (i==N-25-1)
				set_q(i,-10000);
			else
				set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
		}

		// Emission statistics only for the last 25 (learnable) states.
		for (i=N-25; i<N; i++)
		{
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Normalize (keeping dead entries) and invalidate cached quantities.
	normalize(true);
	invalidate_model();
}
01961 #endif //NEGATIVE_MODEL_HACK || .._DON
01962 #endif // USE_HMMPARALLEL
01963
01964
01965
void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
{
	// Baum-Welch restricted to the parameters declared learnable in `model`
	// (learn_p / learn_q / learn_a / learn_b lists, each terminated by -1).
	// Numerators are accumulated into the parameters themselves; per-state
	// denominators are collected in A (transitions) and B (emissions) and
	// divided out (log-subtracted) at the end.
	int32_t i,j,old_i,k,t,dim;
	float64_t a_sum_num, b_sum_num;
	float64_t a_sum_denom, b_sum_denom;
	float64_t dimmodprob=-CMath::INFTY;
	float64_t fullmodprob=0;
	float64_t* A=ARRAYN1(0);  // per-state transition denominators (log)
	float64_t* B=ARRAYN2(0);  // per-state emission denominators (log)

	// Reset only the learnable parameters to the PSEUDO count; everything
	// else keeps its current value.
	for (k=0; (i=model->get_learn_p(k))!=-1; k++)
		set_p(i,log(PSEUDO));

	for (k=0; (i=model->get_learn_q(k))!=-1; k++)
		set_q(i,log(PSEUDO));

	for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
	{
		j=model->get_learn_a(k,1);
		set_a(i,j, log(PSEUDO));
	}

	for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
	{
		j=model->get_learn_b(k,1);
		set_b(i,j, log(PSEUDO));
	}

	for (i=0; i<N; i++)
	{
		A[i]=log(PSEUDO);
		B[i]=log(PSEUDO);
	}

#ifdef USE_HMMPARALLEL
	pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
	S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
#endif

	// Accumulate expected counts over every observation vector.
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
#ifdef USE_HMMPARALLEL
		// Prefetch forward/backward tables for the next batch of vectors in
		// parallel (one thread per vector).
		// NOTE(review): dimmodprob ends up holding the LAST joined thread's
		// ret for the whole batch, and params[i].p_buf etc. are never set up
		// here although bw_dim_prefetch passes them to ab_buf_comp — this
		// branch looks broken; verify before enabling USE_HMMPARALLEL.
		if (dim%parallel.get_num_threads()==0)
		{
			int32_t i ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=estimate ;
					params[i].dim=dim+i ;
#ifdef SUNOS
					thr_create(NULL,0, bw_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
#else // SUNOS
					pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
#endif
				} ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					void * ret ;
					pthread_join(threads[i], &ret) ;
					dimmodprob = params[i].ret ;
				} ;
		}
#else
		dimmodprob=estimate->model_probability(dim);
#endif // USE_HMMPARALLEL

		fullmodprob+= dimmodprob;

		// Start/end numerators for the learnable p/q entries.
		for (k=0; (i=model->get_learn_p(k))!=-1; k++)
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );

		for (k=0; (i=model->get_learn_q(k))!=-1; k++)
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
						estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );

		// Transition statistics: the learn_a list is grouped by source state,
		// so the per-state denominator is recomputed only when i changes.
		old_i=-1;
		for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
		{
			if (old_i!=i)
			{
				old_i=i;
				a_sum_denom=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
					a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));

				A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
			}

			j=model->get_learn_a(k,1);
			a_sum_num=-CMath::INFTY;
			for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
			{
				a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
						estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
			}

			set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
		}

		// Emission statistics: same grouped-denominator scheme for learn_b.
		old_i=-1;
		for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
		{
			if (old_i!=i)
			{
				old_i=i;
				b_sum_denom=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
					b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));

				B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
			}

			j=model->get_learn_b(k,1);
			b_sum_num=-CMath::INFTY;
			for (t=0; t<p_observations->get_vector_length(dim); t++)
			{
				if (p_observations->get_feature(dim,t)==j)
					b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
			}

			set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
		}
	}
#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	// Normalize the learnable parameters: p/q by the (pseudo-smoothed) number
	// of vectors, a/b by dividing out their per-state denominators (log-sub).
	for (k=0; (i=model->get_learn_p(k))!=-1; k++)
		set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );

	for (k=0; (i=model->get_learn_q(k))!=-1; k++)
		set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );

	for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
	{
		j=model->get_learn_a(k,1);
		set_a(i,j, get_a(i,j) - A[i]);
	}

	for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
	{
		j=model->get_learn_b(k,1);
		set_b(i,j, get_b(i,j) - B[i]);
	}

	// Publish the data log-likelihood measured under the old model.
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// Final renormalization and cache invalidation.
	normalize();
	invalidate_model();
}
02138
02139 #ifndef NOVIT
02140
void CHMM::estimate_model_viterbi(CHMM* estimate)
{
	// Viterbi training (segmental k-means): decode the best state path of
	// every observation vector under `estimate`, count transitions, emissions
	// and start/end states along those paths (linear domain, PSEUDO-smoothed),
	// then convert the counts to log-probabilities in `this`.
	int32_t i,j,t;
	float64_t sum;
	float64_t* P=ARRAYN1(0);  // start-state counts
	float64_t* Q=ARRAYN2(0);  // end-state counts

	path_deriv_updated=false ;

	// Initialize all counters with the PSEUDO count (additive smoothing).
	for (i=0; i<N; i++)
	{
		for (j=0; j<N; j++)
			set_A(i,j, PSEUDO);

		for (j=0; j<M; j++)
			set_B(i,j, PSEUDO);

		P[i]=PSEUDO;
		Q[i]=PSEUDO;
	}

	float64_t allpatprob=0 ;

#ifdef USE_HMMPARALLEL
	pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
	S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
#endif

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
#ifdef USE_HMMPARALLEL
		// Decode the next batch of vectors in parallel (one thread per
		// vector); best-path scores are summed from the joined params.
		if (dim%parallel.get_num_threads()==0)
		{
			int32_t i ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=estimate ;
					params[i].dim=dim+i ;
#ifdef SUNOS
					thr_create(NULL,0, vit_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
#else // SUNOS
					pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)&params[i]) ;
#endif
				} ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					void * ret ;
					pthread_join(threads[i], &ret) ;
					allpatprob += params[i].ret ;
				} ;
		} ;
#else
		allpatprob += estimate->best_path(dim);
#endif // USE_HMMPARALLEL

		// Count transitions and emissions along the decoded path.
		for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
		{
			set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
			set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
		}

		// The last time step has no outgoing transition but still emits.
		set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );

		// Count which states the path starts/ends in.
		P[estimate->PATH(dim)[0]]++;
		Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
	}

#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	// Cache the average best-path log-likelihood on the decoding model.
	allpatprob/=p_observations->get_num_vectors() ;
	estimate->all_pat_prob=allpatprob ;
	estimate->all_path_prob_updated=true ;

	// Row-normalize the transition counts and store them as log-probs.
	for (i=0; i<N; i++)
	{
		sum=0;
		for (j=0; j<N; j++)
			sum+=get_A(i,j);

		for (j=0; j<N; j++)
			set_a(i,j, log(get_A(i,j)/sum));
	}

	// Row-normalize the emission counts and store them as log-probs.
	for (i=0; i<N; i++)
	{
		sum=0;
		for (j=0; j<M; j++)
			sum+=get_B(i,j);

		for (j=0; j<M; j++)
			set_b(i,j, log(get_B(i, j)/sum));
	}

	// Normalize start-state counts.
	sum=0;
	for (i=0; i<N; i++)
		sum+=P[i];

	for (i=0; i<N; i++)
		set_p(i, log(P[i]/sum));

	// Normalize end-state counts.
	sum=0;
	for (i=0; i<N; i++)
		sum+=Q[i];

	for (i=0; i<N; i++)
		set_q(i, log(Q[i]/sum));

	// Cached derived quantities are stale now.
	invalidate_model();
}
02264
02265
/** Viterbi-style re-estimation restricted to the parameters declared
 * learnable in the attached CModel (`model`): counts transitions and
 * emissions along the best paths of `estimate`, copies all of
 * `estimate`'s parameters over, then re-normalizes only the learn_a /
 * learn_b / learn_p / learn_q entries from the counted statistics.
 * @param estimate HMM whose Viterbi paths provide the statistics
 */
void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
{
	int32_t i,j,k,t;
	float64_t sum;
	// count accumulators for start/end states reuse the two N-sized scratch rows
	float64_t* P=ARRAYN1(0);
	float64_t* Q=ARRAYN2(0);

	path_deriv_updated=false ;

	// initialize all counters with PSEUDO (>0) so log() below never sees an empty bin
	for (i=0; i<N; i++)
	{
		for (j=0; j<N; j++)
			set_A(i,j, PSEUDO);

		for (j=0; j<M; j++)
			set_B(i,j, PSEUDO);

		P[i]=PSEUDO;
		Q[i]=PSEUDO;
	}

#ifdef USE_HMMPARALLEL
	pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
	S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
#endif

	float64_t allpatprob=0.0 ;
	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
#ifdef USE_HMMPARALLEL
		// prefetch Viterbi paths for a batch of vectors, one thread each
		if (dim%parallel.get_num_threads()==0)
		{
			int32_t i ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=estimate ;
					params[i].dim=dim+i ;
#ifdef SUNOS
					thr_create(NULL,0,vit_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
#else // SUNOS
					pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)&params[i]) ;
#endif //SUNOS
				} ;
			for (i=0; i<parallel.get_num_threads(); i++)
				if (dim+i<p_observations->get_num_vectors())
				{
					void * ret ;
					pthread_join(threads[i], &ret) ;
					allpatprob += params[i].ret ;
				} ;
		} ;
#else // USE_HMMPARALLEL
		allpatprob += estimate->best_path(dim);
#endif // USE_HMMPARALLEL

		// count transitions and emissions along the best path of this vector
		for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
		{
			set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
			set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
		}

		// the loop above stops one short; count the last emission separately
		set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );

		P[estimate->PATH(dim)[0]]++;
		Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
	}

#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	// average best-path log probability over all vectors, cached on `estimate`
	allpatprob/=p_observations->get_num_vectors() ;
	estimate->all_pat_prob=allpatprob ;
	estimate->all_path_prob_updated=true ;

	// start from estimate's parameters; only learnable entries are overwritten below
	for (i=0; i<N; i++)
	{
		for (j=0; j<N; j++)
			set_a(i,j, estimate->get_a(i,j));

		for (j=0; j<M; j++)
			set_b(i,j, estimate->get_b(i,j));
	}

	// learn_a entries are grouped by their first index (source state);
	// each group is summed first (i advances) and then normalized
	// (k catches up). The trailing "|| k<i" keeps the loop alive for
	// one more pass so the final group also gets normalized.
	i=0;
	sum=0;
	j=model->get_learn_a(i,0);
	k=i;
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
			i++;
		}
		else
		{
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
				k++;
			}

			sum=0;
			j=model->get_learn_a(i,0);
			k=i;
		}
	}

	// same grouped normalization for the learnable emission entries
	i=0;
	sum=0;
	j=model->get_learn_b(i,0);
	k=i;
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
			i++;
		}
		else
		{
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
				k++;
			}

			sum=0;
			j=model->get_learn_b(i,0);
			k=i;
		}
	}

	// learnable start-state probabilities: sum counts, then normalize
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		sum+=P[model->get_learn_p(i)] ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
		i++ ;
	} ;

	// learnable end-state probabilities: sum counts, then normalize
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		sum+=Q[model->get_learn_q(i)] ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
		i++ ;
	} ;

	// parameters changed -> drop cached alpha/beta/path quantities
	invalidate_model();
}
02446 #endif // NOVIT
02447
02448
02449
02450
/** Prints the model to the log: overall log-likelihood and dimensions,
 * plus (if verbose) every parameter matrix together with a per-row
 * probability checksum (log-sum should be ~0, i.e. probability 1).
 * @param verbose if true, also dump a, p, q and b matrices
 */
void CHMM::output_model(bool verbose)
{
	int32_t i,j;
	float64_t checksum;

	// generic info
	SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// transition matrix; checksum is seeded with q(i) since leaving
		// state i either transitions (a(i,*)) or terminates (q(i)), so
		// the log-sum over both should be ~0
		SG_INFO( "\ntransition matrix\n");
		for (i=0; i<N; i++)
		{
			checksum= get_q(i);
			for (j=0; j<N; j++)
			{
				checksum= CMath::logarithmic_sum(checksum, get_a(i,j));

				SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));

				if (j % 4 == 3)
					SG_PRINT( "\n");
			}
			// stars flag a row that does not sum to probability 1
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}

		// distribution of start states (should log-sum to 0)
		SG_INFO( "\ndistribution of start states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_p(i));
			SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
			if (i % 4 == 3)
				SG_PRINT( "\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// distribution of terminal states (should log-sum to 0)
		SG_INFO( "\ndistribution of terminal states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_q(i));
			SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
			if (i % 4 == 3)
				SG_INFO("\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// emission matrix: one checksum per state over all symbols
		SG_INFO("\ndistribution of observations given the state\n");
		for (i=0; i<N; i++)
		{
			checksum=-CMath::INFTY;
			for (j=0; j<M; j++)
			{
				checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
				SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
				if (j % 4 == 3)
					SG_PRINT("\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}
	}
	SG_PRINT("\n");
}
02533
02534
02535 void CHMM::output_model_defined(bool verbose)
02536 {
02537 int32_t i,j;
02538 if (!model)
02539 return ;
02540
02541
02542 SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
02543 (float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
02544 N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02545
02546 if (verbose)
02547 {
02548
02549 SG_INFO("\ntransition matrix\n");
02550
02551
02552 i=0;
02553 j=model->get_learn_a(i,0);
02554 while (model->get_learn_a(i,0)!=-1)
02555 {
02556 if (j!=model->get_learn_a(i,0))
02557 {
02558 j=model->get_learn_a(i,0);
02559 SG_PRINT("\n");
02560 }
02561
02562 SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
02563 i++;
02564 }
02565
02566
02567 SG_INFO("\n\ndistribution of observations given the state\n");
02568 i=0;
02569 j=model->get_learn_b(i,0);
02570 while (model->get_learn_b(i,0)!=-1)
02571 {
02572 if (j!=model->get_learn_b(i,0))
02573 {
02574 j=model->get_learn_b(i,0);
02575 SG_PRINT("\n");
02576 }
02577
02578 SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
02579 i++;
02580 }
02581
02582 SG_PRINT("\n");
02583 }
02584 SG_PRINT("\n");
02585 }
02586
02587
02588
02589
02590 void CHMM::convert_to_log()
02591 {
02592 int32_t i,j;
02593
02594 for (i=0; i<N; i++)
02595 {
02596 if (get_p(i)!=0)
02597 set_p(i, log(get_p(i)));
02598 else
02599 set_p(i, -CMath::INFTY);;
02600 }
02601
02602 for (i=0; i<N; i++)
02603 {
02604 if (get_q(i)!=0)
02605 set_q(i, log(get_q(i)));
02606 else
02607 set_q(i, -CMath::INFTY);;
02608 }
02609
02610 for (i=0; i<N; i++)
02611 {
02612 for (j=0; j<N; j++)
02613 {
02614 if (get_a(i,j)!=0)
02615 set_a(i,j, log(get_a(i,j)));
02616 else
02617 set_a(i,j, -CMath::INFTY);
02618 }
02619 }
02620
02621 for (i=0; i<N; i++)
02622 {
02623 for (j=0; j<M; j++)
02624 {
02625 if (get_b(i,j)!=0)
02626 set_b(i,j, log(get_b(i,j)));
02627 else
02628 set_b(i,j, -CMath::INFTY);
02629 }
02630 }
02631 loglikelihood=true;
02632
02633 invalidate_model();
02634 }
02635
02636
02637 void CHMM::init_model_random()
02638 {
02639 const float64_t MIN_RAND=23e-3;
02640
02641 float64_t sum;
02642 int32_t i,j;
02643
02644
02645 for (i=0; i<N; i++)
02646 {
02647 sum=0;
02648 for (j=0; j<N; j++)
02649 {
02650 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02651
02652 sum+=get_a(i,j);
02653 }
02654
02655 for (j=0; j<N; j++)
02656 set_a(i,j, get_a(i,j)/sum);
02657 }
02658
02659
02660 sum=0;
02661 for (i=0; i<N; i++)
02662 {
02663 set_p(i, CMath::random(MIN_RAND, 1.0));
02664
02665 sum+=get_p(i);
02666 }
02667
02668 for (i=0; i<N; i++)
02669 set_p(i, get_p(i)/sum);
02670
02671
02672 sum=0;
02673 for (i=0; i<N; i++)
02674 {
02675 set_q(i, CMath::random(MIN_RAND, 1.0));
02676
02677 sum+=get_q(i);
02678 }
02679
02680 for (i=0; i<N; i++)
02681 set_q(i, get_q(i)/sum);
02682
02683
02684 for (i=0; i<N; i++)
02685 {
02686 sum=0;
02687 for (j=0; j<M; j++)
02688 {
02689 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02690
02691 sum+=get_b(i,j);
02692 }
02693
02694 for (j=0; j<M; j++)
02695 set_b(i,j, get_b(i,j)/sum);
02696 }
02697
02698
02699 invalidate_model();
02700 }
02701
02702
/** Initializes the model according to the attached CModel: all entries
 * start at 0, constant entries get their fixed values, and learnable
 * entries get random values normalized per group. Parameters are plain
 * probabilities afterwards, not log values. */
void CHMM::init_model_defined()
{
	int32_t i,j,k,r;
	float64_t sum;
	const float64_t MIN_RAND=23e-3;

	// zero everything first; only const/learn entries become non-zero
	for (i=0; i<N; i++)
		for (j=0; j<N; j++)
			set_a(i,j, 0);

	for (i=0; i<N; i++)
		set_p(i, 0);

	for (i=0; i<N; i++)
		set_q(i, 0);

	for (i=0; i<N; i++)
		for (j=0; j<M; j++)
			set_b(i,j, 0);

	// learnable transitions: entries are grouped by source state.
	// R holds one random draw per target; a group is first summed
	// (i advances) and then written normalized (k catches up); the
	// trailing "|| k<i" keeps the loop alive to flush the last group.
	float64_t *R=new float64_t[N] ;
	for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=i;
	j=model->get_learn_a(i,0);
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			sum+=R[model->get_learn_a(i,1)] ;
			i++;
		}
		else
		{
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
						R[model->get_learn_a(k,1)]/sum);
				k++;
			}
			j=model->get_learn_a(i,0);
			k=i;
			sum=0;
			// fresh random values for the next group
			for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// learnable emissions: same grouped scheme, M random draws per group
	R=new float64_t[M] ;
	for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=0 ;
	j=model->get_learn_b(i,0);
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=R[model->get_learn_b(i,1)] ;
			i++;
		}
		else
		{
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
						R[model->get_learn_b(k,1)]/sum);
				k++;
			}

			j=model->get_learn_b(i,0);
			k=i;
			sum=0;
			for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// constant transitions: fixed values from the model definition
	i=0;
	while (model->get_const_a(i,0) != -1)
	{
		set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
		i++;
	}

	// constant emissions
	i=0;
	while (model->get_const_b(i,0) != -1)
	{
		set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
		i++;
	}

	// constant start states
	i=0;
	while (model->get_const_p(i) != -1)
	{
		set_p(model->get_const_p(i), model->get_const_p_val(i));
		i++;
	}

	// NOTE(review): q is zeroed a second time here before the const_q
	// values are applied; redundant with the loop above but harmless
	for (i=0; i<N; i++)
		set_q(i, 0.0);

	// constant end states
	i=0;
	while (model->get_const_q(i) != -1)
	{
		set_q(model->get_const_q(i), model->get_const_q_val(i));
		i++;
	}

	// learnable start states: random draws, then normalize over the group
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_p(model->get_learn_p(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
		i++ ;
	} ;

	// learnable end states: random draws, then normalize over the group
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_q(model->get_learn_q(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
		i++ ;
	} ;

	// parameters changed -> drop cached quantities
	invalidate_model();
}
02856
02857 void CHMM::clear_model()
02858 {
02859 int32_t i,j;
02860 for (i=0; i<N; i++)
02861 {
02862 set_p(i, log(PSEUDO));
02863 set_q(i, log(PSEUDO));
02864
02865 for (j=0; j<N; j++)
02866 set_a(i,j, log(PSEUDO));
02867
02868 for (j=0; j<M; j++)
02869 set_b(i,j, log(PSEUDO));
02870 }
02871 }
02872
02873 void CHMM::clear_model_defined()
02874 {
02875 int32_t i,j,k;
02876
02877 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02878 set_p(j, log(PSEUDO));
02879
02880 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02881 set_q(j, log(PSEUDO));
02882
02883 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02884 {
02885 k=model->get_learn_a(i,1);
02886 set_a(j,k, log(PSEUDO));
02887 }
02888
02889 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02890 {
02891 k=model->get_learn_b(i,1);
02892 set_b(j,k, log(PSEUDO));
02893 }
02894 }
02895
02896 void CHMM::copy_model(CHMM* l)
02897 {
02898 int32_t i,j;
02899 for (i=0; i<N; i++)
02900 {
02901 set_p(i, l->get_p(i));
02902 set_q(i, l->get_q(i));
02903
02904 for (j=0; j<N; j++)
02905 set_a(i,j, l->get_a(i,j));
02906
02907 for (j=0; j<M; j++)
02908 set_b(i,j, l->get_b(i,j));
02909 }
02910 }
02911
/** Marks every cached quantity (model probability, alpha/beta caches,
 * Viterbi path info) stale after a parameter change, and rebuilds the
 * sparse transition lists used by the forward/backward recursions. */
void CHMM::invalidate_model()
{
	// the model probability gets recomputed on next model_probability() call
	this->mod_prob=0.0;
	this->mod_prob_updated=false;

	if (mem_initialized)
	{
		// drop the old transition lists before rebuilding them
		if (trans_list_forward_cnt)
			delete[] trans_list_forward_cnt ;
		trans_list_forward_cnt=NULL ;
		if (trans_list_backward_cnt)
			delete[] trans_list_backward_cnt ;
		trans_list_backward_cnt=NULL ;
		if (trans_list_forward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_forward[i])
					delete[] trans_list_forward[i] ;
			delete[] trans_list_forward ;
			trans_list_forward=NULL ;
		}
		if (trans_list_backward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_backward[i])
					delete[] trans_list_backward[i] ;
			delete[] trans_list_backward ;
			trans_list_backward = NULL ;
		} ;

		trans_list_len = N ;
		trans_list_forward = new T_STATES*[N] ;
		trans_list_forward_cnt = new T_STATES[N] ;

		// forward list: for each target state j, the source states i with
		// a non-negligible transition probability a(i,j)
		for (int32_t j=0; j<N; j++)
		{
			trans_list_forward_cnt[j]= 0 ;
			trans_list_forward[j]= new T_STATES[N] ;
			for (int32_t i=0; i<N; i++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
					trans_list_forward_cnt[j]++ ;
				}
		} ;

		trans_list_backward = new T_STATES*[N] ;
		trans_list_backward_cnt = new T_STATES[N] ;

		// backward list: for each source state i, the reachable targets j
		for (int32_t i=0; i<N; i++)
		{
			trans_list_backward_cnt[i]= 0 ;
			trans_list_backward[i]= new T_STATES[N] ;
			for (int32_t j=0; j<N; j++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
					trans_list_backward_cnt[i]++ ;
				}
		} ;
	} ;
#ifndef NOVIT
	// Viterbi-related caches become stale too
	this->all_pat_prob=0.0;
	this->pat_prob=0.0;
	this->path_deriv_updated=false ;
	this->path_deriv_dimension=-1 ;
	this->all_path_prob_updated=false;
#endif //NOVIT

#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		// one alpha/beta cache (and path state) per worker thread
		for (int32_t i=0; i<parallel.get_num_threads(); i++)
		{
			this->alpha_cache[i].updated=false;
			this->beta_cache[i].updated=false;
#ifndef NOVIT
			path_prob_updated[i]=false ;
			path_prob_dimension[i]=-1 ;
#endif // NOVIT
		} ;
	}
#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.updated=false;
	this->beta_cache.updated=false;
#ifndef NOVIT
	this->path_prob_dimension=-1;
	this->path_prob_updated=false;
#endif //NOVIT

#endif // USE_HMMPARALLEL_STRUCTURES
}
03004
03005 void CHMM::open_bracket(FILE* file)
03006 {
03007 int32_t value;
03008 while (((value=fgetc(file)) != EOF) && (value!='['))
03009 {
03010 if (value=='\n')
03011 line++;
03012 }
03013
03014 if (value==EOF)
03015 error(line, "expected \"[\" in input file");
03016
03017 while (((value=fgetc(file)) != EOF) && (isspace(value)))
03018 {
03019 if (value=='\n')
03020 line++;
03021 }
03022
03023 ungetc(value, file);
03024 }
03025
03026 void CHMM::close_bracket(FILE* file)
03027 {
03028 int32_t value;
03029 while (((value=fgetc(file)) != EOF) && (value!=']'))
03030 {
03031 if (value=='\n')
03032 line++;
03033 }
03034
03035 if (value==EOF)
03036 error(line, "expected \"]\" in input file");
03037 }
03038
03039 bool CHMM::comma_or_space(FILE* file)
03040 {
03041 int32_t value;
03042 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
03043 {
03044 if (value=='\n')
03045 line++;
03046 }
03047 if (value==']')
03048 {
03049 ungetc(value, file);
03050 SG_ERROR( "found ']' instead of ';' or ','\n") ;
03051 return false ;
03052 } ;
03053
03054 if (value==EOF)
03055 error(line, "expected \";\" or \",\" in input file");
03056
03057 while (((value=fgetc(file)) != EOF) && (isspace(value)))
03058 {
03059 if (value=='\n')
03060 line++;
03061 }
03062 ungetc(value, file);
03063 return true ;
03064 }
03065
03066 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
03067 {
03068 signed char value;
03069
03070 while (((value=fgetc(file)) != EOF) &&
03071 !isdigit(value) && (value!='A')
03072 && (value!='C') && (value!='G') && (value!='T')
03073 && (value!='N') && (value!='n')
03074 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
03075 {
03076 if (value=='\n')
03077 line++;
03078 }
03079 if (value==']')
03080 {
03081 ungetc(value,file) ;
03082 return false ;
03083 } ;
03084 if (value!=EOF)
03085 {
03086 int32_t i=0;
03087 switch (value)
03088 {
03089 case 'A':
03090 value='0' +CAlphabet::B_A;
03091 break;
03092 case 'C':
03093 value='0' +CAlphabet::B_C;
03094 break;
03095 case 'G':
03096 value='0' +CAlphabet::B_G;
03097 break;
03098 case 'T':
03099 value='0' +CAlphabet::B_T;
03100 break;
03101 };
03102
03103 buffer[i++]=value;
03104
03105 while (((value=fgetc(file)) != EOF) &&
03106 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
03107 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
03108 || (value=='N') || (value=='n')) && (i<length))
03109 {
03110 switch (value)
03111 {
03112 case 'A':
03113 value='0' +CAlphabet::B_A;
03114 break;
03115 case 'C':
03116 value='0' +CAlphabet::B_C;
03117 break;
03118 case 'G':
03119 value='0' +CAlphabet::B_G;
03120 break;
03121 case 'T':
03122 value='0' +CAlphabet::B_T;
03123 break;
03124 case '1': case '2': case'3': case '4': case'5':
03125 case '6': case '7': case'8': case '9': case '0': break ;
03126 case '.': case 'e': case '-': break ;
03127 default:
03128 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
03129 };
03130 buffer[i++]=value;
03131 }
03132 ungetc(value, file);
03133 buffer[i]='\0';
03134
03135 return (i<=length) && (i>0);
03136 }
03137 return false;
03138 }
03139
03140
03141
03142
03143
03144
03145
03146
03147
03148
03149
03150
03151
03152
03153
03154
03155
03156
03157
03158
03159
03160
03161
03162
03163
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
/** Loads a plain HMM model (N, M and the a, b, p, q arrays) from a text
 * file using a small character-driven state machine.
 * @param file open stream with the model definition
 * @return true if all of N, M, a, b, p, q were read successfully
 *
 * NOTE(review): the resulting parameters are NOT normalized here any
 * more; callers must invoke normalize_hmm themselves (see warning at the
 * end). */
bool CHMM::load_model(FILE* file)
{
	int32_t received_params=0;	// GOT* bit per parameter already parsed

	bool result=false;
	E_STATE state=INITIAL;
	char buffer[1024];

	line=1;
	int32_t i,j;

	if (file)
	{
		while (state!=END)
		{
			int32_t value=fgetc(file);

			if (value=='\n')
				line++;
			if (value==EOF)
				state=END;

			switch (state)
			{
				case INITIAL:
					if (value=='N')
					{
						if (received_params & GOTN)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_N;
					}
					else if (value=='M')
					{
						if (received_params & GOTM)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_M;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					// NOTE(review): deliberate-looking fallthrough into
					// ARRAYs so that a/b/p/q are also recognized while in
					// INITIAL (the GOTO bit is never set in this function,
					// so the explicit INITIAL->ARRAYs transitions below
					// never fire) -- confirm before adding a break here
				case ARRAYs:
					if (value=='p')
					{
						if (received_params & GOTp)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_p;
					}
					// NOTE(review): plain `if`, not `else if` -- harmless
					// since value can't be both 'p' and 'q', but inconsistent
					if (value=='q')
					{
						if (received_params & GOTq)
							error(line, "in model file: \"q double defined\"");
						else
							state=GET_q;
					}
					else if (value=='a')
					{
						if (received_params & GOTa)
							error(line, "in model file: \"a double defined\"");
						else
							state=GET_a;
					}
					else if (value=='b')
					{
						if (received_params & GOTb)
							error(line, "in model file: \"b double defined\"");
						else
							state=GET_b;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					break;
				case GET_N:
					// expects "N=<int>"
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))	//get num
						{
							this->N= atoi(buffer);
							received_params|=GOTN;
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;		//end if error
					}
					break;
				case GET_M:
					// expects "M=<int>"
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))	//get num
						{
							this->M= atoi(buffer);
							received_params|=GOTM;
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;		//end if error
					}
					break;
				case GET_a:
					// expects "a=[[...],...,[...]]" with N rows of N floats
					if (value=='=')
					{
						float64_t f;

						transition_matrix_a=new float64_t[N*N];
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->N ; j++)
							{

								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_a(i,j, f);

								if (j<this->N-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTa;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_b:
					// expects "b=[[...],...,[...]]" with N rows of M floats
					if (value=='=')
					{
						float64_t f;

						observation_matrix_b=new float64_t[N*M];
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->M ; j++)
							{

								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_b(i,j, f);

								if (j<this->M-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTb;
					}
					state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_p:
					// expects "p=[...]" with N floats
					if (value=='=')
					{
						float64_t f;

						initial_state_distribution_p=new float64_t[N];
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_p(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTp;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_q:
					// expects "q=[...]" with N floats
					if (value=='=')
					{
						float64_t f;

						end_state_distribution_q=new float64_t[N];
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_q(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTq;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case COMMENT:
					// skip the rest of the line
					if (value==EOF)
						state=END;
					else if (value=='\n')
					{
						line++;
						state=INITIAL;
					}
					break;

				default:
					break;
			}
		}
		result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
	}

	SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
	return result;
}
03413
03414
03415
03416
03417
03418
03419
03420
03421
03422
03423
03424
03425
03426
03427
03428
03429
03430
03431
03432
03433
03434
03435
03436
03437
03438
03439
03440
03441
03442
03443
03444
03445
03446
03447
03448
03449
03450
03451
03452
03453
03454
03455
03456
03457
03458
03459
03460
03461
03462
03463
03464
03465
03466
03467
03468
03469
03470
03471
03472
03473 bool CHMM::load_definitions(FILE* file, bool verbose, bool init)
03474 {
03475 if (model)
03476 delete model ;
03477 model=new CModel();
03478
03479 int32_t received_params=0x0000000;
03480 char buffer[1024];
03481
03482 bool result=false;
03483 E_STATE state=INITIAL;
03484
03485 {
03486 model->set_learn_a(0, -1);
03487 model->set_learn_a(1, -1);
03488 model->set_const_a(0, -1);
03489 model->set_const_a(1, -1);
03490 model->set_const_a_val(0, 1.0);
03491 model->set_learn_b(0, -1);
03492 model->set_const_b(0, -1);
03493 model->set_const_b_val(0, 1.0);
03494 model->set_learn_p(0, -1);
03495 model->set_learn_q(0, -1);
03496 model->set_const_p(0, -1);
03497 model->set_const_q(0, -1);
03498 } ;
03499
03500 line=1;
03501
03502 if (file)
03503 {
03504 while (state!=END)
03505 {
03506 int32_t value=fgetc(file);
03507
03508 if (value=='\n')
03509 line++;
03510
03511 if (value==EOF)
03512 state=END;
03513
03514 switch (state)
03515 {
03516 case INITIAL:
03517 if (value=='l')
03518 {
03519 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03520 {
03521 switch(fgetc(file))
03522 {
03523 case 'a':
03524 state=GET_learn_a;
03525 break;
03526 case 'b':
03527 state=GET_learn_b;
03528 break;
03529 case 'p':
03530 state=GET_learn_p;
03531 break;
03532 case 'q':
03533 state=GET_learn_q;
03534 break;
03535 default:
03536 error(line, "a,b,p or q expected in train definition file");
03537 };
03538 }
03539 }
03540 else if (value=='c')
03541 {
03542 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03543 && fgetc(file)=='t' && fgetc(file)=='_')
03544 {
03545 switch(fgetc(file))
03546 {
03547 case 'a':
03548 state=GET_const_a;
03549 break;
03550 case 'b':
03551 state=GET_const_b;
03552 break;
03553 case 'p':
03554 state=GET_const_p;
03555 break;
03556 case 'q':
03557 state=GET_const_q;
03558 break;
03559 default:
03560 error(line, "a,b,p or q expected in train definition file");
03561 };
03562 }
03563 }
03564 else if (value=='%')
03565 {
03566 state=COMMENT;
03567 }
03568 else if (value==EOF)
03569 {
03570 state=END;
03571 }
03572 break;
03573 case GET_learn_a:
03574 if (value=='=')
03575 {
03576 open_bracket(file);
03577 bool finished=false;
03578 int32_t i=0;
03579
03580 if (verbose)
03581 SG_DEBUG( "\nlearn for transition matrix: ") ;
03582 while (!finished)
03583 {
03584 open_bracket(file);
03585
03586 if (get_numbuffer(file, buffer, 4))
03587 {
03588 value=atoi(buffer);
03589 model->set_learn_a(i++, value);
03590
03591 if (value<0)
03592 {
03593 finished=true;
03594 break;
03595 }
03596 if (value>=N)
03597 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03598 }
03599 else
03600 break;
03601
03602 comma_or_space(file);
03603
03604 if (get_numbuffer(file, buffer, 4))
03605 {
03606 value=atoi(buffer);
03607 model->set_learn_a(i++, value);
03608
03609 if (value<0)
03610 {
03611 finished=true;
03612 break;
03613 }
03614 if (value>=N)
03615 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03616
03617 }
03618 else
03619 break;
03620 close_bracket(file);
03621 }
03622 close_bracket(file);
03623 if (verbose)
03624 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03625
03626 if (finished)
03627 {
03628 received_params|=GOTlearn_a;
03629
03630 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03631 }
03632 else
03633 state=END;
03634 }
03635 break;
03636 case GET_learn_b:
03637 if (value=='=')
03638 {
03639 open_bracket(file);
03640 bool finished=false;
03641 int32_t i=0;
03642
03643 if (verbose)
03644 SG_DEBUG( "\nlearn for emission matrix: ") ;
03645
03646 while (!finished)
03647 {
03648 open_bracket(file);
03649
03650 int32_t combine=0;
03651
03652 for (int32_t j=0; j<2; j++)
03653 {
03654 if (get_numbuffer(file, buffer, 4))
03655 {
03656 value=atoi(buffer);
03657
03658 if (j==0)
03659 {
03660 model->set_learn_b(i++, value);
03661
03662 if (value<0)
03663 {
03664 finished=true;
03665 break;
03666 }
03667 if (value>=N)
03668 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03669 }
03670 else
03671 combine=value;
03672 }
03673 else
03674 break;
03675
03676 if (j<1)
03677 comma_or_space(file);
03678 else
03679 close_bracket(file);
03680 }
03681 model->set_learn_b(i++, combine);
03682 if (combine>=M)
03683
03684 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03685 }
03686 close_bracket(file);
03687 if (verbose)
03688 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03689
03690 if (finished)
03691 {
03692 received_params|=GOTlearn_b;
03693 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03694 }
03695 else
03696 state=END;
03697 }
03698 break;
03699 case GET_learn_p:
03700 if (value=='=')
03701 {
03702 open_bracket(file);
03703 bool finished=false;
03704 int32_t i=0;
03705
03706 if (verbose)
03707 SG_DEBUG( "\nlearn start states: ") ;
03708 while (!finished)
03709 {
03710 if (get_numbuffer(file, buffer, 4))
03711 {
03712 value=atoi(buffer);
03713
03714 model->set_learn_p(i++, value);
03715
03716 if (value<0)
03717 {
03718 finished=true;
03719 break;
03720 }
03721 if (value>=N)
03722 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03723 }
03724 else
03725 break;
03726
03727 comma_or_space(file);
03728 }
03729
03730 close_bracket(file);
03731 if (verbose)
03732 SG_DEBUG( "%i Entries",i-1) ;
03733
03734 if (finished)
03735 {
03736 received_params|=GOTlearn_p;
03737 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03738 }
03739 else
03740 state=END;
03741 }
03742 break;
03743 case GET_learn_q:
03744 if (value=='=')
03745 {
03746 open_bracket(file);
03747 bool finished=false;
03748 int32_t i=0;
03749
03750 if (verbose)
03751 SG_DEBUG( "\nlearn terminal states: ") ;
03752 while (!finished)
03753 {
03754 if (get_numbuffer(file, buffer, 4))
03755 {
03756 value=atoi(buffer);
03757 model->set_learn_q(i++, value);
03758
03759 if (value<0)
03760 {
03761 finished=true;
03762 break;
03763 }
03764 if (value>=N)
03765 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03766 }
03767 else
03768 break;
03769
03770 comma_or_space(file);
03771 }
03772
03773 close_bracket(file);
03774 if (verbose)
03775 SG_DEBUG( "%i Entries",i-1) ;
03776
03777 if (finished)
03778 {
03779 received_params|=GOTlearn_q;
03780 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03781 }
03782 else
03783 state=END;
03784 }
03785 break;
03786 case GET_const_a:
03787 if (value=='=')
03788 {
03789 open_bracket(file);
03790 bool finished=false;
03791 int32_t i=0;
03792
03793 if (verbose)
03794 #ifdef USE_HMMDEBUG
03795 SG_DEBUG( "\nconst for transition matrix: \n") ;
03796 #else
03797 SG_DEBUG( "\nconst for transition matrix: ") ;
03798 #endif
03799 while (!finished)
03800 {
03801 open_bracket(file);
03802
03803 if (get_numbuffer(file, buffer, 4))
03804 {
03805 value=atoi(buffer);
03806 model->set_const_a(i++, value);
03807
03808 if (value<0)
03809 {
03810 finished=true;
03811 model->set_const_a(i++, value);
03812 model->set_const_a_val((int32_t)i/2 - 1, value);
03813 break;
03814 }
03815 if (value>=N)
03816 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03817 }
03818 else
03819 break;
03820
03821 comma_or_space(file);
03822
03823 if (get_numbuffer(file, buffer, 4))
03824 {
03825 value=atoi(buffer);
03826 model->set_const_a(i++, value);
03827
03828 if (value<0)
03829 {
03830 finished=true;
03831 model->set_const_a_val((int32_t)i/2 - 1, value);
03832 break;
03833 }
03834 if (value>=N)
03835 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03836 }
03837 else
03838 break;
03839
03840 if (!comma_or_space(file))
03841 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03842 else
03843 if (get_numbuffer(file, buffer, 10))
03844 {
03845 float64_t dvalue=atof(buffer);
03846 model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03847 if (dvalue<0)
03848 {
03849 finished=true;
03850 break;
03851 }
03852 if ((dvalue>1.0) || (dvalue<0.0))
03853 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03854 }
03855 else
03856 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03857
03858 #ifdef USE_HMMDEBUG
03859 if (verbose)
03860 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03861 #endif
03862 close_bracket(file);
03863 }
03864 close_bracket(file);
03865 if (verbose)
03866 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03867
03868 if (finished)
03869 {
03870 received_params|=GOTconst_a;
03871 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03872 }
03873 else
03874 state=END;
03875 }
03876 break;
03877
03878 case GET_const_b:
03879 if (value=='=')
03880 {
03881 open_bracket(file);
03882 bool finished=false;
03883 int32_t i=0;
03884
03885 if (verbose)
03886 #ifdef USE_HMMDEBUG
03887 SG_DEBUG( "\nconst for emission matrix: \n") ;
03888 #else
03889 SG_DEBUG( "\nconst for emission matrix: ") ;
03890 #endif
03891 while (!finished)
03892 {
03893 open_bracket(file);
03894 int32_t combine=0;
03895 for (int32_t j=0; j<3; j++)
03896 {
03897 if (get_numbuffer(file, buffer, 10))
03898 {
03899 if (j==0)
03900 {
03901 value=atoi(buffer);
03902
03903 model->set_const_b(i++, value);
03904
03905 if (value<0)
03906 {
03907 finished=true;
03908
03909 break;
03910 }
03911 if (value>=N)
03912 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03913 }
03914 else if (j==2)
03915 {
03916 float64_t dvalue=atof(buffer);
03917 model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03918 if (dvalue<0)
03919 {
03920 finished=true;
03921 break;
03922 } ;
03923 if ((dvalue>1.0) || (dvalue<0.0))
03924 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03925 }
03926 else
03927 {
03928 value=atoi(buffer);
03929 combine= value;
03930 } ;
03931 }
03932 else
03933 {
03934 if (j==2)
03935 model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03936 break;
03937 } ;
03938 if (j<2)
03939 if ((!comma_or_space(file)) && (j==1))
03940 {
03941 model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03942 break ;
03943 } ;
03944 }
03945 close_bracket(file);
03946 model->set_const_b(i++, combine);
03947 if (combine>=M)
03948 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03949 #ifdef USE_HMMDEBUG
03950 if (verbose && !finished)
03951 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03952 #endif
03953 }
03954 close_bracket(file);
03955 if (verbose)
03956 SG_ERROR( "%i Entries",(int)i/2-1) ;
03957
03958 if (finished)
03959 {
03960 received_params|=GOTconst_b;
03961 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03962 }
03963 else
03964 state=END;
03965 }
03966 break;
03967 case GET_const_p:
03968 if (value=='=')
03969 {
03970 open_bracket(file);
03971 bool finished=false;
03972 int32_t i=0;
03973
03974 if (verbose)
03975 #ifdef USE_HMMDEBUG
03976 SG_DEBUG( "\nconst for start states: \n") ;
03977 #else
03978 SG_DEBUG( "\nconst for start states: ") ;
03979 #endif
03980 while (!finished)
03981 {
03982 open_bracket(file);
03983
03984 if (get_numbuffer(file, buffer, 4))
03985 {
03986 value=atoi(buffer);
03987 model->set_const_p(i, value);
03988
03989 if (value<0)
03990 {
03991 finished=true;
03992 model->set_const_p_val(i++, value);
03993 break;
03994 }
03995 if (value>=N)
03996 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03997
03998 }
03999 else
04000 break;
04001
04002 if (!comma_or_space(file))
04003 model->set_const_p_val(i++, 1.0);
04004 else
04005 if (get_numbuffer(file, buffer, 10))
04006 {
04007 float64_t dvalue=atof(buffer);
04008 model->set_const_p_val(i++, dvalue);
04009 if (dvalue<0)
04010 {
04011 finished=true;
04012 break;
04013 }
04014 if ((dvalue>1) || (dvalue<0))
04015 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
04016 }
04017 else
04018 model->set_const_p_val(i++, 1.0);
04019
04020 close_bracket(file);
04021
04022 #ifdef USE_HMMDEBUG
04023 if (verbose)
04024 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
04025 #endif
04026 }
04027 if (verbose)
04028 SG_DEBUG( "%i Entries",i-1) ;
04029
04030 close_bracket(file);
04031
04032 if (finished)
04033 {
04034 received_params|=GOTconst_p;
04035 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04036 }
04037 else
04038 state=END;
04039 }
04040 break;
04041 case GET_const_q:
04042 if (value=='=')
04043 {
04044 open_bracket(file);
04045 bool finished=false;
04046 if (verbose)
04047 #ifdef USE_HMMDEBUG
04048 SG_DEBUG( "\nconst for terminal states: \n") ;
04049 #else
04050 SG_DEBUG( "\nconst for terminal states: ") ;
04051 #endif
04052 int32_t i=0;
04053
04054 while (!finished)
04055 {
04056 open_bracket(file) ;
04057 if (get_numbuffer(file, buffer, 4))
04058 {
04059 value=atoi(buffer);
04060 model->set_const_q(i, value);
04061 if (value<0)
04062 {
04063 finished=true;
04064 model->set_const_q_val(i++, value);
04065 break;
04066 }
04067 if (value>=N)
04068 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
04069 }
04070 else
04071 break;
04072
04073 if (!comma_or_space(file))
04074 model->set_const_q_val(i++, 1.0);
04075 else
04076 if (get_numbuffer(file, buffer, 10))
04077 {
04078 float64_t dvalue=atof(buffer);
04079 model->set_const_q_val(i++, dvalue);
04080 if (dvalue<0)
04081 {
04082 finished=true;
04083 break;
04084 }
04085 if ((dvalue>1) || (dvalue<0))
04086 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
04087 }
04088 else
04089 model->set_const_q_val(i++, 1.0);
04090
04091 close_bracket(file);
04092 #ifdef USE_HMMDEBUG
04093 if (verbose)
04094 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
04095 #endif
04096 }
04097 if (verbose)
04098 SG_DEBUG( "%i Entries",i-1) ;
04099
04100 close_bracket(file);
04101
04102 if (finished)
04103 {
04104 received_params|=GOTconst_q;
04105 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04106 }
04107 else
04108 state=END;
04109 }
04110 break;
04111 case COMMENT:
04112 if (value==EOF)
04113 state=END;
04114 else if (value=='\n')
04115 state=INITIAL;
04116 break;
04117
04118 default:
04119 break;
04120 }
04121 }
04122 }
04123
04124
04125
04126
04127
04128 result=1 ;
04129 if (result)
04130 {
04131 model->sort_learn_a() ;
04132 model->sort_learn_b() ;
04133 if (init)
04134 {
04135 init_model_defined(); ;
04136 convert_to_log();
04137 } ;
04138 }
04139 if (verbose)
04140 SG_DEBUG( "\n") ;
04141 return result;
04142 }
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155
04156
04157
04158
04159
04160
04161
04162
04163
04164
04165
04166
04167
04168
04169
04170
04171
04172
04173
04174
04175
04176
04177
04178 bool CHMM::save_model(FILE* file)
04179 {
04180 bool result=false;
04181 int32_t i,j;
04182 const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
04183
04184 if (file)
04185 {
04186 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
04187 fprintf(file,"N=%d;\n",N);
04188 fprintf(file,"M=%d;\n",M);
04189
04190 fprintf(file,"p=[");
04191 for (i=0; i<N; i++)
04192 {
04193 if (i<N-1) {
04194 if (finite(get_p(i)))
04195 fprintf(file, "%e,", (double)get_p(i));
04196 else
04197 fprintf(file, "%f,", NAN_REPLACEMENT);
04198 }
04199 else {
04200 if (finite(get_p(i)))
04201 fprintf(file, "%e", (double)get_p(i));
04202 else
04203 fprintf(file, "%f", NAN_REPLACEMENT);
04204 }
04205 }
04206
04207 fprintf(file,"];\n\nq=[");
04208 for (i=0; i<N; i++)
04209 {
04210 if (i<N-1) {
04211 if (finite(get_q(i)))
04212 fprintf(file, "%e,", (double)get_q(i));
04213 else
04214 fprintf(file, "%f,", NAN_REPLACEMENT);
04215 }
04216 else {
04217 if (finite(get_q(i)))
04218 fprintf(file, "%e", (double)get_q(i));
04219 else
04220 fprintf(file, "%f", NAN_REPLACEMENT);
04221 }
04222 }
04223 fprintf(file,"];\n\na=[");
04224
04225 for (i=0; i<N; i++)
04226 {
04227 fprintf(file, "\t[");
04228
04229 for (j=0; j<N; j++)
04230 {
04231 if (j<N-1) {
04232 if (finite(get_a(i,j)))
04233 fprintf(file, "%e,", (double)get_a(i,j));
04234 else
04235 fprintf(file, "%f,", NAN_REPLACEMENT);
04236 }
04237 else {
04238 if (finite(get_a(i,j)))
04239 fprintf(file, "%e];\n", (double)get_a(i,j));
04240 else
04241 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04242 }
04243 }
04244 }
04245
04246 fprintf(file," ];\n\nb=[");
04247
04248 for (i=0; i<N; i++)
04249 {
04250 fprintf(file, "\t[");
04251
04252 for (j=0; j<M; j++)
04253 {
04254 if (j<M-1) {
04255 if (finite(get_b(i,j)))
04256 fprintf(file, "%e,", (double)get_b(i,j));
04257 else
04258 fprintf(file, "%f,", NAN_REPLACEMENT);
04259 }
04260 else {
04261 if (finite(get_b(i,j)))
04262 fprintf(file, "%e];\n", (double)get_b(i,j));
04263 else
04264 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04265 }
04266 }
04267 }
04268 result= (fprintf(file," ];\n") == 5);
04269 }
04270
04271 return result;
04272 }
04273
04274 #ifndef NOVIT
04275 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04276 {
04277 T_STATES* result = NULL;
04278
04279 prob = best_path(dim);
04280 result = new T_STATES[p_observations->get_vector_length(dim)];
04281
04282 for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04283 result[i]=PATH(dim)[i];
04284
04285 return result;
04286 }
04287
04288 bool CHMM::save_path(FILE* file)
04289 {
04290 bool result=false;
04291
04292 if (file)
04293 {
04294 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04295 {
04296 if (dim%100==0)
04297 SG_PRINT( "%i..", dim) ;
04298 float64_t prob = best_path(dim);
04299 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04300 for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04301 fprintf(file,"%d ", PATH(dim)[i]);
04302 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04303 fprintf(file,"\n\n") ;
04304 }
04305 SG_DONE();
04306 result=true;
04307 }
04308
04309 return result;
04310 }
04311 #endif // NOVIT
04312
04313 bool CHMM::save_likelihood_bin(FILE* file)
04314 {
04315 bool result=false;
04316
04317 if (file)
04318 {
04319 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04320 {
04321 float32_t prob= (float32_t) model_probability(dim);
04322 fwrite(&prob, sizeof(float32_t), 1, file);
04323 }
04324 result=true;
04325 }
04326
04327 return result;
04328 }
04329
04330 bool CHMM::save_likelihood(FILE* file)
04331 {
04332 bool result=false;
04333
04334 if (file)
04335 {
04336 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04337
04338 fprintf(file, "P=[");
04339 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04340 fprintf(file, "%e ", (double) model_probability(dim));
04341
04342 fprintf(file,"];");
04343 result=true;
04344 }
04345
04346 return result;
04347 }
04348
// Helper macro: casts value to float32_t, writes it to file in binary form
// and increments the counter variable num_floats, which therefore must be
// declared in every scope that uses this macro.
#define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04350
/** Write the model parameters p, q, a, b to @p file in binary form.
 *
 * Layout: a CMath::INFTY sentinel followed by a section id, then the raw
 * float32 parameter values, then a trailer (sentinel, id 3) holding the
 * parameter counts plus N and M.  Without a model definition (model==NULL,
 * section id 1) all N + N + N*N + N*M parameters are written; with a
 * definition (section id 2) only the learnable subset is written, a/b
 * entries as (i, j, value) triples.
 *
 * @param file binary output stream (may be NULL; nothing is written then)
 * @return always true, even when file==NULL or a write fails
 */
bool CHMM::save_model_bin(FILE* file)
{
	int32_t i,j,q, num_floats=0 ;	// num_floats is incremented by FLOATWRITE
	if (!model)
	{
		// no definitions loaded -- dump every parameter
		if (file)
		{
			// header: sentinel + section id 1 (full parameter dump)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 1);

			// start and end state distributions
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_p(i));
			SG_INFO( "wrote %i parameters for p\n",N) ;

			for (i=0; i<N; i++)
				FLOATWRITE(file, get_q(i)) ;
			SG_INFO( "wrote %i parameters for q\n",N) ;

			// transition and emission matrices, row-major
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					FLOATWRITE(file, get_a(i,j));
			SG_INFO( "wrote %i parameters for a\n",N*N) ;

			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					FLOATWRITE(file, get_b(i,j));
			SG_INFO( "wrote %i parameters for b\n",N*M) ;

			// trailer: sentinel + section id 3, then the counts
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N*N);
			FLOATWRITE(file, (float32_t) N*M);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	}
	else
	{
		// definitions loaded -- save only the learnable parameters
		if (file)
		{
			int32_t num_p, num_q, num_a, num_b ;

			// header: sentinel + section id 2 (sparse parameter dump)
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 2);

			// learnable p/q entries; the learn_* lists are -1 terminated
			for (i=0; model->get_learn_p(i)>=0; i++)
				FLOATWRITE(file, get_p(model->get_learn_p(i)));
			num_p=i ;
			SG_INFO( "wrote %i parameters for p\n",num_p) ;

			for (i=0; model->get_learn_q(i)>=0; i++)
				FLOATWRITE(file, get_q(model->get_learn_q(i)));
			num_q=i ;
			SG_INFO( "wrote %i parameters for q\n",num_q) ;

			// transitions as (i, j, a(i,j)) triples
			// NOTE(review): this loop terminates on get_learn_a(q,1) while the
			// b-loop below (and save_path_derivatives_bin) test column 0 --
			// presumably both columns of a row become -1 together; verify
			// against CModel before relying on it.
			for (q=0; model->get_learn_a(q,1)>=0; q++)
			{
				i=model->get_learn_a(q,0) ;
				j=model->get_learn_a(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_a(i,j));
			} ;
			num_a=q ;
			SG_INFO( "wrote %i parameters for a\n",num_a) ;

			// emissions as (i, j, b(i,j)) triples
			for (q=0; model->get_learn_b(q,0)>=0; q++)
			{
				i=model->get_learn_b(q,0) ;
				j=model->get_learn_b(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_b(i,j));
			} ;
			num_b=q ;
			SG_INFO( "wrote %i parameters for b\n",num_b) ;

			// trailer: sentinel + section id 3, then the counts
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			FLOATWRITE(file, (float32_t) num_p);
			FLOATWRITE(file, (float32_t) num_q);
			FLOATWRITE(file, (float32_t) num_a);
			FLOATWRITE(file, (float32_t) num_b);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	} ;
	return true ;
}
04452
04453 #ifndef NOVIT
04454 bool CHMM::save_path_derivatives(FILE* logfile)
04455 {
04456 int32_t dim,i,j;
04457 float64_t prob;
04458
04459 if (logfile)
04460 {
04461 fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
04462 fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04463 fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04464 fprintf(logfile,"%% ............................. \n");
04465 fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
04466 fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
04467 }
04468 else
04469 return false ;
04470
04471 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04472 {
04473 prob=best_path(dim);
04474
04475 fprintf(logfile, "[ ");
04476
04477
04478 for (i=0; i<N; i++)
04479 fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );
04480
04481 for (i=0; i<N; i++)
04482 fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );
04483
04484
04485 for (i=0; i<N; i++)
04486 for (j=0; j<N; j++)
04487 fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );
04488
04489 for (i=0; i<N; i++)
04490 for (j=0; j<M; j++)
04491 fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );
04492
04493 fseek(logfile,ftell(logfile)-1,SEEK_SET);
04494 fprintf(logfile, " ];\n");
04495 }
04496
04497 fprintf(logfile, "];");
04498
04499 return true ;
04500 }
04501
/** Write path (viterbi) derivatives for all observations in binary form.
 *
 * Per observation: the best-path log probability followed by the
 * derivatives w.r.t. p, q, a, b -- all of them when no model definition is
 * loaded, only the learnable subset otherwise -- each value as float32 via
 * FLOATWRITE.  A save_model_bin() dump is appended at the end.
 *
 * @param logfile binary output stream (per-observation data skipped when NULL)
 * @return always true
 */
bool CHMM::save_path_derivatives_bin(FILE* logfile)
{
	bool result=false;
	int32_t dim,i,j,q;
	float64_t prob=0 ;
	int32_t num_floats=0 ;	// incremented by FLOATWRITE

	// accumulated below but never read afterwards -- presumably a leftover
	// from earlier debugging output
	float64_t sum_prob=0.0 ;
	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator every 100 observations
		if (dim%100==0)
		{
			SG_PRINT( ".") ;

		} ;

		prob=best_path(dim);
		sum_prob+=prob ;

		if (!model)
		{
			// no definitions: derivative w.r.t. every parameter
			if (logfile)
			{
				// best-path probability of this observation first
				FLOATWRITE(logfile, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(logfile, path_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(logfile, path_derivative_b(i,j,dim));

			}
		}
		else
		{
			// definitions loaded: only learnable parameters (lists are
			// -1 terminated)
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));

				for (q=0; model->get_learn_a(q,0)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(logfile, path_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(logfile, path_derivative_b(i,j,dim));
				} ;
			}
		} ;
	}
	// append the model parameters themselves
	save_model_bin(logfile) ;

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04584 #endif // NOVIT
04585
04586 bool CHMM::save_model_derivatives_bin(FILE* file)
04587 {
04588 bool result=false;
04589 int32_t dim,i,j,q ;
04590 int32_t num_floats=0 ;
04591
04592 if (!model)
04593 SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
04594 else
04595 SG_INFO( "writing derivatives of changed weights only\n") ;
04596
04597 #ifdef USE_HMMPARALLEL
04598 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
04599 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
04600 #endif
04601
04602 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04603 {
04604 if (dim%20==0)
04605 {
04606 SG_PRINT( ".") ;
04607
04608 } ;
04609
04610 #ifdef USE_HMMPARALLEL
04611 if (dim%parallel.get_num_threads()==0)
04612 {
04613 int32_t i ;
04614 for (i=0; i<parallel.get_num_threads(); i++)
04615 if (dim+i<p_observations->get_num_vectors())
04616 {
04617 params[i].hmm=this ;
04618 params[i].dim=dim+i ;
04619 #ifdef SUNOS
04620 thr_create(NULL,0,bw_dim_prefetch, (void*)¶ms[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
04621 #else // SUNOS
04622 pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)¶ms[i]) ;
04623 #endif // SUNOS
04624 } ;
04625 for (i=0; i<parallel.get_num_threads(); i++)
04626 if (dim+i<p_observations->get_num_vectors())
04627 {
04628 void * ret ;
04629 pthread_join(threads[i], &ret) ;
04630 } ;
04631 } ;
04632 #endif
04633
04634 float64_t prob=model_probability(dim) ;
04635 if (!model)
04636 {
04637 if (file)
04638 {
04639
04640 FLOATWRITE(file, prob);
04641
04642
04643 for (i=0; i<N; i++)
04644 FLOATWRITE(file, model_derivative_p(i,dim));
04645
04646 for (i=0; i<N; i++)
04647 FLOATWRITE(file, model_derivative_q(i,dim));
04648
04649
04650 for (i=0; i<N; i++)
04651 for (j=0; j<N; j++)
04652 FLOATWRITE(file, model_derivative_a(i,j,dim));
04653
04654 for (i=0; i<N; i++)
04655 for (j=0; j<M; j++)
04656 FLOATWRITE(file, model_derivative_b(i,j,dim));
04657
04658 if (dim==0)
04659 SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04660 } ;
04661 }
04662 else
04663 {
04664 if (file)
04665 {
04666
04667 FLOATWRITE(file, prob);
04668
04669 for (i=0; model->get_learn_p(i)>=0; i++)
04670 FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));
04671
04672 for (i=0; model->get_learn_q(i)>=0; i++)
04673 FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));
04674
04675
04676 for (q=0; model->get_learn_a(q,1)>=0; q++)
04677 {
04678 i=model->get_learn_a(q,0) ;
04679 j=model->get_learn_a(q,1) ;
04680 FLOATWRITE(file, model_derivative_a(i,j,dim));
04681 } ;
04682
04683 for (q=0; model->get_learn_b(q,0)>=0; q++)
04684 {
04685 i=model->get_learn_b(q,0) ;
04686 j=model->get_learn_b(q,1) ;
04687 FLOATWRITE(file, model_derivative_b(i,j,dim));
04688 } ;
04689 if (dim==0)
04690 SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04691 } ;
04692 } ;
04693 }
04694 save_model_bin(file) ;
04695
04696 #ifdef USE_HMMPARALLEL
04697 delete[] threads ;
04698 delete[] params ;
04699 #endif
04700
04701 result=true;
04702 SG_PRINT( "\n") ;
04703 return result;
04704 }
04705
/** Write the derivatives of the model probability w.r.t. p, q, a and b for
 * every observation as a matlab-style matrix (one row per observation).
 *
 * @param file output stream; when NULL nothing is written
 * @return true when file was non-NULL
 */
bool CHMM::save_model_derivatives(FILE* file)
{
	bool result=false;
	int32_t dim,i,j;

	if (file)
	{
		// header describing the column layout of each row
		fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% ............................. \n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");

		for (dim=0; dim<p_observations->get_num_vectors(); dim++)
		{
			fprintf(file, "[ ");

			// derivatives w.r.t. start and end distributions
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );

			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );

			// derivatives w.r.t. transition and emission matrices
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );

			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );

			// step back one byte to overwrite the trailing comma with the
			// row terminator
			fseek(file,ftell(file)-1,SEEK_SET);
			fprintf(file, " ];\n");
		}

		fprintf(file, "];");

		result=true;
	}
	return result;
}
04753
/** Numerically check the analytic emission-matrix derivatives, summed over
 * all observations, against a central finite difference.
 *
 * Despite the name, only the a-priori checks for p, q and a appear to have
 * been removed and only state i=0 of b is verified; results are printed to
 * the log rather than returned.
 *
 * @return always true
 */
bool CHMM::check_model_derivatives_combined()
{
	// finite-difference step applied in probability space
	const float64_t delta=5e-4 ;

	int32_t i ;

	// NOTE(review): only state i=0 is checked; the bare braces below look
	// like the remnant of a former "for (i=0; i<N; i++)" loop -- confirm
	// whether checking a single row is intentional.
	i=0;
	{
		for (int32_t j=0; j<M; j++)
		{
			float64_t old_b=get_b(i,j) ;

			// central difference: perturb exp(b(i,j)) by -delta and +delta.
			// model_probability(-1) presumably aggregates over all
			// observations -- confirm against its definition.
			set_b(i,j, log(exp(old_b)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;

			set_b(i,j, log(exp(old_b)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			// restore the original parameter
			set_b(i,j, old_b) ;
			invalidate_model() ;

			// analytic derivative summed over all observations
			float64_t deriv_calc=0 ;
			for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
				if (j==1)
					SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
			} ;

			// NOTE(review): SG_ERROR is used merely to print the comparison;
			// depending on the logging backend this may abort -- verify.
			SG_ERROR( "b(%i,%i)=%e  db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	} ;
	return true ;
}
04823
/** Numerically verify the analytic model derivatives w.r.t. a and b (and
 * p, q when TEST is defined) for every observation.
 *
 * Each parameter is perturbed by +/-delta in probability space (the
 * parameters themselves are stored in log space) and the central finite
 * difference is compared against the analytic derivative; results go to
 * the debug log.
 *
 * @return always false -- the result flag is never set to true, so the
 *         outcome must be read from the log (NOTE(review): confirm whether
 *         any caller inspects the return value)
 */
bool CHMM::check_model_derivatives()
{
	bool result=false;
	// finite-difference step applied in probability space
	const float64_t delta=3e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
				invalidate_model() ;
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_b(i, j, dim));

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

#ifdef TEST
		// start distribution p
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_p(i, dim));

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_q(i, dim));

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
#endif
	}
	return result;
}
04927
04928 #ifdef USE_HMMDEBUG
/** Numerically verify the analytic best-path (viterbi) derivatives w.r.t.
 * a, b, p and q for every observation (debug build only).
 *
 * Each parameter is perturbed by +/-delta in probability space and the
 * central finite difference of best_path() is compared against the
 * analytic path_derivative_*; results go to the debug log.
 *
 * @return always false -- the result flag is never set to true; read the
 *         log for the outcome
 */
bool CHMM::check_path_derivatives()
{
	bool result=false;
	// finite-difference step applied in probability space
	const float64_t delta=1e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_a(i, j, dim) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_b(i, j, dim);

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

		// start distribution p
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_p(i, dim);

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_q(i, dim);

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	}
	return result;
}
05029 #endif // USE_HMMDEBUG
05030
05031
05032 void CHMM::normalize(bool keep_dead_states)
05033 {
05034 int32_t i,j;
05035 const float64_t INF=-1e10;
05036 float64_t sum_p =INF;
05037
05038 for (i=0; i<N; i++)
05039 {
05040 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
05041
05042 float64_t sum_b =INF;
05043 float64_t sum_a =get_q(i);
05044
05045 for (j=0; j<N; j++)
05046 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
05047
05048 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
05049 {
05050 for (j=0; j<N; j++)
05051 set_a(i,j, get_a(i,j)-sum_a);
05052 set_q(i, get_q(i)-sum_a);
05053 }
05054
05055 for (j=0; j<M; j++)
05056 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
05057 for (j=0; j<M; j++)
05058 set_b(i,j, get_b(i,j)-sum_b);
05059 }
05060
05061 for (i=0; i<N; i++)
05062 set_p(i, get_p(i)-sum_p);
05063
05064 invalidate_model();
05065 }
05066
05067 bool CHMM::append_model(CHMM* app_model)
05068 {
05069 bool result=false;
05070 const int32_t num_states=app_model->get_N();
05071 int32_t i,j;
05072
05073 SG_DEBUG( "cur N:%d M:%d\n", N, M);
05074 SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
05075 if (app_model->get_M() == get_M())
05076 {
05077 float64_t* n_p=new float64_t[N+num_states];
05078 float64_t* n_q=new float64_t[N+num_states];
05079 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
05080
05081 float64_t* n_b=new float64_t[(N+num_states)*M];
05082
05083
05084 for (i=0; i<N+num_states; i++)
05085 {
05086 n_p[i]=-CMath::INFTY;
05087 n_q[i]=-CMath::INFTY;
05088
05089 for (j=0; j<N+num_states; j++)
05090 n_a[(N+num_states)*i+j]=-CMath::INFTY;
05091
05092 for (j=0; j<M; j++)
05093 n_b[M*i+j]=-CMath::INFTY;
05094 }
05095
05096
05097
05098
05099
05100
05101 for (i=0; i<N; i++)
05102 {
05103 n_p[i]=get_p(i);
05104
05105 for (j=0; j<N; j++)
05106 n_a[(N+num_states)*j+i]=get_a(i,j);
05107
05108 for (j=0; j<M; j++)
05109 {
05110 n_b[M*i+j]=get_b(i,j);
05111 }
05112 }
05113
05114
05115 for (i=0; i<app_model->get_N(); i++)
05116 {
05117 n_q[i+N]=app_model->get_q(i);
05118
05119 for (j=0; j<app_model->get_N(); j++)
05120 n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
05121 for (j=0; j<app_model->get_M(); j++)
05122 n_b[M*(i+N)+j]=app_model->get_b(i,j);
05123 }
05124
05125
05126
05127 for (i=0; i<N; i++)
05128 {
05129 for (j=N; j<N+num_states; j++)
05130 n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
05131 }
05132
05133 free_state_dependend_arrays();
05134 N+=num_states;
05135
05136 alloc_state_dependend_arrays();
05137
05138
05139 delete[] initial_state_distribution_p;
05140 delete[] end_state_distribution_q;
05141 delete[] transition_matrix_a;
05142 delete[] observation_matrix_b;
05143
05144 transition_matrix_a=n_a;
05145 observation_matrix_b=n_b;
05146 initial_state_distribution_p=n_p;
05147 end_state_distribution_q=n_q;
05148
05149 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05151 invalidate_model();
05152 }
05153 else
05154 SG_ERROR( "number of observations is different for append model, doing nothing!\n");
05155
05156 return result;
05157 }
05158
05159 bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
05160 {
05161 bool result=false;
05162 const int32_t num_states=app_model->get_N()+2;
05163 int32_t i,j;
05164
05165 if (app_model->get_M() == get_M())
05166 {
05167 float64_t* n_p=new float64_t[N+num_states];
05168 float64_t* n_q=new float64_t[N+num_states];
05169 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
05170
05171 float64_t* n_b=new float64_t[(N+num_states)*M];
05172
05173
05174 for (i=0; i<N+num_states; i++)
05175 {
05176 n_p[i]=-CMath::INFTY;
05177 n_q[i]=-CMath::INFTY;
05178
05179 for (j=0; j<N+num_states; j++)
05180 n_a[(N+num_states)*j+i]=-CMath::INFTY;
05181
05182 for (j=0; j<M; j++)
05183 n_b[M*i+j]=-CMath::INFTY;
05184 }
05185
05186
05187
05188
05189
05190
05191 for (i=0; i<N; i++)
05192 {
05193 n_p[i]=get_p(i);
05194
05195 for (j=0; j<N; j++)
05196 n_a[(N+num_states)*j+i]=get_a(i,j);
05197
05198 for (j=0; j<M; j++)
05199 {
05200 n_b[M*i+j]=get_b(i,j);
05201 }
05202 }
05203
05204
05205 for (i=0; i<app_model->get_N(); i++)
05206 {
05207 n_q[i+N+2]=app_model->get_q(i);
05208
05209 for (j=0; j<app_model->get_N(); j++)
05210 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
05211 for (j=0; j<app_model->get_M(); j++)
05212 n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
05213 }
05214
05215
05216
05217
05218 for (i=0; i<M; i++)
05219 {
05220 n_b[M*N+i]=cur_out[i];
05221 n_b[M*(N+1)+i]=app_out[i];
05222 }
05223
05224
05225 for (i=0; i<N+num_states; i++)
05226 {
05227
05228 if (i==N+1)
05229 n_a[(N+num_states)*i + N]=0;
05230
05231
05232
05233 if (i<N)
05234 n_a[(N+num_states)*N+i]=get_q(i);
05235
05236
05237
05238 if (i>=N+2)
05239 n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
05240 }
05241
05242 free_state_dependend_arrays();
05243 N+=num_states;
05244
05245 alloc_state_dependend_arrays();
05246
05247
05248 delete[] initial_state_distribution_p;
05249 delete[] end_state_distribution_q;
05250 delete[] transition_matrix_a;
05251 delete[] observation_matrix_b;
05252
05253 transition_matrix_a=n_a;
05254 observation_matrix_b=n_b;
05255 initial_state_distribution_p=n_p;
05256 end_state_distribution_q=n_q;
05257
05258 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05260 invalidate_model();
05261 }
05262
05263 return result;
05264 }
05265
05266
/** Add num_states fresh states to the model.
 *
 * Existing parameters are preserved; every parameter involving a new state
 * is set to log(default_value), or — if default_value is 0 — to the log of
 * a random value in [MIN_RAND, MAX_RAND] (see VAL_MACRO at the top of the
 * file). The model is normalized at the end.
 *
 * @param num_states number of states to append
 * @param default_value linear-space value for the new parameters; 0 means random
 */
void CHMM::add_states(int32_t num_states, float64_t default_value)
{
	int32_t i,j;
	// range of random linear-space values used when default_value==0
	const float64_t MIN_RAND=1e-2;
	const float64_t MAX_RAND=2e-1;

	// enlarged parameter arrays
	float64_t* n_p=new float64_t[N+num_states];
	float64_t* n_q=new float64_t[N+num_states];
	float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];

	float64_t* n_b=new float64_t[(N+num_states)*M];

	// copy the old parameters into the first N states
	// (a is indexed [(N+num_states)*j+i] here — column-major relative to
	// get_a(i,j); presumably matches transition_matrix_a's layout — confirm
	// against get_a/set_a in the header)
	for (i=0; i<N; i++)
	{
		n_p[i]=get_p(i);
		n_q[i]=get_q(i);

		for (j=0; j<N; j++)
			n_a[(N+num_states)*j+i]=get_a(i,j);

		for (j=0; j<M; j++)
			n_b[M*i+j]=get_b(i,j);
	}

	// initialize all parameters touching a new state; the two a-loops
	// together cover row i and column i, and across iterations of i every
	// cell involving a new state gets written
	for (i=N; i<N+num_states; i++)
	{
		n_p[i]=VAL_MACRO;
		n_q[i]=VAL_MACRO;

		for (j=0; j<N; j++)
			n_a[(N+num_states)*i+j]=VAL_MACRO;

		for (j=0; j<N+num_states; j++)
			n_a[(N+num_states)*j+i]=VAL_MACRO;

		for (j=0; j<M; j++)
			n_b[M*i+j]=VAL_MACRO;
	}
	free_state_dependend_arrays();
	N+=num_states;

	alloc_state_dependend_arrays();

	// discard the freshly allocated arrays and install the enlarged ones
	delete[] initial_state_distribution_p;
	delete[] end_state_distribution_q;
	delete[] transition_matrix_a;
	delete[] observation_matrix_b;

	transition_matrix_a=n_a;
	observation_matrix_b=n_b;
	initial_state_distribution_p=n_p;
	end_state_distribution_q=n_q;

	invalidate_model();
	normalize();
}
05326
05327 void CHMM::chop(float64_t value)
05328 {
05329 for (int32_t i=0; i<N; i++)
05330 {
05331 int32_t j;
05332
05333 if (exp(get_p(i)) < value)
05334 set_p(i, CMath::ALMOST_NEG_INFTY);
05335
05336 if (exp(get_q(i)) < value)
05337 set_q(i, CMath::ALMOST_NEG_INFTY);
05338
05339 for (j=0; j<N; j++)
05340 {
05341 if (exp(get_a(i,j)) < value)
05342 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05343 }
05344
05345 for (j=0; j<M; j++)
05346 {
05347 if (exp(get_b(i,j)) < value)
05348 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05349 }
05350 }
05351 normalize();
05352 invalidate_model();
05353 }
05354
/** Estimate the parameters of a linear (left-to-right chain) HMM directly
 * from per-position symbol histograms of the observations.
 *
 * State t emits the symbol at position t of a sequence and transitions are
 * fixed to the chain i -> i+1. Emissions become pseudo-count-smoothed
 * relative frequencies; everything is computed in linear space and
 * converted to log-space at the end via convert_to_log().
 *
 * @param right_align if true, sequences are aligned to END at the last
 *        state (shorter sequences start at a later state); otherwise all
 *        sequences start at state 0 and may end early
 * @return true if observations are set, false otherwise
 */
bool CHMM::linear_train(bool right_align)
{
	if (p_observations)
	{
		// hist[state*M + symbol] counts symbol occurrences per position/state
		int32_t histsize=(get_M()*get_N());
		int32_t* hist=new int32_t[histsize];
		// counts of where sequences start (right_align) or end (otherwise)
		int32_t* startendhist=new int32_t[get_N()];
		int32_t i,dim;

		// the chain must be long enough to hold the longest sequence
		ASSERT(p_observations->get_max_vector_length()<=get_N());

		for (i=0; i<histsize; i++)
			hist[i]=0;

		for (i=0; i<get_N(); i++)
			startendhist[i]=0;

		if (right_align)
		{
			// count symbols, shifting each sequence so that it ends at state N-1
			for (dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				int32_t len=0;
				uint16_t* obs=p_observations->get_feature_vector(dim, len);

				ASSERT(len<=get_N());
				startendhist[(get_N()-len)]++;

				for (i=0;i<len;i++)
					hist[(get_N()-len+i)*get_M() + *obs++]++;
			}

			// all sequences terminate in the last state
			set_q(get_N()-1, 1);
			for (i=0; i<get_N()-1; i++)
				set_q(i, 0);

			// start distribution follows the observed start positions
			for (i=0; i<get_N(); i++)
				set_p(i, startendhist[i]+PSEUDO);

			// deterministic chain transitions i -> i+1
			for (i=0;i<get_N();i++)
			{
				for (int32_t j=0; j<get_N(); j++)
				{
					if (i==j-1)
						set_a(i,j, 1);
					else
						set_a(i,j, 0);
				}
			}
		}
		else
		{
			// count symbols with all sequences starting at state 0
			for (dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				int32_t len=0;
				uint16_t* obs=p_observations->get_feature_vector(dim, len);

				ASSERT(len<=get_N());
				for (i=0;i<len;i++)
					hist[i*get_M() + *obs++]++;

				startendhist[len-1]++;
			}

			// all sequences begin in state 0
			set_p(0, 1);
			for (i=1; i<get_N(); i++)
				set_p(i, 0);

			// end distribution follows the observed sequence lengths
			for (i=0; i<get_N(); i++)
				set_q(i, startendhist[i]+PSEUDO);

			int32_t total=p_observations->get_num_vectors();

			// chain transitions weighted by how many sequences continue past state i
			for (i=0;i<get_N();i++)
			{
				total-= startendhist[i] ;

				for (int32_t j=0; j<get_N(); j++)
				{
					if (i==j-1)
						set_a(i,j, total+PSEUDO);
					else
						set_a(i,j, 0);
				}
			}
			ASSERT(total==0);
		}

		// emissions: smoothed relative frequency of each symbol per state,
		// normalized within the symbol's group (get_masked_symbols with mask
		// 0xFE presumably groups a symbol with its variants — confirm against
		// CStringFeatures)
		for (i=0;i<get_N();i++)
		{
			for (int32_t j=0; j<get_M(); j++)
			{
				float64_t sum=0;
				int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);

				for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
					sum+=hist[offs+k];

				set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
			}
		}

		delete[] hist;
		delete[] startendhist;
		convert_to_log();
		invalidate_model();
		return true;
	}
	else
		return false;
}
05465
05466 void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
05467 {
05468 ASSERT(obs);
05469 p_observations=obs;
05470 SG_REF(obs);
05471
05472 if (obs)
05473 if (obs->get_num_symbols() > M)
05474 SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
05475
05476 if (!reused_caches)
05477 {
05478 #ifdef USE_HMMPARALLEL_STRUCTURES
05479 for (int32_t i=0; i<parallel.get_num_threads(); i++)
05480 {
05481 delete[] alpha_cache[i].table;
05482 delete[] beta_cache[i].table;
05483 delete[] states_per_observation_psi[i];
05484 delete[] path[i];
05485
05486 alpha_cache[i].table=NULL;
05487 beta_cache[i].table=NULL;
05488 #ifndef NOVIT
05489 states_per_observation_psi[i]=NULL;
05490 #endif // NOVIT
05491 path[i]=NULL;
05492 } ;
05493 #else
05494 delete[] alpha_cache.table;
05495 delete[] beta_cache.table;
05496 delete[] states_per_observation_psi;
05497 delete[] path;
05498
05499 alpha_cache.table=NULL;
05500 beta_cache.table=NULL;
05501 states_per_observation_psi=NULL;
05502 path=NULL;
05503
05504 #endif //USE_HMMPARALLEL_STRUCTURES
05505 }
05506
05507 invalidate_model();
05508 }
05509
/** Set the observation sequences and (re)allocate the forward/backward
 * caches and Viterbi path tables, sized for the longest sequence.
 *
 * @param obs string features over the HMM's symbol alphabet; their number
 *        of symbols must not exceed M
 * @param lambda if non-NULL, share lambda's cache/path tables instead of
 *        allocating our own (reused_caches is set so they are not freed
 *        twice)
 */
void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);

	features=obs;

	SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
	SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
	SG_DEBUG("M: %d\n", M);

	if (obs)
	{
		if (obs->get_num_symbols() > M)
		{
			SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
		}
	}

	// free the old tables unless they are shared with another model
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		for (int32_t i=0; i<parallel.get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
#ifndef NOVIT
			delete[] states_per_observation_psi[i];
#endif // NOVIT
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
#ifndef NOVIT
			states_per_observation_psi[i]=NULL;
#endif // NOVIT
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	if (obs!=NULL)
	{
		// all tables are sized for the longest observation sequence
		int32_t max_T=obs->get_max_vector_length();

		if (lambda)
		{
			// share lambda's tables instead of allocating new ones
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel.get_num_threads(); i++)
			{
				this->alpha_cache[i].table= lambda->alpha_cache[i].table;
				this->beta_cache[i].table= lambda->beta_cache[i].table;
				this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
				this->path[i]=lambda->path[i];
			} ;
#else
			this->alpha_cache.table= lambda->alpha_cache.table;
			this->beta_cache.table= lambda->beta_cache.table;
			this->states_per_observation_psi= lambda->states_per_observation_psi;
			this->path=lambda->path;
#endif //USE_HMMPARALLEL_STRUCTURES

			// remember not to free the shared tables on the next reset
			this->reused_caches=true;
		}
		else
		{
			this->reused_caches=false;
			// allocate per-thread (or single) Viterbi path tables
#ifdef USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			for (int32_t i=0; i<parallel.get_num_threads(); i++)
			{
				if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
					SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
				else
					SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
				path[i]=new T_STATES[max_T];
			}
#else // no USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
				SG_DONE();
			else
				SG_ERROR( "failed.\n") ;

			path=new T_STATES[max_T];
#endif // USE_HMMPARALLEL_STRUCTURES
#ifdef USE_HMMCACHE
			// alpha/beta dynamic-programming tables (only with caching enabled)
			SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);

#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel.get_num_threads(); i++)
			{
				if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
					SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;

				if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
					SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
			} ;
#else // USE_HMMPARALLEL_STRUCTURES
			if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of alpha_cache.table failed\n") ;

			if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "beta_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of beta_cache.table failed\n") ;

#endif // USE_HMMPARALLEL_STRUCTURES
#else // USE_HMMCACHE
			// caching disabled: no alpha/beta tables are kept
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel.get_num_threads(); i++)
			{
				alpha_cache[i].table=NULL ;
				beta_cache[i].table=NULL ;
			} ;
#else //USE_HMMPARALLEL_STRUCTURES
			alpha_cache.table=NULL ;
			beta_cache.table=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_HMMCACHE
		}
	}

	invalidate_model();
}
05656
/** Print a sliding-window entropy measure of the observation sequence(s)
 * to stdout, one value per window position.
 *
 * For every window of window_width symbols a symbol histogram is built and
 * sum_j p_j*log(p_j) over the pseudo-count-smoothed symbol probabilities
 * is printed (i.e. the negative of the window's entropy).
 *
 * @param window_width width of the sliding window, must be > 0
 * @param sequence_number index of the sequence to process; a negative
 *        value means all sequences
 * @return true on success, false on invalid arguments or missing observations
 */
bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
{
	if (p_observations && window_width>0 &&
			( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
	{
		int32_t min_sequence=sequence_number;
		int32_t max_sequence=sequence_number;

		if (sequence_number<0)
		{
			// negative index selects the full range of sequences
			min_sequence=0;
			max_sequence=p_observations->get_num_vectors();
			SG_INFO( "numseq: %d\n", max_sequence);
		}

		SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
		for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
		{
			int32_t sequence_length=0;
			uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length);

			// one histogram bucket per symbol, reset for each window
			int32_t histsize=get_M();
			int64_t* hist=new int64_t[histsize];
			int32_t i,j;

			// slide the window over the sequence
			for (i=0; i<sequence_length-window_width; i++)
			{
				for (j=0; j<histsize; j++)
					hist[j]=0;

				uint16_t* ptr=&obs[i];
				for (j=0; j<window_width; j++)
				{
					hist[*ptr++]++;
				}

				// smoothed plug-in estimate of sum p*log(p)
				float64_t perm_entropy=0;
				for (j=0; j<get_M(); j++)
				{
					float64_t p=
						(((float64_t) hist[j])+PSEUDO)/
						(window_width+get_M()*PSEUDO);
					perm_entropy+=p*log(p);
				}

				SG_PRINT( "%f\n", perm_entropy);
			}

			delete[] hist;
		}
		return true;
	}
	else
		return false;
}
05712
05713 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05714 {
05715 if (num_param<N)
05716 return model_derivative_p(num_param, num_example);
05717 else if (num_param<2*N)
05718 return model_derivative_q(num_param-N, num_example);
05719 else if (num_param<N*(N+2))
05720 {
05721 int32_t k=num_param-2*N;
05722 int32_t i=k/N;
05723 int32_t j=k%N;
05724 return model_derivative_a(i,j, num_example);
05725 }
05726 else if (num_param<N*(N+2+M))
05727 {
05728 int32_t k=num_param-N*(N+2);
05729 int32_t i=k/M;
05730 int32_t j=k%M;
05731 return model_derivative_b(i,j, num_example);
05732 }
05733
05734 ASSERT(false);
05735 return -1;
05736 }
05737
05738 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05739 {
05740 if (num_param<N)
05741 return get_p(num_param);
05742 else if (num_param<2*N)
05743 return get_q(num_param-N);
05744 else if (num_param<N*(N+2))
05745 return transition_matrix_a[num_param-2*N];
05746 else if (num_param<N*(N+2+M))
05747 return observation_matrix_b[num_param-N*(N+2)];
05748
05749 ASSERT(false);
05750 return -1;
05751 }
05752
05753
05754
05755 bool CHMM::converge(float64_t x, float64_t y)
05756 {
05757 float64_t diff=y-x;
05758 float64_t absdiff=fabs(diff);
05759
05760 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05761
05762 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05763 {
05764 iteration_count=iterations;
05765 SG_INFO( "...finished\n");
05766 conv_it=5;
05767 return true;
05768 }
05769 else
05770 {
05771 if (absdiff<epsilon)
05772 conv_it--;
05773 else
05774 conv_it=5;
05775
05776 return false;
05777 }
05778 }
05779
05780
05781 void CHMM::switch_model(CHMM** m1, CHMM** m2)
05782 {
05783 CHMM* dummy=*m1;
05784
05785 *m1=*m2;
05786 *m2=dummy;
05787 }
05788
05789 bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
05790 {
05791 CHMM* estimate=new CHMM(this);
05792 CHMM* working=this;
05793 float64_t prob_max=-CMath::INFTY;
05794 float64_t prob=-CMath::INFTY;
05795 float64_t prob_train=CMath::ALMOST_NEG_INFTY;
05796 iteration_count=iterations;
05797
05798 while (!converge(prob, prob_train))
05799 {
05800 switch_model(&working, &estimate);
05801 prob=prob_train;
05802
05803
05804
05805
05806 switch (type) {
05807 case BW_NORMAL:
05808 estimate_model_baum_welch(estimate); break;
05809 case BW_TRANS:
05810 estimate_model_baum_welch_trans(estimate); break;
05811 case BW_DEFINED:
05812 estimate_model_baum_welch_defined(estimate); break;
05813 case VIT_NORMAL:
05814 estimate_model_viterbi(estimate); break;
05815 case VIT_DEFINED:
05816 estimate_model_viterbi_defined(estimate); break;
05817 }
05818 prob_train=estimate->model_probability();
05819 if (prob_max<prob_train)
05820 prob_max=prob_train;
05821 }
05822
05823 delete estimate;
05824 estimate=NULL;
05825
05826 return true;
05827 }
05828