00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016 #include "distributions/hmm/HMM.h"
00017 #include "lib/Mathematics.h"
00018 #include "lib/io.h"
00019 #include "lib/config.h"
00020 #include "base/Parallel.h"
00021 #include "features/StringFeatures.h"
00022 #include "features/CharFeatures.h"
00023 #include "features/Alphabet.h"
00024
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029
00030 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
00031 #define ARRAY_SIZE 65336
00032
00033 #ifdef SUNOS
00034 extern "C" int finite(double);
00035 #endif
00036
00038
00040
00041 const INT CHMM::GOTN= (1<<1);
00042 const INT CHMM::GOTM= (1<<2);
00043 const INT CHMM::GOTO= (1<<3);
00044 const INT CHMM::GOTa= (1<<4);
00045 const INT CHMM::GOTb= (1<<5);
00046 const INT CHMM::GOTp= (1<<6);
00047 const INT CHMM::GOTq= (1<<7);
00048
00049 const INT CHMM::GOTlearn_a= (1<<1);
00050 const INT CHMM::GOTlearn_b= (1<<2);
00051 const INT CHMM::GOTlearn_p= (1<<3);
00052 const INT CHMM::GOTlearn_q= (1<<4);
00053 const INT CHMM::GOTconst_a= (1<<5);
00054 const INT CHMM::GOTconst_b= (1<<6);
00055 const INT CHMM::GOTconst_p= (1<<7);
00056 const INT CHMM::GOTconst_q= (1<<8);
00057
00058 enum E_STATE
00059 {
00060 INITIAL,
00061 ARRAYs,
00062 GET_N,
00063 GET_M,
00064 GET_a,
00065 GET_b,
00066 GET_p,
00067 GET_q,
00068 GET_learn_a,
00069 GET_learn_b,
00070 GET_learn_p,
00071 GET_learn_q,
00072 GET_const_a,
00073 GET_const_b,
00074 GET_const_p,
00075 GET_const_q,
00076 COMMENT,
00077 END
00078 };
00079
00080
00081 #ifdef FIX_POS
00082 const CHAR CModel::FIX_DISALLOWED=0 ;
00083 const CHAR CModel::FIX_ALLOWED=1 ;
00084 const CHAR CModel::FIX_DEFAULT=-1 ;
00085 const DREAL CModel::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
00086 #endif
00087
00088 CModel::CModel()
00089 {
00090 const_a=new int[ARRAY_SIZE];
00091 const_b=new int[ARRAY_SIZE];
00092 const_p=new int[ARRAY_SIZE];
00093 const_q=new int[ARRAY_SIZE];
00094 const_a_val=new DREAL[ARRAY_SIZE];
00095 const_b_val=new DREAL[ARRAY_SIZE];
00096 const_p_val=new DREAL[ARRAY_SIZE];
00097 const_q_val=new DREAL[ARRAY_SIZE];
00098
00099
00100 learn_a=new int[ARRAY_SIZE];
00101 learn_b=new int[ARRAY_SIZE];
00102 learn_p=new int[ARRAY_SIZE];
00103 learn_q=new int[ARRAY_SIZE];
00104
00105 #ifdef FIX_POS
00106 fix_pos_state = new char[ARRAY_SIZE];
00107 #endif
00108 for (INT i=0; i<ARRAY_SIZE; i++)
00109 {
00110 const_a[i]=-1 ;
00111 const_b[i]=-1 ;
00112 const_p[i]=-1 ;
00113 const_q[i]=-1 ;
00114 const_a_val[i]=1.0 ;
00115 const_b_val[i]=1.0 ;
00116 const_p_val[i]=1.0 ;
00117 const_q_val[i]=1.0 ;
00118 learn_a[i]=-1 ;
00119 learn_b[i]=-1 ;
00120 learn_p[i]=-1 ;
00121 learn_q[i]=-1 ;
00122 #ifdef FIX_POS
00123 fix_pos_state[i] = FIX_DEFAULT ;
00124 #endif
00125 } ;
00126 }
00127
00128 CModel::~CModel()
00129 {
00130 delete[] const_a;
00131 delete[] const_b;
00132 delete[] const_p;
00133 delete[] const_q;
00134 delete[] const_a_val;
00135 delete[] const_b_val;
00136 delete[] const_p_val;
00137 delete[] const_q_val;
00138
00139 delete[] learn_a;
00140 delete[] learn_b;
00141 delete[] learn_p;
00142 delete[] learn_q;
00143
00144 #ifdef FIX_POS
00145 delete[] fix_pos_state;
00146 #endif
00147
00148 }
00149
00150 CHMM::CHMM(CHMM* h)
00151 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00152 {
00153 SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;
00154
00155 this->N=h->get_N();
00156 this->M=h->get_M();
00157 status=initialize(NULL, h->get_pseudo());
00158 this->copy_model(h);
00159 set_observations(h->get_observations());
00160 }
00161
00162 CHMM::CHMM(INT p_N, INT p_M, CModel* p_model, DREAL p_PSEUDO)
00163 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00164 {
00165 this->N=p_N;
00166 this->M=p_M;
00167 model=NULL ;
00168
00169 SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;
00170
00171 status=initialize(p_model, p_PSEUDO);
00172 }
00173
00174 CHMM::CHMM(CStringFeatures<WORD>* obs, INT p_N, INT p_M, DREAL p_PSEUDO)
00175 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00176 {
00177 this->N=p_N;
00178 this->M=p_M;
00179 model=NULL ;
00180
00181 SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;
00182
00183 initialize(model, p_PSEUDO);
00184 set_observations(obs);
00185 }
00186
00187 CHMM::CHMM(INT p_N, double* p, double* q, double* a)
00188 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00189 {
00190 this->N=p_N;
00191 this->M=0;
00192 model=NULL ;
00193
00194 trans_list_forward = NULL ;
00195 trans_list_forward_cnt = NULL ;
00196 trans_list_forward_val = NULL ;
00197 trans_list_backward = NULL ;
00198 trans_list_backward_cnt = NULL ;
00199 trans_list_len = 0 ;
00200 mem_initialized = false ;
00201
00202 this->transition_matrix_a=NULL;
00203 this->observation_matrix_b=NULL;
00204 this->initial_state_distribution_p=NULL;
00205 this->end_state_distribution_q=NULL;
00206 this->PSEUDO= PSEUDO;
00207 this->model= model;
00208 this->p_observations=NULL;
00209 this->reused_caches=false;
00210
00211 this->alpha_cache.table=NULL;
00212 this->beta_cache.table=NULL;
00213 this->alpha_cache.dimension=0;
00214 this->beta_cache.dimension=0;
00215 #ifndef NOVIT
00216 this->states_per_observation_psi=NULL ;
00217 this->path=NULL;
00218 #endif //NOVIT
00219 arrayN1=NULL ;
00220 arrayN2=NULL ;
00221
00222 this->loglikelihood=false;
00223 mem_initialized = true ;
00224
00225 transition_matrix_a=a ;
00226 observation_matrix_b=NULL ;
00227 initial_state_distribution_p=p ;
00228 end_state_distribution_q=q ;
00229 transition_matrix_A=NULL ;
00230 observation_matrix_B=NULL ;
00231
00232
00233 }
00234
00235 CHMM::CHMM(INT p_N, double* p, double* q, int num_trans, double* a_trans)
00236 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00237 {
00238 model=NULL ;
00239
00240 this->N=p_N;
00241 this->M=0;
00242
00243 trans_list_forward = NULL ;
00244 trans_list_forward_cnt = NULL ;
00245 trans_list_forward_val = NULL ;
00246 trans_list_backward = NULL ;
00247 trans_list_backward_cnt = NULL ;
00248 trans_list_len = 0 ;
00249 mem_initialized = false ;
00250
00251 this->transition_matrix_a=NULL;
00252 this->observation_matrix_b=NULL;
00253 this->initial_state_distribution_p=NULL;
00254 this->end_state_distribution_q=NULL;
00255 this->PSEUDO= PSEUDO;
00256 this->model= model;
00257 this->p_observations=NULL;
00258 this->reused_caches=false;
00259
00260 this->alpha_cache.table=NULL;
00261 this->beta_cache.table=NULL;
00262 this->alpha_cache.dimension=0;
00263 this->beta_cache.dimension=0;
00264 #ifndef NOVIT
00265 this->states_per_observation_psi=NULL ;
00266 this->path=NULL;
00267 #endif //NOVIT
00268 arrayN1=NULL ;
00269 arrayN2=NULL ;
00270
00271 this->loglikelihood=false;
00272 mem_initialized = true ;
00273
00274 trans_list_forward_cnt=NULL ;
00275 trans_list_len = N ;
00276 trans_list_forward = new T_STATES*[N] ;
00277 trans_list_forward_val = new DREAL*[N] ;
00278 trans_list_forward_cnt = new T_STATES[N] ;
00279
00280 INT start_idx=0;
00281 for (INT j=0; j<N; j++)
00282 {
00283 INT old_start_idx=start_idx;
00284
00285 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00286 {
00287 start_idx++;
00288
00289 if (start_idx>1 && start_idx<num_trans)
00290 ASSERT(a_trans[start_idx+num_trans-1]<=a_trans[start_idx+num_trans]);
00291 }
00292
00293 if (start_idx>1 && start_idx<num_trans)
00294 ASSERT(a_trans[start_idx+num_trans-1]<=a_trans[start_idx+num_trans]);
00295
00296 INT len=start_idx-old_start_idx;
00297 ASSERT(len>=0);
00298
00299 trans_list_forward_cnt[j] = 0 ;
00300
00301 if (len>0)
00302 {
00303 trans_list_forward[j] = new T_STATES[len] ;
00304 trans_list_forward_val[j] = new DREAL[len] ;
00305 }
00306 else
00307 {
00308 trans_list_forward[j] = NULL;
00309 trans_list_forward_val[j] = NULL;
00310 }
00311 }
00312
00313 for (INT i=0; i<num_trans; i++)
00314 {
00315 INT from = (INT)a_trans[i+num_trans] ;
00316 INT to = (INT)a_trans[i] ;
00317 DREAL val = a_trans[i+num_trans*2] ;
00318
00319 ASSERT(from>=0 && from<N);
00320 ASSERT(to>=0 && to<N);
00321
00322 trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
00323 trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
00324 trans_list_forward_cnt[from]++ ;
00325
00326 } ;
00327
00328 transition_matrix_a=NULL ;
00329 observation_matrix_b=NULL ;
00330 initial_state_distribution_p=p ;
00331 end_state_distribution_q=q ;
00332 transition_matrix_A=NULL ;
00333 observation_matrix_B=NULL ;
00334
00335
00336 }
00337
00338
00339 CHMM::CHMM(FILE* model_file, DREAL p_PSEUDO)
00340 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00341 {
00342 SG_INFO( "hmm is using %i separate tables\n", parallel.get_num_threads()) ;
00343
00344 status=initialize(NULL, p_PSEUDO, model_file);
00345 }
00346
00347 CHMM::~CHMM()
00348 {
00349 SG_UNREF(p_observations);
00350
00351 if (trans_list_forward_cnt)
00352 delete[] trans_list_forward_cnt ;
00353 if (trans_list_backward_cnt)
00354 delete[] trans_list_backward_cnt ;
00355 if (trans_list_forward)
00356 {
00357 for (INT i=0; i<trans_list_len; i++)
00358 if (trans_list_forward[i])
00359 delete[] trans_list_forward[i] ;
00360 delete[] trans_list_forward ;
00361 }
00362 if (trans_list_forward_val)
00363 {
00364 for (INT i=0; i<trans_list_len; i++)
00365 if (trans_list_forward_val[i])
00366 delete[] trans_list_forward_val[i] ;
00367 delete[] trans_list_forward_val ;
00368 }
00369 if (trans_list_backward)
00370 {
00371 for (INT i=0; i<trans_list_len; i++)
00372 if (trans_list_backward[i])
00373 delete[] trans_list_backward[i] ;
00374 delete[] trans_list_backward ;
00375 } ;
00376
00377 free_state_dependend_arrays();
00378
00379 if (!reused_caches)
00380 {
00381 #ifdef USE_HMMPARALLEL_STRUCTURES
00382 for (INT i=0; i<parallel.get_num_threads(); i++)
00383 {
00384 delete[] alpha_cache[i].table;
00385 delete[] beta_cache[i].table;
00386 alpha_cache[i].table=NULL;
00387 beta_cache[i].table=NULL;
00388 }
00389
00390 delete[] alpha_cache;
00391 delete[] beta_cache;
00392 alpha_cache=NULL;
00393 beta_cache=NULL;
00394 #else // USE_HMMPARALLEL_STRUCTURES
00395 delete[] alpha_cache.table;
00396 delete[] beta_cache.table;
00397 alpha_cache.table=NULL;
00398 beta_cache.table=NULL;
00399 #endif // USE_HMMPARALLEL_STRUCTURES
00400
00401 #ifndef NOVIT
00402 delete[] states_per_observation_psi;
00403 states_per_observation_psi=NULL;
00404 #endif // NOVIT
00405 }
00406
00407 #ifdef USE_LOGSUMARRAY
00408 #ifdef USE_HMMPARALLEL_STRUCTURES
00409 {
00410 for (INT i=0; i<parallel.get_num_threads(); i++)
00411 delete[] arrayS[i];
00412 delete[] arrayS ;
00413 } ;
00414 #else //USE_HMMPARALLEL_STRUCTURES
00415 delete[] arrayS;
00416 #endif //USE_HMMPARALLEL_STRUCTURES
00417 #endif //USE_LOGSUMARRAY
00418
00419 if (!reused_caches)
00420 {
00421 #ifndef NOVIT
00422 #ifdef USE_HMMPARALLEL_STRUCTURES
00423 delete[] path_prob_updated ;
00424 delete[] path_prob_dimension ;
00425 for (INT i=0; i<parallel.get_num_threads(); i++)
00426 delete[] path[i] ;
00427 #endif //USE_HMMPARALLEL_STRUCTURES
00428 delete[] path;
00429 #endif
00430 }
00431 }
00432
00433 bool CHMM::alloc_state_dependend_arrays()
00434 {
00435
00436 if (!transition_matrix_a && !observation_matrix_b && !initial_state_distribution_p && !end_state_distribution_q)
00437 {
00438 transition_matrix_a=new DREAL[N*N];
00439 observation_matrix_b=new DREAL[N*M];
00440 initial_state_distribution_p=new DREAL[N];
00441 end_state_distribution_q=new DREAL[N];
00442 init_model_random();
00443 convert_to_log();
00444 }
00445
00446 #ifdef USE_HMMPARALLEL_STRUCTURES
00447 for (INT i=0; i<parallel.get_num_threads(); i++)
00448 {
00449 arrayN1[i]=new DREAL[N];
00450 arrayN2[i]=new DREAL[N];
00451 }
00452 #else //USE_HMMPARALLEL_STRUCTURES
00453 arrayN1=new DREAL[N];
00454 arrayN2=new DREAL[N];
00455 #endif //USE_HMMPARALLEL_STRUCTURES
00456
00457 #ifdef LOG_SUMARRAY
00458 #ifdef USE_HMMPARALLEL_STRUCTURES
00459 for (INT i=0; i<parallel.get_num_threads(); i++)
00460 arrayS[i]=new DREAL[(int)(this->N/2+1)];
00461 #else //USE_HMMPARALLEL_STRUCTURES
00462 arrayS=new DREAL[(int)(this->N/2+1)];
00463 #endif //USE_HMMPARALLEL_STRUCTURES
00464 #endif //LOG_SUMARRAY
00465 transition_matrix_A=new DREAL[this->N*this->N];
00466 observation_matrix_B=new DREAL[this->N*this->M];
00467
00468 if (p_observations)
00469 {
00470 #ifdef USE_HMMPARALLEL_STRUCTURES
00471 if (alpha_cache[0].table!=NULL)
00472 #else //USE_HMMPARALLEL_STRUCTURES
00473 if (alpha_cache.table!=NULL)
00474 #endif //USE_HMMPARALLEL_STRUCTURES
00475 set_observations(p_observations);
00476 else
00477 set_observation_nocache(p_observations);
00478 }
00479
00480 this->invalidate_model();
00481
00482 return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00483 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00484 (end_state_distribution_q != NULL));
00485 }
00486
00487 void CHMM::free_state_dependend_arrays()
00488 {
00489 #ifdef USE_HMMPARALLEL_STRUCTURES
00490 for (INT i=0; i<parallel.get_num_threads(); i++)
00491 {
00492 delete[] arrayN1[i];
00493 delete[] arrayN2[i];
00494
00495 arrayN1[i]=NULL;
00496 arrayN2[i]=NULL;
00497 }
00498 #else
00499 delete[] arrayN1;
00500 delete[] arrayN2;
00501 arrayN1=NULL;
00502 arrayN2=NULL;
00503 #endif
00504 if (observation_matrix_b)
00505 {
00506 delete[] transition_matrix_A;
00507 delete[] observation_matrix_B;
00508 delete[] transition_matrix_a;
00509 delete[] observation_matrix_b;
00510 delete[] initial_state_distribution_p;
00511 delete[] end_state_distribution_q;
00512 } ;
00513
00514 transition_matrix_A=NULL;
00515 observation_matrix_B=NULL;
00516 transition_matrix_a=NULL;
00517 observation_matrix_b=NULL;
00518 initial_state_distribution_p=NULL;
00519 end_state_distribution_q=NULL;
00520 }
00521
00522 bool CHMM::initialize(CModel* m, DREAL pseudo, FILE* modelfile)
00523 {
00524
00525 bool files_ok=true;
00526
00527 trans_list_forward = NULL ;
00528 trans_list_forward_cnt = NULL ;
00529 trans_list_forward_val = NULL ;
00530 trans_list_backward = NULL ;
00531 trans_list_backward_cnt = NULL ;
00532 trans_list_len = 0 ;
00533 mem_initialized = false ;
00534
00535 this->transition_matrix_a=NULL;
00536 this->observation_matrix_b=NULL;
00537 this->initial_state_distribution_p=NULL;
00538 this->end_state_distribution_q=NULL;
00539 this->PSEUDO= pseudo;
00540 this->model= m;
00541 this->p_observations=NULL;
00542 this->reused_caches=false;
00543
00544 #ifdef USE_HMMPARALLEL_STRUCTURES
00545 alpha_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00546 beta_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00547 states_per_observation_psi=new P_STATES[parallel.get_num_threads()] ;
00548
00549 for (INT i=0; i<parallel.get_num_threads(); i++)
00550 {
00551 this->alpha_cache[i].table=NULL;
00552 this->beta_cache[i].table=NULL;
00553 this->alpha_cache[i].dimension=0;
00554 this->beta_cache[i].dimension=0;
00555 #ifndef NOVIT
00556 this->states_per_observation_psi[i]=NULL ;
00557 #endif // NOVIT
00558 }
00559
00560 #else // USE_HMMPARALLEL_STRUCTURES
00561 this->alpha_cache.table=NULL;
00562 this->beta_cache.table=NULL;
00563 this->alpha_cache.dimension=0;
00564 this->beta_cache.dimension=0;
00565 #ifndef NOVIT
00566 this->states_per_observation_psi=NULL ;
00567 #endif //NOVIT
00568
00569 #endif //USE_HMMPARALLEL_STRUCTURES
00570
00571
00572 if (modelfile)
00573 files_ok= files_ok && load_model(modelfile);
00574
00575 #ifdef USE_HMMPARALLEL_STRUCTURES
00576 path_prob_updated=new bool[parallel.get_num_threads()];
00577 path_prob_dimension=new int[parallel.get_num_threads()];
00578
00579 path=new P_STATES[parallel.get_num_threads()];
00580
00581 for (INT i=0; i<parallel.get_num_threads(); i++)
00582 {
00583 #ifndef NOVIT
00584 this->path[i]=NULL;
00585 #endif // NOVIT
00586 }
00587 #else // USE_HMMPARALLEL_STRUCTURES
00588 #ifndef NOVIT
00589 this->path=NULL;
00590 #endif //NOVIT
00591
00592 #endif //USE_HMMPARALLEL_STRUCTURES
00593
00594 #ifdef USE_HMMPARALLEL_STRUCTURES
00595 arrayN1=new P_DREAL[parallel.get_num_threads()];
00596 arrayN2=new P_DREAL[parallel.get_num_threads()];
00597 #endif //USE_HMMPARALLEL_STRUCTURES
00598
00599 #ifdef LOG_SUMARRAY
00600 #ifdef USE_HMMPARALLEL_STRUCTURES
00601 arrayS=new P_DREAL[parallel.get_num_threads()] ;
00602 #endif // USE_HMMPARALLEL_STRUCTURES
00603 #endif //LOG_SUMARRAY
00604
00605 alloc_state_dependend_arrays();
00606
00607 this->loglikelihood=false;
00608 mem_initialized = true ;
00609 this->invalidate_model();
00610
00611 return ((files_ok) &&
00612 (transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00613 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00614 (end_state_distribution_q != NULL));
00615 }
00616
00617
00618
00619
00620
00621
00622 DREAL CHMM::forward_comp(INT time, INT state, INT dimension)
00623 {
00624 T_ALPHA_BETA_TABLE* alpha_new;
00625 T_ALPHA_BETA_TABLE* alpha;
00626 T_ALPHA_BETA_TABLE* dummy;
00627 if (time<1)
00628 time=0;
00629
00630
00631 INT wanted_time=time;
00632
00633 if (ALPHA_CACHE(dimension).table)
00634 {
00635 alpha=&ALPHA_CACHE(dimension).table[0];
00636 alpha_new=&ALPHA_CACHE(dimension).table[N];
00637 time=p_observations->get_vector_length(dimension)+1;
00638 }
00639 else
00640 {
00641 alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00642 alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00643 }
00644
00645 if (time<1)
00646 return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
00647 else
00648 {
00649
00650 for (INT i=0; i<N; i++)
00651 alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;
00652
00653
00654 for (register INT t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
00655 {
00656
00657 for (INT j=0; j<N; j++)
00658 {
00659 register INT i, num = trans_list_forward_cnt[j] ;
00660 DREAL sum=-CMath::INFTY;
00661 for (i=0; i < num; i++)
00662 {
00663 INT ii = trans_list_forward[j][i] ;
00664 sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
00665 } ;
00666
00667 alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
00668 }
00669
00670 if (!ALPHA_CACHE(dimension).table)
00671 {
00672 dummy=alpha;
00673 alpha=alpha_new;
00674 alpha_new=dummy;
00675 }
00676 else
00677 {
00678 alpha=alpha_new;
00679 alpha_new+=N;
00680 }
00681 }
00682
00683
00684 if (time<p_observations->get_vector_length(dimension))
00685 {
00686 register INT i, num=trans_list_forward_cnt[state];
00687 register DREAL sum=-CMath::INFTY;
00688 for (i=0; i<num; i++)
00689 {
00690 int ii = trans_list_forward[state][i] ;
00691 sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
00692 } ;
00693
00694 return sum + get_b(state, p_observations->get_feature(dimension,time));
00695 }
00696 else
00697 {
00698
00699 register INT i ;
00700 DREAL sum ;
00701 sum=-CMath::INFTY;
00702 for (i=0; i<N; i++)
00703 sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
00704
00705 if (!ALPHA_CACHE(dimension).table)
00706 return sum;
00707 else
00708 {
00709 ALPHA_CACHE(dimension).dimension=dimension;
00710 ALPHA_CACHE(dimension).updated=true;
00711 ALPHA_CACHE(dimension).sum=sum;
00712
00713 if (wanted_time<p_observations->get_vector_length(dimension))
00714 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
00715 else
00716 return ALPHA_CACHE(dimension).sum;
00717 }
00718 }
00719 }
00720 }
00721
00722
00723
00724
00725
00726 DREAL CHMM::forward_comp_old(INT time, INT state, INT dimension)
00727 {
00728 T_ALPHA_BETA_TABLE* alpha_new;
00729 T_ALPHA_BETA_TABLE* alpha;
00730 T_ALPHA_BETA_TABLE* dummy;
00731 if (time<1)
00732 time=0;
00733
00734 INT wanted_time=time;
00735
00736 if (ALPHA_CACHE(dimension).table)
00737 {
00738 alpha=&ALPHA_CACHE(dimension).table[0];
00739 alpha_new=&ALPHA_CACHE(dimension).table[N];
00740 time=p_observations->get_vector_length(dimension)+1;
00741 }
00742 else
00743 {
00744 alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00745 alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00746 }
00747
00748 if (time<1)
00749 return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
00750 else
00751 {
00752
00753 for (INT i=0; i<N; i++)
00754 alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;
00755
00756
00757 for (register INT t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
00758 {
00759
00760 for (INT j=0; j<N; j++)
00761 {
00762 register INT i ;
00763 #ifdef USE_LOGSUMARRAY
00764 for (i=0; i<(N>>1); i++)
00765 ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
00766 alpha[(i<<1)+1] + get_a((i<<1)+1,j));
00767 if (N%2==1)
00768 alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
00769 CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
00770 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00771 else
00772 alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00773 #else //USE_LOGSUMARRAY
00774 DREAL sum=-CMath::INFTY;
00775 for (i=0; i<N; i++)
00776 sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));
00777
00778 alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
00779 #endif //USE_LOGSUMARRAY
00780 }
00781
00782 if (!ALPHA_CACHE(dimension).table)
00783 {
00784 dummy=alpha;
00785 alpha=alpha_new;
00786 alpha_new=dummy;
00787 }
00788 else
00789 {
00790 alpha=alpha_new;
00791 alpha_new+=N;
00792 }
00793 }
00794
00795
00796 if (time<p_observations->get_vector_length(dimension))
00797 {
00798 register INT i;
00799 #ifdef USE_LOGSUMARRAY
00800 for (i=0; i<(N>>1); i++)
00801 ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
00802 alpha[(i<<1)+1] + get_a((i<<1)+1,state));
00803 if (N%2==1)
00804 return get_b(state, p_observations->get_feature(dimension,time))+
00805 CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
00806 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00807 else
00808 return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00809 #else //USE_LOGSUMARRAY
00810 register DREAL sum=-CMath::INFTY;
00811 for (i=0; i<N; i++)
00812 sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));
00813
00814 return sum + get_b(state, p_observations->get_feature(dimension,time));
00815 #endif //USE_LOGSUMARRAY
00816 }
00817 else
00818 {
00819
00820 register INT i ;
00821 DREAL sum ;
00822 #ifdef USE_LOGSUMARRAY
00823 for (i=0; i<(N>>1); i++)
00824 ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
00825 alpha[(i<<1)+1] + get_q((i<<1)+1));
00826 if (N%2==1)
00827 sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
00828 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00829 else
00830 sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00831 #else //USE_LOGSUMARRAY
00832 sum=-CMath::INFTY;
00833 for (i=0; i<N; i++)
00834 sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
00835 #endif //USE_LOGSUMARRAY
00836
00837 if (!ALPHA_CACHE(dimension).table)
00838 return sum;
00839 else
00840 {
00841 ALPHA_CACHE(dimension).dimension=dimension;
00842 ALPHA_CACHE(dimension).updated=true;
00843 ALPHA_CACHE(dimension).sum=sum;
00844
00845 if (wanted_time<p_observations->get_vector_length(dimension))
00846 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
00847 else
00848 return ALPHA_CACHE(dimension).sum;
00849 }
00850 }
00851 }
00852 }
00853
00854
00855
00856
00857
00858 DREAL CHMM::backward_comp(INT time, INT state, INT dimension)
00859 {
00860 T_ALPHA_BETA_TABLE* beta_new;
00861 T_ALPHA_BETA_TABLE* beta;
00862 T_ALPHA_BETA_TABLE* dummy;
00863 INT wanted_time=time;
00864
00865 if (time<0)
00866 forward(time, state, dimension);
00867
00868 if (BETA_CACHE(dimension).table)
00869 {
00870 beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
00871 beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
00872 time=-1;
00873 }
00874 else
00875 {
00876 beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00877 beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00878 }
00879
00880 if (time>=p_observations->get_vector_length(dimension)-1)
00881
00882
00883 return get_q(state);
00884 else
00885 {
00886
00887 for (register INT i=0; i<N; i++)
00888 beta[i]=get_q(i);
00889
00890
00891 for (register INT t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
00892 {
00893 for (register INT i=0; i<N; i++)
00894 {
00895 register INT j, num=trans_list_backward_cnt[i] ;
00896 DREAL sum=-CMath::INFTY;
00897 for (j=0; j<num; j++)
00898 {
00899 INT jj = trans_list_backward[i][j] ;
00900 sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
00901 } ;
00902 beta_new[i]=sum;
00903 }
00904
00905 if (!BETA_CACHE(dimension).table)
00906 {
00907 dummy=beta;
00908 beta=beta_new;
00909 beta_new=dummy;
00910 }
00911 else
00912 {
00913 beta=beta_new;
00914 beta_new-=N;
00915 }
00916 }
00917
00918 if (time>=0)
00919 {
00920 register INT j, num=trans_list_backward_cnt[state] ;
00921 DREAL sum=-CMath::INFTY;
00922 for (j=0; j<num; j++)
00923 {
00924 INT jj = trans_list_backward[state][j] ;
00925 sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
00926 } ;
00927 return sum;
00928 }
00929 else
00930 {
00931 if (BETA_CACHE(dimension).table)
00932 {
00933 DREAL sum=-CMath::INFTY;
00934 for (register INT j=0; j<N; j++)
00935 sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
00936 BETA_CACHE(dimension).sum=sum;
00937 BETA_CACHE(dimension).dimension=dimension;
00938 BETA_CACHE(dimension).updated=true;
00939
00940 if (wanted_time<p_observations->get_vector_length(dimension))
00941 return BETA_CACHE(dimension).table[wanted_time*N+state];
00942 else
00943 return BETA_CACHE(dimension).sum;
00944 }
00945 else
00946 {
00947 DREAL sum=-CMath::INFTY;
00948 for (register INT j=0; j<N; j++)
00949 sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
00950 return sum;
00951 }
00952 }
00953 }
00954 }
00955
00956
00957 DREAL CHMM::backward_comp_old(INT time, INT state, INT dimension)
00958 {
00959 T_ALPHA_BETA_TABLE* beta_new;
00960 T_ALPHA_BETA_TABLE* beta;
00961 T_ALPHA_BETA_TABLE* dummy;
00962 INT wanted_time=time;
00963
00964 if (time<0)
00965 forward(time, state, dimension);
00966
00967 if (BETA_CACHE(dimension).table)
00968 {
00969 beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
00970 beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
00971 time=-1;
00972 }
00973 else
00974 {
00975 beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00976 beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00977 }
00978
00979 if (time>=p_observations->get_vector_length(dimension)-1)
00980
00981
00982 return get_q(state);
00983 else
00984 {
00985
00986 for (register INT i=0; i<N; i++)
00987 beta[i]=get_q(i);
00988
00989
00990 for (register INT t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
00991 {
00992 for (register INT i=0; i<N; i++)
00993 {
00994 register INT j ;
00995 #ifdef USE_LOGSUMARRAY
00996 for (j=0; j<(N>>1); j++)
00997 ARRAYS(dimension)[j]=CMath::logarithmic_sum(
00998 get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
00999 get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
01000 if (N%2==1)
01001 beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
01002 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01003 else
01004 beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01005 #else //USE_LOGSUMARRAY
01006 DREAL sum=-CMath::INFTY;
01007 for (j=0; j<N; j++)
01008 sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);
01009
01010 beta_new[i]=sum;
01011 #endif //USE_LOGSUMARRAY
01012 }
01013
01014 if (!BETA_CACHE(dimension).table)
01015 {
01016 dummy=beta;
01017 beta=beta_new;
01018 beta_new=dummy;
01019 }
01020 else
01021 {
01022 beta=beta_new;
01023 beta_new-=N;
01024 }
01025 }
01026
01027 if (time>=0)
01028 {
01029 register INT j ;
01030 #ifdef USE_LOGSUMARRAY
01031 for (j=0; j<(N>>1); j++)
01032 ARRAYS(dimension)[j]=CMath::logarithmic_sum(
01033 get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
01034 get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
01035 if (N%2==1)
01036 return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
01037 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01038 else
01039 return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01040 #else //USE_LOGSUMARRAY
01041 DREAL sum=-CMath::INFTY;
01042 for (j=0; j<N; j++)
01043 sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);
01044
01045 return sum;
01046 #endif //USE_LOGSUMARRAY
01047 }
01048 else
01049 {
01050 if (BETA_CACHE(dimension).table)
01051 {
01052 #ifdef USE_LOGSUMARRAY//AAA
01053 for (INT j=0; j<(N>>1); j++)
01054 ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
01055 get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
01056 if (N%2==1)
01057 BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
01058 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01059 else
01060 BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01061 #else //USE_LOGSUMARRAY
01062 DREAL sum=-CMath::INFTY;
01063 for (register INT j=0; j<N; j++)
01064 sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
01065 BETA_CACHE(dimension).sum=sum;
01066 #endif //USE_LOGSUMARRAY
01067 BETA_CACHE(dimension).dimension=dimension;
01068 BETA_CACHE(dimension).updated=true;
01069
01070 if (wanted_time<p_observations->get_vector_length(dimension))
01071 return BETA_CACHE(dimension).table[wanted_time*N+state];
01072 else
01073 return BETA_CACHE(dimension).sum;
01074 }
01075 else
01076 {
01077 DREAL sum=-CMath::INFTY;
01078 for (register INT j=0; j<N; j++)
01079 sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
01080 return sum;
01081 }
01082 }
01083 }
01084 }
01085
01086 #ifndef NOVIT
01087
01088
01089 DREAL CHMM::best_path(INT dimension)
01090 {
01091 if (!p_observations)
01092 return -1;
01093
01094 if (dimension==-1)
01095 {
01096 if (!all_path_prob_updated)
01097 {
01098 SG_INFO( "computing full viterbi likelihood\n") ;
01099 DREAL sum = 0 ;
01100 for (INT i=0; i<p_observations->get_num_vectors(); i++)
01101 sum+=best_path(i) ;
01102 sum /= p_observations->get_num_vectors() ;
01103 all_pat_prob=sum ;
01104 all_path_prob_updated=true ;
01105 return sum ;
01106 } else
01107 return all_pat_prob ;
01108 } ;
01109
01110 if (!STATES_PER_OBSERVATION_PSI(dimension))
01111 return -1 ;
01112
01113 INT len=0;
01114 if (!p_observations->get_feature_vector(dimension,len))
01115 return -1;
01116
01117 if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
01118 return pat_prob;
01119 else
01120 {
01121 register DREAL* delta= ARRAYN2(dimension);
01122 register DREAL* delta_new= ARRAYN1(dimension);
01123
01124 {
01125 for (register INT i=0; i<N; i++)
01126 {
01127 delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
01128 set_psi(0, i, 0, dimension);
01129 }
01130 }
01131
01132 #ifdef USE_PATHDEBUG
01133 DREAL worst=-CMath::INFTY/4 ;
01134 #endif
01135
01136 for (register INT t=1; t<p_observations->get_vector_length(dimension); t++)
01137 {
01138 register DREAL* dummy;
01139 register INT NN=N ;
01140 for (register INT j=0; j<NN; j++)
01141 {
01142 register DREAL * matrix_a=&transition_matrix_a[j*N] ;
01143 register DREAL maxj=delta[0] + matrix_a[0];
01144 register INT argmax=0;
01145
01146 for (register INT i=1; i<NN; i++)
01147 {
01148 register DREAL temp = delta[i] + matrix_a[i];
01149
01150 if (temp>maxj)
01151 {
01152 maxj=temp;
01153 argmax=i;
01154 }
01155 }
01156 #ifdef FIX_POS
01157 if ((!model) || (model->get_fix_pos_state(t,j,NN)!=CModel::FIX_DISALLOWED))
01158 #endif
01159 delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
01160 #ifdef FIX_POS
01161 else
01162 delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + CModel::DISALLOWED_PENALTY;
01163 #endif
01164 set_psi(t, j, argmax, dimension);
01165 }
01166
01167 #ifdef USE_PATHDEBUG
01168 DREAL best=log(0) ;
01169 for (INT jj=0; jj<N; jj++)
01170 if (delta_new[jj]>best)
01171 best=delta_new[jj] ;
01172
01173 if (best<-CMath::INFTY/2)
01174 {
01175 SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
01176 worst=best ;
01177 } ;
01178 #endif
01179
01180 dummy=delta;
01181 delta=delta_new;
01182 delta_new=dummy;
01183 }
01184
01185
01186 {
01187 register DREAL maxj=delta[0]+get_q(0);
01188 register INT argmax=0;
01189
01190 for (register INT i=1; i<N; i++)
01191 {
01192 register DREAL temp=delta[i]+get_q(i);
01193
01194 if (temp>maxj)
01195 {
01196 maxj=temp;
01197 argmax=i;
01198 }
01199 }
01200 pat_prob=maxj;
01201 PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
01202 } ;
01203
01204
01205 {
01206 for (register INT t=p_observations->get_vector_length(dimension)-1; t>0; t--)
01207 {
01208 PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
01209 }
01210 }
01211 PATH_PROB_UPDATED(dimension)=true;
01212 PATH_PROB_DIMENSION(dimension)=dimension;
01213 return pat_prob ;
01214 }
01215 }
01216
01217
01218 #endif // NOVIT
01219
01220 #ifndef USE_HMMPARALLEL
01221 DREAL CHMM::model_probability_comp()
01222 {
01223
01224 mod_prob=0 ;
01225 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
01226 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01227
01228 mod_prob_updated=true;
01229 return mod_prob;
01230 }
01231
01232 #else
01233
01234 DREAL CHMM::model_probability_comp()
01235 {
01236 pthread_t *threads=new pthread_t[parallel.get_num_threads()];
01237 S_MODEL_PROB_PARAM *params=new S_MODEL_PROB_PARAM[parallel.get_num_threads()];
01238
01239 SG_INFO( "computing full model probablity\n");
01240 mod_prob=0;
01241
01242 for (INT cpu=0; cpu<parallel.get_num_threads(); cpu++)
01243 {
01244 params[cpu].hmm=this ;
01245 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel.get_num_threads();
01246 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel.get_num_threads();
01247 #ifdef SUNOS
01248 thr_create(NULL,0,bw_dim_prefetch, (void*)¶ms[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]);
01249 #else // SUNOS
01250 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01251 #endif // SUNOS
01252 }
01253
01254 for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01255 {
01256 void* ret;
01257 pthread_join(threads[cpu], &ret);
01258 mod_prob+=(DREAL) ret;
01259 }
01260
01261 delete[] threads;
01262 delete[] params;
01263
01264 mod_prob_updated=true;
01265 return mod_prob;
01266 }
01267
01268 void* CHMM::bw_dim_prefetch(void * params)
01269 {
01270 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01271 INT dim=((S_THREAD_PARAM*)params)->dim ;
01272 DREAL*p_buf=((S_THREAD_PARAM*)params)->p_buf ;
01273 DREAL*q_buf=((S_THREAD_PARAM*)params)->q_buf ;
01274 DREAL*a_buf=((S_THREAD_PARAM*)params)->a_buf ;
01275 DREAL*b_buf=((S_THREAD_PARAM*)params)->b_buf ;
01276 ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, true, p_buf, q_buf, a_buf, b_buf) ;
01277 return NULL ;
01278 }
01279
01280 DREAL CHMM::model_prob_thread(void* params)
01281 {
01282 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01283 INT dim_start=((S_THREAD_PARAM*)params)->dim_start;
01284 INT dim_stop=((S_THREAD_PARAM*)params)->dim_stop;
01285
01286 DREAL prob=0;
01287 for (INT dim=dim_start; dim<dim_stop; dim++)
01288 hmm->model_probability(dim);
01289
01290 ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01291 return modprob ;
01292 } ;
01293
01294 void* CHMM::bw_dim_prefetch(void * params)
01295 {
01296 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01297 INT dim=((S_THREAD_PARAM*)params)->dim ;
01298 DREAL*p_buf=((S_THREAD_PARAM*)params)->p_buf ;
01299 DREAL*q_buf=((S_THREAD_PARAM*)params)->q_buf ;
01300 DREAL*a_buf=((S_THREAD_PARAM*)params)->a_buf ;
01301 DREAL*b_buf=((S_THREAD_PARAM*)params)->b_buf ;
01302 ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, true, p_buf, q_buf, a_buf, b_buf) ;
01303 return NULL ;
01304 }
01305
01306 #ifndef NOVIT
01307 void* CHMM::vit_dim_prefetch(void * params)
01308 {
01309 CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01310 INT dim=((S_THREAD_PARAM*)params)->dim ;
01311 ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, false) ;
01312 return NULL ;
01313 } ;
01314 #endif // NOVIT
01315
01316 DREAL CHMM::prefetch(INT dim, bool bw, DREAL* p_buf, DREAL* q_buf, DREAL* a_buf, DREAL* b_buf)
01317 {
01318 if (bw)
01319 {
01320 forward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01321 backward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01322 DREAL modprob=model_probability(dim) ;
01323 ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01324 return modprob ;
01325 }
01326 #ifndef NOVIT
01327 else
01328 return best_path(dim) ;
01329 #endif // NOVIT
01330 } ;
01331 #endif //USE_HMMPARALLEL
01332
01333
01334 #ifdef USE_HMMPARALLEL
01335
01336 void CHMM::ab_buf_comp(DREAL* p_buf, DREAL* q_buf, DREAL *a_buf, DREAL* b_buf, INT dim)
01337 {
01338 INT i,j,t ;
01339 DREAL a_sum;
01340 DREAL b_sum;
01341 DREAL prob=0;
01342
01343 DREAL dimmodprob=model_probability(dim);
01344
01345 for (i=0; i<N; i++)
01346 {
01347
01348 p_buf[i]=CMath::logarithmic_sum(get_p(i), get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob);
01349 q_buf[i]=CMath::logarithmic_sum(get_q(i), forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob);
01350
01351
01352 for (j=0; j<N; j++)
01353 {
01354 a_sum=-CMath::INFTY;
01355
01356 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01357 {
01358 a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
01359 get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
01360 }
01361 a_buf[N*i+j]=a_sum-dimmodprob ;
01362 }
01363
01364
01365 for (j=0; j<M; j++)
01366 {
01367 b_sum=CMath::ALMOST_NEG_INFTY;
01368
01369 for (t=0; t<p_observations->get_vector_length(dim); t++)
01370 {
01371 if (p_observations->get_feature(dim,t)==j)
01372 b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
01373 }
01374
01375 b_buf[M*i+j]=b_sum-dimmodprob ;
01376 }
01377 }
01378 }
01379
01380
01381 void CHMM::estimate_model_baum_welch(CHMM* train)
01382 {
01383 INT i,j,t,cpu;
01384 DREAL a_sum, b_sum;
01385 DREAL fullmodprob=0;
01386
01387
01388 for (i=0; i<N; i++)
01389 {
01390 if (train->get_p(i)>CMath::ALMOST_NEG_INFTY)
01391 set_p(i,log(PSEUDO));
01392 else
01393 set_p(i,train->get_p(i));
01394 if (train->get_q(i)>CMath::ALMOST_NEG_INFTY)
01395 set_q(i,log(PSEUDO));
01396 else
01397 set_q(i,train->get_q(i));
01398
01399 for (j=0; j<N; j++)
01400 if (train->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01401 set_a(i,j, log(PSEUDO));
01402 else
01403 set_a(i,j,train->get_a(i,j));
01404 for (j=0; j<M; j++)
01405 if (train->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01406 set_b(i,j, log(PSEUDO));
01407 else
01408 set_b(i,j,train->get_b(i,j));
01409 }
01410
01411 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
01412 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
01413
01414 for (i=0; i<parallel.get_num_threads(); i++)
01415 {
01416 params[i].p_buf=new DREAL[N];
01417 params[i].q_buf=new DREAL[N];
01418 params[i].a_buf=new DREAL[N*N];
01419 params[i].b_buf=new DREAL[N*M];
01420 } ;
01421
01422 for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01423 {
01424 params[cpu].hmm=train;
01425 params[cpu].dim_start=p_observations->get_num_vectors()*cpu / parallel.get_num_threads();
01426 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1) / parallel.get_num_threads();
01427
01428 #ifdef SUNOS
01429 thr_create(NULL,0, bw_dim_prefetch, (void*)¶ms[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]) ;
01430 #else // SUNOS
01431 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]) ;
01432 #endif
01433 }
01434
01435 for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01436 {
01437 void* ret;
01438 pthread_join(threads[cpu], &ret) ;
01439
01440
01441 for (i=0; i<N; i++)
01442 {
01443
01444 set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01445 set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01446
01447
01448 for (j=0; j<N; j++)
01449 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01450
01451
01452 for (j=0; j<M; j++)
01453 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01454 }
01455
01456 fullmodprob+=params[cpu].prob;
01457 }
01458
01459 for (i=0; i<parallel.get_num_threads(); i++)
01460 {
01461 delete[] params[i].p_buf;
01462 delete[] params[i].q_buf;
01463 delete[] params[i].a_buf;
01464 delete[] params[i].b_buf;
01465 }
01466
01467 delete[] threads ;
01468 delete[] params ;
01469
01470
01471 train->mod_prob=fullmodprob;
01472 train->mod_prob_updated=true ;
01473
01474
01475 normalize();
01476 invalidate_model();
01477 }
01478
01479 #else // USE_HMMPARALLEL
01480
01481 #if !defined(NEGATIVE_MODEL_HACK) && !defined(NEGATIVE_MODEL_HACK_DON)
01482
01483
01484 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01485 {
01486 INT i,j,t,dim;
01487 DREAL a_sum, b_sum;
01488 DREAL dimmodprob=0;
01489 DREAL fullmodprob=0;
01490
01491
01492 for (i=0; i<N; i++)
01493 {
01494 if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01495 set_p(i,log(PSEUDO));
01496 else
01497 set_p(i,estimate->get_p(i));
01498 if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01499 set_q(i,log(PSEUDO));
01500 else
01501 set_q(i,estimate->get_q(i));
01502
01503 for (j=0; j<N; j++)
01504 if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01505 set_a(i,j, log(PSEUDO));
01506 else
01507 set_a(i,j,estimate->get_a(i,j));
01508 for (j=0; j<M; j++)
01509 if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01510 set_b(i,j, log(PSEUDO));
01511 else
01512 set_b(i,j,estimate->get_b(i,j));
01513 }
01514 invalidate_model();
01515
01516
01517 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01518 {
01519 dimmodprob=estimate->model_probability(dim);
01520 fullmodprob+=dimmodprob ;
01521
01522 for (i=0; i<N; i++)
01523 {
01524
01525 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01526 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01527
01528 INT num = trans_list_backward_cnt[i] ;
01529
01530
01531 for (j=0; j<num; j++)
01532 {
01533 INT jj = trans_list_backward[i][j] ;
01534 a_sum=-CMath::INFTY;
01535
01536 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01537 {
01538 a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01539 estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
01540 }
01541 set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
01542 }
01543
01544
01545 for (j=0; j<M; j++)
01546 {
01547 b_sum=-CMath::INFTY;
01548
01549 for (t=0; t<p_observations->get_vector_length(dim); t++)
01550 {
01551 if (p_observations->get_feature(dim,t)==j)
01552 b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01553 }
01554
01555 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01556 }
01557 }
01558 }
01559
01560
01561 estimate->mod_prob=fullmodprob;
01562 estimate->mod_prob_updated=true ;
01563
01564
01565 normalize();
01566 invalidate_model();
01567 }
01568
01569
01570
01571 void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
01572 {
01573 INT i,j,t,dim;
01574 DREAL a_sum;
01575 DREAL dimmodprob=0;
01576 DREAL fullmodprob=0;
01577
01578
01579 for (i=0; i<N; i++)
01580 {
01581 if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01582 set_p(i,log(PSEUDO));
01583 else
01584 set_p(i,estimate->get_p(i));
01585 if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01586 set_q(i,log(PSEUDO));
01587 else
01588 set_q(i,estimate->get_q(i));
01589
01590 for (j=0; j<N; j++)
01591 if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01592 set_a(i,j, log(PSEUDO));
01593 else
01594 set_a(i,j,estimate->get_a(i,j));
01595 for (j=0; j<M; j++)
01596 set_b(i,j,estimate->get_b(i,j));
01597 }
01598 invalidate_model();
01599
01600
01601 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01602 {
01603 dimmodprob=estimate->model_probability(dim);
01604 fullmodprob+=dimmodprob ;
01605
01606 for (i=0; i<N; i++)
01607 {
01608
01609 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01610 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01611
01612 INT num = trans_list_backward_cnt[i] ;
01613
01614 for (j=0; j<num; j++)
01615 {
01616 INT jj = trans_list_backward[i][j] ;
01617 a_sum=-CMath::INFTY;
01618
01619 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01620 {
01621 a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01622 estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
01623 }
01624 set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
01625 }
01626 }
01627 }
01628
01629
01630 estimate->mod_prob=fullmodprob;
01631 estimate->mod_prob_updated=true ;
01632
01633
01634 normalize();
01635 invalidate_model();
01636 }
01637
01638
01639
01640 void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
01641 {
01642 INT i,j,t,dim;
01643 DREAL a_sum, b_sum;
01644 DREAL dimmodprob=0;
01645 DREAL fullmodprob=0;
01646
01647
01648 for (i=0; i<N; i++)
01649 {
01650 if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01651 set_p(i,log(PSEUDO));
01652 else
01653 set_p(i,estimate->get_p(i));
01654 if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01655 set_q(i,log(PSEUDO));
01656 else
01657 set_q(i,estimate->get_q(i));
01658
01659 for (j=0; j<N; j++)
01660 if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01661 set_a(i,j, log(PSEUDO));
01662 else
01663 set_a(i,j,estimate->get_a(i,j));
01664 for (j=0; j<M; j++)
01665 if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01666 set_b(i,j, log(PSEUDO));
01667 else
01668 set_b(i,j,estimate->get_b(i,j));
01669 }
01670 invalidate_model();
01671
01672
01673 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01674 {
01675 dimmodprob=estimate->model_probability(dim);
01676 fullmodprob+=dimmodprob ;
01677
01678 for (i=0; i<N; i++)
01679 {
01680
01681 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01682 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01683
01684
01685 for (j=0; j<N; j++)
01686 {
01687 a_sum=-CMath::INFTY;
01688
01689 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01690 {
01691 a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01692 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01693 }
01694 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
01695 }
01696
01697
01698 for (j=0; j<M; j++)
01699 {
01700 b_sum=-CMath::INFTY;
01701
01702 for (t=0; t<p_observations->get_vector_length(dim); t++)
01703 {
01704 if (p_observations->get_feature(dim,t)==j)
01705 b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01706 }
01707
01708 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01709 }
01710 }
01711 }
01712
01713
01714 estimate->mod_prob=fullmodprob;
01715 estimate->mod_prob_updated=true ;
01716
01717
01718 normalize();
01719 invalidate_model();
01720 }
01721
01722
01723 #elif defined(NEGATIVE_MODEL_HACK)
01724
01725 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01726 {
01727 INT i,j,t,dim;
01728 DREAL a_sum, b_sum;
01729 DREAL dimmodprob=0;
01730 DREAL fullmodprob=0;
01731
01732 const DREAL MIN_RAND=23e-3;
01733
01734 SG_DEBUG( "M:%d\n",M);
01735
01736 if (estimate->get_p(0)>-0.00001)
01737 {
01738 for (i=0; i<N; i++)
01739 {
01740 if (i==25)
01741 estimate->set_p(i,-CMath::INFTY);
01742 else
01743 estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));
01744
01745 if (i<49)
01746 estimate->set_q(i, -CMath::INFTY);
01747 else
01748 estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));
01749
01750 if (i<25)
01751 {
01752 for (j=0; j<M; j++)
01753 estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
01754 }
01755 }
01756 }
01757
01758 for (i=0; i<N; i++)
01759 {
01760 if (i==25)
01761 estimate->set_p(i,-CMath::INFTY);
01762
01763 if (i<49)
01764 estimate->set_q(i, -CMath::INFTY);
01765
01766 }
01767 estimate->invalidate_model();
01768 estimate->normalize();
01769
01770
01771 for (i=0; i<N; i++)
01772 {
01773
01774 set_p(i,log(PSEUDO));
01775
01776
01777
01778 set_q(i,log(PSEUDO));
01779
01780 for (j=0; j<N; j++)
01781 set_a(i,j, estimate->get_a(i,j));
01782
01783 if (i<25)
01784 {
01785 for (j=0; j<M; j++)
01786 set_b(i,j, log(PSEUDO));
01787 }
01788 else
01789 {
01790 for (j=0; j<M; j++)
01791 set_b(i,j, estimate->get_b(i,j));
01792 }
01793 }
01794
01795
01796 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01797 {
01798 dimmodprob=estimate->model_probability(dim);
01799 fullmodprob+=dimmodprob ;
01800
01801 for (i=0; i<N; i++)
01802 {
01803
01804 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01805 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01806 }
01807
01808 for (i=0; i<25; i++)
01809 {
01810
01811 for (j=0; j<M; j++)
01812 {
01813 b_sum=CMath::NEG_INFTY;
01814
01815 for (t=0; t<p_observations->get_vector_length(dim); t++)
01816 {
01817 if (p_observations->get_feature(dim,t)==j)
01818 b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01819 }
01820
01821 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01822 }
01823 }
01824 }
01825
01826
01827 estimate->mod_prob=fullmodprob;
01828 estimate->mod_prob_updated=true ;
01829
01830
01831 normalize();
01832 invalidate_model();
01833 }
01834 #else
01835
01836 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01837 {
01838 INT i,j,t,dim;
01839 DREAL a_sum, b_sum;
01840 DREAL dimmodprob=0;
01841 DREAL fullmodprob=0;
01842
01843 const DREAL MIN_RAND=23e-3;
01844 static bool bla=true;
01845
01846 if ((bla) && estimate->get_q(N-1)>-0.00001)
01847 {
01848 bla=false;
01849 for (i=0; i<N; i++)
01850 {
01851 if (i<=N-50)
01852 estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));
01853 else
01854 estimate->set_p(i, -1000);
01855
01856 if ( i==N-25-1)
01857 estimate->set_q(i,-10000);
01858 else
01859 estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));
01860 SG_DEBUG( "q[%d]=%lf\n", i, estimate->get_q(i));
01861
01862 if (i>=N-25)
01863 {
01864 for (j=0; j<M; j++)
01865 estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
01866 }
01867 }
01868 estimate->invalidate_model();
01869 estimate->normalize(true);
01870 }
01871
01872
01873 for (i=0; i<N; i++)
01874 {
01875 set_p(i,log(PSEUDO));
01876 set_q(i,log(PSEUDO));
01877
01878 for (j=0; j<N; j++)
01879 set_a(i,j, estimate->get_a(i,j));
01880
01881 if (i>=N-25)
01882 {
01883 for (j=0; j<M; j++)
01884 set_b(i,j, log(PSEUDO));
01885 }
01886 else
01887 {
01888 for (j=0; j<M; j++)
01889 set_b(i,j, estimate->get_b(i,j));
01890
01891 if (i==N-25-1-24 || i==N-25-1-23)
01892 {
01893 for (j=0; j<M; j++)
01894 {
01895 if (estimate->get_b(i,j)<-10)
01896 set_b(i,j, -CMath::INFTY);
01897 }
01898 }
01899 }
01900 }
01901
01902
01903 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01904 {
01905 dimmodprob=estimate->model_probability(dim);
01906 fullmodprob+=dimmodprob ;
01907
01908 for (i=0; i<N; i++)
01909 {
01910
01911 if (i<=N-50)
01912 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01913 else
01914 set_p(i, -1000);
01915
01916 if (i==N-25-1)
01917 set_q(i,-10000);
01918 else
01919 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01920 }
01921
01922 for (i=N-25; i<N; i++)
01923 {
01924
01925 for (j=0; j<M; j++)
01926 {
01927 b_sum=-CMath::INFTY;
01928
01929 for (t=0; t<p_observations->get_vector_length(dim); t++)
01930 {
01931 if (p_observations->get_feature(dim,t)==j)
01932 b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01933 }
01934
01935 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01936 }
01937 }
01938 }
01939
01940
01941 estimate->mod_prob=fullmodprob;
01942 estimate->mod_prob_updated=true ;
01943
01944
01945 normalize(true);
01946 invalidate_model();
01947 }
01948 #endif //NEGATIVE_MODEL_HACK || .._DON
01949 #endif // USE_HMMPARALLEL
01950
01951
01952
01953 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01954 {
01955 INT i,j,old_i,k,t,dim;
01956 DREAL a_sum_num, b_sum_num;
01957 DREAL a_sum_denom, b_sum_denom;
01958 DREAL dimmodprob=-CMath::INFTY;
01959 DREAL fullmodprob=0;
01960 DREAL* A=ARRAYN1(0);
01961 DREAL* B=ARRAYN2(0);
01962
01963
01964
01965 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01966 set_p(i,log(PSEUDO));
01967
01968 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01969 set_q(i,log(PSEUDO));
01970
01971 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01972 {
01973 j=model->get_learn_a(k,1);
01974 set_a(i,j, log(PSEUDO));
01975 }
01976
01977 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01978 {
01979 j=model->get_learn_b(k,1);
01980 set_b(i,j, log(PSEUDO));
01981 }
01982
01983 for (i=0; i<N; i++)
01984 {
01985 A[i]=log(PSEUDO);
01986 B[i]=log(PSEUDO);
01987 }
01988
01989 #ifdef USE_HMMPARALLEL
01990 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
01991 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
01992 #endif
01993
01994
01995 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01996 {
01997 #ifdef USE_HMMPARALLEL
01998 if (dim%parallel.get_num_threads()==0)
01999 {
02000 INT i ;
02001 for (i=0; i<parallel.get_num_threads(); i++)
02002 if (dim+i<p_observations->get_num_vectors())
02003 {
02004 params[i].hmm=estimate ;
02005 params[i].dim=dim+i ;
02006 #ifdef SUNOS
02007 thr_create(NULL,0, bw_dim_prefetch, (void*)¶ms[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02008 #else // SUNOS
02009 pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)¶ms[i]) ;
02010 #endif
02011 } ;
02012 for (i=0; i<parallel.get_num_threads(); i++)
02013 if (dim+i<p_observations->get_num_vectors())
02014 {
02015 void * ret ;
02016 pthread_join(threads[i], &ret) ;
02017 dimmodprob = params[i].ret ;
02018 } ;
02019 }
02020 #else
02021 dimmodprob=estimate->model_probability(dim);
02022 #endif // USE_HMMPARALLEL
02023
02024
02025 fullmodprob+= dimmodprob;
02026
02027
02028 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
02029 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
02030
02031 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
02032 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
02033 estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );
02034
02035
02036 old_i=-1;
02037 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
02038 {
02039
02040
02041 if (old_i!=i)
02042 {
02043 old_i=i;
02044 a_sum_denom=-CMath::INFTY;
02045
02046 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02047 a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
02048
02049 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
02050 }
02051
02052 j=model->get_learn_a(k,1);
02053 a_sum_num=-CMath::INFTY;
02054 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02055 {
02056 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
02057 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
02058 }
02059
02060 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
02061 }
02062
02063
02064 old_i=-1;
02065 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
02066 {
02067
02068
02069
02070 if (old_i!=i)
02071 {
02072 old_i=i;
02073 b_sum_denom=-CMath::INFTY;
02074
02075 for (t=0; t<p_observations->get_vector_length(dim); t++)
02076 b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
02077
02078 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
02079 }
02080
02081 j=model->get_learn_b(k,1);
02082 b_sum_num=-CMath::INFTY;
02083 for (t=0; t<p_observations->get_vector_length(dim); t++)
02084 {
02085 if (p_observations->get_feature(dim,t)==j)
02086 b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
02087 }
02088
02089 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
02090 }
02091 }
02092 #ifdef USE_HMMPARALLEL
02093 delete[] threads ;
02094 delete[] params ;
02095 #endif
02096
02097
02098
02099 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
02100 set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
02101
02102 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
02103 set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
02104
02105 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
02106 {
02107 j=model->get_learn_a(k,1);
02108 set_a(i,j, get_a(i,j) - A[i]);
02109 }
02110
02111 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
02112 {
02113 j=model->get_learn_b(k,1);
02114 set_b(i,j, get_b(i,j) - B[i]);
02115 }
02116
02117
02118 estimate->mod_prob=fullmodprob;
02119 estimate->mod_prob_updated=true ;
02120
02121
02122 normalize();
02123 invalidate_model();
02124 }
02125
02126 #ifndef NOVIT
02127
02128 void CHMM::estimate_model_viterbi(CHMM* estimate)
02129 {
02130 INT i,j,t;
02131 DREAL sum;
02132 DREAL* P=ARRAYN1(0);
02133 DREAL* Q=ARRAYN2(0);
02134
02135 path_deriv_updated=false ;
02136
02137
02138 for (i=0; i<N; i++)
02139 {
02140 for (j=0; j<N; j++)
02141 set_A(i,j, PSEUDO);
02142
02143 for (j=0; j<M; j++)
02144 set_B(i,j, PSEUDO);
02145
02146 P[i]=PSEUDO;
02147 Q[i]=PSEUDO;
02148 }
02149
02150 DREAL allpatprob=0 ;
02151
02152 #ifdef USE_HMMPARALLEL
02153 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
02154 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
02155 #endif
02156
02157 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
02158 {
02159
02160 #ifdef USE_HMMPARALLEL
02161 if (dim%parallel.get_num_threads()==0)
02162 {
02163 INT i ;
02164 for (i=0; i<parallel.get_num_threads(); i++)
02165 if (dim+i<p_observations->get_num_vectors())
02166 {
02167 params[i].hmm=estimate ;
02168 params[i].dim=dim+i ;
02169 #ifdef SUNOS
02170 thr_create(NULL,0, vit_dim_prefetch, (void*)¶ms[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02171 #else // SUNOS
02172 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02173 #endif
02174 } ;
02175 for (i=0; i<parallel.get_num_threads(); i++)
02176 if (dim+i<p_observations->get_num_vectors())
02177 {
02178 void * ret ;
02179 pthread_join(threads[i], &ret) ;
02180 allpatprob += params[i].ret ;
02181 } ;
02182 } ;
02183 #else
02184
02185 allpatprob += estimate->best_path(dim);
02186 #endif // USE_HMMPARALLEL
02187
02188
02189 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02190 {
02191 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02192 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02193 }
02194
02195 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02196
02197 P[estimate->PATH(dim)[0]]++;
02198 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02199 }
02200
02201 #ifdef USE_HMMPARALLEL
02202 delete[] threads ;
02203 delete[] params ;
02204 #endif
02205
02206 allpatprob/=p_observations->get_num_vectors() ;
02207 estimate->all_pat_prob=allpatprob ;
02208 estimate->all_path_prob_updated=true ;
02209
02210
02211 for (i=0; i<N; i++)
02212 {
02213 sum=0;
02214 for (j=0; j<N; j++)
02215 sum+=get_A(i,j);
02216
02217 for (j=0; j<N; j++)
02218 set_a(i,j, log(get_A(i,j)/sum));
02219 }
02220
02221
02222 for (i=0; i<N; i++)
02223 {
02224 sum=0;
02225 for (j=0; j<M; j++)
02226 sum+=get_B(i,j);
02227
02228 for (j=0; j<M; j++)
02229 set_b(i,j, log(get_B(i, j)/sum));
02230 }
02231
02232
02233 sum=0;
02234 for (i=0; i<N; i++)
02235 sum+=P[i];
02236
02237 for (i=0; i<N; i++)
02238 set_p(i, log(P[i]/sum));
02239
02240
02241 sum=0;
02242 for (i=0; i<N; i++)
02243 sum+=Q[i];
02244
02245 for (i=0; i<N; i++)
02246 set_q(i, log(Q[i]/sum));
02247
02248
02249 invalidate_model();
02250 }
02251
02252
02253 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02254 {
02255 INT i,j,k,t;
02256 DREAL sum;
02257 DREAL* P=ARRAYN1(0);
02258 DREAL* Q=ARRAYN2(0);
02259
02260 path_deriv_updated=false ;
02261
02262
02263 for (i=0; i<N; i++)
02264 {
02265 for (j=0; j<N; j++)
02266 set_A(i,j, PSEUDO);
02267
02268 for (j=0; j<M; j++)
02269 set_B(i,j, PSEUDO);
02270
02271 P[i]=PSEUDO;
02272 Q[i]=PSEUDO;
02273 }
02274
02275 #ifdef USE_HMMPARALLEL
02276 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
02277 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
02278 #endif
02279
02280 DREAL allpatprob=0.0 ;
02281 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
02282 {
02283
02284 #ifdef USE_HMMPARALLEL
02285 if (dim%parallel.get_num_threads()==0)
02286 {
02287 INT i ;
02288 for (i=0; i<parallel.get_num_threads(); i++)
02289 if (dim+i<p_observations->get_num_vectors())
02290 {
02291 params[i].hmm=estimate ;
02292 params[i].dim=dim+i ;
02293 #ifdef SUNOS
02294 thr_create(NULL,0,vit_dim_prefetch, (void*)¶ms[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02295 #else // SUNOS
02296 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02297 #endif //SUNOS
02298 } ;
02299 for (i=0; i<parallel.get_num_threads(); i++)
02300 if (dim+i<p_observations->get_num_vectors())
02301 {
02302 void * ret ;
02303 pthread_join(threads[i], &ret) ;
02304 allpatprob += params[i].ret ;
02305 } ;
02306 } ;
02307 #else // USE_HMMPARALLEL
02308
02309 allpatprob += estimate->best_path(dim);
02310 #endif // USE_HMMPARALLEL
02311
02312
02313
02314 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02315 {
02316 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02317 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02318 }
02319
02320 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02321
02322 P[estimate->PATH(dim)[0]]++;
02323 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02324 }
02325
02326 #ifdef USE_HMMPARALLEL
02327 delete[] threads ;
02328 delete[] params ;
02329 #endif
02330
02331
02332
02333
02334 allpatprob/=p_observations->get_num_vectors() ;
02335 estimate->all_pat_prob=allpatprob ;
02336 estimate->all_path_prob_updated=true ;
02337
02338
02339
02340 for (i=0; i<N; i++)
02341 {
02342 for (j=0; j<N; j++)
02343 set_a(i,j, estimate->get_a(i,j));
02344
02345 for (j=0; j<M; j++)
02346 set_b(i,j, estimate->get_b(i,j));
02347 }
02348
02349
02350 i=0;
02351 sum=0;
02352 j=model->get_learn_a(i,0);
02353 k=i;
02354 while (model->get_learn_a(i,0)!=-1 || k<i)
02355 {
02356 if (j==model->get_learn_a(i,0))
02357 {
02358 sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02359 i++;
02360 }
02361 else
02362 {
02363 while (k<i)
02364 {
02365 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02366 k++;
02367 }
02368
02369 sum=0;
02370 j=model->get_learn_a(i,0);
02371 k=i;
02372 }
02373 }
02374
02375
02376 i=0;
02377 sum=0;
02378 j=model->get_learn_b(i,0);
02379 k=i;
02380 while (model->get_learn_b(i,0)!=-1 || k<i)
02381 {
02382 if (j==model->get_learn_b(i,0))
02383 {
02384 sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02385 i++;
02386 }
02387 else
02388 {
02389 while (k<i)
02390 {
02391 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02392 k++;
02393 }
02394
02395 sum=0;
02396 j=model->get_learn_b(i,0);
02397 k=i;
02398 }
02399 }
02400
02401 i=0;
02402 sum=0;
02403 while (model->get_learn_p(i)!=-1)
02404 {
02405 sum+=P[model->get_learn_p(i)] ;
02406 i++ ;
02407 } ;
02408 i=0 ;
02409 while (model->get_learn_p(i)!=-1)
02410 {
02411 set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02412 i++ ;
02413 } ;
02414
02415 i=0;
02416 sum=0;
02417 while (model->get_learn_q(i)!=-1)
02418 {
02419 sum+=Q[model->get_learn_q(i)] ;
02420 i++ ;
02421 } ;
02422 i=0 ;
02423 while (model->get_learn_q(i)!=-1)
02424 {
02425 set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02426 i++ ;
02427 } ;
02428
02429
02430
02431 invalidate_model();
02432 }
02433 #endif // NOVIT
02434
02435
02436
02437
02438 void CHMM::output_model(bool verbose)
02439 {
02440 INT i,j;
02441 DREAL checksum;
02442
02443
02444 SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
02445 (double)((p_observations) ? model_probability() : -CMath::INFTY),
02446 N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02447
02448 if (verbose)
02449 {
02450
02451 SG_INFO( "\ntransition matrix\n");
02452 for (i=0; i<N; i++)
02453 {
02454 checksum= get_q(i);
02455 for (j=0; j<N; j++)
02456 {
02457 checksum= CMath::logarithmic_sum(checksum, get_a(i,j));
02458
02459 SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float) exp(get_a(i,j)));
02460
02461 if (j % 4 == 3)
02462 SG_PRINT( "\n");
02463 }
02464 if (fabs(checksum)>1e-5)
02465 SG_DEBUG( " checksum % E ******* \n",checksum);
02466 else
02467 SG_DEBUG( " checksum % E\n",checksum);
02468 }
02469
02470
02471 SG_INFO( "\ndistribution of start states\n");
02472 checksum=-CMath::INFTY;
02473 for (i=0; i<N; i++)
02474 {
02475 checksum= CMath::logarithmic_sum(checksum, get_p(i));
02476 SG_INFO( "p(%02i)=%1.4f ",i, (float) exp(get_p(i)));
02477 if (i % 4 == 3)
02478 SG_PRINT( "\n");
02479 }
02480 if (fabs(checksum)>1e-5)
02481 SG_DEBUG( " checksum % E ******* \n",checksum);
02482 else
02483 SG_DEBUG( " checksum=% E\n", checksum);
02484
02485
02486 SG_INFO( "\ndistribution of terminal states\n");
02487 checksum=-CMath::INFTY;
02488 for (i=0; i<N; i++)
02489 {
02490 checksum= CMath::logarithmic_sum(checksum, get_q(i));
02491 SG_INFO("q(%02i)=%1.4f ",i, (float) exp(get_q(i)));
02492 if (i % 4 == 3)
02493 SG_INFO("\n");
02494 }
02495 if (fabs(checksum)>1e-5)
02496 SG_DEBUG( " checksum % E ******* \n",checksum);
02497 else
02498 SG_DEBUG( " checksum=% E\n", checksum);
02499
02500
02501 SG_INFO("\ndistribution of observations given the state\n");
02502 for (i=0; i<N; i++)
02503 {
02504 checksum=-CMath::INFTY;
02505 for (j=0; j<M; j++)
02506 {
02507 checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
02508 SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float) exp(get_b(i,j)));
02509 if (j % 4 == 3)
02510 SG_PRINT("\n");
02511 }
02512 if (fabs(checksum)>1e-5)
02513 SG_DEBUG( " checksum % E ******* \n",checksum);
02514 else
02515 SG_DEBUG( " checksum % E\n",checksum);
02516 }
02517 }
02518 SG_PRINT("\n");
02519 }
02520
02521
02522 void CHMM::output_model_defined(bool verbose)
02523 {
02524 INT i,j;
02525 if (!model)
02526 return ;
02527
02528
02529 SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
02530 (double)((p_observations) ? model_probability() : -CMath::INFTY),
02531 N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02532
02533 if (verbose)
02534 {
02535
02536 SG_INFO("\ntransition matrix\n");
02537
02538
02539 i=0;
02540 j=model->get_learn_a(i,0);
02541 while (model->get_learn_a(i,0)!=-1)
02542 {
02543 if (j!=model->get_learn_a(i,0))
02544 {
02545 j=model->get_learn_a(i,0);
02546 SG_PRINT("\n");
02547 }
02548
02549 SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
02550 i++;
02551 }
02552
02553
02554 SG_INFO("\n\ndistribution of observations given the state\n");
02555 i=0;
02556 j=model->get_learn_b(i,0);
02557 while (model->get_learn_b(i,0)!=-1)
02558 {
02559 if (j!=model->get_learn_b(i,0))
02560 {
02561 j=model->get_learn_b(i,0);
02562 SG_PRINT("\n");
02563 }
02564
02565 SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
02566 i++;
02567 }
02568
02569 SG_PRINT("\n");
02570 }
02571 SG_PRINT("\n");
02572 }
02573
02574
02575
02576
02577 void CHMM::convert_to_log()
02578 {
02579 INT i,j;
02580
02581 for (i=0; i<N; i++)
02582 {
02583 if (get_p(i)!=0)
02584 set_p(i, log(get_p(i)));
02585 else
02586 set_p(i, -CMath::INFTY);;
02587 }
02588
02589 for (i=0; i<N; i++)
02590 {
02591 if (get_q(i)!=0)
02592 set_q(i, log(get_q(i)));
02593 else
02594 set_q(i, -CMath::INFTY);;
02595 }
02596
02597 for (i=0; i<N; i++)
02598 {
02599 for (j=0; j<N; j++)
02600 {
02601 if (get_a(i,j)!=0)
02602 set_a(i,j, log(get_a(i,j)));
02603 else
02604 set_a(i,j, -CMath::INFTY);
02605 }
02606 }
02607
02608 for (i=0; i<N; i++)
02609 {
02610 for (j=0; j<M; j++)
02611 {
02612 if (get_b(i,j)!=0)
02613 set_b(i,j, log(get_b(i,j)));
02614 else
02615 set_b(i,j, -CMath::INFTY);
02616 }
02617 }
02618 loglikelihood=true;
02619
02620 invalidate_model();
02621 }
02622
02623
02624 void CHMM::init_model_random()
02625 {
02626 const DREAL MIN_RAND=23e-3;
02627
02628 DREAL sum;
02629 INT i,j;
02630
02631
02632 for (i=0; i<N; i++)
02633 {
02634 sum=0;
02635 for (j=0; j<N; j++)
02636 {
02637 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02638
02639 sum+=get_a(i,j);
02640 }
02641
02642 for (j=0; j<N; j++)
02643 set_a(i,j, get_a(i,j)/sum);
02644 }
02645
02646
02647 sum=0;
02648 for (i=0; i<N; i++)
02649 {
02650 set_p(i, CMath::random(MIN_RAND, 1.0));
02651
02652 sum+=get_p(i);
02653 }
02654
02655 for (i=0; i<N; i++)
02656 set_p(i, get_p(i)/sum);
02657
02658
02659 sum=0;
02660 for (i=0; i<N; i++)
02661 {
02662 set_q(i, CMath::random(MIN_RAND, 1.0));
02663
02664 sum+=get_q(i);
02665 }
02666
02667 for (i=0; i<N; i++)
02668 set_q(i, get_q(i)/sum);
02669
02670
02671 for (i=0; i<N; i++)
02672 {
02673 sum=0;
02674 for (j=0; j<M; j++)
02675 {
02676 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02677
02678 sum+=get_b(i,j);
02679 }
02680
02681 for (j=0; j<M; j++)
02682 set_b(i,j, get_b(i,j)/sum);
02683 }
02684
02685
02686 invalidate_model();
02687 }
02688
02689
02690 void CHMM::init_model_defined()
02691 {
02692 INT i,j,k,r;
02693 DREAL sum;
02694 const DREAL MIN_RAND=23e-3;
02695
02696
02697 for (i=0; i<N; i++)
02698 for (j=0; j<N; j++)
02699 set_a(i,j, 0);
02700
02701
02702 for (i=0; i<N; i++)
02703 set_p(i, 0);
02704
02705
02706 for (i=0; i<N; i++)
02707 set_q(i, 0);
02708
02709
02710 for (i=0; i<N; i++)
02711 for (j=0; j<M; j++)
02712 set_b(i,j, 0);
02713
02714
02715
02716 DREAL *R=new DREAL[N] ;
02717 for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
02718 i=0; sum=0; k=i;
02719 j=model->get_learn_a(i,0);
02720 while (model->get_learn_a(i,0)!=-1 || k<i)
02721 {
02722 if (j==model->get_learn_a(i,0))
02723 {
02724 sum+=R[model->get_learn_a(i,1)] ;
02725 i++;
02726 }
02727 else
02728 {
02729 while (k<i)
02730 {
02731 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
02732 R[model->get_learn_a(k,1)]/sum);
02733 k++;
02734 }
02735 j=model->get_learn_a(i,0);
02736 k=i;
02737 sum=0;
02738 for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
02739 }
02740 }
02741 delete[] R ; R=NULL ;
02742
02743
02744 R=new DREAL[M] ;
02745 for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
02746 i=0; sum=0; k=0 ;
02747 j=model->get_learn_b(i,0);
02748 while (model->get_learn_b(i,0)!=-1 || k<i)
02749 {
02750 if (j==model->get_learn_b(i,0))
02751 {
02752 sum+=R[model->get_learn_b(i,1)] ;
02753 i++;
02754 }
02755 else
02756 {
02757 while (k<i)
02758 {
02759 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
02760 R[model->get_learn_b(k,1)]/sum);
02761 k++;
02762 }
02763
02764 j=model->get_learn_b(i,0);
02765 k=i;
02766 sum=0;
02767 for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
02768 }
02769 }
02770 delete[] R ; R=NULL ;
02771
02772
02773 i=0;
02774 while (model->get_const_a(i,0) != -1)
02775 {
02776 set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
02777 i++;
02778 }
02779
02780
02781 i=0;
02782 while (model->get_const_b(i,0) != -1)
02783 {
02784 set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
02785 i++;
02786 }
02787
02788
02789 i=0;
02790 while (model->get_const_p(i) != -1)
02791 {
02792 set_p(model->get_const_p(i), model->get_const_p_val(i));
02793 i++;
02794 }
02795
02796
02797 for (i=0; i<N; i++)
02798 set_q(i, 0.0);
02799
02800
02801 i=0;
02802 while (model->get_const_q(i) != -1)
02803 {
02804 set_q(model->get_const_q(i), model->get_const_q_val(i));
02805 i++;
02806 }
02807
02808
02809 i=0;
02810 sum=0;
02811 while (model->get_learn_p(i)!=-1)
02812 {
02813 set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
02814 sum+=get_p(model->get_learn_p(i)) ;
02815 i++ ;
02816 } ;
02817 i=0 ;
02818 while (model->get_learn_p(i)!=-1)
02819 {
02820 set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
02821 i++ ;
02822 } ;
02823
02824
02825 i=0;
02826 sum=0;
02827 while (model->get_learn_q(i)!=-1)
02828 {
02829 set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
02830 sum+=get_q(model->get_learn_q(i)) ;
02831 i++ ;
02832 } ;
02833 i=0 ;
02834 while (model->get_learn_q(i)!=-1)
02835 {
02836 set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
02837 i++ ;
02838 } ;
02839
02840
02841 invalidate_model();
02842 }
02843
02844 void CHMM::clear_model()
02845 {
02846 INT i,j;
02847 for (i=0; i<N; i++)
02848 {
02849 set_p(i, log(PSEUDO));
02850 set_q(i, log(PSEUDO));
02851
02852 for (j=0; j<N; j++)
02853 set_a(i,j, log(PSEUDO));
02854
02855 for (j=0; j<M; j++)
02856 set_b(i,j, log(PSEUDO));
02857 }
02858 }
02859
02860 void CHMM::clear_model_defined()
02861 {
02862 INT i,j,k;
02863
02864 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02865 set_p(j, log(PSEUDO));
02866
02867 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02868 set_q(j, log(PSEUDO));
02869
02870 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02871 {
02872 k=model->get_learn_a(i,1);
02873 set_a(j,k, log(PSEUDO));
02874 }
02875
02876 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02877 {
02878 k=model->get_learn_b(i,1);
02879 set_b(j,k, log(PSEUDO));
02880 }
02881 }
02882
02883 void CHMM::copy_model(CHMM* l)
02884 {
02885 INT i,j;
02886 for (i=0; i<N; i++)
02887 {
02888 set_p(i, l->get_p(i));
02889 set_q(i, l->get_q(i));
02890
02891 for (j=0; j<N; j++)
02892 set_a(i,j, l->get_a(i,j));
02893
02894 for (j=0; j<M; j++)
02895 set_b(i,j, l->get_b(i,j));
02896 }
02897 }
02898
02899 void CHMM::invalidate_model()
02900 {
02901
02902 this->mod_prob=0.0;
02903 this->mod_prob_updated=false;
02904
02905 if (mem_initialized)
02906 {
02907 if (trans_list_forward_cnt)
02908 delete[] trans_list_forward_cnt ;
02909 trans_list_forward_cnt=NULL ;
02910 if (trans_list_backward_cnt)
02911 delete[] trans_list_backward_cnt ;
02912 trans_list_backward_cnt=NULL ;
02913 if (trans_list_forward)
02914 {
02915 for (INT i=0; i<trans_list_len; i++)
02916 if (trans_list_forward[i])
02917 delete[] trans_list_forward[i] ;
02918 delete[] trans_list_forward ;
02919 trans_list_forward=NULL ;
02920 }
02921 if (trans_list_backward)
02922 {
02923 for (INT i=0; i<trans_list_len; i++)
02924 if (trans_list_backward[i])
02925 delete[] trans_list_backward[i] ;
02926 delete[] trans_list_backward ;
02927 trans_list_backward = NULL ;
02928 } ;
02929
02930 trans_list_len = N ;
02931 trans_list_forward = new T_STATES*[N] ;
02932 trans_list_forward_cnt = new T_STATES[N] ;
02933
02934 for (INT j=0; j<N; j++)
02935 {
02936 trans_list_forward_cnt[j]= 0 ;
02937 trans_list_forward[j]= new T_STATES[N] ;
02938 for (INT i=0; i<N; i++)
02939 if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
02940 {
02941 trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
02942 trans_list_forward_cnt[j]++ ;
02943 }
02944 } ;
02945
02946 trans_list_backward = new T_STATES*[N] ;
02947 trans_list_backward_cnt = new T_STATES[N] ;
02948
02949 for (INT i=0; i<N; i++)
02950 {
02951 trans_list_backward_cnt[i]= 0 ;
02952 trans_list_backward[i]= new T_STATES[N] ;
02953 for (INT j=0; j<N; j++)
02954 if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
02955 {
02956 trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
02957 trans_list_backward_cnt[i]++ ;
02958 }
02959 } ;
02960 } ;
02961 #ifndef NOVIT
02962 this->all_pat_prob=0.0;
02963 this->pat_prob=0.0;
02964 this->path_deriv_updated=false ;
02965 this->path_deriv_dimension=-1 ;
02966 this->all_path_prob_updated=false;
02967 #endif //NOVIT
02968
02969 #ifdef USE_HMMPARALLEL_STRUCTURES
02970 {
02971 for (INT i=0; i<parallel.get_num_threads(); i++)
02972 {
02973 this->alpha_cache[i].updated=false;
02974 this->beta_cache[i].updated=false;
02975 #ifndef NOVIT
02976 path_prob_updated[i]=false ;
02977 path_prob_dimension[i]=-1 ;
02978 #endif // NOVIT
02979 } ;
02980 }
02981 #else // USE_HMMPARALLEL_STRUCTURES
02982 this->alpha_cache.updated=false;
02983 this->beta_cache.updated=false;
02984 #ifndef NOVIT
02985 this->path_prob_dimension=-1;
02986 this->path_prob_updated=false;
02987 #endif //NOVIT
02988
02989 #endif // USE_HMMPARALLEL_STRUCTURES
02990 }
02991
02992 void CHMM::open_bracket(FILE* file)
02993 {
02994 INT value;
02995 while (((value=fgetc(file)) != EOF) && (value!='['))
02996 {
02997 if (value=='\n')
02998 line++;
02999 }
03000
03001 if (value==EOF)
03002 error(line, "expected \"[\" in input file");
03003
03004 while (((value=fgetc(file)) != EOF) && (isspace(value)))
03005 {
03006 if (value=='\n')
03007 line++;
03008 }
03009
03010 ungetc(value, file);
03011 }
03012
03013 void CHMM::close_bracket(FILE* file)
03014 {
03015 INT value;
03016 while (((value=fgetc(file)) != EOF) && (value!=']'))
03017 {
03018 if (value=='\n')
03019 line++;
03020 }
03021
03022 if (value==EOF)
03023 error(line, "expected \"]\" in input file");
03024 }
03025
03026 bool CHMM::comma_or_space(FILE* file)
03027 {
03028 INT value;
03029 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
03030 {
03031 if (value=='\n')
03032 line++;
03033 }
03034 if (value==']')
03035 {
03036 ungetc(value, file);
03037 SG_ERROR( "found ']' instead of ';' or ','\n") ;
03038 return false ;
03039 } ;
03040
03041 if (value==EOF)
03042 error(line, "expected \";\" or \",\" in input file");
03043
03044 while (((value=fgetc(file)) != EOF) && (isspace(value)))
03045 {
03046 if (value=='\n')
03047 line++;
03048 }
03049 ungetc(value, file);
03050 return true ;
03051 }
03052
03053 bool CHMM::get_numbuffer(FILE* file, CHAR* buffer, INT length)
03054 {
03055 signed char value;
03056
03057 while (((value=fgetc(file)) != EOF) &&
03058 !isdigit(value) && (value!='A')
03059 && (value!='C') && (value!='G') && (value!='T')
03060 && (value!='N') && (value!='n')
03061 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
03062 {
03063 if (value=='\n')
03064 line++;
03065 }
03066 if (value==']')
03067 {
03068 ungetc(value,file) ;
03069 return false ;
03070 } ;
03071 if (value!=EOF)
03072 {
03073 INT i=0;
03074 switch (value)
03075 {
03076 case 'A':
03077 value='0' +CAlphabet::B_A;
03078 break;
03079 case 'C':
03080 value='0' +CAlphabet::B_C;
03081 break;
03082 case 'G':
03083 value='0' +CAlphabet::B_G;
03084 break;
03085 case 'T':
03086 value='0' +CAlphabet::B_T;
03087 break;
03088 };
03089
03090 buffer[i++]=value;
03091
03092 while (((value=fgetc(file)) != EOF) &&
03093 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
03094 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
03095 || (value=='N') || (value=='n')) && (i<length))
03096 {
03097 switch (value)
03098 {
03099 case 'A':
03100 value='0' +CAlphabet::B_A;
03101 break;
03102 case 'C':
03103 value='0' +CAlphabet::B_C;
03104 break;
03105 case 'G':
03106 value='0' +CAlphabet::B_G;
03107 break;
03108 case 'T':
03109 value='0' +CAlphabet::B_T;
03110 break;
03111 case '1': case '2': case'3': case '4': case'5':
03112 case '6': case '7': case'8': case '9': case '0': break ;
03113 case '.': case 'e': case '-': break ;
03114 default:
03115 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
03116 };
03117 buffer[i++]=value;
03118 }
03119 ungetc(value, file);
03120 buffer[i]='\0';
03121
03122 return (i<=length) && (i>0);
03123 }
03124 return false;
03125 }
03126
03127
03128
03129
03130
03131
03132
03133
03134
03135
03136
03137
03138
03139
03140
03141
03142
03143
03144
03145
03146
03147
03148
03149
03150
03151
03152
03153
03154
03155
03156
03157
03158
03159
03160
03161
03162 bool CHMM::load_model(FILE* file)
03163 {
03164 INT received_params=0;
03165
03166 bool result=false;
03167 E_STATE state=INITIAL;
03168 CHAR buffer[1024];
03169
03170 line=1;
03171 INT i,j;
03172
03173 if (file)
03174 {
03175 while (state!=END)
03176 {
03177 INT value=fgetc(file);
03178
03179 if (value=='\n')
03180 line++;
03181 if (value==EOF)
03182 state=END;
03183
03184 switch (state)
03185 {
03186 case INITIAL:
03187 if (value=='N')
03188 {
03189 if (received_params & GOTN)
03190 error(line, "in model file: \"p double defined\"");
03191 else
03192 state=GET_N;
03193 }
03194 else if (value=='M')
03195 {
03196 if (received_params & GOTM)
03197 error(line, "in model file: \"p double defined\"");
03198 else
03199 state=GET_M;
03200 }
03201 else if (value=='%')
03202 {
03203 state=COMMENT;
03204 }
03205 case ARRAYs:
03206 if (value=='p')
03207 {
03208 if (received_params & GOTp)
03209 error(line, "in model file: \"p double defined\"");
03210 else
03211 state=GET_p;
03212 }
03213 if (value=='q')
03214 {
03215 if (received_params & GOTq)
03216 error(line, "in model file: \"q double defined\"");
03217 else
03218 state=GET_q;
03219 }
03220 else if (value=='a')
03221 {
03222 if (received_params & GOTa)
03223 error(line, "in model file: \"a double defined\"");
03224 else
03225 state=GET_a;
03226 }
03227 else if (value=='b')
03228 {
03229 if (received_params & GOTb)
03230 error(line, "in model file: \"b double defined\"");
03231 else
03232 state=GET_b;
03233 }
03234 else if (value=='%')
03235 {
03236 state=COMMENT;
03237 }
03238 break;
03239 case GET_N:
03240 if (value=='=')
03241 {
03242 if (get_numbuffer(file, buffer, 4))
03243 {
03244 this->N= atoi(buffer);
03245 received_params|=GOTN;
03246 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03247 }
03248 else
03249 state=END;
03250 }
03251 break;
03252 case GET_M:
03253 if (value=='=')
03254 {
03255 if (get_numbuffer(file, buffer, 4))
03256 {
03257 this->M= atoi(buffer);
03258 received_params|=GOTM;
03259 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03260 }
03261 else
03262 state=END;
03263 }
03264 break;
03265 case GET_a:
03266 if (value=='=')
03267 {
03268 double f;
03269
03270 transition_matrix_a=new DREAL[N*N];
03271 open_bracket(file);
03272 for (i=0; i<this->N; i++)
03273 {
03274 open_bracket(file);
03275
03276 for (j=0; j<this->N ; j++)
03277 {
03278
03279 if (fscanf( file, "%le", &f ) != 1)
03280 error(line, "double expected");
03281 else
03282 set_a(i,j, f);
03283
03284 if (j<this->N-1)
03285 comma_or_space(file);
03286 else
03287 close_bracket(file);
03288 }
03289
03290 if (i<this->N-1)
03291 comma_or_space(file);
03292 else
03293 close_bracket(file);
03294 }
03295 received_params|=GOTa;
03296 }
03297 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03298 break;
03299 case GET_b:
03300 if (value=='=')
03301 {
03302 double f;
03303
03304 observation_matrix_b=new DREAL[N*M];
03305 open_bracket(file);
03306 for (i=0; i<this->N; i++)
03307 {
03308 open_bracket(file);
03309
03310 for (j=0; j<this->M ; j++)
03311 {
03312
03313 if (fscanf( file, "%le", &f ) != 1)
03314 error(line, "double expected");
03315 else
03316 set_b(i,j, f);
03317
03318 if (j<this->M-1)
03319 comma_or_space(file);
03320 else
03321 close_bracket(file);
03322 }
03323
03324 if (i<this->N-1)
03325 comma_or_space(file);
03326 else
03327 close_bracket(file);
03328 }
03329 received_params|=GOTb;
03330 }
03331 state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03332 break;
03333 case GET_p:
03334 if (value=='=')
03335 {
03336 double f;
03337
03338 initial_state_distribution_p=new DREAL[N];
03339 open_bracket(file);
03340 for (i=0; i<this->N ; i++)
03341 {
03342 if (fscanf( file, "%le", &f ) != 1)
03343 error(line, "double expected");
03344 else
03345 set_p(i, f);
03346
03347 if (i<this->N-1)
03348 comma_or_space(file);
03349 else
03350 close_bracket(file);
03351 }
03352 received_params|=GOTp;
03353 }
03354 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03355 break;
03356 case GET_q:
03357 if (value=='=')
03358 {
03359 double f;
03360
03361 end_state_distribution_q=new DREAL[N];
03362 open_bracket(file);
03363 for (i=0; i<this->N ; i++)
03364 {
03365 if (fscanf( file, "%le", &f ) != 1)
03366 error(line, "double expected");
03367 else
03368 set_q(i, f);
03369
03370 if (i<this->N-1)
03371 comma_or_space(file);
03372 else
03373 close_bracket(file);
03374 }
03375 received_params|=GOTq;
03376 }
03377 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03378 break;
03379 case COMMENT:
03380 if (value==EOF)
03381 state=END;
03382 else if (value=='\n')
03383 {
03384 line++;
03385 state=INITIAL;
03386 }
03387 break;
03388
03389 default:
03390 break;
03391 }
03392 }
03393 result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
03394 }
03395
03396 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
03398 return result;
03399 }
03400
03401
03402
03403
03404
03405
03406
03407
03408
03409
03410
03411
03412
03413
03414
03415
03416
03417
03418
03419
03420
03421
03422
03423
03424
03425
03426
03427
03428
03429
03430
03431
03432
03433
03434
03435
03436
03437
03438
03439
03440
03441
03442
03443
03444
03445
03446
03447
03448
03449
03450
03451
03452
03453
03454
03455
03456
03457
03458
03459
03460 bool CHMM::load_definitions(FILE* file, bool verbose, bool init)
03461 {
03462 if (model)
03463 delete model ;
03464 model=new CModel();
03465
03466 INT received_params=0x0000000;
03467 CHAR buffer[1024];
03468
03469 bool result=false;
03470 E_STATE state=INITIAL;
03471
03472 {
03473 model->set_learn_a(0, -1);
03474 model->set_learn_a(1, -1);
03475 model->set_const_a(0, -1);
03476 model->set_const_a(1, -1);
03477 model->set_const_a_val(0, 1.0);
03478 model->set_learn_b(0, -1);
03479 model->set_const_b(0, -1);
03480 model->set_const_b_val(0, 1.0);
03481 model->set_learn_p(0, -1);
03482 model->set_learn_q(0, -1);
03483 model->set_const_p(0, -1);
03484 model->set_const_q(0, -1);
03485 } ;
03486
03487 line=1;
03488
03489 if (file)
03490 {
03491 while (state!=END)
03492 {
03493 INT value=fgetc(file);
03494
03495 if (value=='\n')
03496 line++;
03497
03498 if (value==EOF)
03499 state=END;
03500
03501 switch (state)
03502 {
03503 case INITIAL:
03504 if (value=='l')
03505 {
03506 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03507 {
03508 switch(fgetc(file))
03509 {
03510 case 'a':
03511 state=GET_learn_a;
03512 break;
03513 case 'b':
03514 state=GET_learn_b;
03515 break;
03516 case 'p':
03517 state=GET_learn_p;
03518 break;
03519 case 'q':
03520 state=GET_learn_q;
03521 break;
03522 default:
03523 error(line, "a,b,p or q expected in train definition file");
03524 };
03525 }
03526 }
03527 else if (value=='c')
03528 {
03529 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03530 && fgetc(file)=='t' && fgetc(file)=='_')
03531 {
03532 switch(fgetc(file))
03533 {
03534 case 'a':
03535 state=GET_const_a;
03536 break;
03537 case 'b':
03538 state=GET_const_b;
03539 break;
03540 case 'p':
03541 state=GET_const_p;
03542 break;
03543 case 'q':
03544 state=GET_const_q;
03545 break;
03546 default:
03547 error(line, "a,b,p or q expected in train definition file");
03548 };
03549 }
03550 }
03551 else if (value=='%')
03552 {
03553 state=COMMENT;
03554 }
03555 else if (value==EOF)
03556 {
03557 state=END;
03558 }
03559 break;
03560 case GET_learn_a:
03561 if (value=='=')
03562 {
03563 open_bracket(file);
03564 bool finished=false;
03565 INT i=0;
03566
03567 if (verbose)
03568 SG_DEBUG( "\nlearn for transition matrix: ") ;
03569 while (!finished)
03570 {
03571 open_bracket(file);
03572
03573 if (get_numbuffer(file, buffer, 4))
03574 {
03575 value=atoi(buffer);
03576 model->set_learn_a(i++, value);
03577
03578 if (value<0)
03579 {
03580 finished=true;
03581 break;
03582 }
03583 if (value>=N)
03584 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03585 }
03586 else
03587 break;
03588
03589 comma_or_space(file);
03590
03591 if (get_numbuffer(file, buffer, 4))
03592 {
03593 value=atoi(buffer);
03594 model->set_learn_a(i++, value);
03595
03596 if (value<0)
03597 {
03598 finished=true;
03599 break;
03600 }
03601 if (value>=N)
03602 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03603
03604 }
03605 else
03606 break;
03607 close_bracket(file);
03608 }
03609 close_bracket(file);
03610 if (verbose)
03611 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03612
03613 if (finished)
03614 {
03615 received_params|=GOTlearn_a;
03616
03617 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03618 }
03619 else
03620 state=END;
03621 }
03622 break;
03623 case GET_learn_b:
03624 if (value=='=')
03625 {
03626 open_bracket(file);
03627 bool finished=false;
03628 INT i=0;
03629
03630 if (verbose)
03631 SG_DEBUG( "\nlearn for emission matrix: ") ;
03632
03633 while (!finished)
03634 {
03635 open_bracket(file);
03636
03637 INT combine=0;
03638
03639 for (int j=0; j<2; j++)
03640 {
03641 if (get_numbuffer(file, buffer, 4))
03642 {
03643 value=atoi(buffer);
03644
03645 if (j==0)
03646 {
03647 model->set_learn_b(i++, value);
03648
03649 if (value<0)
03650 {
03651 finished=true;
03652 break;
03653 }
03654 if (value>=N)
03655 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03656 }
03657 else
03658 combine=value;
03659 }
03660 else
03661 break;
03662
03663 if (j<1)
03664 comma_or_space(file);
03665 else
03666 close_bracket(file);
03667 }
03668 model->set_learn_b(i++, combine);
03669 if (combine>=M)
03670
03671 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03672 }
03673 close_bracket(file);
03674 if (verbose)
03675 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03676
03677 if (finished)
03678 {
03679 received_params|=GOTlearn_b;
03680 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03681 }
03682 else
03683 state=END;
03684 }
03685 break;
03686 case GET_learn_p:
03687 if (value=='=')
03688 {
03689 open_bracket(file);
03690 bool finished=false;
03691 INT i=0;
03692
03693 if (verbose)
03694 SG_DEBUG( "\nlearn start states: ") ;
03695 while (!finished)
03696 {
03697 if (get_numbuffer(file, buffer, 4))
03698 {
03699 value=atoi(buffer);
03700
03701 model->set_learn_p(i++, value);
03702
03703 if (value<0)
03704 {
03705 finished=true;
03706 break;
03707 }
03708 if (value>=N)
03709 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03710 }
03711 else
03712 break;
03713
03714 comma_or_space(file);
03715 }
03716
03717 close_bracket(file);
03718 if (verbose)
03719 SG_DEBUG( "%i Entries",i-1) ;
03720
03721 if (finished)
03722 {
03723 received_params|=GOTlearn_p;
03724 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03725 }
03726 else
03727 state=END;
03728 }
03729 break;
03730 case GET_learn_q:
03731 if (value=='=')
03732 {
03733 open_bracket(file);
03734 bool finished=false;
03735 INT i=0;
03736
03737 if (verbose)
03738 SG_DEBUG( "\nlearn terminal states: ") ;
03739 while (!finished)
03740 {
03741 if (get_numbuffer(file, buffer, 4))
03742 {
03743 value=atoi(buffer);
03744 model->set_learn_q(i++, value);
03745
03746 if (value<0)
03747 {
03748 finished=true;
03749 break;
03750 }
03751 if (value>=N)
03752 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03753 }
03754 else
03755 break;
03756
03757 comma_or_space(file);
03758 }
03759
03760 close_bracket(file);
03761 if (verbose)
03762 SG_DEBUG( "%i Entries",i-1) ;
03763
03764 if (finished)
03765 {
03766 received_params|=GOTlearn_q;
03767 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03768 }
03769 else
03770 state=END;
03771 }
03772 break;
03773 case GET_const_a:
03774 if (value=='=')
03775 {
03776 open_bracket(file);
03777 bool finished=false;
03778 INT i=0;
03779
03780 if (verbose)
03781 #ifdef USE_HMMDEBUG
03782 SG_DEBUG( "\nconst for transition matrix: \n") ;
03783 #else
03784 SG_DEBUG( "\nconst for transition matrix: ") ;
03785 #endif
03786 while (!finished)
03787 {
03788 open_bracket(file);
03789
03790 if (get_numbuffer(file, buffer, 4))
03791 {
03792 value=atoi(buffer);
03793 model->set_const_a(i++, value);
03794
03795 if (value<0)
03796 {
03797 finished=true;
03798 model->set_const_a(i++, value);
03799 model->set_const_a_val((int)i/2 - 1, value);
03800 break;
03801 }
03802 if (value>=N)
03803 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03804 }
03805 else
03806 break;
03807
03808 comma_or_space(file);
03809
03810 if (get_numbuffer(file, buffer, 4))
03811 {
03812 value=atoi(buffer);
03813 model->set_const_a(i++, value);
03814
03815 if (value<0)
03816 {
03817 finished=true;
03818 model->set_const_a_val((int)i/2 - 1, value);
03819 break;
03820 }
03821 if (value>=N)
03822 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03823 }
03824 else
03825 break;
03826
03827 if (!comma_or_space(file))
03828 model->set_const_a_val((int)i/2 - 1, 1.0);
03829 else
03830 if (get_numbuffer(file, buffer, 10))
03831 {
03832 DREAL dvalue=atof(buffer);
03833 model->set_const_a_val((int)i/2 - 1, dvalue);
03834 if (dvalue<0)
03835 {
03836 finished=true;
03837 break;
03838 }
03839 if ((dvalue>1.0) || (dvalue<0.0))
03840 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03841 }
03842 else
03843 model->set_const_a_val((int)i/2 - 1, 1.0);
03844
03845 #ifdef USE_HMMDEBUG
03846 if (verbose)
03847 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int)i/2-1,0),model->get_const_a((int)i/2-1,1),model->get_const_a_val((int)i/2-1)) ;
03848 #endif
03849 close_bracket(file);
03850 }
03851 close_bracket(file);
03852 if (verbose)
03853 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03854
03855 if (finished)
03856 {
03857 received_params|=GOTconst_a;
03858 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03859 }
03860 else
03861 state=END;
03862 }
03863 break;
03864
03865 case GET_const_b:
03866 if (value=='=')
03867 {
03868 open_bracket(file);
03869 bool finished=false;
03870 INT i=0;
03871
03872 if (verbose)
03873 #ifdef USE_HMMDEBUG
03874 SG_DEBUG( "\nconst for emission matrix: \n") ;
03875 #else
03876 SG_DEBUG( "\nconst for emission matrix: ") ;
03877 #endif
03878 while (!finished)
03879 {
03880 open_bracket(file);
03881 INT combine=0;
03882 for (INT j=0; j<3; j++)
03883 {
03884 if (get_numbuffer(file, buffer, 10))
03885 {
03886 if (j==0)
03887 {
03888 value=atoi(buffer);
03889
03890 model->set_const_b(i++, value);
03891
03892 if (value<0)
03893 {
03894 finished=true;
03895
03896 break;
03897 }
03898 if (value>=N)
03899 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03900 }
03901 else if (j==2)
03902 {
03903 DREAL dvalue=atof(buffer);
03904 model->set_const_b_val((int)(i-1)/2, dvalue);
03905 if (dvalue<0)
03906 {
03907 finished=true;
03908 break;
03909 } ;
03910 if ((dvalue>1.0) || (dvalue<0.0))
03911 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03912 }
03913 else
03914 {
03915 value=atoi(buffer);
03916 combine= value;
03917 } ;
03918 }
03919 else
03920 {
03921 if (j==2)
03922 model->set_const_b_val((int)(i-1)/2, 1.0);
03923 break;
03924 } ;
03925 if (j<2)
03926 if ((!comma_or_space(file)) && (j==1))
03927 {
03928 model->set_const_b_val((int)(i-1)/2, 1.0) ;
03929 break ;
03930 } ;
03931 }
03932 close_bracket(file);
03933 model->set_const_b(i++, combine);
03934 if (combine>=M)
03935 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03936 #ifdef USE_HMMDEBUG
03937 if (verbose && !finished)
03938 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int)i/2-1,0),model->get_const_b((int)i/2-1,1),model->get_const_b_val((int)i/2-1)) ;
03939 #endif
03940 }
03941 close_bracket(file);
03942 if (verbose)
03943 SG_ERROR( "%i Entries",(int)i/2-1) ;
03944
03945 if (finished)
03946 {
03947 received_params|=GOTconst_b;
03948 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03949 }
03950 else
03951 state=END;
03952 }
03953 break;
03954 case GET_const_p:
03955 if (value=='=')
03956 {
03957 open_bracket(file);
03958 bool finished=false;
03959 INT i=0;
03960
03961 if (verbose)
03962 #ifdef USE_HMMDEBUG
03963 SG_DEBUG( "\nconst for start states: \n") ;
03964 #else
03965 SG_DEBUG( "\nconst for start states: ") ;
03966 #endif
03967 while (!finished)
03968 {
03969 open_bracket(file);
03970
03971 if (get_numbuffer(file, buffer, 4))
03972 {
03973 value=atoi(buffer);
03974 model->set_const_p(i, value);
03975
03976 if (value<0)
03977 {
03978 finished=true;
03979 model->set_const_p_val(i++, value);
03980 break;
03981 }
03982 if (value>=N)
03983 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03984
03985 }
03986 else
03987 break;
03988
03989 if (!comma_or_space(file))
03990 model->set_const_p_val(i++, 1.0);
03991 else
03992 if (get_numbuffer(file, buffer, 10))
03993 {
03994 DREAL dvalue=atof(buffer);
03995 model->set_const_p_val(i++, dvalue);
03996 if (dvalue<0)
03997 {
03998 finished=true;
03999 break;
04000 }
04001 if ((dvalue>1) || (dvalue<0))
04002 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
04003 }
04004 else
04005 model->set_const_p_val(i++, 1.0);
04006
04007 close_bracket(file);
04008
04009 #ifdef USE_HMMDEBUG
04010 if (verbose)
04011 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
04012 #endif
04013 }
04014 if (verbose)
04015 SG_DEBUG( "%i Entries",i-1) ;
04016
04017 close_bracket(file);
04018
04019 if (finished)
04020 {
04021 received_params|=GOTconst_p;
04022 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04023 }
04024 else
04025 state=END;
04026 }
04027 break;
04028 case GET_const_q:
04029 if (value=='=')
04030 {
04031 open_bracket(file);
04032 bool finished=false;
04033 if (verbose)
04034 #ifdef USE_HMMDEBUG
04035 SG_DEBUG( "\nconst for terminal states: \n") ;
04036 #else
04037 SG_DEBUG( "\nconst for terminal states: ") ;
04038 #endif
04039 INT i=0;
04040
04041 while (!finished)
04042 {
04043 open_bracket(file) ;
04044 if (get_numbuffer(file, buffer, 4))
04045 {
04046 value=atoi(buffer);
04047 model->set_const_q(i, value);
04048 if (value<0)
04049 {
04050 finished=true;
04051 model->set_const_q_val(i++, value);
04052 break;
04053 }
04054 if (value>=N)
04055 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
04056 }
04057 else
04058 break;
04059
04060 if (!comma_or_space(file))
04061 model->set_const_q_val(i++, 1.0);
04062 else
04063 if (get_numbuffer(file, buffer, 10))
04064 {
04065 DREAL dvalue=atof(buffer);
04066 model->set_const_q_val(i++, dvalue);
04067 if (dvalue<0)
04068 {
04069 finished=true;
04070 break;
04071 }
04072 if ((dvalue>1) || (dvalue<0))
04073 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
04074 }
04075 else
04076 model->set_const_q_val(i++, 1.0);
04077
04078 close_bracket(file);
04079 #ifdef USE_HMMDEBUG
04080 if (verbose)
04081 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
04082 #endif
04083 }
04084 if (verbose)
04085 SG_DEBUG( "%i Entries",i-1) ;
04086
04087 close_bracket(file);
04088
04089 if (finished)
04090 {
04091 received_params|=GOTconst_q;
04092 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04093 }
04094 else
04095 state=END;
04096 }
04097 break;
04098 case COMMENT:
04099 if (value==EOF)
04100 state=END;
04101 else if (value=='\n')
04102 state=INITIAL;
04103 break;
04104
04105 default:
04106 break;
04107 }
04108 }
04109 }
04110
04111
04112
04113
04114
04115 result=1 ;
04116 if (result)
04117 {
04118 model->sort_learn_a() ;
04119 model->sort_learn_b() ;
04120 if (init)
04121 {
04122 init_model_defined(); ;
04123 convert_to_log();
04124 } ;
04125 }
04126 if (verbose)
04127 SG_DEBUG( "\n") ;
04128 return result;
04129 }
04130
04131
04132
04133
04134
04135
04136
04137
04138
04139
04140
04141
04142
04143
04144
04145
04146
04147
04148
04149
04150
04151
04152
04153
04154
04155
04156
04157
04158
04159
04160
04161
04162
04163
04164
04165 bool CHMM::save_model(FILE* file)
04166 {
04167 bool result=false;
04168 INT i,j;
04169 const float NAN_REPLACEMENT = (float) CMath::ALMOST_NEG_INFTY ;
04170
04171 if (file)
04172 {
04173 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
04174 fprintf(file,"N=%d;\n",N);
04175 fprintf(file,"M=%d;\n",M);
04176
04177 fprintf(file,"p=[");
04178 for (i=0; i<N; i++)
04179 {
04180 if (i<N-1) {
04181 if (finite(get_p(i)))
04182 fprintf(file, "%e,", (double)get_p(i));
04183 else
04184 fprintf(file, "%f,", NAN_REPLACEMENT);
04185 }
04186 else {
04187 if (finite(get_p(i)))
04188 fprintf(file, "%e", (double)get_p(i));
04189 else
04190 fprintf(file, "%f", NAN_REPLACEMENT);
04191 }
04192 }
04193
04194 fprintf(file,"];\n\nq=[");
04195 for (i=0; i<N; i++)
04196 {
04197 if (i<N-1) {
04198 if (finite(get_q(i)))
04199 fprintf(file, "%e,", (double)get_q(i));
04200 else
04201 fprintf(file, "%f,", NAN_REPLACEMENT);
04202 }
04203 else {
04204 if (finite(get_q(i)))
04205 fprintf(file, "%e", (double)get_q(i));
04206 else
04207 fprintf(file, "%f", NAN_REPLACEMENT);
04208 }
04209 }
04210 fprintf(file,"];\n\na=[");
04211
04212 for (i=0; i<N; i++)
04213 {
04214 fprintf(file, "\t[");
04215
04216 for (j=0; j<N; j++)
04217 {
04218 if (j<N-1) {
04219 if (finite(get_a(i,j)))
04220 fprintf(file, "%e,", (double)get_a(i,j));
04221 else
04222 fprintf(file, "%f,", NAN_REPLACEMENT);
04223 }
04224 else {
04225 if (finite(get_a(i,j)))
04226 fprintf(file, "%e];\n", (double)get_a(i,j));
04227 else
04228 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04229 }
04230 }
04231 }
04232
04233 fprintf(file," ];\n\nb=[");
04234
04235 for (i=0; i<N; i++)
04236 {
04237 fprintf(file, "\t[");
04238
04239 for (j=0; j<M; j++)
04240 {
04241 if (j<M-1) {
04242 if (finite(get_b(i,j)))
04243 fprintf(file, "%e,", (double)get_b(i,j));
04244 else
04245 fprintf(file, "%f,", NAN_REPLACEMENT);
04246 }
04247 else {
04248 if (finite(get_b(i,j)))
04249 fprintf(file, "%e];\n", (double)get_b(i,j));
04250 else
04251 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04252 }
04253 }
04254 }
04255 result= (fprintf(file," ];\n") == 5);
04256 }
04257
04258 return result;
04259 }
04260
04261 #ifndef NOVIT
04262 T_STATES* CHMM::get_path(INT dim, DREAL& prob)
04263 {
04264 T_STATES* result = NULL;
04265
04266 prob = best_path(dim);
04267 result = new T_STATES[p_observations->get_vector_length(dim)];
04268
04269 for (INT i=0; i<p_observations->get_vector_length(dim); i++)
04270 result[i]=PATH(dim)[i];
04271
04272 return result;
04273 }
04274
04275 bool CHMM::save_path(FILE* file)
04276 {
04277 bool result=false;
04278
04279 if (file)
04280 {
04281 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04282 {
04283 if (dim%100==0)
04284 SG_PRINT( "%i..", dim) ;
04285 DREAL prob = best_path(dim);
04286 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04287 for (INT i=0; i<p_observations->get_vector_length(dim)-1; i++)
04288 fprintf(file,"%d ", PATH(dim)[i]);
04289 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04290 fprintf(file,"\n\n") ;
04291 }
04292 SG_DONE();
04293 result=true;
04294 }
04295
04296 return result;
04297 }
04298 #endif // NOVIT
04299
04300 bool CHMM::save_likelihood_bin(FILE* file)
04301 {
04302 bool result=false;
04303
04304 if (file)
04305 {
04306 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04307 {
04308 float prob= (float) model_probability(dim);
04309 fwrite(&prob, sizeof(float), 1, file);
04310 }
04311 result=true;
04312 }
04313
04314 return result;
04315 }
04316
04317 bool CHMM::save_likelihood(FILE* file)
04318 {
04319 bool result=false;
04320
04321 if (file)
04322 {
04323 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04324
04325 fprintf(file, "P=[");
04326 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04327 fprintf(file, "%e ", (double) model_probability(dim));
04328
04329 fprintf(file,"];");
04330 result=true;
04331 }
04332
04333 return result;
04334 }
04335
04336 #define FLOATWRITE(file, value) { float rrr=float(value); fwrite(&rrr, sizeof(float), 1, file); num_floats++;}
04337
04338 bool CHMM::save_model_bin(FILE* file)
04339 {
04340 INT i,j,q, num_floats=0 ;
04341 if (!model)
04342 {
04343 if (file)
04344 {
04345
04346 FLOATWRITE(file, (float)CMath::INFTY);
04347 FLOATWRITE(file, (float) 1);
04348
04349
04350 for (i=0; i<N; i++)
04351 FLOATWRITE(file, get_p(i));
04352 SG_INFO( "wrote %i parameters for p\n",N) ;
04353
04354 for (i=0; i<N; i++)
04355 FLOATWRITE(file, get_q(i)) ;
04356 SG_INFO( "wrote %i parameters for q\n",N) ;
04357
04358
04359 for (i=0; i<N; i++)
04360 for (j=0; j<N; j++)
04361 FLOATWRITE(file, get_a(i,j));
04362 SG_INFO( "wrote %i parameters for a\n",N*N) ;
04363
04364 for (i=0; i<N; i++)
04365 for (j=0; j<M; j++)
04366 FLOATWRITE(file, get_b(i,j));
04367 SG_INFO( "wrote %i parameters for b\n",N*M) ;
04368
04369
04370 FLOATWRITE(file, (float)CMath::INFTY);
04371 FLOATWRITE(file, (float) 3);
04372
04373
04374 FLOATWRITE(file, (float) N);
04375 FLOATWRITE(file, (float) N);
04376 FLOATWRITE(file, (float) N*N);
04377 FLOATWRITE(file, (float) N*M);
04378 FLOATWRITE(file, (float) N);
04379 FLOATWRITE(file, (float) M);
04380 } ;
04381 }
04382 else
04383 {
04384 if (file)
04385 {
04386 INT num_p, num_q, num_a, num_b ;
04387
04388 FLOATWRITE(file, (float)CMath::INFTY);
04389 FLOATWRITE(file, (float) 2);
04390
04391 for (i=0; model->get_learn_p(i)>=0; i++)
04392 FLOATWRITE(file, get_p(model->get_learn_p(i)));
04393 num_p=i ;
04394 SG_INFO( "wrote %i parameters for p\n",num_p) ;
04395
04396 for (i=0; model->get_learn_q(i)>=0; i++)
04397 FLOATWRITE(file, get_q(model->get_learn_q(i)));
04398 num_q=i ;
04399 SG_INFO( "wrote %i parameters for q\n",num_q) ;
04400
04401
04402 for (q=0; model->get_learn_a(q,1)>=0; q++)
04403 {
04404 i=model->get_learn_a(q,0) ;
04405 j=model->get_learn_a(q,1) ;
04406 FLOATWRITE(file, (float)i);
04407 FLOATWRITE(file, (float)j);
04408 FLOATWRITE(file, get_a(i,j));
04409 } ;
04410 num_a=q ;
04411 SG_INFO( "wrote %i parameters for a\n",num_a) ;
04412
04413 for (q=0; model->get_learn_b(q,0)>=0; q++)
04414 {
04415 i=model->get_learn_b(q,0) ;
04416 j=model->get_learn_b(q,1) ;
04417 FLOATWRITE(file, (float)i);
04418 FLOATWRITE(file, (float)j);
04419 FLOATWRITE(file, get_b(i,j));
04420 } ;
04421 num_b=q ;
04422 SG_INFO( "wrote %i parameters for b\n",num_b) ;
04423
04424
04425 FLOATWRITE(file, (float)CMath::INFTY);
04426 FLOATWRITE(file, (float) 3);
04427
04428
04429 FLOATWRITE(file, (float) num_p);
04430 FLOATWRITE(file, (float) num_q);
04431 FLOATWRITE(file, (float) num_a);
04432 FLOATWRITE(file, (float) num_b);
04433 FLOATWRITE(file, (float) N);
04434 FLOATWRITE(file, (float) M);
04435 } ;
04436 } ;
04437 return true ;
04438 }
04439
04440 #ifndef NOVIT
04441 bool CHMM::save_path_derivatives(FILE* logfile)
04442 {
04443 INT dim,i,j;
04444 DREAL prob;
04445
04446 if (logfile)
04447 {
04448 fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
04449 fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04450 fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04451 fprintf(logfile,"%% ............................. \n");
04452 fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
04453 fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
04454 }
04455 else
04456 return false ;
04457
04458 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04459 {
04460 prob=best_path(dim);
04461
04462 fprintf(logfile, "[ ");
04463
04464
04465 for (i=0; i<N; i++)
04466 fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );
04467
04468 for (i=0; i<N; i++)
04469 fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );
04470
04471
04472 for (i=0; i<N; i++)
04473 for (j=0; j<N; j++)
04474 fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );
04475
04476 for (i=0; i<N; i++)
04477 for (j=0; j<M; j++)
04478 fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );
04479
04480 fseek(logfile,ftell(logfile)-1,SEEK_SET);
04481 fprintf(logfile, " ];\n");
04482 }
04483
04484 fprintf(logfile, "];");
04485
04486 return true ;
04487 }
04488
04489 bool CHMM::save_path_derivatives_bin(FILE* logfile)
04490 {
04491 bool result=false;
04492 INT dim,i,j,q;
04493 DREAL prob=0 ;
04494 INT num_floats=0 ;
04495
04496 DREAL sum_prob=0.0 ;
04497 if (!model)
04498 SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
04499 else
04500 SG_INFO( "writing derivatives of changed weights only\n") ;
04501
04502 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04503 {
04504 if (dim%100==0)
04505 {
04506 SG_PRINT( ".") ;
04507
04508 } ;
04509
04510 prob=best_path(dim);
04511 sum_prob+=prob ;
04512
04513 if (!model)
04514 {
04515 if (logfile)
04516 {
04517
04518 FLOATWRITE(logfile, prob);
04519
04520 for (i=0; i<N; i++)
04521 FLOATWRITE(logfile, path_derivative_p(i,dim));
04522
04523 for (i=0; i<N; i++)
04524 FLOATWRITE(logfile, path_derivative_q(i,dim));
04525
04526 for (i=0; i<N; i++)
04527 for (j=0; j<N; j++)
04528 FLOATWRITE(logfile, path_derivative_a(i,j,dim));
04529
04530 for (i=0; i<N; i++)
04531 for (j=0; j<M; j++)
04532 FLOATWRITE(logfile, path_derivative_b(i,j,dim));
04533
04534 }
04535 }
04536 else
04537 {
04538 if (logfile)
04539 {
04540
04541 FLOATWRITE(logfile, prob);
04542
04543 for (i=0; model->get_learn_p(i)>=0; i++)
04544 FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));
04545
04546 for (i=0; model->get_learn_q(i)>=0; i++)
04547 FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));
04548
04549 for (q=0; model->get_learn_a(q,0)>=0; q++)
04550 {
04551 i=model->get_learn_a(q,0) ;
04552 j=model->get_learn_a(q,1) ;
04553 FLOATWRITE(logfile, path_derivative_a(i,j,dim));
04554 } ;
04555
04556 for (q=0; model->get_learn_b(q,0)>=0; q++)
04557 {
04558 i=model->get_learn_b(q,0) ;
04559 j=model->get_learn_b(q,1) ;
04560 FLOATWRITE(logfile, path_derivative_b(i,j,dim));
04561 } ;
04562 }
04563 } ;
04564 }
04565 save_model_bin(logfile) ;
04566
04567 result=true;
04568 SG_PRINT( "\n") ;
04569 return result;
04570 }
04571 #endif // NOVIT
04572
04573 bool CHMM::save_model_derivatives_bin(FILE* file)
04574 {
04575 bool result=false;
04576 INT dim,i,j,q ;
04577 INT num_floats=0 ;
04578
04579 if (!model)
04580 SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
04581 else
04582 SG_INFO( "writing derivatives of changed weights only\n") ;
04583
04584 #ifdef USE_HMMPARALLEL
04585 pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
04586 S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
04587 #endif
04588
04589 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04590 {
04591 if (dim%20==0)
04592 {
04593 SG_PRINT( ".") ;
04594
04595 } ;
04596
04597 #ifdef USE_HMMPARALLEL
04598 if (dim%parallel.get_num_threads()==0)
04599 {
04600 INT i ;
04601 for (i=0; i<parallel.get_num_threads(); i++)
04602 if (dim+i<p_observations->get_num_vectors())
04603 {
04604 params[i].hmm=this ;
04605 params[i].dim=dim+i ;
04606 #ifdef SUNOS
04607 thr_create(NULL,0,bw_dim_prefetch, (void*)¶ms[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
04608 #else // SUNOS
04609 pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)¶ms[i]) ;
04610 #endif // SUNOS
04611 } ;
04612 for (i=0; i<parallel.get_num_threads(); i++)
04613 if (dim+i<p_observations->get_num_vectors())
04614 {
04615 void * ret ;
04616 pthread_join(threads[i], &ret) ;
04617 } ;
04618 } ;
04619 #endif
04620
04621 DREAL prob=model_probability(dim) ;
04622 if (!model)
04623 {
04624 if (file)
04625 {
04626
04627 FLOATWRITE(file, prob);
04628
04629
04630 for (i=0; i<N; i++)
04631 FLOATWRITE(file, model_derivative_p(i,dim));
04632
04633 for (i=0; i<N; i++)
04634 FLOATWRITE(file, model_derivative_q(i,dim));
04635
04636
04637 for (i=0; i<N; i++)
04638 for (j=0; j<N; j++)
04639 FLOATWRITE(file, model_derivative_a(i,j,dim));
04640
04641 for (i=0; i<N; i++)
04642 for (j=0; j<M; j++)
04643 FLOATWRITE(file, model_derivative_b(i,j,dim));
04644
04645 if (dim==0)
04646 SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04647 } ;
04648 }
04649 else
04650 {
04651 if (file)
04652 {
04653
04654 FLOATWRITE(file, prob);
04655
04656 for (i=0; model->get_learn_p(i)>=0; i++)
04657 FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));
04658
04659 for (i=0; model->get_learn_q(i)>=0; i++)
04660 FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));
04661
04662
04663 for (q=0; model->get_learn_a(q,1)>=0; q++)
04664 {
04665 i=model->get_learn_a(q,0) ;
04666 j=model->get_learn_a(q,1) ;
04667 FLOATWRITE(file, model_derivative_a(i,j,dim));
04668 } ;
04669
04670 for (q=0; model->get_learn_b(q,0)>=0; q++)
04671 {
04672 i=model->get_learn_b(q,0) ;
04673 j=model->get_learn_b(q,1) ;
04674 FLOATWRITE(file, model_derivative_b(i,j,dim));
04675 } ;
04676 if (dim==0)
04677 SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04678 } ;
04679 } ;
04680 }
04681 save_model_bin(file) ;
04682
04683 #ifdef USE_HMMPARALLEL
04684 delete[] threads ;
04685 delete[] params ;
04686 #endif
04687
04688 result=true;
04689 SG_PRINT( "\n") ;
04690 return result;
04691 }
04692
04693 bool CHMM::save_model_derivatives(FILE* file)
04694 {
04695 bool result=false;
04696 INT dim,i,j;
04697
04698 if (file)
04699 {
04700
04701 fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
04702 fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04703 fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
04704 fprintf(file,"%% ............................. \n");
04705 fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
04706 fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");
04707
04708
04709 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04710 {
04711 fprintf(file, "[ ");
04712
04713
04714 for (i=0; i<N; i++)
04715 fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );
04716
04717 for (i=0; i<N; i++)
04718 fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );
04719
04720
04721 for (i=0; i<N; i++)
04722 for (j=0; j<N; j++)
04723 fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );
04724
04725 for (i=0; i<N; i++)
04726 for (j=0; j<M; j++)
04727 fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );
04728
04729 fseek(file,ftell(file)-1,SEEK_SET);
04730 fprintf(file, " ];\n");
04731 }
04732
04733
04734 fprintf(file, "];");
04735
04736 result=true;
04737 }
04738 return result;
04739 }
04740
04741 bool CHMM::check_model_derivatives_combined()
04742 {
04743
04744 const DREAL delta=5e-4 ;
04745
04746 INT i ;
04747
04748
04749
04750
04751
04752
04753
04754
04755
04756
04757
04758
04759
04760
04761
04762
04763
04764
04765
04766
04767
04768
04769
04770
04771
04772
04773
04774
04775
04776
04777
04778 i=0;
04779 {
04780 for (INT j=0; j<M; j++)
04781 {
04782 DREAL old_b=get_b(i,j) ;
04783
04784 set_b(i,j, log(exp(old_b)-delta)) ;
04785 invalidate_model() ;
04786 DREAL prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;
04787
04788 set_b(i,j, log(exp(old_b)+delta)) ;
04789 invalidate_model() ;
04790 DREAL prob_new=(model_probability(-1)*p_observations->get_num_vectors());
04791
04792 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04793
04794 set_b(i,j, old_b) ;
04795 invalidate_model() ;
04796
04797 DREAL deriv_calc=0 ;
04798 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04799 {
04800 deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
04801 if (j==1)
04802 SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
04803 } ;
04804
04805 SG_ERROR( "b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04806 } ;
04807 } ;
04808 return true ;
04809 }
04810
04811 bool CHMM::check_model_derivatives()
04812 {
04813 bool result=false;
04814 const DREAL delta=3e-4 ;
04815
04816 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04817 {
04818 INT i ;
04819
04820 for (i=0; i<N; i++)
04821 {
04822 for (INT j=0; j<N; j++)
04823 {
04824 DREAL old_a=get_a(i,j) ;
04825
04826 set_a(i,j, log(exp(old_a)-delta)) ;
04827 invalidate_model() ;
04828 DREAL prob_old=exp(model_probability(dim)) ;
04829
04830 set_a(i,j, log(exp(old_a)+delta)) ;
04831 invalidate_model() ;
04832 DREAL prob_new=exp(model_probability(dim));
04833
04834 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04835
04836 set_a(i,j, old_a) ;
04837 invalidate_model() ;
04838 DREAL deriv_calc=exp(model_derivative_a(i, j, dim)) ;
04839
04840 SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04841 invalidate_model() ;
04842 } ;
04843 } ;
04844 for (i=0; i<N; i++)
04845 {
04846 for (INT j=0; j<M; j++)
04847 {
04848 DREAL old_b=get_b(i,j) ;
04849
04850 set_b(i,j, log(exp(old_b)-delta)) ;
04851 invalidate_model() ;
04852 DREAL prob_old=exp(model_probability(dim)) ;
04853
04854 set_b(i,j, log(exp(old_b)+delta)) ;
04855 invalidate_model() ;
04856 DREAL prob_new=exp(model_probability(dim));
04857
04858 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04859
04860 set_b(i,j, old_b) ;
04861 invalidate_model() ;
04862 DREAL deriv_calc=exp(model_derivative_b(i, j, dim));
04863
04864 SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
04865 } ;
04866 } ;
04867
04868 #ifdef TEST
04869 for (i=0; i<N; i++)
04870 {
04871 DREAL old_p=get_p(i) ;
04872
04873 set_p(i, log(exp(old_p)-delta)) ;
04874 invalidate_model() ;
04875 DREAL prob_old=exp(model_probability(dim)) ;
04876
04877 set_p(i, log(exp(old_p)+delta)) ;
04878 invalidate_model() ;
04879 DREAL prob_new=exp(model_probability(dim));
04880 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04881
04882 set_p(i, old_p) ;
04883 invalidate_model() ;
04884 DREAL deriv_calc=exp(model_derivative_p(i, dim));
04885
04886
04887 SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04888 } ;
04889 for (i=0; i<N; i++)
04890 {
04891 DREAL old_q=get_q(i) ;
04892
04893 set_q(i, log(exp(old_q)-delta)) ;
04894 invalidate_model() ;
04895 DREAL prob_old=exp(model_probability(dim)) ;
04896
04897 set_q(i, log(exp(old_q)+delta)) ;
04898 invalidate_model() ;
04899 DREAL prob_new=exp(model_probability(dim));
04900
04901 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04902
04903 set_q(i, old_q) ;
04904 invalidate_model() ;
04905 DREAL deriv_calc=exp(model_derivative_q(i, dim));
04906
04907
04908 SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04909 } ;
04910 #endif
04911 }
04912 return result;
04913 }
04914
04915 #ifdef USE_HMMDEBUG
04916 bool CHMM::check_path_derivatives()
04917 {
04918 bool result=false;
04919 const DREAL delta=1e-4 ;
04920
04921 for (INT dim=0; dim<p_observations->get_num_vectors(); dim++)
04922 {
04923 INT i ;
04924
04925 for (i=0; i<N; i++)
04926 {
04927 for (INT j=0; j<N; j++)
04928 {
04929 DREAL old_a=get_a(i,j) ;
04930
04931 set_a(i,j, log(exp(old_a)-delta)) ;
04932 invalidate_model() ;
04933 DREAL prob_old=best_path(dim) ;
04934
04935 set_a(i,j, log(exp(old_a)+delta)) ;
04936 invalidate_model() ;
04937 DREAL prob_new=best_path(dim);
04938
04939 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04940
04941 set_a(i,j, old_a) ;
04942 invalidate_model() ;
04943 DREAL deriv_calc=path_derivative_a(i, j, dim) ;
04944
04945 SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04946 } ;
04947 } ;
04948 for (i=0; i<N; i++)
04949 {
04950 for (INT j=0; j<M; j++)
04951 {
04952 DREAL old_b=get_b(i,j) ;
04953
04954 set_b(i,j, log(exp(old_b)-delta)) ;
04955 invalidate_model() ;
04956 DREAL prob_old=best_path(dim) ;
04957
04958 set_b(i,j, log(exp(old_b)+delta)) ;
04959 invalidate_model() ;
04960 DREAL prob_new=best_path(dim);
04961
04962 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04963
04964 set_b(i,j, old_b) ;
04965 invalidate_model() ;
04966 DREAL deriv_calc=path_derivative_b(i, j, dim);
04967
04968 SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
04969 } ;
04970 } ;
04971
04972 for (i=0; i<N; i++)
04973 {
04974 DREAL old_p=get_p(i) ;
04975
04976 set_p(i, log(exp(old_p)-delta)) ;
04977 invalidate_model() ;
04978 DREAL prob_old=best_path(dim) ;
04979
04980 set_p(i, log(exp(old_p)+delta)) ;
04981 invalidate_model() ;
04982 DREAL prob_new=best_path(dim);
04983 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
04984
04985 set_p(i, old_p) ;
04986 invalidate_model() ;
04987 DREAL deriv_calc=path_derivative_p(i, dim);
04988
04989
04990 SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
04991 } ;
04992 for (i=0; i<N; i++)
04993 {
04994 DREAL old_q=get_q(i) ;
04995
04996 set_q(i, log(exp(old_q)-delta)) ;
04997 invalidate_model() ;
04998 DREAL prob_old=best_path(dim) ;
04999
05000 set_q(i, log(exp(old_q)+delta)) ;
05001 invalidate_model() ;
05002 DREAL prob_new=best_path(dim);
05003
05004 DREAL deriv = (prob_new-prob_old)/(2*delta) ;
05005
05006 set_q(i, old_q) ;
05007 invalidate_model() ;
05008 DREAL deriv_calc=path_derivative_q(i, dim);
05009
05010
05011 SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
05012 } ;
05013 }
05014 return result;
05015 }
05016 #endif // USE_HMMDEBUG
05017
05018
05019 void CHMM::normalize(bool keep_dead_states)
05020 {
05021 INT i,j;
05022 const DREAL INF=-1e10;
05023 DREAL sum_p =INF;
05024
05025 for (i=0; i<N; i++)
05026 {
05027 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
05028
05029 DREAL sum_b =INF;
05030 DREAL sum_a =get_q(i);
05031
05032 for (j=0; j<N; j++)
05033 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
05034
05035 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
05036 {
05037 for (j=0; j<N; j++)
05038 set_a(i,j, get_a(i,j)-sum_a);
05039 set_q(i, get_q(i)-sum_a);
05040 }
05041
05042 for (j=0; j<M; j++)
05043 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
05044 for (j=0; j<M; j++)
05045 set_b(i,j, get_b(i,j)-sum_b);
05046 }
05047
05048 for (i=0; i<N; i++)
05049 set_p(i, get_p(i)-sum_p);
05050
05051 invalidate_model();
05052 }
05053
05054 bool CHMM::append_model(CHMM* app_model)
05055 {
05056 bool result=false;
05057 const INT num_states=app_model->get_N();
05058 INT i,j;
05059
05060 SG_DEBUG( "cur N:%d M:%d\n", N, M);
05061 SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
05062 if (app_model->get_M() == get_M())
05063 {
05064 DREAL* n_p=new DREAL[N+num_states];
05065 DREAL* n_q=new DREAL[N+num_states];
05066 DREAL* n_a=new DREAL[(N+num_states)*(N+num_states)];
05067
05068 DREAL* n_b=new DREAL[(N+num_states)*M];
05069
05070
05071 for (i=0; i<N+num_states; i++)
05072 {
05073 n_p[i]=-CMath::INFTY;
05074 n_q[i]=-CMath::INFTY;
05075
05076 for (j=0; j<N+num_states; j++)
05077 n_a[(N+num_states)*i+j]=-CMath::INFTY;
05078
05079 for (j=0; j<M; j++)
05080 n_b[M*i+j]=-CMath::INFTY;
05081 }
05082
05083
05084
05085
05086
05087
05088 for (i=0; i<N; i++)
05089 {
05090 n_p[i]=get_p(i);
05091
05092 for (j=0; j<N; j++)
05093 n_a[(N+num_states)*j+i]=get_a(i,j);
05094
05095 for (j=0; j<M; j++)
05096 {
05097 n_b[M*i+j]=get_b(i,j);
05098 }
05099 }
05100
05101
05102 for (i=0; i<app_model->get_N(); i++)
05103 {
05104 n_q[i+N]=app_model->get_q(i);
05105
05106 for (j=0; j<app_model->get_N(); j++)
05107 n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
05108 for (j=0; j<app_model->get_M(); j++)
05109 n_b[M*(i+N)+j]=app_model->get_b(i,j);
05110 }
05111
05112
05113
05114 for (i=0; i<N; i++)
05115 {
05116 for (j=N; j<N+num_states; j++)
05117 n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
05118 }
05119
05120 free_state_dependend_arrays();
05121 N+=num_states;
05122
05123 alloc_state_dependend_arrays();
05124
05125
05126 delete[] initial_state_distribution_p;
05127 delete[] end_state_distribution_q;
05128 delete[] transition_matrix_a;
05129 delete[] observation_matrix_b;
05130
05131 transition_matrix_a=n_a;
05132 observation_matrix_b=n_b;
05133 initial_state_distribution_p=n_p;
05134 end_state_distribution_q=n_q;
05135
05136 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05138 invalidate_model();
05139 }
05140 else
05141 SG_ERROR( "number of observations is different for append model, doing nothing!\n");
05142
05143 return result;
05144 }
05145
05146 bool CHMM::append_model(CHMM* app_model, DREAL* cur_out, DREAL* app_out)
05147 {
05148 bool result=false;
05149 const INT num_states=app_model->get_N()+2;
05150 INT i,j;
05151
05152 if (app_model->get_M() == get_M())
05153 {
05154 DREAL* n_p=new DREAL[N+num_states];
05155 DREAL* n_q=new DREAL[N+num_states];
05156 DREAL* n_a=new DREAL[(N+num_states)*(N+num_states)];
05157
05158 DREAL* n_b=new DREAL[(N+num_states)*M];
05159
05160
05161 for (i=0; i<N+num_states; i++)
05162 {
05163 n_p[i]=-CMath::INFTY;
05164 n_q[i]=-CMath::INFTY;
05165
05166 for (j=0; j<N+num_states; j++)
05167 n_a[(N+num_states)*j+i]=-CMath::INFTY;
05168
05169 for (j=0; j<M; j++)
05170 n_b[M*i+j]=-CMath::INFTY;
05171 }
05172
05173
05174
05175
05176
05177
05178 for (i=0; i<N; i++)
05179 {
05180 n_p[i]=get_p(i);
05181
05182 for (j=0; j<N; j++)
05183 n_a[(N+num_states)*j+i]=get_a(i,j);
05184
05185 for (j=0; j<M; j++)
05186 {
05187 n_b[M*i+j]=get_b(i,j);
05188 }
05189 }
05190
05191
05192 for (i=0; i<app_model->get_N(); i++)
05193 {
05194 n_q[i+N+2]=app_model->get_q(i);
05195
05196 for (j=0; j<app_model->get_N(); j++)
05197 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
05198 for (j=0; j<app_model->get_M(); j++)
05199 n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
05200 }
05201
05202
05203
05204
05205 for (i=0; i<M; i++)
05206 {
05207 n_b[M*N+i]=cur_out[i];
05208 n_b[M*(N+1)+i]=app_out[i];
05209 }
05210
05211
05212 for (i=0; i<N+num_states; i++)
05213 {
05214
05215 if (i==N+1)
05216 n_a[(N+num_states)*i + N]=0;
05217
05218
05219
05220 if (i<N)
05221 n_a[(N+num_states)*N+i]=get_q(i);
05222
05223
05224
05225 if (i>=N+2)
05226 n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
05227 }
05228
05229 free_state_dependend_arrays();
05230 N+=num_states;
05231
05232 alloc_state_dependend_arrays();
05233
05234
05235 delete[] initial_state_distribution_p;
05236 delete[] end_state_distribution_q;
05237 delete[] transition_matrix_a;
05238 delete[] observation_matrix_b;
05239
05240 transition_matrix_a=n_a;
05241 observation_matrix_b=n_b;
05242 initial_state_distribution_p=n_p;
05243 end_state_distribution_q=n_q;
05244
05245 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05247 invalidate_model();
05248 }
05249
05250 return result;
05251 }
05252
05253
05254 void CHMM::add_states(INT num_states, DREAL default_value)
05255 {
05256 INT i,j;
05257 const DREAL MIN_RAND=1e-2;
05258 const DREAL MAX_RAND=2e-1;
05259
05260 DREAL* n_p=new DREAL[N+num_states];
05261 DREAL* n_q=new DREAL[N+num_states];
05262 DREAL* n_a=new DREAL[(N+num_states)*(N+num_states)];
05263
05264 DREAL* n_b=new DREAL[(N+num_states)*M];
05265
05266
05267
05268 for (i=0; i<N; i++)
05269 {
05270 n_p[i]=get_p(i);
05271 n_q[i]=get_q(i);
05272
05273 for (j=0; j<N; j++)
05274 n_a[(N+num_states)*j+i]=get_a(i,j);
05275
05276 for (j=0; j<M; j++)
05277 n_b[M*i+j]=get_b(i,j);
05278 }
05279
05280 for (i=N; i<N+num_states; i++)
05281 {
05282 n_p[i]=VAL_MACRO;
05283 n_q[i]=VAL_MACRO;
05284
05285 for (j=0; j<N; j++)
05286 n_a[(N+num_states)*i+j]=VAL_MACRO;
05287
05288 for (j=0; j<N+num_states; j++)
05289 n_a[(N+num_states)*j+i]=VAL_MACRO;
05290
05291 for (j=0; j<M; j++)
05292 n_b[M*i+j]=VAL_MACRO;
05293 }
05294 free_state_dependend_arrays();
05295 N+=num_states;
05296
05297 alloc_state_dependend_arrays();
05298
05299
05300 delete[] initial_state_distribution_p;
05301 delete[] end_state_distribution_q;
05302 delete[] transition_matrix_a;
05303 delete[] observation_matrix_b;
05304
05305 transition_matrix_a=n_a;
05306 observation_matrix_b=n_b;
05307 initial_state_distribution_p=n_p;
05308 end_state_distribution_q=n_q;
05309
05310 invalidate_model();
05311 normalize();
05312 }
05313
05314 void CHMM::chop(DREAL value)
05315 {
05316 for (INT i=0; i<N; i++)
05317 {
05318 INT j;
05319
05320 if (exp(get_p(i)) < value)
05321 set_p(i, CMath::ALMOST_NEG_INFTY);
05322
05323 if (exp(get_q(i)) < value)
05324 set_q(i, CMath::ALMOST_NEG_INFTY);
05325
05326 for (j=0; j<N; j++)
05327 {
05328 if (exp(get_a(i,j)) < value)
05329 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05330 }
05331
05332 for (j=0; j<M; j++)
05333 {
05334 if (exp(get_b(i,j)) < value)
05335 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05336 }
05337 }
05338 normalize();
05339 invalidate_model();
05340 }
05341
05342 bool CHMM::linear_train(bool right_align)
05343 {
05344 if (p_observations)
05345 {
05346 INT histsize=(get_M()*get_N());
05347 INT* hist=new INT[histsize];
05348 INT* startendhist=new INT[get_N()];
05349 INT i,dim;
05350
05351 ASSERT(p_observations->get_max_vector_length()<=get_N());
05352
05353 for (i=0; i<histsize; i++)
05354 hist[i]=0;
05355
05356 for (i=0; i<get_N(); i++)
05357 startendhist[i]=0;
05358
05359 if (right_align)
05360 {
05361 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05362 {
05363 INT len=0;
05364 WORD* obs=p_observations->get_feature_vector(dim, len);
05365
05366 ASSERT(len<=get_N());
05367 startendhist[(get_N()-len)]++;
05368
05369 for (i=0;i<len;i++)
05370 hist[(get_N()-len+i)*get_M() + *obs++]++;
05371 }
05372
05373 set_q(get_N()-1, 1);
05374 for (i=0; i<get_N()-1; i++)
05375 set_q(i, 0);
05376
05377 for (i=0; i<get_N(); i++)
05378 set_p(i, startendhist[i]+PSEUDO);
05379
05380 for (i=0;i<get_N();i++)
05381 {
05382 for (INT j=0; j<get_N(); j++)
05383 {
05384 if (i==j-1)
05385 set_a(i,j, 1);
05386 else
05387 set_a(i,j, 0);
05388 }
05389 }
05390 }
05391 else
05392 {
05393 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05394 {
05395 INT len=0;
05396 WORD* obs=p_observations->get_feature_vector(dim, len);
05397
05398 ASSERT(len<=get_N());
05399 for (i=0;i<len;i++)
05400 hist[i*get_M() + *obs++]++;
05401
05402 startendhist[len-1]++;
05403 }
05404
05405 set_p(0, 1);
05406 for (i=1; i<get_N(); i++)
05407 set_p(i, 0);
05408
05409 for (i=0; i<get_N(); i++)
05410 set_q(i, startendhist[i]+PSEUDO);
05411
05412 INT total=p_observations->get_num_vectors();
05413
05414 for (i=0;i<get_N();i++)
05415 {
05416 total-= startendhist[i] ;
05417
05418 for (INT j=0; j<get_N(); j++)
05419 {
05420 if (i==j-1)
05421 set_a(i,j, total+PSEUDO);
05422 else
05423 set_a(i,j, 0);
05424 }
05425 }
05426 ASSERT(total==0);
05427 }
05428
05429 for (i=0;i<get_N();i++)
05430 {
05431 for (INT j=0; j<get_M(); j++)
05432 {
05433 DREAL sum=0;
05434 INT offs=i*get_M()+ p_observations->get_masked_symbols((WORD) j, (BYTE) 254);
05435
05436 for (INT k=0; k<p_observations->get_original_num_symbols(); k++)
05437 sum+=hist[offs+k];
05438
05439 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05440 }
05441 }
05442
05443 delete[] hist;
05444 delete[] startendhist;
05445 convert_to_log();
05446 invalidate_model();
05447 return true;
05448 }
05449 else
05450 return false;
05451 }
05452
05453 void CHMM::set_observation_nocache(CStringFeatures<WORD>* obs)
05454 {
05455 ASSERT(obs);
05456 p_observations=obs;
05457 SG_REF(obs);
05458
05459 if (obs)
05460 if (obs->get_num_symbols() > M)
05461 SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
05462
05463 if (!reused_caches)
05464 {
05465 #ifdef USE_HMMPARALLEL_STRUCTURES
05466 for (INT i=0; i<parallel.get_num_threads(); i++)
05467 {
05468 delete[] alpha_cache[i].table;
05469 delete[] beta_cache[i].table;
05470 delete[] states_per_observation_psi[i];
05471 delete[] path[i];
05472
05473 alpha_cache[i].table=NULL;
05474 beta_cache[i].table=NULL;
05475 #ifndef NOVIT
05476 states_per_observation_psi[i]=NULL;
05477 #endif // NOVIT
05478 path[i]=NULL;
05479 } ;
05480 #else
05481 delete[] alpha_cache.table;
05482 delete[] beta_cache.table;
05483 delete[] states_per_observation_psi;
05484 delete[] path;
05485
05486 alpha_cache.table=NULL;
05487 beta_cache.table=NULL;
05488 states_per_observation_psi=NULL;
05489 path=NULL;
05490
05491 #endif //USE_HMMPARALLEL_STRUCTURES
05492 }
05493
05494 invalidate_model();
05495 }
05496
05497 void CHMM::set_observations(CStringFeatures<WORD>* obs, CHMM* lambda)
05498 {
05499 ASSERT(obs);
05500 p_observations=obs;
05501 SG_REF(obs);
05502
05503
05504
05505 features=obs;
05506
05507 SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
05508 SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
05509 SG_DEBUG("M: %d\n", M);
05510
05511 if (obs)
05512 {
05513 if (obs->get_num_symbols() > M)
05514 {
05515 SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
05516 }
05517 }
05518
05519 if (!reused_caches)
05520 {
05521 #ifdef USE_HMMPARALLEL_STRUCTURES
05522 for (INT i=0; i<parallel.get_num_threads(); i++)
05523 {
05524 delete[] alpha_cache[i].table;
05525 delete[] beta_cache[i].table;
05526 #ifndef NOVIT
05527 delete[] states_per_observation_psi[i];
05528 #endif // NOVIT
05529 delete[] path[i];
05530
05531 alpha_cache[i].table=NULL;
05532 beta_cache[i].table=NULL;
05533 #ifndef NOVIT
05534 states_per_observation_psi[i]=NULL;
05535 #endif // NOVIT
05536 path[i]=NULL;
05537 } ;
05538 #else
05539 delete[] alpha_cache.table;
05540 delete[] beta_cache.table;
05541 delete[] states_per_observation_psi;
05542 delete[] path;
05543
05544 alpha_cache.table=NULL;
05545 beta_cache.table=NULL;
05546 states_per_observation_psi=NULL;
05547 path=NULL;
05548
05549 #endif //USE_HMMPARALLEL_STRUCTURES
05550 }
05551
05552 if (obs!=NULL)
05553 {
05554 INT max_T=obs->get_max_vector_length();
05555
05556 if (lambda)
05557 {
05558 #ifdef USE_HMMPARALLEL_STRUCTURES
05559 for (INT i=0; i<parallel.get_num_threads(); i++)
05560 {
05561 this->alpha_cache[i].table= lambda->alpha_cache[i].table;
05562 this->beta_cache[i].table= lambda->beta_cache[i].table;
05563 this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
05564 this->path[i]=lambda->path[i];
05565 } ;
05566 #else
05567 this->alpha_cache.table= lambda->alpha_cache.table;
05568 this->beta_cache.table= lambda->beta_cache.table;
05569 this->states_per_observation_psi= lambda->states_per_observation_psi;
05570 this->path=lambda->path;
05571 #endif //USE_HMMPARALLEL_STRUCTURES
05572
05573 this->reused_caches=true;
05574 }
05575 else
05576 {
05577 this->reused_caches=false;
05578 #ifdef USE_HMMPARALLEL_STRUCTURES
05579 SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05580 for (INT i=0; i<parallel.get_num_threads(); i++)
05581 {
05582 if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
05583 SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
05584 else
05585 SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
05586 path[i]=new T_STATES[max_T];
05587 }
05588 #else // no USE_HMMPARALLEL_STRUCTURES
05589 SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05590 if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
05591 SG_DONE();
05592 else
05593 SG_ERROR( "failed.\n") ;
05594
05595 path=new T_STATES[max_T];
05596 #endif // USE_HMMPARALLEL_STRUCTURES
05597 #ifdef USE_HMMCACHE
05598 SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);
05599
05600 #ifdef USE_HMMPARALLEL_STRUCTURES
05601 for (INT i=0; i<parallel.get_num_threads(); i++)
05602 {
05603 if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
05604 SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
05605 else
05606 SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;
05607
05608 if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05609 SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
05610 else
05611 SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
05612 } ;
05613 #else // USE_HMMPARALLEL_STRUCTURES
05614 if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05615 SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
05616 else
05617 SG_ERROR( "allocation of alpha_cache.table failed\n") ;
05618
05619 if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05620 SG_DEBUG( "beta_cache.table successfully allocated\n") ;
05621 else
05622 SG_ERROR( "allocation of beta_cache.table failed\n") ;
05623
05624 #endif // USE_HMMPARALLEL_STRUCTURES
05625 #else // USE_HMMCACHE
05626 #ifdef USE_HMMPARALLEL_STRUCTURES
05627 for (INT i=0; i<parallel.get_num_threads(); i++)
05628 {
05629 alpha_cache[i].table=NULL ;
05630 beta_cache[i].table=NULL ;
05631 } ;
05632 #else //USE_HMMPARALLEL_STRUCTURES
05633 alpha_cache.table=NULL ;
05634 beta_cache.table=NULL ;
05635 #endif //USE_HMMPARALLEL_STRUCTURES
05636 #endif //USE_HMMCACHE
05637 }
05638 }
05639
05640
05641 invalidate_model();
05642 }
05643
05644 bool CHMM::permutation_entropy(INT window_width, INT sequence_number)
05645 {
05646 if (p_observations && window_width>0 &&
05647 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05648 {
05649 INT min_sequence=sequence_number;
05650 INT max_sequence=sequence_number;
05651
05652 if (sequence_number<0)
05653 {
05654 min_sequence=0;
05655 max_sequence=p_observations->get_num_vectors();
05656 SG_INFO( "numseq: %d\n", max_sequence);
05657 }
05658
05659 SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05660 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05661 {
05662 INT sequence_length=0;
05663 WORD* obs=p_observations->get_feature_vector(sequence_number, sequence_length);
05664
05665 INT histsize=get_M();
05666 LONG* hist=new LONG[histsize];
05667 INT i,j;
05668
05669 for (i=0; i<sequence_length-window_width; i++)
05670 {
05671 for (j=0; j<histsize; j++)
05672 hist[j]=0;
05673
05674 WORD* ptr=&obs[i];
05675 for (j=0; j<window_width; j++)
05676 {
05677 hist[*ptr++]++;
05678 }
05679
05680 double perm_entropy=0;
05681
05682 for (j=0; j<get_M(); j++)
05683 {
05684 double p=(((DREAL)hist[j])+PSEUDO)/(window_width+get_M()*PSEUDO);
05685 perm_entropy+=p*log(p);
05686 }
05687
05688 SG_PRINT( "%f\n", perm_entropy);
05689 }
05690
05691 delete[] hist;
05692 }
05693 return true;
05694 }
05695 else
05696 return false;
05697 }
05698
05699 DREAL CHMM::get_log_derivative(INT num_param, INT num_example)
05700 {
05701 if (num_param<N)
05702 return model_derivative_p(num_param, num_example);
05703 else if (num_param<2*N)
05704 return model_derivative_q(num_param-N, num_example);
05705 else if (num_param<N*(N+2))
05706 {
05707 INT k=num_param-2*N;
05708 INT i=k/N;
05709 INT j=k%N;
05710 return model_derivative_a(i,j, num_example);
05711 }
05712 else if (num_param<N*(N+2+M))
05713 {
05714 INT k=num_param-N*(N+2);
05715 INT i=k/M;
05716 INT j=k%M;
05717 return model_derivative_b(i,j, num_example);
05718 }
05719
05720 ASSERT(false);
05721 return -1;
05722 }
05723
05724 DREAL CHMM::get_log_model_parameter(INT num_param)
05725 {
05726 if (num_param<N)
05727 return get_p(num_param);
05728 else if (num_param<2*N)
05729 return get_q(num_param-N);
05730 else if (num_param<N*(N+2))
05731 return transition_matrix_a[num_param-2*N];
05732 else if (num_param<N*(N+2+M))
05733 return observation_matrix_b[num_param-N*(N+2)];
05734
05735 ASSERT(false);
05736 return -1;
05737 }
05738
05739
05740
05741 bool CHMM::converge(DREAL x, DREAL y)
05742 {
05743 DREAL diff=y-x;
05744 DREAL absdiff=fabs(diff);
05745
05746 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05747
05748 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05749 {
05750 iteration_count=iterations;
05751 SG_INFO( "...finished\n");
05752 conv_it=5;
05753 return true;
05754 }
05755 else
05756 {
05757 if (absdiff<epsilon)
05758 conv_it--;
05759 else
05760 conv_it=5;
05761
05762 return false;
05763 }
05764 }
05765
05766
05767 void CHMM::switch_model(CHMM** m1, CHMM** m2)
05768 {
05769 CHMM* dummy=*m1;
05770
05771 *m1=*m2;
05772 *m2=dummy;
05773 }
05774
05775 bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
05776 {
05777 CHMM* estimate=new CHMM(this);
05778 CHMM* working=this;
05779 DREAL prob_max=-CMath::INFTY;
05780 DREAL prob=-CMath::INFTY;
05781 DREAL prob_train=CMath::ALMOST_NEG_INFTY;
05782 iteration_count=iterations;
05783
05784 while (!converge(prob, prob_train))
05785 {
05786 switch_model(&working, &estimate);
05787 prob=prob_train;
05788
05789
05790
05791
05792 switch (type) {
05793 case BW_NORMAL:
05794 estimate_model_baum_welch(estimate); break;
05795 case BW_TRANS:
05796 estimate_model_baum_welch_trans(estimate); break;
05797 case BW_DEFINED:
05798 estimate_model_baum_welch_defined(estimate); break;
05799 case VIT_NORMAL:
05800 estimate_model_viterbi(estimate); break;
05801 case VIT_DEFINED:
05802 estimate_model_viterbi_defined(estimate); break;
05803 }
05804 prob_train=estimate->model_probability();
05805 if (prob_max<prob_train)
05806 prob_max=prob_train;
05807 }
05808
05809 delete estimate;
05810 estimate=NULL;
05811
05812 return true;
05813 }
05814