HMM.cpp
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 // HMM.cpp: implementation of the CHMM class.
00013 // $Id: HMM.cpp 3458 2008-11-26 04:28:41Z sonne $
00015 
00016 #include "distributions/hmm/HMM.h"
00017 #include "lib/Mathematics.h"
00018 #include "lib/io.h"
00019 #include "lib/config.h"
00020 #include "base/Parallel.h"
00021 #include "features/StringFeatures.h"
00022 #include "features/CharFeatures.h"
00023 #include "features/Alphabet.h"
00024 
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029 
00030 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
00031 #define ARRAY_SIZE 65336
00032 
00033 #ifdef SUNOS
00034 extern "C" int  finite(double);
00035 #endif
00036 
00038 // Construction/Destruction
00040 
00041 const int32_t CHMM::GOTN= (1<<1);
00042 const int32_t CHMM::GOTM= (1<<2);
00043 const int32_t CHMM::GOTO= (1<<3);
00044 const int32_t CHMM::GOTa= (1<<4);
00045 const int32_t CHMM::GOTb= (1<<5);
00046 const int32_t CHMM::GOTp= (1<<6);
00047 const int32_t CHMM::GOTq= (1<<7);
00048 
00049 const int32_t CHMM::GOTlearn_a= (1<<1);
00050 const int32_t CHMM::GOTlearn_b= (1<<2);
00051 const int32_t CHMM::GOTlearn_p= (1<<3);
00052 const int32_t CHMM::GOTlearn_q= (1<<4);
00053 const int32_t CHMM::GOTconst_a= (1<<5);
00054 const int32_t CHMM::GOTconst_b= (1<<6);
00055 const int32_t CHMM::GOTconst_p= (1<<7);
00056 const int32_t CHMM::GOTconst_q= (1<<8);
00057 
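/* Illustrative sketch (not part of this class): the GOT* constants above are
 * single-bit masks, so a loader can OR together the flag of every section it
 * has parsed and compare the result against the mask of required sections.
 * The helper and constants below are hypothetical and only mirror the values
 * defined above to show the bit arithmetic. */
static inline bool example_all_sections_read(int32_t got)
{
    const int32_t need_N=(1<<1), need_M=(1<<2), need_O=(1<<3), need_a=(1<<4),
        need_b=(1<<5), need_p=(1<<6), need_q=(1<<7);
    const int32_t required=need_N|need_M|need_O|need_a|need_b|need_p|need_q;
    return (got & required)==required;   // true only if every required bit is set
}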
00058 enum E_STATE
00059 {
00060     INITIAL,
00061     ARRAYs,
00062     GET_N,
00063     GET_M,
00064     GET_a,
00065     GET_b,
00066     GET_p,
00067     GET_q,
00068     GET_learn_a,
00069     GET_learn_b,
00070     GET_learn_p,
00071     GET_learn_q,
00072     GET_const_a,
00073     GET_const_b,
00074     GET_const_p,
00075     GET_const_q,
00076     COMMENT,
00077     END
00078 };
00079 
00080 
00081 #ifdef FIX_POS
00082 const char CModel::FIX_DISALLOWED=0 ;
00083 const char CModel::FIX_ALLOWED=1 ;
00084 const char CModel::FIX_DEFAULT=-1 ;
00085 const float64_t CModel::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
00086 #endif
00087 
00088 CModel::CModel()
00089 {
00090     const_a=new int[ARRAY_SIZE];                
00091     const_b=new int[ARRAY_SIZE];
00092     const_p=new int[ARRAY_SIZE];
00093     const_q=new int[ARRAY_SIZE];
00094     const_a_val=new float64_t[ARRAY_SIZE];          
00095     const_b_val=new float64_t[ARRAY_SIZE];
00096     const_p_val=new float64_t[ARRAY_SIZE];
00097     const_q_val=new float64_t[ARRAY_SIZE];
00098 
00099 
00100     learn_a=new int[ARRAY_SIZE];
00101     learn_b=new int[ARRAY_SIZE];
00102     learn_p=new int[ARRAY_SIZE];
00103     learn_q=new int[ARRAY_SIZE];
00104 
00105 #ifdef FIX_POS
00106     fix_pos_state = new char[ARRAY_SIZE];
00107 #endif
00108     for (int32_t i=0; i<ARRAY_SIZE; i++)
00109     {
00110         const_a[i]=-1 ;
00111         const_b[i]=-1 ;
00112         const_p[i]=-1 ;
00113         const_q[i]=-1 ;
00114         const_a_val[i]=1.0 ;
00115         const_b_val[i]=1.0 ;
00116         const_p_val[i]=1.0 ;
00117         const_q_val[i]=1.0 ;
00118         learn_a[i]=-1 ;
00119         learn_b[i]=-1 ;
00120         learn_p[i]=-1 ;
00121         learn_q[i]=-1 ;
00122 #ifdef FIX_POS
00123         fix_pos_state[i] = FIX_DEFAULT ;
00124 #endif
00125     } ;
00126 }
00127 
00128 CModel::~CModel()
00129 {
00130     delete[] const_a;
00131     delete[] const_b;
00132     delete[] const_p;
00133     delete[] const_q;
00134     delete[] const_a_val;
00135     delete[] const_b_val;
00136     delete[] const_p_val;
00137     delete[] const_q_val;
00138 
00139     delete[] learn_a;
00140     delete[] learn_b;
00141     delete[] learn_p;
00142     delete[] learn_q;
00143 
00144 #ifdef FIX_POS
00145     delete[] fix_pos_state;
00146 #endif
00147 
00148 }
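/* Illustrative sketch (an assumption about usage, not taken from this file):
 * the learn_* and const_* arrays above are pre-filled with -1, which suggests
 * -1 acts as an end-of-list sentinel once real indices have been written in.
 * A reader of such a list would then stop at the first -1: */
static int example_count_sentinel_terminated(const int* list, int capacity)
{
    int n=0;
    while (n<capacity && list[n]!=-1)   // -1 marks the end of the filled part
        n++;
    return n;
}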
00149 
00150 CHMM::CHMM(CHMM* h)
00151 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00152 {
00153     SG_INFO( "hmm is using %i separate tables\n",  parallel.get_num_threads()) ;
00154 
00155     this->N=h->get_N();
00156     this->M=h->get_M();
00157     status=initialize(NULL, h->get_pseudo());
00158     this->copy_model(h);
00159     set_observations(h->get_observations());
00160 }
00161 
00162 CHMM::CHMM(int32_t p_N, int32_t p_M, CModel* p_model, float64_t p_PSEUDO)
00163 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00164 {
00165     this->N=p_N;
00166     this->M=p_M;
00167     model=NULL ;
00168 
00169     SG_INFO( "hmm is using %i separate tables\n",  parallel.get_num_threads()) ;
00170 
00171     status=initialize(p_model, p_PSEUDO);
00172 }
00173 
00174 CHMM::CHMM(
00175     CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00176     float64_t p_PSEUDO)
00177 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00178 {
00179     this->N=p_N;
00180     this->M=p_M;
00181     model=NULL ;
00182 
00183     SG_INFO( "hmm is using %i separate tables\n",  parallel.get_num_threads()) ;
00184 
00185     initialize(model, p_PSEUDO);
00186     set_observations(obs);
00187 }
00188 
00189 CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
00190 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00191 {
00192     this->N=p_N;
00193     this->M=0;
00194     model=NULL ;
00195     
00196     trans_list_forward = NULL ;
00197     trans_list_forward_cnt = NULL ;
00198     trans_list_forward_val = NULL ;
00199     trans_list_backward = NULL ;
00200     trans_list_backward_cnt = NULL ;
00201     trans_list_len = 0 ;
00202     mem_initialized = false ;
00203 
00204     this->transition_matrix_a=NULL;
00205     this->observation_matrix_b=NULL;
00206     this->initial_state_distribution_p=NULL;
00207     this->end_state_distribution_q=NULL;
00208     this->PSEUDO= PSEUDO;
00209     this->model= model;
00210     this->p_observations=NULL;
00211     this->reused_caches=false;
00212 
00213     this->alpha_cache.table=NULL;
00214     this->beta_cache.table=NULL;
00215     this->alpha_cache.dimension=0;
00216     this->beta_cache.dimension=0;
00217 #ifndef NOVIT
00218     this->states_per_observation_psi=NULL ;
00219     this->path=NULL;
00220 #endif //NOVIT
00221     arrayN1=NULL ;
00222     arrayN2=NULL ;
00223 
00224     this->loglikelihood=false;
00225     mem_initialized = true ;
00226 
00227     transition_matrix_a=a ;
00228     observation_matrix_b=NULL ;
00229     initial_state_distribution_p=p ;
00230     end_state_distribution_q=q ;
00231     transition_matrix_A=NULL ;
00232     observation_matrix_B=NULL ;
00233     
00234 //  this->invalidate_model();
00235 }
00236 
00237 CHMM::CHMM(
00238     int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
00239     float64_t* a_trans)
00240 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00241 {
00242     model=NULL ;
00243     
00244     this->N=p_N;
00245     this->M=0;
00246     
00247     trans_list_forward = NULL ;
00248     trans_list_forward_cnt = NULL ;
00249     trans_list_forward_val = NULL ;
00250     trans_list_backward = NULL ;
00251     trans_list_backward_cnt = NULL ;
00252     trans_list_len = 0 ;
00253     mem_initialized = false ;
00254 
00255     this->transition_matrix_a=NULL;
00256     this->observation_matrix_b=NULL;
00257     this->initial_state_distribution_p=NULL;
00258     this->end_state_distribution_q=NULL;
00259     this->PSEUDO= PSEUDO;
00260     this->model= model;
00261     this->p_observations=NULL;
00262     this->reused_caches=false;
00263 
00264     this->alpha_cache.table=NULL;
00265     this->beta_cache.table=NULL;
00266     this->alpha_cache.dimension=0;
00267     this->beta_cache.dimension=0;
00268 #ifndef NOVIT
00269     this->states_per_observation_psi=NULL ;
00270     this->path=NULL;
00271 #endif //NOVIT
00272     arrayN1=NULL ;
00273     arrayN2=NULL ;
00274 
00275     this->loglikelihood=false;
00276     mem_initialized = true ;
00277 
00278     trans_list_forward_cnt=NULL ;
00279     trans_list_len = N ;
00280     trans_list_forward = new T_STATES*[N] ;
00281     trans_list_forward_val = new float64_t*[N] ;
00282     trans_list_forward_cnt = new T_STATES[N] ;
00283     
00284     int32_t start_idx=0;
00285     for (int32_t j=0; j<N; j++)
00286     {
00287         int32_t old_start_idx=start_idx;
00288 
00289         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00290         {
00291             start_idx++;
00292             
00293             if (start_idx>1 && start_idx<num_trans)
00294                 ASSERT(a_trans[start_idx+num_trans-1]<=
00295                     a_trans[start_idx+num_trans]);
00296         }
00297         
00298         if (start_idx>1 && start_idx<num_trans)
00299             ASSERT(a_trans[start_idx+num_trans-1]<=
00300                 a_trans[start_idx+num_trans]);
00301         
00302         int32_t len=start_idx-old_start_idx;
00303         ASSERT(len>=0);
00304 
00305         trans_list_forward_cnt[j] = 0 ;
00306         
00307         if (len>0)
00308         {
00309             trans_list_forward[j]     = new T_STATES[len] ;
00310             trans_list_forward_val[j] = new float64_t[len] ;
00311         }
00312         else
00313         {
00314             trans_list_forward[j]     = NULL;
00315             trans_list_forward_val[j] = NULL;
00316         }
00317     }
00318 
00319     for (int32_t i=0; i<num_trans; i++)
00320     {
00321         int32_t from = (int32_t)a_trans[i+num_trans] ;
00322         int32_t to   = (int32_t)a_trans[i] ;
00323         float64_t val = a_trans[i+num_trans*2] ;
00324         
00325         ASSERT(from>=0 && from<N);
00326         ASSERT(to>=0 && to<N);
00327 
00328         trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
00329         trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
00330         trans_list_forward_cnt[from]++ ;
00331         //ASSERT(trans_list_forward_cnt[from]<3000);
00332     } ;
00333     
00334     transition_matrix_a=NULL ;
00335     observation_matrix_b=NULL ;
00336     initial_state_distribution_p=p ;
00337     end_state_distribution_q=q ;
00338     transition_matrix_A=NULL ;
00339     observation_matrix_B=NULL ;
00340 
00341 //  this->invalidate_model();
00342 }
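/* Standalone sketch (illustrative only, not part of this class): the
 * constructor above reads a_trans as a num_trans x 3 matrix stored column by
 * column, i.e. a_trans[i] = target state, a_trans[i+num_trans] = source state
 * (sorted by source), a_trans[i+2*num_trans] = transition value. The helper
 * below builds the same kind of per-source adjacency lists from that layout,
 * using std::vector instead of raw new[]. */
#include <vector>

struct ExampleTransitionList
{
    std::vector<std::vector<int> >    to;   // to[s]  = targets reachable from s
    std::vector<std::vector<double> > val;  // val[s] = value of each transition
};

static ExampleTransitionList example_build_transition_lists(
    int num_states, int num_trans, const double* a_trans)
{
    ExampleTransitionList lists;
    lists.to.resize(num_states);
    lists.val.resize(num_states);

    for (int i=0; i<num_trans; i++)
    {
        int    to  =(int) a_trans[i];              // column 0: target state
        int    from=(int) a_trans[i+num_trans];    // column 1: source state
        double v   =      a_trans[i+2*num_trans];  // column 2: transition value
        lists.to[from].push_back(to);
        lists.val[from].push_back(v);
    }
    return lists;
}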
00343 
00344 
00345 CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
00346 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00347 {
00348     SG_INFO( "hmm is using %i separate tables\n",  parallel.get_num_threads()) ;
00349 
00350     status=initialize(NULL, p_PSEUDO, model_file);
00351 }
00352 
00353 CHMM::~CHMM()
00354 {
00355     SG_UNREF(p_observations);
00356 
00357     if (trans_list_forward_cnt)
00358       delete[] trans_list_forward_cnt ;
00359     if (trans_list_backward_cnt)
00360         delete[] trans_list_backward_cnt ;
00361     if (trans_list_forward)
00362     {
00363         for (int32_t i=0; i<trans_list_len; i++)
00364             if (trans_list_forward[i])
00365                 delete[] trans_list_forward[i] ;
00366         delete[] trans_list_forward ;
00367     }
00368     if (trans_list_forward_val)
00369     {
00370         for (int32_t i=0; i<trans_list_len; i++)
00371             if (trans_list_forward_val[i])
00372                 delete[] trans_list_forward_val[i] ;
00373         delete[] trans_list_forward_val ;
00374     }
00375     if (trans_list_backward)
00376       {
00377         for (int32_t i=0; i<trans_list_len; i++)
00378           if (trans_list_backward[i])
00379         delete[] trans_list_backward[i] ;
00380         delete[] trans_list_backward ;
00381       } ;
00382 
00383     free_state_dependend_arrays();
00384 
00385     if (!reused_caches)
00386     {
00387 #ifdef USE_HMMPARALLEL_STRUCTURES
00388         for (int32_t i=0; i<parallel.get_num_threads(); i++)
00389         {
00390             delete[] alpha_cache[i].table;
00391             delete[] beta_cache[i].table;
00392             alpha_cache[i].table=NULL;
00393             beta_cache[i].table=NULL;
00394         }
00395 
00396         delete[] alpha_cache;
00397         delete[] beta_cache;
00398         alpha_cache=NULL;
00399         beta_cache=NULL;
00400 #else // USE_HMMPARALLEL_STRUCTURES
00401         delete[] alpha_cache.table;
00402         delete[] beta_cache.table;
00403         alpha_cache.table=NULL;
00404         beta_cache.table=NULL;
00405 #endif // USE_HMMPARALLEL_STRUCTURES
00406 
00407 #ifndef NOVIT
00408         delete[] states_per_observation_psi;
00409         states_per_observation_psi=NULL;
00410 #endif // NOVIT
00411     }
00412 
00413 #ifdef USE_LOGSUMARRAY
00414 #ifdef USE_HMMPARALLEL_STRUCTURES
00415     {
00416         for (int32_t i=0; i<parallel.get_num_threads(); i++)
00417             delete[] arrayS[i];
00418         delete[] arrayS ;
00419     } ;
00420 #else //USE_HMMPARALLEL_STRUCTURES
00421     delete[] arrayS;
00422 #endif //USE_HMMPARALLEL_STRUCTURES
00423 #endif //USE_LOGSUMARRAY
00424 
00425     if (!reused_caches)
00426     {
00427 #ifndef NOVIT
00428 #ifdef USE_HMMPARALLEL_STRUCTURES
00429         delete[] path_prob_updated ;
00430         delete[] path_prob_dimension ;
00431         for (int32_t i=0; i<parallel.get_num_threads(); i++)
00432             delete[] path[i] ;
00433 #endif //USE_HMMPARALLEL_STRUCTURES
00434         delete[] path;
00435 #endif
00436     }
00437 }
00438 
00439 bool CHMM::alloc_state_dependend_arrays()
00440 {
00441 
00442     if (!transition_matrix_a && !observation_matrix_b &&
00443         !initial_state_distribution_p && !end_state_distribution_q)
00444     {
00445         transition_matrix_a=new float64_t[N*N];
00446         observation_matrix_b=new float64_t[N*M];    
00447         initial_state_distribution_p=new float64_t[N];
00448         end_state_distribution_q=new float64_t[N];
00449         init_model_random();
00450         convert_to_log();
00451     }
00452 
00453 #ifdef USE_HMMPARALLEL_STRUCTURES
00454     for (int32_t i=0; i<parallel.get_num_threads(); i++)
00455     {
00456         arrayN1[i]=new float64_t[N];
00457         arrayN2[i]=new float64_t[N];
00458     }
00459 #else //USE_HMMPARALLEL_STRUCTURES
00460     arrayN1=new float64_t[N];
00461     arrayN2=new float64_t[N];
00462 #endif //USE_HMMPARALLEL_STRUCTURES
00463 
00464 #ifdef USE_LOGSUMARRAY
00465 #ifdef USE_HMMPARALLEL_STRUCTURES
00466     for (int32_t i=0; i<parallel.get_num_threads(); i++)
00467         arrayS[i]=new float64_t[(int32_t)(this->N/2+1)];
00468 #else //USE_HMMPARALLEL_STRUCTURES
00469     arrayS=new float64_t[(int32_t)(this->N/2+1)];
00470 #endif //USE_HMMPARALLEL_STRUCTURES
00471 #endif //USE_LOGSUMARRAY
00472     transition_matrix_A=new float64_t[this->N*this->N];
00473     observation_matrix_B=new float64_t[this->N*this->M];
00474 
00475     if (p_observations)
00476     {
00477 #ifdef USE_HMMPARALLEL_STRUCTURES
00478         if (alpha_cache[0].table!=NULL)
00479 #else //USE_HMMPARALLEL_STRUCTURES
00480         if (alpha_cache.table!=NULL)
00481 #endif //USE_HMMPARALLEL_STRUCTURES
00482             set_observations(p_observations);
00483         else
00484             set_observation_nocache(p_observations);
00485     }
00486 
00487     this->invalidate_model();
00488 
00489     return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00490             (transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
00491             (initial_state_distribution_p != NULL) &&
00492             (end_state_distribution_q != NULL));
00493 }
00494 
00495 void CHMM::free_state_dependend_arrays()
00496 {
00497 #ifdef USE_HMMPARALLEL_STRUCTURES
00498     for (int32_t i=0; i<parallel.get_num_threads(); i++)
00499     {
00500         delete[] arrayN1[i];
00501         delete[] arrayN2[i];
00502 
00503         arrayN1[i]=NULL;
00504         arrayN2[i]=NULL;
00505     }
00506 #else
00507     delete[] arrayN1;
00508     delete[] arrayN2;
00509     arrayN1=NULL;
00510     arrayN2=NULL;
00511 #endif
00512     if (observation_matrix_b)
00513     {
00514         delete[] transition_matrix_A;
00515         delete[] observation_matrix_B;
00516         delete[] transition_matrix_a;
00517         delete[] observation_matrix_b;
00518         delete[] initial_state_distribution_p;
00519         delete[] end_state_distribution_q;
00520     } ;
00521     
00522     transition_matrix_A=NULL;
00523     observation_matrix_B=NULL;
00524     transition_matrix_a=NULL;
00525     observation_matrix_b=NULL;
00526     initial_state_distribution_p=NULL;
00527     end_state_distribution_q=NULL;
00528 }
00529 
00530 bool CHMM::initialize(CModel* m, float64_t pseudo, FILE* modelfile)
00531 {
00532     //yes optimistic
00533     bool files_ok=true;
00534 
00535     trans_list_forward = NULL ;
00536     trans_list_forward_cnt = NULL ;
00537     trans_list_forward_val = NULL ;
00538     trans_list_backward = NULL ;
00539     trans_list_backward_cnt = NULL ;
00540     trans_list_len = 0 ;
00541     mem_initialized = false ;
00542 
00543     this->transition_matrix_a=NULL;
00544     this->observation_matrix_b=NULL;
00545     this->initial_state_distribution_p=NULL;
00546     this->end_state_distribution_q=NULL;
00547     this->PSEUDO= pseudo;
00548     this->model= m;
00549     this->p_observations=NULL;
00550     this->reused_caches=false;
00551 
00552 #ifdef USE_HMMPARALLEL_STRUCTURES
00553     alpha_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00554     beta_cache=new T_ALPHA_BETA[parallel.get_num_threads()] ;
00555     states_per_observation_psi=new P_STATES[parallel.get_num_threads()] ;
00556 
00557     for (int32_t i=0; i<parallel.get_num_threads(); i++)
00558     {
00559         this->alpha_cache[i].table=NULL;
00560         this->beta_cache[i].table=NULL;
00561         this->alpha_cache[i].dimension=0;
00562         this->beta_cache[i].dimension=0;
00563 #ifndef NOVIT
00564         this->states_per_observation_psi[i]=NULL ;
00565 #endif // NOVIT
00566     }
00567 
00568 #else // USE_HMMPARALLEL_STRUCTURES
00569     this->alpha_cache.table=NULL;
00570     this->beta_cache.table=NULL;
00571     this->alpha_cache.dimension=0;
00572     this->beta_cache.dimension=0;
00573 #ifndef NOVIT
00574     this->states_per_observation_psi=NULL ;
00575 #endif //NOVIT
00576 
00577 #endif //USE_HMMPARALLEL_STRUCTURES
00578 
00579 
00580     if (modelfile)
00581         files_ok= files_ok && load_model(modelfile);
00582 
00583 #ifdef USE_HMMPARALLEL_STRUCTURES
00584     path_prob_updated=new bool[parallel.get_num_threads()];
00585     path_prob_dimension=new int[parallel.get_num_threads()];
00586 
00587     path=new P_STATES[parallel.get_num_threads()];
00588 
00589     for (int32_t i=0; i<parallel.get_num_threads(); i++)
00590     {
00591 #ifndef NOVIT
00592         this->path[i]=NULL;
00593 #endif // NOVIT
00594     }
00595 #else // USE_HMMPARALLEL_STRUCTURES
00596 #ifndef NOVIT
00597     this->path=NULL;
00598 #endif //NOVIT
00599 
00600 #endif //USE_HMMPARALLEL_STRUCTURES
00601 
00602 #ifdef USE_HMMPARALLEL_STRUCTURES
00603     arrayN1=new P_float64_t[parallel.get_num_threads()];
00604     arrayN2=new P_float64_t[parallel.get_num_threads()];
00605 #endif //USE_HMMPARALLEL_STRUCTURES
00606 
00607 #ifdef USE_LOGSUMARRAY
00608 #ifdef USE_HMMPARALLEL_STRUCTURES
00609     arrayS=new P_float64_t[parallel.get_num_threads()] ;      
00610 #endif // USE_HMMPARALLEL_STRUCTURES
00611 #endif //USE_LOGSUMARRAY
00612 
00613     alloc_state_dependend_arrays();
00614 
00615     this->loglikelihood=false;
00616     mem_initialized = true ;
00617     this->invalidate_model();
00618 
00619     return  ((files_ok) &&
00620             (transition_matrix_A != NULL) && (observation_matrix_B != NULL) && 
00621             (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00622             (end_state_distribution_q != NULL));
00623 }
00624 
00625 //------------------------------------------------------------------------------------//
00626 
00627 //forward algorithm
00628 //calculates Pr[O_0,O_1, ..., O_t, q_time=S_i| lambda] for 0<= time <= T-1
00629 //Pr[O|lambda] for time > T
00630 float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
00631 {
00632   T_ALPHA_BETA_TABLE* alpha_new;
00633   T_ALPHA_BETA_TABLE* alpha;
00634   T_ALPHA_BETA_TABLE* dummy;
00635   if (time<1)
00636     time=0;
00637 
00638 
00639   int32_t wanted_time=time;
00640   
00641   if (ALPHA_CACHE(dimension).table)
00642     {
00643       alpha=&ALPHA_CACHE(dimension).table[0];
00644       alpha_new=&ALPHA_CACHE(dimension).table[N];
00645       time=p_observations->get_vector_length(dimension)+1;
00646     }
00647   else
00648     {
00649       alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00650       alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00651     }
00652   
00653   if (time<1)
00654     return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
00655   else
00656     {
00657       //initialization  alpha_1(i)=p_i*b_i(O_1)
00658       for (int32_t i=0; i<N; i++)
00659     alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;
00660       
00661       //induction       alpha_t+1(j) = (sum_i=1^N alpha_t(i)a_ij) b_j(O_t+1)
00662       for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
00663     {
00664       
00665       for (int32_t j=0; j<N; j++)
00666         {
00667           register int32_t i, num = trans_list_forward_cnt[j] ;
00668           float64_t sum=-CMath::INFTY;  
00669           for (i=0; i < num; i++)
00670         {
00671           int32_t ii = trans_list_forward[j][i] ;
00672           sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
00673         } ;
00674           
00675           alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
00676         }
00677       
00678       if (!ALPHA_CACHE(dimension).table)
00679         {
00680           dummy=alpha;
00681           alpha=alpha_new;
00682           alpha_new=dummy;  //switch alpha/alpha_new
00683         }
00684       else
00685         {
00686           alpha=alpha_new;
00687           alpha_new+=N;     //advance into the cache table (pointer arithmetic)
00688         }
00689     }
00690       
00691       
00692       if (time<p_observations->get_vector_length(dimension))
00693     {
00694       register int32_t i, num=trans_list_forward_cnt[state];
00695       register float64_t sum=-CMath::INFTY; 
00696       for (i=0; i<num; i++)
00697         {
00698           int32_t ii = trans_list_forward[state][i] ;
00699           sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
00700         } ;
00701       
00702       return sum + get_b(state, p_observations->get_feature(dimension,time));
00703     }
00704       else
00705     {
00706       // termination
00707       register int32_t i ; 
00708       float64_t sum ; 
00709       sum=-CMath::INFTY; 
00710       for (i=0; i<N; i++)                                 //sum over all paths
00711         sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));     //to get model probability
00712       
00713       if (!ALPHA_CACHE(dimension).table)
00714         return sum;
00715       else
00716         {
00717           ALPHA_CACHE(dimension).dimension=dimension;
00718           ALPHA_CACHE(dimension).updated=true;
00719           ALPHA_CACHE(dimension).sum=sum;
00720           
00721           if (wanted_time<p_observations->get_vector_length(dimension))
00722         return ALPHA_CACHE(dimension).table[wanted_time*N+state];
00723           else
00724         return ALPHA_CACHE(dimension).sum;
00725         }
00726     }
00727     }
00728 }
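/* Standalone sketch (illustrative only, not this class' API): the same
 * forward recursion for a small dense HMM, entirely in log space, with a
 * two-value log-sum-exp in place of CMath::logarithmic_sum. All names below
 * are made up for the example; obs is assumed non-empty. */
#include <cmath>
#include <vector>
#include <limits>

static double example_log_add(double x, double y)
{
    // log(exp(x)+exp(y)) computed stably; -inf acts as log(0)
    if (x==-std::numeric_limits<double>::infinity()) return y;
    if (y==-std::numeric_limits<double>::infinity()) return x;
    double hi=(x>y)?x:y, lo=(x>y)?y:x;
    return hi+std::log1p(std::exp(lo-hi));
}

/* log P(O | lambda) for observation sequence obs, with log-parameters
 * log_p[i], log_q[i], log_a[i*N+j], log_b[i*M+o]. */
static double example_log_forward(
    const std::vector<int>& obs, int N, int M,
    const std::vector<double>& log_p, const std::vector<double>& log_q,
    const std::vector<double>& log_a, const std::vector<double>& log_b)
{
    const double NEG_INF=-std::numeric_limits<double>::infinity();
    std::vector<double> alpha(N), alpha_new(N);

    for (int i=0; i<N; i++)                      // initialization  alpha_1(i)=p_i*b_i(O_1)
        alpha[i]=log_p[i]+log_b[i*M+obs[0]];

    for (size_t t=1; t<obs.size(); t++)          // induction
    {
        for (int j=0; j<N; j++)
        {
            double sum=NEG_INF;
            for (int i=0; i<N; i++)
                sum=example_log_add(sum, alpha[i]+log_a[i*N+j]);
            alpha_new[j]=sum+log_b[j*M+obs[t]];
        }
        alpha.swap(alpha_new);
    }

    double sum=NEG_INF;                          // termination over end-state distribution q
    for (int i=0; i<N; i++)
        sum=example_log_add(sum, alpha[i]+log_q[i]);
    return sum;
}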
00729 
00730 
00731 //forward algorithm
00732 //calculates Pr[O_0,O_1, ..., O_t, q_time=S_i| lambda] for 0<= time <= T-1
00733 //Pr[O|lambda] for time > T
00734 float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
00735 {
00736     T_ALPHA_BETA_TABLE* alpha_new;
00737     T_ALPHA_BETA_TABLE* alpha;
00738     T_ALPHA_BETA_TABLE* dummy;
00739     if (time<1)
00740         time=0;
00741 
00742     int32_t wanted_time=time;
00743 
00744     if (ALPHA_CACHE(dimension).table)
00745     {
00746         alpha=&ALPHA_CACHE(dimension).table[0];
00747         alpha_new=&ALPHA_CACHE(dimension).table[N];
00748         time=p_observations->get_vector_length(dimension)+1;
00749     }
00750     else
00751     {
00752         alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00753         alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00754     }
00755 
00756     if (time<1)
00757         return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
00758     else
00759     {
00760         //initialization    alpha_1(i)=p_i*b_i(O_1)
00761         for (int32_t i=0; i<N; i++)
00762             alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;
00763 
00764         //induction     alpha_t+1(j) = (sum_i=1^N alpha_t(i)a_ij) b_j(O_t+1)
00765         for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
00766         {
00767 
00768             for (int32_t j=0; j<N; j++)
00769             {
00770                 register int32_t i ;
00771 #ifdef USE_LOGSUMARRAY
00772                 for (i=0; i<(N>>1); i++)
00773                     ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
00774                             alpha[(i<<1)+1] + get_a((i<<1)+1,j));
00775                 if (N%2==1) 
00776                     alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
00777                         CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j), 
00778                                 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00779                 else
00780                     alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00781 #else //USE_LOGSUMARRAY
00782                 float64_t sum=-CMath::INFTY;  
00783                 for (i=0; i<N; i++)
00784                     sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));
00785 
00786                 alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
00787 #endif //USE_LOGSUMARRAY
00788             }
00789 
00790             if (!ALPHA_CACHE(dimension).table)
00791             {
00792                 dummy=alpha;
00793                 alpha=alpha_new;
00794                 alpha_new=dummy;    //switch alpha/alpha_new
00795             }
00796             else
00797             {
00798                 alpha=alpha_new;
00799                 alpha_new+=N;       //advance into the cache table (pointer arithmetic)
00800             }
00801         }
00802 
00803 
00804         if (time<p_observations->get_vector_length(dimension))
00805         {
00806             register int32_t i;
00807 #ifdef USE_LOGSUMARRAY
00808             for (i=0; i<(N>>1); i++)
00809                 ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
00810                         alpha[(i<<1)+1] + get_a((i<<1)+1,state));
00811             if (N%2==1) 
00812                 return get_b(state, p_observations->get_feature(dimension,time))+
00813                     CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state), 
00814                             CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00815             else
00816                 return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00817 #else //USE_LOGSUMARRAY
00818             register float64_t sum=-CMath::INFTY; 
00819             for (i=0; i<N; i++)
00820                 sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));
00821 
00822             return sum + get_b(state, p_observations->get_feature(dimension,time));
00823 #endif //USE_LOGSUMARRAY
00824         }
00825         else
00826         {
00827             // termination
00828             register int32_t i ; 
00829             float64_t sum ; 
00830 #ifdef USE_LOGSUMARRAY
00831             for (i=0; i<(N>>1); i++)
00832                 ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
00833                         alpha[(i<<1)+1] + get_q((i<<1)+1));
00834             if (N%2==1) 
00835                 sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1), 
00836                         CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
00837             else
00838                 sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
00839 #else //USE_LOGSUMARRAY
00840             sum=-CMath::INFTY; 
00841             for (i=0; i<N; i++)                               //sum over all paths
00842                 sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));     //to get model probability
00843 #endif //USE_LOGSUMARRAY
00844 
00845             if (!ALPHA_CACHE(dimension).table)
00846                 return sum;
00847             else
00848             {
00849                 ALPHA_CACHE(dimension).dimension=dimension;
00850                 ALPHA_CACHE(dimension).updated=true;
00851                 ALPHA_CACHE(dimension).sum=sum;
00852 
00853                 if (wanted_time<p_observations->get_vector_length(dimension))
00854                     return ALPHA_CACHE(dimension).table[wanted_time*N+state];
00855                 else
00856                     return ALPHA_CACHE(dimension).sum;
00857             }
00858         }
00859     }
00860 }
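/* Standalone sketch (illustrative only): the USE_LOGSUMARRAY branch above
 * first folds neighbouring pairs alpha[2i], alpha[2i+1] into a scratch array
 * of half the length and then reduces that array with logarithmic_sum_array.
 * The helper below performs the same kind of pairwise log-sum reduction over
 * a plain buffer; it expects len >= 1 and uses the buffer as scratch space. */
#include <cmath>
#include <limits>

static double example_log_add2(double x, double y)
{
    if (x==-std::numeric_limits<double>::infinity()) return y;
    if (y==-std::numeric_limits<double>::infinity()) return x;
    double hi=(x>y)?x:y, lo=(x>y)?y:x;
    return hi+std::log1p(std::exp(lo-hi));
}

// reduces values[0..len-1] to log(sum_i exp(values[i])) by repeated pairing
static double example_logsum_array(double* values, int len)
{
    while (len>1)
    {
        int half=len/2;
        for (int i=0; i<half; i++)
            values[i]=example_log_add2(values[2*i], values[2*i+1]);
        if (len%2==1)              // an odd leftover element is carried along
            values[half]=values[len-1];
        len=half+(len%2);
    }
    return values[0];
}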
00861 
00862 
00863 //backward algorithm
00864 //calculates Pr[O_t+1,O_t+2, ..., O_T| q_time=S_i, lambda] for 0<= time <= T-1
00865 //Pr[O|lambda] for time >= T
00866 float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
00867 {
00868   T_ALPHA_BETA_TABLE* beta_new;
00869   T_ALPHA_BETA_TABLE* beta;
00870   T_ALPHA_BETA_TABLE* dummy;
00871   int32_t wanted_time=time;
00872   
00873   if (time<0)
00874     forward(time, state, dimension);
00875   
00876   if (BETA_CACHE(dimension).table)
00877     {
00878       beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
00879       beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
00880       time=-1;
00881     }
00882   else
00883     {
00884       beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00885       beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00886     }
00887   
00888   if (time>=p_observations->get_vector_length(dimension)-1)
00889     //    return 0;
00890     //  else if (time==p_observations->get_vector_length(dimension)-1)
00891     return get_q(state);
00892   else
00893     {
00894       //initialization  beta_T(i)=q(i)
00895       for (register int32_t i=0; i<N; i++)
00896     beta[i]=get_q(i);
00897       
00898       //induction       beta_t(i) = sum_j=1^N a_ij*b_j(O_t+1)*beta_t+1(j)
00899       for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
00900     {
00901       for (register int32_t i=0; i<N; i++)
00902         {
00903           register int32_t j, num=trans_list_backward_cnt[i] ;
00904           float64_t sum=-CMath::INFTY; 
00905           for (j=0; j<num; j++)
00906         {
00907           int32_t jj = trans_list_backward[i][j] ;
00908           sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
00909         } ;
00910           beta_new[i]=sum;
00911         }
00912       
00913       if (!BETA_CACHE(dimension).table)
00914         {
00915           dummy=beta;
00916           beta=beta_new;
00917           beta_new=dummy;   //switch beta/beta_new
00918         }
00919       else
00920         {
00921           beta=beta_new;
00922           beta_new-=N;      //step backwards through the cache table (pointer arithmetic)
00923         }
00924     }
00925       
00926       if (time>=0)
00927     {
00928       register int32_t j, num=trans_list_backward_cnt[state] ;
00929       float64_t sum=-CMath::INFTY; 
00930       for (j=0; j<num; j++)
00931         {
00932           int32_t jj = trans_list_backward[state][j] ;
00933           sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
00934         } ;
00935       return sum;
00936     }
00937       else // time<0
00938     {
00939       if (BETA_CACHE(dimension).table)
00940         {
00941           float64_t sum=-CMath::INFTY; 
00942           for (register int32_t j=0; j<N; j++)
00943         sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
00944           BETA_CACHE(dimension).sum=sum;
00945           BETA_CACHE(dimension).dimension=dimension;
00946           BETA_CACHE(dimension).updated=true;
00947           
00948           if (wanted_time<p_observations->get_vector_length(dimension))
00949         return BETA_CACHE(dimension).table[wanted_time*N+state];
00950           else
00951         return BETA_CACHE(dimension).sum;
00952         }
00953       else
00954         {
00955           float64_t sum=-CMath::INFTY; // apply LOG_SUM_ARRAY -- no cache ... does not make much sense anyway...
00956           for (register int32_t j=0; j<N; j++)
00957         sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
00958           return sum;
00959         }
00960     }
00961     }
00962 }
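/* Standalone sketch (illustrative only): the corresponding backward recursion
 * in log space for a small dense HMM; beta_T(i)=log q(i) and
 * beta_t(i)=log sum_j exp(log a_ij + log b_j(O_{t+1}) + beta_{t+1}(j)).
 * Parameter layout matches the forward sketch further above. */
#include <cmath>
#include <vector>
#include <limits>

static double example_log_add3(double x, double y)
{
    if (x==-std::numeric_limits<double>::infinity()) return y;
    if (y==-std::numeric_limits<double>::infinity()) return x;
    double hi=(x>y)?x:y, lo=(x>y)?y:x;
    return hi+std::log1p(std::exp(lo-hi));
}

// returns the full table beta[t*N+i] for an observation sequence obs
static std::vector<double> example_log_backward(
    const std::vector<int>& obs, int N, int M,
    const std::vector<double>& log_q,
    const std::vector<double>& log_a, const std::vector<double>& log_b)
{
    const double NEG_INF=-std::numeric_limits<double>::infinity();
    int T=(int)obs.size();
    std::vector<double> beta(T*N);

    for (int i=0; i<N; i++)                  // initialization at t=T-1
        beta[(T-1)*N+i]=log_q[i];

    for (int t=T-2; t>=0; t--)               // induction backwards in time
        for (int i=0; i<N; i++)
        {
            double sum=NEG_INF;
            for (int j=0; j<N; j++)
                sum=example_log_add3(sum,
                    log_a[i*N+j]+log_b[j*M+obs[t+1]]+beta[(t+1)*N+j]);
            beta[t*N+i]=sum;
        }
    return beta;
}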
00963 
00964 
00965 float64_t CHMM::backward_comp_old(
00966     int32_t time, int32_t state, int32_t dimension)
00967 {
00968     T_ALPHA_BETA_TABLE* beta_new;
00969     T_ALPHA_BETA_TABLE* beta;
00970     T_ALPHA_BETA_TABLE* dummy;
00971     int32_t wanted_time=time;
00972 
00973     if (time<0)
00974         forward(time, state, dimension);
00975 
00976     if (BETA_CACHE(dimension).table)
00977     {
00978         beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
00979         beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
00980         time=-1;
00981     }
00982     else
00983     {
00984         beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
00985         beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
00986     }
00987 
00988     if (time>=p_observations->get_vector_length(dimension)-1)
00989         //    return 0;
00990         //  else if (time==p_observations->get_vector_length(dimension)-1)
00991         return get_q(state);
00992     else
00993     {
00994         //initialization    beta_T(i)=q(i)
00995         for (register int32_t i=0; i<N; i++)
00996             beta[i]=get_q(i);
00997 
00998         //induction     beta_t(i) = sum_j=1^N a_ij*b_j(O_t+1)*beta_t+1(j)
00999         for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
01000         {
01001             for (register int32_t i=0; i<N; i++)
01002             {
01003                 register int32_t j ;
01004 #ifdef USE_LOGSUMARRAY
01005                 for (j=0; j<(N>>1); j++)
01006                     ARRAYS(dimension)[j]=CMath::logarithmic_sum(
01007                             get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
01008                             get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
01009                 if (N%2==1) 
01010                     beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1], 
01011                             CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01012                 else
01013                     beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01014 #else //USE_LOGSUMARRAY             
01015                 float64_t sum=-CMath::INFTY; 
01016                 for (j=0; j<N; j++)
01017                     sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);
01018 
01019                 beta_new[i]=sum;
01020 #endif //USE_LOGSUMARRAY
01021             }
01022 
01023             if (!BETA_CACHE(dimension).table)
01024             {
01025                 dummy=beta;
01026                 beta=beta_new;
01027                 beta_new=dummy; //switch beta/beta_new
01028             }
01029             else
01030             {
01031                 beta=beta_new;
01032                 beta_new-=N;        //step backwards through the cache table (pointer arithmetic)
01033             }
01034         }
01035 
01036         if (time>=0)
01037         {
01038             register int32_t j ;
01039 #ifdef USE_LOGSUMARRAY
01040             for (j=0; j<(N>>1); j++)
01041                 ARRAYS(dimension)[j]=CMath::logarithmic_sum(
01042                         get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
01043                         get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
01044             if (N%2==1) 
01045                 return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1], 
01046                         CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01047             else
01048                 return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01049 #else //USE_LOGSUMARRAY
01050             float64_t sum=-CMath::INFTY; 
01051             for (j=0; j<N; j++)
01052                 sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);
01053 
01054             return sum;
01055 #endif //USE_LOGSUMARRAY
01056         }
01057         else // time<0
01058         {
01059             if (BETA_CACHE(dimension).table)
01060             {
01061 #ifdef USE_LOGSUMARRAY//AAA
01062                 for (int32_t j=0; j<(N>>1); j++)
01063                     ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
01064                             get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
01065                 if (N%2==1) 
01066                     BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
01067                             CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
01068                 else
01069                     BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
01070 #else //USE_LOGSUMARRAY
01071                 float64_t sum=-CMath::INFTY; 
01072                 for (register int32_t j=0; j<N; j++)
01073                     sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
01074                 BETA_CACHE(dimension).sum=sum;
01075 #endif //USE_LOGSUMARRAY
01076                 BETA_CACHE(dimension).dimension=dimension;
01077                 BETA_CACHE(dimension).updated=true;
01078 
01079                 if (wanted_time<p_observations->get_vector_length(dimension))
01080                     return BETA_CACHE(dimension).table[wanted_time*N+state];
01081                 else
01082                     return BETA_CACHE(dimension).sum;
01083             }
01084             else
01085             {
01086                 float64_t sum=-CMath::INFTY; // apply LOG_SUM_ARRAY -- no cache ... does not make much sense anyway...
01087                 for (register int32_t j=0; j<N; j++)
01088                     sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
01089                 return sum;
01090             }
01091         }
01092     }
01093 }
01094 
01095 #ifndef NOVIT
01096 //calculates probability  of best path through the model lambda AND path itself
01097 //using viterbi algorithm
01098 float64_t CHMM::best_path(int32_t dimension)
01099 {
01100     if (!p_observations)
01101         return -1;
01102 
01103     if (dimension==-1) 
01104     {
01105         if (!all_path_prob_updated)
01106         {
01107             SG_INFO( "computing full viterbi likelihood\n") ;
01108             float64_t sum = 0 ;
01109             for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
01110                 sum+=best_path(i) ;
01111             sum /= p_observations->get_num_vectors() ;
01112             all_pat_prob=sum ;
01113             all_path_prob_updated=true ;
01114             return sum ;
01115         } else
01116             return all_pat_prob ;
01117     } ;
01118 
01119     if (!STATES_PER_OBSERVATION_PSI(dimension))
01120         return -1 ;
01121 
01122     int32_t len=0;
01123     if (!p_observations->get_feature_vector(dimension,len))
01124         return -1;
01125 
01126     if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
01127         return pat_prob;
01128     else
01129     {
01130         register float64_t* delta= ARRAYN2(dimension);
01131         register float64_t* delta_new= ARRAYN1(dimension);
01132 
01133         { //initialization
01134             for (register int32_t i=0; i<N; i++)
01135             {
01136                 delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
01137                 set_psi(0, i, 0, dimension);
01138             }
01139         } 
01140 
01141 #ifdef USE_PATHDEBUG
01142         float64_t worst=-CMath::INFTY/4 ;
01143 #endif
01144         //recursion
01145         for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
01146         {
01147             register float64_t* dummy;
01148             register int32_t NN=N ;
01149             for (register int32_t j=0; j<NN; j++)
01150             {
01151                 register float64_t * matrix_a=&transition_matrix_a[j*N] ; // sorry for that
01152                 register float64_t maxj=delta[0] + matrix_a[0];
01153                 register int32_t argmax=0;
01154 
01155                 for (register int32_t i=1; i<NN; i++)
01156                 {
01157                     register float64_t temp = delta[i] + matrix_a[i];
01158 
01159                     if (temp>maxj)
01160                     {
01161                         maxj=temp;
01162                         argmax=i;
01163                     }
01164                 }
01165 #ifdef FIX_POS
01166                 if ((!model) || (model->get_fix_pos_state(t,j,NN)!=CModel::FIX_DISALLOWED))
01167 #endif
01168                     delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
01169 #ifdef FIX_POS
01170                 else
01171                     delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + CModel::DISALLOWED_PENALTY;
01172 #endif            
01173                 set_psi(t, j, argmax, dimension);
01174             }
01175 
01176 #ifdef USE_PATHDEBUG
01177             float64_t best=log(0) ;
01178             for (int32_t jj=0; jj<N; jj++)
01179                 if (delta_new[jj]>best)
01180                     best=delta_new[jj] ;
01181 
01182             if (best<-CMath::INFTY/2)
01183             {
01184                 SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
01185                 worst=best ;
01186             } ;
01187 #endif      
01188 
01189             dummy=delta;
01190             delta=delta_new;
01191             delta_new=dummy;    //switch delta/delta_new
01192         }
01193 
01194 
01195         { //termination
01196             register float64_t maxj=delta[0]+get_q(0);
01197             register int32_t argmax=0;
01198 
01199             for (register int32_t i=1; i<N; i++)
01200             {
01201                 register float64_t temp=delta[i]+get_q(i);
01202 
01203                 if (temp>maxj)
01204                 {
01205                     maxj=temp;
01206                     argmax=i;
01207                 }
01208             }
01209             pat_prob=maxj;
01210             PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
01211         } ;
01212 
01213 
01214         { //state sequence backtracking
01215             for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
01216             {
01217                 PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
01218             }
01219         }
01220         PATH_PROB_UPDATED(dimension)=true;
01221         PATH_PROB_DIMENSION(dimension)=dimension;
01222         return pat_prob ;
01223     }
01224 }
01225 
01226 
01227 #endif // NOVIT
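/* Standalone sketch (illustrative only): the Viterbi recursion that
 * best_path() above implements, written for a small dense HMM in log space.
 * It returns the log probability of the best state sequence and fills 'path'
 * by backtracking through the psi table; all names are made up for the
 * example. */
#include <vector>

static double example_viterbi(
    const std::vector<int>& obs, int N, int M,
    const std::vector<double>& log_p, const std::vector<double>& log_q,
    const std::vector<double>& log_a, const std::vector<double>& log_b,
    std::vector<int>& path)
{
    int T=(int)obs.size();
    std::vector<double> delta(N), delta_new(N);
    std::vector<int> psi(T*N);                  // argmax table for backtracking

    for (int i=0; i<N; i++)                     // initialization
        delta[i]=log_p[i]+log_b[i*M+obs[0]];

    for (int t=1; t<T; t++)                     // recursion: take max instead of sum
    {
        for (int j=0; j<N; j++)
        {
            double best=delta[0]+log_a[0*N+j];
            int argmax=0;
            for (int i=1; i<N; i++)
            {
                double v=delta[i]+log_a[i*N+j];
                if (v>best) { best=v; argmax=i; }
            }
            delta_new[j]=best+log_b[j*M+obs[t]];
            psi[t*N+j]=argmax;
        }
        delta.swap(delta_new);
    }

    double best=delta[0]+log_q[0];              // termination with end state
    int argmax=0;
    for (int i=1; i<N; i++)
        if (delta[i]+log_q[i]>best) { best=delta[i]+log_q[i]; argmax=i; }

    path.assign(T, 0);                          // state sequence backtracking
    path[T-1]=argmax;
    for (int t=T-1; t>0; t--)
        path[t-1]=psi[t*N+path[t]];
    return best;
}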
01228 
01229 #ifndef USE_HMMPARALLEL
01230 float64_t CHMM::model_probability_comp()
01231 {
01232     //for faster calculation cache model probability
01233     mod_prob=0 ;
01234     for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++) //sum in log space
01235         mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01236 
01237     mod_prob_updated=true;
01238     return mod_prob;
01239 }
01240 
01241 #else
01242 
01243 float64_t CHMM::model_probability_comp() 
01244 {
01245     pthread_t *threads=new pthread_t[parallel.get_num_threads()];
01246     S_MODEL_PROB_PARAM *params=new S_MODEL_PROB_PARAM[parallel.get_num_threads()];
01247 
01248     SG_INFO( "computing full model probability\n");
01249     mod_prob=0;
01250 
01251     for (int32_t cpu=0; cpu<parallel.get_num_threads(); cpu++)
01252     {
01253         params[cpu].hmm=this ;
01254         params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel.get_num_threads();
01255         params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel.get_num_threads();
01256 #ifdef SUNOS
01257         thr_create(NULL,0,bw_dim_prefetch, (void*)&params[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]);
01258 #else // SUNOS
01259         pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)&params[cpu]);
01260 #endif // SUNOS
01261     }
01262 
01263     for (int32_t cpu=0; cpu<parallel.get_num_threads(); cpu++)
01264     {
01265         void* ret;
01266         pthread_join(threads[cpu], &ret);
01267         mod_prob+=(float64_t) ret;
01268     }
01269 
01270     delete[] threads;
01271     delete[] params;
01272 
01273     mod_prob_updated=true;
01274     return mod_prob;
01275 }
01276 
01277 void* CHMM::bw_dim_prefetch(void * params)
01278 {
01279     CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01280     int32_t dim=((S_THREAD_PARAM*)params)->dim ;
01281     float64_t*p_buf=((S_THREAD_PARAM*)params)->p_buf ;
01282     float64_t*q_buf=((S_THREAD_PARAM*)params)->q_buf ;
01283     float64_t*a_buf=((S_THREAD_PARAM*)params)->a_buf ;
01284     float64_t*b_buf=((S_THREAD_PARAM*)params)->b_buf ;
01285     ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, true, p_buf, q_buf, a_buf, b_buf) ;
01286     return NULL ;
01287 }
01288 
01289 float64_t CHMM::model_prob_thread(void* params)
01290 {
01291     CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01292     int32_t dim_start=((S_THREAD_PARAM*)params)->dim_start;
01293     int32_t dim_stop=((S_THREAD_PARAM*)params)->dim_stop;
01294 
01295     float64_t prob=0;
01296     for (int32_t dim=dim_start; dim<dim_stop; dim++)
01297         prob+=hmm->model_probability(dim);
01298 
01299     return prob ;
01300 } ;
01302 
01315 #ifndef NOVIT
01316 void* CHMM::vit_dim_prefetch(void * params)
01317 {
01318     CHMM* hmm=((S_THREAD_PARAM*)params)->hmm ;
01319     int32_t dim=((S_THREAD_PARAM*)params)->dim ;
01320     ((S_THREAD_PARAM*)params)->ret = hmm->prefetch(dim, false) ;
01321     return NULL ;
01322 } ;
01323 #endif // NOVIT
01324 
01325 float64_t CHMM::prefetch(
01326     int32_t dim, bool bw, float64_t* p_buf, float64_t* q_buf, float64_t* a_buf,
01327     float64_t* b_buf)
01328 {
01329     if (bw)
01330     {
01331         forward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01332         backward_comp(p_observations->get_vector_length(dim), N-1, dim) ;
01333         float64_t modprob=model_probability(dim) ;
01334         ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01335         return modprob ;
01336     } 
01337 #ifndef NOVIT
01338     else
01339         return best_path(dim) ;
01340 #endif // NOVIT
01341 } ;
01342 #endif //USE_HMMPARALLEL
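/* Standalone sketch (illustrative only, using std::thread rather than the
 * pthread/thr_create calls above): the parallel code in this file splits the
 * observation sequences into contiguous ranges dim_start..dim_stop per worker
 * with the same integer split rule, each worker accumulates a partial result,
 * and the partials are summed after joining. per_sequence_loglik is a
 * placeholder for whatever per-dimension quantity is being computed. */
#include <thread>
#include <vector>
#include <functional>

static double example_parallel_sum(
    int num_vectors, int num_threads,
    const std::function<double(int)>& per_sequence_loglik)
{
    std::vector<double> partial(num_threads, 0.0);
    std::vector<std::thread> workers;

    for (int cpu=0; cpu<num_threads; cpu++)
    {
        int dim_start=num_vectors*cpu/num_threads;       // same split rule as above
        int dim_stop =num_vectors*(cpu+1)/num_threads;
        workers.push_back(std::thread([&partial, cpu, dim_start, dim_stop,
                                       &per_sequence_loglik]() {
            for (int dim=dim_start; dim<dim_stop; dim++)  // each worker owns its slot
                partial[cpu]+=per_sequence_loglik(dim);
        }));
    }

    double total=0;
    for (int cpu=0; cpu<num_threads; cpu++)
    {
        workers[cpu].join();
        total+=partial[cpu];
    }
    return total;
}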
01343 
01344 
01345 #ifdef USE_HMMPARALLEL
01346 
01347 void CHMM::ab_buf_comp(
01348     float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
01349     int32_t dim)
01350 {
01351     int32_t i,j,t ;
01352     float64_t a_sum;
01353     float64_t b_sum;
01354     float64_t prob=0;   //model probability for dim
01355 
01356     float64_t dimmodprob=model_probability(dim);
01357 
01358     for (i=0; i<N; i++)
01359     {
01360         //estimate initial+end state distribution numerator
01361         p_buf[i]=CMath::logarithmic_sum(get_p(i), get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob);
01362         q_buf[i]=CMath::logarithmic_sum(get_q(i), forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob);
01363 
01364         //estimate a
01365         for (j=0; j<N; j++)
01366         {
01367             a_sum=-CMath::INFTY;
01368 
01369             for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
01370             {
01371                 a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
01372                         get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
01373             }
01374             a_buf[N*i+j]=a_sum-dimmodprob ;
01375         }
01376 
01377         //estimate b
01378         for (j=0; j<M; j++)
01379         {
01380             b_sum=CMath::ALMOST_NEG_INFTY;
01381 
01382             for (t=0; t<p_observations->get_vector_length(dim); t++) 
01383             {
01384                 if (p_observations->get_feature(dim,t)==j) 
01385                     b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
01386             }
01387 
01388             b_buf[M*i+j]=b_sum-dimmodprob ;
01389         }
01390     } 
01391 }
01392 
01393 //estimates new model lambda out of lambda_train using baum welch algorithm
01394 void CHMM::estimate_model_baum_welch(CHMM* train)
01395 {
01396     int32_t i,j,t,cpu;
01397     float64_t a_sum, b_sum; //numerator
01398     float64_t fullmodprob=0;    //for all dims
01399 
01400     //clear actual model a,b,p,q are used as numerator
01401     for (i=0; i<N; i++)
01402     {
01403       if (train->get_p(i)>CMath::ALMOST_NEG_INFTY)
01404         set_p(i,log(PSEUDO));
01405       else
01406         set_p(i,train->get_p(i));
01407       if (train->get_q(i)>CMath::ALMOST_NEG_INFTY)
01408         set_q(i,log(PSEUDO));
01409       else
01410         set_q(i,train->get_q(i));
01411       
01412       for (j=0; j<N; j++)
01413         if (train->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01414           set_a(i,j, log(PSEUDO));
01415         else
01416           set_a(i,j,train->get_a(i,j));
01417       for (j=0; j<M; j++)
01418         if (train->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01419           set_b(i,j, log(PSEUDO));
01420         else
01421           set_b(i,j,train->get_b(i,j));
01422     }
01423     
01424     pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
01425     S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
01426 
01427     for (i=0; i<parallel.get_num_threads(); i++)
01428     {
01429         params[i].p_buf=new float64_t[N];
01430         params[i].q_buf=new float64_t[N];
01431         params[i].a_buf=new float64_t[N*N];
01432         params[i].b_buf=new float64_t[N*M];
01433     } ;
01434 
01435     for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01436     {
01437         params[cpu].hmm=train;
01438         params[cpu].dim_start=p_observations->get_num_vectors()*cpu / parallel.get_num_threads();
01439         params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1) / parallel.get_num_threads();
01440 
01441 #ifdef SUNOS
01442         thr_create(NULL,0, bw_dim_prefetch, (void*)&params[cpu], PTHREAD_SCOPE_SYSTEM, &threads[cpu]) ;
01443 #else // SUNOS
01444         pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)&params[cpu]) ;
01445 #endif
01446     }
01447 
01448     for (cpu=0; cpu<parallel.get_num_threads(); cpu++)
01449       {
01450         void* ret;
01451         pthread_join(threads[cpu], &ret) ;
01452         //dimmodprob = params[dim%parallel.get_num_threads()].ret ;
01453         
01454         for (i=0; i<N; i++)
01455           {
01456         //estimate initial+end state distribution numerator
01457         set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01458         set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01459         
01460         //estimate numerator for a
01461         for (j=0; j<N; j++)
01462           set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01463         
01464         //estimate numerator for b
01465         for (j=0; j<M; j++)
01466           set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01467           }
01468         
01469         fullmodprob+=params[cpu].prob;
01470       }
01471 
01472     for (i=0; i<parallel.get_num_threads(); i++)
01473       {
01474         delete[] params[i].p_buf;
01475         delete[] params[i].q_buf;
01476         delete[] params[i].a_buf;
01477         delete[] params[i].b_buf;
01478       }
01479     
01480     delete[] threads ;
01481     delete[] params ;
01482     
01483     //cache train model probability
01484     train->mod_prob=fullmodprob;
01485     train->mod_prob_updated=true ;
01486 
01487     //new model probability is unknown
01488     normalize();
01489     invalidate_model();
01490 }
01491 
01492 #else // USE_HMMPARALLEL 
01493 
01494 #if !defined(NEGATIVE_MODEL_HACK) && !defined(NEGATIVE_MODEL_HACK_DON)
01495 
01496 //estimates new model lambda out of lambda_estimate using baum welch algorithm
01497 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01498 {
01499     int32_t i,j,t,dim;
01500     float64_t a_sum, b_sum; //numerator
01501     float64_t dimmodprob=0; //model probability for dim
01502     float64_t fullmodprob=0;    //for all dims
01503 
01504     //clear actual model a,b,p,q are used as numerator
01505     for (i=0; i<N; i++)
01506     {
01507         if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01508             set_p(i,log(PSEUDO));
01509         else
01510             set_p(i,estimate->get_p(i));
01511         if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01512             set_q(i,log(PSEUDO));
01513         else
01514             set_q(i,estimate->get_q(i));
01515 
01516         for (j=0; j<N; j++)
01517             if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01518                 set_a(i,j, log(PSEUDO));
01519             else
01520                 set_a(i,j,estimate->get_a(i,j));
01521         for (j=0; j<M; j++)
01522             if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01523                 set_b(i,j, log(PSEUDO));
01524             else
01525                 set_b(i,j,estimate->get_b(i,j));
01526     }
01527     invalidate_model();
01528 
01529     //change summation order to make use of alpha/beta caches
01530     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01531     {
01532         dimmodprob=estimate->model_probability(dim);
01533         fullmodprob+=dimmodprob ;
01534 
01535         for (i=0; i<N; i++)
01536         {
01537             //estimate initial+end state distribution numerator
01538             set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01539             set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01540 
01541             int32_t num = trans_list_backward_cnt[i] ;
01542 
01543             //estimate a
01544             for (j=0; j<num; j++)
01545             {
01546                 int32_t jj = trans_list_backward[i][j] ;
01547                 a_sum=-CMath::INFTY;
01548 
01549                 for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
01550                 {
01551                     a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01552                             estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
01553                 }
01554                 set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
01555             }
01556 
01557             //estimate b
01558             for (j=0; j<M; j++)
01559             {
01560                 b_sum=-CMath::INFTY;
01561 
01562                 for (t=0; t<p_observations->get_vector_length(dim); t++) 
01563                 {
01564                     if (p_observations->get_feature(dim,t)==j)
01565                         b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01566                 }
01567 
01568                 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01569             }
01570         } 
01571     }
01572 
01573     //cache estimate model probability
01574     estimate->mod_prob=fullmodprob;
01575     estimate->mod_prob_updated=true ;
01576 
01577     //new model probability is unknown
01578     normalize();
01579     invalidate_model();
01580 }
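/* A sketch of what the loop above accumulates, written in conventional
 * (non-log) notation and assuming the usual abbreviations
 * alpha_t(i)=forward(t,i,dim), beta_t(i)=backward(t,i,dim),
 * O_t=get_feature(dim,t) and P=model_probability(dim); in the code every
 * product becomes a sum of logs and every sum uses CMath::logarithmic_sum:
 *
 *   p_i  += p_i * b_i(O_0) * beta_0(i) / P
 *   q_i  += alpha_{T-1}(i) * q_i / P
 *   a_ij += sum_{t<T-1} alpha_t(i) * a_ij * b_j(O_{t+1}) * beta_{t+1}(j) / P
 *   b_ij += sum_{t: O_t==j} alpha_t(i) * beta_t(i) / P
 *
 * i.e. the standard Baum-Welch numerators; normalize() afterwards turns the
 * accumulated numerators into proper distributions.
 */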
01581 
01582 //estimate a new model lambda from lambda_estimate using the Baum-Welch algorithm
01583 // optimize only p, q and a, but not b
01584 void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
01585 {
01586     int32_t i,j,t,dim;
01587     float64_t a_sum;    //numerator
01588     float64_t dimmodprob=0; //model probability for dim
01589     float64_t fullmodprob=0;    //for all dims
01590 
01591     //clear the actual model; a,b,p,q are used as numerators
01592     for (i=0; i<N; i++)
01593       {
01594         if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01595           set_p(i,log(PSEUDO));
01596         else
01597           set_p(i,estimate->get_p(i));
01598         if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01599           set_q(i,log(PSEUDO));
01600         else
01601           set_q(i,estimate->get_q(i));
01602         
01603         for (j=0; j<N; j++)
01604           if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01605         set_a(i,j, log(PSEUDO));
01606           else
01607         set_a(i,j,estimate->get_a(i,j));
01608         for (j=0; j<M; j++)
01609           set_b(i,j,estimate->get_b(i,j));
01610       }
01611     invalidate_model();
01612     
01613     //change summation order to make use of alpha/beta caches
01614     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01615       {
01616         dimmodprob=estimate->model_probability(dim);
01617         fullmodprob+=dimmodprob ;
01618         
01619         for (i=0; i<N; i++)
01620           {
01621         //estimate initial+end state distribution numerator
01622         set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01623         set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01624         
01625         int32_t num = trans_list_backward_cnt[i] ;
01626         //estimate a
01627         for (j=0; j<num; j++)
01628           {
01629             int32_t jj = trans_list_backward[i][j] ;
01630             a_sum=-CMath::INFTY;
01631             
01632             for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
01633               {
01634             a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01635                             estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
01636               }
01637             set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
01638           }
01639           } 
01640       }
01641     
01642     //cache estimate model probability
01643     estimate->mod_prob=fullmodprob;
01644     estimate->mod_prob_updated=true ;
01645     
01646     //new model probability is unknown
01647     normalize();
01648     invalidate_model();
01649 }
01650 
01651 
01652 //estimate a new model lambda from lambda_estimate using the Baum-Welch algorithm
01653 void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
01654 {
01655     int32_t i,j,t,dim;
01656     float64_t a_sum, b_sum; //numerator
01657     float64_t dimmodprob=0; //model probability for dim
01658     float64_t fullmodprob=0;    //for all dims
01659 
01660     //clear the actual model; a,b,p,q are used as numerators
01661     for (i=0; i<N; i++)
01662       {
01663         if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
01664           set_p(i,log(PSEUDO));
01665         else
01666           set_p(i,estimate->get_p(i));
01667         if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
01668           set_q(i,log(PSEUDO));
01669         else
01670           set_q(i,estimate->get_q(i));
01671         
01672         for (j=0; j<N; j++)
01673           if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01674         set_a(i,j, log(PSEUDO));
01675           else
01676         set_a(i,j,estimate->get_a(i,j));
01677         for (j=0; j<M; j++)
01678           if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01679         set_b(i,j, log(PSEUDO));
01680           else
01681         set_b(i,j,estimate->get_b(i,j));
01682       }
01683     invalidate_model();
01684     
01685     //change summation order to make use of alpha/beta caches
01686     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01687       {
01688         dimmodprob=estimate->model_probability(dim);
01689         fullmodprob+=dimmodprob ;
01690         
01691         for (i=0; i<N; i++)
01692           {
01693         //estimate initial+end state distribution numerator
01694         set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01695         set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01696         
01697         //estimate a
01698         for (j=0; j<N; j++)
01699           {
01700             a_sum=-CMath::INFTY;
01701             
01702             for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
01703               {
01704             a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
01705                             estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01706               }
01707             set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
01708           }
01709         
01710         //estimate b
01711         for (j=0; j<M; j++)
01712           {
01713             b_sum=-CMath::INFTY;
01714             
01715             for (t=0; t<p_observations->get_vector_length(dim); t++) 
01716               {
01717             if (p_observations->get_feature(dim,t)==j)
01718               b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01719               }
01720             
01721             set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01722           }
01723           } 
01724       }
01725     
01726     //cache estimate model probability
01727     estimate->mod_prob=fullmodprob;
01728     estimate->mod_prob_updated=true ;
01729     
01730     //new model probability is unknown
01731     normalize();
01732     invalidate_model();
01733 }
01734 
01735 
01736 #elif defined(NEGATIVE_MODEL_HACK)
01737 //estimate a new model lambda from lambda_estimate using the Baum-Welch algorithm
01738 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01739 {
01740     int32_t i,j,t,dim;
01741     float64_t a_sum, b_sum; //numerator
01742     float64_t dimmodprob=0; //model probability for dim
01743     float64_t fullmodprob=0;    //for all dims
01744 
01745     const float64_t MIN_RAND=23e-3;
01746 
01747     SG_DEBUG( "M:%d\n",M);
01748 
01749     if (estimate->get_p(0)>-0.00001)
01750     {
01751         for (i=0; i<N; i++)
01752         {
01753             if (i==25)
01754                 estimate->set_p(i,-CMath::INFTY);
01755             else
01756                 estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));
01757 
01758             if (i<49)
01759                 estimate->set_q(i, -CMath::INFTY);
01760             else 
01761                 estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));
01762 
01763             if (i<25)
01764             {
01765                 for (j=0; j<M; j++)
01766                     estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
01767             }
01768         }
01769     }
01770 
01771     for (i=0; i<N; i++)
01772     {
01773         if (i==25)
01774             estimate->set_p(i,-CMath::INFTY);
01775 
01776         if (i<49)
01777             estimate->set_q(i, -CMath::INFTY);
01778 
01779     }
01780     estimate->invalidate_model();
01781     estimate->normalize();
01782 
01783     //clear the actual model; a,b,p,q are used as numerators
01784     for (i=0; i<N; i++)
01785     {
01786         //if (i!=25)
01787         set_p(i,log(PSEUDO));
01788         //else
01789         //  set_p(i,estimate->get_p(i));
01790 
01791         set_q(i,log(PSEUDO));
01792 
01793         for (j=0; j<N; j++)
01794             set_a(i,j, estimate->get_a(i,j));   //a is const
01795 
01796         if (i<25)
01797         {
01798             for (j=0; j<M; j++)
01799                 set_b(i,j, log(PSEUDO));    
01800         }
01801         else
01802         {
01803             for (j=0; j<M; j++)
01804                 set_b(i,j, estimate->get_b(i,j));   //b is const for state
01805         }
01806     }
01807 
01808     //change summation order to make use of alpha/beta caches
01809     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01810     {
01811         dimmodprob=estimate->model_probability(dim);
01812         fullmodprob+=dimmodprob ;
01813 
01814         for (i=0; i<N; i++)
01815         {
01816             //estimate initial+end state distribution numerator
01817             set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01818             set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01819         }
01820 
01821         for (i=0; i<25; i++)
01822         {
01823             //estimate b
01824             for (j=0; j<M; j++)
01825             {
01826                 b_sum=-CMath::INFTY;
01827 
01828                 for (t=0; t<p_observations->get_vector_length(dim); t++) 
01829                 {
01830                     if (p_observations->get_feature(dim,t)==j) 
01831                         b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01832                 }
01833 
01834                 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01835             }
01836         } 
01837     }
01838 
01839     //cache estimate model probability
01840     estimate->mod_prob=fullmodprob;
01841     estimate->mod_prob_updated=true ;
01842 
01843     //new model probability is unknown
01844     normalize();
01845     invalidate_model();
01846 }
01847 #else
01848 //estimate a new model lambda from lambda_estimate using the Baum-Welch algorithm
01849 void CHMM::estimate_model_baum_welch(CHMM* estimate)
01850 {
01851     int32_t i,j,t,dim;
01852     float64_t a_sum, b_sum; //numerator
01853     float64_t dimmodprob=0; //model probability for dim
01854     float64_t fullmodprob=0;    //for all dims
01855 
01856     const float64_t MIN_RAND=23e-3;
01857     static bool bla=true;
01858 
01859     if ((bla) && estimate->get_q(N-1)>-0.00001)
01860     {
01861         bla=false;
01862         for (i=0; i<N; i++)
01863         {
01864             if (i<=N-50)
01865                 estimate->set_p(i, log(CMath::random(MIN_RAND, 1.0)));
01866             else
01867                 estimate->set_p(i, -1000);
01868 
01869             if ( i==N-25-1)
01870                 estimate->set_q(i,-10000);
01871             else
01872                 estimate->set_q(i, log(CMath::random(MIN_RAND, 1.0)));
01873             SG_DEBUG( "q[%d]=%lf\n", i, estimate->get_q(i));
01874 
01875             if (i>=N-25)
01876             {
01877                 for (j=0; j<M; j++)
01878                     estimate->set_b(i,j, log(CMath::random(MIN_RAND, 1.0)));
01879             }
01880         }
01881         estimate->invalidate_model();
01882         estimate->normalize(true);
01883     }
01884 
01885     //clear the actual model; a,b,p,q are used as numerators
01886     for (i=0; i<N; i++)
01887     {
01888         set_p(i,log(PSEUDO));
01889         set_q(i,log(PSEUDO));
01890 
01891         for (j=0; j<N; j++)
01892             set_a(i,j, estimate->get_a(i,j));   //a is const
01893 
01894         if (i>=N-25) //estimate last 25 emissions
01895         {
01896             for (j=0; j<M; j++)
01897                 set_b(i,j, log(PSEUDO));    
01898         }
01899         else
01900         {
01901             for (j=0; j<M; j++)
01902                 set_b(i,j, estimate->get_b(i,j));   //b is const for state
01903 
01904             if (i==N-25-1-24 || i==N-25-1-23)
01905             {
01906                 for (j=0; j<M; j++)
01907                 {
01908                     if (estimate->get_b(i,j)<-10)
01909                         set_b(i,j, -CMath::INFTY);
01910                 }
01911             }
01912         }
01913     }
01914 
01915     //change summation order to make use of alpha/beta caches
01916     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01917     {
01918         dimmodprob=estimate->model_probability(dim);
01919         fullmodprob+=dimmodprob ;
01920 
01921         for (i=0; i<N; i++)
01922         {
01923             //estimate initial+end state distribution numerator
01924             if (i<=N-50)
01925                 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
01926             else
01927                 set_p(i, -1000);
01928 
01929             if (i==N-25-1)
01930                 set_q(i,-10000);
01931             else
01932                 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));
01933         }
01934 
01935         for (i=N-25; i<N; i++)
01936         {
01937             //estimate b
01938             for (j=0; j<M; j++)
01939             {
01940                 b_sum=-CMath::INFTY;
01941 
01942                 for (t=0; t<p_observations->get_vector_length(dim); t++) 
01943                 {
01944                     if (p_observations->get_feature(dim,t)==j) 
01945                         b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01946                 }
01947 
01948                 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
01949             }
01950         } 
01951     }
01952 
01953     //cache estimate model probability
01954     estimate->mod_prob=fullmodprob;
01955     estimate->mod_prob_updated=true ;
01956 
01957     //new model probability is unknown
01958     normalize(true);
01959     invalidate_model();
01960 }
01961 #endif //NEGATIVE_MODEL_HACK || .._DON
01962 #endif // USE_HMMPARALLEL
01963 
01964 
01965 //estimate a new model lambda from lambda_estimate using the Baum-Welch algorithm, restricted to the parameters listed in the model definition (learn_a/learn_b/learn_p/learn_q)
01966 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01967 {
01968     int32_t i,j,old_i,k,t,dim;
01969     float64_t a_sum_num, b_sum_num;     //numerator
01970     float64_t a_sum_denom, b_sum_denom; //denominator
01971     float64_t dimmodprob=-CMath::INFTY; //model probability for dim
01972     float64_t fullmodprob=0;            //for all dims
01973     float64_t* A=ARRAYN1(0);
01974     float64_t* B=ARRAYN2(0);
01975 
01976     //clear the actual model; a,b,p,q are used as numerators,
01977     //A,B as denominators for a,b
01978     for (k=0; (i=model->get_learn_p(k))!=-1; k++)   
01979         set_p(i,log(PSEUDO));
01980 
01981     for (k=0; (i=model->get_learn_q(k))!=-1; k++)   
01982         set_q(i,log(PSEUDO));
01983 
01984     for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01985     {
01986         j=model->get_learn_a(k,1);
01987         set_a(i,j, log(PSEUDO));
01988     }
01989 
01990     for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01991     {
01992         j=model->get_learn_b(k,1);
01993         set_b(i,j, log(PSEUDO));
01994     }
01995 
01996     for (i=0; i<N; i++)
01997     {
01998         A[i]=log(PSEUDO);
01999         B[i]=log(PSEUDO);
02000     }
02001 
02002 #ifdef USE_HMMPARALLEL
02003     pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
02004     S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
02005 #endif
02006 
02007     //change summation order to make use of alpha/beta caches
02008     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
02009     {
02010 #ifdef USE_HMMPARALLEL
02011         if (dim%parallel.get_num_threads()==0)
02012         {
02013             int32_t i ;
02014             for (i=0; i<parallel.get_num_threads(); i++)
02015                 if (dim+i<p_observations->get_num_vectors())
02016                 {
02017                     params[i].hmm=estimate ;
02018                     params[i].dim=dim+i ;
02019 #ifdef SUNOS
02020                     thr_create(NULL,0, bw_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02021 #else // SUNOS
02022                     pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
02023 #endif
02024                 } ;
02025             for (i=0; i<parallel.get_num_threads(); i++)
02026                 if (dim+i<p_observations->get_num_vectors())
02027                 {
02028                     void * ret ;
02029                     pthread_join(threads[i], &ret) ;
02030                     dimmodprob = params[i].ret ;
02031                 } ;
02032         }
02033 #else
02034         dimmodprob=estimate->model_probability(dim);
02035 #endif // USE_HMMPARALLEL
02036 
02037         //and denominator
02038         fullmodprob+= dimmodprob;
02039 
02040         //estimate initial+end state distribution numerator
02041         for (k=0; (i=model->get_learn_p(k))!=-1; k++)   
02042             set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
02043 
02044         for (k=0; (i=model->get_learn_q(k))!=-1; k++)   
02045             set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
02046                         estimate->backward(p_observations->get_vector_length(dim)-1, i, dim)  - dimmodprob ) );
02047 
02048         //estimate a
02049         old_i=-1;
02050         for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
02051         {
02052             //denominator is constant for j
02053             //therefore calculate it first
02054             if (old_i!=i)
02055             {
02056                 old_i=i;
02057                 a_sum_denom=-CMath::INFTY;
02058 
02059                 for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
02060                     a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
02061 
02062                 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
02063             }
02064 
02065             j=model->get_learn_a(k,1);
02066             a_sum_num=-CMath::INFTY;
02067             for (t=0; t<p_observations->get_vector_length(dim)-1; t++) 
02068             {
02069                 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
02070                         estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
02071             }
02072 
02073             set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
02074         }
02075 
02076         //estimate  b
02077         old_i=-1;
02078         for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
02079         {
02080 
02081             //denominator is constant for j
02082             //therefore calculate it first
02083             if (old_i!=i)
02084             {
02085                 old_i=i;
02086                 b_sum_denom=-CMath::INFTY;
02087 
02088                 for (t=0; t<p_observations->get_vector_length(dim); t++) 
02089                     b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
02090 
02091                 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
02092             }
02093 
02094             j=model->get_learn_b(k,1);
02095             b_sum_num=-CMath::INFTY;
02096             for (t=0; t<p_observations->get_vector_length(dim); t++) 
02097             {
02098                 if (p_observations->get_feature(dim,t)==j) 
02099                     b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
02100             }
02101 
02102             set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
02103         }
02104     }
02105 #ifdef USE_HMMPARALLEL
02106     delete[] threads ;
02107     delete[] params ;
02108 #endif
02109 
02110 
02111     //calculate estimates
02112     for (k=0; (i=model->get_learn_p(k))!=-1; k++)   
02113         set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
02114 
02115     for (k=0; (i=model->get_learn_q(k))!=-1; k++)   
02116         set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
02117 
02118     for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
02119     {
02120         j=model->get_learn_a(k,1);
02121         set_a(i,j, get_a(i,j) - A[i]);
02122     }
02123 
02124     for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
02125     {
02126         j=model->get_learn_b(k,1);
02127         set_b(i,j, get_b(i,j) - B[i]);
02128     }
02129 
02130     //cache estimate model probability
02131     estimate->mod_prob=fullmodprob;
02132     estimate->mod_prob_updated=true ;
02133 
02134     //new model probability is unknown
02135     normalize();
02136     invalidate_model();
02137 }
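/* A sketch of the "defined" variant above: only the entries listed in
 * model->get_learn_{p,q,a,b} are re-estimated. With gamma_t(i) =
 * alpha_t(i)*beta_t(i)/P (usual notation, products carried out in log space),
 * A[i] and B[i] accumulate the per-state denominators, so the final step is
 *
 *   a_ij = numerator_a(i,j) / A[i]   where A[i] ~ sum_dim sum_{t<T-1} gamma_t(i)
 *   b_ij = numerator_b(i,j) / B[i]   where B[i] ~ sum_dim sum_t     gamma_t(i)
 *   p_i  = numerator_p(i) / (num_vectors + N*PSEUDO),  and q_i analogously.
 */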
02138 
02139 #ifndef NOVIT
02140 //estimate a new model lambda from lambda_estimate using the Viterbi algorithm
02141 void CHMM::estimate_model_viterbi(CHMM* estimate)
02142 {
02143     int32_t i,j,t;
02144     float64_t sum;
02145     float64_t* P=ARRAYN1(0);
02146     float64_t* Q=ARRAYN2(0);
02147 
02148     path_deriv_updated=false ;
02149 
02150     //initialize with pseudocounts
02151     for (i=0; i<N; i++)
02152     {
02153         for (j=0; j<N; j++)
02154             set_A(i,j, PSEUDO);
02155 
02156         for (j=0; j<M; j++)
02157             set_B(i,j, PSEUDO);
02158 
02159         P[i]=PSEUDO;
02160         Q[i]=PSEUDO;
02161     }
02162 
02163     float64_t allpatprob=0 ;
02164 
02165 #ifdef USE_HMMPARALLEL
02166     pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
02167     S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
02168 #endif
02169 
02170     for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02171     {
02172 
02173 #ifdef USE_HMMPARALLEL
02174         if (dim%parallel.get_num_threads()==0)
02175         {
02176             int32_t i ;
02177             for (i=0; i<parallel.get_num_threads(); i++)
02178                 if (dim+i<p_observations->get_num_vectors())
02179                 {
02180                     params[i].hmm=estimate ;
02181                     params[i].dim=dim+i ;
02182 #ifdef SUNOS
02183                     thr_create(NULL,0, vit_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02184 #else // SUNOS
02185                     pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)&params[i]) ;
02186 #endif
02187                 } ;
02188             for (i=0; i<parallel.get_num_threads(); i++)
02189                 if (dim+i<p_observations->get_num_vectors())
02190                 {
02191                     void * ret ;
02192                     pthread_join(threads[i], &ret) ;
02193                     allpatprob += params[i].ret ;
02194                 } ;
02195         } ;
02196 #else
02197         //using viterbi to find best path
02198         allpatprob += estimate->best_path(dim);
02199 #endif // USE_HMMPARALLEL
02200 
02201         //counting occurrences for A and B
02202         for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02203         {
02204             set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02205             set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t),  get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02206         }
02207 
02208         set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1),  get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02209 
02210         P[estimate->PATH(dim)[0]]++;
02211         Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02212     }
02213 
02214 #ifdef USE_HMMPARALLEL
02215     delete[] threads ;
02216     delete[] params ;
02217 #endif 
02218 
02219     allpatprob/=p_observations->get_num_vectors() ;
02220     estimate->all_pat_prob=allpatprob ;
02221     estimate->all_path_prob_updated=true ;
02222 
02223     //converting A to probability measure a
02224     for (i=0; i<N; i++)
02225     {
02226         sum=0;
02227         for (j=0; j<N; j++)
02228             sum+=get_A(i,j);
02229 
02230         for (j=0; j<N; j++)
02231             set_a(i,j, log(get_A(i,j)/sum));
02232     }
02233 
02234     //converting B to probability measures b
02235     for (i=0; i<N; i++)
02236     {
02237         sum=0;
02238         for (j=0; j<M; j++)
02239             sum+=get_B(i,j);
02240 
02241         for (j=0; j<M; j++)
02242             set_b(i,j, log(get_B(i, j)/sum));
02243     }
02244 
02245     //converting P to probability measure p
02246     sum=0;
02247     for (i=0; i<N; i++)
02248         sum+=P[i];
02249 
02250     for (i=0; i<N; i++)
02251         set_p(i, log(P[i]/sum));
02252 
02253     //converting Q to probability measure q
02254     sum=0;
02255     for (i=0; i<N; i++)
02256         sum+=Q[i];
02257 
02258     for (i=0; i<N; i++)
02259         set_q(i, log(Q[i]/sum));
02260 
02261     //new model probability is unknown
02262     invalidate_model();
02263 }
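/* A sketch of the Viterbi re-estimation above: A(i,j), B(i,j), P[i] and Q[i]
 * simply count occurrences along the best (Viterbi) path of every sequence,
 * starting from PSEUDO counts, and are then normalized per state:
 *
 *   a_ij = A(i,j) / sum_k A(i,k)        b_ij = B(i,j) / sum_k B(i,k)
 *   p_i  = P[i]   / sum_k P[k]          q_i  = Q[i]   / sum_k Q[k]
 *
 * (all stored as logs via set_a/set_b/set_p/set_q).
 */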
02264 
02265 // estimate parameters listed in learn_x
02266 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02267 {
02268     int32_t i,j,k,t;
02269     float64_t sum;
02270     float64_t* P=ARRAYN1(0);
02271     float64_t* Q=ARRAYN2(0);
02272 
02273     path_deriv_updated=false ;
02274 
02275     //initialize with pseudocounts
02276     for (i=0; i<N; i++)
02277     {
02278         for (j=0; j<N; j++)
02279             set_A(i,j, PSEUDO);
02280 
02281         for (j=0; j<M; j++)
02282             set_B(i,j, PSEUDO);
02283 
02284         P[i]=PSEUDO;
02285         Q[i]=PSEUDO;
02286     }
02287 
02288 #ifdef USE_HMMPARALLEL
02289     pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
02290     S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
02291 #endif
02292 
02293     float64_t allpatprob=0.0 ;
02294     for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02295     {
02296 
02297 #ifdef USE_HMMPARALLEL
02298         if (dim%parallel.get_num_threads()==0)
02299         {
02300             int32_t i ;
02301             for (i=0; i<parallel.get_num_threads(); i++)
02302                 if (dim+i<p_observations->get_num_vectors())
02303                 {
02304                     params[i].hmm=estimate ;
02305                     params[i].dim=dim+i ;
02306 #ifdef SUNOS
02307                     thr_create(NULL,0,vit_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
02308 #else // SUNOS
02309                     pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)&params[i]) ;
02310 #endif //SUNOS
02311                 } ;
02312             for (i=0; i<parallel.get_num_threads(); i++)
02313                 if (dim+i<p_observations->get_num_vectors())
02314                 {
02315                     void * ret ;
02316                     pthread_join(threads[i], &ret) ;
02317                     allpatprob += params[i].ret ;
02318                 } ;
02319         } ;
02320 #else // USE_HMMPARALLEL
02321         //using viterbi to find best path
02322         allpatprob += estimate->best_path(dim);
02323 #endif // USE_HMMPARALLEL
02324 
02325 
02326         //counting occurrences for A and B
02327         for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02328         {
02329             set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02330             set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t),  get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02331         }
02332 
02333         set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1),  get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02334 
02335         P[estimate->PATH(dim)[0]]++;
02336         Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02337     }
02338 
02339 #ifdef USE_HMMPARALLEL
02340     delete[] threads ;
02341     delete[] params ;
02342 #endif
02343 
02344     //estimate->invalidate_model() ;
02345     //float64_t q=estimate->best_path(-1) ;
02346 
02347     allpatprob/=p_observations->get_num_vectors() ;
02348     estimate->all_pat_prob=allpatprob ;
02349     estimate->all_path_prob_updated=true ;
02350 
02351 
02352     //copy old model
02353     for (i=0; i<N; i++)
02354     {
02355         for (j=0; j<N; j++)
02356             set_a(i,j, estimate->get_a(i,j));
02357 
02358         for (j=0; j<M; j++)
02359             set_b(i,j, estimate->get_b(i,j));
02360     }
02361 
02362     //converting A to probability measure a
02363     i=0;
02364     sum=0;
02365     j=model->get_learn_a(i,0);
02366     k=i;
02367     while (model->get_learn_a(i,0)!=-1 || k<i)
02368     {
02369         if (j==model->get_learn_a(i,0))
02370         {
02371             sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02372             i++;
02373         }
02374         else
02375         {
02376             while (k<i)
02377             {
02378                 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02379                 k++;
02380             }
02381 
02382             sum=0;
02383             j=model->get_learn_a(i,0);
02384             k=i;
02385         }
02386     }
02387 
02388     //converting B to probability measures b
02389     i=0;
02390     sum=0;
02391     j=model->get_learn_b(i,0);
02392     k=i;
02393     while (model->get_learn_b(i,0)!=-1 || k<i)
02394     {
02395         if (j==model->get_learn_b(i,0))
02396         {
02397             sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02398             i++;
02399         }
02400         else
02401         {
02402             while (k<i)
02403             {
02404                 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02405                 k++;
02406             }
02407 
02408             sum=0;
02409             j=model->get_learn_b(i,0);
02410             k=i;
02411         }
02412     }
02413 
02414     i=0;
02415     sum=0;
02416     while (model->get_learn_p(i)!=-1)
02417     {
02418         sum+=P[model->get_learn_p(i)] ;
02419         i++ ;
02420     } ;
02421     i=0 ;
02422     while (model->get_learn_p(i)!=-1)
02423     {
02424         set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02425         i++ ;
02426     } ;
02427 
02428     i=0;
02429     sum=0;
02430     while (model->get_learn_q(i)!=-1)
02431     {
02432         sum+=Q[model->get_learn_q(i)] ;
02433         i++ ;
02434     } ;
02435     i=0 ;
02436     while (model->get_learn_q(i)!=-1)
02437     {
02438         set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02439         i++ ;
02440     } ;
02441 
02442 
02443     //new model probability is unknown
02444     invalidate_model();
02445 }
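/* Note on the loops above (and the analogous ones in init_model_defined()):
 * the learn_a/learn_b lists are assumed to be grouped by their first index
 * (the source state); each group is summed into 'sum' and, once the next
 * group starts, the trailing while(k<i) loop normalizes the finished group so
 * that its entries form a proper conditional distribution.
 */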
02446 #endif // NOVIT
02447 
02448 //------------------------------------------------------------------------------------//
02449 
02450 //to give an idea of what the model looks like
02451 void CHMM::output_model(bool verbose)
02452 {
02453     int32_t i,j;
02454     float64_t checksum;
02455 
02456     //generic info
02457     SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observation symbols: %i, #observations: %ix%i\n", 
02458             (float64_t)((p_observations) ? model_probability() : -CMath::INFTY), 
02459             N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02460 
02461     if (verbose)
02462     {
02463         // transition matrix a
02464         SG_INFO( "\ntransition matrix\n");
02465         for (i=0; i<N; i++)
02466         {
02467             checksum= get_q(i);
02468             for (j=0; j<N; j++)
02469             {
02470                 checksum= CMath::logarithmic_sum(checksum, get_a(i,j));
02471 
02472                 SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));
02473 
02474                 if (j % 4 == 3)
02475                     SG_PRINT( "\n");
02476             }
02477             if (fabs(checksum)>1e-5)
02478                 SG_DEBUG( " checksum % E ******* \n",checksum);
02479             else
02480                 SG_DEBUG( " checksum % E\n",checksum);
02481         }
02482 
02483         // distribution of start states p
02484         SG_INFO( "\ndistribution of start states\n");
02485         checksum=-CMath::INFTY;
02486         for (i=0; i<N; i++)
02487         {
02488             checksum= CMath::logarithmic_sum(checksum, get_p(i));
02489             SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
02490             if (i % 4 == 3)
02491                 SG_PRINT( "\n");
02492         }
02493         if (fabs(checksum)>1e-5)
02494             SG_DEBUG( " checksum % E ******* \n",checksum);
02495         else
02496             SG_DEBUG( " checksum=% E\n", checksum);
02497 
02498         // distribution of terminal states q
02499         SG_INFO( "\ndistribution of terminal states\n");
02500         checksum=-CMath::INFTY;
02501         for (i=0; i<N; i++)
02502         {
02503             checksum= CMath::logarithmic_sum(checksum, get_q(i));
02504             SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
02505             if (i % 4 == 3)
02506                 SG_INFO("\n");
02507         }
02508         if (fabs(checksum)>1e-5)
02509             SG_DEBUG( " checksum % E ******* \n",checksum);
02510         else
02511             SG_DEBUG( " checksum=% E\n", checksum);
02512 
02513         // distribution of observations given the state b
02514         SG_INFO("\ndistribution of observations given the state\n");
02515         for (i=0; i<N; i++)
02516         {
02517             checksum=-CMath::INFTY;
02518             for (j=0; j<M; j++)
02519             {
02520                 checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
02521                 SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
02522                 if (j % 4 == 3)
02523                     SG_PRINT("\n");
02524             }
02525             if (fabs(checksum)>1e-5)
02526                 SG_DEBUG( " checksum % E ******* \n",checksum);
02527             else
02528                 SG_DEBUG( " checksum % E\n",checksum);
02529         }
02530     }
02531     SG_PRINT("\n");
02532 }
02533 
02534 //to give an idea of what the model looks like
02535 void CHMM::output_model_defined(bool verbose)
02536 {
02537     int32_t i,j;
02538     if (!model)
02539         return ;
02540 
02541     //generic info
02542     SG_INFO("log(Pr[O|model])=%e, #states: %i, #observation symbols: %i, #observations: %ix%i\n", 
02543             (float64_t)((p_observations) ? model_probability() : -CMath::INFTY), 
02544             N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02545 
02546     if (verbose)
02547     {
02548         // transition matrix a
02549         SG_INFO("\ntransition matrix\n");
02550 
02551         //initialize a values that have to be learned
02552         i=0;
02553         j=model->get_learn_a(i,0);
02554         while (model->get_learn_a(i,0)!=-1)
02555         {
02556             if (j!=model->get_learn_a(i,0))
02557             {
02558                 j=model->get_learn_a(i,0);
02559                 SG_PRINT("\n");
02560             }
02561 
02562             SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
02563             i++;
02564         }
02565 
02566         // distribution of observations given the state b
02567         SG_INFO("\n\ndistribution of observations given the state\n");
02568         i=0;
02569         j=model->get_learn_b(i,0);
02570         while (model->get_learn_b(i,0)!=-1)
02571         {
02572             if (j!=model->get_learn_b(i,0))
02573             {
02574                 j=model->get_learn_b(i,0);
02575                 SG_PRINT("\n");
02576             }
02577 
02578             SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
02579             i++;
02580         }
02581 
02582         SG_PRINT("\n");
02583     }
02584     SG_PRINT("\n");
02585 }
02586 
02587 //------------------------------------------------------------------------------------//
02588 
02589 //convert model to log probabilities
02590 void CHMM::convert_to_log()
02591 {
02592     int32_t i,j;
02593 
02594     for (i=0; i<N; i++)
02595     {
02596         if (get_p(i)!=0)
02597             set_p(i, log(get_p(i)));
02598         else
02599             set_p(i, -CMath::INFTY);
02600     }
02601 
02602     for (i=0; i<N; i++)
02603     {
02604         if (get_q(i)!=0)
02605             set_q(i, log(get_q(i)));
02606         else
02607             set_q(i, -CMath::INFTY);
02608     }
02609 
02610     for (i=0; i<N; i++)
02611     {
02612         for (j=0; j<N; j++)
02613         {
02614             if (get_a(i,j)!=0)
02615                 set_a(i,j, log(get_a(i,j)));
02616             else
02617                 set_a(i,j, -CMath::INFTY);
02618         }
02619     }
02620 
02621     for (i=0; i<N; i++)
02622     {
02623         for (j=0; j<M; j++)
02624         {
02625             if (get_b(i,j)!=0)
02626                 set_b(i,j, log(get_b(i,j)));
02627             else
02628                 set_b(i,j, -CMath::INFTY);
02629         }
02630     }
02631     loglikelihood=true;
02632 
02633     invalidate_model();
02634 }
02635 
02636 //init model with random values
02637 void CHMM::init_model_random()
02638 {
02639     const float64_t MIN_RAND=23e-3;
02640 
02641     float64_t sum;
02642     int32_t i,j;
02643 
02644     //initialize a with random values
02645     for (i=0; i<N; i++)
02646     {
02647         sum=0;
02648         for (j=0; j<N; j++)
02649         {
02650             set_a(i,j, CMath::random(MIN_RAND, 1.0));
02651 
02652             sum+=get_a(i,j);
02653         }
02654 
02655         for (j=0; j<N; j++)
02656             set_a(i,j, get_a(i,j)/sum);
02657     }
02658 
02659     //initialize pi with random values
02660     sum=0;
02661     for (i=0; i<N; i++)
02662     {
02663         set_p(i, CMath::random(MIN_RAND, 1.0));
02664 
02665         sum+=get_p(i);
02666     }
02667 
02668     for (i=0; i<N; i++)
02669         set_p(i, get_p(i)/sum);
02670 
02671     //initialize q with random values
02672     sum=0;
02673     for (i=0; i<N; i++)
02674     {
02675         set_q(i, CMath::random(MIN_RAND, 1.0));
02676 
02677         sum+=get_q(i);
02678     }
02679 
02680     for (i=0; i<N; i++)
02681         set_q(i, get_q(i)/sum);
02682 
02683     //initialize b with random values
02684     for (i=0; i<N; i++)
02685     {
02686         sum=0;
02687         for (j=0; j<M; j++)
02688         {
02689             set_b(i,j, CMath::random(MIN_RAND, 1.0));
02690 
02691             sum+=get_b(i,j);
02692         }
02693 
02694         for (j=0; j<M; j++)
02695             set_b(i,j, get_b(i,j)/sum);
02696     }
02697 
02698     //initialize pat/mod_prob as not calculated
02699     invalidate_model();
02700 }
02701 
02702 //init model according to const_x
02703 void CHMM::init_model_defined()
02704 {
02705     int32_t i,j,k,r;
02706     float64_t sum;
02707     const float64_t MIN_RAND=23e-3;
02708 
02709     //initialize a with zeros
02710     for (i=0; i<N; i++)
02711         for (j=0; j<N; j++)
02712             set_a(i,j, 0);
02713 
02714     //initialize p with zeros
02715     for (i=0; i<N; i++)
02716         set_p(i, 0);
02717 
02718     //initialize q with zeros
02719     for (i=0; i<N; i++)
02720         set_q(i, 0);
02721 
02722     //initialize b with zeros
02723     for (i=0; i<N; i++)
02724         for (j=0; j<M; j++)
02725             set_b(i,j, 0);
02726 
02727 
02728     //initialize a values that have to be learned
02729     float64_t *R=new float64_t[N] ;
02730     for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
02731     i=0; sum=0; k=i; 
02732     j=model->get_learn_a(i,0);
02733     while (model->get_learn_a(i,0)!=-1 || k<i)
02734     {
02735         if (j==model->get_learn_a(i,0))
02736         {
02737             sum+=R[model->get_learn_a(i,1)] ;
02738             i++;
02739         }
02740         else
02741         {
02742             while (k<i)
02743             {
02744                 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), 
02745                         R[model->get_learn_a(k,1)]/sum);
02746                 k++;
02747             }
02748             j=model->get_learn_a(i,0);
02749             k=i;
02750             sum=0;
02751             for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
02752         }
02753     }
02754     delete[] R ; R=NULL ;
02755 
02756     //initialize b values that have to be learned
02757     R=new float64_t[M] ;
02758     for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
02759     i=0; sum=0; k=0 ;
02760     j=model->get_learn_b(i,0);
02761     while (model->get_learn_b(i,0)!=-1 || k<i)
02762     {
02763         if (j==model->get_learn_b(i,0))
02764         {
02765             sum+=R[model->get_learn_b(i,1)] ;
02766             i++;
02767         }
02768         else
02769         {
02770             while (k<i)
02771             {
02772                 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), 
02773                         R[model->get_learn_b(k,1)]/sum);
02774                 k++;
02775             }
02776 
02777             j=model->get_learn_b(i,0);
02778             k=i;
02779             sum=0;
02780             for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
02781         }
02782     }
02783     delete[] R ; R=NULL ;
02784 
02785     //set consts into a
02786     i=0;
02787     while (model->get_const_a(i,0) != -1)
02788     {
02789         set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
02790         i++;
02791     }
02792 
02793     //set consts into b
02794     i=0;
02795     while (model->get_const_b(i,0) != -1)
02796     {
02797         set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
02798         i++;
02799     }
02800 
02801     //set consts into p
02802     i=0;
02803     while (model->get_const_p(i) != -1)
02804     {
02805         set_p(model->get_const_p(i), model->get_const_p_val(i));
02806         i++;
02807     }
02808 
02809     //initialize q with zeros
02810     for (i=0; i<N; i++)
02811         set_q(i, 0.0);
02812 
02813     //set consts into q
02814     i=0;
02815     while (model->get_const_q(i) != -1)
02816     {
02817         set_q(model->get_const_q(i), model->get_const_q_val(i));
02818         i++;
02819     }
02820 
02821     // init p
02822     i=0;
02823     sum=0;
02824     while (model->get_learn_p(i)!=-1)
02825     {
02826         set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
02827         sum+=get_p(model->get_learn_p(i)) ;
02828         i++ ;
02829     } ;
02830     i=0 ;
02831     while (model->get_learn_p(i)!=-1)
02832     {
02833         set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
02834         i++ ;
02835     } ;
02836 
02837     // initialize q
02838     i=0;
02839     sum=0;
02840     while (model->get_learn_q(i)!=-1)
02841     {
02842         set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
02843         sum+=get_q(model->get_learn_q(i)) ;
02844         i++ ;
02845     } ;
02846     i=0 ;
02847     while (model->get_learn_q(i)!=-1)
02848     {
02849         set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
02850         i++ ;
02851     } ;
02852 
02853     //initialize pat/mod_prob as not calculated
02854     invalidate_model();
02855 }
02856 
02857 void CHMM::clear_model()
02858 {
02859     int32_t i,j;
02860     for (i=0; i<N; i++)
02861     {
02862         set_p(i, log(PSEUDO));
02863         set_q(i, log(PSEUDO));
02864 
02865         for (j=0; j<N; j++)
02866             set_a(i,j, log(PSEUDO));
02867 
02868         for (j=0; j<M; j++)
02869             set_b(i,j, log(PSEUDO));
02870     }
02871 }
02872 
02873 void CHMM::clear_model_defined()
02874 {
02875     int32_t i,j,k;
02876 
02877     for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02878         set_p(j, log(PSEUDO));
02879 
02880     for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02881         set_q(j, log(PSEUDO));
02882 
02883     for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02884     {
02885         k=model->get_learn_a(i,1); // catch (j,k) as indices to be learned
02886         set_a(j,k, log(PSEUDO));
02887     }
02888 
02889     for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02890     {
02891         k=model->get_learn_b(i,1); // catch (j,k) as indices to be learned
02892         set_b(j,k, log(PSEUDO));
02893     }
02894 }
02895 
02896 void CHMM::copy_model(CHMM* l)
02897 {
02898     int32_t i,j;
02899     for (i=0; i<N; i++)
02900     {
02901         set_p(i, l->get_p(i));
02902         set_q(i, l->get_q(i));
02903 
02904         for (j=0; j<N; j++)
02905             set_a(i,j, l->get_a(i,j));
02906 
02907         for (j=0; j<M; j++)
02908             set_b(i,j, l->get_b(i,j));
02909     }
02910 }
02911 
02912 void CHMM::invalidate_model()
02913 {
02914     //initialize pat/mod_prob/alpha/beta cache as not calculated
02915     this->mod_prob=0.0;
02916     this->mod_prob_updated=false;
02917 
02918     if (mem_initialized)
02919     {
02920       if (trans_list_forward_cnt)
02921         delete[] trans_list_forward_cnt ;
02922       trans_list_forward_cnt=NULL ;
02923       if (trans_list_backward_cnt)
02924         delete[] trans_list_backward_cnt ;
02925       trans_list_backward_cnt=NULL ;
02926       if (trans_list_forward)
02927         {
02928           for (int32_t i=0; i<trans_list_len; i++)
02929         if (trans_list_forward[i])
02930           delete[] trans_list_forward[i] ;
02931           delete[] trans_list_forward ;
02932           trans_list_forward=NULL ;
02933         }
02934       if (trans_list_backward)
02935         {
02936           for (int32_t i=0; i<trans_list_len; i++)
02937         if (trans_list_backward[i])
02938           delete[] trans_list_backward[i] ;
02939           delete[] trans_list_backward ;
02940           trans_list_backward = NULL ;
02941         } ;
02942 
02943       trans_list_len = N ;
02944       trans_list_forward = new T_STATES*[N] ;
02945       trans_list_forward_cnt = new T_STATES[N] ;
02946 
02947       for (int32_t j=0; j<N; j++)
02948         {
02949           trans_list_forward_cnt[j]= 0 ;
02950           trans_list_forward[j]= new T_STATES[N] ;
02951           for (int32_t i=0; i<N; i++)
02952         if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
02953           {
02954             trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
02955             trans_list_forward_cnt[j]++ ;
02956           } 
02957         } ;
02958       
02959       trans_list_backward = new T_STATES*[N] ;
02960       trans_list_backward_cnt = new T_STATES[N] ;
02961       
02962       for (int32_t i=0; i<N; i++)
02963         {
02964           trans_list_backward_cnt[i]= 0 ;
02965           trans_list_backward[i]= new T_STATES[N] ;
02966           for (int32_t j=0; j<N; j++)
02967         if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
02968           {
02969             trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
02970             trans_list_backward_cnt[i]++ ;
02971           } 
02972         } ;
02973     } ;
02974 #ifndef NOVIT
02975     this->all_pat_prob=0.0;
02976     this->pat_prob=0.0;
02977     this->path_deriv_updated=false ;
02978     this->path_deriv_dimension=-1 ;
02979     this->all_path_prob_updated=false;
02980 #endif  //NOVIT
02981 
02982 #ifdef USE_HMMPARALLEL_STRUCTURES
02983     {
02984         for (int32_t i=0; i<parallel.get_num_threads(); i++)
02985         {
02986             this->alpha_cache[i].updated=false;
02987             this->beta_cache[i].updated=false;
02988 #ifndef NOVIT
02989             path_prob_updated[i]=false ;
02990             path_prob_dimension[i]=-1 ;
02991 #endif // NOVIT
02992         } ;
02993     } 
02994 #else // USE_HMMPARALLEL_STRUCTURES
02995     this->alpha_cache.updated=false;
02996     this->beta_cache.updated=false;
02997 #ifndef NOVIT
02998     this->path_prob_dimension=-1;
02999     this->path_prob_updated=false;
03000 #endif //NOVIT
03001 
03002 #endif // USE_HMMPARALLEL_STRUCTURES
03003 }
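/* Note: besides resetting the cached probabilities, invalidate_model() also
 * rebuilds (when mem_initialized is set) the sparse transition lists used by
 * the Baum-Welch routines above: trans_list_forward[j] holds every state i
 * with a(i,j) > ALMOST_NEG_INFTY (possible predecessors of j) and
 * trans_list_backward[i] holds every state j reachable from i, so the
 * estimation loops can skip impossible transitions.
 */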
03004 
03005 void CHMM::open_bracket(FILE* file)
03006 {
03007     int32_t value;
03008     while (((value=fgetc(file)) != EOF) && (value!='['))    //skip possible spaces and end if '[' occurs
03009     {
03010         if (value=='\n')
03011             line++;
03012     }
03013 
03014     if (value==EOF)
03015         error(line, "expected \"[\" in input file");
03016 
03017     while (((value=fgetc(file)) != EOF) && (isspace(value)))    //skip possible spaces
03018     {
03019         if (value=='\n')
03020             line++;
03021     }
03022 
03023     ungetc(value, file);
03024 }
03025 
03026 void CHMM::close_bracket(FILE* file)
03027 {
03028     int32_t value;
03029     while (((value=fgetc(file)) != EOF) && (value!=']'))    //skip possible spaces and end if ']' occurs
03030     {
03031         if (value=='\n')
03032             line++;
03033     }
03034 
03035     if (value==EOF)
03036         error(line, "expected \"]\" in input file");
03037 }
03038 
03039 bool CHMM::comma_or_space(FILE* file)
03040 {
03041     int32_t value;
03042     while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))     //skip possible spaces and end if ',', ';' or ']' occurs
03043     {
03044         if (value=='\n')
03045             line++;
03046     }
03047     if (value==']')
03048     {
03049         ungetc(value, file);
03050         SG_ERROR( "found ']' instead of ';' or ','\n") ;
03051         return false ;
03052     } ;
03053 
03054     if (value==EOF)
03055         error(line, "expected \";\" or \",\" in input file");
03056 
03057     while (((value=fgetc(file)) != EOF) && (isspace(value)))    //skip possible spaces
03058     {
03059         if (value=='\n')
03060             line++;
03061     }
03062     ungetc(value, file);
03063     return true ;
03064 }
03065 
03066 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
03067 {
03068     signed char value;
03069 
03070     while (((value=fgetc(file)) != EOF) && 
03071             !isdigit(value) && (value!='A') 
03072             && (value!='C') && (value!='G') && (value!='T') 
03073             && (value!='N') && (value!='n') 
03074             && (value!='.') && (value!='-') && (value!='e') && (value!=']')) //skip possible spaces+crap
03075     {
03076         if (value=='\n')
03077             line++;
03078     }
03079     if (value==']')
03080     {
03081         ungetc(value,file) ;
03082         return false ;
03083     } ;
03084     if (value!=EOF)
03085     {
03086         int32_t i=0;
03087         switch (value)
03088         {
03089             case 'A':
03090                 value='0' +CAlphabet::B_A;
03091                 break;
03092             case 'C':
03093                 value='0' +CAlphabet::B_C;
03094                 break;
03095             case 'G':
03096                 value='0' +CAlphabet::B_G;
03097                 break;
03098             case 'T':
03099                 value='0' +CAlphabet::B_T;
03100                 break;
03101         };
03102 
03103         buffer[i++]=value;
03104 
03105         while (((value=fgetc(file)) != EOF) && 
03106                 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
03107                  || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
03108                  || (value=='N') || (value=='n')) && (i<length))
03109         {
03110             switch (value)
03111             {
03112                 case 'A':
03113                     value='0' +CAlphabet::B_A;
03114                     break;
03115                 case 'C':
03116                     value='0' +CAlphabet::B_C;
03117                     break;
03118                 case 'G':
03119                     value='0' +CAlphabet::B_G;
03120                     break;
03121                 case 'T':
03122                     value='0' +CAlphabet::B_T;
03123                     break;
03124                 case '1': case '2': case'3': case '4': case'5':
03125                 case '6': case '7': case'8': case '9': case '0': break ;
03126                 case '.': case 'e': case '-': break ;
03127                 default:
03128                                               SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
03129             };
03130             buffer[i++]=value;
03131         }
03132         ungetc(value, file);
03133         buffer[i]='\0';
03134 
03135         return (i<=length) && (i>0); 
03136     }
03137     return false;
03138 }
03139 
03140 /*
03141    -format specs: model_file (model.hmm)
03142    % HMM - specification
03143    % N  - number of states
03144    % M  - number of observation_tokens
03145    % a is state_transition_matrix 
03146    % size(a)= [N,N]
03147    %
03148    % b is observation_per_state_matrix
03149    % size(b)= [N,M]
03150    %
03151    % p is initial distribution
03152    % size(p)= [1, N]
03153 
03154    N=<int32_t>; 
03155    M=<int32_t>;
03156 
03157    p=[<float64_t>,<float64_t>...<float64_t>];
03158    q=[<float64_t>,<float64_t>...<float64_t>];
03159 
03160    a=[ [<float64_t>,<float64_t>...<float64_t>];
03161    [<float64_t>,<float64_t>...<float64_t>];
03162    [<float64_t>,<float64_t>...<float64_t>];
03163    [<float64_t>,<float64_t>...<float64_t>];
03164    [<float64_t>,<float64_t>...<float64_t>];
03165    ];
03166 
03167    b=[ [<float64_t>,<float64_t>...<float64_t>];
03168    [<float64_t>,<float64_t>...<float64_t>];
03169    [<float64_t>,<float64_t>...<float64_t>];
03170    [<float64_t>,<float64_t>...<float64_t>];
03171    [<float64_t>,<float64_t>...<float64_t>];
03172    ];
03173    */
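/* A minimal hypothetical example in the format sketched above (N=2 states,
 * M=2 observation symbols; all probability values are made up, and
 * load_model() below defines the exact syntax that is actually accepted):
 *
 *   N=2;
 *   M=2;
 *
 *   p=[0.5,0.5];
 *   q=[0.5,0.5];
 *
 *   a=[ [0.9,0.1];
 *       [0.2,0.8];
 *     ];
 *
 *   b=[ [0.7,0.3];
 *       [0.4,0.6];
 *     ];
 */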
03174 
03175 bool CHMM::load_model(FILE* file)
03176 {
03177     int32_t received_params=0;  //a,b,p,N,M,O
03178 
03179     bool result=false;
03180     E_STATE state=INITIAL;
03181     char buffer[1024];
03182 
03183     line=1;
03184     int32_t i,j;
03185 
03186     if (file)
03187     {
03188         while (state!=END)
03189         {
03190             int32_t value=fgetc(file);
03191 
03192             if (value=='\n')
03193                 line++;
03194             if (value==EOF)
03195                 state=END;
03196 
03197             switch (state)
03198             {
03199                 case INITIAL:   // in the initial state only N,M initialisations and comments are allowed
03200                     if (value=='N')
03201                     {
03202                         if (received_params & GOTN)
03203                             error(line, "in model file: \"N double defined\"");
03204                         else
03205                             state=GET_N;
03206                     }
03207                     else if (value=='M')
03208                     {
03209                         if (received_params & GOTM)
03210                             error(line, "in model file: \"M double defined\"");
03211                         else
03212                             state=GET_M;
03213                     }
03214                     else if (value=='%')
03215                     {
03216                         state=COMMENT;
03217                     }   // no break: control falls through to the ARRAYs case below
03218                 case ARRAYs:    // when n,m, order are known p,a,b arrays are allowed to be read
03219                     if (value=='p')
03220                     {
03221                         if (received_params & GOTp)
03222                             error(line, "in model file: \"p double defined\"");
03223                         else
03224                             state=GET_p;
03225                     }
03226                     else if (value=='q')
03227                     {
03228                         if (received_params & GOTq)
03229                             error(line, "in model file: \"q double defined\"");
03230                         else
03231                             state=GET_q;
03232                     }
03233                     else if (value=='a')
03234                     {
03235                         if (received_params & GOTa)
03236                             error(line, "in model file: \"a double defined\"");
03237                         else
03238                             state=GET_a;
03239                     }
03240                     else if (value=='b')
03241                     {
03242                         if (received_params & GOTb)
03243                             error(line, "in model file: \"b double defined\"");
03244                         else
03245                             state=GET_b;
03246                     }
03247                     else if (value=='%')
03248                     {
03249                         state=COMMENT;
03250                     }
03251                     break;
03252                 case GET_N:
03253                     if (value=='=')
03254                     {
03255                         if (get_numbuffer(file, buffer, 4)) //get num
03256                         {
03257                             this->N= atoi(buffer);
03258                             received_params|=GOTN;
03259                             state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03260                         }
03261                         else
03262                             state=END;      //end if error
03263                     }
03264                     break;
03265                 case GET_M:
03266                     if (value=='=')
03267                     {
03268                         if (get_numbuffer(file, buffer, 4)) //get num
03269                         {
03270                             this->M= atoi(buffer);
03271                             received_params|=GOTM;
03272                             state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03273                         }
03274                         else
03275                             state=END;      //end if error
03276                     }
03277                     break;
03278                 case GET_a:
03279                     if (value=='=')
03280                     {
03281                         float64_t f;
03282 
03283                         transition_matrix_a=new float64_t[N*N];
03284                         open_bracket(file);
03285                         for (i=0; i<this->N; i++)
03286                         {
03287                             open_bracket(file);
03288 
03289                             for (j=0; j<this->N ; j++)
03290                             {
03291 
03292                                 if (fscanf( file, "%le", &f ) != 1)
03293                                     error(line, "float64_t expected");
03294                                 else
03295                                     set_a(i,j, f);
03296 
03297                                 if (j<this->N-1)
03298                                     comma_or_space(file);
03299                                 else
03300                                     close_bracket(file);
03301                             }
03302 
03303                             if (i<this->N-1)
03304                                 comma_or_space(file);
03305                             else
03306                                 close_bracket(file);
03307                         }
03308                         received_params|=GOTa;
03309                     }
03310                     state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03311                     break;
03312                 case GET_b:
03313                     if (value=='=')
03314                     {
03315                         float64_t f;
03316 
03317                         observation_matrix_b=new float64_t[N*M];    
03318                         open_bracket(file);
03319                         for (i=0; i<this->N; i++)
03320                         {
03321                             open_bracket(file);
03322 
03323                             for (j=0; j<this->M ; j++)
03324                             {
03325 
03326                                 if (fscanf( file, "%le", &f ) != 1)
03327                                     error(line, "float64_t expected");
03328                                 else
03329                                     set_b(i,j, f);
03330 
03331                                 if (j<this->M-1)
03332                                     comma_or_space(file);
03333                                 else
03334                                     close_bracket(file);
03335                             }
03336 
03337                             if (i<this->N-1)
03338                                 comma_or_space(file);
03339                             else
03340                                 close_bracket(file);
03341                         }   
03342                         received_params|=GOTb;
03343                     }
03344                     state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03345                     break;
03346                 case GET_p:
03347                     if (value=='=')
03348                     {
03349                         float64_t f;
03350 
03351                         initial_state_distribution_p=new float64_t[N];
03352                         open_bracket(file);
03353                         for (i=0; i<this->N ; i++)
03354                         {
03355                             if (fscanf( file, "%le", &f ) != 1)
03356                                 error(line, "float64_t expected");
03357                             else
03358                                 set_p(i, f);
03359 
03360                             if (i<this->N-1)
03361                                 comma_or_space(file);
03362                             else
03363                                 close_bracket(file);
03364                         }
03365                         received_params|=GOTp;
03366                     }
03367                     state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03368                     break;
03369                 case GET_q:
03370                     if (value=='=')
03371                     {
03372                         float64_t f;
03373 
03374                         end_state_distribution_q=new float64_t[N];
03375                         open_bracket(file);
03376                         for (i=0; i<this->N ; i++)
03377                         {
03378                             if (fscanf( file, "%le", &f ) != 1)
03379                                 error(line, "float64_t expected");
03380                             else
03381                                 set_q(i, f);
03382 
03383                             if (i<this->N-1)
03384                                 comma_or_space(file);
03385                             else
03386                                 close_bracket(file);
03387                         }
03388                         received_params|=GOTq;
03389                     }
03390                     state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03391                     break;
03392                 case COMMENT:
03393                     if (value==EOF)
03394                         state=END;
03395                     else if (value=='\n')
03396                     {
03397                         line++;
03398                         state=INITIAL;
03399                     }
03400                     break;
03401 
03402                 default:
03403                     break;
03404             }
03405         }
03406         result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
03407     }
03408 
03409     SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
03411     return result;
03412 }
03413 
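/* Minimal usage sketch for load_model() (editorial addition; assumes an
 * already constructed CHMM instance `hmm`, error handling omitted):
 *
 *   FILE* f=fopen("model.hmm", "r");
 *   if (f)
 *   {
 *       bool ok=hmm.load_model(f);   // parses N, M, p, q, a and b as specified above
 *       fclose(f);
 *       // note the warning above: the loaded parameters are not normalized here
 *   }
 */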
03414 /*  
03415     -format specs: train_file (train.trn)
03416     % HMM-TRAIN - specification
03417     % learn_a - elements in state_transition_matrix to be learned
03418     % learn_b - elements in observation_per_state_matrix to be learned
03419     %           note: each line stands for 
03420     %               <state>, <observation(0)>, <observation(1)>...<observation(NOW)>
03421     % learn_p - elements in initial distribution to be learned
03422     % learn_q - elements in the end-state distribution to be learned
03423     %
03424     % const_x - specifies initial values of elements
03425     %               rest is assumed to be 0.0
03426     %
03427     %   NOTE: IMPLICIT DEFINES:
03428     %       #define A 0
03429     %       #define C 1
03430     %       #define G 2
03431     %       #define T 3
03432     %
03433 
03434     learn_a=[ [<int32_t>,<int32_t>]; 
03435     [<int32_t>,<int32_t>]; 
03436     [<int32_t>,<int32_t>]; 
03437     ........
03438     [<int32_t>,<int32_t>]; 
03439     [-1,-1];
03440     ];
03441 
03442     learn_b=[ [<int32_t>,<int32_t>]; 
03443     [<int32_t>,<int32_t>]; 
03444     [<int32_t>,<int32_t>]; 
03445     ........
03446     [<int32_t>,<int32_t>]; 
03447     [-1,-1];
03448     ];
03449 
03450     learn_p= [ <int32_t>, ... , <int32_t>, -1 ];
03451     learn_q= [ <int32_t>, ... , <int32_t>, -1 ];
03452 
03453 
03454     const_a=[ [<int32_t>,<int32_t>,<DOUBLE>]; 
03455     [<int32_t>,<int32_t>,<DOUBLE>]; 
03456     [<int32_t>,<int32_t>,<DOUBLE>]; 
03457     ........
03458     [<int32_t>,<int32_t>,<DOUBLE>]; 
03459     [-1,-1,-1];
03460     ];
03461 
03462     const_b=[ [<int32_t>,<int32_t>,<DOUBLE>]; 
03463     [<int32_t>,<int32_t>,<DOUBLE>]; 
03464     [<int32_t>,<int32_t>,<DOUBLE>]; 
03465     ........
03466     [<int32_t>,<int32_t>,<DOUBLE>]; 
03467     [-1,-1,-1];
03468     ];
03469 
03470     const_p[]=[ [<int32_t>, <DOUBLE>], ... , [<int32_t>,<DOUBLE>], [-1,-1] ];
03471     const_q[]=[ [<int32_t>, <DOUBLE>], ... , [<int32_t>,<DOUBLE>], [-1,-1] ];
03472     */
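/* Illustrative example of the definition-file format above (editorial
 * addition; the entries are made up and use the implicit defines A=0..T=3
 * and the [-1,...] terminators):
 *
 *   learn_a=[ [0,0]; [0,1]; [1,1]; [-1,-1]; ];
 *   learn_b=[ [0,0]; [0,3]; [-1,-1]; ];
 *   learn_p=[ 0, 1, -1 ];
 *   learn_q=[ 1, -1 ];
 *
 *   const_a=[ [1,0,0.3]; [-1,-1,-1]; ];
 *   const_b=[ [1,2,0.25]; [-1,-1,-1]; ];
 *   const_p=[ [0,0.7], [-1,-1] ];
 *   const_q=[ [1,1.0], [-1,-1] ];
 */
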
03473 bool CHMM::load_definitions(FILE* file, bool verbose, bool init)
03474 {
03475     if (model)
03476         delete model ;
03477     model=new CModel();
03478 
03479     int32_t received_params=0x0000000;  //learn_a/b/p/q, const_a/b/p/q
03480     char buffer[1024];
03481 
03482     bool result=false;
03483     E_STATE state=INITIAL;
03484 
03485     { // do some useful initializations 
03486         model->set_learn_a(0, -1);
03487         model->set_learn_a(1, -1);
03488         model->set_const_a(0, -1);
03489         model->set_const_a(1, -1);
03490         model->set_const_a_val(0, 1.0);
03491         model->set_learn_b(0, -1);
03492         model->set_const_b(0, -1);
03493         model->set_const_b_val(0, 1.0);
03494         model->set_learn_p(0, -1);
03495         model->set_learn_q(0, -1);
03496         model->set_const_p(0, -1);
03497         model->set_const_q(0, -1);
03498     } ;
03499 
03500     line=1;
03501 
03502     if (file)
03503     {
03504         while (state!=END)
03505         {
03506             int32_t value=fgetc(file);
03507 
03508             if (value=='\n')
03509                 line++;
03510 
03511             if (value==EOF)
03512                 state=END;
03513 
03514             switch (state)
03515             {
03516                 case INITIAL:   
03517                     if (value=='l')
03518                     {
03519                         if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03520                         {
03521                             switch(fgetc(file))
03522                             {
03523                                 case 'a':
03524                                     state=GET_learn_a;
03525                                     break;
03526                                 case 'b':
03527                                     state=GET_learn_b;
03528                                     break;
03529                                 case 'p':
03530                                     state=GET_learn_p;
03531                                     break;
03532                                 case 'q':
03533                                     state=GET_learn_q;
03534                                     break;
03535                                 default:
03536                                     error(line, "a,b,p or q expected in train definition file");
03537                             };
03538                         }
03539                     }
03540                     else if (value=='c')
03541                     {
03542                         if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s' 
03543                                 && fgetc(file)=='t' && fgetc(file)=='_')
03544                         {
03545                             switch(fgetc(file))
03546                             {
03547                                 case 'a':
03548                                     state=GET_const_a;
03549                                     break;
03550                                 case 'b':
03551                                     state=GET_const_b;
03552                                     break;
03553                                 case 'p':
03554                                     state=GET_const_p;
03555                                     break;
03556                                 case 'q':
03557                                     state=GET_const_q;
03558                                     break;
03559                                 default:
03560                                     error(line, "a,b,p or q expected in train definition file");
03561                             };
03562                         }
03563                     }
03564                     else if (value=='%')
03565                     {
03566                         state=COMMENT;
03567                     }
03568                     else if (value==EOF)
03569                     {
03570                         state=END;
03571                     }
03572                     break;
03573                 case GET_learn_a:
03574                     if (value=='=')
03575                     {
03576                         open_bracket(file);
03577                         bool finished=false;
03578                         int32_t i=0;
03579 
03580                         if (verbose) 
03581                             SG_DEBUG( "\nlearn for transition matrix: ") ;
03582                         while (!finished)
03583                         {
03584                             open_bracket(file);
03585 
03586                             if (get_numbuffer(file, buffer, 4)) //get num
03587                             {
03588                                 value=atoi(buffer);
03589                                 model->set_learn_a(i++, value);
03590 
03591                                 if (value<0)
03592                                 {
03593                                     finished=true;
03594                                     break;
03595                                 }
03596                                 if (value>=N)
03597                                     SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03598                             }
03599                             else
03600                                 break;
03601 
03602                             comma_or_space(file);
03603 
03604                             if (get_numbuffer(file, buffer, 4)) //get num
03605                             {
03606                                 value=atoi(buffer);
03607                                 model->set_learn_a(i++, value);
03608 
03609                                 if (value<0)
03610                                 {
03611                                     finished=true;
03612                                     break;
03613                                 }
03614                                 if (value>=N)
03615                                     SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03616 
03617                             }
03618                             else
03619                                 break;
03620                             close_bracket(file);
03621                         }
03622                         close_bracket(file);
03623                         if (verbose) 
03624                             SG_DEBUG( "%i Entries",(int)(i/2)) ;
03625 
03626                         if (finished)
03627                         {
03628                             received_params|=GOTlearn_a;
03629 
03630                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03631                         }
03632                         else
03633                             state=END;
03634                     }
03635                     break;
03636                 case GET_learn_b:
03637                     if (value=='=')
03638                     {
03639                         open_bracket(file);
03640                         bool finished=false;
03641                         int32_t i=0;
03642 
03643                         if (verbose) 
03644                             SG_DEBUG( "\nlearn for emission matrix:   ") ;
03645 
03646                         while (!finished)
03647                         {
03648                             open_bracket(file);
03649 
03650                             int32_t combine=0;
03651 
03652                             for (int32_t j=0; j<2; j++)
03653                             {
03654                                 if (get_numbuffer(file, buffer, 4))   //get num
03655                                 {
03656                                     value=atoi(buffer);
03657 
03658                                     if (j==0)
03659                                     {
03660                                         model->set_learn_b(i++, value);
03661 
03662                                         if (value<0)
03663                                         {
03664                                             finished=true;
03665                                             break;
03666                                         }
03667                                         if (value>=N)
03668                                             SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03669                                     }
03670                                     else 
03671                                         combine=value;
03672                                 }
03673                                 else
03674                                     break;
03675 
03676                                 if (j<1)
03677                                     comma_or_space(file);
03678                                 else
03679                                     close_bracket(file);
03680                             }
03681                             model->set_learn_b(i++, combine);
03682                             if (combine>=M)
03683 
03684                                 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)combine) ;
03685                         }
03686                         close_bracket(file);
03687                         if (verbose) 
03688                             SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03689 
03690                         if (finished)
03691                         {
03692                             received_params|=GOTlearn_b;
03693                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03694                         }
03695                         else
03696                             state=END;
03697                     }
03698                     break;
03699                 case GET_learn_p:
03700                     if (value=='=')
03701                     {
03702                         open_bracket(file);
03703                         bool finished=false;
03704                         int32_t i=0;
03705 
03706                         if (verbose) 
03707                             SG_DEBUG( "\nlearn start states: ") ;
03708                         while (!finished)
03709                         {
03710                             if (get_numbuffer(file, buffer, 4)) //get num
03711                             {
03712                                 value=atoi(buffer);
03713 
03714                                 model->set_learn_p(i++, value);
03715 
03716                                 if (value<0)
03717                                 {
03718                                     finished=true;
03719                                     break;
03720                                 }
03721                                 if (value>=N)
03722                                     SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03723                             }
03724                             else
03725                                 break;
03726 
03727                             comma_or_space(file);
03728                         }
03729 
03730                         close_bracket(file);
03731                         if (verbose) 
03732                             SG_DEBUG( "%i Entries",i-1) ;
03733 
03734                         if (finished)
03735                         {
03736                             received_params|=GOTlearn_p;
03737                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03738                         }
03739                         else
03740                             state=END;
03741                     }
03742                     break;
03743                 case GET_learn_q:
03744                     if (value=='=')
03745                     {
03746                         open_bracket(file);
03747                         bool finished=false;
03748                         int32_t i=0;
03749 
03750                         if (verbose) 
03751                             SG_DEBUG( "\nlearn terminal states: ") ;
03752                         while (!finished)
03753                         {
03754                             if (get_numbuffer(file, buffer, 4)) //get num
03755                             {
03756                                 value=atoi(buffer);
03757                                 model->set_learn_q(i++, value);
03758 
03759                                 if (value<0)
03760                                 {
03761                                     finished=true;
03762                                     break;
03763                                 }
03764                                 if (value>=N)
03765                                     SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03766                             }
03767                             else
03768                                 break;
03769 
03770                             comma_or_space(file);
03771                         }
03772 
03773                         close_bracket(file);
03774                         if (verbose) 
03775                             SG_DEBUG( "%i Entries",i-1) ;
03776 
03777                         if (finished)
03778                         {
03779                             received_params|=GOTlearn_q;
03780                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03781                         }
03782                         else
03783                             state=END;
03784                     }
03785                     break;
03786                 case GET_const_a:
03787                     if (value=='=')
03788                     {
03789                         open_bracket(file);
03790                         bool finished=false;
03791                         int32_t i=0;
03792 
03793                         if (verbose) 
03794 #ifdef USE_HMMDEBUG
03795                             SG_DEBUG( "\nconst for transition matrix: \n") ;
03796 #else
03797                             SG_DEBUG( "\nconst for transition matrix: ") ;
03798 #endif
03799                         while (!finished)
03800                         {
03801                             open_bracket(file);
03802 
03803                             if (get_numbuffer(file, buffer, 4)) //get num
03804                             {
03805                                 value=atoi(buffer);
03806                                 model->set_const_a(i++, value);
03807 
03808                                 if (value<0)
03809                                 {
03810                                     finished=true;
03811                                     model->set_const_a(i++, value);
03812                                     model->set_const_a_val((int32_t)i/2 - 1, value);
03813                                     break;
03814                                 }
03815                                 if (value>=N)
03816                                     SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03817                             }
03818                             else
03819                                 break;
03820 
03821                             comma_or_space(file);
03822 
03823                             if (get_numbuffer(file, buffer, 4)) //get num
03824                             {
03825                                 value=atoi(buffer);
03826                                 model->set_const_a(i++, value);
03827 
03828                                 if (value<0)
03829                                 {
03830                                     finished=true;
03831                                     model->set_const_a_val((int32_t)i/2 - 1, value);
03832                                     break;
03833                                 }
03834                                 if (value>=N)
03835                                     SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03836                             }
03837                             else
03838                                 break;
03839 
03840                             if (!comma_or_space(file))
03841                                 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03842                             else
03843                                 if (get_numbuffer(file, buffer, 10))    //get num
03844                                 {
03845                                     float64_t dvalue=atof(buffer);
03846                                     model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03847                                     if (dvalue<0)
03848                                     {
03849                                         finished=true;
03850                                         break;
03851                                     }
03852                                     if ((dvalue>1.0) || (dvalue<0.0))
03853                                         SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03854                                 }
03855                                 else
03856                                     model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03857 
03858 #ifdef USE_HMMDEBUG
03859                             if (verbose)
03860                                 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03861 #endif
03862                             close_bracket(file);
03863                         }
03864                         close_bracket(file);
03865                         if (verbose) 
03866                             SG_DEBUG( "%i Entries",(int)i/2-1) ;
03867 
03868                         if (finished)
03869                         {
03870                             received_params|=GOTconst_a;
03871                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03872                         }
03873                         else
03874                             state=END;
03875                     }
03876                     break;
03877 
03878                 case GET_const_b:
03879                     if (value=='=')
03880                     {
03881                         open_bracket(file);
03882                         bool finished=false;
03883                         int32_t i=0;
03884 
03885                         if (verbose) 
03886 #ifdef USE_HMMDEBUG
03887                             SG_DEBUG( "\nconst for emission matrix:   \n") ;
03888 #else
03889                             SG_DEBUG( "\nconst for emission matrix:   ") ;
03890 #endif
03891                         while (!finished)
03892                         {
03893                             open_bracket(file);
03894                             int32_t combine=0;
03895                             for (int32_t j=0; j<3; j++)
03896                             {
03897                                 if (get_numbuffer(file, buffer, 10))    //get num
03898                                 {
03899                                     if (j==0)
03900                                     {
03901                                         value=atoi(buffer);
03902 
03903                                         model->set_const_b(i++, value);
03904 
03905                                         if (value<0)
03906                                         {
03907                                             finished=true;
03908                                             //model->set_const_b_val((int32_t)(i-1)/2, value);
03909                                             break;
03910                                         }
03911                                         if (value>=N)
03912                                             SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03913                                     }
03914                                     else if (j==2)
03915                                     {
03916                                         float64_t dvalue=atof(buffer);
03917                                         model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03918                                         if (dvalue<0)
03919                                         {
03920                                             finished=true;
03921                                             break;
03922                                         } ;
03923                                         if ((dvalue>1.0) || (dvalue<0.0))
03924                                             SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03925                                     }
03926                                     else
03927                                     {
03928                                         value=atoi(buffer);
03929                                         combine= value;
03930                                     } ;
03931                                 }
03932                                 else
03933                                 {
03934                                     if (j==2)
03935                                         model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03936                                     break;
03937                                 } ;
03938                                 if (j<2)
03939                                     if ((!comma_or_space(file)) && (j==1))
03940                                     {
03941                                         model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03942                                         break ;
03943                                     } ;
03944                             }
03945                             close_bracket(file);
03946                             model->set_const_b(i++, combine);
03947                             if (combine>=M)
03948                                 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03949 #ifdef USE_HMMDEBUG
03950                             if (verbose && !finished)
03951                                 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03952 #endif
03953                         }
03954                         close_bracket(file);
03955                         if (verbose) 
03956                             SG_DEBUG( "%i Entries",(int)i/2-1) ;
03957 
03958                         if (finished)
03959                         {
03960                             received_params|=GOTconst_b;
03961                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03962                         }
03963                         else
03964                             state=END;
03965                     }
03966                     break;
03967                 case GET_const_p:
03968                     if (value=='=')
03969                     {
03970                         open_bracket(file);
03971                         bool finished=false;
03972                         int32_t i=0;
03973 
03974                         if (verbose) 
03975 #ifdef USE_HMMDEBUG
03976                             SG_DEBUG( "\nconst for start states:     \n") ;
03977 #else
03978                             SG_DEBUG( "\nconst for start states:     ") ;
03979 #endif
03980                         while (!finished)
03981                         {
03982                             open_bracket(file);
03983 
03984                             if (get_numbuffer(file, buffer, 4)) //get num
03985                             {
03986                                 value=atoi(buffer);
03987                                 model->set_const_p(i, value);
03988 
03989                                 if (value<0)
03990                                 {
03991                                     finished=true;
03992                                     model->set_const_p_val(i++, value);
03993                                     break;
03994                                 }
03995                                 if (value>=N)
03996                                     SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03997 
03998                             }
03999                             else
04000                                 break;
04001 
04002                             if (!comma_or_space(file))
04003                                 model->set_const_p_val(i++, 1.0);
04004                             else
04005                                 if (get_numbuffer(file, buffer, 10))    //get num
04006                                 {
04007                                     float64_t dvalue=atof(buffer);
04008                                     model->set_const_p_val(i++, dvalue);
04009                                     if (dvalue<0)
04010                                     {
04011                                         finished=true;
04012                                         break;
04013                                     }
04014                                     if ((dvalue>1) || (dvalue<0))
04015                                         SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
04016                                 }
04017                                 else
04018                                     model->set_const_p_val(i++, 1.0);
04019 
04020                             close_bracket(file);
04021 
04022 #ifdef USE_HMMDEBUG
04023                             if (verbose)
04024                                 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
04025 #endif
04026                         }
04027                         if (verbose) 
04028                             SG_DEBUG( "%i Entries",i-1) ;
04029 
04030                         close_bracket(file);
04031 
04032                         if (finished)
04033                         {
04034                             received_params|=GOTconst_p;
04035                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04036                         }
04037                         else
04038                             state=END;
04039                     }
04040                     break;
04041                 case GET_const_q:
04042                     if (value=='=')
04043                     {
04044                         open_bracket(file);
04045                         bool finished=false;
04046                         if (verbose) 
04047 #ifdef USE_HMMDEBUG
04048                             SG_DEBUG( "\nconst for terminal states: \n") ;
04049 #else
04050                             SG_DEBUG( "\nconst for terminal states: ") ;
04051 #endif
04052                         int32_t i=0;
04053 
04054                         while (!finished)
04055                         {
04056                             open_bracket(file) ;
04057                             if (get_numbuffer(file, buffer, 4)) //get num
04058                             {
04059                                 value=atoi(buffer);
04060                                 model->set_const_q(i, value);
04061                                 if (value<0)
04062                                 {
04063                                     finished=true;
04064                                     model->set_const_q_val(i++, value);
04065                                     break;
04066                                 }
04067                                 if (value>=N)
04068                                     SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
04069                             }
04070                             else
04071                                 break;
04072 
04073                             if (!comma_or_space(file))
04074                                 model->set_const_q_val(i++, 1.0);
04075                             else
04076                                 if (get_numbuffer(file, buffer, 10))    //get num
04077                                 {
04078                                     float64_t dvalue=atof(buffer);
04079                                     model->set_const_q_val(i++, dvalue);
04080                                     if (dvalue<0)
04081                                     {
04082                                         finished=true;
04083                                         break;
04084                                     }
04085                                     if ((dvalue>1) || (dvalue<0))
04086                                         SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
04087                                 }
04088                                 else
04089                                     model->set_const_q_val(i++, 1.0);
04090 
04091                             close_bracket(file);
04092 #ifdef USE_HMMDEBUG
04093                             if (verbose)
04094                                 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
04095 #endif
04096                         }
04097                         if (verbose)
04098                             SG_DEBUG( "%i Entries",i-1) ;
04099 
04100                         close_bracket(file);
04101 
04102                         if (finished)
04103                         {
04104                             received_params|=GOTconst_q;
04105                             state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
04106                         }
04107                         else
04108                             state=END;
04109                     }
04110                     break;
04111                 case COMMENT:
04112                     if (value==EOF)
04113                         state=END;
04114                     else if (value=='\n')
04115                         state=INITIAL;
04116                     break;
04117 
04118                 default:
04119                     break;
04120             }
04121         }
04122     }
04123 
04124     /*result=((received_params&(GOTlearn_a | GOTconst_a))!=0) ; 
04125       result=result && ((received_params&(GOTlearn_b | GOTconst_b))!=0) ; 
04126       result=result && ((received_params&(GOTlearn_p | GOTconst_p))!=0) ; 
04127       result=result && ((received_params&(GOTlearn_q | GOTconst_q))!=0) ; */
04128     result=true ;
04129     if (result)
04130     {
04131         model->sort_learn_a() ;
04132         model->sort_learn_b() ;
04133         if (init)
04134         {
04135             init_model_defined();
04136             convert_to_log();
04137         } ;
04138     }
04139     if (verbose)
04140         SG_DEBUG( "\n") ;
04141     return result;
04142 }
04143 
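/* Minimal usage sketch for load_definitions() (editorial addition; file name
 * and call order are illustrative):
 *
 *   FILE* def=fopen("train.trn", "r");
 *   if (def)
 *   {
 *       // verbose=true prints the parsed entries; init=true additionally calls
 *       // init_model_defined() and convert_to_log(), as seen in the code above
 *       hmm.load_definitions(def, true, true);
 *       fclose(def);
 *   }
 */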
04144 /*
04145    -format specs: model_file (model.hmm)
04146    % HMM - specification
04147    % N  - number of states
04148    % M  - number of observation_tokens
04149    % a is state_transition_matrix 
04150    % size(a)= [N,N]
04151    %
04152    % b is observation_per_state_matrix
04153    % size(b)= [N,M]
04154    %
04155    % p is initial distribution
04156    % size(p)= [1, N]
04157 
04158    N=<int32_t>; 
04159    M=<int32_t>;
04160 
04161    p=[<DOUBLE>,<DOUBLE>...<DOUBLE>];
04162    q=[<DOUBLE>,<DOUBLE>...<DOUBLE>];
04163    a=[ [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04164    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04165    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04166    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04167    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04168    ];
04169 
04170    b=[ [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04171    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04172    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04173    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04174    [<DOUBLE>,<DOUBLE>...<DOUBLE>];
04175    ];
04176    */
04177 
04178 bool CHMM::save_model(FILE* file)
04179 {
04180     bool result=false;
04181     int32_t i,j;
04182     const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
04183 
04184     if (file)
04185     {
04186         fprintf(file,"%s","% HMM - specification\n% N  - number of states\n% M  - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
04187         fprintf(file,"N=%d;\n",N);
04188         fprintf(file,"M=%d;\n",M);
04189 
04190         fprintf(file,"p=[");
04191         for (i=0; i<N; i++)
04192         {
04193             if (i<N-1) {
04194                 if (finite(get_p(i)))
04195                     fprintf(file, "%e,", (double)get_p(i));
04196                 else
04197                     fprintf(file, "%f,", NAN_REPLACEMENT);          
04198             }
04199             else {
04200                 if (finite(get_p(i)))
04201                     fprintf(file, "%e", (double)get_p(i));
04202                 else
04203                     fprintf(file, "%f", NAN_REPLACEMENT);
04204             }
04205         }
04206 
04207         fprintf(file,"];\n\nq=[");
04208         for (i=0; i<N; i++)
04209         {
04210             if (i<N-1) {
04211                 if (finite(get_q(i)))
04212                     fprintf(file, "%e,", (double)get_q(i));
04213                 else
04214                     fprintf(file, "%f,", NAN_REPLACEMENT);          
04215             }
04216             else {
04217                 if (finite(get_q(i)))
04218                     fprintf(file, "%e", (double)get_q(i));
04219                 else
04220                     fprintf(file, "%f", NAN_REPLACEMENT);
04221             }
04222         }
04223         fprintf(file,"];\n\na=[");
04224 
04225         for (i=0; i<N; i++)
04226         {
04227             fprintf(file, "\t[");
04228 
04229             for (j=0; j<N; j++)
04230             {
04231                 if (j<N-1) {
04232                     if (finite(get_a(i,j)))
04233                         fprintf(file, "%e,", (double)get_a(i,j));
04234                     else
04235                         fprintf(file, "%f,", NAN_REPLACEMENT);
04236                 }
04237                 else {
04238                     if (finite(get_a(i,j)))
04239                         fprintf(file, "%e];\n", (double)get_a(i,j));
04240                     else
04241                         fprintf(file, "%f];\n", NAN_REPLACEMENT);
04242                 }
04243             }
04244         }
04245 
04246         fprintf(file,"  ];\n\nb=[");
04247 
04248         for (i=0; i<N; i++)
04249         {
04250             fprintf(file, "\t[");
04251 
04252             for (j=0; j<M; j++)
04253             {
04254                 if (j<M-1) {
04255                     if (finite(get_b(i,j)))
04256                         fprintf(file, "%e,",  (double)get_b(i,j));
04257                     else
04258                         fprintf(file, "%f,", NAN_REPLACEMENT);
04259                 }
04260                 else {
04261                     if (finite(get_b(i,j)))
04262                         fprintf(file, "%e];\n", (double)get_b(i,j));
04263                     else
04264                         fprintf(file, "%f];\n", NAN_REPLACEMENT);
04265                 }
04266             }
04267         }
04268         result= (fprintf(file,"  ];\n") == 5);
04269     }
04270 
04271     return result;
04272 }
04273 
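/* Editorial note: save_model() emits the same textual layout that load_model()
 * parses (N, M, p, q, a, b), so a saved model can be re-read later, e.g.:
 *
 *   FILE* out=fopen("model.hmm", "w");
 *   if (out) { hmm.save_model(out); fclose(out); }
 *
 * Non-finite parameters are written out as ALMOST_NEG_INFTY (NAN_REPLACEMENT).
 */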
04274 #ifndef NOVIT
04275 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04276 {
04277     T_STATES* result = NULL;
04278 
04279     prob = best_path(dim);
04280     result = new T_STATES[p_observations->get_vector_length(dim)];
04281 
04282     for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04283         result[i]=PATH(dim)[i];
04284 
04285     return result;
04286 }
04287 
04288 bool CHMM::save_path(FILE* file)
04289 {
04290     bool result=false;
04291 
04292     if (file)
04293     {
04294         for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04295         {
04296             if (dim%100==0)
04297                 SG_PRINT( "%i..", dim) ;
04298             float64_t prob = best_path(dim);
04299             fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04300             for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04301                 fprintf(file,"%d ", PATH(dim)[i]);
04302             fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04303             fprintf(file,"\n\n") ;
04304         }
04305         SG_DONE();
04306         result=true;
04307     }
04308 
04309     return result;
04310 }
04311 #endif // NOVIT
04312 
04313 bool CHMM::save_likelihood_bin(FILE* file)
04314 {
04315     bool result=false;
04316 
04317     if (file)
04318     {
04319         for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04320         {
04321             float32_t prob= (float32_t) model_probability(dim);
04322             fwrite(&prob, sizeof(float32_t), 1, file);
04323         }
04324         result=true;
04325     }
04326 
04327     return result;
04328 }
04329 
04330 bool CHMM::save_likelihood(FILE* file)
04331 {
04332     bool result=false;
04333 
04334     if (file)
04335     {
04336         fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04337 
04338         fprintf(file, "P=[");
04339         for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04340             fprintf(file, "%e ", (double) model_probability(dim));
04341 
04342         fprintf(file,"];");
04343         result=true;
04344     }
04345 
04346     return result;
04347 }
04348 
04349 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04350 
04351 bool CHMM::save_model_bin(FILE* file)
04352 {
04353     int32_t i,j,q, num_floats=0 ;
04354     if (!model)
04355     {
04356         if (file)
04357         {
04358             // write id
04359             FLOATWRITE(file, (float32_t)CMath::INFTY);    
04360             FLOATWRITE(file, (float32_t) 1);      
04361 
04362             //derivatives log(dp),log(dq)
04363             for (i=0; i<N; i++)
04364                 FLOATWRITE(file, get_p(i));   
04365             SG_INFO( "wrote %i parameters for p\n",N) ;
04366 
04367             for (i=0; i<N; i++)
04368                 FLOATWRITE(file, get_q(i)) ;   
04369             SG_INFO( "wrote %i parameters for q\n",N) ;
04370 
04371             //derivatives log(da),log(db)
04372             for (i=0; i<N; i++)
04373                 for (j=0; j<N; j++)
04374                     FLOATWRITE(file, get_a(i,j));
04375             SG_INFO( "wrote %i parameters for a\n",N*N) ;
04376 
04377             for (i=0; i<N; i++)
04378                 for (j=0; j<M; j++)
04379                     FLOATWRITE(file, get_b(i,j));
04380             SG_INFO( "wrote %i parameters for b\n",N*M) ;
04381 
04382             // write id
04383             FLOATWRITE(file, (float32_t)CMath::INFTY);    
04384             FLOATWRITE(file, (float32_t) 3);      
04385 
04386             // write number of parameters
04387             FLOATWRITE(file, (float32_t) N);      
04388             FLOATWRITE(file, (float32_t) N);      
04389             FLOATWRITE(file, (float32_t) N*N);    
04390             FLOATWRITE(file, (float32_t) N*M);    
04391             FLOATWRITE(file, (float32_t) N);      
04392             FLOATWRITE(file, (float32_t) M);      
04393         } ;
04394     } 
04395     else
04396     {
04397         if (file)
04398         {
04399             int32_t num_p, num_q, num_a, num_b ;
04400             // write id
04401             FLOATWRITE(file, (float32_t)CMath::INFTY);    
04402             FLOATWRITE(file, (float32_t) 2);      
04403 
04404             for (i=0; model->get_learn_p(i)>=0; i++)
04405                 FLOATWRITE(file, get_p(model->get_learn_p(i)));   
04406             num_p=i ;
04407             SG_INFO( "wrote %i parameters for p\n",num_p) ;
04408 
04409             for (i=0; model->get_learn_q(i)>=0; i++)
04410                 FLOATWRITE(file, get_q(model->get_learn_q(i)));    
04411             num_q=i ;
04412             SG_INFO( "wrote %i parameters for q\n",num_q) ;
04413 
04414             //derivatives log(da),log(db)
04415             for (q=0; model->get_learn_a(q,1)>=0; q++)
04416             {
04417                 i=model->get_learn_a(q,0) ;
04418                 j=model->get_learn_a(q,1) ;
04419                 FLOATWRITE(file, (float32_t)i);
04420                 FLOATWRITE(file, (float32_t)j);
04421                 FLOATWRITE(file, get_a(i,j));
04422             } ;
04423             num_a=q ;
04424             SG_INFO( "wrote %i parameters for a\n",num_a) ;       
04425 
04426             for (q=0; model->get_learn_b(q,0)>=0; q++)
04427             {
04428                 i=model->get_learn_b(q,0) ;
04429                 j=model->get_learn_b(q,1) ;
04430                 FLOATWRITE(file, (float32_t)i);
04431                 FLOATWRITE(file, (float32_t)j);
04432                 FLOATWRITE(file, get_b(i,j));
04433             } ;
04434             num_b=q ;
04435             SG_INFO( "wrote %i parameters for b\n",num_b) ;
04436 
04437             // write id
04438             FLOATWRITE(file, (float32_t)CMath::INFTY);    
04439             FLOATWRITE(file, (float32_t) 3);      
04440 
04441             // write number of parameters
04442             FLOATWRITE(file, (float32_t) num_p);      
04443             FLOATWRITE(file, (float32_t) num_q);      
04444             FLOATWRITE(file, (float32_t) num_a);      
04445             FLOATWRITE(file, (float32_t) num_b);      
04446             FLOATWRITE(file, (float32_t) N);      
04447             FLOATWRITE(file, (float32_t) M);      
04448         } ;
04449     } ;
04450     return true ;
04451 }
04452 
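/* Editorial summary of the binary layout written by save_model_bin() above
 * (derived from the FLOATWRITE calls; every field is a float32_t):
 *
 *   without a model:  [INFTY, 1]  p[0..N-1]  q[0..N-1]  a[0..N*N-1]  b[0..N*M-1]
 *                     [INFTY, 3]  N  N  N*N  N*M  N  M
 *   with a model:     [INFTY, 2]  p over learn_p  q over learn_q
 *                     {i, j, a(i,j)} per learn_a entry  {i, j, b(i,j)} per learn_b entry
 *                     [INFTY, 3]  num_p  num_q  num_a  num_b  N  M
 */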
04453 #ifndef NOVIT
04454 bool CHMM::save_path_derivatives(FILE* logfile)
04455 {
04456     int32_t dim,i,j;
04457     float64_t prob;
04458 
04459     if (logfile)
04460     {
04461         fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}*b_{Q1}(O_1)*a_{Q1,Q2}*b_{Q2}(O_2)*...*a_{Q_{T-1},Q_T}*b_{Q_T}(O_T)*q_{Q_T} against p,q,a,b\n%%\n");
04462         fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04463         fprintf(logfile,"%%            [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04464         fprintf(logfile,"%%                            .............................                                \n");
04465         fprintf(logfile,"%%            [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04466         fprintf(logfile,"%%          ];\n%%\n\ndPr(log()) = [\n");
04467     }
04468     else
04469         return false ;
04470 
04471     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04472     {   
04473         prob=best_path(dim);
04474 
04475         fprintf(logfile, "[ ");
04476 
04477         //derivatives dlogp,dlogq
04478         for (i=0; i<N; i++)
04479             fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );
04480 
04481         for (i=0; i<N; i++)
04482             fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );
04483 
04484         //derivatives dloga,dlogb
04485         for (i=0; i<N; i++)
04486             for (j=0; j<N; j++)
04487                 fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );
04488 
04489         for (i=0; i<N; i++)
04490             for (j=0; j<M; j++)
04491                 fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );
04492 
04493         fseek(logfile,ftell(logfile)-1,SEEK_SET);
04494         fprintf(logfile, " ];\n");
04495     }
04496 
04497     fprintf(logfile, "];");
04498 
04499     return true ;
04500 }
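For reference, the quantity differentiated above is the joint probability of the observation sequence and the Viterbi path, as stated in the header comment written to the log file. In log space it decomposes additively:

    \log P[O,Q|\lambda] = \log p_{Q_1} + \sum_{t=1}^{T-1} \log a_{Q_t Q_{t+1}} + \sum_{t=1}^{T} \log b_{Q_t}(O_t) + \log q_{Q_T}

Assuming the path_derivative_* accessors differentiate with respect to the log-parameters, the entry written for a_{ij} is simply the number of i -> j transitions on the path Q, and the entry for b_i(o) the number of times state i emits symbol o along Q.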
04501 
04502 bool CHMM::save_path_derivatives_bin(FILE* logfile)
04503 {
04504     bool result=false;
04505     int32_t dim,i,j,q;
04506     float64_t prob=0 ;
04507     int32_t num_floats=0 ;
04508 
04509     float64_t sum_prob=0.0 ;
04510     if (!model)
04511         SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
04512     else
04513         SG_INFO( "writing derivatives of changed weights only\n") ;
04514 
04515     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04516     {             
04517         if (dim%100==0)
04518         {
04519             SG_PRINT( ".") ; 
04520 
04521         } ;
04522 
04523         prob=best_path(dim);
04524         sum_prob+=prob ;
04525 
04526         if (!model)
04527         {
04528             if (logfile)
04529             {
04530                 // write prob
04531                 FLOATWRITE(logfile, prob);    
04532 
04533                 for (i=0; i<N; i++)
04534                     FLOATWRITE(logfile, path_derivative_p(i,dim));
04535 
04536                 for (i=0; i<N; i++)
04537                     FLOATWRITE(logfile, path_derivative_q(i,dim));
04538 
04539                 for (i=0; i<N; i++)
04540                     for (j=0; j<N; j++)
04541                         FLOATWRITE(logfile, path_derivative_a(i,j,dim));
04542 
04543                 for (i=0; i<N; i++)
04544                     for (j=0; j<M; j++)
04545                         FLOATWRITE(logfile, path_derivative_b(i,j,dim));
04546 
04547             }
04548         } 
04549         else
04550         {
04551             if (logfile)
04552             {
04553                 // write prob
04554                 FLOATWRITE(logfile, prob);    
04555 
04556                 for (i=0; model->get_learn_p(i)>=0; i++)
04557                     FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));
04558 
04559                 for (i=0; model->get_learn_q(i)>=0; i++)
04560                     FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));
04561 
04562                 for (q=0; model->get_learn_a(q,0)>=0; q++)
04563                 {
04564                     i=model->get_learn_a(q,0) ;
04565                     j=model->get_learn_a(q,1) ;
04566                     FLOATWRITE(logfile, path_derivative_a(i,j,dim));
04567                 } ;
04568 
04569                 for (q=0; model->get_learn_b(q,0)>=0; q++)
04570                 {
04571                     i=model->get_learn_b(q,0) ;
04572                     j=model->get_learn_b(q,1) ;
04573                     FLOATWRITE(logfile, path_derivative_b(i,j,dim));
04574                 } ;
04575             }
04576         } ;      
04577     }
04578     save_model_bin(logfile) ;
04579 
04580     result=true;
04581     SG_PRINT( "\n") ;
04582     return result;
04583 }
04584 #endif // NOVIT
04585 
04586 bool CHMM::save_model_derivatives_bin(FILE* file)
04587 {
04588     bool result=false;
04589     int32_t dim,i,j,q ;
04590     int32_t num_floats=0 ;
04591 
04592     if (!model)
04593         SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
04594     else
04595         SG_INFO( "writing derivatives of changed weights only\n") ;
04596 
04597 #ifdef USE_HMMPARALLEL
04598     pthread_t *threads=new pthread_t[parallel.get_num_threads()] ;
04599     S_THREAD_PARAM *params=new S_THREAD_PARAM[parallel.get_num_threads()] ;
04600 #endif
04601 
04602     for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04603     {             
04604         if (dim%20==0)
04605         {
04606             SG_PRINT( ".") ; 
04607 
04608         } ;
04609 
04610 #ifdef USE_HMMPARALLEL
04611         if (dim%parallel.get_num_threads()==0)
04612         {
04613             int32_t i ;
04614             for (i=0; i<parallel.get_num_threads(); i++)
04615                 if (dim+i<p_observations->get_num_vectors())
04616                 {
04617                     params[i].hmm=this ;
04618                     params[i].dim=dim+i ;
04619 #ifdef SUNOS
04620                     thr_create(NULL,0,bw_dim_prefetch, (void*)&params[i], PTHREAD_SCOPE_SYSTEM, &threads[i]) ;
04621 #else // SUNOS
04622                     pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
04623 #endif // SUNOS
04624                 } ;
04625             for (i=0; i<parallel.get_num_threads(); i++)
04626                 if (dim+i<p_observations->get_num_vectors())
04627                 {
04628                     void * ret ;
04629                     pthread_join(threads[i], &ret) ;
04630                 } ;
04631         } ;
04632 #endif
04633 
04634         float64_t prob=model_probability(dim) ;
04635         if (!model)
04636         {
04637             if (file)
04638             {
04639                 // write prob
04640                 FLOATWRITE(file, prob);   
04641 
04642                 //derivatives log(dp),log(dq)
04643                 for (i=0; i<N; i++)
04644                     FLOATWRITE(file, model_derivative_p(i,dim));      
04645 
04646                 for (i=0; i<N; i++)
04647                     FLOATWRITE(file, model_derivative_q(i,dim));    
04648 
04649                 //derivatives log(da),log(db)
04650                 for (i=0; i<N; i++)
04651                     for (j=0; j<N; j++)
04652                         FLOATWRITE(file, model_derivative_a(i,j,dim));
04653 
04654                 for (i=0; i<N; i++)
04655                     for (j=0; j<M; j++)
04656                         FLOATWRITE(file, model_derivative_b(i,j,dim));
04657 
04658                 if (dim==0)
04659                     SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04660             } ;
04661         } 
04662         else
04663         {
04664             if (file)
04665             {
04666                 // write prob
04667                 FLOATWRITE(file, prob);   
04668 
04669                 for (i=0; model->get_learn_p(i)>=0; i++)
04670                     FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));      
04671 
04672                 for (i=0; model->get_learn_q(i)>=0; i++)
04673                     FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));    
04674 
04675                 //derivatives log(da),log(db)
04676                 for (q=0; model->get_learn_a(q,1)>=0; q++)
04677                 {
04678                     i=model->get_learn_a(q,0) ;
04679                     j=model->get_learn_a(q,1) ;
04680                     FLOATWRITE(file, model_derivative_a(i,j,dim));
04681                 } ;
04682 
04683                 for (q=0; model->get_learn_b(q,0)>=0; q++)
04684                 {
04685                     i=model->get_learn_b(q,0) ;
04686                     j=model->get_learn_b(q,1) ;
04687                     FLOATWRITE(file, model_derivative_b(i,j,dim));
04688                 } ;
04689                 if (dim==0)
04690                     SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
04691             } ;
04692         } ;
04693     }
04694     save_model_bin(file) ;
04695 
04696 #ifdef USE_HMMPARALLEL
04697     delete[] threads ;
04698     delete[] params ;
04699 #endif
04700 
04701     result=true;
04702     SG_PRINT( "\n") ;
04703     return result;
04704 }
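A minimal reader sketch for the binary dump produced above, assuming FLOATWRITE emits consecutive raw float32_t values and that no model definition was loaded (so each record is the probability followed by N + N + N*N + N*M derivatives); the helper name is hypothetical:

    #include <cstdio>
    #include <vector>

    // Hypothetical helper (not part of CHMM): read one derivative record.
    static bool read_derivative_record(FILE* f, int N, int M,
        float& prob, std::vector<float>& derivs)
    {
        if (fread(&prob, sizeof(float), 1, f)!=1)
            return false;                              // end of file or short read
        derivs.resize((size_t)(N + N + N*N + N*M));    // dp, dq, da, db in that order
        return fread(derivs.data(), sizeof(float), derivs.size(), f)==derivs.size();
    }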
04705 
04706 bool CHMM::save_model_derivatives(FILE* file)
04707 {
04708     bool result=false;
04709     int32_t dim,i,j;
04710 
04711     if (file)
04712     {
04713 
04714         fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q} p_{Q_1}*b_{Q_1}(O_1)*a_{Q_1,Q_2}*b_{Q_2}(O_2)*...*a_{Q_{T-1},Q_T}*b_{Q_T}(O_T)*q_{Q_T} against p,q,a,b\n%%\n");
04715         fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04716         fprintf(file,"%%            [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04717         fprintf(file,"%%                            .............................                                \n");
04718         fprintf(file,"%%            [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NM]\n");
04719         fprintf(file,"%%          ];\n%%\n\nlog(dPr) = [\n");
04720 
04721 
04722         for (dim=0; dim<p_observations->get_num_vectors(); dim++)
04723         {   
04724             fprintf(file, "[ ");
04725 
04726             //derivatives log(dp),log(dq)
04727             for (i=0; i<N; i++)
04728                 fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );     //log (dp)
04729 
04730             for (i=0; i<N; i++)
04731                 fprintf(file,"%e, ", (double) model_derivative_q(i, dim) ); //log (dq)
04732 
04733             //derivatives log(da),log(db)
04734             for (i=0; i<N; i++)
04735                 for (j=0; j<N; j++)
04736                     fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );
04737 
04738             for (i=0; i<N; i++)
04739                 for (j=0; j<M; j++)
04740                     fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );
04741 
04742             fseek(file,ftell(file)-1,SEEK_SET);
04743             fprintf(file, " ];\n");
04744         }
04745 
04746 
04747         fprintf(file, "];");
04748 
04749         result=true;
04750     }
04751     return result;
04752 }
04753 
04754 bool CHMM::check_model_derivatives_combined()
04755 {
04756     //  bool result=false;
04757     const float64_t delta=5e-4 ;
04758 
04759     int32_t i ;
04760     //derivatives log(da)
04761     /*  for (i=0; i<N; i++)
04762         {
04763         for (int32_t j=0; j<N; j++)
04764         {
04765         float64_t old_a=get_a(i,j) ;
04766 
04767         set_a(i,j, log(exp(old_a)-delta)) ;
04768         invalidate_model() ;
04769         float64_t prob_old=exp(model_probability(-1)*p_observations->get_num_vectors()) ;
04770 
04771         set_a(i,j, log(exp(old_a)+delta)) ;
04772         invalidate_model() ; 
04773         float64_t prob_new=exp(model_probability(-1)*p_observations->get_num_vectors());
04774 
04775         float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04776 
04777         set_a(i,j, old_a) ;
04778         invalidate_model() ;
04779 
04780         float64_t prod_prob=model_probability(-1)*p_observations->get_num_vectors() ;
04781 
04782         float64_t deriv_calc=0 ;
04783         for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04784         deriv_calc+=exp(model_derivative_a(i, j, dim)+
04785         prod_prob-model_probability(dim)) ;
04786 
04787         SG_DEBUG("da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc,  deriv, 100.0*(deriv-deriv_calc)/deriv_calc);      
04788         } ;
04789         } ;*/
04791     //derivatives log(db)
04791     i=0;//for (i=0; i<N; i++)
04792     {
04793         for (int32_t j=0; j<M; j++)
04794         {
04795             float64_t old_b=get_b(i,j) ;
04796 
04797             set_b(i,j, log(exp(old_b)-delta)) ;
04798             invalidate_model() ;
04799             float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;
04800 
04801             set_b(i,j, log(exp(old_b)+delta)) ;
04802             invalidate_model() ; 
04803             float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());
04804 
04805             float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04806 
04807             set_b(i,j, old_b) ;
04808             invalidate_model() ;
04809 
04810             float64_t deriv_calc=0 ;
04811             for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04812             {
04813                 deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
04814                 if (j==1)
04815                     SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
04816             } ;
04817 
04818             SG_ERROR( "b(%i,%i)=%e  db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc,  deriv, 100.0*(deriv-deriv_calc)/deriv_calc);     
04819         } ;
04820     } ;
04821     return true ;
04822 }
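check_model_derivatives_combined() compares each analytic derivative against a symmetric finite difference of the model probability, taken with respect to the un-logged parameter and with step delta = 5e-4:

    \frac{\partial f}{\partial \theta} \approx \frac{f(\theta+\delta) - f(\theta-\delta)}{2\delta}

If the analytic code is correct, the relative error printed in the last column should shrink roughly with delta squared, until floating point noise dominates.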
04823 
04824 bool CHMM::check_model_derivatives()
04825 {
04826     bool result=false;
04827     const float64_t delta=3e-4 ;
04828 
04829     for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04830     {   
04831         int32_t i ;
04833         //derivatives log(da),log(db)
04833         for (i=0; i<N; i++)
04834         {
04835             for (int32_t j=0; j<N; j++)
04836             {
04837                 float64_t old_a=get_a(i,j) ;
04838 
04839                 set_a(i,j, log(exp(old_a)-delta)) ;
04840                 invalidate_model() ;
04841                 float64_t prob_old=exp(model_probability(dim)) ;
04842 
04843                 set_a(i,j, log(exp(old_a)+delta)) ;
04844                 invalidate_model() ;
04845                 float64_t prob_new=exp(model_probability(dim));
04846 
04847                 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04848 
04849                 set_a(i,j, old_a) ;
04850                 invalidate_model() ;
04851                 float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;
04852 
04853                 SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc,  deriv, 100.0*(deriv-deriv_calc)/deriv_calc);     
04854                 invalidate_model() ;
04855             } ;
04856         } ;
04857         for (i=0; i<N; i++)
04858         {
04859             for (int32_t j=0; j<M; j++)
04860             {
04861                 float64_t old_b=get_b(i,j) ;
04862 
04863                 set_b(i,j, log(exp(old_b)-delta)) ;
04864                 invalidate_model() ;
04865                 float64_t prob_old=exp(model_probability(dim)) ;
04866 
04867                 set_b(i,j, log(exp(old_b)+delta)) ;
04868                 invalidate_model() ;            
04869                 float64_t prob_new=exp(model_probability(dim));
04870 
04871                 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04872 
04873                 set_b(i,j, old_b) ;
04874                 invalidate_model() ;
04875                 float64_t deriv_calc=exp(model_derivative_b(i, j, dim));
04876 
04877                 SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));        
04878             } ;
04879         } ;
04880 
04881 #ifdef TEST
04882         for (i=0; i<N; i++)
04883         {
04884             float64_t old_p=get_p(i) ;
04885 
04886             set_p(i, log(exp(old_p)-delta)) ;
04887             invalidate_model() ;
04888             float64_t prob_old=exp(model_probability(dim)) ;
04889 
04890             set_p(i, log(exp(old_p)+delta)) ;
04891             invalidate_model() ;        
04892             float64_t prob_new=exp(model_probability(dim));
04893             float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04894 
04895             set_p(i, old_p) ;
04896             invalidate_model() ;
04897             float64_t deriv_calc=exp(model_derivative_p(i, dim));
04898 
04899             //if (fabs(deriv_calc_old-deriv)>1e-4)
04900             SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);       
04901         } ;
04902         for (i=0; i<N; i++)
04903         {
04904             float64_t old_q=get_q(i) ;
04905 
04906             set_q(i, log(exp(old_q)-delta)) ;
04907             invalidate_model() ;
04908             float64_t prob_old=exp(model_probability(dim)) ;
04909 
04910             set_q(i, log(exp(old_q)+delta)) ;
04911             invalidate_model() ;        
04912             float64_t prob_new=exp(model_probability(dim));
04913 
04914             float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04915 
04916             set_q(i, old_q) ;
04917             invalidate_model() ;        
04918             float64_t deriv_calc=exp(model_derivative_q(i, dim)); 
04919 
04920             //if (fabs(deriv_calc_old-deriv)>1e-4)
04921             SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);       
04922         } ;
04923 #endif
04924     }
04925     return result;
04926 }
04927 
04928 #ifdef USE_HMMDEBUG
04929 bool CHMM::check_path_derivatives()
04930 {
04931     bool result=false;
04932     const float64_t delta=1e-4 ;
04933 
04934     for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04935     {   
04936         int32_t i ;
04938         //derivatives log(da),log(db)
04938         for (i=0; i<N; i++)
04939         {
04940             for (int32_t j=0; j<N; j++)
04941             {
04942                 float64_t old_a=get_a(i,j) ;
04943 
04944                 set_a(i,j, log(exp(old_a)-delta)) ;
04945                 invalidate_model() ;
04946                 float64_t prob_old=best_path(dim) ;
04947 
04948                 set_a(i,j, log(exp(old_a)+delta)) ;
04949                 invalidate_model() ;
04950                 float64_t prob_new=best_path(dim);
04951 
04952                 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04953 
04954                 set_a(i,j, old_a) ;
04955                 invalidate_model() ;
04956                 float64_t deriv_calc=path_derivative_a(i, j, dim) ;
04957 
04958                 SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc,  deriv, 100.0*(deriv-deriv_calc)/deriv_calc);     
04959             } ;
04960         } ;
04961         for (i=0; i<N; i++)
04962         {
04963             for (int32_t j=0; j<M; j++)
04964             {
04965                 float64_t old_b=get_b(i,j) ;
04966 
04967                 set_b(i,j, log(exp(old_b)-delta)) ;
04968                 invalidate_model() ;
04969                 float64_t prob_old=best_path(dim) ;
04970 
04971                 set_b(i,j, log(exp(old_b)+delta)) ;
04972                 invalidate_model() ;            
04973                 float64_t prob_new=best_path(dim);
04974 
04975                 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04976 
04977                 set_b(i,j, old_b) ;
04978                 invalidate_model() ;
04979                 float64_t deriv_calc=path_derivative_b(i, j, dim);
04980 
04981                 SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));        
04982             } ;
04983         } ;
04984 
04985         for (i=0; i<N; i++)
04986         {
04987             float64_t old_p=get_p(i) ;
04988 
04989             set_p(i, log(exp(old_p)-delta)) ;
04990             invalidate_model() ;
04991             float64_t prob_old=best_path(dim) ;
04992 
04993             set_p(i, log(exp(old_p)+delta)) ;
04994             invalidate_model() ;        
04995             float64_t prob_new=best_path(dim);
04996             float64_t deriv = (prob_new-prob_old)/(2*delta) ;
04997 
04998             set_p(i, old_p) ;
04999             invalidate_model() ;
05000             float64_t deriv_calc=path_derivative_p(i, dim);
05001 
05002             //if (fabs(deriv_calc_old-deriv)>1e-4)
05003             SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);       
05004         } ;
05005         for (i=0; i<N; i++)
05006         {
05007             float64_t old_q=get_q(i) ;
05008 
05009             set_q(i, log(exp(old_q)-delta)) ;
05010             invalidate_model() ;
05011             float64_t prob_old=best_path(dim) ;
05012 
05013             set_q(i, log(exp(old_q)+delta)) ;
05014             invalidate_model() ;        
05015             float64_t prob_new=best_path(dim);
05016 
05017             float64_t deriv = (prob_new-prob_old)/(2*delta) ;
05018 
05019             set_q(i, old_q) ;
05020             invalidate_model() ;        
05021             float64_t deriv_calc=path_derivative_q(i, dim); 
05022 
05023             //if (fabs(deriv_calc_old-deriv)>1e-4)
05024             SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);       
05025         } ;
05026     }
05027     return result;
05028 }
05029 #endif // USE_HMMDEBUG 
05030 
05031 //normalize model (sum to one constraint)
05032 void CHMM::normalize(bool keep_dead_states)
05033 {
05034     int32_t i,j;
05035     const float64_t INF=-1e10;
05036     float64_t sum_p =INF;
05037 
05038     for (i=0; i<N; i++)
05039     {
05040         sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
05041 
05042         float64_t sum_b =INF;
05043         float64_t sum_a =get_q(i);
05044 
05045         for (j=0; j<N; j++)
05046             sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
05047 
05048         if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
05049         {
05050             for (j=0; j<N; j++)
05051                 set_a(i,j, get_a(i,j)-sum_a);
05052             set_q(i, get_q(i)-sum_a);
05053         }
05054 
05055         for (j=0; j<M; j++)
05056             sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
05057         for (j=0; j<M; j++)
05058             set_b(i,j, get_b(i,j)-sum_b);
05059     }
05060 
05061     for (i=0; i<N; i++)
05062         set_p(i, get_p(i)-sum_p);
05063 
05064     invalidate_model();
05065 }
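normalize() works entirely in log space: every log-parameter is shifted by the log of its row sum, which is accumulated with CMath::logarithmic_sum (a pairwise log-sum-exp), so no exponentiation can overflow. In conventional notation the update is

    \log a_{ij} \leftarrow \log a_{ij} - \log\big(q_i + \textstyle\sum_k a_{ik}\big), \qquad
    \log b_{ij} \leftarrow \log b_{ij} - \log\textstyle\sum_k b_{ik}, \qquad
    \log p_i \leftarrow \log p_i - \log\textstyle\sum_k p_k

with q_i rescaled by the same row sum as the a_{ij}, so that for each state the outgoing transitions plus the end probability sum to one, the emissions sum to one, and the start distribution sums to one. Rows with essentially no outgoing mass are left untouched when keep_dead_states is set.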
05066 
05067 bool CHMM::append_model(CHMM* app_model)
05068 {
05069     bool result=false;
05070     const int32_t num_states=app_model->get_N();
05071     int32_t i,j;
05072 
05073     SG_DEBUG( "cur N:%d M:%d\n", N, M);
05074     SG_DEBUG( "app N:%d M:%d\n", app_model->get_N(), app_model->get_M());
05075     if (app_model->get_M() == get_M())
05076     {
05077         float64_t* n_p=new float64_t[N+num_states];
05078         float64_t* n_q=new float64_t[N+num_states];
05079         float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
05080         //SG_PRINT("size n_b: %d\n", (N+num_states)*M);
05081         float64_t* n_b=new float64_t[(N+num_states)*M];
05082 
05083         //clear n_x 
05084         for (i=0; i<N+num_states; i++)
05085         {
05086             n_p[i]=-CMath::INFTY;
05087             n_q[i]=-CMath::INFTY;
05088 
05089             for (j=0; j<N+num_states; j++)
05090                 n_a[(N+num_states)*i+j]=-CMath::INFTY;
05091 
05092             for (j=0; j<M; j++)
05093                 n_b[M*i+j]=-CMath::INFTY;
05094         }
05095 
05096         //copy models first
05097         // warning pay attention to the ordering of 
05098         // transition_matrix_a, observation_matrix_b !!!
05099 
05100         // cur_model
05101         for (i=0; i<N; i++)
05102         {
05103             n_p[i]=get_p(i);
05104 
05105             for (j=0; j<N; j++)
05106                 n_a[(N+num_states)*j+i]=get_a(i,j);
05107 
05108             for (j=0; j<M; j++)
05109             {
05110                 n_b[M*i+j]=get_b(i,j);
05111             }
05112         }
05113 
05114         // append_model
05115         for (i=0; i<app_model->get_N(); i++)
05116         {
05117             n_q[i+N]=app_model->get_q(i);
05118 
05119             for (j=0; j<app_model->get_N(); j++)
05120                 n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
05121             for (j=0; j<app_model->get_M(); j++)
05122                 n_b[M*(i+N)+j]=app_model->get_b(i,j);
05123         }
05124 
05125 
05126         // transition to the two and back
05127         for (i=0; i<N; i++)
05128         {
05129             for (j=N; j<N+num_states; j++)
05130                 n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
05131         }
05132 
05133         free_state_dependend_arrays();
05134         N+=num_states;
05135 
05136         alloc_state_dependend_arrays();
05137 
05138         //delete + adjust pointers
05139         delete[] initial_state_distribution_p;
05140         delete[] end_state_distribution_q;
05141         delete[] transition_matrix_a;
05142         delete[] observation_matrix_b;
05143 
05144         transition_matrix_a=n_a;
05145         observation_matrix_b=n_b;
05146         initial_state_distribution_p=n_p;
05147         end_state_distribution_q=n_q;
05148 
05149         SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05151         invalidate_model();
05152     }
05153     else
05154         SG_ERROR( "number of observations is different for append model, doing nothing!\n");
05155 
05156     return result;
05157 }
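Schematically, in probability space, this first append_model() variant places the appended model as a second diagonal block, and the only newly introduced mass runs from the end states of the current model into the start states of the appended one (added on top of any existing entries via logarithmic_sum):

    A' = \begin{pmatrix} A^{cur} & q^{cur}\,(p^{app})^{\top} \\ 0 & A^{app} \end{pmatrix}, \qquad
    p' = \begin{pmatrix} p^{cur} \\ 0 \end{pmatrix}, \qquad
    q' = \begin{pmatrix} 0 \\ q^{app} \end{pmatrix}

As the warning above notes, the result is not renormalized automatically.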
05158 
05159 bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
05160 {
05161     bool result=false;
05162     const int32_t num_states=app_model->get_N()+2;
05163     int32_t i,j;
05164 
05165     if (app_model->get_M() == get_M())
05166     {
05167         float64_t* n_p=new float64_t[N+num_states];
05168         float64_t* n_q=new float64_t[N+num_states];
05169         float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
05170         //SG_PRINT("size n_b: %d\n", (N+num_states)*M);
05171         float64_t* n_b=new float64_t[(N+num_states)*M];
05172 
05173         //clear n_x 
05174         for (i=0; i<N+num_states; i++)
05175         {
05176             n_p[i]=-CMath::INFTY;
05177             n_q[i]=-CMath::INFTY;
05178 
05179             for (j=0; j<N+num_states; j++)
05180                 n_a[(N+num_states)*j+i]=-CMath::INFTY;
05181 
05182             for (j=0; j<M; j++)
05183                 n_b[M*i+j]=-CMath::INFTY;
05184         }
05185 
05186         //copy models first
05187         // warning pay attention to the ordering of 
05188         // transition_matrix_a, observation_matrix_b !!!
05189 
05190         // cur_model
05191         for (i=0; i<N; i++)
05192         {
05193             n_p[i]=get_p(i);
05194 
05195             for (j=0; j<N; j++)
05196                 n_a[(N+num_states)*j+i]=get_a(i,j);
05197 
05198             for (j=0; j<M; j++)
05199             {
05200                 n_b[M*i+j]=get_b(i,j);
05201             }
05202         }
05203 
05204         // append_model
05205         for (i=0; i<app_model->get_N(); i++)
05206         {
05207             n_q[i+N+2]=app_model->get_q(i);
05208 
05209             for (j=0; j<app_model->get_N(); j++)
05210                 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
05211             for (j=0; j<app_model->get_M(); j++)
05212                 n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
05213         }
05214 
05215         //initialize the two special states
05216 
05217         // output
05218         for (i=0; i<M; i++)
05219         {
05220             n_b[M*N+i]=cur_out[i];
05221             n_b[M*(N+1)+i]=app_out[i];
05222         }
05223 
05224         // transition to the two and back
05225         for (i=0; i<N+num_states; i++)
05226         {
05227             // the first state is only connected to the second
05228             if (i==N+1)
05229                 n_a[(N+num_states)*i + N]=0;
05230 
05231             // only states of the cur_model can reach the
05232             // first state 
05233             if (i<N)
05234                 n_a[(N+num_states)*N+i]=get_q(i);
05235 
05236             // the second state is only connected to states of
05237             // the append_model (with probability app->p(i))
05238             if (i>=N+2)
05239                 n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
05240         }
05241 
05242         free_state_dependend_arrays();
05243         N+=num_states;
05244 
05245         alloc_state_dependend_arrays();
05246 
05247         //delete + adjust pointers
05248         delete[] initial_state_distribution_p;
05249         delete[] end_state_distribution_q;
05250         delete[] transition_matrix_a;
05251         delete[] observation_matrix_b;
05252 
05253         transition_matrix_a=n_a;
05254         observation_matrix_b=n_b;
05255         initial_state_distribution_p=n_p;
05256         end_state_distribution_q=n_q;
05257 
05258         SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05260         invalidate_model();
05261     }
05262 
05263     return result;
05264 }
05265 
05266 
05267 void CHMM::add_states(int32_t num_states, float64_t default_value)
05268 {
05269     int32_t i,j;
05270     const float64_t MIN_RAND=1e-2; //this is the range of the random values for the new variables
05271     const float64_t MAX_RAND=2e-1;
05272 
05273     float64_t* n_p=new float64_t[N+num_states];
05274     float64_t* n_q=new float64_t[N+num_states];
05275     float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
05276     //SG_PRINT("size n_b: %d\n", (N+num_states)*M);
05277     float64_t* n_b=new float64_t[(N+num_states)*M];
05278 
05279     // warning pay attention to the ordering of 
05280     // transition_matrix_a, observation_matrix_b !!!
05281     for (i=0; i<N; i++)
05282     {
05283         n_p[i]=get_p(i);
05284         n_q[i]=get_q(i);
05285 
05286         for (j=0; j<N; j++)
05287             n_a[(N+num_states)*j+i]=get_a(i,j);
05288 
05289         for (j=0; j<M; j++)
05290             n_b[M*i+j]=get_b(i,j);
05291     }
05292 
05293     for (i=N; i<N+num_states; i++)
05294     {
05295         n_p[i]=VAL_MACRO;
05296         n_q[i]=VAL_MACRO;
05297 
05298         for (j=0; j<N; j++)
05299             n_a[(N+num_states)*i+j]=VAL_MACRO;
05300 
05301         for (j=0; j<N+num_states; j++)
05302             n_a[(N+num_states)*j+i]=VAL_MACRO;
05303 
05304         for (j=0; j<M; j++)
05305             n_b[M*i+j]=VAL_MACRO;
05306     }
05307     free_state_dependend_arrays();
05308     N+=num_states;
05309 
05310     alloc_state_dependend_arrays();
05311 
05312     //delete + adjust pointers
05313     delete[] initial_state_distribution_p;
05314     delete[] end_state_distribution_q;
05315     delete[] transition_matrix_a;
05316     delete[] observation_matrix_b;
05317 
05318     transition_matrix_a=n_a;
05319     observation_matrix_b=n_b;
05320     initial_state_distribution_p=n_p;
05321     end_state_distribution_q=n_q;
05322 
05323     invalidate_model();
05324     normalize();
05325 }
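A hypothetical call sequence for add_states(): when default_value is 0 the new parameters are drawn at random from [MIN_RAND, MAX_RAND] via VAL_MACRO, otherwise the fixed default is used; in both cases the model is renormalized afterwards.

    // Sketch only; assumes an existing CHMM* hmm built with this file's API.
    hmm->add_states(2, 0.0);    // grow by 2 states, new entries randomised in [1e-2, 2e-1]
    hmm->add_states(1, 0.25);   // grow by 1 state, new entries fixed to 0.25 before normalization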
05326 
05327 void CHMM::chop(float64_t value)
05328 {
05329     for (int32_t i=0; i<N; i++)
05330     {
05331         int32_t j;
05332 
05333         if (exp(get_p(i)) < value)
05334             set_p(i, CMath::ALMOST_NEG_INFTY);
05335 
05336         if (exp(get_q(i)) < value)
05337             set_q(i, CMath::ALMOST_NEG_INFTY);
05338 
05339         for (j=0; j<N; j++)
05340         {
05341             if (exp(get_a(i,j)) < value)
05342                 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05343         }
05344 
05345         for (j=0; j<M; j++)
05346         {
05347             if (exp(get_b(i,j)) < value)
05348                 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05349         }
05350     }
05351     normalize();
05352     invalidate_model();
05353 }
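chop() prunes parameters whose probability falls below the given threshold by pushing them to ALMOST_NEG_INFTY and renormalizing; a hypothetical use is sparsifying an already trained model.

    // Sketch only; assumes an existing, trained CHMM* hmm.
    hmm->chop(1e-3);    // drop start/end/transition/emission probabilities below 1e-3, then renormalize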
05354 
05355 bool CHMM::linear_train(bool right_align)
05356 {
05357     if (p_observations)
05358     {
05359         int32_t histsize=(get_M()*get_N());
05360         int32_t* hist=new int32_t[histsize];
05361         int32_t* startendhist=new int32_t[get_N()];
05362         int32_t i,dim;
05363 
05364         ASSERT(p_observations->get_max_vector_length()<=get_N());
05365 
05366         for (i=0; i<histsize; i++)
05367             hist[i]=0;
05368 
05369         for (i=0; i<get_N(); i++)
05370             startendhist[i]=0;
05371 
05372         if (right_align)
05373         {
05374             for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05375             {
05376                 int32_t len=0;
05377                 uint16_t* obs=p_observations->get_feature_vector(dim, len);
05378 
05379                 ASSERT(len<=get_N());
05380                 startendhist[(get_N()-len)]++;
05381 
05382                 for (i=0;i<len;i++)
05383                     hist[(get_N()-len+i)*get_M() + *obs++]++;
05384             }
05385 
05386             set_q(get_N()-1, 1);
05387             for (i=0; i<get_N()-1; i++)
05388                 set_q(i, 0);
05389 
05390             for (i=0; i<get_N(); i++)
05391                 set_p(i, startendhist[i]+PSEUDO);
05392 
05393             for (i=0;i<get_N();i++)
05394             {
05395                 for (int32_t j=0; j<get_N(); j++)
05396                 {
05397                     if (i==j-1)
05398                         set_a(i,j, 1);
05399                     else
05400                         set_a(i,j, 0);
05401                 }
05402             }
05403         }
05404         else
05405         {
05406             for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05407             {
05408                 int32_t len=0;
05409                 uint16_t* obs=p_observations->get_feature_vector(dim, len);
05410 
05411                 ASSERT(len<=get_N());
05412                 for (i=0;i<len;i++)
05413                     hist[i*get_M() + *obs++]++;
05414                 
05415                 startendhist[len-1]++;
05416             }
05417 
05418             set_p(0, 1);
05419             for (i=1; i<get_N(); i++)
05420                 set_p(i, 0);
05421 
05422             for (i=0; i<get_N(); i++)
05423                 set_q(i, startendhist[i]+PSEUDO);
05424 
05425             int32_t total=p_observations->get_num_vectors();
05426 
05427             for (i=0;i<get_N();i++)
05428             {
05429                 total-= startendhist[i] ;
05430 
05431                 for (int32_t j=0; j<get_N(); j++)
05432                 {
05433                     if (i==j-1)
05434                         set_a(i,j, total+PSEUDO);
05435                     else
05436                         set_a(i,j, 0);
05437                 }
05438             }
05439             ASSERT(total==0);
05440         }
05441 
05442         for (i=0;i<get_N();i++)
05443         {
05444             for (int32_t j=0; j<get_M(); j++)
05445             {
05446                 float64_t sum=0;
05447                 int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);
05448 
05449                 for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
05450                     sum+=hist[offs+k];
05451 
05452                 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05453             }
05454         }
05455 
05456         delete[] hist;
05457         delete[] startendhist;
05458         convert_to_log();
05459         invalidate_model();
05460         return true;
05461     }
05462     else
05463         return false;
05464 }
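linear_train() fits a strict left-to-right chain directly from counts. With c_{ij} the number of times symbol j is observed at position i and S the original number of symbols, the smoothed emission estimate used above is

    b_i(j) = \frac{c_{ij} + PSEUDO}{\sum_k c_{ik} + PSEUDO \cdot S}

where the sum runs over the symbol group selected by get_masked_symbols() (all original symbols in the common, unmasked case). Only transitions from position i to i+1 receive mass, and the start distribution (or, with right_align, the end distribution) comes from the histogram of sequence lengths; convert_to_log() then moves everything into log space.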
05465 
05466 void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
05467 {
05468     ASSERT(obs);
05469     p_observations=obs;
05470     SG_REF(obs);
05471 
05472     if (obs)
05473         if (obs->get_num_symbols() > M)
05474             SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
05475 
05476     if (!reused_caches)
05477     {
05478 #ifdef USE_HMMPARALLEL_STRUCTURES
05479         for (int32_t i=0; i<parallel.get_num_threads(); i++) 
05480         {
05481             delete[] alpha_cache[i].table;
05482             delete[] beta_cache[i].table;
05483             delete[] states_per_observation_psi[i];
05484             delete[] path[i];
05485 
05486             alpha_cache[i].table=NULL;
05487             beta_cache[i].table=NULL;
05488 #ifndef NOVIT
05489             states_per_observation_psi[i]=NULL;
05490 #endif // NOVIT
05491             path[i]=NULL;
05492         } ;
05493 #else
05494         delete[] alpha_cache.table;
05495         delete[] beta_cache.table;
05496         delete[] states_per_observation_psi;
05497         delete[] path;
05498 
05499         alpha_cache.table=NULL;
05500         beta_cache.table=NULL;
05501         states_per_observation_psi=NULL;
05502         path=NULL;
05503 
05504 #endif //USE_HMMPARALLEL_STRUCTURES
05505     }
05506 
05507     invalidate_model();
05508 }
05509 
05510 void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
05511 {
05512     ASSERT(obs);
05513     p_observations=obs;
05514     SG_REF(obs);
05515     /* from Distribution, necessary for calls to base class methods, like
05516      * get_log_likelihood_sample():
05517      */
05518     features=obs;
05519 
05520     SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
05521     SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
05522     SG_DEBUG("M: %d\n", M);
05523 
05524     if (obs)
05525     {
05526         if (obs->get_num_symbols() > M)
05527         {
05528             SG_ERROR( "number of symbols in observation (%d) larger than M (%d)\n", obs->get_num_symbols(), M);
05529         }
05530     }
05531 
05532     if (!reused_caches)
05533     {
05534 #ifdef USE_HMMPARALLEL_STRUCTURES
05535         for (int32_t i=0; i<parallel.get_num_threads(); i++) 
05536         {
05537             delete[] alpha_cache[i].table;
05538             delete[] beta_cache[i].table;
05539 #ifndef NOVIT
05540             delete[] states_per_observation_psi[i];
05541 #endif // NOVIT
05542             delete[] path[i];
05543 
05544             alpha_cache[i].table=NULL;
05545             beta_cache[i].table=NULL;
05546 #ifndef NOVIT
05547             states_per_observation_psi[i]=NULL;
05548 #endif // NOVIT
05549             path[i]=NULL;
05550         } ;
05551 #else
05552         delete[] alpha_cache.table;
05553         delete[] beta_cache.table;
05554         delete[] states_per_observation_psi;
05555         delete[] path;
05556 
05557         alpha_cache.table=NULL;
05558         beta_cache.table=NULL;
05559         states_per_observation_psi=NULL;
05560         path=NULL;
05561 
05562 #endif //USE_HMMPARALLEL_STRUCTURES
05563     }
05564 
05565     if (obs!=NULL)
05566     {
05567         int32_t max_T=obs->get_max_vector_length();
05568 
05569         if (lambda)
05570         {
05571 #ifdef USE_HMMPARALLEL_STRUCTURES
05572             for (int32_t i=0; i<parallel.get_num_threads(); i++) 
05573             {
05574                 this->alpha_cache[i].table= lambda->alpha_cache[i].table;
05575                 this->beta_cache[i].table=  lambda->beta_cache[i].table;
05576                 this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
05577                 this->path[i]=lambda->path[i];
05578             } ;
05579 #else
05580             this->alpha_cache.table= lambda->alpha_cache.table;
05581             this->beta_cache.table= lambda->beta_cache.table;
05582             this->states_per_observation_psi= lambda->states_per_observation_psi;
05583             this->path=lambda->path;
05584 #endif //USE_HMMPARALLEL_STRUCTURES
05585 
05586             this->reused_caches=true;
05587         }
05588         else
05589         {
05590             this->reused_caches=false;
05591 #ifdef USE_HMMPARALLEL_STRUCTURES
05592             SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05593             for (int32_t i=0; i<parallel.get_num_threads(); i++)
05594             {
05595                 if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
05596                     SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
05597                 else
05598                     SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
05599                 path[i]=new T_STATES[max_T];
05600             }
05601 #else // no USE_HMMPARALLEL_STRUCTURES 
05602             SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05603             if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
05604                 SG_DONE();
05605             else
05606                 SG_ERROR( "failed.\n") ;
05607 
05608             path=new T_STATES[max_T];
05609 #endif // USE_HMMPARALLEL_STRUCTURES
05610 #ifdef USE_HMMCACHE
05611             SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);
05612 
05613 #ifdef USE_HMMPARALLEL_STRUCTURES
05614             for (int32_t i=0; i<parallel.get_num_threads(); i++)
05615             {
05616                 if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
05617                     SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
05618                 else
05619                     SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;
05620 
05621                 if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05622                     SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
05623                 else
05624                     SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
05625             } ;
05626 #else // USE_HMMPARALLEL_STRUCTURES
05627             if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05628                 SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
05629             else
05630                 SG_ERROR( "allocation of alpha_cache.table failed\n") ;
05631 
05632             if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
05633                 SG_DEBUG( "beta_cache.table successfully allocated\n") ;
05634             else
05635                 SG_ERROR( "allocation of beta_cache.table failed\n") ;
05636 
05637 #endif // USE_HMMPARALLEL_STRUCTURES
05638 #else // USE_HMMCACHE
05639 #ifdef USE_HMMPARALLEL_STRUCTURES
05640             for (int32_t i=0; i<parallel.get_num_threads(); i++)
05641             {
05642                 alpha_cache[i].table=NULL ;
05643                 beta_cache[i].table=NULL ;
05644             } ;
05645 #else //USE_HMMPARALLEL_STRUCTURES
05646             alpha_cache.table=NULL ;
05647             beta_cache.table=NULL ;
05648 #endif //USE_HMMPARALLEL_STRUCTURES
05649 #endif //USE_HMMCACHE
05650         }
05651     }
05652 
05653     //initialize path/model probabilities as not calculated
05654     invalidate_model();
05655 }
05656 
05657 bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
05658 {
05659     if (p_observations && window_width>0 &&
05660             ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05661     {
05662         int32_t min_sequence=sequence_number;
05663         int32_t max_sequence=sequence_number;
05664 
05665         if (sequence_number<0)
05666         {
05667             min_sequence=0;
05668             max_sequence=p_observations->get_num_vectors();
05669             SG_INFO( "numseq: %d\n", max_sequence);
05670         }
05671 
05672         SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05673         for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05674         {
05675             int32_t sequence_length=0;
05676             uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length);
05677 
05678             int32_t histsize=get_M();
05679             int64_t* hist=new int64_t[histsize];
05680             int32_t i,j;
05681 
05682             for (i=0; i<sequence_length-window_width; i++)
05683             {
05684                 for (j=0; j<histsize; j++)
05685                     hist[j]=0;
05686 
05687                 uint16_t* ptr=&obs[i];
05688                 for (j=0; j<window_width; j++)
05689                 {
05690                     hist[*ptr++]++;
05691                 }
05692 
05693                 float64_t perm_entropy=0;
05694                 for (j=0; j<get_M(); j++)
05695                 {
05696                     float64_t p=
05697                         (((float64_t) hist[j])+PSEUDO)/
05698                         (window_width+get_M()*PSEUDO);
05699                     perm_entropy+=p*log(p);
05700                 }
05701 
05702                 SG_PRINT( "%f\n", perm_entropy);
05703             }
05704 
05705             delete[] hist;
05706         }
05707         return true;
05708     }
05709     else
05710         return false;
05711 }
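For each window of width W (window_width) starting at position i of a sequence, the symbol histogram h_j is smoothed into a distribution and one value is printed per window:

    p_j = \frac{h_j + PSEUDO}{W + M \cdot PSEUDO}, \qquad \text{printed value} = \sum_{j=1}^{M} p_j \log p_j

i.e. the negative of the (pseudo-count smoothed) entropy of the window's symbol distribution.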
05712 
05713 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05714 {
05715     if (num_param<N)
05716         return model_derivative_p(num_param, num_example);
05717     else if (num_param<2*N)
05718         return model_derivative_q(num_param-N, num_example);
05719     else if (num_param<N*(N+2))
05720     {
05721         int32_t k=num_param-2*N;
05722         int32_t i=k/N;
05723         int32_t j=k%N;
05724         return model_derivative_a(i,j, num_example);
05725     }
05726     else if (num_param<N*(N+2+M))
05727     {
05728         int32_t k=num_param-N*(N+2);
05729         int32_t i=k/M;
05730         int32_t j=k%M;
05731         return model_derivative_b(i,j, num_example);
05732     }
05733 
05734     ASSERT(false);
05735     return -1;
05736 }
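The flat parameter index used here, and by get_log_model_parameter() below, maps onto the p/q/a/b blocks as follows; this comment block is illustrative only and not part of the class:

    // Layout of the flat parameter index k:
    //   0       <= k < N           -> p[k]
    //   N       <= k < 2N          -> q[k-N]
    //   2N      <= k < N*(N+2)     -> a[i][j]  with k' = k-2N,       i = k'/N, j = k'%N
    //   N*(N+2) <= k < N*(N+2+M)   -> b[i][j]  with k' = k-N*(N+2),  i = k'/M, j = k'%M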
05737 
05738 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05739 {
05740     if (num_param<N)
05741         return get_p(num_param);
05742     else if (num_param<2*N)
05743         return get_q(num_param-N);
05744     else if (num_param<N*(N+2))
05745         return transition_matrix_a[num_param-2*N];
05746     else if (num_param<N*(N+2+M))
05747         return observation_matrix_b[num_param-N*(N+2)];
05748 
05749     ASSERT(false);
05750     return -1;
05751 }
05752 
05753 
05754 //convergence criterion (to be adjusted)
05755 bool CHMM::converge(float64_t x, float64_t y)
05756 {
05757     float64_t diff=y-x;
05758     float64_t absdiff=fabs(diff);
05759 
05760     SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05761 
05762     if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05763     {
05764         iteration_count=iterations;
05765         SG_INFO( "...finished\n");
05766         conv_it=5;
05767         return true;
05768     }
05769     else
05770     {
05771         if (absdiff<epsilon)
05772             conv_it--;
05773         else
05774             conv_it=5;
05775 
05776         return false;
05777     }
05778 }
05779 
05780 //swap working model and estimated model
05781 void CHMM::switch_model(CHMM** m1, CHMM** m2)
05782 {
05783     CHMM* dummy=*m1;
05784 
05785     *m1=*m2;
05786     *m2=dummy;
05787 }
05788 
05789 bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
05790 {
05791     CHMM* estimate=new CHMM(this);
05792     CHMM* working=this;
05793     float64_t prob_max=-CMath::INFTY;
05794     float64_t prob=-CMath::INFTY;
05795     float64_t prob_train=CMath::ALMOST_NEG_INFTY;
05796     iteration_count=iterations;
05797 
05798     while (!converge(prob, prob_train))
05799     {
05800         switch_model(&working, &estimate);
05801         prob=prob_train;
05802         /* function pointer might be more efficient than switch, but works in
05803          * ISO C++ only with static members. :( but perhaps g++ is smart
05804          * enough to optimise this on its own...
05805          */
05806         switch (type) {
05807             case BW_NORMAL:
05808                 estimate_model_baum_welch(estimate); break;
05809             case BW_TRANS:
05810                 estimate_model_baum_welch_trans(estimate); break;
05811             case BW_DEFINED:
05812                 estimate_model_baum_welch_defined(estimate); break;
05813             case VIT_NORMAL:
05814                 estimate_model_viterbi(estimate); break;
05815             case VIT_DEFINED:
05816                 estimate_model_viterbi_defined(estimate); break;
05817         }
05818         prob_train=estimate->model_probability();
05819         if (prob_max<prob_train)
05820             prob_max=prob_train;
05821     }
05822 
05823     delete estimate;
05824     estimate=NULL;
05825 
05826     return true;
05827 }
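A hypothetical end-to-end usage sketch, assuming a CHMM instance and a CStringFeatures<uint16_t> observation set have already been created with this file's API (the iteration budget and epsilon consumed by converge() are assumed to have been configured elsewhere):

    // Sketch only; hmm and obs are assumed to exist and to be consistent (num symbols <= M).
    float64_t train_hmm_example(CHMM* hmm, CStringFeatures<uint16_t>* obs)
    {
        hmm->set_observations(obs, NULL);           // attach training sequences, allocate caches
        hmm->baum_welch_viterbi_train(BW_NORMAL);   // plain Baum-Welch re-estimation until converge()
        return hmm->model_probability();            // log-likelihood of the final model
    }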
05828 
