DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Copyright (C) 1999-2007 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "structure/DynProg.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/io.h"
00015 #include "lib/config.h"
00016 #include "features/StringFeatures.h"
00017 #include "features/CharFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "lib/Array.h"
00021 #include "lib/Array2.h"
00022 #include "lib/Array3.h"
00023 
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <time.h>
00027 #include <ctype.h>
00028 
00029 template void CDynProg::best_path_trans<1,true,false>(const DREAL *seq, INT seq_len, const INT *pos, 
00030                                                       const INT *orf_info, CPlifBase **PLif_matrix, 
00031                                                       CPlifBase **Plif_state_signals, INT max_num_signals, 
00032                                                       INT genestr_num, 
00033                                                       DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq,
00034                                                       bool use_orf) ;
00035 template void CDynProg::best_path_trans<2,true,false>(const DREAL *seq, INT seq_len, const INT *pos, 
00036                                                       const INT *orf_info, CPlifBase **PLif_matrix, 
00037                                                       CPlifBase **Plif_state_signals, INT max_num_signals, 
00038                                                       INT genestr_num, 
00039                                                       DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq,
00040                                                       bool use_orf) ;
00041 
00042 template void CDynProg::best_path_trans<1,false,false>(const DREAL *seq, INT seq_len, const INT *pos, 
00043                                                       const INT *orf_info, CPlifBase **PLif_matrix, 
00044                                                       CPlifBase **Plif_state_signals, INT max_num_signals, 
00045                                                       INT genestr_num, 
00046                                                       DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq,
00047                                                       bool use_orf) ;
00048 template void CDynProg::best_path_trans<2,false,false>(const DREAL *seq, INT seq_len, const INT *pos, 
00049                                                       const INT *orf_info, CPlifBase **PLif_matrix, 
00050                                                       CPlifBase **Plif_state_signals, INT max_num_signals, 
00051                                                       INT genestr_num, 
00052                                                       DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq,
00053                                                       bool use_orf) ;
00054 
00055 
00056 #ifdef SUNOS
00057 extern "C" int  finite(double);
00058 #endif
00059 
00060 //#define USE_TMP_ARRAYCLASS
00061 //#define DYNPROG_DEBUG
00062 
00063 //CArray2<INT> g_orf_info(1,1) ;
00064 
00065 static INT word_degree_default[4]={3,4,5,6} ;
00066 static INT cum_num_words_default[5]={0,64,320,1344,5440} ;
00067 static INT num_words_default[4]=   {64,256,1024,4096} ;
00068 static INT mod_words_default[32] = {1,1,1,1,1,1,1,1,
00069                                     1,1,1,1,1,1,1,1,
00070                                     0,0,0,0,0,0,0,0,
00071                                     0,0,0,0,0,0,0,0} ;  
00072 static bool sign_words_default[16] = {true,true,true,true,true,true,true,true,
00073                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00074 static INT string_words_default[16] = {0,0,0,0,0,0,0,0,
00075                                        1,1,1,1,1,1,1,1} ; // which string should be used
00076 
00077 CDynProg::CDynProg(INT p_num_svms /*= 8 */)
00078 : CSGObject(), transition_matrix_a_id(1,1), transition_matrix_a(1,1),
00079     transition_matrix_a_deriv(1,1), initial_state_distribution_p(1),
00080     initial_state_distribution_p_deriv(1), end_state_distribution_q(1),
00081     end_state_distribution_q_deriv(1), dict_weights(1,1),
00082     dict_weights_array(dict_weights.get_array()),
00083 
00084       // multi svm
00085       num_degrees(4), 
00086       num_svms(8), 
00087       num_strings(1),
00088       word_degree(word_degree_default, num_degrees, true, true),
00089       cum_num_words(cum_num_words_default, num_degrees+1, true, true),
00090       cum_num_words_array(cum_num_words.get_array()),
00091       num_words(num_words_default, num_degrees, true, true),
00092       num_words_array(num_words.get_array()),
00093       mod_words(mod_words_default, num_svms, 2, true, true),
00094       mod_words_array(mod_words.get_array()),
00095       sign_words(sign_words_default, num_svms, true, true),
00096       sign_words_array(sign_words.get_array()),
00097       string_words(string_words_default, num_svms, true, true),
00098       string_words_array(string_words.get_array()),
00099 //    word_used(num_degrees, num_words[num_degrees-1], num_strings),
00100 //    word_used_array(word_used.get_array()),
00101 //    svm_values_unnormalized(num_degrees, num_svms),
00102       svm_pos_start(num_degrees),
00103       num_unique_words(num_degrees),
00104       svm_arrays_clean(true),
00105 
00106       // single svm
00107       num_svms_single(1),
00108       word_degree_single(1),
00109       num_words_single(4), 
00110       word_used_single(num_words_single),
00111       svm_value_unnormalized_single(num_svms_single),
00112       num_unique_words_single(0),
00113 
00114       max_a_id(0), m_seq(1,1,1), m_pos(1), m_orf_info(1,2), 
00115           m_segment_sum_weights(1,1), m_plif_list(1), 
00116       m_PEN(1,1), m_PEN_state_signals(1,1), 
00117       m_genestr(1,1), m_dict_weights(1,1), m_segment_loss(1,1,2), 
00118           m_segment_ids(1),
00119           m_segment_mask(1),
00120       m_scores(1), m_states(1,1), m_positions(1,1), m_genestr_stop(1),
00121       m_precomputed_svm_values(1,1), //by Jonas
00122       m_precomputed_tiling_values(1,1) //by Jonas
00123 {
00124     trans_list_forward = NULL ;
00125     trans_list_forward_cnt = NULL ;
00126     trans_list_forward_val = NULL ;
00127     trans_list_forward_id = NULL ;
00128     trans_list_len = 0 ;
00129 
00130     mem_initialized = true ;
00131 
00132     this->N=1;
00133     m_step=0;
00134 
00135     m_raw_intensities = NULL;
00136     m_probe_pos = NULL;
00137     m_use_tiling=false;
00138     m_genestr_len = 0;
00139 #ifdef ARRAY_STATISTICS
00140     word_degree.set_name("word_degree");
00141 #endif
00142 }
00143 
00144 CDynProg::~CDynProg()
00145 {
00146     if (trans_list_forward_cnt)
00147         delete[] trans_list_forward_cnt;
00148     if (trans_list_forward)
00149     {
00150         for (INT i=0; i<trans_list_len; i++)
00151         {
00152             if (trans_list_forward[i])
00153                 delete[] trans_list_forward[i];
00154         }
00155         delete[] trans_list_forward;
00156     }
00157     if (trans_list_forward_val)
00158     {
00159         for (INT i=0; i<trans_list_len; i++)
00160         {
00161             if (trans_list_forward_val[i])
00162                 delete[] trans_list_forward_val[i];
00163         }
00164         delete[] trans_list_forward_val;
00165     }
00166     if (trans_list_forward_id)
00167     {
00168         for (INT i=0; i<trans_list_len; i++)
00169         {
00170             if (trans_list_forward_id[i])
00171                 delete[] trans_list_forward_id[i];
00172         }
00173         delete[] trans_list_forward_id;
00174     }
00175     //if (m_raw_intensities)
00176     //  delete[] m_raw_intensities;
00177     //if (m_probe_pos)
00178     //  delete[] m_probe_pos;
00179     
00180 }
00181 
00183 void CDynProg::set_genestr_len(INT genestr_len)
00184 {
00185     m_genestr_len = genestr_len;
00186 }
00187 INT CDynProg::get_num_svms()
00188 {
00189     return num_svms;
00190 }
00191 void CDynProg::precompute_stop_codons(const CHAR* sequence, INT length)
00192 {
00193     m_genestr_stop.resize_array(length) ;
00194     m_genestr_stop.zero() ;
00195     m_genestr_stop.set_name("genestr_stop") ;
00196     {
00197     for (INT i=0; i<length-2; i++)
00198         if ((sequence[i]=='t' || sequence[i]=='T') && 
00199             (((sequence[i+1]=='a' || sequence[i+1]=='A') && 
00200               (sequence[i+2]=='a' || sequence[i+2]=='g' || sequence[i+2]=='A' || sequence[i+2]=='G')) ||
00201              ((sequence[i+1]=='g'||sequence[i+1]=='G') && (sequence[i+2]=='a' || sequence[i+2]=='A') )))
00202             m_genestr_stop.element(i)=true ;
00203         else
00204             m_genestr_stop.element(i)=false ;
00205     m_genestr_stop.element(length-1)=false ;
00206     m_genestr_stop.element(length-1)=false ;
00207     }
00208 }
00209 void CDynProg::set_num_states(INT p_N)
00210 {
00211     N=p_N ;
00212 
00213     transition_matrix_a_id.resize_array(N,N) ;
00214     transition_matrix_a.resize_array(N,N) ;
00215     transition_matrix_a_deriv.resize_array(N,N) ;
00216     initial_state_distribution_p.resize_array(N) ;
00217     initial_state_distribution_p_deriv.resize_array(N) ;
00218     end_state_distribution_q.resize_array(N);
00219     end_state_distribution_q_deriv.resize_array(N) ;
00220 
00221     m_orf_info.resize_array(N,2) ;
00222     m_PEN.resize_array(N,N) ;
00223     m_PEN_state_signals.resize_array(N,1) ;
00224 }
00225 INT CDynProg::get_num_states()
00226 {
00227     return N;
00228 }
00229 void CDynProg::init_tiling_data(DREAL* probe_pos, DREAL* intensities, const INT num_probes, const INT seq_len)
00230 {
00231     m_probe_pos = new INT[num_probes];
00232     for (INT i=0;i<num_probes;i++)
00233         m_probe_pos[i] = (INT) probe_pos[i];
00234     m_raw_intensities = intensities;
00235     m_num_probes = num_probes;
00236     m_precomputed_tiling_values.resize_array(num_svms,seq_len);
00237     m_use_tiling=true;
00238 
00239 //  SG_PRINT("num_probes: %i  \n",m_num_probes);
00240 }
00241 void CDynProg::init_content_svm_value_array(const INT seq_len)
00242 {
00243     m_precomputed_svm_values.resize_array(num_svms,seq_len);
00244 }
00245 void CDynProg::precompute_tiling_plifs(CPlif** PEN, const INT num_penalties, const INT seq_len, const INT* pos)
00246 {
00247 //  SG_PRINT("precompute_tiling_plifs: %f  \n",m_raw_intensities[0]);
00248 
00249     INT tiling_plif_ids[num_svms];
00250     INT num = 0;
00251         for (INT i=0; i<num_penalties; i++)
00252     {
00253         CPlif * plif = PEN[i];
00254         if (plif->get_use_svm()>num_svms)
00255         {
00256             tiling_plif_ids[num] = i;
00257             num++;
00258         }
00259     }
00260     ASSERT(num==num_svms)
00261 
00262     DREAL tiling_plif[num_svms];
00263     DREAL svm_value[2*num_svms];
00264     for (INT i=0; i<num_svms; i++)
00265         tiling_plif[i]=0.0;
00266 
00267     INT* p_tiling_pos  = m_probe_pos;
00268     DREAL* p_tiling_data = m_raw_intensities;
00269     for (INT pos_idx=0;pos_idx<seq_len;pos_idx++)
00270     {
00271         //SG_PRINT("pos[%i]: %i  \n",pos_idx,pos[pos_idx]);
00272         //SG_PRINT("*p_tiling_pos: %i  \n",*p_tiling_pos);
00273         while (*p_tiling_pos<pos[pos_idx])
00274         {
00275             //SG_PRINT("raw_intens: %f  \n",*p_tiling_data);
00276             for (INT i=0; i<num_svms; i++)
00277             {
00278                 svm_value[num_svms+i]=*p_tiling_data;
00279                 CPlif * plif = PEN[tiling_plif_ids[i]];
00280                 plif->set_do_calc(true);
00281                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00282                 //SG_PRINT("true: plif->lookup_penalty: %f  \n",plif->lookup_penalty(0,svm_value));
00283                 plif->set_do_calc(false);
00284                 //SG_PRINT("false: plif->lookup_penalty: %f  \n",plif->lookup_penalty(0,svm_value));
00285             }
00286             p_tiling_data++;
00287             p_tiling_pos++;
00288         }
00289         for (INT i=0; i<num_svms; i++)
00290             m_precomputed_tiling_values.set_element(tiling_plif[i],i,pos_idx);
00291     }
00292     //DREAL intensities[m_num_probes];
00293     //INT nummm = raw_intensities_interval_query(1000, 1025, intensities);
00294     //SG_PRINT("nummm:%i\n",nummm);
00295 
00296 }
00297 void CDynProg::create_word_string(const CHAR* genestr, INT genestr_num, INT genestr_len, WORD*** wordstr)
00298 {
00299     for (INT k=0; k<genestr_num; k++)
00300     {
00301         wordstr[k]=new WORD*[num_degrees] ;
00302         for (INT j=0; j<num_degrees; j++)
00303         {
00304             wordstr[k][j]=NULL ;
00305             {
00306                 wordstr[k][j]=new WORD[genestr_len] ;
00307                 for (INT i=0; i<genestr_len; i++)
00308                     switch (genestr[i])
00309                     {
00310                     case 'A':
00311                     case 'a': wordstr[k][j][i]=0 ; break ;
00312                     case 'C':
00313                     case 'c': wordstr[k][j][i]=1 ; break ;
00314                     case 'G':
00315                     case 'g': wordstr[k][j][i]=2 ; break ;
00316                     case 'T':
00317                     case 't': wordstr[k][j][i]=3 ; break ;
00318                     default: ASSERT(0) ;
00319                     }
00320                 translate_from_single_order(wordstr[k][j], genestr_len, word_degree[j]-1, word_degree[j]) ;
00321             }
00322         }
00323     }
00324     precompute_stop_codons(genestr, genestr_len);
00325 }
00326 void CDynProg::precompute_content_values(WORD*** wordstr, const INT *pos,const INT seq_len, const INT genestr_len,DREAL *dictionary_weights,INT dict_len)
00327 {
00328 
00329     //SG_PRINT("seq_len=%i, genestr_len=%i, dict_len=%i, num_svms=%i, num_degrees=%i\n",seq_len, genestr_len, dict_len, num_svms, num_degrees);
00330 
00331     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
00332     dict_weights_array=dict_weights.get_array() ;
00333 
00334     for (INT p=0 ; p<seq_len-1 ; p++)
00335     {
00336         INT from_pos = pos[p];
00337         INT to_pos = pos[p+1];
00338         DREAL my_svm_values_unnormalized[num_svms] ;
00339         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00340       
00341         ASSERT(from_pos<=genestr_len)   
00342         ASSERT(to_pos<=genestr_len) 
00343             
00344         for (INT s=0; s<num_svms; s++)
00345         {
00346             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00347         }
00348         for (INT i=from_pos; i<to_pos;i++)
00349                 {
00350             for (INT j=0; j<num_degrees; j++)
00351             {
00352                 WORD word = wordstr[0][j][i] ;
00353                 for (INT s=0; s<num_svms; s++)
00354                 {
00355                     if (s==4 && i%3!=0)
00356                         continue;
00357                     else if (s==5 && i%3!=1)
00358                         continue;
00359                     else if (s==6 && i%3!=2)
00360                         continue;
00361                     my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
00362                 }
00363             }
00364         }
00365         for (INT s=0; s<num_svms; s++)
00366         {
00367             DREAL prev = m_precomputed_svm_values.get_element(s,p);
00368             m_precomputed_svm_values.set_element(prev + my_svm_values_unnormalized[s],s,p+1);
00369             ASSERT(prev>-1e20);
00370         }
00371     }
00372     for (INT j=0; j<num_degrees; j++)
00373         delete[] wordstr[0][j] ;
00374     delete[] wordstr[0] ;
00375 }
00376 void CDynProg::set_p_vector(DREAL *p, INT p_N) 
00377 {
00378     ASSERT(p_N==N);
00379     //m_orf_info.resize_array(p_N,2) ;
00380     //m_PEN.resize_array(p_N,p_N) ;
00381 
00382     initial_state_distribution_p.set_array(p, p_N, true, true) ;
00383 }
00384 
00385 void CDynProg::set_q_vector(DREAL *q, INT q_N) 
00386 {
00387     ASSERT(q_N==N);
00388     end_state_distribution_q.set_array(q, q_N, true, true) ;
00389 }
00390 
00391 void CDynProg::set_a(DREAL *a, INT p_M, INT p_N) 
00392 {
00393     ASSERT(p_N==N);
00394     ASSERT(p_M==p_N);
00395     transition_matrix_a.set_array(a, p_N, p_N, true, true) ;
00396     transition_matrix_a_deriv.resize_array(p_N, p_N) ;
00397 }
00398 
00399 void CDynProg::set_a_id(INT *a, INT p_M, INT p_N) 
00400 {
00401     ASSERT(p_N==N);
00402     ASSERT(p_M==p_N);
00403     transition_matrix_a_id.set_array(a, p_N, p_N, true, true) ;
00404     max_a_id = 0 ;
00405     for (INT i=0; i<p_N; i++)
00406         for (INT j=0; j<p_N; j++)
00407             max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00408 }
00409 
00410 void CDynProg::set_a_trans_matrix(DREAL *a_trans, INT num_trans, INT p_N) 
00411 {
00412     if (!((p_N==3) || (p_N==4)))
00413         SG_ERROR("!((p_N==3) || (p_N==4)), p_N: %i\n",p_N);
00414 
00415     delete[] trans_list_forward ;
00416     delete[] trans_list_forward_cnt ;
00417     delete[] trans_list_forward_val ;
00418     delete[] trans_list_forward_id ;
00419 
00420     trans_list_forward = NULL ;
00421     trans_list_forward_cnt = NULL ;
00422     trans_list_forward_val = NULL ;
00423     trans_list_len = 0 ;
00424 
00425     transition_matrix_a.zero() ;
00426     transition_matrix_a_id.zero() ;
00427 
00428     mem_initialized = true ;
00429 
00430     trans_list_forward_cnt=NULL ;
00431     trans_list_len = N ;
00432     trans_list_forward = new T_STATES*[N] ;
00433     trans_list_forward_cnt = new T_STATES[N] ;
00434     trans_list_forward_val = new DREAL*[N] ;
00435     trans_list_forward_id = new INT*[N] ;
00436     
00437     INT start_idx=0;
00438     for (INT j=0; j<N; j++)
00439     {
00440         INT old_start_idx=start_idx;
00441 
00442         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00443         {
00444             start_idx++;
00445             
00446             if (start_idx>1 && start_idx<num_trans)
00447                 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00448         }
00449         
00450         if (start_idx>1 && start_idx<num_trans)
00451             ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00452         
00453         INT len=start_idx-old_start_idx;
00454         ASSERT(len>=0);
00455         
00456         trans_list_forward_cnt[j] = 0 ;
00457         
00458         if (len>0)
00459         {
00460             trans_list_forward[j]     = new T_STATES[len] ;
00461             trans_list_forward_val[j] = new DREAL[len] ;
00462             trans_list_forward_id[j] = new INT[len] ;
00463         }
00464         else
00465         {
00466             trans_list_forward[j]     = NULL;
00467             trans_list_forward_val[j] = NULL;
00468             trans_list_forward_id[j]  = NULL;
00469         }
00470     }
00471     
00472     for (INT i=0; i<num_trans; i++)
00473     {
00474         INT from_state   = (INT)a_trans[i] ;
00475         INT to_state = (INT)a_trans[i+num_trans] ;
00476         DREAL val = a_trans[i+num_trans*2] ;
00477         INT id = 0 ;
00478         if (p_N==4)
00479             id = (INT)a_trans[i+num_trans*3] ;
00480         //SG_DEBUG( "id=%i\n", id) ;
00481             
00482         ASSERT(to_state>=0 && to_state<N);
00483         ASSERT(from_state>=0 && from_state<N);
00484         
00485         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00486         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00487         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00488         trans_list_forward_cnt[to_state]++ ;
00489         transition_matrix_a.element(from_state, to_state) = val ;
00490         transition_matrix_a_id.element(from_state, to_state) = id ;
00491         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,transition_matrix_a_id.element(from_state, to_state));
00492     } ;
00493 
00494     max_a_id = 0 ;
00495     for (INT i=0; i<N; i++)
00496         for (INT j=0; j<N; j++)
00497         {
00498             //if (transition_matrix_a_id.element(i,j))
00499             //SG_DEBUG( "(%i,%i)=%i\n", i,j, transition_matrix_a_id.element(i,j)) ;
00500             max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00501         }
00502     //SG_DEBUG( "max_a_id=%i\n", max_a_id) ;
00503 }
00504 
00505 void CDynProg::init_svm_arrays(INT p_num_degrees, INT p_num_svms)
00506 {
00507     svm_arrays_clean=false ;
00508 
00509     word_degree.resize_array(num_degrees) ;
00510 
00511     cum_num_words.resize_array(num_degrees+1) ;
00512     cum_num_words_array=cum_num_words.get_array() ;
00513 
00514     num_words.resize_array(num_degrees) ;
00515     num_words_array=num_words.get_array() ;
00516     
00517     //svm_values_unnormalized.resize_array(num_degrees, num_svms) ;
00518     svm_pos_start.resize_array(num_degrees) ;
00519     num_unique_words.resize_array(num_degrees) ;
00520 } 
00521 
00522 
00523 void CDynProg::init_word_degree_array(INT * p_word_degree_array, INT num_elem)
00524 {
00525     svm_arrays_clean=false ;
00526 
00527     word_degree.resize_array(num_degrees) ;
00528     ASSERT(num_degrees==num_elem);
00529 
00530 
00531     for (INT i=0; i<num_degrees; i++)
00532         word_degree[i]=p_word_degree_array[i] ;
00533 
00534 } 
00535 
00536 void CDynProg::init_cum_num_words_array(INT * p_cum_num_words_array, INT num_elem)
00537 {
00538     svm_arrays_clean=false ;
00539 
00540     cum_num_words.resize_array(num_degrees+1) ;
00541     cum_num_words_array=cum_num_words.get_array() ;
00542     ASSERT(num_degrees+1==num_elem);
00543 
00544     for (INT i=0; i<num_degrees+1; i++)
00545         cum_num_words[i]=p_cum_num_words_array[i] ;
00546 } 
00547 
00548 void CDynProg::init_num_words_array(INT * p_num_words_array, INT num_elem)
00549 {
00550     svm_arrays_clean=false ;
00551 
00552     num_words.resize_array(num_degrees) ;
00553     num_words_array=num_words.get_array() ;
00554     ASSERT(num_degrees==num_elem);
00555 
00556     for (INT i=0; i<num_degrees; i++)
00557         num_words[i]=p_num_words_array[i] ;
00558 
00559     //word_used.resize_array(num_degrees, num_words[num_degrees-1], num_strings) ;
00560     //word_used_array=word_used.get_array() ;
00561 } 
00562 
00563 void CDynProg::init_mod_words_array(INT * mod_words_input, INT num_elem, INT num_columns)
00564 {
00565     INT* p_mod_words_array=new INT[num_columns*num_elem] ;
00566     for (INT i=0; i<num_columns; i++)
00567         for (INT j=0; j<num_elem; j++)
00568             p_mod_words_array[i*j] = mod_words_input[i*num_elem+j];//FIXME //found that line in old interface GUIMatlab.cpp
00569     svm_arrays_clean=false ;
00570 
00571     ASSERT(num_svms==num_elem);
00572     ASSERT(num_columns==2);
00573 
00574     mod_words.set_array(p_mod_words_array, num_elem, 2, true, true) ;
00575     mod_words_array = mod_words.get_array() ;
00576     
00577     /*SG_DEBUG( "mod_words=[") ;
00578     for (INT i=0; i<num_elem; i++)
00579         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00580         SG_DEBUG( "]\n") ;*/
00581 } 
00582 
00583 void CDynProg::init_sign_words_array(bool* p_sign_words_array, INT num_elem)
00584 {
00585     svm_arrays_clean=false ;
00586 
00587     ASSERT(num_svms==num_elem);
00588 
00589     sign_words.set_array(p_sign_words_array, num_elem, true, true) ;
00590     sign_words_array = sign_words.get_array() ;
00591 } 
00592 
00593 void CDynProg::init_string_words_array(INT* p_string_words_array, INT num_elem)
00594 {
00595     svm_arrays_clean=false ;
00596 
00597     ASSERT(num_svms==num_elem);
00598 
00599     string_words.set_array(p_string_words_array, num_elem, true, true) ;
00600     string_words_array = string_words.get_array() ;
00601 } 
00602 
00603 bool CDynProg::check_svm_arrays()
00604 {
00605     //SG_DEBUG( "wd_dim1=%d, cum_num_words=%d, num_words=%d, svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n num_degrees=%d, num_svms=%d, num_strings=%d", word_degree.get_dim1(), cum_num_words.get_dim1(), num_words.get_dim1(), svm_pos_start.get_dim1(), num_unique_words.get_dim1(), mod_words.get_dim1(), mod_words.get_dim2(), sign_words.get_dim1(), string_words.get_dim1(), num_degrees, num_svms, num_strings);
00606     if ((word_degree.get_dim1()==num_degrees) &&
00607             (cum_num_words.get_dim1()==num_degrees+1) &&
00608             (num_words.get_dim1()==num_degrees) &&
00609             //(word_used.get_dim1()==num_degrees) &&
00610             //(word_used.get_dim2()==num_words[num_degrees-1]) &&
00611             //(word_used.get_dim3()==num_strings) &&
00612             //      (svm_values_unnormalized.get_dim1()==num_degrees) &&
00613             //      (svm_values_unnormalized.get_dim2()==num_svms) &&
00614             (svm_pos_start.get_dim1()==num_degrees) &&
00615             (num_unique_words.get_dim1()==num_degrees) &&
00616             (mod_words.get_dim1()==num_svms) &&
00617             (mod_words.get_dim2()==2) && 
00618             (sign_words.get_dim1()==num_svms) &&
00619             (string_words.get_dim1()==num_svms))
00620     {
00621         svm_arrays_clean=true ;
00622         return true ;
00623     }
00624     else
00625     {
00626         if ((num_unique_words.get_dim1()==num_degrees) &&
00627             (mod_words.get_dim1()==num_svms) &&
00628             (mod_words.get_dim2()==2) &&
00629             (sign_words.get_dim1()==num_svms) &&
00630             (string_words.get_dim1()==num_svms))
00631             fprintf(stderr, "OK\n") ;
00632         else
00633             fprintf(stderr, "not OK\n") ;
00634 
00635         if (!(word_degree.get_dim1()==num_degrees))
00636             SG_WARNING("SVM array: word_degree.get_dim1()!=num_degrees") ;
00637         if (!(cum_num_words.get_dim1()==num_degrees+1))
00638             SG_WARNING("SVM array: cum_num_words.get_dim1()!=num_degrees+1") ;
00639         if (!(num_words.get_dim1()==num_degrees))
00640             SG_WARNING("SVM array: num_words.get_dim1()==num_degrees") ;
00641         if (!(svm_pos_start.get_dim1()==num_degrees))
00642             SG_WARNING("SVM array: svm_pos_start.get_dim1()!=num_degrees") ;
00643         if (!(num_unique_words.get_dim1()==num_degrees))
00644             SG_WARNING("SVM array: num_unique_words.get_dim1()!=num_degrees") ;
00645         if (!(mod_words.get_dim1()==num_svms))
00646             SG_WARNING("SVM array: mod_words.get_dim1()!=num_svms") ;
00647         if (!(mod_words.get_dim2()==2))
00648             SG_WARNING("SVM array: mod_words.get_dim2()!=2") ;
00649         if (!(sign_words.get_dim1()==num_svms))
00650             SG_WARNING("SVM array: sign_words.get_dim1()!=num_svms") ;
00651         if (!(string_words.get_dim1()==num_svms))
00652             SG_WARNING("SVM array: string_words.get_dim1()!=num_svms") ;
00653 
00654         svm_arrays_clean=false ;
00655         return false ;  
00656     }
00657 }
00658 
00659 void CDynProg::best_path_set_seq(DREAL *seq, INT p_N, INT seq_len) 
00660 {
00661     if (!svm_arrays_clean)
00662     {
00663         SG_ERROR( "SVM arrays not clean") ;
00664         return ;
00665     } ;
00666 
00667     ASSERT(p_N==N);
00668     ASSERT(initial_state_distribution_p.get_dim1()==N);
00669     ASSERT(end_state_distribution_q.get_dim1()==N);
00670     
00671     m_seq.set_array(seq, N, seq_len, 1, true, true) ;
00672     this->N=N ;
00673 
00674     m_call=3 ;
00675     m_step=2 ;
00676 }
00677 
00678 void CDynProg::best_path_set_seq3d(DREAL *seq, INT p_N, INT seq_len, INT max_num_signals) 
00679 {
00680     if (!svm_arrays_clean)
00681     {
00682         SG_ERROR( "SVM arrays not clean") ;
00683         return ;
00684     } ;
00685 
00686     ASSERT(p_N==N);
00687     ASSERT(initial_state_distribution_p.get_dim1()==N);
00688     ASSERT(end_state_distribution_q.get_dim1()==N);
00689     
00690     m_seq.set_array(seq, N, seq_len, max_num_signals, true, true) ;
00691     this->N=N ;
00692 
00693     m_call=3 ;
00694     m_step=2 ;
00695 }
00696 
00697 void CDynProg::best_path_set_pos(INT *pos, INT seq_len)  
00698 {
00699     if (m_step!=2)
00700         SG_ERROR( "please call best_path_set_seq first\n") ;
00701     
00702     if (seq_len!=m_seq.get_dim2())
00703         SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_seq.get_dim2()) ;
00704 
00705     m_pos.set_array(pos, seq_len, true, true) ;
00706 
00707     m_step=3 ;
00708 }
00709 
00710 void CDynProg::best_path_set_orf_info(INT *orf_info, INT m, INT n) 
00711 {
00712     if (m_step!=3)
00713         SG_ERROR( "please call best_path_set_pos first\n") ;
00714         
00715     if (m!=N)
00716         SG_ERROR( "orf_info size does not match previous info %i!=%i\n", m, N) ;
00717     if (n!=2)
00718         SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00719     m_orf_info.set_array(orf_info, m, n, true, true) ;
00720     
00721     m_call=1 ;
00722     m_step=4 ;
00723 }
00724 
00725 void CDynProg::best_path_set_segment_sum_weights(DREAL *segment_sum_weights, INT num_states, INT seq_len) 
00726 {
00727     if (m_step!=3)
00728         SG_ERROR( "please call best_path_set_pos first\n") ;
00729         
00730     if (num_states!=N)
00731         SG_ERROR( "segment_sum_weights size does not match previous info %i!=%i\n", num_states, N) ;
00732     if (seq_len!=m_pos.get_dim1())
00733         SG_ERROR( "segment_sum_weights size incorrect %i!=%i\n", seq_len, m_pos.get_dim1()) ;
00734 
00735     m_segment_sum_weights.set_array(segment_sum_weights, num_states, seq_len, true, true) ;
00736     
00737     m_call=2 ;
00738     m_step=4 ;
00739 }
00740 
00741 void CDynProg::best_path_set_plif_list(CDynamicArray<CPlifBase*>* plifs)
00742 {
00743     ASSERT(plifs);
00744     CPlifBase** plif_list=plifs->get_array();
00745     INT num_plif=plifs->get_num_elements();
00746 
00747     if (m_step!=4)
00748         SG_ERROR( "please call best_path_set_orf_info or best_path_segment_sum_weights first\n") ;
00749 
00750     m_plif_list.set_array(plif_list, num_plif, true, true) ;
00751 
00752     m_step=5 ;
00753 }
00754 
00755 void CDynProg::best_path_set_plif_id_matrix(INT *plif_id_matrix, INT m, INT n) 
00756 {
00757     if (m_step!=5)
00758         SG_ERROR( "please call best_path_set_plif_list first\n") ;
00759 
00760     if ((m!=N) || (n!=N))
00761         SG_ERROR( "plif_id_matrix size does not match previous info %i!=%i or %i!=%i\n", m, N, n, N) ;
00762 
00763     CArray2<INT> id_matrix(plif_id_matrix, N, N, false, false) ;
00764 #ifdef DYNPROG_DEBUG
00765     id_matrix.set_name("id_matrix");
00766     id_matrix.display_array();
00767 #endif //DYNPROG_DEBUG
00768     m_PEN.resize_array(N, N) ;
00769     for (INT i=0; i<N; i++)
00770         for (INT j=0; j<N; j++)
00771             if (id_matrix.element(i,j)>=0)
00772                 m_PEN.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00773             else
00774                 m_PEN.element(i,j)=NULL ;
00775 
00776     m_step=6 ;
00777 }
00778 
00779 void CDynProg::best_path_set_plif_state_signal_matrix(INT *plif_id_matrix, INT m, INT max_num_signals) 
00780 {
00781     if (m_step!=6)
00782         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00783     
00784     if (m!=N)
00785         SG_ERROR( "plif_state_signal_matrix size does not match previous info %i!=%i\n", m, N) ;
00786 
00787     if (m_seq.get_dim3() != max_num_signals)
00788         SG_ERROR( "size(plif_state_signal_matrix,2) does not match with size(m_seq,3): %i!=%i\nSorry, Soeren... interface changed\n", m_seq.get_dim3(), max_num_signals) ;
00789 
00790     CArray2<INT> id_matrix(plif_id_matrix, N, max_num_signals, false, false) ;
00791     m_PEN_state_signals.resize_array(N, max_num_signals) ;
00792     for (INT i=0; i<N; i++)
00793     {
00794         for (INT j=0; j<max_num_signals; j++)
00795         {
00796             if (id_matrix.element(i,j)>=0)
00797                 m_PEN_state_signals.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00798             else
00799                 m_PEN_state_signals.element(i,j)=NULL ;
00800         }
00801     }
00802 
00803     m_step=6 ;
00804 }
00805 
00806 void CDynProg::best_path_set_genestr(CHAR* genestr, INT genestr_len, INT genestr_num)
00807 {
00808     if (m_step!=6)
00809         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00810 
00811     ASSERT(genestr);
00812     ASSERT(genestr_len>0);
00813     ASSERT(genestr_num>0);
00814 
00815     m_genestr.set_array(genestr, genestr_len, genestr_num, true, true) ;
00816 
00817     m_step=7 ;
00818 }
00819 
00820 void CDynProg::best_path_set_my_state_seq(INT* my_state_seq, INT seq_len)
00821 {
00822     ASSERT(my_state_seq && seq_len>0);
00823     m_my_state_seq.resize_array(seq_len);
00824     for (INT i=0; i<seq_len; i++)
00825         m_my_state_seq[i]=my_state_seq[i];
00826 }
00827 
00828 void CDynProg::best_path_set_my_pos_seq(INT* my_pos_seq, INT seq_len)
00829 {
00830     ASSERT(my_pos_seq && seq_len>0);
00831     m_my_pos_seq.resize_array(seq_len);
00832     for (INT i=0; i<seq_len; i++)
00833         m_my_pos_seq[i]=my_pos_seq[i];
00834 }
00835 
00836 void CDynProg::best_path_set_dict_weights(DREAL* dictionary_weights, INT dict_len, INT n) 
00837 {
00838     if (m_step!=7)
00839         SG_ERROR( "please call best_path_set_genestr first\n") ;
00840 
00841     if (num_svms!=n)
00842         SG_ERROR( "dict_weights array does not match num_svms=%i!=%i\n", num_svms, n) ;
00843 
00844     m_dict_weights.set_array(dictionary_weights, dict_len, num_svms, true, true) ;
00845 
00846     // initialize, so it does not bother when not used
00847     m_segment_loss.resize_array(max_a_id+1, max_a_id+1, 2) ;
00848     m_segment_loss.zero() ;
00849     m_segment_ids.resize_array(m_seq.get_dim2()) ;
00850     m_segment_mask.resize_array(m_seq.get_dim2()) ;
00851     m_segment_ids.zero() ;
00852     m_segment_mask.zero() ;
00853 
00854     m_step=8 ;
00855 }
00856 
00857 void CDynProg::best_path_set_segment_loss(DREAL* segment_loss, INT m, INT n) 
00858 {
00859     // here we need two matrices. Store it in one: 2N x N
00860     if (2*m!=n)
00861         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00862 
00863     if (m!=max_a_id+1)
00864         SG_ERROR( "segment_loss size should match max_a_id: %i!=%i\n", m, max_a_id+1) ;
00865 
00866     m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00867     /*for (INT i=0; i<n; i++)
00868         for (INT j=0; j<n; j++)
00869         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00870 }
00871 
00872 void CDynProg::best_path_set_segment_ids_mask(INT* segment_ids, DREAL* segment_mask, INT m) 
00873 {
00874     INT max_id = 0;
00875     for (INT i=1;i<m;i++)
00876         max_id = CMath::max(max_id,segment_ids[i]);
00877     //SG_PRINT("max_id: %i, m:%i\n",max_id, m);     
00878     m_segment_ids.set_array(segment_ids, m, false, true) ;
00879     m_segment_ids.set_name("m_segment_ids");
00880     m_segment_mask.set_array(segment_mask, m, false, true) ;
00881     m_segment_mask.set_name("m_segment_mask");
00882 }
00883 
00884 
00885 void CDynProg::best_path_call(INT nbest, bool use_orf) 
00886 {
00887     if (m_step!=8)
00888         SG_ERROR( "please call best_path_set_dict_weights first\n") ;
00889     if (m_call!=1)
00890         SG_ERROR( "please call best_path_set_orf_info first\n") ;
00891     ASSERT(N==m_seq.get_dim1()) ;
00892     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00893 
00894     m_scores.resize_array(nbest) ;
00895     m_states.resize_array(nbest, m_seq.get_dim2()) ;
00896     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
00897 
00898     m_call=1 ;
00899 
00900     ASSERT(nbest==1||nbest==2) ;
00901     ASSERT(m_genestr.get_dim2()==1) ;
00902         SG_ERROR("is this used???\n");  
00903 //  if (nbest==1)
00904 //      best_path_trans<1,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00905 //                              m_orf_info.get_array(), m_PEN.get_array(),
00906 //                              m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00907 //                              m_genestr.get_array(), m_genestr.get_dim1(), m_genestr.get_dim2(),
00908 //                              m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00909 //                              m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(),
00910 //                              use_orf) ;
00911 //  else
00912 //      best_path_trans<2,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00913 //                              m_orf_info.get_array(), m_PEN.get_array(),
00914 //                              m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00915 //                              m_genestr.get_array(), m_genestr.get_dim1(), m_genestr.get_dim2(),
00916 //                              m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00917 //                              m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(),
00918 //                              use_orf) ;
00919 //
00920     m_step=9 ;
00921 }
00922 
00923 void CDynProg::best_path_deriv_call() 
00924 {
00925     //if (m_step!=8)
00926         //SG_ERROR( "please call best_path_set_dict_weights first\n") ;
00927     //if (m_call!=1)
00928         //SG_ERROR( "please call best_path_set_orf_info first\n") ;
00929     ASSERT(N==m_seq.get_dim1()) ;
00930     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00931 
00932     m_call=5 ; // or so ...
00933 
00934     m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
00935     m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
00936 
00937     best_path_trans_deriv(m_my_state_seq.get_array(), m_my_pos_seq.get_array(), 
00938                           m_my_scores.get_array(), m_my_losses.get_array(), m_my_state_seq.get_array_size(),
00939                           m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00940                           m_PEN.get_array(), m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(), m_genestr.get_dim2()) ;
00941 
00942     m_step=12 ;
00943 }
00944 
00945 
00946 void CDynProg::best_path_2struct_call(INT nbest) 
00947 {
00948     if (m_step!=8)
00949         SG_ERROR( "please call best_path_set_orf_dict_weights first\n") ;
00950     if (m_call!=2)
00951         SG_ERROR( "please call best_path_set_segment_sum_weights first\n") ;
00952     ASSERT(N==m_seq.get_dim1()) ;
00953     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00954     
00955     m_scores.resize_array(nbest) ;
00956     m_states.resize_array(nbest, m_seq.get_dim2()) ;
00957     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
00958 
00959     m_call=2 ;
00960 
00961     best_path_2struct(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00962                       m_PEN.get_array(), 
00963                       m_genestr.get_array(), m_genestr.get_dim1(),
00964                       nbest, 
00965                       m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00966                       m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(), 
00967                       m_segment_sum_weights.get_array()) ;
00968 
00969     m_step=9 ;
00970 }
00971 
00972 void CDynProg::best_path_simple_call(INT nbest) 
00973 {
00974     if (m_step!=2)
00975         SG_ERROR( "please call best_path_set_seq first\n") ;
00976     if (m_call!=3)
00977         SG_ERROR( "please call best_path_set_seq first\n") ;
00978     ASSERT(N==m_seq.get_dim1()) ;
00979 
00980     m_scores.resize_array(nbest) ;
00981     m_states.resize_array(nbest, m_seq.get_dim2()) ;
00982 
00983     m_call=3 ;
00984 
00985     best_path_trans_simple(m_seq.get_array(), m_seq.get_dim2(), 
00986                            nbest, 
00987                            m_scores.get_array(), m_states.get_array()) ;
00988     
00989     m_step=9 ;
00990 }
00991 
00992 void CDynProg::best_path_deriv_call(INT nbest)
00993 {
00994     if (!svm_arrays_clean)
00995     {
00996         SG_ERROR( "SVM arrays not clean") ;
00997         return ;
00998     } ;
00999 
01000     //FIXME
01001 }
01002 
01003 
01004 void CDynProg::best_path_get_scores(DREAL **scores, INT *m) 
01005 {
01006     if (m_step!=9 && m_step!=12)
01007         SG_ERROR( "please call best_path*_call first\n") ;
01008 
01009     if (m_step==9)
01010     {
01011         *scores=m_scores.get_array() ;
01012         *m=m_scores.get_dim1() ;
01013     } else
01014     {
01015         *scores=m_my_scores.get_array() ;
01016         *m=m_my_scores.get_dim1() ;
01017     }
01018 
01019     m_step=10 ;
01020 }
01021 
01022 void CDynProg::best_path_get_states(INT **states, INT *m, INT *n) 
01023 {
01024     if (m_step!=10)
01025         SG_ERROR( "please call best_path_get_score first\n") ;
01026     
01027     *states=m_states.get_array() ;
01028     *m=m_states.get_dim1() ;
01029     *n=m_states.get_dim2() ;
01030 
01031     m_step=11 ;
01032 }
01033 
01034 void CDynProg::best_path_get_positions(INT **positions, INT *m, INT *n) 
01035 {
01036     if (m_step!=11)
01037         SG_ERROR( "please call best_path_get_positions first\n") ;
01038     if (m_call==3)
01039         SG_ERROR( "no position information for best_path_simple\n") ;
01040     
01041     *positions=m_positions.get_array() ;
01042     *m=m_positions.get_dim1() ;
01043     *n=m_positions.get_dim2() ;
01044 }
01045 
01046 void CDynProg::best_path_get_losses(DREAL** losses, INT* seq_len)
01047 {
01048     ASSERT(losses && seq_len);
01049     *losses=m_my_losses.get_array();
01050     *seq_len=m_my_losses.get_dim1();
01051 }
01052 
01053 
01055 
01056 DREAL CDynProg::best_path_no_b(INT max_iter, INT &best_iter, INT *my_path)
01057 {
01058     CArray2<T_STATES> psi(max_iter, N) ;
01059     CArray<DREAL>* delta = new CArray<DREAL>(N) ;
01060     CArray<DREAL>* delta_new = new CArray<DREAL>(N) ;
01061     
01062     { // initialization
01063         for (INT i=0; i<N; i++)
01064         {
01065             delta->element(i) = get_p(i) ;
01066             psi.element(0, i)= 0 ;
01067         }
01068     } 
01069     
01070     DREAL best_iter_prob = CMath::ALMOST_NEG_INFTY ;
01071     best_iter = 0 ;
01072     
01073     // recursion
01074     for (INT t=1; t<max_iter; t++)
01075     {
01076         CArray<DREAL>* dummy;
01077         INT NN=N ;
01078         for (INT j=0; j<NN; j++)
01079         {
01080             DREAL maxj = delta->element(0) + transition_matrix_a.element(0,j);
01081             INT argmax=0;
01082             
01083             for (INT i=1; i<NN; i++)
01084             {
01085                 DREAL temp = delta->element(i) + transition_matrix_a.element(i,j);
01086                 
01087                 if (temp>maxj)
01088                 {
01089                     maxj=temp;
01090                     argmax=i;
01091                 }
01092             }
01093             delta_new->element(j)=maxj ;
01094             psi.element(t, j)=argmax ;
01095         }
01096         
01097         dummy=delta;
01098         delta=delta_new;
01099         delta_new=dummy;    //switch delta/delta_new
01100         
01101         { //termination
01102             DREAL maxj=delta->element(0)+get_q(0);
01103             INT argmax=0;
01104             
01105             for (INT i=1; i<N; i++)
01106             {
01107                 DREAL temp=delta->element(i)+get_q(i);
01108                 
01109                 if (temp>maxj)
01110                 {
01111                     maxj=temp;
01112                     argmax=i;
01113                 }
01114             }
01115             //pat_prob=maxj;
01116             
01117             if (maxj>best_iter_prob)
01118             {
01119                 my_path[t]=argmax;
01120                 best_iter=t ;
01121                 best_iter_prob = maxj ;
01122             } ;
01123         } ;
01124     }
01125 
01126     
01127     { //state sequence backtracking
01128         for (INT t = best_iter; t>0; t--)
01129         {
01130             my_path[t-1]=psi.element(t, my_path[t]);
01131         }
01132     }
01133 
01134     delete delta ;
01135     delete delta_new ;
01136     
01137     return best_iter_prob ;
01138 }
01139 
01140 void CDynProg::best_path_no_b_trans(INT max_iter, INT &max_best_iter, short int nbest, DREAL *prob_nbest, INT *my_paths)
01141 {
01142     //T_STATES *psi=new T_STATES[max_iter*N*nbest] ;
01143     CArray3<T_STATES> psi(max_iter, N, nbest) ;
01144     CArray3<short int> ktable(max_iter, N, nbest) ;
01145     CArray2<short int> ktable_ends(max_iter, nbest) ;
01146 
01147     CArray<DREAL> tempvv(nbest*N) ;
01148     CArray<INT> tempii(nbest*N) ;
01149 
01150     CArray2<T_STATES> path_ends(max_iter, nbest) ;
01151     CArray2<DREAL> *delta=new CArray2<DREAL>(N, nbest) ;
01152     CArray2<DREAL> *delta_new=new CArray2<DREAL>(N, nbest) ;
01153     CArray2<DREAL> delta_end(max_iter, nbest) ;
01154 
01155     CArray2<INT> paths(max_iter, nbest) ;
01156     paths.set_array(my_paths, max_iter, nbest, false, false) ;
01157 
01158     { // initialization
01159         for (T_STATES i=0; i<N; i++)
01160         {
01161             delta->element(i,0) = get_p(i) ;
01162             for (short int k=1; k<nbest; k++)
01163             {
01164                 delta->element(i,k)=-CMath::INFTY ;
01165                 ktable.element(0,i,k)=0 ;
01166             }
01167         }
01168     }
01169     
01170     // recursion
01171     for (INT t=1; t<max_iter; t++)
01172     {
01173         CArray2<DREAL>* dummy=NULL;
01174 
01175         for (T_STATES j=0; j<N; j++)
01176         {
01177             const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01178             const T_STATES *elem_list = trans_list_forward[j] ;
01179             const DREAL *elem_val = trans_list_forward_val[j] ;
01180             
01181             INT list_len=0 ;
01182             for (short int diff=0; diff<nbest; diff++)
01183             {
01184                 for (INT i=0; i<num_elem; i++)
01185                 {
01186                     T_STATES ii = elem_list[i] ;
01187                     
01188                     tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01189                     tempii.element(list_len) = diff*N + ii ;
01190                     list_len++ ;
01191                 }
01192             }
01193             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01194             
01195             for (short int k=0; k<nbest; k++)
01196             {
01197                 if (k<list_len)
01198                 {
01199                     delta_new->element(j,k)  = -tempvv[k] ;
01200                     psi.element(t,j,k)      = (tempii[k]%N) ;
01201                     ktable.element(t,j,k)   = (tempii[k]-(tempii[k]%N))/N ;
01202                 }
01203                 else
01204                 {
01205                     delta_new->element(j,k)  = -CMath::INFTY ;
01206                     psi.element(t,j,k)      = 0 ;
01207                     ktable.element(t,j,k)   = 0 ;
01208                 }
01209             }
01210         }
01211         
01212         dummy=delta;
01213         delta=delta_new;
01214         delta_new=dummy;    //switch delta/delta_new
01215         
01216         { //termination
01217             INT list_len = 0 ;
01218             for (short int diff=0; diff<nbest; diff++)
01219             {
01220                 for (T_STATES i=0; i<N; i++)
01221                 {
01222                     tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01223                     tempii.element(list_len) = diff*N + i ;
01224                     list_len++ ;
01225                 }
01226             }
01227             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01228             
01229             for (short int k=0; k<nbest; k++)
01230             {
01231                 delta_end.element(t-1,k) = -tempvv[k] ;
01232                 path_ends.element(t-1,k) = (tempii[k]%N) ;
01233                 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%N))/N ;
01234             }
01235         }
01236     }
01237     
01238     { //state sequence backtracking
01239         max_best_iter=0 ;
01240         
01241         CArray<DREAL> sort_delta_end(max_iter*nbest) ;
01242         CArray<short int> sort_k(max_iter*nbest) ;
01243         CArray<INT> sort_t(max_iter*nbest) ;
01244         CArray<INT> sort_idx(max_iter*nbest) ;
01245         
01246         INT i=0 ;
01247         for (INT iter=0; iter<max_iter-1; iter++)
01248             for (short int k=0; k<nbest; k++)
01249             {
01250                 sort_delta_end[i]=-delta_end.element(iter,k) ;
01251                 sort_k[i]=k ;
01252                 sort_t[i]=iter+1 ;
01253                 sort_idx[i]=i ;
01254                 i++ ;
01255             }
01256         
01257         CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01258 
01259         for (short int n=0; n<nbest; n++)
01260         {
01261             short int k=sort_k[sort_idx[n]] ;
01262             INT iter=sort_t[sort_idx[n]] ;
01263             prob_nbest[n]=-sort_delta_end[n] ;
01264 
01265             if (iter>max_best_iter)
01266                 max_best_iter=iter ;
01267             
01268             ASSERT(k<nbest) ;
01269             ASSERT(iter<max_iter) ;
01270             
01271             paths.element(iter,n) = path_ends.element(iter-1, k) ;
01272             short int q   = ktable_ends.element(iter-1, k) ;
01273             
01274             for (INT t = iter; t>0; t--)
01275             {
01276                 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01277                 q = ktable.element(t, paths.element(t,n), q) ;
01278             }
01279         }
01280     }
01281 
01282     delete delta ;
01283     delete delta_new ;
01284 }
01285 
01286 
01287 void CDynProg::translate_from_single_order(WORD* obs, INT sequence_length, 
01288                                            INT start, INT order, 
01289                                            INT max_val)
01290 {
01291     INT i,j;
01292     WORD value=0;
01293     
01294     for (i=sequence_length-1; i>= ((int) order)-1; i--) //convert interval of size T
01295     {
01296         value=0;
01297         for (j=i; j>=i-((int) order)+1; j--)
01298             value= (value >> max_val) | (obs[j] << (max_val * (order-1)));
01299         
01300         obs[i]= (WORD) value;
01301     }
01302     
01303     for (i=order-2;i>=0;i--)
01304     {
01305         value=0;
01306         for (j=i; j>=i-order+1; j--)
01307         {
01308             value= (value >> max_val);
01309             if (j>=0)
01310                 value|=obs[j] << (max_val * (order-1));
01311         }
01312         obs[i]=value;
01313         //ASSERT(value<num_words) ;
01314     }
01315     if (start>0)
01316         for (i=start; i<sequence_length; i++)   
01317             obs[i-start]=obs[i];
01318 }
01319 
01320 void CDynProg::reset_svm_value(INT pos, INT & last_svm_pos, DREAL * svm_value) 
01321 {
01322     for (int i=0; i<num_words_single; i++)
01323         word_used_single[i]=false ;
01324     for (INT s=0; s<num_svms; s++)
01325         svm_value_unnormalized_single[s] = 0 ;
01326     for (INT s=0; s<num_svms; s++)
01327         svm_value[s] = 0 ;
01328     last_svm_pos = pos - 6+1 ;
01329     num_unique_words_single=0 ;
01330 }
01331 
01332 void CDynProg::extend_svm_value(WORD* wordstr, INT pos, INT &last_svm_pos, DREAL* svm_value) 
01333 {
01334     bool did_something = false ;
01335     for (int i=last_svm_pos-1; (i>=pos) && (i>=0); i--)
01336     {
01337         if (wordstr[i]>=num_words_single)
01338             SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[i]) ;
01339         
01340         if (!word_used_single[wordstr[i]])
01341         {
01342             for (INT s=0; s<num_svms_single; s++)
01343                 svm_value_unnormalized_single[s]+=dict_weights.element(wordstr[i],s) ;
01344             
01345             word_used_single[wordstr[i]]=true ;
01346             num_unique_words_single++ ;
01347             did_something=true ;
01348         }
01349     } ;
01350     if (num_unique_words_single>0)
01351     {
01352         last_svm_pos=pos ;
01353         if (did_something)
01354             for (INT s=0; s<num_svms; s++)
01355                 svm_value[s]= svm_value_unnormalized_single[s]/sqrt((double)num_unique_words_single) ;  // full normalization
01356     }
01357     else
01358     {
01359         // what should I do?
01360         for (INT s=0; s<num_svms; s++)
01361             svm_value[s]=0 ;
01362     }
01363     
01364 }
01365 
01366 
01367 void CDynProg::reset_segment_sum_value(INT num_states, INT pos, INT & last_segment_sum_pos, DREAL * segment_sum_value) 
01368 {
01369     for (INT s=0; s<num_states; s++)
01370         segment_sum_value[s] = 0 ;
01371     last_segment_sum_pos = pos ;
01372     //SG_DEBUG( "start: %i\n", pos) ;
01373 }
01374 
01375 void CDynProg::extend_segment_sum_value(DREAL *segment_sum_weights, INT seqlen, INT num_states,
01376                               INT pos, INT &last_segment_sum_pos, DREAL* segment_sum_value) 
01377 {
01378     for (int i=last_segment_sum_pos-1; (i>=pos) && (i>=0); i--)
01379     {
01380         for (INT s=0; s<num_states; s++)
01381             segment_sum_value[s] += segment_sum_weights[i*num_states+s] ;
01382     } ;
01383     //SG_DEBUG( "extend %i: %f\n", pos, segment_sum_value[0]) ;
01384     last_segment_sum_pos = pos ;
01385 }
01386 
01387 
01388 void CDynProg::best_path_2struct(const DREAL *seq_array, INT seq_len, const INT *pos,
01389                              CPlifBase **Plif_matrix, 
01390                              const char *genestr, INT genestr_len,
01391                              short int nbest, 
01392                              DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq,
01393                              DREAL *dictionary_weights, INT dict_len, DREAL *segment_sum_weights)
01394 {
01395     const INT default_look_back = 100 ;
01396     INT max_look_back = default_look_back ;
01397     bool use_svm = false ;
01398     ASSERT(dict_len==num_svms*num_words_single) ;
01399     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
01400     dict_weights_array=dict_weights.get_array() ;
01401 
01402     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false) ;
01403     CArray2<DREAL> seq((DREAL *)seq_array, N, seq_len, false) ;
01404     
01405     DREAL svm_value[num_svms] ;
01406     DREAL segment_sum_value[N] ;
01407     
01408     { // initialize svm_svalue
01409         for (INT s=0; s<num_svms; s++)
01410             svm_value[s]=0 ;
01411     }
01412     
01413     { // determine maximal length of look-back
01414         for (INT i=0; i<N; i++)
01415             for (INT j=0; j<N; j++)
01416             {
01417                 CPlifBase *penij=PEN.element(i,j) ;
01418                 if (penij==NULL)
01419                     continue ;
01420                 if (penij->get_max_value()>max_look_back)
01421                     max_look_back=(INT) CMath::ceil(penij->get_max_value());
01422                 if (penij->uses_svm_values())
01423                     use_svm=true ;
01424             }
01425     }
01426     max_look_back = CMath::min(genestr_len, max_look_back) ;
01427     //SG_DEBUG("use_svm=%i\n", use_svm) ;
01428     //SG_DEBUG("max_look_back=%i\n", max_look_back) ;
01429     
01430     const INT look_back_buflen = (max_look_back+1)*nbest*N ;
01431     //SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01432     const DREAL mem_use = (DREAL)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(short int)+sizeof(INT))+
01433                                 look_back_buflen*(2*sizeof(DREAL)+sizeof(INT))+
01434                                 seq_len*(sizeof(T_STATES)+sizeof(INT))+
01435                                 genestr_len*sizeof(bool))/(1024*1024)
01436          ;
01437     bool is_big = (mem_use>200) || (seq_len>5000) ;
01438     
01439     if (is_big)
01440     {
01441         SG_DEBUG("calling best_path_2struct: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
01442                      seq_len, N, max_look_back, nbest) ;
01443         SG_DEBUG("allocating %1.2fMB of memory\n", 
01444                      mem_use) ;
01445     }
01446     ASSERT(nbest<32000) ;
01447         
01448     CArray3<DREAL> delta(max_look_back+1, N, nbest) ;
01449     CArray3<T_STATES> psi(seq_len,N,nbest) ;
01450     CArray3<short int> ktable(seq_len,N,nbest) ;
01451     CArray3<INT> ptable(seq_len,N,nbest) ;
01452 
01453     CArray<DREAL> delta_end(nbest) ;
01454     CArray<T_STATES> path_ends(nbest) ;
01455     CArray<short int> ktable_end(nbest) ;
01456 
01457     CArray<DREAL> tempvv(look_back_buflen) ;
01458     CArray<INT> tempii(look_back_buflen) ;
01459 
01460     CArray<T_STATES> state_seq(seq_len) ;
01461     CArray<INT> pos_seq(seq_len) ;
01462 
01463     // translate to words, if svm is used
01464     WORD* wordstr=NULL ;
01465     if (use_svm)
01466     {
01467         ASSERT(dictionary_weights!=NULL) ;
01468         wordstr=new WORD[genestr_len] ;
01469         for (INT i=0; i<genestr_len; i++)
01470             switch (genestr[i])
01471             {
01472             case 'A':
01473             case 'a': wordstr[i]=0 ; break ;
01474             case 'C':
01475             case 'c': wordstr[i]=1 ; break ;
01476             case 'G':
01477             case 'g': wordstr[i]=2 ; break ;
01478             case 'T':
01479             case 't': wordstr[i]=3 ; break ;
01480             default: ASSERT(0) ;
01481             }
01482         translate_from_single_order(wordstr, genestr_len, word_degree_single-1, word_degree_single) ;
01483     }
01484     
01485     
01486     { // initialization
01487         for (T_STATES i=0; i<N; i++)
01488         {
01489             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
01490             psi.element(0,i,0)   = 0 ;
01491             ktable.element(0,i,0)  = 0 ;
01492             ptable.element(0,i,0)  = 0 ;
01493             for (short int k=1; k<nbest; k++)
01494             {
01495                 delta.element(0,i,k)    = -CMath::INFTY ;
01496                 psi.element(0,i,0)      = 0 ;
01497                 ktable.element(0,i,k)     = 0 ;
01498                 ptable.element(0,i,k)     = 0 ;
01499             }
01500         }
01501     }
01502 
01503     // recursion
01504     for (INT t=1; t<seq_len; t++)
01505     {
01506         if (is_big && t%(seq_len/10000)==1)
01507             SG_PROGRESS(t, 0, seq_len);
01508         //fprintf(stderr, "%i\n", t) ;
01509         
01510         for (T_STATES j=0; j<N; j++)
01511         {
01512             if (seq.element(j,t)<-1e20)
01513             { // if we cannot observe the symbol here, then we can omit the rest
01514                 for (short int k=0; k<nbest; k++)
01515                 {
01516                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
01517                     psi.element(t,j,k)      = 0 ;
01518                     ktable.element(t,j,k)     = 0 ;
01519                     ptable.element(t,j,k)     = 0 ;
01520                 }
01521             }
01522             else
01523             {
01524                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01525                 const T_STATES *elem_list = trans_list_forward[j] ;
01526                 const DREAL *elem_val      = trans_list_forward_val[j] ;
01527                 
01528                 INT list_len=0 ;
01529                 for (INT i=0; i<num_elem; i++)
01530                 {
01531                     T_STATES ii = elem_list[i] ;
01532                     //SG_DEBUG( "i=%i  ii=%i  num_elem=%i  PEN=%ld\n", i, ii, num_elem, PEN(j,ii)) ;
01533                     
01534                     const CPlifBase * penalty = PEN.element(j,ii) ;
01535                     INT look_back = default_look_back ;
01536                     if (penalty!=NULL)
01537                         look_back=(INT) (CMath::ceil(penalty->get_max_value()));
01538                     
01539                     INT last_svm_pos ;
01540                     if (use_svm)
01541                         reset_svm_value(pos[t], last_svm_pos, svm_value) ;
01542 
01543                     INT last_segment_sum_pos ;
01544                     reset_segment_sum_value(N, pos[t], last_segment_sum_pos, segment_sum_value) ;
01545 
01546                     for (INT ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
01547                     {
01548                         if (use_svm)
01549                             extend_svm_value(wordstr, pos[ts], last_svm_pos, svm_value) ;
01550 
01551                         extend_segment_sum_value(segment_sum_weights, seq_len, N, pos[ts], last_segment_sum_pos, segment_sum_value) ;
01552                         
01553                         DREAL pen_val = 0.0 ;
01554                         if (penalty)
01555                             pen_val=penalty->lookup_penalty(pos[t]-pos[ts], svm_value) + segment_sum_value[j] ;
01556                         for (short int diff=0; diff<nbest; diff++)
01557                         {
01558                             DREAL  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
01559                             val             += pen_val ;
01560                             
01561                             tempvv[list_len] = -val ;
01562                             tempii[list_len] =  ii + diff*N + ts*N*nbest;
01563                             //SG_DEBUG( "%i (%i,%i,%i, %i, %i) ", list_len, diff, ts, i, pos[t]-pos[ts], look_back) ;
01564                             list_len++ ;
01565                         }
01566                     }
01567                 }
01568                 CMath::nmin<INT>(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01569                 
01570                 for (short int k=0; k<nbest; k++)
01571                 {
01572                     if (k<list_len)
01573                     {
01574                         delta.element(t%max_look_back,j,k)    = -tempvv[k] + seq.element(j,t);
01575                         psi.element(t,j,k)      = (tempii[k]%N) ;
01576                         ktable.element(t,j,k)     = (tempii[k]%(N*nbest)-psi.element(t,j,k))/N ;
01577                         ptable.element(t,j,k)     = (tempii[k]-(tempii[k]%(N*nbest)))/(N*nbest) ;
01578                     }
01579                     else
01580                     {
01581                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
01582                         psi.element(t,j,k)      = 0 ;
01583                         ktable.element(t,j,k)     = 0 ;
01584                         ptable.element(t,j,k)     = 0 ;
01585                     }
01586                 }
01587             }
01588         }
01589     }
01590     
01591     { //termination
01592         INT list_len = 0 ;
01593         for (short int diff=0; diff<nbest; diff++)
01594         {
01595             for (T_STATES i=0; i<N; i++)
01596             {
01597                 tempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
01598                 tempii[list_len] = i + diff*N ;
01599                 list_len++ ;
01600             }
01601         }
01602         
01603         CMath::nmin(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01604         
01605         for (short int k=0; k<nbest; k++)
01606         {
01607             delta_end.element(k) = -tempvv[k] ;
01608             path_ends.element(k) = (tempii[k]%N) ;
01609             ktable_end.element(k) = (tempii[k]-path_ends.element(k))/N ;
01610         }
01611     }
01612     
01613     { //state sequence backtracking     
01614         for (short int k=0; k<nbest; k++)
01615         {
01616             prob_nbest[k]= delta_end.element(k) ;
01617             
01618             INT i         = 0 ;
01619             state_seq[i]  = path_ends.element(k) ;
01620             short int q   = ktable_end.element(k) ;
01621             pos_seq[i]    = seq_len-1 ;
01622 
01623             while (pos_seq[i]>0)
01624             {
01625                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01626                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
01627                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
01628                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
01629                 i++ ;
01630             }
01631             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01632             INT num_states = i+1 ;
01633             for (i=0; i<num_states;i++)
01634             {
01635                 my_state_seq[i+k*(seq_len+1)] = state_seq[num_states-i-1] ;
01636                 my_pos_seq[i+k*(seq_len+1)]   = pos_seq[num_states-i-1] ;
01637             }
01638             my_state_seq[num_states+k*(seq_len+1)]=-1 ;
01639             my_pos_seq[num_states+k*(seq_len+1)]=-1 ;
01640         }
01641     }
01642     if (is_big)
01643         SG_PRINT( "DONE.     \n") ;
01644 }
01645 
01646 /*void CDynProg::reset_svm_values(INT pos, INT * last_svm_pos, DREAL * svm_value) 
01647 {
01648     for (INT j=0; j<num_degrees; j++)
01649     {
01650         for (INT i=0; i<num_words_array[j]; i++)
01651             word_used.element(word_used_array, j, i, num_degrees)=false ;
01652         for (INT s=0; s<num_svms; s++)
01653             svm_values_unnormalized.element(j,s) = 0 ;
01654         num_unique_words[j]=0 ;
01655         last_svm_pos[j] = pos - word_degree[j]+1 ;
01656         svm_pos_start[j] = pos - word_degree[j] ;
01657     }
01658     for (INT s=0; s<num_svms; s++)
01659         svm_value[s] = 0 ;
01660 }
01661 
01662 void CDynProg::extend_svm_values(WORD** wordstr, INT pos, INT *last_svm_pos, DREAL* svm_value) 
01663 {
01664     bool did_something = false ;
01665     for (INT j=0; j<num_degrees; j++)
01666     {
01667         for (int i=last_svm_pos[j]-1; (i>=pos) && (i>=0); i--)
01668         {
01669             if (wordstr[j][i]>=num_words_array[j])
01670                 SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[j][i]) ;
01671 
01672             ASSERT(wordstr[j][i]<num_words_array[j]) ;
01673             if (!word_used.element(word_used_array, j, wordstr[j][i], num_degrees))
01674             {
01675                 for (INT s=0; s<num_svms; s++)
01676                     svm_values_unnormalized.element(j,s)+=dict_weights_array[wordstr[j][i]+cum_num_words_array[j]+s*cum_num_words_array[num_degrees]] ;
01677                     //svm_values_unnormalized.element(j,s)+=dict_weights.element(wordstr[j][i]+cum_num_words_array[j],s) ;
01678                 
01679                 //word_used.element(j,wordstr[j][i])=true ;
01680                 word_used.element(word_used_array, j, wordstr[j][i], num_degrees)=true ;
01681                 num_unique_words[j]++ ;
01682                 did_something=true ;
01683             } ;
01684         } ;
01685         if (num_unique_words[j]>0)
01686             last_svm_pos[j]=pos ;
01687     } ;
01688     
01689     if (did_something)
01690         for (INT s=0; s<num_svms; s++)
01691         {
01692             svm_value[s]=0.0 ;
01693             for (INT j=0; j<num_degrees; j++)
01694                 if (num_unique_words[j]>0)
01695                     svm_value[s]+= svm_values_unnormalized.element(j,s)/sqrt((double)num_unique_words[j]) ;  // full normalization
01696         }
01697 }
01698 */
01699 
01700 void CDynProg::init_segment_loss(struct segment_loss_struct & loss, INT seqlen, INT howmuchlookback)
01701 {
01702 #ifdef DYNPROG_TIMING
01703     MyTime.start() ;
01704 #endif
01705     INT clear_size = CMath::min(howmuchlookback,seqlen) ;
01706     
01707     if (!loss.num_segment_id)
01708     {
01709         loss.segments_changed       = new INT[seqlen] ;
01710         loss.num_segment_id         = new DREAL[(max_a_id+1)*seqlen] ;
01711         loss.length_segment_id      = new INT[(max_a_id+1)*seqlen] ;
01712 
01713         clear_size = seqlen ;
01714     }
01715     
01716     for (INT j=0; j<clear_size; j++)
01717     {
01718         loss.segments_changed[j]=0 ;
01719         for (INT i=0; i<max_a_id+1; i++)       
01720         {
01721             loss.num_segment_id[i*seqlen+j] = 0;
01722             loss.length_segment_id[i*seqlen+j] = 0;
01723         }
01724     }
01725 
01726     loss.maxlookback = howmuchlookback ;
01727     loss.seqlen = seqlen;
01728 
01729 #ifdef DYNPROG_TIMING
01730     MyTime.stop() ;
01731     segment_init_time += MyTime.time_diff_sec() ;
01732 #endif
01733 }
01734 
01735 void CDynProg::clear_segment_loss(struct segment_loss_struct & loss) 
01736 {
01737 #ifdef DYNPROG_TIMING
01738     MyTime.start() ;
01739 #endif
01740     
01741     if (loss.num_segment_id != NULL)
01742     {
01743         delete[] loss.segments_changed ;
01744         delete[] loss.num_segment_id ;
01745         delete[] loss.length_segment_id ;
01746         loss.segments_changed = NULL ;
01747         loss.num_segment_id = NULL ;
01748         loss.length_segment_id = NULL ;
01749     }
01750 #ifdef DYNPROG_TIMING
01751     MyTime.stop() ;
01752     segment_clean_time += MyTime.time_diff_sec() ;
01753 #endif
01754 }
01755 
01756 DREAL CDynProg::extend_segment_loss(struct segment_loss_struct & loss, const INT * pos_array, INT segment_id, INT pos, INT & last_pos, DREAL &last_value) 
01757 {
01758 #ifdef DYNPROG_TIMING
01759     MyTime.start() ;
01760 #endif
01761     
01762     if (pos==last_pos)
01763         return last_value ;
01764     ASSERT(pos<last_pos) ;
01765 
01766     last_pos-- ;
01767     bool changed = false ;
01768     while (last_pos>=pos)
01769     {
01770         if (loss.segments_changed[last_pos])
01771         {
01772             changed=true ;
01773             break ;
01774         }
01775         last_pos-- ;
01776     }
01777     if (last_pos<pos)
01778         last_pos = pos ;
01779     
01780     if (!changed)
01781     {
01782         ASSERT(last_pos>=0) ;
01783         ASSERT(last_pos<loss.seqlen) ;
01784         DREAL length_contrib = (pos_array[last_pos]-pos_array[pos])*m_segment_loss.element(m_segment_ids.element(pos), segment_id, 1) ;
01785         DREAL ret = last_value + length_contrib ;
01786         last_pos = pos ;
01787         return ret ;
01788     }
01789 
01790     CArray2<DREAL> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01791     CArray2<INT> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01792     DREAL ret = 0.0 ;
01793     for (INT i=0; i<max_a_id+1; i++)
01794     {
01795         //SG_DEBUG( "%i: %i, %i, %f (%f), %f (%f)\n", pos, num_segment_id.element(pos, i), length_segment_id.element(pos, i), num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id,0), m_segment_loss.element(i, segment_id, 0), length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1), m_segment_loss.element(i, segment_id,1)) ;
01796 
01797         if (num_segment_id.element(pos, i)!=0)
01798         {
01799             ret += num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 0) ;
01800         //  SG_PRINT("ret:%f pos:%i i:%i segment_id:%i \n",ret,pos,i,segment_id);
01801         //  if (ret>0)
01802         //  {
01803         //      for (INT g=0; g<max_a_id+1; g++)
01804         //          SG_PRINT("g:%i sid(pos, g):%i    ",g,num_segment_id.element(pos, g));
01805         //      SG_PRINT("\n");
01806         //  }
01807         }
01808         if (length_segment_id.element(pos, i)!=0)
01809             ret += length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1) ;
01810     }
01811     last_pos = pos ;
01812     last_value = ret ;
01813 
01814 #ifdef DYNPROG_TIMING
01815     MyTime.stop() ;
01816     segment_extend_time += MyTime.time_diff_sec() ;
01817 #endif
01818     return ret ;
01819 }
01820 
01821 void CDynProg::find_segment_loss_till_pos(const INT * pos, INT t_end, CArray<INT>& segment_ids,  CArray<DREAL>& segment_mask, struct segment_loss_struct & loss) 
01822 {
01823 #ifdef DYNPROG_TIMING
01824     MyTime.start() ;
01825 #endif
01826     CArray2<DREAL> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01827     CArray2<INT> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01828     
01829     for (INT i=0; i<max_a_id+1; i++)
01830     {
01831         num_segment_id.element(t_end, i) = 0 ;
01832         length_segment_id.element(t_end, i) = 0 ;
01833     }
01834     INT wobble_pos_segment_id_switch = 0 ;
01835     INT last_segment_id = -1 ;
01836     INT ts = t_end-1 ;       
01837     while ((ts>=0) && (pos[t_end] - pos[ts] <= loss.maxlookback))
01838     {
01839         INT cur_segment_id = segment_ids.element(ts) ;
01840         // allow at most one wobble
01841         bool wobble_pos = (CMath::abs(segment_mask.element(ts))<1e-7) && (wobble_pos_segment_id_switch==0) ;
01842         if (!(cur_segment_id<=max_a_id))
01843             SG_ERROR("(cur_segment_id<=max_a_id), cur_segment_id:%i max_a_id:%i\n",cur_segment_id,max_a_id);
01844         ASSERT(cur_segment_id>=0) ;
01845         
01846         for (INT i=0; i<max_a_id+1; i++)
01847         {
01848             num_segment_id.element(ts, i) = num_segment_id.element(ts+1, i) ;
01849             length_segment_id.element(ts, i) = length_segment_id.element(ts+1, i) ;
01850         }
01851         
01852         if (cur_segment_id!=last_segment_id)
01853         {
01854             if (wobble_pos)
01855             {
01856                 //SG_DEBUG( "no change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01857                 wobble_pos_segment_id_switch++ ;
01858                 //ASSERT(wobble_pos_segment_id_switch<=1) ;
01859             }
01860             else
01861             {
01862                 //SG_DEBUG( "change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01863                 loss.segments_changed[ts] = true ;
01864                 num_segment_id.element(ts, cur_segment_id) += segment_mask.element(ts);
01865                 length_segment_id.element(ts, cur_segment_id) += (INT)((pos[ts+1]-pos[ts])*segment_mask.element(ts));
01866                 wobble_pos_segment_id_switch = 0 ;
01867             }
01868             last_segment_id = cur_segment_id ;
01869         } 
01870         else
01871             if (!wobble_pos)
01872                 length_segment_id.element(ts, cur_segment_id) += pos[ts+1] - pos[ts] ;
01873 
01874         ts--;
01875     }
01876 #ifdef DYNPROG_TIMING
01877     MyTime.stop() ;
01878     segment_pos_time += MyTime.time_diff_sec() ;
01879 #endif
01880 }
01881 
01882 void CDynProg::init_svm_values(struct svm_values_struct & svs, INT start_pos, INT seqlen, INT maxlookback)
01883 {
01884 #ifdef DYNPROG_TIMING
01885     MyTime.start() ;
01886 #endif
01887     /*
01888       See find_svm_values_till_pos for comments
01889       
01890       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
01891       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
01892       
01893       where t_end is the end of all segments we are currently looking at
01894     */
01895     INT clear_size = CMath::min(maxlookback,seqlen) ;
01896     
01897     if (!svs.svm_values)
01898     {
01899         svs.svm_values              = new DREAL[seqlen*num_svms] ;
01900         svs.num_unique_words        = new INT*[num_degrees] ;
01901         svs.svm_values_unnormalized = new DREAL*[num_degrees] ;
01902         svs.word_used               = new bool**[num_degrees] ;
01903         for (INT j=0; j<num_degrees; j++)
01904         {
01905             svs.word_used[j]               = new bool*[num_svms] ;
01906             for (INT s=0; s<num_svms; s++)
01907                 svs.word_used[j][s]               = new bool[num_words_array[j]] ;
01908         }
01909         for (INT j=0; j<num_degrees; j++)
01910         {
01911             svs.svm_values_unnormalized[j] = new DREAL[num_svms] ;
01912             svs.num_unique_words[j]        = new INT[num_svms] ;
01913         }
01914         svs.start_pos               = new INT[num_svms] ;
01915         clear_size = seqlen ;
01916     }
01917     
01918     //for (INT i=0; i<maxlookback*num_svms; i++)       // initializing this for safety, though we should be able to live without it
01919     //  svs.svm_values[i] = 0;
01920     memset(svs.svm_values, 0, clear_size*num_svms*sizeof(DREAL)) ;
01921 
01922     for (INT j=0; j<num_degrees; j++)
01923     {       
01924         //for (INT s=0; s<num_svms; s++)
01925         //  svs.svm_values_unnormalized[j][s] = 0 ;
01926         memset(svs.svm_values_unnormalized[j], 0, num_svms*sizeof(DREAL)) ;
01927 
01928         //for (INT s=0; s<num_svms; s++)
01929         //  svs.num_unique_words[j][s] = 0 ;
01930         memset(svs.num_unique_words[j], 0, num_svms*sizeof(INT)) ;
01931     }
01932     
01933     for (INT j=0; j<num_degrees; j++)
01934         for (INT s=0; s<num_svms; s++)
01935         {
01936             //for (INT i=0; i<num_words_array[j]; i++)
01937             //  svs.word_used[j][s][i] = false ;
01938             memset(svs.word_used[j][s], 0, num_words_array[j]*sizeof(bool)) ;
01939         }
01940     
01941     for (INT s=0; s<num_svms; s++)
01942         svs.start_pos[s] = start_pos - mod_words.element(s,1) ;
01943     
01944     svs.maxlookback = maxlookback ;
01945     svs.seqlen = seqlen ;
01946 
01947 #ifdef DYNPROG_TIMING
01948     MyTime.stop() ;
01949     svm_init_time += MyTime.time_diff_sec() ;
01950 #endif
01951 }
01952 
01953 void CDynProg::clear_svm_values(struct svm_values_struct & svs) 
01954 {
01955 #ifdef DYNPROG_TIMING
01956     MyTime.start() ;
01957 #endif  
01958     if (NULL != svs.svm_values)
01959     {
01960         for (INT j=0; j<num_degrees; j++)
01961         {
01962             for (INT s=0; s<num_svms; s++)
01963                 delete[] svs.word_used[j][s] ;
01964             delete[] svs.word_used[j];
01965         }
01966         delete[] svs.word_used;
01967         
01968         for (INT j=0; j<num_degrees; j++)
01969             delete[] svs.svm_values_unnormalized[j] ;
01970         for (INT j=0; j<num_degrees; j++)
01971             delete[] svs.num_unique_words[j] ;
01972         
01973         delete[] svs.svm_values_unnormalized;
01974         delete[] svs.svm_values;
01975         delete[] svs.num_unique_words ;
01976         
01977         svs.word_used=NULL ;
01978         svs.svm_values=NULL ;
01979         svs.svm_values_unnormalized=NULL ;
01980     }
01981 
01982 #ifdef DYNPROG_TIMING
01983     MyTime.stop() ;
01984     svm_clean_time += MyTime.time_diff_sec() ;
01985 #endif
01986 }
01987 
01988 
01989 void CDynProg::find_svm_values_till_pos(WORD*** wordstr,  const INT *pos,  INT t_end, struct svm_values_struct &svs)
01990 {
01991 #ifdef DYNPROG_TIMING
01992     MyTime.start() ;
01993 #endif
01994     
01995     /*
01996       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
01997       corresponding to the 6-mer in genestr(i-5:i) 
01998       pos is a vector of candidate transition positions (it is input to best_path_trans)
01999       t_end is some index in pos
02000       
02001       svs has been initialized by init_svm_values
02002       
02003       At the end of this procedure, 
02004       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02005       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02006       
02007       The SVM weights are precomputed in dict_weights
02008     */
02009     
02010     for (INT j=0; j<num_degrees; j++)
02011     //for (INT j=0; j<1; j++)
02012     {
02013         INT plen = 1;
02014         INT ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02015         INT offset;
02016         
02017         INT posprev = pos[t_end]-word_degree[j]+1;
02018         INT poscurrent = pos[ts];
02019         
02020         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02021 
02022         if (poscurrent<0)
02023             poscurrent = 0;
02024         DREAL * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02025         INT * my_num_unique_words = svs.num_unique_words[j] ;
02026         bool ** my_word_used = svs.word_used[j] ;
02027         
02028         INT len = pos[t_end] - poscurrent;
02029         while ((ts>=0) && (len <= svs.maxlookback))
02030         {
02031             for (int i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02032             {
02033                 //fprintf(stderr, "string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02034                 
02035                 WORD word = wordstr[string_words_array[0]][j][i] ;
02036                 INT last_string = string_words_array[0] ;
02037                 for (INT s=0; s<num_svms; s++)
02038                 {
02039                 //sign_words_array[s]=false;
02040                     // try to avoid memory accesses
02041                     if (last_string != string_words_array[s])
02042                     {
02043                         last_string = string_words_array[s] ;
02044                         word = wordstr[last_string][j][i] ;
02045                     }
02046 
02047                     // do not consider k-mer, if seen before and in signum mode
02048                     if (sign_words_array[s] && my_word_used[s][word])
02049                         continue ;
02050                     
02051                     // only count k-mer if in frame (if applicable)
02052                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02053                     {
02054                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02055                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02056                         my_num_unique_words[s]++ ;
02057                         if (sign_words_array[s])
02058                             my_word_used[s][word]=true ;
02059                     }
02060                 }
02061             }
02062             offset = plen*num_svms ;
02063             for (INT s=0; s<num_svms; s++)
02064             {
02065                 double normalization_factor = 1.0;
02066                 if (my_num_unique_words[s] > 0)
02067                 {
02068                     if (sign_words_array[s])
02069                         normalization_factor = sqrt((double)my_num_unique_words[s]);
02070                     else
02071                         normalization_factor = (double)my_num_unique_words[s];
02072                 }
02073 
02074                 if (j==0)
02075                     svs.svm_values[offset+s]=0 ;
02076                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02077             }
02078             
02079             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02080                 posprev = poscurrent;           
02081             
02082             ts--;
02083             plen++;
02084             
02085             if (ts>=0)
02086             {
02087                 poscurrent=pos[ts];
02088                 if (poscurrent<0)
02089                     poscurrent = 0;
02090                 len = pos[t_end] - poscurrent;
02091             }
02092         }
02093     }
02094 
02095 #ifdef DYNPROG_TIMING
02096     MyTime.stop() ;
02097     svm_pos_time += MyTime.time_diff_sec() ;
02098 #endif
02099 }
02100 
02101 
02102 void CDynProg::find_svm_values_till_pos(WORD** wordstr,  const INT *pos,  INT t_end, struct svm_values_struct &svs)
02103 {
02104 #ifdef DYNPROG_TIMING
02105     MyTime.start() ;
02106 #endif  
02107     /*
02108       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
02109       corresponding to the 6-mer in genestr(i-5:i) 
02110       pos is a vector of candidate transition positions (it is input to best_path_trans)
02111       t_end is some index in pos
02112       
02113       svs has been initialized by init_svm_values
02114       
02115       At the end of this procedure, 
02116       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02117       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02118       
02119       The SVM weights are precomputed in dict_weights
02120     */
02121     
02122     for (INT j=0; j<num_degrees; j++)
02123     //for (INT j=0; j<1; j++)
02124     {
02125         INT plen = 1;
02126         INT ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02127         INT offset;
02128         
02129         INT posprev = pos[t_end]-word_degree[j]+1;
02130         INT poscurrent = pos[ts];
02131         
02132         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02133 
02134         if (poscurrent<0)
02135             poscurrent = 0;
02136         DREAL * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02137         INT * my_num_unique_words = svs.num_unique_words[j] ;
02138         bool ** my_word_used = svs.word_used[j] ;
02139         
02140         INT len = pos[t_end] - poscurrent;
02141         while ((ts>=0) && (len <= svs.maxlookback))
02142         {
02143             for (int i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02144             {
02145                 //fprintf(stderr, "string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02146                 
02147                 WORD word = wordstr[j][i] ;
02148                 for (INT s=0; s<num_svms; s++)
02149                 {
02150                     //sign_words_array[s]=false;
02151                     // do not consider k-mer, if seen before and in signum mode
02152                     if (sign_words_array[s] && my_word_used[s][word])
02153                         continue ;
02154                     
02155                     // only count k-mer if in frame (if applicable)
02156                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02157                     {
02158                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02159                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02160                         my_num_unique_words[s]++ ;
02161                         if (sign_words_array[s])
02162                             my_word_used[s][word]=true ;
02163                     }
02164                 }
02165             }
02166             offset = plen*num_svms ;
02167             for (INT s=0; s<num_svms; s++)
02168             {
02169                 double normalization_factor = 1.0;
02170                 if (my_num_unique_words[s] > 0)
02171                 {
02172                     if (sign_words_array[s])
02173                         normalization_factor = sqrt((double)my_num_unique_words[s]);
02174                     else
02175                         normalization_factor = (double)my_num_unique_words[s];
02176                 }
02177 
02178                 if (j==0)
02179                     svs.svm_values[offset+s]=0 ;
02180                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02181             }
02182             
02183             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02184                 posprev = poscurrent;           
02185             
02186             ts--;
02187             plen++;
02188             
02189             if (ts>=0)
02190             {
02191                 poscurrent=pos[ts];
02192                 if (poscurrent<0)
02193                     poscurrent = 0;
02194                 len = pos[t_end] - poscurrent;
02195             }
02196         }
02197     }
02198 
02199 #ifdef DYNPROG_TIMING
02200     MyTime.stop() ;
02201     svm_pos_time += MyTime.time_diff_sec() ;
02202 #endif
02203 }
02204 
02205 bool CDynProg::extend_orf(INT orf_from, INT orf_to, INT start, INT &last_pos, INT to)
02206 {
02207 #ifdef DYNPROG_TIMING
02208     MyTime.start() ;
02209 #endif
02210     
02211     if (start<0) 
02212         start=0 ;
02213     if (to<0)
02214         to=0 ;
02215     
02216     INT orf_target = orf_to-orf_from ;
02217     if (orf_target<0) orf_target+=3 ;
02218     
02219     INT pos ;
02220     if (last_pos==to)
02221         pos = to-orf_to-3 ;
02222     else
02223         pos=last_pos ;
02224 
02225     if (pos<0)
02226         return true ;
02227     
02228     for (; pos>=start; pos-=3)
02229         if (m_genestr_stop[pos])
02230             return false ;
02231     
02232     last_pos = CMath::min(pos+3,to-orf_to-3) ;
02233 
02234 #ifdef DYNPROG_TIMING
02235     MyTime.stop() ;
02236     orf_time += MyTime.time_diff_sec() ;
02237 #endif
02238     return true ;
02239 }
02240 
02241 template <short int nbest, bool with_loss, bool with_multiple_sequences>
02242 void CDynProg::best_path_trans(const DREAL *seq_array, INT seq_len, const INT *pos, 
02243                                const INT *orf_info_array, CPlifBase **Plif_matrix, 
02244                                CPlifBase **Plif_state_signals, INT max_num_signals, INT genestr_num, 
02245                                DREAL *prob_nbest, INT *my_state_seq, INT *my_pos_seq, bool use_orf)
02246 {
02247 #ifdef DYNPROG_TIMING
02248     segment_init_time = 0.0 ;
02249     segment_pos_time = 0.0 ;
02250     segment_extend_time = 0.0 ;
02251     segment_clean_time = 0.0 ;
02252     orf_time = 0.0 ;
02253     svm_init_time = 0.0 ;
02254     svm_pos_time = 0.0 ;
02255     svm_clean_time = 0.0 ;
02256     MyTime2.start() ;
02257 #endif
02258     
02259     //SG_PRINT( "best_path_trans:%x\n", seq_array);
02260     if (!svm_arrays_clean)
02261     {
02262         SG_ERROR( "SVM arrays not clean") ;
02263         return ;
02264     }
02265     
02266 #ifdef DYNPROG_DEBUG
02267     transition_matrix_a.set_name("transition_matrix");
02268     transition_matrix_a.display_array();
02269     mod_words.display_array() ;
02270     sign_words.display_array() ;
02271     string_words.display_array() ;
02272     fprintf(stderr, "use_orf = %i\n", use_orf) ;
02273 #endif
02274     
02275     const INT default_look_back = 30000 ;
02276     INT max_look_back = default_look_back ;
02277     bool use_svm = false ;
02278     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
02279     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
02280     //dict_weights_array=dict_weights.get_array() ;
02281     
02282     SG_PRINT("N:%i, seq_len:%i, max_num_signals:%i\n",N, seq_len, max_num_signals) ;
02283 
02284 //  for (int i=0;i<N*seq_len*max_num_signals;i++)
02285 //      SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
02286 
02287     //CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02288     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, true) ;
02289     PEN.set_name("PEN");
02290     //CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02291     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, true) ;
02292     PEN_state_signals.set_name("state_signals");
02293     //CArray3<DREAL> seq_input(seq_array, N, seq_len, max_num_signals) ;
02294     CArray3<DREAL> seq_input(seq_array, N, seq_len, max_num_signals) ;
02295     seq_input.set_name("seq_input") ;
02296     //seq_input.display_array() ;
02297     CArray2<DREAL> seq(N, seq_len) ;
02298     seq.set_name("seq") ;
02299     seq.zero() ;
02300 
02301     CArray2<INT> orf_info(orf_info_array, N, 2) ;
02302     orf_info.set_name("orf_info") ;
02303     //g_orf_info = orf_info ;
02304     //orf_info.display_array() ;
02305 
02306     DREAL svm_value[2*num_svms] ;
02307     { // initialize svm_svalue
02308         for (INT s=0; s<2*num_svms; s++)
02309             svm_value[s]=0 ;
02310     }
02311 
02312     { // convert seq_input to seq
02313       // this is independent of the svm values 
02314         for (INT i=0; i<N; i++)
02315             for (INT j=0; j<seq_len; j++)
02316                 seq.element(i,j) = 0 ;
02317 
02318         for (INT i=0; i<N; i++)
02319             for (INT j=0; j<seq_len; j++)
02320                 for (INT k=0; k<max_num_signals; k++)
02321                 {
02322                     if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
02323                     {
02324                         // no plif
02325                         seq.element(i,j) = seq_input.element(i,j,k) ;
02326                         break ;
02327                     }
02328                     if (PEN_state_signals.element(i,k)!=NULL)
02329                     {
02330                         // just one plif
02331                         if (finite(seq_input.element(i,j,k)))
02332                             seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input.element(i,j,k), svm_value) ;
02333                         else
02334                             // keep infinity values
02335                             seq.element(i,j) = seq_input.element(i, j, k) ;
02336                     } 
02337                     else
02338                         break ;
02339                 }
02340     }
02341 
02342     { // determine maximal length of look-back
02343         for (INT i=0; i<N; i++)
02344             for (INT j=0; j<N; j++)
02345             {
02346                 CPlifBase *penij=PEN.element(i,j) ;
02347                 if (penij==NULL)
02348                     continue ;
02349                 if (penij->get_max_value()>max_look_back)
02350                 {
02351                     SG_DEBUG( "%d %d -> value: %f\n", i,j,penij->get_max_value());
02352                     max_look_back=(INT) (CMath::ceil(penij->get_max_value()));
02353                 }
02354                 if (penij->uses_svm_values())
02355                     use_svm=true ;
02356             }
02357     }
02358 
02359     //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr_len) ;
02360     max_look_back = CMath::min(m_genestr_len, max_look_back) ;
02361     SG_DEBUG("use_svm=%i\n", use_svm) ;
02362     
02363     SG_DEBUG("maxlook: %d N: %d nbest: %d \n", max_look_back, N, nbest);
02364     const INT look_back_buflen = (max_look_back*N+1)*nbest ;
02365     SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
02366     const DREAL mem_use = (DREAL)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(short int)+sizeof(INT))+
02367                                   look_back_buflen*(2*sizeof(DREAL)+sizeof(INT))+
02368                                   seq_len*(sizeof(T_STATES)+sizeof(INT))+
02369                                   m_genestr_len*sizeof(bool))/(1024*1024);
02370 
02371     bool is_big = (mem_use>200) || (seq_len>5000) ;
02372 
02373     if (is_big)
02374     {
02375         SG_DEBUG("calling best_path_trans: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
02376                      seq_len, N, max_look_back, nbest) ;
02377         SG_DEBUG("allocating %1.2fMB of memory\n", 
02378                      mem_use) ;
02379     }
02380     ASSERT(nbest<32000) ;
02381     
02382 
02383     
02384     CArray3<DREAL> delta(seq_len, N, nbest) ;
02385     delta.set_name("delta");
02386     DREAL* delta_array = delta.get_array() ;
02387     //delta.zero() ;
02388     
02389     CArray3<T_STATES> psi(seq_len, N, nbest) ;
02390     psi.set_name("psi");
02391     //psi.zero() ;
02392     
02393     CArray3<short int> ktable(seq_len, N, nbest) ;
02394     ktable.set_name("ktable");
02395     //ktable.zero() ;
02396     
02397     CArray3<INT> ptable(seq_len, N, nbest) ;    
02398     ptable.set_name("ptable");
02399     //ptable.zero() ;
02400 
02401     CArray<DREAL> delta_end(nbest) ;
02402     delta_end.set_name("delta_end");
02403     //delta_end.zero() ;
02404     
02405     CArray<T_STATES> path_ends(nbest) ;
02406     path_ends.set_name("path_ends");
02407     //path_ends.zero() ;
02408     
02409     CArray<short int> ktable_end(nbest) ;
02410     ktable_end.set_name("ktable_end");
02411     //ktable_end.zero() ;
02412 
02413     DREAL * fixedtempvv=new DREAL[look_back_buflen] ;
02414     memset(fixedtempvv, 0, look_back_buflen*sizeof(DREAL)) ;
02415     INT * fixedtempii=new INT[look_back_buflen] ;
02416     memset(fixedtempii, 0, look_back_buflen*sizeof(INT)) ;
02417 
02418     CArray<DREAL> oldtempvv(look_back_buflen) ;
02419     oldtempvv.set_name("oldtempvv");
02420     CArray<DREAL> oldtempvv2(look_back_buflen) ;
02421     oldtempvv2.set_name("oldtempvv2");
02422     //oldtempvv.zero() ;
02423     //oldtempvv.display_size() ;
02424     
02425     CArray<INT> oldtempii(look_back_buflen) ;
02426     oldtempii.set_name("oldtempii");
02427     CArray<INT> oldtempii2(look_back_buflen) ;
02428     oldtempii2.set_name("oldtempii2");
02429     //oldtempii.zero() ;
02430 
02431     CArray<T_STATES> state_seq(seq_len) ;
02432     state_seq.set_name("state_seq");
02433     //state_seq.zero() ;
02434     
02435     CArray<INT> pos_seq(seq_len) ;
02436     pos_seq.set_name("pos_seq");
02437     //pos_seq.zero() ;
02438 
02439     
02440     //dict_weights.set_name("dict_weights") ;
02441     word_degree.set_name("word_degree") ;
02442     cum_num_words.set_name("cum_num_words") ;
02443     num_words.set_name("num_words") ;
02444     //word_used.set_name("word_used") ;
02445     //svm_values_unnormalized.set_name("svm_values_unnormalized") ;
02446     svm_pos_start.set_name("svm_pos_start") ;
02447     num_unique_words.set_name("num_unique_words") ;
02448 
02449     PEN.set_name("PEN") ;
02450     seq.set_name("seq") ;
02451     orf_info.set_name("orf_info") ;
02452     
02453     delta.set_name("delta") ;
02454     psi.set_name("psi") ;
02455     ktable.set_name("ktable") ;
02456     ptable.set_name("ptable") ;
02457     delta_end.set_name("delta_end") ;
02458     path_ends.set_name("path_ends") ;
02459     ktable_end.set_name("ktable_end") ;
02460 
02461 #ifdef USE_TMP_ARRAYCLASS
02462     fixedtempvv.set_name("fixedtempvv") ;
02463     fixedtempii.set_name("fixedtempvv") ;
02464 #endif
02465 
02466     oldtempvv.set_name("oldtempvv") ;
02467     oldtempvv2.set_name("oldtempvv2") ;
02468     oldtempii.set_name("oldtempii") ;
02469     oldtempii2.set_name("oldtempii2") ;
02470 
02471 
02473 
02474 #ifdef DYNPROG_DEBUG
02475     state_seq.display_size() ;
02476     pos_seq.display_size() ;
02477 
02478     dict_weights.display_size() ;
02479     word_degree.display_array() ;
02480     cum_num_words.display_array() ;
02481     num_words.display_array() ;
02482     //word_used.display_size() ;
02483     //svm_values_unnormalized.display_size() ;
02484     svm_pos_start.display_array() ;
02485     num_unique_words.display_array() ;
02486 
02487     PEN.display_size() ;
02488     PEN_state_signals.display_size() ;
02489     seq.display_size() ;
02490     orf_info.display_size() ;
02491     
02492     //m_genestr_stop.display_size() ;
02493     delta.display_size() ;
02494     psi.display_size() ;
02495     ktable.display_size() ;
02496     ptable.display_size() ;
02497     delta_end.display_size() ;
02498     path_ends.display_size() ;
02499     ktable_end.display_size() ;
02500 
02501 #ifdef USE_TMP_ARRAYCLASS
02502     fixedtempvv.display_size() ;
02503     fixedtempii.display_size() ;
02504 #endif
02505 
02506     //oldtempvv.display_size() ;
02507     //oldtempii.display_size() ;
02508 
02509     state_seq.display_size() ;
02510     pos_seq.display_size() ;
02511 
02512     CArray<INT>pp = CArray<INT>(pos, seq_len) ;
02513     pp.set_name("pp");
02514     pp.display_array() ;
02515     
02516     //seq.zero() ;
02517     //seq_input.display_array() ;
02518 
02519 #endif //DYNPROG_DEBUG
02520 
02522 
02523 
02524 
02525     {
02526         for (INT s=0; s<num_svms; s++)
02527             ASSERT(string_words_array[s]<genestr_num)  ;
02528     }
02529 
02530     
02531         //CArray2<INT*> trans_matrix_svms(N,N);
02532     //CArray2<INT> trans_matrix_num_svms(N,N);
02533 
02534     { // initialization
02535 
02536         for (T_STATES i=0; i<N; i++)
02537         {
02538             //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02539             delta.element(delta_array, 0, i, 0, seq_len, N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02540             psi.element(0,i,0)   = 0 ;
02541             if (nbest>1)
02542                 ktable.element(0,i,0)  = 0 ;
02543             ptable.element(0,i,0)  = 0 ;
02544             for (short int k=1; k<nbest; k++)
02545             {
02546                 INT dim1, dim2, dim3 ;
02547                 delta.get_array_size(dim1, dim2, dim3) ;
02548                 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
02549                 //delta.element(0, i, k)    = -CMath::INFTY ;
02550                 delta.element(delta_array, 0, i, k, seq_len, N)    = -CMath::INFTY ;
02551                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
02552                 if (nbest>1)
02553                     ktable.element(0,i,k)     = 0 ;
02554                 ptable.element(0,i,k)     = 0 ;
02555             }
02556 /*
02557             for (T_STATES j=0; j<N; j++)
02558             {
02559                 CPlifBase * penalty = PEN.element(i,j) ;
02560                 INT num_current_svms=0;
02561                 INT svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02562                 if (penalty)
02563                 {
02564                     SG_PRINT("trans %i -> %i \n",i,j);
02565                     penalty->get_used_svms(&num_current_svms, svm_ids);
02566                     trans_matrix_svms.set_element(svm_ids,i,j);
02567                     for (INT l=0;l<num_current_svms;l++)
02568                         SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
02569                     trans_matrix_num_svms.set_element(num_current_svms,i,j);
02570                 }
02571             }
02572 */
02573 
02574         }
02575     }
02576 
02577     struct svm_values_struct svs;
02578     svs.num_unique_words = NULL;
02579     svs.svm_values = NULL;
02580     svs.svm_values_unnormalized = NULL;
02581     svs.word_used = NULL;
02582 
02583     struct segment_loss_struct loss;
02584     loss.segments_changed = NULL;
02585     loss.num_segment_id = NULL;
02586 
02587     SG_DEBUG("START_RECURSION \n\n");
02588 
02589     // recursion
02590     for (INT t=1; t<seq_len; t++)
02591     {
02592         if (is_big && t%(1+(seq_len/1000))==1)
02593             SG_PROGRESS(t, 0, seq_len);
02594         //fprintf(stderr, "%i\n", t) ;
02595 
02596         //init_svm_values(svs, pos[t], seq_len, max_look_back) ;
02597         //if (with_multiple_sequences)
02598             //find_svm_values_till_pos(wordstr, pos, t, svs);  
02599         //else
02600             //find_svm_values_till_pos(wordstr[0], pos, t, svs);  
02601 
02602         if (with_loss)
02603         {
02604             init_segment_loss(loss, seq_len, max_look_back);
02605             find_segment_loss_till_pos(pos, t, m_segment_ids, m_segment_mask, loss);  
02606         }
02607         
02608         for (T_STATES j=0; j<N; j++)
02609         {
02610             if (seq.element(j,t)<=-1e20)
02611             { // if we cannot observe the symbol here, then we can omit the rest
02612                 for (short int k=0; k<nbest; k++)
02613                 {
02614                     delta.element(delta_array, t, j, k, seq_len, N)    = seq.element(j,t) ;
02615                     psi.element(t,j,k)      = 0 ;
02616                     if (nbest>1)
02617                         ktable.element(t,j,k)     = 0 ;
02618                     ptable.element(t,j,k)     = 0 ;
02619                 }
02620             }
02621             else
02622             {
02623                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
02624                 const T_STATES *elem_list = trans_list_forward[j] ;
02625                 const DREAL *elem_val      = trans_list_forward_val[j] ;
02626                 const INT *elem_id      = trans_list_forward_id[j] ;
02627                 
02628                 INT fixed_list_len = 0 ;
02629                 DREAL fixedtempvv_ = CMath::INFTY ;
02630                 INT fixedtempii_ = 0 ;
02631                 
02632             
02633                 for (INT i=0; i<num_elem; i++)
02634                 {
02635                     T_STATES ii = elem_list[i] ;
02636                     
02637                     const CPlifBase * penalty = PEN.element(j,ii) ;
02638                     INT look_back = default_look_back ;
02639                     { // find lookback length
02640                         CPlifBase *pen = (CPlifBase*) penalty ;
02641                         if (pen!=NULL)
02642                             look_back=(INT) (CMath::ceil(pen->get_max_value()));
02643                         ASSERT(look_back<1e6);
02644                     }
02645                     //INT num_current_svms = trans_matrix_num_svms.element(j,ii);
02646                     //INT* svm_ids = trans_matrix_svms.element(j,ii);
02647                     //INT* svm_ids[num_current_svms];
02648                     //for (INT id=0;id<num_current_svms;id++)
02649                     //  svm_ids[id]=*(p_svm_ids+id);
02650                 
02651                     INT orf_from = orf_info.element(ii,0) ;
02652                     INT orf_to   = orf_info.element(j,1) ;
02653                     if((orf_from!=-1)!=(orf_to!=-1))
02654                         SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02655                     ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02656                     
02657                     INT orf_target = -1 ;
02658                     if (orf_from!=-1)
02659                     {
02660                         orf_target=orf_to-orf_from ;
02661                         if (orf_target<0) 
02662                             orf_target+=3 ;
02663                         ASSERT(orf_target>=0 && orf_target<3) ;
02664                     }
02665                     
02666                     INT orf_last_pos = pos[t] ;
02667                     INT loss_last_pos = t ;
02668                     DREAL last_loss = 0.0 ;
02669 
02670                     for (INT ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
02671                     {
02672                         bool ok ;
02673                         //int plen=t-ts;
02674 
02675                         /*for (INT s=0; s<num_svms; s++)
02676                             if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
02677                                 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
02678                             {
02679                                 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
02680                                 }*/
02681                         
02682                         if (orf_target==-1)
02683                             ok=true ;
02684                         else if (pos[ts]!=-1 && (pos[t]-pos[ts])%3==orf_target)
02685                             ok=(!use_orf) || extend_orf(orf_from, orf_to, pos[ts], orf_last_pos, pos[t]) ;
02686                         else
02687                             ok=false ;
02688                         
02689                         if (ok)
02690                         {
02691                             
02692                             DREAL segment_loss = 0.0 ;
02693                             if (with_loss)
02694                                 segment_loss = extend_segment_loss(loss, pos, elem_id[i], ts, loss_last_pos, last_loss) ;
02695 
02697                             // BEST_PATH_TRANS
02699                             INT frame = orf_info.element(ii,0);
02700                             lookup_content_svm_values(ts, t, pos[ts], pos[t], svm_value, frame);
02701                             if (m_use_tiling)
02702                                 lookup_tiling_plif_values(ts, t, pos[t]-pos[ts], svm_value);
02703 
02704                             //INT offset = plen*num_svms ;
02705                             //for (INT ss=0; ss<num_svms; ss++)
02706                             //{
02707                             //  //svm_value[ss]=svs.svm_values[offset+ss];
02708                             //  svm_value[ss]=new_svm_value[ss];
02709                             //  //if (CMath::abs(new_svm_value[ss]-svm_value[ss])>1e-5)
02710                             //  //  SG_PRINT("ts: %i t: %i  precomp: %f old: %f diff: %f \n",ts, t,new_svm_value[ss],svm_value[ss], CMath::abs(new_svm_value[ss]-svm_value[ss]));
02711                             //}
02712 
02713                             DREAL pen_val = 0.0 ;
02714                             if (penalty)
02715                                 pen_val = penalty->lookup_penalty(pos[t]-pos[ts], svm_value) ;
02716                             if (nbest==1)
02717                             {
02718                                 DREAL  val        = elem_val[i] + pen_val ;
02719                                 if (with_loss)
02720                                     val              += segment_loss ;
02721                                 
02722                                 DREAL mval = -(val + delta.element(delta_array, ts, ii, 0, seq_len, N)) ;
02723                                 if (mval<fixedtempvv_)
02724                                 {
02725                                     fixedtempvv_ = mval ;
02726                                     fixedtempii_ = ii + ts*N;
02727                                     fixed_list_len = 1 ;
02728                                 }
02729                             }
02730                             else
02731                             {
02732                                 for (short int diff=0; diff<nbest; diff++)
02733                                 {
02734                                     DREAL  val        = elem_val[i]  ;
02735                                     val              += pen_val ;
02736                                     if (with_loss)
02737                                         val              += segment_loss ;
02738                                     
02739                                     DREAL mval = -(val + delta.element(delta_array, ts, ii, diff, seq_len, N)) ;
02740                                     
02741                                     /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
02742                                     /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
02743                                     /* fixed_list_len has the number of elements in fixedtempvv */
02744                                     
02745                                     if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
02746                                     {
02747                                         if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
02748                                         {
02749                                             fixedtempvv[fixed_list_len] = mval ;
02750                                             fixedtempii[fixed_list_len] = ii + diff*N + ts*N*nbest;
02751                                             fixed_list_len++ ;
02752                                         }
02753                                         else  // must have mval < fixedtempvv[fixed_list_len-1]
02754                                         {
02755                                             int addhere = fixed_list_len;
02756                                             while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
02757                                                 addhere--;
02758                                             
02759                                             // move everything from addhere+1 one forward 
02760                                             for (int jj=fixed_list_len-1; jj>addhere; jj--)
02761                                             {
02762                                                 fixedtempvv[jj] = fixedtempvv[jj-1];
02763                                                 fixedtempii[jj] = fixedtempii[jj-1];
02764                                             }
02765                                             
02766                                             fixedtempvv[addhere] = mval;
02767                                             fixedtempii[addhere] = ii + diff*N + ts*N*nbest;
02768                                             
02769                                             if (fixed_list_len < nbest)
02770                                                 fixed_list_len++;
02771                                         }
02772                                     }
02773                                 }
02774                             }
02775                         }
02776                     }
02777                 }
02778                 
02779                 
02780                 int numEnt = fixed_list_len;
02781                 
02782                 double minusscore;
02783                 long int fromtjk;
02784                 
02785                 for (short int k=0; k<nbest; k++)
02786                 {
02787                     if (k<numEnt)
02788                     {
02789                         if (nbest==1)
02790                         {
02791                             minusscore = fixedtempvv_ ;
02792                             fromtjk = fixedtempii_ ;
02793                         }
02794                         else
02795                         {
02796                             minusscore = fixedtempvv[k];
02797                             fromtjk = fixedtempii[k];
02798                         }
02799                         
02800                         delta.element(delta_array, t, j, k, seq_len, N)    = -minusscore + seq.element(j,t);
02801                         psi.element(t,j,k)      = (fromtjk%N) ;
02802                         if (nbest>1)
02803                             ktable.element(t,j,k)   = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
02804                         ptable.element(t,j,k)   = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
02805                     }
02806                     else
02807                     {
02808                         delta.element(delta_array, t, j, k, seq_len, N)    = -CMath::INFTY ;
02809                         psi.element(t,j,k)      = 0 ;
02810                         if (nbest>1)
02811                             ktable.element(t,j,k)     = 0 ;
02812                         ptable.element(t,j,k)     = 0 ;
02813                     }
02814                 }
02815             }
02816         }
02817     }
02818 
02819     clear_svm_values(svs);
02820     if (with_loss)
02821         clear_segment_loss(loss);
02822 
02823     { //termination
02824         INT list_len = 0 ;
02825         for (short int diff=0; diff<nbest; diff++)
02826         {
02827             for (T_STATES i=0; i<N; i++)
02828             {
02829                 oldtempvv[list_len] = -(delta.element(delta_array, (seq_len-1), i, diff, seq_len, N)+get_q(i)) ;
02830                 oldtempii[list_len] = i + diff*N ;
02831                 list_len++ ;
02832             }
02833         }
02834         
02835         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02836         
02837         for (short int k=0; k<nbest; k++)
02838         {
02839             delta_end.element(k) = -oldtempvv[k] ;
02840             path_ends.element(k) = (oldtempii[k]%N) ;
02841             if (nbest>1)
02842                 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
02843         }
02844     }
02845     
02846     { //state sequence backtracking     
02847         for (short int k=0; k<nbest; k++)
02848         {
02849             prob_nbest[k]= delta_end.element(k) ;
02850             
02851             INT i         = 0 ;
02852             state_seq[i]  = path_ends.element(k) ;
02853             short int q   = 0 ;
02854             if (nbest>1)
02855                 q=ktable_end.element(k) ;
02856             pos_seq[i]    = seq_len-1 ;
02857 
02858             while (pos_seq[i]>0)
02859             {
02860                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02861                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02862                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02863                 if (nbest>1)
02864                     q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02865                 i++ ;
02866             }
02867             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02868             INT num_states = i+1 ;
02869             for (i=0; i<num_states;i++)
02870             {
02871                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
02872                 my_pos_seq[i+k*seq_len]   = pos_seq[num_states-i-1] ;
02873             }
02874             my_state_seq[num_states+k*seq_len]=-1 ;
02875             my_pos_seq[num_states+k*seq_len]=-1 ;
02876         }
02877     }
02878     
02879     if (is_big)
02880         SG_PRINT( "DONE.     \n") ;
02881 
02882 
02883 #ifdef DYNPROG_TIMING
02884     MyTime2.stop() ;
02885     
02886     if (is_big)
02887         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, MyTime2.time_diff_sec()) ;
02888 #endif
02889 
02890     delete[] fixedtempvv ;
02891     delete[] fixedtempii ;
02892 }
02893 
02894 void CDynProg::best_path_trans_deriv(INT *my_state_seq, INT *my_pos_seq, DREAL *my_scores, DREAL* my_losses,INT my_seq_len, 
02895                     const DREAL *seq_array, INT seq_len, const INT *pos, CPlifBase **Plif_matrix, 
02896                     CPlifBase **Plif_state_signals, INT max_num_signals,INT genestr_num)
02897 {   
02898     if (!svm_arrays_clean)
02899     {
02900         SG_ERROR( "SVM arrays not clean") ;
02901         return ;
02902     } ;
02903     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
02904     //mod_words.display() ;
02905     //sign_words.display() ;
02906     //string_words.display() ;
02907 
02908     bool use_svm = false ;
02909     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
02910     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
02911     //dict_weights_array=dict_weights.get_array() ;
02912     
02913     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02914     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02915     CArray3<DREAL> seq_input(seq_array, N, seq_len, max_num_signals) ;
02916     
02917     { // determine whether to use svm outputs and clear derivatives
02918         for (INT i=0; i<N; i++)
02919             for (INT j=0; j<N; j++)
02920             {
02921                 CPlifBase *penij=PEN.element(i,j) ;
02922                 if (penij==NULL)
02923                     continue ;
02924                 
02925                 if (penij->uses_svm_values())
02926                     use_svm=true ;
02927                 penij->penalty_clear_derivative() ;
02928             }
02929         for (INT i=0; i<N; i++)
02930             for (INT j=0; j<max_num_signals; j++)
02931             {
02932                 CPlifBase *penij=PEN_state_signals.element(i,j) ;
02933                 if (penij==NULL)
02934                     continue ;
02935                 if (penij->uses_svm_values())
02936                     use_svm=true ;
02937                 penij->penalty_clear_derivative() ;
02938             }
02939     }
02940 
02941     { // set derivatives of p, q and a to zero
02942         for (INT i=0; i<N; i++)
02943         {
02944             initial_state_distribution_p_deriv.element(i)=0 ;
02945             end_state_distribution_q_deriv.element(i)=0 ;
02946             for (INT j=0; j<N; j++)
02947                 transition_matrix_a_deriv.element(i,j)=0 ;
02948         }
02949     }
02950     
02951     { // clear score vector
02952         for (INT i=0; i<my_seq_len; i++)
02953         {
02954             my_scores[i]=0.0 ;
02955             my_losses[i]=0.0 ;
02956         }
02957     }
02958 
02959     //INT total_len = 0 ;
02960     
02961     //transition_matrix_a.display_array() ;
02962     //transition_matrix_a_id.display_array() ;
02963     
02964     { // compute derivatives for given path
02965         DREAL svm_value[2*num_svms] ;
02966         for (INT s=0; s<2*num_svms; s++)
02967             svm_value[s]=0 ;
02968         
02969         for (INT i=0; i<my_seq_len; i++)
02970         {
02971             my_scores[i]=0.0 ;
02972             my_losses[i]=0.0 ;
02973         }
02974         
02975 //#ifdef DYNPROG_DEBUG
02976         DREAL total_score = 0.0 ;
02977         DREAL total_loss = 0.0 ;
02978 //#endif        
02979 
02980         ASSERT(my_state_seq[0]>=0) ;
02981         initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02982         my_scores[0] += initial_state_distribution_p.element(my_state_seq[0]) ;
02983 
02984         ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02985         end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02986         my_scores[my_seq_len-1] += end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02987 
02988 //#ifdef DYNPROG_DEBUG
02989         total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02990 //#endif        
02991         struct svm_values_struct svs;
02992         svs.num_unique_words = NULL;
02993         svs.svm_values = NULL;
02994         svs.svm_values_unnormalized = NULL;
02995         svs.word_used = NULL;
02996         //init_svm_values(svs, my_state_seq[0], seq_len, 100);
02997 
02998         struct segment_loss_struct loss;
02999         loss.segments_changed = NULL;
03000         loss.num_segment_id = NULL;
03001         //SG_DEBUG( "seq_len=%i\n", my_seq_len) ;
03002         for (INT i=0; i<my_seq_len-1; i++)
03003         {
03004             if (my_state_seq[i+1]==-1)
03005                 break ;
03006             INT from_state = my_state_seq[i] ;
03007             INT to_state   = my_state_seq[i+1] ;
03008             INT from_pos   = my_pos_seq[i] ;
03009             INT to_pos     = my_pos_seq[i+1] ;
03010 
03011             // compute loss relative to another segmentation using the segment_loss function
03012             init_segment_loss(loss, seq_len, pos[to_pos]-pos[from_pos]+10);
03013             find_segment_loss_till_pos(pos, to_pos,m_segment_ids, m_segment_mask, loss);  
03014 
03015             INT loss_last_pos = to_pos ;
03016             DREAL last_loss = 0.0 ;
03017             INT elem_id = transition_matrix_a_id.element(from_state, to_state) ;
03018             my_losses[i] = extend_segment_loss(loss, pos, elem_id, from_pos, loss_last_pos, last_loss) ;
03019 #ifdef DYNPROG_DEBUG
03020             io.set_loglevel(M_DEBUG) ;
03021             SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
03022 #endif
03023             // increase usage of this transition
03024             transition_matrix_a_deriv.element(from_state, to_state)++ ;
03025             my_scores[i] += transition_matrix_a.element(from_state, to_state) ;
03026             //SG_PRINT("transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, transition_matrix_a.element(from_state, to_state));
03027 #ifdef DYNPROG_DEBUG
03028             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03029 #endif
03030             
03031             /*INT last_svm_pos[num_degrees] ;
03032             for (INT qq=0; qq<num_degrees; qq++)
03033             last_svm_pos[qq]=-1 ;*/
03034             
03035             if (use_svm)
03036             {
03037                 //SG_PRINT("from_pos: %i; to_pos: %i; pos[to_pos]-pos[from_pos]: %i \n",from_pos, to_pos, pos[to_pos]-pos[from_pos]); 
03038                 INT frame = m_orf_info.element(from_state,0);
03039                 lookup_content_svm_values(from_pos, to_pos, pos[from_pos],pos[to_pos], svm_value, frame);
03040                 if (m_use_tiling)
03041                     lookup_tiling_plif_values(from_pos, to_pos, pos[to_pos]-pos[from_pos], svm_value);
03042     
03043 #ifdef DYNPROG_DEBUG
03044                     SG_DEBUG( "svm[%i]: %f\n", ss, svm_value[ss]) ;
03045 #endif
03046             }
03047             
03048             if (PEN.element(to_state, from_state)!=NULL)
03049             {
03050                 DREAL nscore = PEN.element(to_state, from_state)->lookup_penalty(pos[to_pos]-pos[from_pos], svm_value) ;
03051                 my_scores[i] += nscore ;
03052                 //SG_PRINT("to_state: %i, from_state: %i, nscore: %f, len: %i \n",to_state,from_state,nscore, pos[to_pos]-pos[from_pos]);
03053                 if (m_use_tiling)
03054                     for (INT s=0;s<num_svms;s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
03055                         svm_value[num_svms+s]=-CMath::INFTY;
03056             
03057 #ifdef DYNPROG_DEBUG
03058                 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, pos[from_pos], to_pos, pos[to_pos], pos[to_pos]-pos[from_pos]) ;
03059 
03060                 /*
03061                   INT orf_from = g_orf_info.element(from_state,0) ;
03062                   INT orf_to   = g_orf_info.element(to_state,1) ;
03063                   ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
03064                   if (orf_from != -1)
03065                   total_len = total_len + pos[to_pos]-pos[from_pos] ;
03066                   
03067                   SG_DEBUG( "%i. orf_info: from_orf=%i to_orf=%i orf_diff=%i, len=%i, lenmod3=%i, total_len=%i, total_lenmod3=%i\n", i, orf_from, orf_to, (orf_to-orf_from)%3, pos[to_pos]-pos[from_pos], (pos[to_pos]-pos[from_pos])%3, total_len, total_len%3) ;
03068                 */
03069 #endif
03070                 PEN.element(to_state, from_state)->penalty_add_derivative(pos[to_pos]-pos[from_pos], svm_value) ;
03071                 if (m_use_tiling) 
03072                 {
03073                     for (INT s=0;s<num_svms;s++)
03074                         svm_value[s]=-CMath::INFTY;
03075                     DREAL intensities[m_num_probes];
03076                     INT num_intensities = raw_intensities_interval_query(pos[from_pos], pos[to_pos],intensities);
03077                     //SG_PRINT("pos[from_pos]:%i, pos[to_pos]:%i, num_intensities:%i\n",pos[from_pos],pos[to_pos], num_intensities);
03078                     for (INT k=0;k<num_intensities;k++)
03079                     {
03080                         for (INT j=0;j<num_svms;j++)
03081                             svm_value[num_svms+j]=intensities[k];
03082                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value) ;   
03083                         
03084                     }
03085                 }
03086             }
03087 #ifdef DYNPROG_DEBUG
03088             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03089 #endif
03090 
03091             //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
03092             for (INT k=0; k<max_num_signals; k++)
03093             {
03094                 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
03095                 {
03096 #ifdef DYNPROG_DEBUG
03097                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
03098 #endif
03099                     my_scores[i] += seq_input.element(to_state, to_pos, k) ;
03100                     //SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
03101                     break ;
03102                 }
03103                 if (PEN_state_signals.element(to_state, k)!=NULL)
03104                 {
03105                     DREAL nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
03106                     my_scores[i] += nscore ;
03107                     //break ;
03108                     //INT num_current_svms=0;
03109                     //INT svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03110                     //SG_PRINT("PEN_state_signals->id: ");
03111                     //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
03112                     //SG_PRINT("\n");
03113                     //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03114 #ifdef DYNPROG_DEBUG
03115                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03116 #endif
03117                     PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value) ;
03118                 } else
03119                     break ;
03120             }
03121             
03122 //#ifdef DYNPROG_DEBUG
03123             //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
03124             //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
03125             total_score += my_scores[i] ;
03126             total_loss += my_losses[i] ;
03127 //#endif
03128         }
03129 //#ifdef DYNPROG_DEBUG
03130         //SG_PRINT( "total score = %f \n", total_score) ;
03131         SG_PRINT( "total loss = %f \n", total_loss) ;
03132 //#endif
03133         clear_svm_values(svs);
03134         clear_segment_loss(loss);
03135     }
03136 
03137     
03138 }
03139 
03140 
03141 void CDynProg::best_path_trans_simple(const DREAL *seq_array, INT seq_len, short int nbest, 
03142                                       DREAL *prob_nbest, INT *my_state_seq)
03143 {
03144     if (!svm_arrays_clean)
03145     {
03146         SG_ERROR( "SVM arrays not clean") ;
03147         return ;
03148     } ;
03149 
03150     INT max_look_back = 2 ;
03151     const INT look_back_buflen = max_look_back*nbest*N ;
03152     ASSERT(nbest<32000) ;
03153         
03154     CArray2<DREAL> seq((DREAL *)seq_array, N, seq_len, false) ;
03155 
03156     CArray3<DREAL> delta(max_look_back, N, nbest) ;
03157     CArray3<T_STATES> psi(seq_len, N, nbest) ;
03158     CArray3<short int> ktable(seq_len,N,nbest) ;
03159     CArray3<INT> ptable(seq_len,N,nbest) ;
03160 
03161     CArray<DREAL> delta_end(nbest) ;
03162     CArray<T_STATES> path_ends(nbest) ;
03163     CArray<short int> ktable_end(nbest) ;
03164 
03165     CArray<DREAL> oldtempvv(look_back_buflen) ;
03166     CArray<INT> oldtempii(look_back_buflen) ;
03167 
03168     CArray<T_STATES> state_seq(seq_len) ;
03169     CArray<INT> pos_seq(seq_len) ;
03170 
03171     { // initialization
03172 
03173         for (T_STATES i=0; i<N; i++)
03174         {
03175             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
03176             psi.element(0,i,0)   = 0 ;
03177             ktable.element(0,i,0)  = 0 ;
03178             ptable.element(0,i,0)  = 0 ;
03179             for (short int k=1; k<nbest; k++)
03180             {
03181                 delta.element(0,i,k)    = -CMath::INFTY ;
03182                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
03183                 ktable.element(0,i,k)     = 0 ;
03184                 ptable.element(0,i,k)     = 0 ;
03185             }
03186         }
03187     }
03188 
03189     // recursion
03190     for (INT t=1; t<seq_len; t++)
03191     {
03192         for (T_STATES j=0; j<N; j++)
03193         {
03194             if (seq.element(j,t)<-1e20)
03195             { // if we cannot observe the symbol here, then we can omit the rest
03196                 for (short int k=0; k<nbest; k++)
03197                 {
03198                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
03199                     psi.element(t,j,k)      = 0 ;
03200                     ktable.element(t,j,k)     = 0 ;
03201                     ptable.element(t,j,k)     = 0 ;
03202                 }
03203             }
03204             else
03205             {
03206                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
03207                 const T_STATES *elem_list = trans_list_forward[j] ;
03208                 const DREAL *elem_val      = trans_list_forward_val[j] ;
03209                 
03210                 INT old_list_len = 0 ;
03211                 
03212                 for (INT i=0; i<num_elem; i++)
03213                 {
03214                     T_STATES ii = elem_list[i] ;
03215 
03216                     INT ts=t-1; 
03217                     if (ts>=0)
03218                     {
03219                         bool ok=true ;
03220                         
03221                         if (ok)
03222                         {
03223 
03224                           
03225                           for (short int diff=0; diff<nbest; diff++)
03226                             {
03227                               DREAL  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
03228                               DREAL mval = -val;
03229 
03230                               oldtempvv[old_list_len] = mval ;
03231                               oldtempii[old_list_len] = ii + diff*N + ts*N*nbest;
03232                               old_list_len++ ;
03233                             }
03234                         }
03235                     }
03236                 }
03237                 
03238                 CMath::nmin<INT>(oldtempvv.get_array(), oldtempii.get_array(), old_list_len, nbest) ;
03239 
03240                 int numEnt = 0;
03241                 numEnt = old_list_len;
03242 
03243                 double minusscore;
03244                 long int fromtjk;
03245 
03246                 for (short int k=0; k<nbest; k++)
03247                 {
03248                     if (k<numEnt)
03249                     {
03250                         minusscore = oldtempvv[k];
03251                         fromtjk = oldtempii[k];
03252                         
03253                         delta.element(t%max_look_back,j,k)    = -minusscore + seq.element(j,t);
03254                         psi.element(t,j,k)      = (fromtjk%N) ;
03255                         ktable.element(t,j,k)     = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
03256                         ptable.element(t,j,k)     = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
03257                     }
03258                     else
03259                     {
03260                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
03261                         psi.element(t,j,k)      = 0 ;
03262                         ktable.element(t,j,k)     = 0 ;
03263                         ptable.element(t,j,k)     = 0 ;
03264                     }
03265                 }
03266                 
03267             }
03268         }
03269     }
03270 
03271     
03272     { //termination
03273         INT list_len = 0 ;
03274         for (short int diff=0; diff<nbest; diff++)
03275         {
03276             for (T_STATES i=0; i<N; i++)
03277             {
03278                 oldtempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
03279                 oldtempii[list_len] = i + diff*N ;
03280                 list_len++ ;
03281             }
03282         }
03283         
03284         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
03285         
03286         for (short int k=0; k<nbest; k++)
03287         {
03288             delta_end.element(k) = -oldtempvv[k] ;
03289             path_ends.element(k) = (oldtempii[k]%N) ;
03290             ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
03291         }
03292     }
03293     
03294     { //state sequence backtracking     
03295         for (short int k=0; k<nbest; k++)
03296         {
03297             prob_nbest[k]= delta_end.element(k) ;
03298             
03299             INT i         = 0 ;
03300             state_seq[i]  = path_ends.element(k) ;
03301             short int q   = ktable_end.element(k) ;
03302             pos_seq[i]    = seq_len-1 ;
03303 
03304             while (pos_seq[i]>0)
03305             {
03306                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03307                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03308                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
03309                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
03310                 i++ ;
03311             }
03312             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03313             INT num_states = i+1 ;
03314             for (i=0; i<num_states;i++)
03315             {
03316                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03317             }
03318             //my_state_seq[num_states+k*seq_len]=-1 ;
03319         }
03320 
03321     }
03322 }
03323 

SHOGUN Machine Learning Toolbox - Documentation