DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Copyright (C) 1999-2007 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "structure/DynProg.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/io.h"
00015 #include "lib/config.h"
00016 #include "features/StringFeatures.h"
00017 #include "features/CharFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "lib/Array.h"
00021 #include "lib/Array2.h"
00022 #include "lib/Array3.h"
00023 
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <time.h>
00027 #include <ctype.h>
00028 
00029 template void CDynProg::best_path_trans<1,true,false>(
00030     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00031     const int32_t *orf_info, CPlifBase **PLif_matrix,
00032     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00033     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00034     int32_t *my_pos_seq, bool use_orf);
00035 
00036 template void CDynProg::best_path_trans<2,true,false>(
00037     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00038     const int32_t *orf_info, CPlifBase **PLif_matrix,
00039     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00040     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00041     int32_t *my_pos_seq, bool use_orf);
00042 
00043 template void CDynProg::best_path_trans<1,false,false>(
00044     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00045     const int32_t *orf_info, CPlifBase **PLif_matrix,
00046     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00047     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00048     int32_t *my_pos_seq, bool use_orf);
00049 
00050 template void CDynProg::best_path_trans<2,false,false>(
00051     const float64_t *seq, int32_t seq_len, const int32_t *pos,
00052     const int32_t *orf_info, CPlifBase **PLif_matrix,
00053     CPlifBase **Plif_state_signals, int32_t max_num_signals,
00054     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00055     int32_t *my_pos_seq, bool use_orf);
00056 
00057 
00058 #ifdef SUNOS
00059 extern "C" int  finite(double);
00060 #endif
00061 
00062 //#define USE_TMP_ARRAYCLASS
00063 //#define DYNPROG_DEBUG
00064 
00065 //CArray2<int32_t> g_orf_info(1,1) ;
00066 
00067 static int32_t word_degree_default[4]={3,4,5,6} ;
00068 static int32_t cum_num_words_default[5]={0,64,320,1344,5440} ;
00069 static int32_t num_words_default[4]=   {64,256,1024,4096} ;
00070 static int32_t mod_words_default[32] = {1,1,1,1,1,1,1,1,
00071                                     1,1,1,1,1,1,1,1,
00072                                     0,0,0,0,0,0,0,0,
00073                                     0,0,0,0,0,0,0,0} ;  
00074 static bool sign_words_default[16] = {true,true,true,true,true,true,true,true,
00075                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00076 static int32_t string_words_default[16] = {0,0,0,0,0,0,0,0,
00077                                        1,1,1,1,1,1,1,1} ; // which string should be used
00078 
00079 CDynProg::CDynProg(int32_t p_num_svms /*= 8 */)
00080 : CSGObject(), transition_matrix_a_id(1,1), transition_matrix_a(1,1),
00081     transition_matrix_a_deriv(1,1), initial_state_distribution_p(1),
00082     initial_state_distribution_p_deriv(1), end_state_distribution_q(1),
00083     end_state_distribution_q_deriv(1), dict_weights(1,1),
00084     dict_weights_array(dict_weights.get_array()),
00085 
00086       // multi svm
00087       num_degrees(4), 
00088       num_svms(8), 
00089       num_strings(1),
00090       word_degree(word_degree_default, num_degrees, true, true),
00091       cum_num_words(cum_num_words_default, num_degrees+1, true, true),
00092       cum_num_words_array(cum_num_words.get_array()),
00093       num_words(num_words_default, num_degrees, true, true),
00094       num_words_array(num_words.get_array()),
00095       mod_words(mod_words_default, num_svms, 2, true, true),
00096       mod_words_array(mod_words.get_array()),
00097       sign_words(sign_words_default, num_svms, true, true),
00098       sign_words_array(sign_words.get_array()),
00099       string_words(string_words_default, num_svms, true, true),
00100       string_words_array(string_words.get_array()),
00101 //    word_used(num_degrees, num_words[num_degrees-1], num_strings),
00102 //    word_used_array(word_used.get_array()),
00103 //    svm_values_unnormalized(num_degrees, num_svms),
00104       svm_pos_start(num_degrees),
00105       num_unique_words(num_degrees),
00106       svm_arrays_clean(true),
00107 
00108       // single svm
00109       num_svms_single(1),
00110       word_degree_single(1),
00111       num_words_single(4), 
00112       word_used_single(num_words_single),
00113       svm_value_unnormalized_single(num_svms_single),
00114       num_unique_words_single(0),
00115 
00116       max_a_id(0), m_seq(1,1,1), m_pos(1), m_orf_info(1,2), 
00117           m_segment_sum_weights(1,1), m_plif_list(1), 
00118       m_PEN(1,1), m_PEN_state_signals(1,1), 
00119       m_genestr(1,1), m_dict_weights(1,1), m_segment_loss(1,1,2), 
00120           m_segment_ids(1),
00121           m_segment_mask(1),
00122       m_scores(1), m_states(1,1), m_positions(1,1), m_genestr_stop(1),
00123       m_precomputed_svm_values(1,1), //by Jonas
00124       m_precomputed_tiling_values(1,1) //by Jonas
00125 {
00126     trans_list_forward = NULL ;
00127     trans_list_forward_cnt = NULL ;
00128     trans_list_forward_val = NULL ;
00129     trans_list_forward_id = NULL ;
00130     trans_list_len = 0 ;
00131 
00132     mem_initialized = true ;
00133 
00134     this->N=1;
00135     m_step=0;
00136 
00137     m_raw_intensities = NULL;
00138     m_probe_pos = NULL;
00139     m_use_tiling=false;
00140     m_genestr_len = 0;
00141 #ifdef ARRAY_STATISTICS
00142     word_degree.set_name("word_degree");
00143 #endif
00144 }
00145 
00146 CDynProg::~CDynProg()
00147 {
00148     if (trans_list_forward_cnt)
00149         delete[] trans_list_forward_cnt;
00150     if (trans_list_forward)
00151     {
00152         for (int32_t i=0; i<trans_list_len; i++)
00153         {
00154             if (trans_list_forward[i])
00155                 delete[] trans_list_forward[i];
00156         }
00157         delete[] trans_list_forward;
00158     }
00159     if (trans_list_forward_val)
00160     {
00161         for (int32_t i=0; i<trans_list_len; i++)
00162         {
00163             if (trans_list_forward_val[i])
00164                 delete[] trans_list_forward_val[i];
00165         }
00166         delete[] trans_list_forward_val;
00167     }
00168     if (trans_list_forward_id)
00169     {
00170         for (int32_t i=0; i<trans_list_len; i++)
00171         {
00172             if (trans_list_forward_id[i])
00173                 delete[] trans_list_forward_id[i];
00174         }
00175         delete[] trans_list_forward_id;
00176     }
00177     if (m_raw_intensities)
00178         delete[] m_raw_intensities;
00179     if (m_probe_pos)
00180         delete[] m_probe_pos;
00181     
00182 }
00183 
00185 void CDynProg::set_genestr_len(int32_t genestr_len)
00186 {
00187     m_genestr_len = genestr_len;
00188 }
00189 
00190 int32_t CDynProg::get_num_svms()
00191 {
00192     return num_svms;
00193 }
00194 
00195 void CDynProg::precompute_stop_codons(const char* sequence, int32_t length)
00196 {
00197     m_genestr_stop.resize_array(length) ;
00198     m_genestr_stop.zero() ;
00199     m_genestr_stop.set_name("genestr_stop") ;
00200     {
00201     for (int32_t i=0; i<length-2; i++)
00202         if ((sequence[i]=='t' || sequence[i]=='T') && 
00203             (((sequence[i+1]=='a' || sequence[i+1]=='A') && 
00204               (sequence[i+2]=='a' || sequence[i+2]=='g' || sequence[i+2]=='A' || sequence[i+2]=='G')) ||
00205              ((sequence[i+1]=='g'||sequence[i+1]=='G') && (sequence[i+2]=='a' || sequence[i+2]=='A') )))
00206             m_genestr_stop.element(i)=true ;
00207         else
00208             m_genestr_stop.element(i)=false ;
00209     m_genestr_stop.element(length-1)=false ;
00210     m_genestr_stop.element(length-1)=false ;
00211     }
00212 }
00213 
00214 void CDynProg::set_num_states(int32_t p_N)
00215 {
00216     N=p_N ;
00217 
00218     transition_matrix_a_id.resize_array(N,N) ;
00219     transition_matrix_a.resize_array(N,N) ;
00220     transition_matrix_a_deriv.resize_array(N,N) ;
00221     initial_state_distribution_p.resize_array(N) ;
00222     initial_state_distribution_p_deriv.resize_array(N) ;
00223     end_state_distribution_q.resize_array(N);
00224     end_state_distribution_q_deriv.resize_array(N) ;
00225 
00226     m_orf_info.resize_array(N,2) ;
00227     m_PEN.resize_array(N,N) ;
00228     m_PEN_state_signals.resize_array(N,1) ;
00229 }
00230 
00231 int32_t CDynProg::get_num_states()
00232 {
00233     return N;
00234 }
00235 
00236 void CDynProg::init_tiling_data(
00237     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes,
00238     const int32_t seq_len)
00239 {
00240     delete[] m_probe_pos;
00241     delete[] m_raw_intensities;
00242     m_probe_pos = new int32_t[num_probes];
00243     m_raw_intensities = new float64_t[num_probes];
00244 
00245     memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00246     memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00247     m_num_probes = num_probes;
00248     m_precomputed_tiling_values.resize_array(num_svms,seq_len);
00249     m_use_tiling=true;
00250 
00251 }
00252 
00253 void CDynProg::init_content_svm_value_array(const int32_t seq_len)
00254 {
00255     m_precomputed_svm_values.resize_array(num_svms,seq_len);
00256 }
00257 
00258 void CDynProg::precompute_tiling_plifs(
00259     CPlif** PEN, const int32_t* tiling_plif_ids,
00260     const int32_t num_tiling_plifs, const int32_t seq_len, const int32_t* pos)
00261 {
00262 //  SG_PRINT("precompute_tiling_plifs: %f  \n",m_raw_intensities[0]);
00263 
00264     /*int32_t tiling_plif_ids[num_svms];
00265     int32_t num = 0;
00266         for (int32_t i=0; i<num_penalties; i++)
00267     {
00268         CPlif * plif = PEN[i];
00269         if (plif->get_use_svm()>num_svms)
00270         {
00271             tiling_plif_ids[num] = i;
00272             num++;
00273         }
00274     }*/
00275     ASSERT(num_tiling_plifs==num_svms)
00276 
00277     float64_t tiling_plif[num_svms];
00278     float64_t svm_value[2*num_svms];
00279     for (int32_t i=0; i<num_svms; i++)
00280         tiling_plif[i]=0.0;
00281 
00282     int32_t* p_tiling_pos  = m_probe_pos;
00283     float64_t* p_tiling_data = m_raw_intensities;
00284     int32_t num=0;
00285     for (int32_t pos_idx=0;pos_idx<seq_len;pos_idx++)
00286     {
00287         //SG_PRINT("pos[%i]: %i  \n",pos_idx,pos[pos_idx]);
00288         //SG_PRINT("*p_tiling_pos: %i  \n",*p_tiling_pos);
00289         while (num<m_num_probes&&*p_tiling_pos<pos[pos_idx])
00290         {
00291             //SG_PRINT("raw_intens: %f  \n",*p_tiling_data);
00292             for (int32_t i=0; i<num_svms; i++)
00293             {
00294                 svm_value[num_svms+i]=*p_tiling_data;
00295                 CPlif * plif = PEN[tiling_plif_ids[i]];
00296                 plif->set_do_calc(true);
00297                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00298                 //SG_PRINT("true: plif->lookup_penalty: %f  \n",plif->lookup_penalty(0,svm_value));
00299                 plif->set_do_calc(false);
00300                 //SG_PRINT("false: plif->lookup_penalty: %f  \n",plif->lookup_penalty(0,svm_value));
00301             }
00302             p_tiling_data++;
00303             p_tiling_pos++;
00304             num++;
00305         }
00306         for (int32_t i=0; i<num_svms; i++)
00307             m_precomputed_tiling_values.set_element(tiling_plif[i],i,pos_idx);
00308     }
00309     //float64_t intensities[m_num_probes];
00310     //int32_t nummm = raw_intensities_interval_query(1000, 1025, intensities);
00311     //SG_PRINT("nummm:%i\n",nummm);
00312 
00313 }
00314 
00315 void CDynProg::create_word_string(
00316     const char* genestr, int32_t genestr_num, int32_t genestr_len,
00317     uint16_t*** wordstr)
00318 {
00319     for (int32_t k=0; k<genestr_num; k++)
00320     {
00321         wordstr[k]=new uint16_t*[num_degrees] ;
00322         for (int32_t j=0; j<num_degrees; j++)
00323         {
00324             wordstr[k][j]=NULL ;
00325             {
00326                 wordstr[k][j]=new uint16_t[genestr_len] ;
00327                 for (int32_t i=0; i<genestr_len; i++)
00328                     switch (genestr[i])
00329                     {
00330                     case 'A':
00331                     case 'a': wordstr[k][j][i]=0 ; break ;
00332                     case 'C':
00333                     case 'c': wordstr[k][j][i]=1 ; break ;
00334                     case 'G':
00335                     case 'g': wordstr[k][j][i]=2 ; break ;
00336                     case 'T':
00337                     case 't': wordstr[k][j][i]=3 ; break ;
00338                     default: ASSERT(0) ;
00339                     }
00340                 translate_from_single_order(wordstr[k][j], genestr_len, word_degree[j]-1, word_degree[j]) ;
00341             }
00342         }
00343     }
00344     precompute_stop_codons(genestr, genestr_len);
00345 }
00346 
00347 void CDynProg::precompute_content_values(
00348     uint16_t*** wordstr, const int32_t *pos,const int32_t seq_len,
00349     const int32_t genestr_len,float64_t *dictionary_weights,int32_t dict_len)
00350 {
00351     //SG_PRINT("seq_len=%i, genestr_len=%i, dict_len=%i, num_svms=%i, num_degrees=%i\n",seq_len, genestr_len, dict_len, num_svms, num_degrees);
00352 
00353     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
00354     dict_weights_array=dict_weights.get_array() ;
00355 
00356     //int32_t d1 = mod_words.get_dim1();
00357     //int32_t d2 = mod_words.get_dim2();
00358     //SG_PRINT("d1:%i, d2:%i \n",d1, d2);
00359     //for (int32_t p=0 ; p<d1 ; p++)
00360     //{
00361     //  for (int32_t q=0 ; q<d2 ; q++)
00362     //      SG_PRINT("%i ",mod_words.get_element(p,q));
00363     //  SG_PRINT("\n");
00364     //}
00365 
00366     for (int32_t p=0 ; p<seq_len-1 ; p++)
00367     {
00368         int32_t from_pos = pos[p];
00369         int32_t to_pos = pos[p+1];
00370         float64_t my_svm_values_unnormalized[num_svms] ;
00371         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00372       
00373         ASSERT(from_pos<=genestr_len)   
00374         ASSERT(to_pos<=genestr_len) 
00375             
00376         for (int32_t s=0; s<num_svms; s++)
00377         {
00378             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00379         }
00380         for (int32_t i=from_pos; i<to_pos;i++)
00381                 {
00382             for (int32_t j=0; j<num_degrees; j++)
00383             {
00384                 uint16_t word = wordstr[0][j][i] ;
00385                 for (int32_t s=0; s<num_svms; s++)
00386                 {
00387                     // check if this k-mere should be considered for this SVM
00388                     if (mod_words.get_element(s,0)==3 && i%3!=mod_words.get_element(s,1))
00389                         continue;
00390                     my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
00391                 }
00392             }
00393         }
00394         for (int32_t s=0; s<num_svms; s++)
00395         {
00396             float64_t prev = m_precomputed_svm_values.get_element(s,p);
00397             m_precomputed_svm_values.set_element(prev + my_svm_values_unnormalized[s],s,p+1);
00398             ASSERT(prev>-1e20);
00399         }
00400     }
00401     for (int32_t j=0; j<num_degrees; j++)
00402         delete[] wordstr[0][j] ;
00403     delete[] wordstr[0] ;
00404 }
00405 
00406 void CDynProg::set_p_vector(float64_t *p, int32_t p_N)
00407 {
00408     ASSERT(p_N==N);
00409     //m_orf_info.resize_array(p_N,2);
00410     //m_PEN.resize_array(p_N,p_N);
00411 
00412     initial_state_distribution_p.set_array(p, p_N, true, true);
00413 }
00414 
00415 void CDynProg::set_q_vector(float64_t *q, int32_t q_N)
00416 {
00417     ASSERT(q_N==N);
00418     end_state_distribution_q.set_array(q, q_N, true, true);
00419 }
00420 
00421 void CDynProg::set_a(float64_t *a, int32_t p_M, int32_t p_N)
00422 {
00423     ASSERT(p_N==N);
00424     ASSERT(p_M==p_N);
00425     transition_matrix_a.set_array(a, p_N, p_N, true, true);
00426     transition_matrix_a_deriv.resize_array(p_N, p_N);
00427 }
00428 
00429 void CDynProg::set_a_id(int32_t *a, int32_t p_M, int32_t p_N)
00430 {
00431     ASSERT(p_N==N);
00432     ASSERT(p_M==p_N);
00433     transition_matrix_a_id.set_array(a, p_N, p_N, true, true);
00434     max_a_id = 0;
00435     for (int32_t i=0; i<p_N; i++)
00436         for (int32_t j=0; j<p_N; j++)
00437             max_a_id=CMath::max(max_a_id, transition_matrix_a_id.element(i,j));
00438 }
00439 
00440 void CDynProg::set_a_trans_matrix(
00441     float64_t *a_trans, int32_t num_trans, int32_t p_N)
00442 {
00443     if (!((p_N==3) || (p_N==4)))
00444         SG_ERROR("!((p_N==3) || (p_N==4)), p_N: %i\n",p_N);
00445 
00446     delete[] trans_list_forward ;
00447     delete[] trans_list_forward_cnt ;
00448     delete[] trans_list_forward_val ;
00449     delete[] trans_list_forward_id ;
00450 
00451     trans_list_forward = NULL ;
00452     trans_list_forward_cnt = NULL ;
00453     trans_list_forward_val = NULL ;
00454     trans_list_len = 0 ;
00455 
00456     transition_matrix_a.zero() ;
00457     transition_matrix_a_id.zero() ;
00458 
00459     mem_initialized = true ;
00460 
00461     trans_list_forward_cnt=NULL ;
00462     trans_list_len = N ;
00463     trans_list_forward = new T_STATES*[N] ;
00464     trans_list_forward_cnt = new T_STATES[N] ;
00465     trans_list_forward_val = new float64_t*[N] ;
00466     trans_list_forward_id = new int32_t*[N] ;
00467     
00468     int32_t start_idx=0;
00469     for (int32_t j=0; j<N; j++)
00470     {
00471         int32_t old_start_idx=start_idx;
00472 
00473         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00474         {
00475             start_idx++;
00476             
00477             if (start_idx>1 && start_idx<num_trans)
00478                 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00479         }
00480         
00481         if (start_idx>1 && start_idx<num_trans)
00482             ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00483         
00484         int32_t len=start_idx-old_start_idx;
00485         ASSERT(len>=0);
00486         
00487         trans_list_forward_cnt[j] = 0 ;
00488         
00489         if (len>0)
00490         {
00491             trans_list_forward[j]     = new T_STATES[len] ;
00492             trans_list_forward_val[j] = new float64_t[len] ;
00493             trans_list_forward_id[j] = new int32_t[len] ;
00494         }
00495         else
00496         {
00497             trans_list_forward[j]     = NULL;
00498             trans_list_forward_val[j] = NULL;
00499             trans_list_forward_id[j]  = NULL;
00500         }
00501     }
00502     
00503     for (int32_t i=0; i<num_trans; i++)
00504     {
00505         int32_t from_state   = (int32_t)a_trans[i] ;
00506         int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00507         float64_t val = a_trans[i+num_trans*2] ;
00508         int32_t id = 0 ;
00509         if (p_N==4)
00510             id = (int32_t)a_trans[i+num_trans*3] ;
00511         //SG_DEBUG( "id=%i\n", id) ;
00512             
00513         ASSERT(to_state>=0 && to_state<N);
00514         ASSERT(from_state>=0 && from_state<N);
00515         
00516         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00517         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00518         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00519         trans_list_forward_cnt[to_state]++ ;
00520         transition_matrix_a.element(from_state, to_state) = val ;
00521         transition_matrix_a_id.element(from_state, to_state) = id ;
00522         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,transition_matrix_a_id.element(from_state, to_state));
00523     } ;
00524 
00525     max_a_id = 0 ;
00526     for (int32_t i=0; i<N; i++)
00527         for (int32_t j=0; j<N; j++)
00528         {
00529             //if (transition_matrix_a_id.element(i,j))
00530             //SG_DEBUG( "(%i,%i)=%i\n", i,j, transition_matrix_a_id.element(i,j)) ;
00531             max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00532         }
00533     //SG_DEBUG( "max_a_id=%i\n", max_a_id) ;
00534 }
00535 
00536 void CDynProg::init_svm_arrays(int32_t p_num_degrees, int32_t p_num_svms)
00537 {
00538     svm_arrays_clean=false ;
00539 
00540     word_degree.resize_array(num_degrees) ;
00541 
00542     cum_num_words.resize_array(num_degrees+1) ;
00543     cum_num_words_array=cum_num_words.get_array() ;
00544 
00545     num_words.resize_array(num_degrees) ;
00546     num_words_array=num_words.get_array() ;
00547     
00548     //svm_values_unnormalized.resize_array(num_degrees, num_svms) ;
00549     svm_pos_start.resize_array(num_degrees) ;
00550     num_unique_words.resize_array(num_degrees) ;
00551 } 
00552 
00553 
00554 void CDynProg::init_word_degree_array(
00555     int32_t * p_word_degree_array, int32_t num_elem)
00556 {
00557     svm_arrays_clean=false ;
00558 
00559     word_degree.resize_array(num_degrees) ;
00560     ASSERT(num_degrees==num_elem);
00561 
00562 
00563     for (int32_t i=0; i<num_degrees; i++)
00564         word_degree[i]=p_word_degree_array[i] ;
00565 
00566 } 
00567 
00568 void CDynProg::init_cum_num_words_array(
00569     int32_t * p_cum_num_words_array, int32_t num_elem)
00570 {
00571     svm_arrays_clean=false ;
00572 
00573     cum_num_words.resize_array(num_degrees+1) ;
00574     cum_num_words_array=cum_num_words.get_array() ;
00575     ASSERT(num_degrees+1==num_elem);
00576 
00577     for (int32_t i=0; i<num_degrees+1; i++)
00578         cum_num_words[i]=p_cum_num_words_array[i] ;
00579 } 
00580 
00581 void CDynProg::init_num_words_array(
00582     int32_t * p_num_words_array, int32_t num_elem)
00583 {
00584     svm_arrays_clean=false ;
00585 
00586     num_words.resize_array(num_degrees) ;
00587     num_words_array=num_words.get_array() ;
00588     ASSERT(num_degrees==num_elem);
00589 
00590     for (int32_t i=0; i<num_degrees; i++)
00591         num_words[i]=p_num_words_array[i] ;
00592 
00593     //word_used.resize_array(num_degrees, num_words[num_degrees-1], num_strings) ;
00594     //word_used_array=word_used.get_array() ;
00595 } 
00596 
00597 void CDynProg::init_mod_words_array(
00598     int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00599 {
00600     //for (int32_t i=0; i<num_columns; i++)
00601     //{
00602     //  for (int32_t j=0; j<num_elem; j++)
00603     //      SG_PRINT("%i ",mod_words_input[i*num_elem+j]);
00604     //  SG_PRINT("\n");
00605     //}
00606     svm_arrays_clean=false ;
00607 
00608     ASSERT(num_svms==num_elem);
00609     ASSERT(num_columns==2);
00610 
00611     mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00612     mod_words_array = mod_words.get_array() ;
00613     
00614     /*SG_DEBUG( "mod_words=[") ;
00615     for (int32_t i=0; i<num_elem; i++)
00616         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00617         SG_DEBUG( "]\n") ;*/
00618 } 
00619 
00620 void CDynProg::init_sign_words_array(bool* p_sign_words_array, int32_t num_elem)
00621 {
00622     svm_arrays_clean=false ;
00623 
00624     ASSERT(num_svms==num_elem);
00625 
00626     sign_words.set_array(p_sign_words_array, num_elem, true, true) ;
00627     sign_words_array = sign_words.get_array() ;
00628 } 
00629 
00630 void CDynProg::init_string_words_array(
00631     int32_t* p_string_words_array, int32_t num_elem)
00632 {
00633     svm_arrays_clean=false ;
00634 
00635     ASSERT(num_svms==num_elem);
00636 
00637     string_words.set_array(p_string_words_array, num_elem, true, true) ;
00638     string_words_array = string_words.get_array() ;
00639 } 
00640 
00641 bool CDynProg::check_svm_arrays()
00642 {
00643     //SG_DEBUG( "wd_dim1=%d, cum_num_words=%d, num_words=%d, svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n num_degrees=%d, num_svms=%d, num_strings=%d", word_degree.get_dim1(), cum_num_words.get_dim1(), num_words.get_dim1(), svm_pos_start.get_dim1(), num_unique_words.get_dim1(), mod_words.get_dim1(), mod_words.get_dim2(), sign_words.get_dim1(), string_words.get_dim1(), num_degrees, num_svms, num_strings);
00644     if ((word_degree.get_dim1()==num_degrees) &&
00645             (cum_num_words.get_dim1()==num_degrees+1) &&
00646             (num_words.get_dim1()==num_degrees) &&
00647             //(word_used.get_dim1()==num_degrees) &&
00648             //(word_used.get_dim2()==num_words[num_degrees-1]) &&
00649             //(word_used.get_dim3()==num_strings) &&
00650             //      (svm_values_unnormalized.get_dim1()==num_degrees) &&
00651             //      (svm_values_unnormalized.get_dim2()==num_svms) &&
00652             (svm_pos_start.get_dim1()==num_degrees) &&
00653             (num_unique_words.get_dim1()==num_degrees) &&
00654             (mod_words.get_dim1()==num_svms) &&
00655             (mod_words.get_dim2()==2) && 
00656             (sign_words.get_dim1()==num_svms) &&
00657             (string_words.get_dim1()==num_svms))
00658     {
00659         svm_arrays_clean=true ;
00660         return true ;
00661     }
00662     else
00663     {
00664         if ((num_unique_words.get_dim1()==num_degrees) &&
00665             (mod_words.get_dim1()==num_svms) &&
00666             (mod_words.get_dim2()==2) &&
00667             (sign_words.get_dim1()==num_svms) &&
00668             (string_words.get_dim1()==num_svms))
00669             fprintf(stderr, "OK\n") ;
00670         else
00671             fprintf(stderr, "not OK\n") ;
00672 
00673         if (!(word_degree.get_dim1()==num_degrees))
00674             SG_WARNING("SVM array: word_degree.get_dim1()!=num_degrees") ;
00675         if (!(cum_num_words.get_dim1()==num_degrees+1))
00676             SG_WARNING("SVM array: cum_num_words.get_dim1()!=num_degrees+1") ;
00677         if (!(num_words.get_dim1()==num_degrees))
00678             SG_WARNING("SVM array: num_words.get_dim1()==num_degrees") ;
00679         if (!(svm_pos_start.get_dim1()==num_degrees))
00680             SG_WARNING("SVM array: svm_pos_start.get_dim1()!=num_degrees") ;
00681         if (!(num_unique_words.get_dim1()==num_degrees))
00682             SG_WARNING("SVM array: num_unique_words.get_dim1()!=num_degrees") ;
00683         if (!(mod_words.get_dim1()==num_svms))
00684             SG_WARNING("SVM array: mod_words.get_dim1()!=num_svms") ;
00685         if (!(mod_words.get_dim2()==2))
00686             SG_WARNING("SVM array: mod_words.get_dim2()!=2") ;
00687         if (!(sign_words.get_dim1()==num_svms))
00688             SG_WARNING("SVM array: sign_words.get_dim1()!=num_svms") ;
00689         if (!(string_words.get_dim1()==num_svms))
00690             SG_WARNING("SVM array: string_words.get_dim1()!=num_svms") ;
00691 
00692         svm_arrays_clean=false ;
00693         return false ;  
00694     }
00695 }
00696 
00697 void CDynProg::best_path_set_seq(float64_t *seq, int32_t p_N, int32_t seq_len)
00698 {
00699     if (!svm_arrays_clean)
00700     {
00701         SG_ERROR( "SVM arrays not clean") ;
00702         return ;
00703     } ;
00704 
00705     ASSERT(p_N==N);
00706     ASSERT(initial_state_distribution_p.get_dim1()==N);
00707     ASSERT(end_state_distribution_q.get_dim1()==N);
00708     
00709     m_seq.set_array(seq, N, seq_len, 1, true, true) ;
00710     this->N=N ;
00711 
00712     m_call=3 ;
00713     m_step=2 ;
00714 }
00715 
00716 void CDynProg::best_path_set_seq3d(
00717     float64_t *seq, int32_t p_N, int32_t seq_len, int32_t max_num_signals)
00718 {
00719     if (!svm_arrays_clean)
00720     {
00721         SG_ERROR( "SVM arrays not clean") ;
00722         return ;
00723     } ;
00724 
00725     ASSERT(p_N==N);
00726     ASSERT(initial_state_distribution_p.get_dim1()==N);
00727     ASSERT(end_state_distribution_q.get_dim1()==N);
00728     
00729     m_seq.set_array(seq, N, seq_len, max_num_signals, true, true) ;
00730     this->N=N ;
00731 
00732     m_call=3 ;
00733     m_step=2 ;
00734 }
00735 
00736 void CDynProg::best_path_set_pos(int32_t *pos, int32_t seq_len)  
00737 {
00738     if (m_step!=2)
00739         SG_ERROR( "please call best_path_set_seq first\n") ;
00740     
00741     if (seq_len!=m_seq.get_dim2())
00742         SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_seq.get_dim2()) ;
00743 
00744     m_pos.set_array(pos, seq_len, true, true) ;
00745 
00746     m_step=3 ;
00747 }
00748 
00749 void CDynProg::best_path_set_orf_info(int32_t *orf_info, int32_t m, int32_t n) 
00750 {
00751     //if (m_step!=3)
00752     //  SG_ERROR( "please call best_path_set_pos first\n") ;
00753         
00754     //if (m!=N)
00755     //  SG_ERROR( "orf_info size does not match previous info %i!=%i\n", m, N) ;
00756     if (n!=2)
00757         SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00758     m_orf_info.set_array(orf_info, m, n, true, true) ;
00759 
00760     m_call=1 ;
00761     m_step=4 ;
00762 }
00763 
00764 void CDynProg::best_path_set_segment_sum_weights(
00765     float64_t *segment_sum_weights, int32_t num_states, int32_t seq_len)
00766 {
00767     if (m_step!=3)
00768         SG_ERROR( "please call best_path_set_pos first\n") ;
00769         
00770     if (num_states!=N)
00771         SG_ERROR( "segment_sum_weights size does not match previous info %i!=%i\n", num_states, N) ;
00772     if (seq_len!=m_pos.get_dim1())
00773         SG_ERROR( "segment_sum_weights size incorrect %i!=%i\n", seq_len, m_pos.get_dim1()) ;
00774 
00775     m_segment_sum_weights.set_array(segment_sum_weights, num_states, seq_len, true, true) ;
00776     
00777     m_call=2 ;
00778     m_step=4 ;
00779 }
00780 
00781 void CDynProg::best_path_set_plif_list(CDynamicArray<CPlifBase*>* plifs)
00782 {
00783     ASSERT(plifs);
00784     CPlifBase** plif_list=plifs->get_array();
00785     int32_t num_plif=plifs->get_num_elements();
00786 
00787     if (m_step!=4)
00788         SG_ERROR( "please call best_path_set_orf_info or best_path_segment_sum_weights first\n") ;
00789 
00790     m_plif_list.set_array(plif_list, num_plif, true, true) ;
00791 
00792     m_step=5 ;
00793 }
00794 
00795 void CDynProg::best_path_set_plif_id_matrix(
00796     int32_t *plif_id_matrix, int32_t m, int32_t n)
00797 {
00798     if (m_step!=5)
00799         SG_ERROR( "please call best_path_set_plif_list first\n") ;
00800 
00801     if ((m!=N) || (n!=N))
00802         SG_ERROR( "plif_id_matrix size does not match previous info %i!=%i or %i!=%i\n", m, N, n, N) ;
00803 
00804     CArray2<int32_t> id_matrix(plif_id_matrix, N, N, false, false) ;
00805 #ifdef DYNPROG_DEBUG
00806     id_matrix.set_name("id_matrix");
00807     id_matrix.display_array();
00808 #endif //DYNPROG_DEBUG
00809     m_PEN.resize_array(N, N) ;
00810     for (int32_t i=0; i<N; i++)
00811         for (int32_t j=0; j<N; j++)
00812             if (id_matrix.element(i,j)>=0)
00813                 m_PEN.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00814             else
00815                 m_PEN.element(i,j)=NULL ;
00816 
00817     m_step=6 ;
00818 }
00819 
00820 void CDynProg::best_path_set_plif_state_signal_matrix(
00821     int32_t *plif_id_matrix, int32_t m, int32_t max_num_signals)
00822 {
00823     if (m_step!=6)
00824         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00825     
00826     if (m!=N)
00827         SG_ERROR( "plif_state_signal_matrix size does not match previous info %i!=%i\n", m, N) ;
00828 
00829     if (m_seq.get_dim3() != max_num_signals)
00830         SG_ERROR( "size(plif_state_signal_matrix,2) does not match with size(m_seq,3): %i!=%i\nSorry, Soeren... interface changed\n", m_seq.get_dim3(), max_num_signals) ;
00831 
00832     CArray2<int32_t> id_matrix(plif_id_matrix, N, max_num_signals, false, false) ;
00833     m_PEN_state_signals.resize_array(N, max_num_signals) ;
00834     for (int32_t i=0; i<N; i++)
00835     {
00836         for (int32_t j=0; j<max_num_signals; j++)
00837         {
00838             if (id_matrix.element(i,j)>=0)
00839                 m_PEN_state_signals.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00840             else
00841                 m_PEN_state_signals.element(i,j)=NULL ;
00842         }
00843     }
00844 
00845     m_step=6 ;
00846 }
00847 
00848 void CDynProg::best_path_set_genestr(
00849     char* genestr, int32_t genestr_len, int32_t genestr_num)
00850 {
00851     if (m_step!=6)
00852         SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00853 
00854     ASSERT(genestr);
00855     ASSERT(genestr_len>0);
00856     ASSERT(genestr_num>0);
00857 
00858     m_genestr.set_array(genestr, genestr_len, genestr_num, true, true) ;
00859 
00860     m_step=7 ;
00861 }
00862 
00863 void CDynProg::best_path_set_my_state_seq(
00864     int32_t* my_state_seq, int32_t seq_len)
00865 {
00866     ASSERT(my_state_seq && seq_len>0);
00867     m_my_state_seq.resize_array(seq_len);
00868     for (int32_t i=0; i<seq_len; i++)
00869         m_my_state_seq[i]=my_state_seq[i];
00870 }
00871 
00872 void CDynProg::best_path_set_my_pos_seq(int32_t* my_pos_seq, int32_t seq_len)
00873 {
00874     ASSERT(my_pos_seq && seq_len>0);
00875     m_my_pos_seq.resize_array(seq_len);
00876     for (int32_t i=0; i<seq_len; i++)
00877         m_my_pos_seq[i]=my_pos_seq[i];
00878 }
00879 
00880 void CDynProg::best_path_set_dict_weights(
00881     float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00882 {
00883     if (m_step!=7)
00884         SG_ERROR( "please call best_path_set_genestr first\n") ;
00885 
00886     if (num_svms!=n)
00887         SG_ERROR( "dict_weights array does not match num_svms=%i!=%i\n", num_svms, n) ;
00888 
00889     m_dict_weights.set_array(dictionary_weights, dict_len, num_svms, true, true) ;
00890 
00891     // initialize, so it does not bother when not used
00892     m_segment_loss.resize_array(max_a_id+1, max_a_id+1, 2) ;
00893     m_segment_loss.zero() ;
00894     m_segment_ids.resize_array(m_seq.get_dim2()) ;
00895     m_segment_mask.resize_array(m_seq.get_dim2()) ;
00896     m_segment_ids.zero() ;
00897     m_segment_mask.zero() ;
00898 
00899     m_step=8 ;
00900 }
00901 
00902 void CDynProg::best_path_set_segment_loss(
00903     float64_t* segment_loss, int32_t m, int32_t n)
00904 {
00905     // here we need two matrices. Store it in one: 2N x N
00906     if (2*m!=n)
00907         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00908 
00909     if (m!=max_a_id+1)
00910         SG_ERROR( "segment_loss size should match max_a_id: %i!=%i\n", m, max_a_id+1) ;
00911 
00912     m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00913     /*for (int32_t i=0; i<n; i++)
00914         for (int32_t j=0; j<n; j++)
00915         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00916 }
00917 
00918 void CDynProg::best_path_set_segment_ids_mask(
00919     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00920 {
00921     int32_t max_id = 0;
00922     for (int32_t i=1;i<m;i++)
00923         max_id = CMath::max(max_id,segment_ids[i]);
00924     //SG_PRINT("max_id: %i, m:%i\n",max_id, m);     
00925     m_segment_ids.set_array(segment_ids, m, false, true) ;
00926     m_segment_ids.set_name("m_segment_ids");
00927     m_segment_mask.set_array(segment_mask, m, false, true) ;
00928     m_segment_mask.set_name("m_segment_mask");
00929 }
00930 
00931 
00932 void CDynProg::best_path_call(int32_t nbest, bool use_orf) 
00933 {
00934     if (m_step!=8)
00935         SG_ERROR( "please call best_path_set_dict_weights first\n") ;
00936     if (m_call!=1)
00937         SG_ERROR( "please call best_path_set_orf_info first\n") ;
00938     ASSERT(N==m_seq.get_dim1()) ;
00939     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00940 
00941     m_scores.resize_array(nbest) ;
00942     m_states.resize_array(nbest, m_seq.get_dim2()) ;
00943     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
00944 
00945     m_call=1 ;
00946 
00947     ASSERT(nbest==1||nbest==2) ;
00948     ASSERT(m_genestr.get_dim2()==1) ;
00949 
00950     if (nbest==1)
00951         best_path_trans<1,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00952                 m_orf_info.get_array(), m_PEN.get_array(),
00953                 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00954                 m_genestr.get_dim2(),
00955                 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00956                 use_orf) ;
00957     else
00958         best_path_trans<2,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00959                 m_orf_info.get_array(), m_PEN.get_array(),
00960                 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00961                 m_genestr.get_dim2(),
00962                 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00963                 use_orf) ;
00964 
00965     m_step=9 ;
00966 }
00967 
00968 void CDynProg::best_path_deriv_call() 
00969 {
00970     //if (m_step!=8)
00971         //SG_ERROR( "please call best_path_set_dict_weights first\n") ;
00972     //if (m_call!=1)
00973         //SG_ERROR( "please call best_path_set_orf_info first\n") ;
00974     ASSERT(N==m_seq.get_dim1()) ;
00975     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00976 
00977     m_call=5 ; // or so ...
00978 
00979     m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
00980     m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
00981 
00982     best_path_trans_deriv(m_my_state_seq.get_array(), m_my_pos_seq.get_array(), 
00983                           m_my_scores.get_array(), m_my_losses.get_array(), m_my_state_seq.get_array_size(),
00984                           m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
00985                           m_PEN.get_array(), m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(), m_genestr.get_dim2()) ;
00986 
00987     m_step=12 ;
00988 }
00989 
00990 
00991 void CDynProg::best_path_2struct_call(int32_t nbest) 
00992 {
00993     if (m_step!=8)
00994         SG_ERROR( "please call best_path_set_orf_dict_weights first\n") ;
00995     if (m_call!=2)
00996         SG_ERROR( "please call best_path_set_segment_sum_weights first\n") ;
00997     ASSERT(N==m_seq.get_dim1()) ;
00998     ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00999     
01000     m_scores.resize_array(nbest) ;
01001     m_states.resize_array(nbest, m_seq.get_dim2()) ;
01002     m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01003 
01004     m_call=2 ;
01005 
01006     best_path_2struct(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(), 
01007                       m_PEN.get_array(), 
01008                       m_genestr.get_array(), m_genestr.get_dim1(),
01009                       nbest, 
01010                       m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
01011                       m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(), 
01012                       m_segment_sum_weights.get_array()) ;
01013 
01014     m_step=9 ;
01015 }
01016 
01017 void CDynProg::best_path_simple_call(int32_t nbest) 
01018 {
01019     if (m_step!=2)
01020         SG_ERROR( "please call best_path_set_seq first\n") ;
01021     if (m_call!=3)
01022         SG_ERROR( "please call best_path_set_seq first\n") ;
01023     ASSERT(N==m_seq.get_dim1()) ;
01024 
01025     m_scores.resize_array(nbest) ;
01026     m_states.resize_array(nbest, m_seq.get_dim2()) ;
01027 
01028     m_call=3 ;
01029 
01030     best_path_trans_simple(m_seq.get_array(), m_seq.get_dim2(), 
01031                            nbest, 
01032                            m_scores.get_array(), m_states.get_array()) ;
01033     
01034     m_step=9 ;
01035 }
01036 
01037 void CDynProg::best_path_deriv_call(int32_t nbest)
01038 {
01039     if (!svm_arrays_clean)
01040     {
01041         SG_ERROR( "SVM arrays not clean") ;
01042         return ;
01043     } ;
01044 
01045     //FIXME
01046 }
01047 
01048 
01049 void CDynProg::best_path_get_scores(float64_t **scores, int32_t *m) 
01050 {
01051     if (m_step!=9 && m_step!=12)
01052         SG_ERROR( "please call best_path*_call first\n") ;
01053 
01054     if (m_step==9)
01055     {
01056         *scores=m_scores.get_array() ;
01057         *m=m_scores.get_dim1() ;
01058     } else
01059     {
01060         *scores=m_my_scores.get_array() ;
01061         *m=m_my_scores.get_dim1() ;
01062     }
01063 
01064     m_step=10 ;
01065 }
01066 
01067 void CDynProg::best_path_get_states(int32_t **states, int32_t *m, int32_t *n) 
01068 {
01069     if (m_step!=10)
01070         SG_ERROR( "please call best_path_get_score first\n") ;
01071     
01072     *states=m_states.get_array() ;
01073     *m=m_states.get_dim1() ;
01074     *n=m_states.get_dim2() ;
01075 
01076     m_step=11 ;
01077 }
01078 
01079 void CDynProg::best_path_get_positions(
01080     int32_t **positions, int32_t *m, int32_t *n)
01081 {
01082     if (m_step!=11)
01083         SG_ERROR( "please call best_path_get_positions first\n") ;
01084     if (m_call==3)
01085         SG_ERROR( "no position information for best_path_simple\n") ;
01086     
01087     *positions=m_positions.get_array() ;
01088     *m=m_positions.get_dim1() ;
01089     *n=m_positions.get_dim2() ;
01090 }
01091 
01092 void CDynProg::best_path_get_losses(float64_t** losses, int32_t* seq_len)
01093 {
01094     ASSERT(losses && seq_len);
01095     *losses=m_my_losses.get_array();
01096     *seq_len=m_my_losses.get_dim1();
01097 }
01098 
01099 
01101 
01102 float64_t CDynProg::best_path_no_b(
01103     int32_t max_iter, int32_t &best_iter, int32_t *my_path)
01104 {
01105     CArray2<T_STATES> psi(max_iter, N) ;
01106     CArray<float64_t>* delta = new CArray<float64_t>(N) ;
01107     CArray<float64_t>* delta_new = new CArray<float64_t>(N) ;
01108     
01109     { // initialization
01110         for (int32_t i=0; i<N; i++)
01111         {
01112             delta->element(i) = get_p(i) ;
01113             psi.element(0, i)= 0 ;
01114         }
01115     } 
01116     
01117     float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
01118     best_iter = 0 ;
01119     
01120     // recursion
01121     for (int32_t t=1; t<max_iter; t++)
01122     {
01123         CArray<float64_t>* dummy;
01124         int32_t NN=N ;
01125         for (int32_t j=0; j<NN; j++)
01126         {
01127             float64_t maxj = delta->element(0) + transition_matrix_a.element(0,j);
01128             int32_t argmax=0;
01129             
01130             for (int32_t i=1; i<NN; i++)
01131             {
01132                 float64_t temp = delta->element(i) + transition_matrix_a.element(i,j);
01133                 
01134                 if (temp>maxj)
01135                 {
01136                     maxj=temp;
01137                     argmax=i;
01138                 }
01139             }
01140             delta_new->element(j)=maxj ;
01141             psi.element(t, j)=argmax ;
01142         }
01143         
01144         dummy=delta;
01145         delta=delta_new;
01146         delta_new=dummy;    //switch delta/delta_new
01147         
01148         { //termination
01149             float64_t maxj=delta->element(0)+get_q(0);
01150             int32_t argmax=0;
01151             
01152             for (int32_t i=1; i<N; i++)
01153             {
01154                 float64_t temp=delta->element(i)+get_q(i);
01155                 
01156                 if (temp>maxj)
01157                 {
01158                     maxj=temp;
01159                     argmax=i;
01160                 }
01161             }
01162             //pat_prob=maxj;
01163             
01164             if (maxj>best_iter_prob)
01165             {
01166                 my_path[t]=argmax;
01167                 best_iter=t ;
01168                 best_iter_prob = maxj ;
01169             } ;
01170         } ;
01171     }
01172 
01173     
01174     { //state sequence backtracking
01175         for (int32_t t = best_iter; t>0; t--)
01176         {
01177             my_path[t-1]=psi.element(t, my_path[t]);
01178         }
01179     }
01180 
01181     delete delta ;
01182     delete delta_new ;
01183     
01184     return best_iter_prob ;
01185 }
01186 
01187 void CDynProg::best_path_no_b_trans(
01188     int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01189     float64_t *prob_nbest, int32_t *my_paths)
01190 {
01191     //T_STATES *psi=new T_STATES[max_iter*N*nbest] ;
01192     CArray3<T_STATES> psi(max_iter, N, nbest) ;
01193     CArray3<int16_t> ktable(max_iter, N, nbest) ;
01194     CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01195 
01196     CArray<float64_t> tempvv(nbest*N) ;
01197     CArray<int32_t> tempii(nbest*N) ;
01198 
01199     CArray2<T_STATES> path_ends(max_iter, nbest) ;
01200     CArray2<float64_t> *delta=new CArray2<float64_t>(N, nbest) ;
01201     CArray2<float64_t> *delta_new=new CArray2<float64_t>(N, nbest) ;
01202     CArray2<float64_t> delta_end(max_iter, nbest) ;
01203 
01204     CArray2<int32_t> paths(max_iter, nbest) ;
01205     paths.set_array(my_paths, max_iter, nbest, false, false) ;
01206 
01207     { // initialization
01208         for (T_STATES i=0; i<N; i++)
01209         {
01210             delta->element(i,0) = get_p(i) ;
01211             for (int16_t k=1; k<nbest; k++)
01212             {
01213                 delta->element(i,k)=-CMath::INFTY ;
01214                 ktable.element(0,i,k)=0 ;
01215             }
01216         }
01217     }
01218     
01219     // recursion
01220     for (int32_t t=1; t<max_iter; t++)
01221     {
01222         CArray2<float64_t>* dummy=NULL;
01223 
01224         for (T_STATES j=0; j<N; j++)
01225         {
01226             const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01227             const T_STATES *elem_list = trans_list_forward[j] ;
01228             const float64_t *elem_val = trans_list_forward_val[j] ;
01229             
01230             int32_t list_len=0 ;
01231             for (int16_t diff=0; diff<nbest; diff++)
01232             {
01233                 for (int32_t i=0; i<num_elem; i++)
01234                 {
01235                     T_STATES ii = elem_list[i] ;
01236                     
01237                     tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01238                     tempii.element(list_len) = diff*N + ii ;
01239                     list_len++ ;
01240                 }
01241             }
01242             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01243             
01244             for (int16_t k=0; k<nbest; k++)
01245             {
01246                 if (k<list_len)
01247                 {
01248                     delta_new->element(j,k)  = -tempvv[k] ;
01249                     psi.element(t,j,k)      = (tempii[k]%N) ;
01250                     ktable.element(t,j,k)   = (tempii[k]-(tempii[k]%N))/N ;
01251                 }
01252                 else
01253                 {
01254                     delta_new->element(j,k)  = -CMath::INFTY ;
01255                     psi.element(t,j,k)      = 0 ;
01256                     ktable.element(t,j,k)   = 0 ;
01257                 }
01258             }
01259         }
01260         
01261         dummy=delta;
01262         delta=delta_new;
01263         delta_new=dummy;    //switch delta/delta_new
01264         
01265         { //termination
01266             int32_t list_len = 0 ;
01267             for (int16_t diff=0; diff<nbest; diff++)
01268             {
01269                 for (T_STATES i=0; i<N; i++)
01270                 {
01271                     tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01272                     tempii.element(list_len) = diff*N + i ;
01273                     list_len++ ;
01274                 }
01275             }
01276             CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01277             
01278             for (int16_t k=0; k<nbest; k++)
01279             {
01280                 delta_end.element(t-1,k) = -tempvv[k] ;
01281                 path_ends.element(t-1,k) = (tempii[k]%N) ;
01282                 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%N))/N ;
01283             }
01284         }
01285     }
01286     
01287     { //state sequence backtracking
01288         max_best_iter=0 ;
01289         
01290         CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01291         CArray<int16_t> sort_k(max_iter*nbest) ;
01292         CArray<int32_t> sort_t(max_iter*nbest) ;
01293         CArray<int32_t> sort_idx(max_iter*nbest) ;
01294         
01295         int32_t i=0 ;
01296         for (int32_t iter=0; iter<max_iter-1; iter++)
01297             for (int16_t k=0; k<nbest; k++)
01298             {
01299                 sort_delta_end[i]=-delta_end.element(iter,k) ;
01300                 sort_k[i]=k ;
01301                 sort_t[i]=iter+1 ;
01302                 sort_idx[i]=i ;
01303                 i++ ;
01304             }
01305         
01306         CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01307 
01308         for (int16_t n=0; n<nbest; n++)
01309         {
01310             int16_t k=sort_k[sort_idx[n]] ;
01311             int32_t iter=sort_t[sort_idx[n]] ;
01312             prob_nbest[n]=-sort_delta_end[n] ;
01313 
01314             if (iter>max_best_iter)
01315                 max_best_iter=iter ;
01316             
01317             ASSERT(k<nbest) ;
01318             ASSERT(iter<max_iter) ;
01319             
01320             paths.element(iter,n) = path_ends.element(iter-1, k) ;
01321             int16_t q   = ktable_ends.element(iter-1, k) ;
01322             
01323             for (int32_t t = iter; t>0; t--)
01324             {
01325                 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01326                 q = ktable.element(t, paths.element(t,n), q) ;
01327             }
01328         }
01329     }
01330 
01331     delete delta ;
01332     delete delta_new ;
01333 }
01334 
01335 
01336 void CDynProg::translate_from_single_order(
01337     uint16_t* obs, int32_t sequence_length, int32_t start, int32_t order,
01338     int32_t max_val)
01339 {
01340     int32_t i,j;
01341     uint16_t value=0;
01342     
01343     for (i=sequence_length-1; i>=order-1; i--) //convert interval of size T
01344     {
01345         value=0;
01346         for (j=i; j>=i-order+1; j--)
01347             value= (value >> max_val) | (obs[j] << (max_val * (order-1)));
01348         
01349         obs[i]=value;
01350     }
01351     
01352     for (i=order-2;i>=0;i--)
01353     {
01354         value=0;
01355         for (j=i; j>=i-order+1; j--)
01356         {
01357             value= (value >> max_val);
01358             if (j>=0)
01359                 value|=obs[j] << (max_val * (order-1));
01360         }
01361         obs[i]=value;
01362         //ASSERT(value<num_words) ;
01363     }
01364     if (start>0)
01365         for (i=start; i<sequence_length; i++)   
01366             obs[i-start]=obs[i];
01367 }
01368 
01369 void CDynProg::reset_svm_value(
01370     int32_t pos, int32_t & last_svm_pos, float64_t * svm_value)
01371 {
01372     for (int32_t i=0; i<num_words_single; i++)
01373         word_used_single[i]=false ;
01374     for (int32_t s=0; s<num_svms; s++)
01375         svm_value_unnormalized_single[s] = 0 ;
01376     for (int32_t s=0; s<num_svms; s++)
01377         svm_value[s] = 0 ;
01378     last_svm_pos = pos - 6+1 ;
01379     num_unique_words_single=0 ;
01380 }
01381 
01382 void CDynProg::extend_svm_value(
01383     uint16_t* wordstr, int32_t pos, int32_t &last_svm_pos, float64_t* svm_value)
01384 {
01385     bool did_something = false ;
01386     for (int32_t i=last_svm_pos-1; (i>=pos) && (i>=0); i--)
01387     {
01388         if (wordstr[i]>=num_words_single)
01389             SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[i]) ;
01390         
01391         if (!word_used_single[wordstr[i]])
01392         {
01393             for (int32_t s=0; s<num_svms_single; s++)
01394                 svm_value_unnormalized_single[s]+=dict_weights.element(wordstr[i],s) ;
01395             
01396             word_used_single[wordstr[i]]=true ;
01397             num_unique_words_single++ ;
01398             did_something=true ;
01399         }
01400     } ;
01401     if (num_unique_words_single>0)
01402     {
01403         last_svm_pos=pos ;
01404         if (did_something)
01405             for (int32_t s=0; s<num_svms; s++)
01406                 svm_value[s]= svm_value_unnormalized_single[s]/sqrt((float64_t)num_unique_words_single) ;  // full normalization
01407     }
01408     else
01409     {
01410         // what should I do?
01411         for (int32_t s=0; s<num_svms; s++)
01412             svm_value[s]=0 ;
01413     }
01414     
01415 }
01416 
01417 
01418 void CDynProg::reset_segment_sum_value(
01419     int32_t num_states, int32_t pos, int32_t & last_segment_sum_pos,
01420     float64_t * segment_sum_value)
01421 {
01422     for (int32_t s=0; s<num_states; s++)
01423         segment_sum_value[s] = 0 ;
01424     last_segment_sum_pos = pos ;
01425     //SG_DEBUG( "start: %i\n", pos) ;
01426 }
01427 
01428 void CDynProg::extend_segment_sum_value(
01429     float64_t *segment_sum_weights, int32_t seqlen, int32_t num_states,
01430     int32_t pos, int32_t &last_segment_sum_pos, float64_t* segment_sum_value)
01431 {
01432     for (int32_t i=last_segment_sum_pos-1; (i>=pos) && (i>=0); i--)
01433     {
01434         for (int32_t s=0; s<num_states; s++)
01435             segment_sum_value[s] += segment_sum_weights[i*num_states+s] ;
01436     } ;
01437     //SG_DEBUG( "extend %i: %f\n", pos, segment_sum_value[0]) ;
01438     last_segment_sum_pos = pos ;
01439 }
01440 
01441 
01442 void CDynProg::best_path_2struct(
01443     const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
01444     CPlifBase **Plif_matrix, const char *genestr, int32_t genestr_len,
01445     int16_t nbest, float64_t *prob_nbest, int32_t *my_state_seq,
01446     int32_t *my_pos_seq, float64_t *dictionary_weights, int32_t dict_len,
01447     float64_t *segment_sum_weights)
01448 {
01449     const int32_t default_look_back = 100 ;
01450     int32_t max_look_back = default_look_back ;
01451     bool use_svm = false ;
01452     ASSERT(dict_len==num_svms*num_words_single) ;
01453     dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
01454     dict_weights_array=dict_weights.get_array() ;
01455 
01456     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false) ;
01457     CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
01458     
01459     float64_t svm_value[num_svms] ;
01460     float64_t segment_sum_value[N] ;
01461     
01462     { // initialize svm_svalue
01463         for (int32_t s=0; s<num_svms; s++)
01464             svm_value[s]=0 ;
01465     }
01466     
01467     { // determine maximal length of look-back
01468         for (int32_t i=0; i<N; i++)
01469             for (int32_t j=0; j<N; j++)
01470             {
01471                 CPlifBase *penij=PEN.element(i,j) ;
01472                 if (penij==NULL)
01473                     continue ;
01474                 if (penij->get_max_value()>max_look_back)
01475                     max_look_back=(int32_t) CMath::ceil(penij->get_max_value());
01476                 if (penij->uses_svm_values())
01477                     use_svm=true ;
01478             }
01479     }
01480     max_look_back = CMath::min(genestr_len, max_look_back) ;
01481     //SG_DEBUG("use_svm=%i\n", use_svm) ;
01482     //SG_DEBUG("max_look_back=%i\n", max_look_back) ;
01483     
01484     const int32_t look_back_buflen = (max_look_back+1)*nbest*N ;
01485     //SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01486     const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01487                                 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01488                                 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01489                                 genestr_len*sizeof(bool))/(1024*1024)
01490          ;
01491     bool is_big = (mem_use>200) || (seq_len>5000) ;
01492     
01493     if (is_big)
01494     {
01495         SG_DEBUG("calling best_path_2struct: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
01496                      seq_len, N, max_look_back, nbest) ;
01497         SG_DEBUG("allocating %1.2fMB of memory\n", 
01498                      mem_use) ;
01499     }
01500     ASSERT(nbest<32000) ;
01501         
01502     CArray3<float64_t> delta(max_look_back+1, N, nbest) ;
01503     CArray3<T_STATES> psi(seq_len,N,nbest) ;
01504     CArray3<int16_t> ktable(seq_len,N,nbest) ;
01505     CArray3<int32_t> ptable(seq_len,N,nbest) ;
01506 
01507     CArray<float64_t> delta_end(nbest) ;
01508     CArray<T_STATES> path_ends(nbest) ;
01509     CArray<int16_t> ktable_end(nbest) ;
01510 
01511     CArray<float64_t> tempvv(look_back_buflen) ;
01512     CArray<int32_t> tempii(look_back_buflen) ;
01513 
01514     CArray<T_STATES> state_seq(seq_len) ;
01515     CArray<int32_t> pos_seq(seq_len) ;
01516 
01517     // translate to words, if svm is used
01518     uint16_t* wordstr=NULL ;
01519     if (use_svm)
01520     {
01521         ASSERT(dictionary_weights!=NULL) ;
01522         wordstr=new uint16_t[genestr_len] ;
01523         for (int32_t i=0; i<genestr_len; i++)
01524             switch (genestr[i])
01525             {
01526             case 'A':
01527             case 'a': wordstr[i]=0 ; break ;
01528             case 'C':
01529             case 'c': wordstr[i]=1 ; break ;
01530             case 'G':
01531             case 'g': wordstr[i]=2 ; break ;
01532             case 'T':
01533             case 't': wordstr[i]=3 ; break ;
01534             default: ASSERT(0) ;
01535             }
01536         translate_from_single_order(wordstr, genestr_len, word_degree_single-1, word_degree_single) ;
01537     }
01538     
01539     
01540     { // initialization
01541         for (T_STATES i=0; i<N; i++)
01542         {
01543             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
01544             psi.element(0,i,0)   = 0 ;
01545             ktable.element(0,i,0)  = 0 ;
01546             ptable.element(0,i,0)  = 0 ;
01547             for (int16_t k=1; k<nbest; k++)
01548             {
01549                 delta.element(0,i,k)    = -CMath::INFTY ;
01550                 psi.element(0,i,0)      = 0 ;
01551                 ktable.element(0,i,k)     = 0 ;
01552                 ptable.element(0,i,k)     = 0 ;
01553             }
01554         }
01555     }
01556 
01557     // recursion
01558     for (int32_t t=1; t<seq_len; t++)
01559     {
01560         if (is_big && t%(seq_len/10000)==1)
01561             SG_PROGRESS(t, 0, seq_len);
01562         //fprintf(stderr, "%i\n", t) ;
01563         
01564         for (T_STATES j=0; j<N; j++)
01565         {
01566             if (seq.element(j,t)<-1e20)
01567             { // if we cannot observe the symbol here, then we can omit the rest
01568                 for (int16_t k=0; k<nbest; k++)
01569                 {
01570                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
01571                     psi.element(t,j,k)      = 0 ;
01572                     ktable.element(t,j,k)     = 0 ;
01573                     ptable.element(t,j,k)     = 0 ;
01574                 }
01575             }
01576             else
01577             {
01578                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01579                 const T_STATES *elem_list = trans_list_forward[j] ;
01580                 const float64_t *elem_val      = trans_list_forward_val[j] ;
01581                 
01582                 int32_t list_len=0 ;
01583                 for (int32_t i=0; i<num_elem; i++)
01584                 {
01585                     T_STATES ii = elem_list[i] ;
01586                     //SG_DEBUG( "i=%i  ii=%i  num_elem=%i  PEN=%ld\n", i, ii, num_elem, PEN(j,ii)) ;
01587                     
01588                     const CPlifBase * penalty = PEN.element(j,ii) ;
01589                     int32_t look_back = default_look_back ;
01590                     if (penalty!=NULL)
01591                         look_back=(int32_t) (CMath::ceil(penalty->get_max_value()));
01592                     
01593                     int32_t last_svm_pos ;
01594                     if (use_svm)
01595                         reset_svm_value(pos[t], last_svm_pos, svm_value) ;
01596 
01597                     int32_t last_segment_sum_pos ;
01598                     reset_segment_sum_value(N, pos[t], last_segment_sum_pos, segment_sum_value) ;
01599 
01600                     for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
01601                     {
01602                         if (use_svm)
01603                             extend_svm_value(wordstr, pos[ts], last_svm_pos, svm_value) ;
01604 
01605                         extend_segment_sum_value(segment_sum_weights, seq_len, N, pos[ts], last_segment_sum_pos, segment_sum_value) ;
01606                         
01607                         float64_t pen_val = 0.0 ;
01608                         if (penalty)
01609                             pen_val=penalty->lookup_penalty(pos[t]-pos[ts], svm_value) + segment_sum_value[j] ;
01610                         for (int16_t diff=0; diff<nbest; diff++)
01611                         {
01612                             float64_t  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
01613                             val             += pen_val ;
01614                             
01615                             tempvv[list_len] = -val ;
01616                             tempii[list_len] =  ii + diff*N + ts*N*nbest;
01617                             //SG_DEBUG( "%i (%i,%i,%i, %i, %i) ", list_len, diff, ts, i, pos[t]-pos[ts], look_back) ;
01618                             list_len++ ;
01619                         }
01620                     }
01621                 }
01622                 CMath::nmin<int32_t>(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01623                 
01624                 for (int16_t k=0; k<nbest; k++)
01625                 {
01626                     if (k<list_len)
01627                     {
01628                         delta.element(t%max_look_back,j,k)    = -tempvv[k] + seq.element(j,t);
01629                         psi.element(t,j,k)      = (tempii[k]%N) ;
01630                         ktable.element(t,j,k)     = (tempii[k]%(N*nbest)-psi.element(t,j,k))/N ;
01631                         ptable.element(t,j,k)     = (tempii[k]-(tempii[k]%(N*nbest)))/(N*nbest) ;
01632                     }
01633                     else
01634                     {
01635                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
01636                         psi.element(t,j,k)      = 0 ;
01637                         ktable.element(t,j,k)     = 0 ;
01638                         ptable.element(t,j,k)     = 0 ;
01639                     }
01640                 }
01641             }
01642         }
01643     }
01644     
01645     { //termination
01646         int32_t list_len = 0 ;
01647         for (int16_t diff=0; diff<nbest; diff++)
01648         {
01649             for (T_STATES i=0; i<N; i++)
01650             {
01651                 tempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
01652                 tempii[list_len] = i + diff*N ;
01653                 list_len++ ;
01654             }
01655         }
01656         
01657         CMath::nmin(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01658         
01659         for (int16_t k=0; k<nbest; k++)
01660         {
01661             delta_end.element(k) = -tempvv[k] ;
01662             path_ends.element(k) = (tempii[k]%N) ;
01663             ktable_end.element(k) = (tempii[k]-path_ends.element(k))/N ;
01664         }
01665     }
01666     
01667     { //state sequence backtracking     
01668         for (int16_t k=0; k<nbest; k++)
01669         {
01670             prob_nbest[k]= delta_end.element(k) ;
01671             
01672             int32_t i         = 0 ;
01673             state_seq[i]  = path_ends.element(k) ;
01674             int16_t q   = ktable_end.element(k) ;
01675             pos_seq[i]    = seq_len-1 ;
01676 
01677             while (pos_seq[i]>0)
01678             {
01679                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01680                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
01681                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
01682                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
01683                 i++ ;
01684             }
01685             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
01686             int32_t num_states = i+1 ;
01687             for (i=0; i<num_states;i++)
01688             {
01689                 my_state_seq[i+k*(seq_len+1)] = state_seq[num_states-i-1] ;
01690                 my_pos_seq[i+k*(seq_len+1)]   = pos_seq[num_states-i-1] ;
01691             }
01692             my_state_seq[num_states+k*(seq_len+1)]=-1 ;
01693             my_pos_seq[num_states+k*(seq_len+1)]=-1 ;
01694         }
01695     }
01696     if (is_big)
01697         SG_PRINT( "DONE.     \n") ;
01698 }
01699 
01700 /*void CDynProg::reset_svm_values(int32_t pos, int32_t * last_svm_pos, float64_t * svm_value) 
01701 {
01702     for (int32_t j=0; j<num_degrees; j++)
01703     {
01704         for (int32_t i=0; i<num_words_array[j]; i++)
01705             word_used.element(word_used_array, j, i, num_degrees)=false ;
01706         for (int32_t s=0; s<num_svms; s++)
01707             svm_values_unnormalized.element(j,s) = 0 ;
01708         num_unique_words[j]=0 ;
01709         last_svm_pos[j] = pos - word_degree[j]+1 ;
01710         svm_pos_start[j] = pos - word_degree[j] ;
01711     }
01712     for (int32_t s=0; s<num_svms; s++)
01713         svm_value[s] = 0 ;
01714 }
01715 
01716 void CDynProg::extend_svm_values(uint16_t** wordstr, int32_t pos, int32_t *last_svm_pos, float64_t* svm_value) 
01717 {
01718     bool did_something = false ;
01719     for (int32_t j=0; j<num_degrees; j++)
01720     {
01721         for (int32_t i=last_svm_pos[j]-1; (i>=pos) && (i>=0); i--)
01722         {
01723             if (wordstr[j][i]>=num_words_array[j])
01724                 SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[j][i]) ;
01725 
01726             ASSERT(wordstr[j][i]<num_words_array[j]) ;
01727             if (!word_used.element(word_used_array, j, wordstr[j][i], num_degrees))
01728             {
01729                 for (int32_t s=0; s<num_svms; s++)
01730                     svm_values_unnormalized.element(j,s)+=dict_weights_array[wordstr[j][i]+cum_num_words_array[j]+s*cum_num_words_array[num_degrees]] ;
01731                     //svm_values_unnormalized.element(j,s)+=dict_weights.element(wordstr[j][i]+cum_num_words_array[j],s) ;
01732                 
01733                 //word_used.element(j,wordstr[j][i])=true ;
01734                 word_used.element(word_used_array, j, wordstr[j][i], num_degrees)=true ;
01735                 num_unique_words[j]++ ;
01736                 did_something=true ;
01737             } ;
01738         } ;
01739         if (num_unique_words[j]>0)
01740             last_svm_pos[j]=pos ;
01741     } ;
01742     
01743     if (did_something)
01744         for (int32_t s=0; s<num_svms; s++)
01745         {
01746             svm_value[s]=0.0 ;
01747             for (int32_t j=0; j<num_degrees; j++)
01748                 if (num_unique_words[j]>0)
01749                     svm_value[s]+= svm_values_unnormalized.element(j,s)/sqrt((float64_t)num_unique_words[j]) ;  // full normalization
01750         }
01751 }
01752 */
01753 
01754 void CDynProg::init_segment_loss(
01755     struct segment_loss_struct & loss, int32_t seqlen, int32_t howmuchlookback)
01756 {
01757 #ifdef DYNPROG_TIMING
01758     MyTime.start() ;
01759 #endif
01760     int32_t clear_size = CMath::min(howmuchlookback,seqlen) ;
01761     
01762     if (!loss.num_segment_id)
01763     {
01764         loss.segments_changed       = new int32_t[seqlen] ;
01765         loss.num_segment_id         = new float64_t[(max_a_id+1)*seqlen] ;
01766         loss.length_segment_id      = new int32_t[(max_a_id+1)*seqlen] ;
01767 
01768         clear_size = seqlen ;
01769     }
01770     
01771     for (int32_t j=0; j<clear_size; j++)
01772     {
01773         loss.segments_changed[j]=0 ;
01774         for (int32_t i=0; i<max_a_id+1; i++)       
01775         {
01776             loss.num_segment_id[i*seqlen+j] = 0;
01777             loss.length_segment_id[i*seqlen+j] = 0;
01778         }
01779     }
01780 
01781     loss.maxlookback = howmuchlookback ;
01782     loss.seqlen = seqlen;
01783 
01784 #ifdef DYNPROG_TIMING
01785     MyTime.stop() ;
01786     segment_init_time += MyTime.time_diff_sec() ;
01787 #endif
01788 }
01789 
01790 void CDynProg::clear_segment_loss(struct segment_loss_struct & loss) 
01791 {
01792 #ifdef DYNPROG_TIMING
01793     MyTime.start() ;
01794 #endif
01795     
01796     if (loss.num_segment_id != NULL)
01797     {
01798         delete[] loss.segments_changed ;
01799         delete[] loss.num_segment_id ;
01800         delete[] loss.length_segment_id ;
01801         loss.segments_changed = NULL ;
01802         loss.num_segment_id = NULL ;
01803         loss.length_segment_id = NULL ;
01804     }
01805 #ifdef DYNPROG_TIMING
01806     MyTime.stop() ;
01807     segment_clean_time += MyTime.time_diff_sec() ;
01808 #endif
01809 }
01810 
01811 float64_t CDynProg::extend_segment_loss(
01812     struct segment_loss_struct & loss, const int32_t * pos_array,
01813     int32_t segment_id, int32_t pos, int32_t & last_pos, float64_t &last_value)
01814 {
01815 #ifdef DYNPROG_TIMING
01816     MyTime.start() ;
01817 #endif
01818     
01819     if (pos==last_pos)
01820         return last_value ;
01821     ASSERT(pos<last_pos) ;
01822 
01823     last_pos-- ;
01824     bool changed = false ;
01825     while (last_pos>=pos)
01826     {
01827         if (loss.segments_changed[last_pos])
01828         {
01829             changed=true ;
01830             break ;
01831         }
01832         last_pos-- ;
01833     }
01834     if (last_pos<pos)
01835         last_pos = pos ;
01836     
01837     if (!changed)
01838     {
01839         ASSERT(last_pos>=0) ;
01840         ASSERT(last_pos<loss.seqlen) ;
01841         float64_t length_contrib = (pos_array[last_pos]-pos_array[pos])*m_segment_loss.element(m_segment_ids.element(pos), segment_id, 1) ;
01842         float64_t ret = last_value + length_contrib ;
01843         last_pos = pos ;
01844         return ret ;
01845     }
01846 
01847     CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01848     CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01849     float64_t ret = 0.0 ;
01850     for (int32_t i=0; i<max_a_id+1; i++)
01851     {
01852         //SG_DEBUG( "%i: %i, %i, %f (%f), %f (%f)\n", pos, num_segment_id.element(pos, i), length_segment_id.element(pos, i), num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id,0), m_segment_loss.element(i, segment_id, 0), length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1), m_segment_loss.element(i, segment_id,1)) ;
01853 
01854         if (num_segment_id.element(pos, i)!=0)
01855         {
01856             ret += num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 0) ;
01857         //  SG_PRINT("ret:%f pos:%i i:%i segment_id:%i \n",ret,pos,i,segment_id);
01858         //  if (ret>0)
01859         //  {
01860         //      for (int32_t g=0; g<max_a_id+1; g++)
01861         //          SG_PRINT("g:%i sid(pos, g):%i    ",g,num_segment_id.element(pos, g));
01862         //      SG_PRINT("\n");
01863         //  }
01864         }
01865         if (length_segment_id.element(pos, i)!=0)
01866             ret += length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1) ;
01867     }
01868     last_pos = pos ;
01869     last_value = ret ;
01870 
01871 #ifdef DYNPROG_TIMING
01872     MyTime.stop() ;
01873     segment_extend_time += MyTime.time_diff_sec() ;
01874 #endif
01875     return ret ;
01876 }
01877 
01878 void CDynProg::find_segment_loss_till_pos(
01879     const int32_t * pos, int32_t t_end, CArray<int32_t>& segment_ids,
01880     CArray<float64_t>& segment_mask, struct segment_loss_struct & loss)
01881 {
01882 #ifdef DYNPROG_TIMING
01883     MyTime.start() ;
01884 #endif
01885     CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01886     CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01887     
01888     for (int32_t i=0; i<max_a_id+1; i++)
01889     {
01890         num_segment_id.element(t_end, i) = 0 ;
01891         length_segment_id.element(t_end, i) = 0 ;
01892     }
01893     int32_t wobble_pos_segment_id_switch = 0 ;
01894     int32_t last_segment_id = -1 ;
01895     int32_t ts = t_end-1 ;       
01896     while ((ts>=0) && (pos[t_end] - pos[ts] <= loss.maxlookback))
01897     {
01898         int32_t cur_segment_id = segment_ids.element(ts) ;
01899         // allow at most one wobble
01900         bool wobble_pos = (CMath::abs(segment_mask.element(ts))<1e-7) && (wobble_pos_segment_id_switch==0) ;
01901         if (!(cur_segment_id<=max_a_id))
01902             SG_ERROR("(cur_segment_id<=max_a_id), cur_segment_id:%i max_a_id:%i\n",cur_segment_id,max_a_id);
01903         ASSERT(cur_segment_id>=0) ;
01904         
01905         for (int32_t i=0; i<max_a_id+1; i++)
01906         {
01907             num_segment_id.element(ts, i) = num_segment_id.element(ts+1, i) ;
01908             length_segment_id.element(ts, i) = length_segment_id.element(ts+1, i) ;
01909         }
01910         
01911         if (cur_segment_id!=last_segment_id)
01912         {
01913             if (wobble_pos)
01914             {
01915                 //SG_DEBUG( "no change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01916                 wobble_pos_segment_id_switch++ ;
01917                 //ASSERT(wobble_pos_segment_id_switch<=1) ;
01918             }
01919             else
01920             {
01921                 //SG_DEBUG( "change at %i: %i, %i\n", ts, last_segment_id, cur_segment_id) ;
01922                 loss.segments_changed[ts] = true ;
01923                 num_segment_id.element(ts, cur_segment_id) += segment_mask.element(ts);
01924                 length_segment_id.element(ts, cur_segment_id) += (int32_t)((pos[ts+1]-pos[ts])*segment_mask.element(ts));
01925                 wobble_pos_segment_id_switch = 0 ;
01926             }
01927             last_segment_id = cur_segment_id ;
01928         } 
01929         else
01930             if (!wobble_pos)
01931                 length_segment_id.element(ts, cur_segment_id) += pos[ts+1] - pos[ts] ;
01932 
01933         ts--;
01934     }
01935 #ifdef DYNPROG_TIMING
01936     MyTime.stop() ;
01937     segment_pos_time += MyTime.time_diff_sec() ;
01938 #endif
01939 }
01940 
01941 void CDynProg::init_svm_values(
01942     struct svm_values_struct & svs, int32_t start_pos, int32_t seqlen,
01943     int32_t maxlookback)
01944 {
01945 #ifdef DYNPROG_TIMING
01946     MyTime.start() ;
01947 #endif
01948     /*
01949       See find_svm_values_till_pos for comments
01950       
01951       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
01952       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
01953       
01954       where t_end is the end of all segments we are currently looking at
01955     */
01956     int32_t clear_size = CMath::min(maxlookback,seqlen) ;
01957     
01958     if (!svs.svm_values)
01959     {
01960         svs.svm_values              = new float64_t[seqlen*num_svms] ;
01961         svs.num_unique_words        = new int32_t*[num_degrees] ;
01962         svs.svm_values_unnormalized = new float64_t*[num_degrees] ;
01963         svs.word_used               = new bool**[num_degrees] ;
01964         for (int32_t j=0; j<num_degrees; j++)
01965         {
01966             svs.word_used[j]               = new bool*[num_svms] ;
01967             for (int32_t s=0; s<num_svms; s++)
01968                 svs.word_used[j][s]               = new bool[num_words_array[j]] ;
01969         }
01970         for (int32_t j=0; j<num_degrees; j++)
01971         {
01972             svs.svm_values_unnormalized[j] = new float64_t[num_svms] ;
01973             svs.num_unique_words[j]        = new int32_t[num_svms] ;
01974         }
01975         svs.start_pos               = new int32_t[num_svms] ;
01976         clear_size = seqlen ;
01977     }
01978     
01979     //for (int32_t i=0; i<maxlookback*num_svms; i++)       // initializing this for safety, though we should be able to live without it
01980     //  svs.svm_values[i] = 0;
01981     memset(svs.svm_values, 0, clear_size*num_svms*sizeof(float64_t)) ;
01982 
01983     for (int32_t j=0; j<num_degrees; j++)
01984     {       
01985         //for (int32_t s=0; s<num_svms; s++)
01986         //  svs.svm_values_unnormalized[j][s] = 0 ;
01987         memset(svs.svm_values_unnormalized[j], 0, num_svms*sizeof(float64_t)) ;
01988 
01989         //for (int32_t s=0; s<num_svms; s++)
01990         //  svs.num_unique_words[j][s] = 0 ;
01991         memset(svs.num_unique_words[j], 0, num_svms*sizeof(int32_t)) ;
01992     }
01993     
01994     for (int32_t j=0; j<num_degrees; j++)
01995         for (int32_t s=0; s<num_svms; s++)
01996         {
01997             //for (int32_t i=0; i<num_words_array[j]; i++)
01998             //  svs.word_used[j][s][i] = false ;
01999             memset(svs.word_used[j][s], 0, num_words_array[j]*sizeof(bool)) ;
02000         }
02001     
02002     for (int32_t s=0; s<num_svms; s++)
02003         svs.start_pos[s] = start_pos - mod_words.element(s,1) ;
02004     
02005     svs.maxlookback = maxlookback ;
02006     svs.seqlen = seqlen ;
02007 
02008 #ifdef DYNPROG_TIMING
02009     MyTime.stop() ;
02010     svm_init_time += MyTime.time_diff_sec() ;
02011 #endif
02012 }
02013 
02014 void CDynProg::clear_svm_values(struct svm_values_struct & svs) 
02015 {
02016 #ifdef DYNPROG_TIMING
02017     MyTime.start() ;
02018 #endif  
02019     if (NULL != svs.svm_values)
02020     {
02021         for (int32_t j=0; j<num_degrees; j++)
02022         {
02023             for (int32_t s=0; s<num_svms; s++)
02024                 delete[] svs.word_used[j][s] ;
02025             delete[] svs.word_used[j];
02026         }
02027         delete[] svs.word_used;
02028         
02029         for (int32_t j=0; j<num_degrees; j++)
02030             delete[] svs.svm_values_unnormalized[j] ;
02031         for (int32_t j=0; j<num_degrees; j++)
02032             delete[] svs.num_unique_words[j] ;
02033         
02034         delete[] svs.svm_values_unnormalized;
02035         delete[] svs.svm_values;
02036         delete[] svs.num_unique_words ;
02037         
02038         svs.word_used=NULL ;
02039         svs.svm_values=NULL ;
02040         svs.svm_values_unnormalized=NULL ;
02041     }
02042 
02043 #ifdef DYNPROG_TIMING
02044     MyTime.stop() ;
02045     svm_clean_time += MyTime.time_diff_sec() ;
02046 #endif
02047 }
02048 
02049 
02050 void CDynProg::find_svm_values_till_pos(
02051     uint16_t*** wordstr,  const int32_t *pos,  int32_t t_end,
02052     struct svm_values_struct &svs)
02053 {
02054 #ifdef DYNPROG_TIMING
02055     MyTime.start() ;
02056 #endif
02057     
02058     /*
02059       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
02060       corresponding to the 6-mer in genestr(i-5:i) 
02061       pos is a vector of candidate transition positions (it is input to best_path_trans)
02062       t_end is some index in pos
02063       
02064       svs has been initialized by init_svm_values
02065       
02066       At the end of this procedure, 
02067       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02068       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02069       
02070       The SVM weights are precomputed in dict_weights
02071     */
02072     
02073     for (int32_t j=0; j<num_degrees; j++)
02074     //for (int32_t j=0; j<1; j++)
02075     {
02076         int32_t plen = 1;
02077         int32_t ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02078         int32_t offset;
02079         
02080         int32_t posprev = pos[t_end]-word_degree[j]+1;
02081         int32_t poscurrent = pos[ts];
02082         
02083         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02084 
02085         if (poscurrent<0)
02086             poscurrent = 0;
02087         float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02088         int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02089         bool ** my_word_used = svs.word_used[j] ;
02090         
02091         int32_t len = pos[t_end] - poscurrent;
02092         while ((ts>=0) && (len <= svs.maxlookback))
02093         {
02094             for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02095             {
02096                 //fprintf(stderr, "string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02097                 
02098                 uint16_t word = wordstr[string_words_array[0]][j][i] ;
02099                 int32_t last_string = string_words_array[0] ;
02100                 for (int32_t s=0; s<num_svms; s++)
02101                 {
02102                 //sign_words_array[s]=false;
02103                     // try to avoid memory accesses
02104                     if (last_string != string_words_array[s])
02105                     {
02106                         last_string = string_words_array[s] ;
02107                         word = wordstr[last_string][j][i] ;
02108                     }
02109 
02110                     // do not consider k-mer, if seen before and in signum mode
02111                     if (sign_words_array[s] && my_word_used[s][word])
02112                         continue ;
02113                     
02114                     // only count k-mer if in frame (if applicable)
02115                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02116                     {
02117                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02118                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02119                         my_num_unique_words[s]++ ;
02120                         if (sign_words_array[s])
02121                             my_word_used[s][word]=true ;
02122                     }
02123                 }
02124             }
02125             offset = plen*num_svms ;
02126             for (int32_t s=0; s<num_svms; s++)
02127             {
02128                 float64_t normalization_factor = 1.0;
02129                 if (my_num_unique_words[s] > 0)
02130                 {
02131                     if (sign_words_array[s])
02132                         normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02133                     else
02134                         normalization_factor = (float64_t)my_num_unique_words[s];
02135                 }
02136 
02137                 if (j==0)
02138                     svs.svm_values[offset+s]=0 ;
02139                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02140             }
02141             
02142             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02143                 posprev = poscurrent;           
02144             
02145             ts--;
02146             plen++;
02147             
02148             if (ts>=0)
02149             {
02150                 poscurrent=pos[ts];
02151                 if (poscurrent<0)
02152                     poscurrent = 0;
02153                 len = pos[t_end] - poscurrent;
02154             }
02155         }
02156     }
02157 
02158 #ifdef DYNPROG_TIMING
02159     MyTime.stop() ;
02160     svm_pos_time += MyTime.time_diff_sec() ;
02161 #endif
02162 }
02163 
02164 
02165 void CDynProg::find_svm_values_till_pos(
02166     uint16_t** wordstr,  const int32_t *pos,  int32_t t_end,
02167     struct svm_values_struct &svs)
02168 {
02169 #ifdef DYNPROG_TIMING
02170     MyTime.start() ;
02171 #endif  
02172     /*
02173       wordstr is a vector of L n-gram indices, with wordstr(i) representing a number betweeen 0 and 4095 
02174       corresponding to the 6-mer in genestr(i-5:i) 
02175       pos is a vector of candidate transition positions (it is input to best_path_trans)
02176       t_end is some index in pos
02177       
02178       svs has been initialized by init_svm_values
02179       
02180       At the end of this procedure, 
02181       svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) 
02182       for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
02183       
02184       The SVM weights are precomputed in dict_weights
02185     */
02186     
02187     for (int32_t j=0; j<num_degrees; j++)
02188     //for (int32_t j=0; j<1; j++)
02189     {
02190         int32_t plen = 1;
02191         int32_t ts = t_end-1;        // index in pos; pos(ts) and pos(t) are indices of wordstr
02192         int32_t offset;
02193         
02194         int32_t posprev = pos[t_end]-word_degree[j]+1;
02195         int32_t poscurrent = pos[ts];
02196         
02197         //SG_DEBUG( "j=%i seqlen=%i posprev = %i, poscurrent = %i", j, svs.seqlen, posprev, poscurrent) ;
02198 
02199         if (poscurrent<0)
02200             poscurrent = 0;
02201         float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02202         int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02203         bool ** my_word_used = svs.word_used[j] ;
02204         
02205         int32_t len = pos[t_end] - poscurrent;
02206         while ((ts>=0) && (len <= svs.maxlookback))
02207         {
02208             for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02209             {
02210                 //fprintf(stderr, "string_words_array[0]=%i (%ld), j=%i (%ld)  i=%i\n", string_words_array[0], wordstr[string_words_array[0]], j, wordstr[string_words_array[0]][j], i) ;
02211                 
02212                 uint16_t word = wordstr[j][i] ;
02213                 for (int32_t s=0; s<num_svms; s++)
02214                 {
02215                     //sign_words_array[s]=false;
02216                     // do not consider k-mer, if seen before and in signum mode
02217                     if (sign_words_array[s] && my_word_used[s][word])
02218                         continue ;
02219                     
02220                     // only count k-mer if in frame (if applicable)
02221                     //if ((svs.start_pos[s]-i>0) && ((svs.start_pos[s]-i)%mod_words_array[s]==0))
02222                     {
02223                         my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02224                         //svs.svm_values_unnormalized[j][s]+=dict_weights.element(word+cum_num_words_array[j], s) ;
02225                         my_num_unique_words[s]++ ;
02226                         if (sign_words_array[s])
02227                             my_word_used[s][word]=true ;
02228                     }
02229                 }
02230             }
02231             offset = plen*num_svms ;
02232             for (int32_t s=0; s<num_svms; s++)
02233             {
02234                 float64_t normalization_factor = 1.0;
02235                 if (my_num_unique_words[s] > 0)
02236                 {
02237                     if (sign_words_array[s])
02238                         normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02239                     else
02240                         normalization_factor = (float64_t)my_num_unique_words[s];
02241                 }
02242 
02243                 if (j==0)
02244                     svs.svm_values[offset+s]=0 ;
02245                 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02246             }
02247             
02248             if (posprev > poscurrent)         // remember posprev initially set to pos[t_end]-word_degree+1... pos[ts] could be e.g. pos[t_end]-2
02249                 posprev = poscurrent;           
02250             
02251             ts--;
02252             plen++;
02253             
02254             if (ts>=0)
02255             {
02256                 poscurrent=pos[ts];
02257                 if (poscurrent<0)
02258                     poscurrent = 0;
02259                 len = pos[t_end] - poscurrent;
02260             }
02261         }
02262     }
02263 
02264 #ifdef DYNPROG_TIMING
02265     MyTime.stop() ;
02266     svm_pos_time += MyTime.time_diff_sec() ;
02267 #endif
02268 }
02269 
02270 bool CDynProg::extend_orf(
02271     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
02272     int32_t to)
02273 {
02274 #ifdef DYNPROG_TIMING
02275     MyTime.start() ;
02276 #endif
02277     
02278     if (start<0) 
02279         start=0 ;
02280     if (to<0)
02281         to=0 ;
02282     
02283     int32_t orf_target = orf_to-orf_from ;
02284     if (orf_target<0) orf_target+=3 ;
02285     
02286     int32_t pos ;
02287     if (last_pos==to)
02288         pos = to-orf_to-3 ;
02289     else
02290         pos=last_pos ;
02291 
02292     if (pos<0)
02293         return true ;
02294     
02295     for (; pos>=start; pos-=3)
02296         if (m_genestr_stop[pos])
02297             return false ;
02298     
02299     last_pos = CMath::min(pos+3,to-orf_to-3) ;
02300 
02301 #ifdef DYNPROG_TIMING
02302     MyTime.stop() ;
02303     orf_time += MyTime.time_diff_sec() ;
02304 #endif
02305     return true ;
02306 }
02307 
02308 template <int16_t nbest, bool with_loss, bool with_multiple_sequences>
02309 void CDynProg::best_path_trans(
02310     const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
02311     const int32_t *orf_info_array, CPlifBase **Plif_matrix,
02312     CPlifBase **Plif_state_signals, int32_t max_num_signals,
02313     int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
02314     int32_t *my_pos_seq, bool use_orf)
02315 {
02316 #ifdef DYNPROG_TIMING
02317     segment_init_time = 0.0 ;
02318     segment_pos_time = 0.0 ;
02319     segment_extend_time = 0.0 ;
02320     segment_clean_time = 0.0 ;
02321     orf_time = 0.0 ;
02322     svm_init_time = 0.0 ;
02323     svm_pos_time = 0.0 ;
02324     svm_clean_time = 0.0 ;
02325     MyTime2.start() ;
02326 #endif
02327     
02328     //SG_PRINT( "best_path_trans:%x\n", seq_array);
02329     if (!svm_arrays_clean)
02330     {
02331         SG_ERROR( "SVM arrays not clean") ;
02332         return ;
02333     }
02334     
02335 #ifdef DYNPROG_DEBUG
02336     transition_matrix_a.set_name("transition_matrix");
02337     transition_matrix_a.display_array();
02338     mod_words.display_array() ;
02339     sign_words.display_array() ;
02340     string_words.display_array() ;
02341     fprintf(stderr, "use_orf = %i\n", use_orf) ;
02342 #endif
02343     
02344     int32_t max_look_back = 20000 ;
02345     bool use_svm = false ;
02346     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
02347     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
02348     //dict_weights_array=dict_weights.get_array() ;
02349     
02350     SG_PRINT("N:%i, seq_len:%i, max_num_signals:%i\n",N, seq_len, max_num_signals) ;
02351 
02352 //  for (int32_t i=0;i<N*seq_len*max_num_signals;i++)
02353 //      SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
02354 
02355     //CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02356     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, true) ;
02357     PEN.set_name("PEN");
02358     //CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02359     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, true) ;
02360     PEN_state_signals.set_name("state_signals");
02361     //CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02362     CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02363     seq_input.set_name("seq_input") ;
02364     //seq_input.display_array() ;
02365     CArray2<float64_t> seq(N, seq_len) ;
02366     seq.set_name("seq") ;
02367     seq.zero() ;
02368 
02369     CArray2<int32_t> orf_info(orf_info_array, N, 2) ;
02370     orf_info.set_name("orf_info") ;
02371     //g_orf_info = orf_info ;
02372     //orf_info.display_array() ;
02373 
02374     float64_t svm_value[2*num_svms] ;
02375     { // initialize svm_svalue
02376         for (int32_t s=0; s<2*num_svms; s++)
02377             svm_value[s]=0 ;
02378     }
02379 
02380     { // convert seq_input to seq
02381       // this is independent of the svm values 
02382         for (int32_t i=0; i<N; i++)
02383             for (int32_t j=0; j<seq_len; j++)
02384                 seq.element(i,j) = 0 ;
02385 
02386         for (int32_t i=0; i<N; i++)
02387             for (int32_t j=0; j<seq_len; j++)
02388                 for (int32_t k=0; k<max_num_signals; k++)
02389                 {
02390                     if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
02391                     {
02392                         // no plif
02393                         seq.element(i,j) = seq_input.element(i,j,k) ;
02394                         break ;
02395                     }
02396                     if (PEN_state_signals.element(i,k)!=NULL)
02397                     {
02398                         // just one plif
02399                         if (finite(seq_input.element(i,j,k)))
02400                             seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input.element(i,j,k), svm_value) ;
02401                         else
02402                             // keep infinity values
02403                             seq.element(i,j) = seq_input.element(i, j, k) ;
02404                     } 
02405                     else
02406                         break ;
02407                 }
02408     }
02409 
02410 
02411     { // determine maximal length of look-back
02412         for (int32_t i=0; i<N; i++)
02413         {
02414             // only consider transitions that are actually allowed
02415             const T_STATES num_elem   = trans_list_forward_cnt[i] ;
02416             const T_STATES *elem_list = trans_list_forward[i] ;
02417                 
02418             for (int32_t jj=0; jj<num_elem; jj++)
02419             {
02420                 T_STATES j = elem_list[jj] ;
02421 
02422                 CPlifBase *penij=PEN.element(i,j) ;
02423                 if (penij==NULL)
02424                     continue ;
02425                 if (penij->get_max_value()>max_look_back)
02426                 {
02427                     SG_DEBUG( "%d %d -> value: %f\n", i,j,penij->get_max_value());
02428                     //penij->print() ;
02429                     max_look_back=(int32_t) (CMath::ceil(penij->get_max_value()));
02430                 }
02431                 if (penij->uses_svm_values())
02432                     use_svm=true ;
02433             }
02434         }
02435     }
02436 
02437     //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr_len) ;
02438     max_look_back = CMath::min(m_genestr_len, max_look_back) ;
02439     SG_DEBUG("use_svm=%i\n", use_svm) ;
02440     
02441     SG_DEBUG("maxlook: %d N: %d nbest: %d \n", max_look_back, N, nbest);
02442     const int32_t look_back_buflen = (max_look_back*N+1)*nbest ;
02443     SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
02444     const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
02445                                   look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
02446                                   seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
02447                                   m_genestr_len*sizeof(bool))/(1024*1024);
02448 
02449     bool is_big = (mem_use>200) || (seq_len>5000) ;
02450 
02451     if (1)//(is_big)
02452     {
02453         SG_DEBUG("calling best_path_trans: seq_len=%i, N=%i, lookback=%i nbest=%i\n", 
02454                      seq_len, N, max_look_back, nbest) ;
02455         SG_DEBUG("allocating %1.2fMB of memory\n", 
02456                      mem_use) ;
02457     }
02458     ASSERT(nbest<32000) ;
02459     
02460 
02461     
02462     CArray3<float64_t> delta(seq_len, N, nbest) ;
02463     delta.set_name("delta");
02464     float64_t* delta_array = delta.get_array() ;
02465     //delta.zero() ;
02466     
02467     CArray3<T_STATES> psi(seq_len, N, nbest) ;
02468     psi.set_name("psi");
02469     //psi.zero() ;
02470     
02471     CArray3<int16_t> ktable(seq_len, N, nbest) ;
02472     ktable.set_name("ktable");
02473     //ktable.zero() ;
02474     
02475     CArray3<int32_t> ptable(seq_len, N, nbest) ;    
02476     ptable.set_name("ptable");
02477     //ptable.zero() ;
02478 
02479     CArray<float64_t> delta_end(nbest) ;
02480     delta_end.set_name("delta_end");
02481     //delta_end.zero() ;
02482     
02483     CArray<T_STATES> path_ends(nbest) ;
02484     path_ends.set_name("path_ends");
02485     //path_ends.zero() ;
02486     
02487     CArray<int16_t> ktable_end(nbest) ;
02488     ktable_end.set_name("ktable_end");
02489     //ktable_end.zero() ;
02490 
02491     float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
02492     memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
02493     int32_t * fixedtempii=new int32_t[look_back_buflen] ;
02494     memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
02495 
02496     CArray<float64_t> oldtempvv(look_back_buflen) ;
02497     oldtempvv.set_name("oldtempvv");
02498     CArray<float64_t> oldtempvv2(look_back_buflen) ;
02499     oldtempvv2.set_name("oldtempvv2");
02500     //oldtempvv.zero() ;
02501     //oldtempvv.display_size() ;
02502     
02503     CArray<int32_t> oldtempii(look_back_buflen) ;
02504     oldtempii.set_name("oldtempii");
02505     CArray<int32_t> oldtempii2(look_back_buflen) ;
02506     oldtempii2.set_name("oldtempii2");
02507     //oldtempii.zero() ;
02508 
02509     CArray<T_STATES> state_seq(seq_len) ;
02510     state_seq.set_name("state_seq");
02511     //state_seq.zero() ;
02512     
02513     CArray<int32_t> pos_seq(seq_len) ;
02514     pos_seq.set_name("pos_seq");
02515     //pos_seq.zero() ;
02516 
02517     
02518     //dict_weights.set_name("dict_weights") ;
02519     word_degree.set_name("word_degree") ;
02520     cum_num_words.set_name("cum_num_words") ;
02521     num_words.set_name("num_words") ;
02522     //word_used.set_name("word_used") ;
02523     //svm_values_unnormalized.set_name("svm_values_unnormalized") ;
02524     svm_pos_start.set_name("svm_pos_start") ;
02525     num_unique_words.set_name("num_unique_words") ;
02526 
02527     PEN.set_name("PEN") ;
02528     seq.set_name("seq") ;
02529     orf_info.set_name("orf_info") ;
02530     
02531     delta.set_name("delta") ;
02532     psi.set_name("psi") ;
02533     ktable.set_name("ktable") ;
02534     ptable.set_name("ptable") ;
02535     delta_end.set_name("delta_end") ;
02536     path_ends.set_name("path_ends") ;
02537     ktable_end.set_name("ktable_end") ;
02538 
02539 #ifdef USE_TMP_ARRAYCLASS
02540     fixedtempvv.set_name("fixedtempvv") ;
02541     fixedtempii.set_name("fixedtempvv") ;
02542 #endif
02543 
02544     oldtempvv.set_name("oldtempvv") ;
02545     oldtempvv2.set_name("oldtempvv2") ;
02546     oldtempii.set_name("oldtempii") ;
02547     oldtempii2.set_name("oldtempii2") ;
02548 
02549 
02551 
02552 #ifdef DYNPROG_DEBUG
02553     state_seq.display_size() ;
02554     pos_seq.display_size() ;
02555 
02556     dict_weights.display_size() ;
02557     word_degree.display_array() ;
02558     cum_num_words.display_array() ;
02559     num_words.display_array() ;
02560     //word_used.display_size() ;
02561     //svm_values_unnormalized.display_size() ;
02562     svm_pos_start.display_array() ;
02563     num_unique_words.display_array() ;
02564 
02565     PEN.display_size() ;
02566     PEN_state_signals.display_size() ;
02567     seq.display_size() ;
02568     orf_info.display_size() ;
02569     
02570     //m_genestr_stop.display_size() ;
02571     delta.display_size() ;
02572     psi.display_size() ;
02573     ktable.display_size() ;
02574     ptable.display_size() ;
02575     delta_end.display_size() ;
02576     path_ends.display_size() ;
02577     ktable_end.display_size() ;
02578 
02579 #ifdef USE_TMP_ARRAYCLASS
02580     fixedtempvv.display_size() ;
02581     fixedtempii.display_size() ;
02582 #endif
02583 
02584     //oldtempvv.display_size() ;
02585     //oldtempii.display_size() ;
02586 
02587     state_seq.display_size() ;
02588     pos_seq.display_size() ;
02589 
02590     CArray<int32_t>pp = CArray<int32_t>(pos, seq_len) ;
02591     pp.set_name("pp");
02592     pp.display_array() ;
02593     
02594     //seq.zero() ;
02595     //seq_input.display_array() ;
02596 
02597 #endif //DYNPROG_DEBUG
02598 
02600 
02601 
02602 
02603     {
02604         for (int32_t s=0; s<num_svms; s++)
02605             ASSERT(string_words_array[s]<genestr_num)  ;
02606     }
02607 
02608     
02609         //CArray2<int32_t*> trans_matrix_svms(N,N);
02610     //CArray2<int32_t> trans_matrix_num_svms(N,N);
02611 
02612     { // initialization
02613 
02614         for (T_STATES i=0; i<N; i++)
02615         {
02616             //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02617             delta.element(delta_array, 0, i, 0, seq_len, N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
02618             psi.element(0,i,0)   = 0 ;
02619             if (nbest>1)
02620                 ktable.element(0,i,0)  = 0 ;
02621             ptable.element(0,i,0)  = 0 ;
02622             for (int16_t k=1; k<nbest; k++)
02623             {
02624                 int32_t dim1, dim2, dim3 ;
02625                 delta.get_array_size(dim1, dim2, dim3) ;
02626                 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
02627                 //delta.element(0, i, k)    = -CMath::INFTY ;
02628                 delta.element(delta_array, 0, i, k, seq_len, N)    = -CMath::INFTY ;
02629                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
02630                 if (nbest>1)
02631                     ktable.element(0,i,k)     = 0 ;
02632                 ptable.element(0,i,k)     = 0 ;
02633             }
02634 /*
02635             for (T_STATES j=0; j<N; j++)
02636             {
02637                 CPlifBase * penalty = PEN.element(i,j) ;
02638                 int32_t num_current_svms=0;
02639                 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02640                 if (penalty)
02641                 {
02642                     SG_PRINT("trans %i -> %i \n",i,j);
02643                     penalty->get_used_svms(&num_current_svms, svm_ids);
02644                     trans_matrix_svms.set_element(svm_ids,i,j);
02645                     for (int32_t l=0;l<num_current_svms;l++)
02646                         SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
02647                     trans_matrix_num_svms.set_element(num_current_svms,i,j);
02648                 }
02649             }
02650 */
02651 
02652         }
02653     }
02654 
02655     struct svm_values_struct svs;
02656     svs.num_unique_words = NULL;
02657     svs.svm_values = NULL;
02658     svs.svm_values_unnormalized = NULL;
02659     svs.word_used = NULL;
02660 
02661     struct segment_loss_struct loss;
02662     loss.segments_changed = NULL;
02663     loss.num_segment_id = NULL;
02664 
02665     SG_DEBUG("START_RECURSION \n\n");
02666 
02667     // recursion
02668     for (int32_t t=1; t<seq_len; t++)
02669     {
02670         if (is_big && t%(1+(seq_len/1000))==1)
02671             SG_PROGRESS(t, 0, seq_len);
02672         //fprintf(stderr, "%i\n", t) ;
02673 
02674         //init_svm_values(svs, pos[t], seq_len, max_look_back) ;
02675         //if (with_multiple_sequences)
02676             //find_svm_values_till_pos(wordstr, pos, t, svs);  
02677         //else
02678             //find_svm_values_till_pos(wordstr[0], pos, t, svs);  
02679 
02680         if (with_loss)
02681         {
02682             init_segment_loss(loss, seq_len, max_look_back);
02683             find_segment_loss_till_pos(pos, t, m_segment_ids, m_segment_mask, loss);  
02684         }
02685         
02686         for (T_STATES j=0; j<N; j++)
02687         {
02688             if (seq.element(j,t)<=-1e20)
02689             { // if we cannot observe the symbol here, then we can omit the rest
02690                 for (int16_t k=0; k<nbest; k++)
02691                 {
02692                     delta.element(delta_array, t, j, k, seq_len, N)    = seq.element(j,t) ;
02693                     psi.element(t,j,k)      = 0 ;
02694                     if (nbest>1)
02695                         ktable.element(t,j,k)     = 0 ;
02696                     ptable.element(t,j,k)     = 0 ;
02697                 }
02698             }
02699             else
02700             {
02701                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
02702                 const T_STATES *elem_list = trans_list_forward[j] ;
02703                 const float64_t *elem_val      = trans_list_forward_val[j] ;
02704                 const int32_t *elem_id      = trans_list_forward_id[j] ;
02705                 
02706                 int32_t fixed_list_len = 0 ;
02707                 float64_t fixedtempvv_ = CMath::INFTY ;
02708                 int32_t fixedtempii_ = 0 ;
02709                 
02710             
02711                 for (int32_t i=0; i<num_elem; i++)
02712                 {
02713                     T_STATES ii = elem_list[i] ;
02714                     
02715                     const CPlifBase * penalty = PEN.element(j,ii) ;
02716                     int32_t look_back = max_look_back ;
02717                     { // find lookback length
02718                         CPlifBase *pen = (CPlifBase*) penalty ;
02719                         if (pen!=NULL)
02720                             look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
02721                         if (look_back>=1e6)
02722                             fprintf(stderr, "%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
02723                         ASSERT(look_back<1e6);
02724                     }
02725                     //int32_t num_current_svms = trans_matrix_num_svms.element(j,ii);
02726                     //int32_t* svm_ids = trans_matrix_svms.element(j,ii);
02727                     //int32_t* svm_ids[num_current_svms];
02728                     //for (int32_t id=0;id<num_current_svms;id++)
02729                     //  svm_ids[id]=*(p_svm_ids+id);
02730                 
02731                     int32_t orf_from = orf_info.element(ii,0) ;
02732                     int32_t orf_to   = orf_info.element(j,1) ;
02733                     if((orf_from!=-1)!=(orf_to!=-1))
02734                         SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02735                     ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02736                     
02737                     int32_t orf_target = -1 ;
02738                     if (orf_from!=-1)
02739                     {
02740                         orf_target=orf_to-orf_from ;
02741                         if (orf_target<0) 
02742                             orf_target+=3 ;
02743                         ASSERT(orf_target>=0 && orf_target<3) ;
02744                     }
02745                     
02746                     int32_t orf_last_pos = pos[t] ;
02747                     int32_t loss_last_pos = t ;
02748                     float64_t last_loss = 0.0 ;
02749 
02750                     for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
02751                     {
02752                         bool ok ;
02753                         //int32_t plen=t-ts;
02754 
02755                         /*for (int32_t s=0; s<num_svms; s++)
02756                             if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
02757                                 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
02758                             {
02759                                 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
02760                                 }*/
02761                         
02762                         if (orf_target==-1)
02763                             ok=true ;
02764                         else if (pos[ts]!=-1 && (pos[t]-pos[ts])%3==orf_target)
02765                             ok=(!use_orf) || extend_orf(orf_from, orf_to, pos[ts], orf_last_pos, pos[t]) ;
02766                         else
02767                             ok=false ;
02768                         
02769                         if (ok)
02770                         {
02771                             
02772                             float64_t segment_loss = 0.0 ;
02773                             if (with_loss)
02774                                 segment_loss = extend_segment_loss(loss, pos, elem_id[i], ts, loss_last_pos, last_loss) ;
02775 
02777                             // BEST_PATH_TRANS
02779                             int32_t frame = orf_info.element(ii,0);
02780                             lookup_content_svm_values(ts, t, pos[ts], pos[t], svm_value, frame);
02781                             if (m_use_tiling)
02782                                 lookup_tiling_plif_values(ts, t, pos[t]-pos[ts], svm_value);
02783 
02784                             //int32_t offset = plen*num_svms ;
02785                             //for (int32_t ss=0; ss<num_svms; ss++)
02786                             //{
02787                             //  //svm_value[ss]=svs.svm_values[offset+ss];
02788                             //  svm_value[ss]=new_svm_value[ss];
02789                             //  //if (CMath::abs(new_svm_value[ss]-svm_value[ss])>1e-5)
02790                             //  //  SG_PRINT("ts: %i t: %i  precomp: %f old: %f diff: %f \n",ts, t,new_svm_value[ss],svm_value[ss], CMath::abs(new_svm_value[ss]-svm_value[ss]));
02791                             //}
02792 
02793                             float64_t pen_val = 0.0 ;
02794                             if (penalty)
02795                                 pen_val = penalty->lookup_penalty(pos[t]-pos[ts], svm_value) ;
02796                             if (nbest==1)
02797                             {
02798                                 float64_t  val        = elem_val[i] + pen_val ;
02799                                 if (with_loss)
02800                                     val              += segment_loss ;
02801                                 
02802                                 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, seq_len, N)) ;
02803                                 if (mval<fixedtempvv_)
02804                                 {
02805                                     fixedtempvv_ = mval ;
02806                                     fixedtempii_ = ii + ts*N;
02807                                     fixed_list_len = 1 ;
02808                                 }
02809                             }
02810                             else
02811                             {
02812                                 for (int16_t diff=0; diff<nbest; diff++)
02813                                 {
02814                                     float64_t  val        = elem_val[i]  ;
02815                                     val              += pen_val ;
02816                                     if (with_loss)
02817                                         val              += segment_loss ;
02818                                     
02819                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, seq_len, N)) ;
02820                                     
02821                                     /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
02822                                     /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
02823                                     /* fixed_list_len has the number of elements in fixedtempvv */
02824                                     
02825                                     if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
02826                                     {
02827                                         if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
02828                                         {
02829                                             fixedtempvv[fixed_list_len] = mval ;
02830                                             fixedtempii[fixed_list_len] = ii + diff*N + ts*N*nbest;
02831                                             fixed_list_len++ ;
02832                                         }
02833                                         else  // must have mval < fixedtempvv[fixed_list_len-1]
02834                                         {
02835                                             int32_t addhere = fixed_list_len;
02836                                             while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
02837                                                 addhere--;
02838                                             
02839                                             // move everything from addhere+1 one forward 
02840                                             for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
02841                                             {
02842                                                 fixedtempvv[jj] = fixedtempvv[jj-1];
02843                                                 fixedtempii[jj] = fixedtempii[jj-1];
02844                                             }
02845                                             
02846                                             fixedtempvv[addhere] = mval;
02847                                             fixedtempii[addhere] = ii + diff*N + ts*N*nbest;
02848                                             
02849                                             if (fixed_list_len < nbest)
02850                                                 fixed_list_len++;
02851                                         }
02852                                     }
02853                                 }
02854                             }
02855                         }
02856                     }
02857                 }
02858                 
02859                 
02860                 int32_t numEnt = fixed_list_len;
02861                 
02862                 float64_t minusscore;
02863                 int64_t fromtjk;
02864                 
02865                 for (int16_t k=0; k<nbest; k++)
02866                 {
02867                     if (k<numEnt)
02868                     {
02869                         if (nbest==1)
02870                         {
02871                             minusscore = fixedtempvv_ ;
02872                             fromtjk = fixedtempii_ ;
02873                         }
02874                         else
02875                         {
02876                             minusscore = fixedtempvv[k];
02877                             fromtjk = fixedtempii[k];
02878                         }
02879                         
02880                         delta.element(delta_array, t, j, k, seq_len, N)    = -minusscore + seq.element(j,t);
02881                         psi.element(t,j,k)      = (fromtjk%N) ;
02882                         if (nbest>1)
02883                             ktable.element(t,j,k)   = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
02884                         ptable.element(t,j,k)   = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
02885                     }
02886                     else
02887                     {
02888                         delta.element(delta_array, t, j, k, seq_len, N)    = -CMath::INFTY ;
02889                         psi.element(t,j,k)      = 0 ;
02890                         if (nbest>1)
02891                             ktable.element(t,j,k)     = 0 ;
02892                         ptable.element(t,j,k)     = 0 ;
02893                     }
02894                 }
02895             }
02896         }
02897     }
02898 
02899     clear_svm_values(svs);
02900     if (with_loss)
02901         clear_segment_loss(loss);
02902 
02903     { //termination
02904         int32_t list_len = 0 ;
02905         for (int16_t diff=0; diff<nbest; diff++)
02906         {
02907             for (T_STATES i=0; i<N; i++)
02908             {
02909                 oldtempvv[list_len] = -(delta.element(delta_array, (seq_len-1), i, diff, seq_len, N)+get_q(i)) ;
02910                 oldtempii[list_len] = i + diff*N ;
02911                 list_len++ ;
02912             }
02913         }
02914         
02915         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02916         
02917         for (int16_t k=0; k<nbest; k++)
02918         {
02919             delta_end.element(k) = -oldtempvv[k] ;
02920             path_ends.element(k) = (oldtempii[k]%N) ;
02921             if (nbest>1)
02922                 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
02923         }
02924     }
02925     
02926     { //state sequence backtracking     
02927         for (int16_t k=0; k<nbest; k++)
02928         {
02929             prob_nbest[k]= delta_end.element(k) ;
02930             
02931             int32_t i         = 0 ;
02932             state_seq[i]  = path_ends.element(k) ;
02933             int16_t q   = 0 ;
02934             if (nbest>1)
02935                 q=ktable_end.element(k) ;
02936             pos_seq[i]    = seq_len-1 ;
02937 
02938             while (pos_seq[i]>0)
02939             {
02940                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02941                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02942                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02943                 if (nbest>1)
02944                     q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02945                 i++ ;
02946             }
02947             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02948             int32_t num_states = i+1 ;
02949             for (i=0; i<num_states;i++)
02950             {
02951                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
02952                 my_pos_seq[i+k*seq_len]   = pos_seq[num_states-i-1] ;
02953             }
02954             my_state_seq[num_states+k*seq_len]=-1 ;
02955             my_pos_seq[num_states+k*seq_len]=-1 ;
02956         }
02957     }
02958     
02959     if (is_big)
02960         SG_PRINT( "DONE.     \n") ;
02961 
02962 
02963 #ifdef DYNPROG_TIMING
02964     MyTime2.stop() ;
02965     
02966     if (is_big)
02967         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, MyTime2.time_diff_sec()) ;
02968 #endif
02969 
02970     delete[] fixedtempvv ;
02971     delete[] fixedtempii ;
02972 }
02973 
02974 void CDynProg::best_path_trans_deriv(
02975     int32_t *my_state_seq, int32_t *my_pos_seq, float64_t *my_scores,
02976     float64_t* my_losses,int32_t my_seq_len, const float64_t *seq_array,
02977     int32_t seq_len, const int32_t *pos, CPlifBase **Plif_matrix,
02978     CPlifBase **Plif_state_signals, int32_t max_num_signals,int32_t genestr_num)
02979 {   
02980     if (!svm_arrays_clean)
02981     {
02982         SG_ERROR( "SVM arrays not clean") ;
02983         return ;
02984     } ;
02985     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
02986     //mod_words.display() ;
02987     //sign_words.display() ;
02988     //string_words.display() ;
02989 
02990     bool use_svm = false ;
02991     //ASSERT(dict_len==num_svms*cum_num_words_array[num_degrees]) ;
02992     //dict_weights.set_array(dictionary_weights, cum_num_words_array[num_degrees], num_svms, false, false) ;
02993     //dict_weights_array=dict_weights.get_array() ;
02994     
02995     CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02996     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02997     CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02998     
02999     { // determine whether to use svm outputs and clear derivatives
03000         for (int32_t i=0; i<N; i++)
03001             for (int32_t j=0; j<N; j++)
03002             {
03003                 CPlifBase *penij=PEN.element(i,j) ;
03004                 if (penij==NULL)
03005                     continue ;
03006                 
03007                 if (penij->uses_svm_values())
03008                     use_svm=true ;
03009                 penij->penalty_clear_derivative() ;
03010             }
03011         for (int32_t i=0; i<N; i++)
03012             for (int32_t j=0; j<max_num_signals; j++)
03013             {
03014                 CPlifBase *penij=PEN_state_signals.element(i,j) ;
03015                 if (penij==NULL)
03016                     continue ;
03017                 if (penij->uses_svm_values())
03018                     use_svm=true ;
03019                 penij->penalty_clear_derivative() ;
03020             }
03021     }
03022 
03023     { // set derivatives of p, q and a to zero
03024         for (int32_t i=0; i<N; i++)
03025         {
03026             initial_state_distribution_p_deriv.element(i)=0 ;
03027             end_state_distribution_q_deriv.element(i)=0 ;
03028             for (int32_t j=0; j<N; j++)
03029                 transition_matrix_a_deriv.element(i,j)=0 ;
03030         }
03031     }
03032     
03033     { // clear score vector
03034         for (int32_t i=0; i<my_seq_len; i++)
03035         {
03036             my_scores[i]=0.0 ;
03037             my_losses[i]=0.0 ;
03038         }
03039     }
03040 
03041     //int32_t total_len = 0 ;
03042     
03043     //transition_matrix_a.display_array() ;
03044     //transition_matrix_a_id.display_array() ;
03045     
03046     { // compute derivatives for given path
03047         float64_t svm_value[2*num_svms] ;
03048         for (int32_t s=0; s<2*num_svms; s++)
03049             svm_value[s]=0 ;
03050         
03051         for (int32_t i=0; i<my_seq_len; i++)
03052         {
03053             my_scores[i]=0.0 ;
03054             my_losses[i]=0.0 ;
03055         }
03056         
03057 //#ifdef DYNPROG_DEBUG
03058         float64_t total_score = 0.0 ;
03059         float64_t total_loss = 0.0 ;
03060 //#endif        
03061 
03062         ASSERT(my_state_seq[0]>=0) ;
03063         initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
03064         my_scores[0] += initial_state_distribution_p.element(my_state_seq[0]) ;
03065 
03066         ASSERT(my_state_seq[my_seq_len-1]>=0) ;
03067         end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
03068         my_scores[my_seq_len-1] += end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
03069 
03070 //#ifdef DYNPROG_DEBUG
03071         total_score += my_scores[0] + my_scores[my_seq_len-1] ;
03072 //#endif        
03073         struct svm_values_struct svs;
03074         svs.num_unique_words = NULL;
03075         svs.svm_values = NULL;
03076         svs.svm_values_unnormalized = NULL;
03077         svs.word_used = NULL;
03078         //init_svm_values(svs, my_state_seq[0], seq_len, 100);
03079 
03080         struct segment_loss_struct loss;
03081         loss.segments_changed = NULL;
03082         loss.num_segment_id = NULL;
03083         //SG_DEBUG( "seq_len=%i\n", my_seq_len) ;
03084         for (int32_t i=0; i<my_seq_len-1; i++)
03085         {
03086             if (my_state_seq[i+1]==-1)
03087                 break ;
03088             int32_t from_state = my_state_seq[i] ;
03089             int32_t to_state   = my_state_seq[i+1] ;
03090             int32_t from_pos   = my_pos_seq[i] ;
03091             int32_t to_pos     = my_pos_seq[i+1] ;
03092 
03093             // compute loss relative to another segmentation using the segment_loss function
03094             init_segment_loss(loss, seq_len, pos[to_pos]-pos[from_pos]+10);
03095             find_segment_loss_till_pos(pos, to_pos,m_segment_ids, m_segment_mask, loss);  
03096 
03097             int32_t loss_last_pos = to_pos ;
03098             float64_t last_loss = 0.0 ;
03099             int32_t elem_id = transition_matrix_a_id.element(from_state, to_state) ;
03100             my_losses[i] = extend_segment_loss(loss, pos, elem_id, from_pos, loss_last_pos, last_loss) ;
03101 #ifdef DYNPROG_DEBUG
03102             io.set_loglevel(M_DEBUG) ;
03103             SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
03104 #endif
03105             // increase usage of this transition
03106             transition_matrix_a_deriv.element(from_state, to_state)++ ;
03107             my_scores[i] += transition_matrix_a.element(from_state, to_state) ;
03108             //SG_PRINT("transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, transition_matrix_a.element(from_state, to_state));
03109 #ifdef DYNPROG_DEBUG
03110             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03111 #endif
03112             
03113             /*int32_t last_svm_pos[num_degrees] ;
03114             for (int32_t qq=0; qq<num_degrees; qq++)
03115             last_svm_pos[qq]=-1 ;*/
03116             
03117             if (use_svm)
03118             {
03119                 //SG_PRINT("from_pos: %i; to_pos: %i; pos[to_pos]-pos[from_pos]: %i \n",from_pos, to_pos, pos[to_pos]-pos[from_pos]); 
03120                 int32_t frame = m_orf_info.element(from_state,0);
03121                 if (false)//(frame>=0)
03122                 {
03123                     int32_t num_current_svms=0;
03124                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03125                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame);
03126                     PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
03127                     SG_PRINT("\n");
03128                 }
03129 
03130 
03131                 lookup_content_svm_values(from_pos, to_pos, pos[from_pos],pos[to_pos], svm_value, frame);
03132                 if (false)//(frame>=0)
03133                     SG_PRINT("svm_values: %f, %f, %f \n", svm_value[4], svm_value[5], svm_value[6]);
03134                 if (m_use_tiling)
03135                     lookup_tiling_plif_values(from_pos, to_pos, pos[to_pos]-pos[from_pos], svm_value);
03136     
03137 #ifdef DYNPROG_DEBUG
03138                     SG_DEBUG( "svm[%i]: %f\n", ss, svm_value[ss]) ;
03139 #endif
03140             }
03141             
03142             if (PEN.element(to_state, from_state)!=NULL)
03143             {
03144                 float64_t nscore = PEN.element(to_state, from_state)->lookup_penalty(pos[to_pos]-pos[from_pos], svm_value) ;
03145                 my_scores[i] += nscore ;
03146                 //SG_PRINT("to_state: %i, from_state: %i, nscore: %f, len: %i \n",to_state,from_state,nscore, pos[to_pos]-pos[from_pos]);
03147                 if (m_use_tiling)
03148                     for (int32_t s=0;s<num_svms;s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
03149                         svm_value[num_svms+s]=-CMath::INFTY;
03150             
03151 #ifdef DYNPROG_DEBUG
03152                 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, pos[from_pos], to_pos, pos[to_pos], pos[to_pos]-pos[from_pos]) ;
03153 
03154                 /*
03155                   int32_t orf_from = g_orf_info.element(from_state,0) ;
03156                   int32_t orf_to   = g_orf_info.element(to_state,1) ;
03157                   ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
03158                   if (orf_from != -1)
03159                   total_len = total_len + pos[to_pos]-pos[from_pos] ;
03160                   
03161                   SG_DEBUG( "%i. orf_info: from_orf=%i to_orf=%i orf_diff=%i, len=%i, lenmod3=%i, total_len=%i, total_lenmod3=%i\n", i, orf_from, orf_to, (orf_to-orf_from)%3, pos[to_pos]-pos[from_pos], (pos[to_pos]-pos[from_pos])%3, total_len, total_len%3) ;
03162                 */
03163 #endif
03164                 PEN.element(to_state, from_state)->penalty_add_derivative(pos[to_pos]-pos[from_pos], svm_value) ;
03165                 if (m_use_tiling) 
03166                 {
03167                     for (int32_t s=0;s<num_svms;s++)
03168                         svm_value[s]=-CMath::INFTY;
03169                     float64_t intensities[m_num_probes];
03170                     int32_t num_intensities = raw_intensities_interval_query(pos[from_pos], pos[to_pos],intensities);
03171                     //SG_PRINT("pos[from_pos]:%i, pos[to_pos]:%i, num_intensities:%i\n",pos[from_pos],pos[to_pos], num_intensities);
03172                     for (int32_t k=0;k<num_intensities;k++)
03173                     {
03174                         for (int32_t j=0;j<num_svms;j++)
03175                             svm_value[num_svms+j]=intensities[k];
03176                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value) ;   
03177                         
03178                     }
03179                 }
03180             }
03181 #ifdef DYNPROG_DEBUG
03182             SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03183 #endif
03184 
03185             //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
03186             for (int32_t k=0; k<max_num_signals; k++)
03187             {
03188                 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
03189                 {
03190 #ifdef DYNPROG_DEBUG
03191                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
03192 #endif
03193                     my_scores[i] += seq_input.element(to_state, to_pos, k) ;
03194                     //SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
03195                     break ;
03196                 }
03197                 if (PEN_state_signals.element(to_state, k)!=NULL)
03198                 {
03199                     float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
03200                     my_scores[i] += nscore ;
03201                     //break ;
03202                     //int32_t num_current_svms=0;
03203                     //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03204                     //SG_PRINT("PEN_state_signals->id: ");
03205                     //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
03206                     //SG_PRINT("\n");
03207                     //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03208 #ifdef DYNPROG_DEBUG
03209                     SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03210 #endif
03211                     PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value) ;
03212                 } else
03213                     break ;
03214             }
03215             
03216 //#ifdef DYNPROG_DEBUG
03217             //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
03218             //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
03219             total_score += my_scores[i] ;
03220             total_loss += my_losses[i] ;
03221 //#endif
03222         }
03223 //#ifdef DYNPROG_DEBUG
03224         //SG_PRINT( "total score = %f \n", total_score) ;
03225         SG_PRINT( "total loss = %f \n", total_loss) ;
03226 //#endif
03227         clear_svm_values(svs);
03228         clear_segment_loss(loss);
03229     }
03230 
03231     
03232 }
03233 
03234 
03235 void CDynProg::best_path_trans_simple(
03236     const float64_t *seq_array, int32_t seq_len, int16_t nbest,
03237     float64_t *prob_nbest, int32_t *my_state_seq)
03238 {
03239     if (!svm_arrays_clean)
03240     {
03241         SG_ERROR( "SVM arrays not clean") ;
03242         return ;
03243     } ;
03244 
03245     int32_t max_look_back = 2 ;
03246     const int32_t look_back_buflen = max_look_back*nbest*N ;
03247     ASSERT(nbest<32000) ;
03248         
03249     CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
03250 
03251     CArray3<float64_t> delta(max_look_back, N, nbest) ;
03252     CArray3<T_STATES> psi(seq_len, N, nbest) ;
03253     CArray3<int16_t> ktable(seq_len,N,nbest) ;
03254     CArray3<int32_t> ptable(seq_len,N,nbest) ;
03255 
03256     CArray<float64_t> delta_end(nbest) ;
03257     CArray<T_STATES> path_ends(nbest) ;
03258     CArray<int16_t> ktable_end(nbest) ;
03259 
03260     CArray<float64_t> oldtempvv(look_back_buflen) ;
03261     CArray<int32_t> oldtempii(look_back_buflen) ;
03262 
03263     CArray<T_STATES> state_seq(seq_len) ;
03264     CArray<int32_t> pos_seq(seq_len) ;
03265 
03266     { // initialization
03267 
03268         for (T_STATES i=0; i<N; i++)
03269         {
03270             delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
03271             psi.element(0,i,0)   = 0 ;
03272             ktable.element(0,i,0)  = 0 ;
03273             ptable.element(0,i,0)  = 0 ;
03274             for (int16_t k=1; k<nbest; k++)
03275             {
03276                 delta.element(0,i,k)    = -CMath::INFTY ;
03277                 psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
03278                 ktable.element(0,i,k)     = 0 ;
03279                 ptable.element(0,i,k)     = 0 ;
03280             }
03281         }
03282     }
03283 
03284     // recursion
03285     for (int32_t t=1; t<seq_len; t++)
03286     {
03287         for (T_STATES j=0; j<N; j++)
03288         {
03289             if (seq.element(j,t)<-1e20)
03290             { // if we cannot observe the symbol here, then we can omit the rest
03291                 for (int16_t k=0; k<nbest; k++)
03292                 {
03293                     delta.element(t%max_look_back,j,k)    = seq.element(j,t) ;
03294                     psi.element(t,j,k)      = 0 ;
03295                     ktable.element(t,j,k)     = 0 ;
03296                     ptable.element(t,j,k)     = 0 ;
03297                 }
03298             }
03299             else
03300             {
03301                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
03302                 const T_STATES *elem_list = trans_list_forward[j] ;
03303                 const float64_t *elem_val      = trans_list_forward_val[j] ;
03304                 
03305                 int32_t old_list_len = 0 ;
03306                 
03307                 for (int32_t i=0; i<num_elem; i++)
03308                 {
03309                     T_STATES ii = elem_list[i] ;
03310 
03311                     int32_t ts=t-1; 
03312                     if (ts>=0)
03313                     {
03314                         bool ok=true ;
03315                         
03316                         if (ok)
03317                         {
03318 
03319                           
03320                           for (int16_t diff=0; diff<nbest; diff++)
03321                             {
03322                               float64_t  val        = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
03323                               float64_t mval = -val;
03324 
03325                               oldtempvv[old_list_len] = mval ;
03326                               oldtempii[old_list_len] = ii + diff*N + ts*N*nbest;
03327                               old_list_len++ ;
03328                             }
03329                         }
03330                     }
03331                 }
03332                 
03333                 CMath::nmin<int32_t>(oldtempvv.get_array(), oldtempii.get_array(), old_list_len, nbest) ;
03334 
03335                 int32_t numEnt = 0;
03336                 numEnt = old_list_len;
03337 
03338                 float64_t minusscore;
03339                 int64_t fromtjk;
03340 
03341                 for (int16_t k=0; k<nbest; k++)
03342                 {
03343                     if (k<numEnt)
03344                     {
03345                         minusscore = oldtempvv[k];
03346                         fromtjk = oldtempii[k];
03347                         
03348                         delta.element(t%max_look_back,j,k)    = -minusscore + seq.element(j,t);
03349                         psi.element(t,j,k)      = (fromtjk%N) ;
03350                         ktable.element(t,j,k)     = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
03351                         ptable.element(t,j,k)     = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
03352                     }
03353                     else
03354                     {
03355                         delta.element(t%max_look_back,j,k)    = -CMath::INFTY ;
03356                         psi.element(t,j,k)      = 0 ;
03357                         ktable.element(t,j,k)     = 0 ;
03358                         ptable.element(t,j,k)     = 0 ;
03359                     }
03360                 }
03361                 
03362             }
03363         }
03364     }
03365 
03366     
03367     { //termination
03368         int32_t list_len = 0 ;
03369         for (int16_t diff=0; diff<nbest; diff++)
03370         {
03371             for (T_STATES i=0; i<N; i++)
03372             {
03373                 oldtempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
03374                 oldtempii[list_len] = i + diff*N ;
03375                 list_len++ ;
03376             }
03377         }
03378         
03379         CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
03380         
03381         for (int16_t k=0; k<nbest; k++)
03382         {
03383             delta_end.element(k) = -oldtempvv[k] ;
03384             path_ends.element(k) = (oldtempii[k]%N) ;
03385             ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
03386         }
03387     }
03388     
03389     { //state sequence backtracking     
03390         for (int16_t k=0; k<nbest; k++)
03391         {
03392             prob_nbest[k]= delta_end.element(k) ;
03393             
03394             int32_t i         = 0 ;
03395             state_seq[i]  = path_ends.element(k) ;
03396             int16_t q   = ktable_end.element(k) ;
03397             pos_seq[i]    = seq_len-1 ;
03398 
03399             while (pos_seq[i]>0)
03400             {
03401                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03402                 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03403                 pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
03404                 q              = ktable.element(pos_seq[i], state_seq[i], q) ;
03405                 i++ ;
03406             }
03407             //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
03408             int32_t num_states = i+1 ;
03409             for (i=0; i<num_states;i++)
03410             {
03411                 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03412             }
03413             //my_state_seq[num_states+k*seq_len]=-1 ;
03414         }
03415 
03416     }
03417 }
03418 

SHOGUN Machine Learning Toolbox - Documentation