00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "structure/DynProg.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/io.h"
00015 #include "lib/config.h"
00016 #include "features/StringFeatures.h"
00017 #include "features/CharFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "lib/Array.h"
00021 #include "lib/Array2.h"
00022 #include "lib/Array3.h"
00023
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <time.h>
00027 #include <ctype.h>
00028
00029 template void CDynProg::best_path_trans<1,true,false>(
00030 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00031 const int32_t *orf_info, CPlifBase **PLif_matrix,
00032 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00033 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00034 int32_t *my_pos_seq, bool use_orf);
00035
00036 template void CDynProg::best_path_trans<2,true,false>(
00037 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00038 const int32_t *orf_info, CPlifBase **PLif_matrix,
00039 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00040 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00041 int32_t *my_pos_seq, bool use_orf);
00042
00043 template void CDynProg::best_path_trans<1,false,false>(
00044 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00045 const int32_t *orf_info, CPlifBase **PLif_matrix,
00046 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00047 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00048 int32_t *my_pos_seq, bool use_orf);
00049
00050 template void CDynProg::best_path_trans<2,false,false>(
00051 const float64_t *seq, int32_t seq_len, const int32_t *pos,
00052 const int32_t *orf_info, CPlifBase **PLif_matrix,
00053 CPlifBase **Plif_state_signals, int32_t max_num_signals,
00054 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
00055 int32_t *my_pos_seq, bool use_orf);
00056
00057
00058 #ifdef SUNOS
00059 extern "C" int finite(double);
00060 #endif
00061
00062
00063
00064
00065
00066
00067 static int32_t word_degree_default[4]={3,4,5,6} ;
00068 static int32_t cum_num_words_default[5]={0,64,320,1344,5440} ;
00069 static int32_t num_words_default[4]= {64,256,1024,4096} ;
00070 static int32_t mod_words_default[32] = {1,1,1,1,1,1,1,1,
00071 1,1,1,1,1,1,1,1,
00072 0,0,0,0,0,0,0,0,
00073 0,0,0,0,0,0,0,0} ;
00074 static bool sign_words_default[16] = {true,true,true,true,true,true,true,true,
00075 false,false,false,false,false,false,false,false} ;
00076 static int32_t string_words_default[16] = {0,0,0,0,0,0,0,0,
00077 1,1,1,1,1,1,1,1} ;
00078
00079 CDynProg::CDynProg(int32_t p_num_svms )
00080 : CSGObject(), transition_matrix_a_id(1,1), transition_matrix_a(1,1),
00081 transition_matrix_a_deriv(1,1), initial_state_distribution_p(1),
00082 initial_state_distribution_p_deriv(1), end_state_distribution_q(1),
00083 end_state_distribution_q_deriv(1), dict_weights(1,1),
00084 dict_weights_array(dict_weights.get_array()),
00085
00086
00087 num_degrees(4),
00088 num_svms(8),
00089 num_strings(1),
00090 word_degree(word_degree_default, num_degrees, true, true),
00091 cum_num_words(cum_num_words_default, num_degrees+1, true, true),
00092 cum_num_words_array(cum_num_words.get_array()),
00093 num_words(num_words_default, num_degrees, true, true),
00094 num_words_array(num_words.get_array()),
00095 mod_words(mod_words_default, num_svms, 2, true, true),
00096 mod_words_array(mod_words.get_array()),
00097 sign_words(sign_words_default, num_svms, true, true),
00098 sign_words_array(sign_words.get_array()),
00099 string_words(string_words_default, num_svms, true, true),
00100 string_words_array(string_words.get_array()),
00101
00102
00103
00104 svm_pos_start(num_degrees),
00105 num_unique_words(num_degrees),
00106 svm_arrays_clean(true),
00107
00108
00109 num_svms_single(1),
00110 word_degree_single(1),
00111 num_words_single(4),
00112 word_used_single(num_words_single),
00113 svm_value_unnormalized_single(num_svms_single),
00114 num_unique_words_single(0),
00115
00116 max_a_id(0), m_seq(1,1,1), m_pos(1), m_orf_info(1,2),
00117 m_segment_sum_weights(1,1), m_plif_list(1),
00118 m_PEN(1,1), m_PEN_state_signals(1,1),
00119 m_genestr(1,1), m_dict_weights(1,1), m_segment_loss(1,1,2),
00120 m_segment_ids(1),
00121 m_segment_mask(1),
00122 m_scores(1), m_states(1,1), m_positions(1,1), m_genestr_stop(1),
00123 m_precomputed_svm_values(1,1),
00124 m_precomputed_tiling_values(1,1)
00125 {
00126 trans_list_forward = NULL ;
00127 trans_list_forward_cnt = NULL ;
00128 trans_list_forward_val = NULL ;
00129 trans_list_forward_id = NULL ;
00130 trans_list_len = 0 ;
00131
00132 mem_initialized = true ;
00133
00134 this->N=1;
00135 m_step=0;
00136
00137 m_raw_intensities = NULL;
00138 m_probe_pos = NULL;
00139 m_use_tiling=false;
00140 m_genestr_len = 0;
00141 #ifdef ARRAY_STATISTICS
00142 word_degree.set_name("word_degree");
00143 #endif
00144 }
00145
00146 CDynProg::~CDynProg()
00147 {
00148 if (trans_list_forward_cnt)
00149 delete[] trans_list_forward_cnt;
00150 if (trans_list_forward)
00151 {
00152 for (int32_t i=0; i<trans_list_len; i++)
00153 {
00154 if (trans_list_forward[i])
00155 delete[] trans_list_forward[i];
00156 }
00157 delete[] trans_list_forward;
00158 }
00159 if (trans_list_forward_val)
00160 {
00161 for (int32_t i=0; i<trans_list_len; i++)
00162 {
00163 if (trans_list_forward_val[i])
00164 delete[] trans_list_forward_val[i];
00165 }
00166 delete[] trans_list_forward_val;
00167 }
00168 if (trans_list_forward_id)
00169 {
00170 for (int32_t i=0; i<trans_list_len; i++)
00171 {
00172 if (trans_list_forward_id[i])
00173 delete[] trans_list_forward_id[i];
00174 }
00175 delete[] trans_list_forward_id;
00176 }
00177 if (m_raw_intensities)
00178 delete[] m_raw_intensities;
00179 if (m_probe_pos)
00180 delete[] m_probe_pos;
00181
00182 }
00183
00185 void CDynProg::set_genestr_len(int32_t genestr_len)
00186 {
00187 m_genestr_len = genestr_len;
00188 }
00189
00190 int32_t CDynProg::get_num_svms()
00191 {
00192 return num_svms;
00193 }
00194
00195 void CDynProg::precompute_stop_codons(const char* sequence, int32_t length)
00196 {
00197 m_genestr_stop.resize_array(length) ;
00198 m_genestr_stop.zero() ;
00199 m_genestr_stop.set_name("genestr_stop") ;
00200 {
00201 for (int32_t i=0; i<length-2; i++)
00202 if ((sequence[i]=='t' || sequence[i]=='T') &&
00203 (((sequence[i+1]=='a' || sequence[i+1]=='A') &&
00204 (sequence[i+2]=='a' || sequence[i+2]=='g' || sequence[i+2]=='A' || sequence[i+2]=='G')) ||
00205 ((sequence[i+1]=='g'||sequence[i+1]=='G') && (sequence[i+2]=='a' || sequence[i+2]=='A') )))
00206 m_genestr_stop.element(i)=true ;
00207 else
00208 m_genestr_stop.element(i)=false ;
00209 m_genestr_stop.element(length-1)=false ;
00210 m_genestr_stop.element(length-1)=false ;
00211 }
00212 }
00213
00214 void CDynProg::set_num_states(int32_t p_N)
00215 {
00216 N=p_N ;
00217
00218 transition_matrix_a_id.resize_array(N,N) ;
00219 transition_matrix_a.resize_array(N,N) ;
00220 transition_matrix_a_deriv.resize_array(N,N) ;
00221 initial_state_distribution_p.resize_array(N) ;
00222 initial_state_distribution_p_deriv.resize_array(N) ;
00223 end_state_distribution_q.resize_array(N);
00224 end_state_distribution_q_deriv.resize_array(N) ;
00225
00226 m_orf_info.resize_array(N,2) ;
00227 m_PEN.resize_array(N,N) ;
00228 m_PEN_state_signals.resize_array(N,1) ;
00229 }
00230
00231 int32_t CDynProg::get_num_states()
00232 {
00233 return N;
00234 }
00235
00236 void CDynProg::init_tiling_data(
00237 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes,
00238 const int32_t seq_len)
00239 {
00240 delete[] m_probe_pos;
00241 delete[] m_raw_intensities;
00242 m_probe_pos = new int32_t[num_probes];
00243 m_raw_intensities = new float64_t[num_probes];
00244
00245 memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00246 memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00247 m_num_probes = num_probes;
00248 m_precomputed_tiling_values.resize_array(num_svms,seq_len);
00249 m_use_tiling=true;
00250
00251 }
00252
00253 void CDynProg::init_content_svm_value_array(const int32_t seq_len)
00254 {
00255 m_precomputed_svm_values.resize_array(num_svms,seq_len);
00256 }
00257
00258 void CDynProg::precompute_tiling_plifs(
00259 CPlif** PEN, const int32_t* tiling_plif_ids,
00260 const int32_t num_tiling_plifs, const int32_t seq_len, const int32_t* pos)
00261 {
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 ASSERT(num_tiling_plifs==num_svms)
00276
00277 float64_t tiling_plif[num_svms];
00278 float64_t svm_value[2*num_svms];
00279 for (int32_t i=0; i<num_svms; i++)
00280 tiling_plif[i]=0.0;
00281
00282 int32_t* p_tiling_pos = m_probe_pos;
00283 float64_t* p_tiling_data = m_raw_intensities;
00284 int32_t num=0;
00285 for (int32_t pos_idx=0;pos_idx<seq_len;pos_idx++)
00286 {
00287
00288
00289 while (num<m_num_probes&&*p_tiling_pos<pos[pos_idx])
00290 {
00291
00292 for (int32_t i=0; i<num_svms; i++)
00293 {
00294 svm_value[num_svms+i]=*p_tiling_data;
00295 CPlif * plif = PEN[tiling_plif_ids[i]];
00296 plif->set_do_calc(true);
00297 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00298
00299 plif->set_do_calc(false);
00300
00301 }
00302 p_tiling_data++;
00303 p_tiling_pos++;
00304 num++;
00305 }
00306 for (int32_t i=0; i<num_svms; i++)
00307 m_precomputed_tiling_values.set_element(tiling_plif[i],i,pos_idx);
00308 }
00309
00310
00311
00312
00313 }
00314
00315 void CDynProg::create_word_string(
00316 const char* genestr, int32_t genestr_num, int32_t genestr_len,
00317 uint16_t*** wordstr)
00318 {
00319 for (int32_t k=0; k<genestr_num; k++)
00320 {
00321 wordstr[k]=new uint16_t*[num_degrees] ;
00322 for (int32_t j=0; j<num_degrees; j++)
00323 {
00324 wordstr[k][j]=NULL ;
00325 {
00326 wordstr[k][j]=new uint16_t[genestr_len] ;
00327 for (int32_t i=0; i<genestr_len; i++)
00328 switch (genestr[i])
00329 {
00330 case 'A':
00331 case 'a': wordstr[k][j][i]=0 ; break ;
00332 case 'C':
00333 case 'c': wordstr[k][j][i]=1 ; break ;
00334 case 'G':
00335 case 'g': wordstr[k][j][i]=2 ; break ;
00336 case 'T':
00337 case 't': wordstr[k][j][i]=3 ; break ;
00338 default: ASSERT(0) ;
00339 }
00340 translate_from_single_order(wordstr[k][j], genestr_len, word_degree[j]-1, word_degree[j]) ;
00341 }
00342 }
00343 }
00344 precompute_stop_codons(genestr, genestr_len);
00345 }
00346
00347 void CDynProg::precompute_content_values(
00348 uint16_t*** wordstr, const int32_t *pos,const int32_t seq_len,
00349 const int32_t genestr_len,float64_t *dictionary_weights,int32_t dict_len)
00350 {
00351
00352
00353 dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
00354 dict_weights_array=dict_weights.get_array() ;
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 for (int32_t p=0 ; p<seq_len-1 ; p++)
00367 {
00368 int32_t from_pos = pos[p];
00369 int32_t to_pos = pos[p+1];
00370 float64_t my_svm_values_unnormalized[num_svms] ;
00371
00372
00373 ASSERT(from_pos<=genestr_len)
00374 ASSERT(to_pos<=genestr_len)
00375
00376 for (int32_t s=0; s<num_svms; s++)
00377 {
00378 my_svm_values_unnormalized[s]=0.0;
00379 }
00380 for (int32_t i=from_pos; i<to_pos;i++)
00381 {
00382 for (int32_t j=0; j<num_degrees; j++)
00383 {
00384 uint16_t word = wordstr[0][j][i] ;
00385 for (int32_t s=0; s<num_svms; s++)
00386 {
00387
00388 if (mod_words.get_element(s,0)==3 && i%3!=mod_words.get_element(s,1))
00389 continue;
00390 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
00391 }
00392 }
00393 }
00394 for (int32_t s=0; s<num_svms; s++)
00395 {
00396 float64_t prev = m_precomputed_svm_values.get_element(s,p);
00397 m_precomputed_svm_values.set_element(prev + my_svm_values_unnormalized[s],s,p+1);
00398 ASSERT(prev>-1e20);
00399 }
00400 }
00401 for (int32_t j=0; j<num_degrees; j++)
00402 delete[] wordstr[0][j] ;
00403 delete[] wordstr[0] ;
00404 }
00405
00406 void CDynProg::set_p_vector(float64_t *p, int32_t p_N)
00407 {
00408 ASSERT(p_N==N);
00409
00410
00411
00412 initial_state_distribution_p.set_array(p, p_N, true, true);
00413 }
00414
00415 void CDynProg::set_q_vector(float64_t *q, int32_t q_N)
00416 {
00417 ASSERT(q_N==N);
00418 end_state_distribution_q.set_array(q, q_N, true, true);
00419 }
00420
00421 void CDynProg::set_a(float64_t *a, int32_t p_M, int32_t p_N)
00422 {
00423 ASSERT(p_N==N);
00424 ASSERT(p_M==p_N);
00425 transition_matrix_a.set_array(a, p_N, p_N, true, true);
00426 transition_matrix_a_deriv.resize_array(p_N, p_N);
00427 }
00428
00429 void CDynProg::set_a_id(int32_t *a, int32_t p_M, int32_t p_N)
00430 {
00431 ASSERT(p_N==N);
00432 ASSERT(p_M==p_N);
00433 transition_matrix_a_id.set_array(a, p_N, p_N, true, true);
00434 max_a_id = 0;
00435 for (int32_t i=0; i<p_N; i++)
00436 for (int32_t j=0; j<p_N; j++)
00437 max_a_id=CMath::max(max_a_id, transition_matrix_a_id.element(i,j));
00438 }
00439
00440 void CDynProg::set_a_trans_matrix(
00441 float64_t *a_trans, int32_t num_trans, int32_t p_N)
00442 {
00443 if (!((p_N==3) || (p_N==4)))
00444 SG_ERROR("!((p_N==3) || (p_N==4)), p_N: %i\n",p_N);
00445
00446 delete[] trans_list_forward ;
00447 delete[] trans_list_forward_cnt ;
00448 delete[] trans_list_forward_val ;
00449 delete[] trans_list_forward_id ;
00450
00451 trans_list_forward = NULL ;
00452 trans_list_forward_cnt = NULL ;
00453 trans_list_forward_val = NULL ;
00454 trans_list_len = 0 ;
00455
00456 transition_matrix_a.zero() ;
00457 transition_matrix_a_id.zero() ;
00458
00459 mem_initialized = true ;
00460
00461 trans_list_forward_cnt=NULL ;
00462 trans_list_len = N ;
00463 trans_list_forward = new T_STATES*[N] ;
00464 trans_list_forward_cnt = new T_STATES[N] ;
00465 trans_list_forward_val = new float64_t*[N] ;
00466 trans_list_forward_id = new int32_t*[N] ;
00467
00468 int32_t start_idx=0;
00469 for (int32_t j=0; j<N; j++)
00470 {
00471 int32_t old_start_idx=start_idx;
00472
00473 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00474 {
00475 start_idx++;
00476
00477 if (start_idx>1 && start_idx<num_trans)
00478 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00479 }
00480
00481 if (start_idx>1 && start_idx<num_trans)
00482 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00483
00484 int32_t len=start_idx-old_start_idx;
00485 ASSERT(len>=0);
00486
00487 trans_list_forward_cnt[j] = 0 ;
00488
00489 if (len>0)
00490 {
00491 trans_list_forward[j] = new T_STATES[len] ;
00492 trans_list_forward_val[j] = new float64_t[len] ;
00493 trans_list_forward_id[j] = new int32_t[len] ;
00494 }
00495 else
00496 {
00497 trans_list_forward[j] = NULL;
00498 trans_list_forward_val[j] = NULL;
00499 trans_list_forward_id[j] = NULL;
00500 }
00501 }
00502
00503 for (int32_t i=0; i<num_trans; i++)
00504 {
00505 int32_t from_state = (int32_t)a_trans[i] ;
00506 int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00507 float64_t val = a_trans[i+num_trans*2] ;
00508 int32_t id = 0 ;
00509 if (p_N==4)
00510 id = (int32_t)a_trans[i+num_trans*3] ;
00511
00512
00513 ASSERT(to_state>=0 && to_state<N);
00514 ASSERT(from_state>=0 && from_state<N);
00515
00516 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00517 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00518 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00519 trans_list_forward_cnt[to_state]++ ;
00520 transition_matrix_a.element(from_state, to_state) = val ;
00521 transition_matrix_a_id.element(from_state, to_state) = id ;
00522
00523 } ;
00524
00525 max_a_id = 0 ;
00526 for (int32_t i=0; i<N; i++)
00527 for (int32_t j=0; j<N; j++)
00528 {
00529
00530
00531 max_a_id = CMath::max(max_a_id, transition_matrix_a_id.element(i,j)) ;
00532 }
00533
00534 }
00535
00536 void CDynProg::init_svm_arrays(int32_t p_num_degrees, int32_t p_num_svms)
00537 {
00538 svm_arrays_clean=false ;
00539
00540 word_degree.resize_array(num_degrees) ;
00541
00542 cum_num_words.resize_array(num_degrees+1) ;
00543 cum_num_words_array=cum_num_words.get_array() ;
00544
00545 num_words.resize_array(num_degrees) ;
00546 num_words_array=num_words.get_array() ;
00547
00548
00549 svm_pos_start.resize_array(num_degrees) ;
00550 num_unique_words.resize_array(num_degrees) ;
00551 }
00552
00553
00554 void CDynProg::init_word_degree_array(
00555 int32_t * p_word_degree_array, int32_t num_elem)
00556 {
00557 svm_arrays_clean=false ;
00558
00559 word_degree.resize_array(num_degrees) ;
00560 ASSERT(num_degrees==num_elem);
00561
00562
00563 for (int32_t i=0; i<num_degrees; i++)
00564 word_degree[i]=p_word_degree_array[i] ;
00565
00566 }
00567
00568 void CDynProg::init_cum_num_words_array(
00569 int32_t * p_cum_num_words_array, int32_t num_elem)
00570 {
00571 svm_arrays_clean=false ;
00572
00573 cum_num_words.resize_array(num_degrees+1) ;
00574 cum_num_words_array=cum_num_words.get_array() ;
00575 ASSERT(num_degrees+1==num_elem);
00576
00577 for (int32_t i=0; i<num_degrees+1; i++)
00578 cum_num_words[i]=p_cum_num_words_array[i] ;
00579 }
00580
00581 void CDynProg::init_num_words_array(
00582 int32_t * p_num_words_array, int32_t num_elem)
00583 {
00584 svm_arrays_clean=false ;
00585
00586 num_words.resize_array(num_degrees) ;
00587 num_words_array=num_words.get_array() ;
00588 ASSERT(num_degrees==num_elem);
00589
00590 for (int32_t i=0; i<num_degrees; i++)
00591 num_words[i]=p_num_words_array[i] ;
00592
00593
00594
00595 }
00596
00597 void CDynProg::init_mod_words_array(
00598 int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00599 {
00600
00601
00602
00603
00604
00605
00606 svm_arrays_clean=false ;
00607
00608 ASSERT(num_svms==num_elem);
00609 ASSERT(num_columns==2);
00610
00611 mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00612 mod_words_array = mod_words.get_array() ;
00613
00614
00615
00616
00617
00618 }
00619
00620 void CDynProg::init_sign_words_array(bool* p_sign_words_array, int32_t num_elem)
00621 {
00622 svm_arrays_clean=false ;
00623
00624 ASSERT(num_svms==num_elem);
00625
00626 sign_words.set_array(p_sign_words_array, num_elem, true, true) ;
00627 sign_words_array = sign_words.get_array() ;
00628 }
00629
00630 void CDynProg::init_string_words_array(
00631 int32_t* p_string_words_array, int32_t num_elem)
00632 {
00633 svm_arrays_clean=false ;
00634
00635 ASSERT(num_svms==num_elem);
00636
00637 string_words.set_array(p_string_words_array, num_elem, true, true) ;
00638 string_words_array = string_words.get_array() ;
00639 }
00640
00641 bool CDynProg::check_svm_arrays()
00642 {
00643
00644 if ((word_degree.get_dim1()==num_degrees) &&
00645 (cum_num_words.get_dim1()==num_degrees+1) &&
00646 (num_words.get_dim1()==num_degrees) &&
00647
00648
00649
00650
00651
00652 (svm_pos_start.get_dim1()==num_degrees) &&
00653 (num_unique_words.get_dim1()==num_degrees) &&
00654 (mod_words.get_dim1()==num_svms) &&
00655 (mod_words.get_dim2()==2) &&
00656 (sign_words.get_dim1()==num_svms) &&
00657 (string_words.get_dim1()==num_svms))
00658 {
00659 svm_arrays_clean=true ;
00660 return true ;
00661 }
00662 else
00663 {
00664 if ((num_unique_words.get_dim1()==num_degrees) &&
00665 (mod_words.get_dim1()==num_svms) &&
00666 (mod_words.get_dim2()==2) &&
00667 (sign_words.get_dim1()==num_svms) &&
00668 (string_words.get_dim1()==num_svms))
00669 fprintf(stderr, "OK\n") ;
00670 else
00671 fprintf(stderr, "not OK\n") ;
00672
00673 if (!(word_degree.get_dim1()==num_degrees))
00674 SG_WARNING("SVM array: word_degree.get_dim1()!=num_degrees") ;
00675 if (!(cum_num_words.get_dim1()==num_degrees+1))
00676 SG_WARNING("SVM array: cum_num_words.get_dim1()!=num_degrees+1") ;
00677 if (!(num_words.get_dim1()==num_degrees))
00678 SG_WARNING("SVM array: num_words.get_dim1()==num_degrees") ;
00679 if (!(svm_pos_start.get_dim1()==num_degrees))
00680 SG_WARNING("SVM array: svm_pos_start.get_dim1()!=num_degrees") ;
00681 if (!(num_unique_words.get_dim1()==num_degrees))
00682 SG_WARNING("SVM array: num_unique_words.get_dim1()!=num_degrees") ;
00683 if (!(mod_words.get_dim1()==num_svms))
00684 SG_WARNING("SVM array: mod_words.get_dim1()!=num_svms") ;
00685 if (!(mod_words.get_dim2()==2))
00686 SG_WARNING("SVM array: mod_words.get_dim2()!=2") ;
00687 if (!(sign_words.get_dim1()==num_svms))
00688 SG_WARNING("SVM array: sign_words.get_dim1()!=num_svms") ;
00689 if (!(string_words.get_dim1()==num_svms))
00690 SG_WARNING("SVM array: string_words.get_dim1()!=num_svms") ;
00691
00692 svm_arrays_clean=false ;
00693 return false ;
00694 }
00695 }
00696
00697 void CDynProg::best_path_set_seq(float64_t *seq, int32_t p_N, int32_t seq_len)
00698 {
00699 if (!svm_arrays_clean)
00700 {
00701 SG_ERROR( "SVM arrays not clean") ;
00702 return ;
00703 } ;
00704
00705 ASSERT(p_N==N);
00706 ASSERT(initial_state_distribution_p.get_dim1()==N);
00707 ASSERT(end_state_distribution_q.get_dim1()==N);
00708
00709 m_seq.set_array(seq, N, seq_len, 1, true, true) ;
00710 this->N=N ;
00711
00712 m_call=3 ;
00713 m_step=2 ;
00714 }
00715
00716 void CDynProg::best_path_set_seq3d(
00717 float64_t *seq, int32_t p_N, int32_t seq_len, int32_t max_num_signals)
00718 {
00719 if (!svm_arrays_clean)
00720 {
00721 SG_ERROR( "SVM arrays not clean") ;
00722 return ;
00723 } ;
00724
00725 ASSERT(p_N==N);
00726 ASSERT(initial_state_distribution_p.get_dim1()==N);
00727 ASSERT(end_state_distribution_q.get_dim1()==N);
00728
00729 m_seq.set_array(seq, N, seq_len, max_num_signals, true, true) ;
00730 this->N=N ;
00731
00732 m_call=3 ;
00733 m_step=2 ;
00734 }
00735
00736 void CDynProg::best_path_set_pos(int32_t *pos, int32_t seq_len)
00737 {
00738 if (m_step!=2)
00739 SG_ERROR( "please call best_path_set_seq first\n") ;
00740
00741 if (seq_len!=m_seq.get_dim2())
00742 SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_seq.get_dim2()) ;
00743
00744 m_pos.set_array(pos, seq_len, true, true) ;
00745
00746 m_step=3 ;
00747 }
00748
00749 void CDynProg::best_path_set_orf_info(int32_t *orf_info, int32_t m, int32_t n)
00750 {
00751
00752
00753
00754
00755
00756 if (n!=2)
00757 SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00758 m_orf_info.set_array(orf_info, m, n, true, true) ;
00759
00760 m_call=1 ;
00761 m_step=4 ;
00762 }
00763
00764 void CDynProg::best_path_set_segment_sum_weights(
00765 float64_t *segment_sum_weights, int32_t num_states, int32_t seq_len)
00766 {
00767 if (m_step!=3)
00768 SG_ERROR( "please call best_path_set_pos first\n") ;
00769
00770 if (num_states!=N)
00771 SG_ERROR( "segment_sum_weights size does not match previous info %i!=%i\n", num_states, N) ;
00772 if (seq_len!=m_pos.get_dim1())
00773 SG_ERROR( "segment_sum_weights size incorrect %i!=%i\n", seq_len, m_pos.get_dim1()) ;
00774
00775 m_segment_sum_weights.set_array(segment_sum_weights, num_states, seq_len, true, true) ;
00776
00777 m_call=2 ;
00778 m_step=4 ;
00779 }
00780
00781 void CDynProg::best_path_set_plif_list(CDynamicArray<CPlifBase*>* plifs)
00782 {
00783 ASSERT(plifs);
00784 CPlifBase** plif_list=plifs->get_array();
00785 int32_t num_plif=plifs->get_num_elements();
00786
00787 if (m_step!=4)
00788 SG_ERROR( "please call best_path_set_orf_info or best_path_segment_sum_weights first\n") ;
00789
00790 m_plif_list.set_array(plif_list, num_plif, true, true) ;
00791
00792 m_step=5 ;
00793 }
00794
00795 void CDynProg::best_path_set_plif_id_matrix(
00796 int32_t *plif_id_matrix, int32_t m, int32_t n)
00797 {
00798 if (m_step!=5)
00799 SG_ERROR( "please call best_path_set_plif_list first\n") ;
00800
00801 if ((m!=N) || (n!=N))
00802 SG_ERROR( "plif_id_matrix size does not match previous info %i!=%i or %i!=%i\n", m, N, n, N) ;
00803
00804 CArray2<int32_t> id_matrix(plif_id_matrix, N, N, false, false) ;
00805 #ifdef DYNPROG_DEBUG
00806 id_matrix.set_name("id_matrix");
00807 id_matrix.display_array();
00808 #endif //DYNPROG_DEBUG
00809 m_PEN.resize_array(N, N) ;
00810 for (int32_t i=0; i<N; i++)
00811 for (int32_t j=0; j<N; j++)
00812 if (id_matrix.element(i,j)>=0)
00813 m_PEN.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00814 else
00815 m_PEN.element(i,j)=NULL ;
00816
00817 m_step=6 ;
00818 }
00819
00820 void CDynProg::best_path_set_plif_state_signal_matrix(
00821 int32_t *plif_id_matrix, int32_t m, int32_t max_num_signals)
00822 {
00823 if (m_step!=6)
00824 SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00825
00826 if (m!=N)
00827 SG_ERROR( "plif_state_signal_matrix size does not match previous info %i!=%i\n", m, N) ;
00828
00829 if (m_seq.get_dim3() != max_num_signals)
00830 SG_ERROR( "size(plif_state_signal_matrix,2) does not match with size(m_seq,3): %i!=%i\nSorry, Soeren... interface changed\n", m_seq.get_dim3(), max_num_signals) ;
00831
00832 CArray2<int32_t> id_matrix(plif_id_matrix, N, max_num_signals, false, false) ;
00833 m_PEN_state_signals.resize_array(N, max_num_signals) ;
00834 for (int32_t i=0; i<N; i++)
00835 {
00836 for (int32_t j=0; j<max_num_signals; j++)
00837 {
00838 if (id_matrix.element(i,j)>=0)
00839 m_PEN_state_signals.element(i,j)=m_plif_list[id_matrix.element(i,j)] ;
00840 else
00841 m_PEN_state_signals.element(i,j)=NULL ;
00842 }
00843 }
00844
00845 m_step=6 ;
00846 }
00847
00848 void CDynProg::best_path_set_genestr(
00849 char* genestr, int32_t genestr_len, int32_t genestr_num)
00850 {
00851 if (m_step!=6)
00852 SG_ERROR( "please call best_path_set_plif_id_matrix first\n") ;
00853
00854 ASSERT(genestr);
00855 ASSERT(genestr_len>0);
00856 ASSERT(genestr_num>0);
00857
00858 m_genestr.set_array(genestr, genestr_len, genestr_num, true, true) ;
00859
00860 m_step=7 ;
00861 }
00862
00863 void CDynProg::best_path_set_my_state_seq(
00864 int32_t* my_state_seq, int32_t seq_len)
00865 {
00866 ASSERT(my_state_seq && seq_len>0);
00867 m_my_state_seq.resize_array(seq_len);
00868 for (int32_t i=0; i<seq_len; i++)
00869 m_my_state_seq[i]=my_state_seq[i];
00870 }
00871
00872 void CDynProg::best_path_set_my_pos_seq(int32_t* my_pos_seq, int32_t seq_len)
00873 {
00874 ASSERT(my_pos_seq && seq_len>0);
00875 m_my_pos_seq.resize_array(seq_len);
00876 for (int32_t i=0; i<seq_len; i++)
00877 m_my_pos_seq[i]=my_pos_seq[i];
00878 }
00879
00880 void CDynProg::best_path_set_dict_weights(
00881 float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00882 {
00883 if (m_step!=7)
00884 SG_ERROR( "please call best_path_set_genestr first\n") ;
00885
00886 if (num_svms!=n)
00887 SG_ERROR( "dict_weights array does not match num_svms=%i!=%i\n", num_svms, n) ;
00888
00889 m_dict_weights.set_array(dictionary_weights, dict_len, num_svms, true, true) ;
00890
00891
00892 m_segment_loss.resize_array(max_a_id+1, max_a_id+1, 2) ;
00893 m_segment_loss.zero() ;
00894 m_segment_ids.resize_array(m_seq.get_dim2()) ;
00895 m_segment_mask.resize_array(m_seq.get_dim2()) ;
00896 m_segment_ids.zero() ;
00897 m_segment_mask.zero() ;
00898
00899 m_step=8 ;
00900 }
00901
00902 void CDynProg::best_path_set_segment_loss(
00903 float64_t* segment_loss, int32_t m, int32_t n)
00904 {
00905
00906 if (2*m!=n)
00907 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00908
00909 if (m!=max_a_id+1)
00910 SG_ERROR( "segment_loss size should match max_a_id: %i!=%i\n", m, max_a_id+1) ;
00911
00912 m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00913
00914
00915
00916 }
00917
00918 void CDynProg::best_path_set_segment_ids_mask(
00919 int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00920 {
00921 int32_t max_id = 0;
00922 for (int32_t i=1;i<m;i++)
00923 max_id = CMath::max(max_id,segment_ids[i]);
00924
00925 m_segment_ids.set_array(segment_ids, m, false, true) ;
00926 m_segment_ids.set_name("m_segment_ids");
00927 m_segment_mask.set_array(segment_mask, m, false, true) ;
00928 m_segment_mask.set_name("m_segment_mask");
00929 }
00930
00931
00932 void CDynProg::best_path_call(int32_t nbest, bool use_orf)
00933 {
00934 if (m_step!=8)
00935 SG_ERROR( "please call best_path_set_dict_weights first\n") ;
00936 if (m_call!=1)
00937 SG_ERROR( "please call best_path_set_orf_info first\n") ;
00938 ASSERT(N==m_seq.get_dim1()) ;
00939 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00940
00941 m_scores.resize_array(nbest) ;
00942 m_states.resize_array(nbest, m_seq.get_dim2()) ;
00943 m_positions.resize_array(nbest, m_seq.get_dim2()) ;
00944
00945 m_call=1 ;
00946
00947 ASSERT(nbest==1||nbest==2) ;
00948 ASSERT(m_genestr.get_dim2()==1) ;
00949
00950 if (nbest==1)
00951 best_path_trans<1,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
00952 m_orf_info.get_array(), m_PEN.get_array(),
00953 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00954 m_genestr.get_dim2(),
00955 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00956 use_orf) ;
00957 else
00958 best_path_trans<2,false,false>(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
00959 m_orf_info.get_array(), m_PEN.get_array(),
00960 m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(),
00961 m_genestr.get_dim2(),
00962 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
00963 use_orf) ;
00964
00965 m_step=9 ;
00966 }
00967
00968 void CDynProg::best_path_deriv_call()
00969 {
00970
00971
00972
00973
00974 ASSERT(N==m_seq.get_dim1()) ;
00975 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00976
00977 m_call=5 ;
00978
00979 m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
00980 m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
00981
00982 best_path_trans_deriv(m_my_state_seq.get_array(), m_my_pos_seq.get_array(),
00983 m_my_scores.get_array(), m_my_losses.get_array(), m_my_state_seq.get_array_size(),
00984 m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
00985 m_PEN.get_array(), m_PEN_state_signals.get_array(), m_PEN_state_signals.get_dim2(), m_genestr.get_dim2()) ;
00986
00987 m_step=12 ;
00988 }
00989
00990
00991 void CDynProg::best_path_2struct_call(int32_t nbest)
00992 {
00993 if (m_step!=8)
00994 SG_ERROR( "please call best_path_set_orf_dict_weights first\n") ;
00995 if (m_call!=2)
00996 SG_ERROR( "please call best_path_set_segment_sum_weights first\n") ;
00997 ASSERT(N==m_seq.get_dim1()) ;
00998 ASSERT(m_seq.get_dim2()==m_pos.get_dim1()) ;
00999
01000 m_scores.resize_array(nbest) ;
01001 m_states.resize_array(nbest, m_seq.get_dim2()) ;
01002 m_positions.resize_array(nbest, m_seq.get_dim2()) ;
01003
01004 m_call=2 ;
01005
01006 best_path_2struct(m_seq.get_array(), m_seq.get_dim2(), m_pos.get_array(),
01007 m_PEN.get_array(),
01008 m_genestr.get_array(), m_genestr.get_dim1(),
01009 nbest,
01010 m_scores.get_array(), m_states.get_array(), m_positions.get_array(),
01011 m_dict_weights.get_array(), m_dict_weights.get_dim1()*m_dict_weights.get_dim2(),
01012 m_segment_sum_weights.get_array()) ;
01013
01014 m_step=9 ;
01015 }
01016
01017 void CDynProg::best_path_simple_call(int32_t nbest)
01018 {
01019 if (m_step!=2)
01020 SG_ERROR( "please call best_path_set_seq first\n") ;
01021 if (m_call!=3)
01022 SG_ERROR( "please call best_path_set_seq first\n") ;
01023 ASSERT(N==m_seq.get_dim1()) ;
01024
01025 m_scores.resize_array(nbest) ;
01026 m_states.resize_array(nbest, m_seq.get_dim2()) ;
01027
01028 m_call=3 ;
01029
01030 best_path_trans_simple(m_seq.get_array(), m_seq.get_dim2(),
01031 nbest,
01032 m_scores.get_array(), m_states.get_array()) ;
01033
01034 m_step=9 ;
01035 }
01036
01037 void CDynProg::best_path_deriv_call(int32_t nbest)
01038 {
01039 if (!svm_arrays_clean)
01040 {
01041 SG_ERROR( "SVM arrays not clean") ;
01042 return ;
01043 } ;
01044
01045
01046 }
01047
01048
01049 void CDynProg::best_path_get_scores(float64_t **scores, int32_t *m)
01050 {
01051 if (m_step!=9 && m_step!=12)
01052 SG_ERROR( "please call best_path*_call first\n") ;
01053
01054 if (m_step==9)
01055 {
01056 *scores=m_scores.get_array() ;
01057 *m=m_scores.get_dim1() ;
01058 } else
01059 {
01060 *scores=m_my_scores.get_array() ;
01061 *m=m_my_scores.get_dim1() ;
01062 }
01063
01064 m_step=10 ;
01065 }
01066
01067 void CDynProg::best_path_get_states(int32_t **states, int32_t *m, int32_t *n)
01068 {
01069 if (m_step!=10)
01070 SG_ERROR( "please call best_path_get_score first\n") ;
01071
01072 *states=m_states.get_array() ;
01073 *m=m_states.get_dim1() ;
01074 *n=m_states.get_dim2() ;
01075
01076 m_step=11 ;
01077 }
01078
01079 void CDynProg::best_path_get_positions(
01080 int32_t **positions, int32_t *m, int32_t *n)
01081 {
01082 if (m_step!=11)
01083 SG_ERROR( "please call best_path_get_positions first\n") ;
01084 if (m_call==3)
01085 SG_ERROR( "no position information for best_path_simple\n") ;
01086
01087 *positions=m_positions.get_array() ;
01088 *m=m_positions.get_dim1() ;
01089 *n=m_positions.get_dim2() ;
01090 }
01091
01092 void CDynProg::best_path_get_losses(float64_t** losses, int32_t* seq_len)
01093 {
01094 ASSERT(losses && seq_len);
01095 *losses=m_my_losses.get_array();
01096 *seq_len=m_my_losses.get_dim1();
01097 }
01098
01099
01101
01102 float64_t CDynProg::best_path_no_b(
01103 int32_t max_iter, int32_t &best_iter, int32_t *my_path)
01104 {
01105 CArray2<T_STATES> psi(max_iter, N) ;
01106 CArray<float64_t>* delta = new CArray<float64_t>(N) ;
01107 CArray<float64_t>* delta_new = new CArray<float64_t>(N) ;
01108
01109 {
01110 for (int32_t i=0; i<N; i++)
01111 {
01112 delta->element(i) = get_p(i) ;
01113 psi.element(0, i)= 0 ;
01114 }
01115 }
01116
01117 float64_t best_iter_prob = CMath::ALMOST_NEG_INFTY ;
01118 best_iter = 0 ;
01119
01120
01121 for (int32_t t=1; t<max_iter; t++)
01122 {
01123 CArray<float64_t>* dummy;
01124 int32_t NN=N ;
01125 for (int32_t j=0; j<NN; j++)
01126 {
01127 float64_t maxj = delta->element(0) + transition_matrix_a.element(0,j);
01128 int32_t argmax=0;
01129
01130 for (int32_t i=1; i<NN; i++)
01131 {
01132 float64_t temp = delta->element(i) + transition_matrix_a.element(i,j);
01133
01134 if (temp>maxj)
01135 {
01136 maxj=temp;
01137 argmax=i;
01138 }
01139 }
01140 delta_new->element(j)=maxj ;
01141 psi.element(t, j)=argmax ;
01142 }
01143
01144 dummy=delta;
01145 delta=delta_new;
01146 delta_new=dummy;
01147
01148 {
01149 float64_t maxj=delta->element(0)+get_q(0);
01150 int32_t argmax=0;
01151
01152 for (int32_t i=1; i<N; i++)
01153 {
01154 float64_t temp=delta->element(i)+get_q(i);
01155
01156 if (temp>maxj)
01157 {
01158 maxj=temp;
01159 argmax=i;
01160 }
01161 }
01162
01163
01164 if (maxj>best_iter_prob)
01165 {
01166 my_path[t]=argmax;
01167 best_iter=t ;
01168 best_iter_prob = maxj ;
01169 } ;
01170 } ;
01171 }
01172
01173
01174 {
01175 for (int32_t t = best_iter; t>0; t--)
01176 {
01177 my_path[t-1]=psi.element(t, my_path[t]);
01178 }
01179 }
01180
01181 delete delta ;
01182 delete delta_new ;
01183
01184 return best_iter_prob ;
01185 }
01186
01187 void CDynProg::best_path_no_b_trans(
01188 int32_t max_iter, int32_t &max_best_iter, int16_t nbest,
01189 float64_t *prob_nbest, int32_t *my_paths)
01190 {
01191
01192 CArray3<T_STATES> psi(max_iter, N, nbest) ;
01193 CArray3<int16_t> ktable(max_iter, N, nbest) ;
01194 CArray2<int16_t> ktable_ends(max_iter, nbest) ;
01195
01196 CArray<float64_t> tempvv(nbest*N) ;
01197 CArray<int32_t> tempii(nbest*N) ;
01198
01199 CArray2<T_STATES> path_ends(max_iter, nbest) ;
01200 CArray2<float64_t> *delta=new CArray2<float64_t>(N, nbest) ;
01201 CArray2<float64_t> *delta_new=new CArray2<float64_t>(N, nbest) ;
01202 CArray2<float64_t> delta_end(max_iter, nbest) ;
01203
01204 CArray2<int32_t> paths(max_iter, nbest) ;
01205 paths.set_array(my_paths, max_iter, nbest, false, false) ;
01206
01207 {
01208 for (T_STATES i=0; i<N; i++)
01209 {
01210 delta->element(i,0) = get_p(i) ;
01211 for (int16_t k=1; k<nbest; k++)
01212 {
01213 delta->element(i,k)=-CMath::INFTY ;
01214 ktable.element(0,i,k)=0 ;
01215 }
01216 }
01217 }
01218
01219
01220 for (int32_t t=1; t<max_iter; t++)
01221 {
01222 CArray2<float64_t>* dummy=NULL;
01223
01224 for (T_STATES j=0; j<N; j++)
01225 {
01226 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01227 const T_STATES *elem_list = trans_list_forward[j] ;
01228 const float64_t *elem_val = trans_list_forward_val[j] ;
01229
01230 int32_t list_len=0 ;
01231 for (int16_t diff=0; diff<nbest; diff++)
01232 {
01233 for (int32_t i=0; i<num_elem; i++)
01234 {
01235 T_STATES ii = elem_list[i] ;
01236
01237 tempvv.element(list_len) = -(delta->element(ii,diff) + elem_val[i]) ;
01238 tempii.element(list_len) = diff*N + ii ;
01239 list_len++ ;
01240 }
01241 }
01242 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01243
01244 for (int16_t k=0; k<nbest; k++)
01245 {
01246 if (k<list_len)
01247 {
01248 delta_new->element(j,k) = -tempvv[k] ;
01249 psi.element(t,j,k) = (tempii[k]%N) ;
01250 ktable.element(t,j,k) = (tempii[k]-(tempii[k]%N))/N ;
01251 }
01252 else
01253 {
01254 delta_new->element(j,k) = -CMath::INFTY ;
01255 psi.element(t,j,k) = 0 ;
01256 ktable.element(t,j,k) = 0 ;
01257 }
01258 }
01259 }
01260
01261 dummy=delta;
01262 delta=delta_new;
01263 delta_new=dummy;
01264
01265 {
01266 int32_t list_len = 0 ;
01267 for (int16_t diff=0; diff<nbest; diff++)
01268 {
01269 for (T_STATES i=0; i<N; i++)
01270 {
01271 tempvv.element(list_len) = -(delta->element(i,diff)+get_q(i));
01272 tempii.element(list_len) = diff*N + i ;
01273 list_len++ ;
01274 }
01275 }
01276 CMath::qsort_index(tempvv.get_array(), tempii.get_array(), list_len) ;
01277
01278 for (int16_t k=0; k<nbest; k++)
01279 {
01280 delta_end.element(t-1,k) = -tempvv[k] ;
01281 path_ends.element(t-1,k) = (tempii[k]%N) ;
01282 ktable_ends.element(t-1,k) = (tempii[k]-(tempii[k]%N))/N ;
01283 }
01284 }
01285 }
01286
01287 {
01288 max_best_iter=0 ;
01289
01290 CArray<float64_t> sort_delta_end(max_iter*nbest) ;
01291 CArray<int16_t> sort_k(max_iter*nbest) ;
01292 CArray<int32_t> sort_t(max_iter*nbest) ;
01293 CArray<int32_t> sort_idx(max_iter*nbest) ;
01294
01295 int32_t i=0 ;
01296 for (int32_t iter=0; iter<max_iter-1; iter++)
01297 for (int16_t k=0; k<nbest; k++)
01298 {
01299 sort_delta_end[i]=-delta_end.element(iter,k) ;
01300 sort_k[i]=k ;
01301 sort_t[i]=iter+1 ;
01302 sort_idx[i]=i ;
01303 i++ ;
01304 }
01305
01306 CMath::qsort_index(sort_delta_end.get_array(), sort_idx.get_array(), (max_iter-1)*nbest) ;
01307
01308 for (int16_t n=0; n<nbest; n++)
01309 {
01310 int16_t k=sort_k[sort_idx[n]] ;
01311 int32_t iter=sort_t[sort_idx[n]] ;
01312 prob_nbest[n]=-sort_delta_end[n] ;
01313
01314 if (iter>max_best_iter)
01315 max_best_iter=iter ;
01316
01317 ASSERT(k<nbest) ;
01318 ASSERT(iter<max_iter) ;
01319
01320 paths.element(iter,n) = path_ends.element(iter-1, k) ;
01321 int16_t q = ktable_ends.element(iter-1, k) ;
01322
01323 for (int32_t t = iter; t>0; t--)
01324 {
01325 paths.element(t-1,n)=psi.element(t, paths.element(t,n), q);
01326 q = ktable.element(t, paths.element(t,n), q) ;
01327 }
01328 }
01329 }
01330
01331 delete delta ;
01332 delete delta_new ;
01333 }
01334
01335
01336 void CDynProg::translate_from_single_order(
01337 uint16_t* obs, int32_t sequence_length, int32_t start, int32_t order,
01338 int32_t max_val)
01339 {
01340 int32_t i,j;
01341 uint16_t value=0;
01342
01343 for (i=sequence_length-1; i>=order-1; i--)
01344 {
01345 value=0;
01346 for (j=i; j>=i-order+1; j--)
01347 value= (value >> max_val) | (obs[j] << (max_val * (order-1)));
01348
01349 obs[i]=value;
01350 }
01351
01352 for (i=order-2;i>=0;i--)
01353 {
01354 value=0;
01355 for (j=i; j>=i-order+1; j--)
01356 {
01357 value= (value >> max_val);
01358 if (j>=0)
01359 value|=obs[j] << (max_val * (order-1));
01360 }
01361 obs[i]=value;
01362
01363 }
01364 if (start>0)
01365 for (i=start; i<sequence_length; i++)
01366 obs[i-start]=obs[i];
01367 }
01368
01369 void CDynProg::reset_svm_value(
01370 int32_t pos, int32_t & last_svm_pos, float64_t * svm_value)
01371 {
01372 for (int32_t i=0; i<num_words_single; i++)
01373 word_used_single[i]=false ;
01374 for (int32_t s=0; s<num_svms; s++)
01375 svm_value_unnormalized_single[s] = 0 ;
01376 for (int32_t s=0; s<num_svms; s++)
01377 svm_value[s] = 0 ;
01378 last_svm_pos = pos - 6+1 ;
01379 num_unique_words_single=0 ;
01380 }
01381
01382 void CDynProg::extend_svm_value(
01383 uint16_t* wordstr, int32_t pos, int32_t &last_svm_pos, float64_t* svm_value)
01384 {
01385 bool did_something = false ;
01386 for (int32_t i=last_svm_pos-1; (i>=pos) && (i>=0); i--)
01387 {
01388 if (wordstr[i]>=num_words_single)
01389 SG_DEBUG( "wordstr[%i]=%i\n", i, wordstr[i]) ;
01390
01391 if (!word_used_single[wordstr[i]])
01392 {
01393 for (int32_t s=0; s<num_svms_single; s++)
01394 svm_value_unnormalized_single[s]+=dict_weights.element(wordstr[i],s) ;
01395
01396 word_used_single[wordstr[i]]=true ;
01397 num_unique_words_single++ ;
01398 did_something=true ;
01399 }
01400 } ;
01401 if (num_unique_words_single>0)
01402 {
01403 last_svm_pos=pos ;
01404 if (did_something)
01405 for (int32_t s=0; s<num_svms; s++)
01406 svm_value[s]= svm_value_unnormalized_single[s]/sqrt((float64_t)num_unique_words_single) ;
01407 }
01408 else
01409 {
01410
01411 for (int32_t s=0; s<num_svms; s++)
01412 svm_value[s]=0 ;
01413 }
01414
01415 }
01416
01417
01418 void CDynProg::reset_segment_sum_value(
01419 int32_t num_states, int32_t pos, int32_t & last_segment_sum_pos,
01420 float64_t * segment_sum_value)
01421 {
01422 for (int32_t s=0; s<num_states; s++)
01423 segment_sum_value[s] = 0 ;
01424 last_segment_sum_pos = pos ;
01425
01426 }
01427
01428 void CDynProg::extend_segment_sum_value(
01429 float64_t *segment_sum_weights, int32_t seqlen, int32_t num_states,
01430 int32_t pos, int32_t &last_segment_sum_pos, float64_t* segment_sum_value)
01431 {
01432 for (int32_t i=last_segment_sum_pos-1; (i>=pos) && (i>=0); i--)
01433 {
01434 for (int32_t s=0; s<num_states; s++)
01435 segment_sum_value[s] += segment_sum_weights[i*num_states+s] ;
01436 } ;
01437
01438 last_segment_sum_pos = pos ;
01439 }
01440
01441
01442 void CDynProg::best_path_2struct(
01443 const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
01444 CPlifBase **Plif_matrix, const char *genestr, int32_t genestr_len,
01445 int16_t nbest, float64_t *prob_nbest, int32_t *my_state_seq,
01446 int32_t *my_pos_seq, float64_t *dictionary_weights, int32_t dict_len,
01447 float64_t *segment_sum_weights)
01448 {
01449 const int32_t default_look_back = 100 ;
01450 int32_t max_look_back = default_look_back ;
01451 bool use_svm = false ;
01452 ASSERT(dict_len==num_svms*num_words_single) ;
01453 dict_weights.set_array(dictionary_weights, dict_len, num_svms, false, false) ;
01454 dict_weights_array=dict_weights.get_array() ;
01455
01456 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false) ;
01457 CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
01458
01459 float64_t svm_value[num_svms] ;
01460 float64_t segment_sum_value[N] ;
01461
01462 {
01463 for (int32_t s=0; s<num_svms; s++)
01464 svm_value[s]=0 ;
01465 }
01466
01467 {
01468 for (int32_t i=0; i<N; i++)
01469 for (int32_t j=0; j<N; j++)
01470 {
01471 CPlifBase *penij=PEN.element(i,j) ;
01472 if (penij==NULL)
01473 continue ;
01474 if (penij->get_max_value()>max_look_back)
01475 max_look_back=(int32_t) CMath::ceil(penij->get_max_value());
01476 if (penij->uses_svm_values())
01477 use_svm=true ;
01478 }
01479 }
01480 max_look_back = CMath::min(genestr_len, max_look_back) ;
01481
01482
01483
01484 const int32_t look_back_buflen = (max_look_back+1)*nbest*N ;
01485
01486 const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01487 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01488 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01489 genestr_len*sizeof(bool))/(1024*1024)
01490 ;
01491 bool is_big = (mem_use>200) || (seq_len>5000) ;
01492
01493 if (is_big)
01494 {
01495 SG_DEBUG("calling best_path_2struct: seq_len=%i, N=%i, lookback=%i nbest=%i\n",
01496 seq_len, N, max_look_back, nbest) ;
01497 SG_DEBUG("allocating %1.2fMB of memory\n",
01498 mem_use) ;
01499 }
01500 ASSERT(nbest<32000) ;
01501
01502 CArray3<float64_t> delta(max_look_back+1, N, nbest) ;
01503 CArray3<T_STATES> psi(seq_len,N,nbest) ;
01504 CArray3<int16_t> ktable(seq_len,N,nbest) ;
01505 CArray3<int32_t> ptable(seq_len,N,nbest) ;
01506
01507 CArray<float64_t> delta_end(nbest) ;
01508 CArray<T_STATES> path_ends(nbest) ;
01509 CArray<int16_t> ktable_end(nbest) ;
01510
01511 CArray<float64_t> tempvv(look_back_buflen) ;
01512 CArray<int32_t> tempii(look_back_buflen) ;
01513
01514 CArray<T_STATES> state_seq(seq_len) ;
01515 CArray<int32_t> pos_seq(seq_len) ;
01516
01517
01518 uint16_t* wordstr=NULL ;
01519 if (use_svm)
01520 {
01521 ASSERT(dictionary_weights!=NULL) ;
01522 wordstr=new uint16_t[genestr_len] ;
01523 for (int32_t i=0; i<genestr_len; i++)
01524 switch (genestr[i])
01525 {
01526 case 'A':
01527 case 'a': wordstr[i]=0 ; break ;
01528 case 'C':
01529 case 'c': wordstr[i]=1 ; break ;
01530 case 'G':
01531 case 'g': wordstr[i]=2 ; break ;
01532 case 'T':
01533 case 't': wordstr[i]=3 ; break ;
01534 default: ASSERT(0) ;
01535 }
01536 translate_from_single_order(wordstr, genestr_len, word_degree_single-1, word_degree_single) ;
01537 }
01538
01539
01540 {
01541 for (T_STATES i=0; i<N; i++)
01542 {
01543 delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
01544 psi.element(0,i,0) = 0 ;
01545 ktable.element(0,i,0) = 0 ;
01546 ptable.element(0,i,0) = 0 ;
01547 for (int16_t k=1; k<nbest; k++)
01548 {
01549 delta.element(0,i,k) = -CMath::INFTY ;
01550 psi.element(0,i,0) = 0 ;
01551 ktable.element(0,i,k) = 0 ;
01552 ptable.element(0,i,k) = 0 ;
01553 }
01554 }
01555 }
01556
01557
01558 for (int32_t t=1; t<seq_len; t++)
01559 {
01560 if (is_big && t%(seq_len/10000)==1)
01561 SG_PROGRESS(t, 0, seq_len);
01562
01563
01564 for (T_STATES j=0; j<N; j++)
01565 {
01566 if (seq.element(j,t)<-1e20)
01567 {
01568 for (int16_t k=0; k<nbest; k++)
01569 {
01570 delta.element(t%max_look_back,j,k) = seq.element(j,t) ;
01571 psi.element(t,j,k) = 0 ;
01572 ktable.element(t,j,k) = 0 ;
01573 ptable.element(t,j,k) = 0 ;
01574 }
01575 }
01576 else
01577 {
01578 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01579 const T_STATES *elem_list = trans_list_forward[j] ;
01580 const float64_t *elem_val = trans_list_forward_val[j] ;
01581
01582 int32_t list_len=0 ;
01583 for (int32_t i=0; i<num_elem; i++)
01584 {
01585 T_STATES ii = elem_list[i] ;
01586
01587
01588 const CPlifBase * penalty = PEN.element(j,ii) ;
01589 int32_t look_back = default_look_back ;
01590 if (penalty!=NULL)
01591 look_back=(int32_t) (CMath::ceil(penalty->get_max_value()));
01592
01593 int32_t last_svm_pos ;
01594 if (use_svm)
01595 reset_svm_value(pos[t], last_svm_pos, svm_value) ;
01596
01597 int32_t last_segment_sum_pos ;
01598 reset_segment_sum_value(N, pos[t], last_segment_sum_pos, segment_sum_value) ;
01599
01600 for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
01601 {
01602 if (use_svm)
01603 extend_svm_value(wordstr, pos[ts], last_svm_pos, svm_value) ;
01604
01605 extend_segment_sum_value(segment_sum_weights, seq_len, N, pos[ts], last_segment_sum_pos, segment_sum_value) ;
01606
01607 float64_t pen_val = 0.0 ;
01608 if (penalty)
01609 pen_val=penalty->lookup_penalty(pos[t]-pos[ts], svm_value) + segment_sum_value[j] ;
01610 for (int16_t diff=0; diff<nbest; diff++)
01611 {
01612 float64_t val = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
01613 val += pen_val ;
01614
01615 tempvv[list_len] = -val ;
01616 tempii[list_len] = ii + diff*N + ts*N*nbest;
01617
01618 list_len++ ;
01619 }
01620 }
01621 }
01622 CMath::nmin<int32_t>(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01623
01624 for (int16_t k=0; k<nbest; k++)
01625 {
01626 if (k<list_len)
01627 {
01628 delta.element(t%max_look_back,j,k) = -tempvv[k] + seq.element(j,t);
01629 psi.element(t,j,k) = (tempii[k]%N) ;
01630 ktable.element(t,j,k) = (tempii[k]%(N*nbest)-psi.element(t,j,k))/N ;
01631 ptable.element(t,j,k) = (tempii[k]-(tempii[k]%(N*nbest)))/(N*nbest) ;
01632 }
01633 else
01634 {
01635 delta.element(t%max_look_back,j,k) = -CMath::INFTY ;
01636 psi.element(t,j,k) = 0 ;
01637 ktable.element(t,j,k) = 0 ;
01638 ptable.element(t,j,k) = 0 ;
01639 }
01640 }
01641 }
01642 }
01643 }
01644
01645 {
01646 int32_t list_len = 0 ;
01647 for (int16_t diff=0; diff<nbest; diff++)
01648 {
01649 for (T_STATES i=0; i<N; i++)
01650 {
01651 tempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
01652 tempii[list_len] = i + diff*N ;
01653 list_len++ ;
01654 }
01655 }
01656
01657 CMath::nmin(tempvv.get_array(), tempii.get_array(), list_len, nbest) ;
01658
01659 for (int16_t k=0; k<nbest; k++)
01660 {
01661 delta_end.element(k) = -tempvv[k] ;
01662 path_ends.element(k) = (tempii[k]%N) ;
01663 ktable_end.element(k) = (tempii[k]-path_ends.element(k))/N ;
01664 }
01665 }
01666
01667 {
01668 for (int16_t k=0; k<nbest; k++)
01669 {
01670 prob_nbest[k]= delta_end.element(k) ;
01671
01672 int32_t i = 0 ;
01673 state_seq[i] = path_ends.element(k) ;
01674 int16_t q = ktable_end.element(k) ;
01675 pos_seq[i] = seq_len-1 ;
01676
01677 while (pos_seq[i]>0)
01678 {
01679
01680 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
01681 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
01682 q = ktable.element(pos_seq[i], state_seq[i], q) ;
01683 i++ ;
01684 }
01685
01686 int32_t num_states = i+1 ;
01687 for (i=0; i<num_states;i++)
01688 {
01689 my_state_seq[i+k*(seq_len+1)] = state_seq[num_states-i-1] ;
01690 my_pos_seq[i+k*(seq_len+1)] = pos_seq[num_states-i-1] ;
01691 }
01692 my_state_seq[num_states+k*(seq_len+1)]=-1 ;
01693 my_pos_seq[num_states+k*(seq_len+1)]=-1 ;
01694 }
01695 }
01696 if (is_big)
01697 SG_PRINT( "DONE. \n") ;
01698 }
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733
01734
01735
01736
01737
01738
01739
01740
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754 void CDynProg::init_segment_loss(
01755 struct segment_loss_struct & loss, int32_t seqlen, int32_t howmuchlookback)
01756 {
01757 #ifdef DYNPROG_TIMING
01758 MyTime.start() ;
01759 #endif
01760 int32_t clear_size = CMath::min(howmuchlookback,seqlen) ;
01761
01762 if (!loss.num_segment_id)
01763 {
01764 loss.segments_changed = new int32_t[seqlen] ;
01765 loss.num_segment_id = new float64_t[(max_a_id+1)*seqlen] ;
01766 loss.length_segment_id = new int32_t[(max_a_id+1)*seqlen] ;
01767
01768 clear_size = seqlen ;
01769 }
01770
01771 for (int32_t j=0; j<clear_size; j++)
01772 {
01773 loss.segments_changed[j]=0 ;
01774 for (int32_t i=0; i<max_a_id+1; i++)
01775 {
01776 loss.num_segment_id[i*seqlen+j] = 0;
01777 loss.length_segment_id[i*seqlen+j] = 0;
01778 }
01779 }
01780
01781 loss.maxlookback = howmuchlookback ;
01782 loss.seqlen = seqlen;
01783
01784 #ifdef DYNPROG_TIMING
01785 MyTime.stop() ;
01786 segment_init_time += MyTime.time_diff_sec() ;
01787 #endif
01788 }
01789
01790 void CDynProg::clear_segment_loss(struct segment_loss_struct & loss)
01791 {
01792 #ifdef DYNPROG_TIMING
01793 MyTime.start() ;
01794 #endif
01795
01796 if (loss.num_segment_id != NULL)
01797 {
01798 delete[] loss.segments_changed ;
01799 delete[] loss.num_segment_id ;
01800 delete[] loss.length_segment_id ;
01801 loss.segments_changed = NULL ;
01802 loss.num_segment_id = NULL ;
01803 loss.length_segment_id = NULL ;
01804 }
01805 #ifdef DYNPROG_TIMING
01806 MyTime.stop() ;
01807 segment_clean_time += MyTime.time_diff_sec() ;
01808 #endif
01809 }
01810
01811 float64_t CDynProg::extend_segment_loss(
01812 struct segment_loss_struct & loss, const int32_t * pos_array,
01813 int32_t segment_id, int32_t pos, int32_t & last_pos, float64_t &last_value)
01814 {
01815 #ifdef DYNPROG_TIMING
01816 MyTime.start() ;
01817 #endif
01818
01819 if (pos==last_pos)
01820 return last_value ;
01821 ASSERT(pos<last_pos) ;
01822
01823 last_pos-- ;
01824 bool changed = false ;
01825 while (last_pos>=pos)
01826 {
01827 if (loss.segments_changed[last_pos])
01828 {
01829 changed=true ;
01830 break ;
01831 }
01832 last_pos-- ;
01833 }
01834 if (last_pos<pos)
01835 last_pos = pos ;
01836
01837 if (!changed)
01838 {
01839 ASSERT(last_pos>=0) ;
01840 ASSERT(last_pos<loss.seqlen) ;
01841 float64_t length_contrib = (pos_array[last_pos]-pos_array[pos])*m_segment_loss.element(m_segment_ids.element(pos), segment_id, 1) ;
01842 float64_t ret = last_value + length_contrib ;
01843 last_pos = pos ;
01844 return ret ;
01845 }
01846
01847 CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01848 CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01849 float64_t ret = 0.0 ;
01850 for (int32_t i=0; i<max_a_id+1; i++)
01851 {
01852
01853
01854 if (num_segment_id.element(pos, i)!=0)
01855 {
01856 ret += num_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 0) ;
01857
01858
01859
01860
01861
01862
01863
01864 }
01865 if (length_segment_id.element(pos, i)!=0)
01866 ret += length_segment_id.element(pos, i)*m_segment_loss.element(i, segment_id, 1) ;
01867 }
01868 last_pos = pos ;
01869 last_value = ret ;
01870
01871 #ifdef DYNPROG_TIMING
01872 MyTime.stop() ;
01873 segment_extend_time += MyTime.time_diff_sec() ;
01874 #endif
01875 return ret ;
01876 }
01877
01878 void CDynProg::find_segment_loss_till_pos(
01879 const int32_t * pos, int32_t t_end, CArray<int32_t>& segment_ids,
01880 CArray<float64_t>& segment_mask, struct segment_loss_struct & loss)
01881 {
01882 #ifdef DYNPROG_TIMING
01883 MyTime.start() ;
01884 #endif
01885 CArray2<float64_t> num_segment_id(loss.num_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01886 CArray2<int32_t> length_segment_id(loss.length_segment_id, loss.seqlen, max_a_id+1, false, false) ;
01887
01888 for (int32_t i=0; i<max_a_id+1; i++)
01889 {
01890 num_segment_id.element(t_end, i) = 0 ;
01891 length_segment_id.element(t_end, i) = 0 ;
01892 }
01893 int32_t wobble_pos_segment_id_switch = 0 ;
01894 int32_t last_segment_id = -1 ;
01895 int32_t ts = t_end-1 ;
01896 while ((ts>=0) && (pos[t_end] - pos[ts] <= loss.maxlookback))
01897 {
01898 int32_t cur_segment_id = segment_ids.element(ts) ;
01899
01900 bool wobble_pos = (CMath::abs(segment_mask.element(ts))<1e-7) && (wobble_pos_segment_id_switch==0) ;
01901 if (!(cur_segment_id<=max_a_id))
01902 SG_ERROR("(cur_segment_id<=max_a_id), cur_segment_id:%i max_a_id:%i\n",cur_segment_id,max_a_id);
01903 ASSERT(cur_segment_id>=0) ;
01904
01905 for (int32_t i=0; i<max_a_id+1; i++)
01906 {
01907 num_segment_id.element(ts, i) = num_segment_id.element(ts+1, i) ;
01908 length_segment_id.element(ts, i) = length_segment_id.element(ts+1, i) ;
01909 }
01910
01911 if (cur_segment_id!=last_segment_id)
01912 {
01913 if (wobble_pos)
01914 {
01915
01916 wobble_pos_segment_id_switch++ ;
01917
01918 }
01919 else
01920 {
01921
01922 loss.segments_changed[ts] = true ;
01923 num_segment_id.element(ts, cur_segment_id) += segment_mask.element(ts);
01924 length_segment_id.element(ts, cur_segment_id) += (int32_t)((pos[ts+1]-pos[ts])*segment_mask.element(ts));
01925 wobble_pos_segment_id_switch = 0 ;
01926 }
01927 last_segment_id = cur_segment_id ;
01928 }
01929 else
01930 if (!wobble_pos)
01931 length_segment_id.element(ts, cur_segment_id) += pos[ts+1] - pos[ts] ;
01932
01933 ts--;
01934 }
01935 #ifdef DYNPROG_TIMING
01936 MyTime.stop() ;
01937 segment_pos_time += MyTime.time_diff_sec() ;
01938 #endif
01939 }
01940
01941 void CDynProg::init_svm_values(
01942 struct svm_values_struct & svs, int32_t start_pos, int32_t seqlen,
01943 int32_t maxlookback)
01944 {
01945 #ifdef DYNPROG_TIMING
01946 MyTime.start() ;
01947 #endif
01948
01949
01950
01951
01952
01953
01954
01955
01956 int32_t clear_size = CMath::min(maxlookback,seqlen) ;
01957
01958 if (!svs.svm_values)
01959 {
01960 svs.svm_values = new float64_t[seqlen*num_svms] ;
01961 svs.num_unique_words = new int32_t*[num_degrees] ;
01962 svs.svm_values_unnormalized = new float64_t*[num_degrees] ;
01963 svs.word_used = new bool**[num_degrees] ;
01964 for (int32_t j=0; j<num_degrees; j++)
01965 {
01966 svs.word_used[j] = new bool*[num_svms] ;
01967 for (int32_t s=0; s<num_svms; s++)
01968 svs.word_used[j][s] = new bool[num_words_array[j]] ;
01969 }
01970 for (int32_t j=0; j<num_degrees; j++)
01971 {
01972 svs.svm_values_unnormalized[j] = new float64_t[num_svms] ;
01973 svs.num_unique_words[j] = new int32_t[num_svms] ;
01974 }
01975 svs.start_pos = new int32_t[num_svms] ;
01976 clear_size = seqlen ;
01977 }
01978
01979
01980
01981 memset(svs.svm_values, 0, clear_size*num_svms*sizeof(float64_t)) ;
01982
01983 for (int32_t j=0; j<num_degrees; j++)
01984 {
01985
01986
01987 memset(svs.svm_values_unnormalized[j], 0, num_svms*sizeof(float64_t)) ;
01988
01989
01990
01991 memset(svs.num_unique_words[j], 0, num_svms*sizeof(int32_t)) ;
01992 }
01993
01994 for (int32_t j=0; j<num_degrees; j++)
01995 for (int32_t s=0; s<num_svms; s++)
01996 {
01997
01998
01999 memset(svs.word_used[j][s], 0, num_words_array[j]*sizeof(bool)) ;
02000 }
02001
02002 for (int32_t s=0; s<num_svms; s++)
02003 svs.start_pos[s] = start_pos - mod_words.element(s,1) ;
02004
02005 svs.maxlookback = maxlookback ;
02006 svs.seqlen = seqlen ;
02007
02008 #ifdef DYNPROG_TIMING
02009 MyTime.stop() ;
02010 svm_init_time += MyTime.time_diff_sec() ;
02011 #endif
02012 }
02013
02014 void CDynProg::clear_svm_values(struct svm_values_struct & svs)
02015 {
02016 #ifdef DYNPROG_TIMING
02017 MyTime.start() ;
02018 #endif
02019 if (NULL != svs.svm_values)
02020 {
02021 for (int32_t j=0; j<num_degrees; j++)
02022 {
02023 for (int32_t s=0; s<num_svms; s++)
02024 delete[] svs.word_used[j][s] ;
02025 delete[] svs.word_used[j];
02026 }
02027 delete[] svs.word_used;
02028
02029 for (int32_t j=0; j<num_degrees; j++)
02030 delete[] svs.svm_values_unnormalized[j] ;
02031 for (int32_t j=0; j<num_degrees; j++)
02032 delete[] svs.num_unique_words[j] ;
02033
02034 delete[] svs.svm_values_unnormalized;
02035 delete[] svs.svm_values;
02036 delete[] svs.num_unique_words ;
02037
02038 svs.word_used=NULL ;
02039 svs.svm_values=NULL ;
02040 svs.svm_values_unnormalized=NULL ;
02041 }
02042
02043 #ifdef DYNPROG_TIMING
02044 MyTime.stop() ;
02045 svm_clean_time += MyTime.time_diff_sec() ;
02046 #endif
02047 }
02048
02049
02050 void CDynProg::find_svm_values_till_pos(
02051 uint16_t*** wordstr, const int32_t *pos, int32_t t_end,
02052 struct svm_values_struct &svs)
02053 {
02054 #ifdef DYNPROG_TIMING
02055 MyTime.start() ;
02056 #endif
02057
02058
02059
02060
02061
02062
02063
02064
02065
02066
02067
02068
02069
02070
02071
02072
02073 for (int32_t j=0; j<num_degrees; j++)
02074
02075 {
02076 int32_t plen = 1;
02077 int32_t ts = t_end-1;
02078 int32_t offset;
02079
02080 int32_t posprev = pos[t_end]-word_degree[j]+1;
02081 int32_t poscurrent = pos[ts];
02082
02083
02084
02085 if (poscurrent<0)
02086 poscurrent = 0;
02087 float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02088 int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02089 bool ** my_word_used = svs.word_used[j] ;
02090
02091 int32_t len = pos[t_end] - poscurrent;
02092 while ((ts>=0) && (len <= svs.maxlookback))
02093 {
02094 for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02095 {
02096
02097
02098 uint16_t word = wordstr[string_words_array[0]][j][i] ;
02099 int32_t last_string = string_words_array[0] ;
02100 for (int32_t s=0; s<num_svms; s++)
02101 {
02102
02103
02104 if (last_string != string_words_array[s])
02105 {
02106 last_string = string_words_array[s] ;
02107 word = wordstr[last_string][j][i] ;
02108 }
02109
02110
02111 if (sign_words_array[s] && my_word_used[s][word])
02112 continue ;
02113
02114
02115
02116 {
02117 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02118
02119 my_num_unique_words[s]++ ;
02120 if (sign_words_array[s])
02121 my_word_used[s][word]=true ;
02122 }
02123 }
02124 }
02125 offset = plen*num_svms ;
02126 for (int32_t s=0; s<num_svms; s++)
02127 {
02128 float64_t normalization_factor = 1.0;
02129 if (my_num_unique_words[s] > 0)
02130 {
02131 if (sign_words_array[s])
02132 normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02133 else
02134 normalization_factor = (float64_t)my_num_unique_words[s];
02135 }
02136
02137 if (j==0)
02138 svs.svm_values[offset+s]=0 ;
02139 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02140 }
02141
02142 if (posprev > poscurrent)
02143 posprev = poscurrent;
02144
02145 ts--;
02146 plen++;
02147
02148 if (ts>=0)
02149 {
02150 poscurrent=pos[ts];
02151 if (poscurrent<0)
02152 poscurrent = 0;
02153 len = pos[t_end] - poscurrent;
02154 }
02155 }
02156 }
02157
02158 #ifdef DYNPROG_TIMING
02159 MyTime.stop() ;
02160 svm_pos_time += MyTime.time_diff_sec() ;
02161 #endif
02162 }
02163
02164
02165 void CDynProg::find_svm_values_till_pos(
02166 uint16_t** wordstr, const int32_t *pos, int32_t t_end,
02167 struct svm_values_struct &svs)
02168 {
02169 #ifdef DYNPROG_TIMING
02170 MyTime.start() ;
02171 #endif
02172
02173
02174
02175
02176
02177
02178
02179
02180
02181
02182
02183
02184
02185
02186
02187 for (int32_t j=0; j<num_degrees; j++)
02188
02189 {
02190 int32_t plen = 1;
02191 int32_t ts = t_end-1;
02192 int32_t offset;
02193
02194 int32_t posprev = pos[t_end]-word_degree[j]+1;
02195 int32_t poscurrent = pos[ts];
02196
02197
02198
02199 if (poscurrent<0)
02200 poscurrent = 0;
02201 float64_t * my_svm_values_unnormalized = svs.svm_values_unnormalized[j] ;
02202 int32_t * my_num_unique_words = svs.num_unique_words[j] ;
02203 bool ** my_word_used = svs.word_used[j] ;
02204
02205 int32_t len = pos[t_end] - poscurrent;
02206 while ((ts>=0) && (len <= svs.maxlookback))
02207 {
02208 for (int32_t i=posprev-1 ; (i>=poscurrent) && (i>=0) ; i--)
02209 {
02210
02211
02212 uint16_t word = wordstr[j][i] ;
02213 for (int32_t s=0; s<num_svms; s++)
02214 {
02215
02216
02217 if (sign_words_array[s] && my_word_used[s][word])
02218 continue ;
02219
02220
02221
02222 {
02223 my_svm_values_unnormalized[s] += dict_weights_array[(word+cum_num_words_array[j])+s*cum_num_words_array[num_degrees]] ;
02224
02225 my_num_unique_words[s]++ ;
02226 if (sign_words_array[s])
02227 my_word_used[s][word]=true ;
02228 }
02229 }
02230 }
02231 offset = plen*num_svms ;
02232 for (int32_t s=0; s<num_svms; s++)
02233 {
02234 float64_t normalization_factor = 1.0;
02235 if (my_num_unique_words[s] > 0)
02236 {
02237 if (sign_words_array[s])
02238 normalization_factor = sqrt((float64_t)my_num_unique_words[s]);
02239 else
02240 normalization_factor = (float64_t)my_num_unique_words[s];
02241 }
02242
02243 if (j==0)
02244 svs.svm_values[offset+s]=0 ;
02245 svs.svm_values[offset+s] += my_svm_values_unnormalized[s] / normalization_factor;
02246 }
02247
02248 if (posprev > poscurrent)
02249 posprev = poscurrent;
02250
02251 ts--;
02252 plen++;
02253
02254 if (ts>=0)
02255 {
02256 poscurrent=pos[ts];
02257 if (poscurrent<0)
02258 poscurrent = 0;
02259 len = pos[t_end] - poscurrent;
02260 }
02261 }
02262 }
02263
02264 #ifdef DYNPROG_TIMING
02265 MyTime.stop() ;
02266 svm_pos_time += MyTime.time_diff_sec() ;
02267 #endif
02268 }
02269
02270 bool CDynProg::extend_orf(
02271 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
02272 int32_t to)
02273 {
02274 #ifdef DYNPROG_TIMING
02275 MyTime.start() ;
02276 #endif
02277
02278 if (start<0)
02279 start=0 ;
02280 if (to<0)
02281 to=0 ;
02282
02283 int32_t orf_target = orf_to-orf_from ;
02284 if (orf_target<0) orf_target+=3 ;
02285
02286 int32_t pos ;
02287 if (last_pos==to)
02288 pos = to-orf_to-3 ;
02289 else
02290 pos=last_pos ;
02291
02292 if (pos<0)
02293 return true ;
02294
02295 for (; pos>=start; pos-=3)
02296 if (m_genestr_stop[pos])
02297 return false ;
02298
02299 last_pos = CMath::min(pos+3,to-orf_to-3) ;
02300
02301 #ifdef DYNPROG_TIMING
02302 MyTime.stop() ;
02303 orf_time += MyTime.time_diff_sec() ;
02304 #endif
02305 return true ;
02306 }
02307
02308 template <int16_t nbest, bool with_loss, bool with_multiple_sequences>
02309 void CDynProg::best_path_trans(
02310 const float64_t *seq_array, int32_t seq_len, const int32_t *pos,
02311 const int32_t *orf_info_array, CPlifBase **Plif_matrix,
02312 CPlifBase **Plif_state_signals, int32_t max_num_signals,
02313 int32_t genestr_num, float64_t *prob_nbest, int32_t *my_state_seq,
02314 int32_t *my_pos_seq, bool use_orf)
02315 {
02316 #ifdef DYNPROG_TIMING
02317 segment_init_time = 0.0 ;
02318 segment_pos_time = 0.0 ;
02319 segment_extend_time = 0.0 ;
02320 segment_clean_time = 0.0 ;
02321 orf_time = 0.0 ;
02322 svm_init_time = 0.0 ;
02323 svm_pos_time = 0.0 ;
02324 svm_clean_time = 0.0 ;
02325 MyTime2.start() ;
02326 #endif
02327
02328
02329 if (!svm_arrays_clean)
02330 {
02331 SG_ERROR( "SVM arrays not clean") ;
02332 return ;
02333 }
02334
02335 #ifdef DYNPROG_DEBUG
02336 transition_matrix_a.set_name("transition_matrix");
02337 transition_matrix_a.display_array();
02338 mod_words.display_array() ;
02339 sign_words.display_array() ;
02340 string_words.display_array() ;
02341 fprintf(stderr, "use_orf = %i\n", use_orf) ;
02342 #endif
02343
02344 int32_t max_look_back = 20000 ;
02345 bool use_svm = false ;
02346
02347
02348
02349
02350 SG_PRINT("N:%i, seq_len:%i, max_num_signals:%i\n",N, seq_len, max_num_signals) ;
02351
02352
02353
02354
02355
02356 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, true) ;
02357 PEN.set_name("PEN");
02358
02359 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, true) ;
02360 PEN_state_signals.set_name("state_signals");
02361
02362 CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02363 seq_input.set_name("seq_input") ;
02364
02365 CArray2<float64_t> seq(N, seq_len) ;
02366 seq.set_name("seq") ;
02367 seq.zero() ;
02368
02369 CArray2<int32_t> orf_info(orf_info_array, N, 2) ;
02370 orf_info.set_name("orf_info") ;
02371
02372
02373
02374 float64_t svm_value[2*num_svms] ;
02375 {
02376 for (int32_t s=0; s<2*num_svms; s++)
02377 svm_value[s]=0 ;
02378 }
02379
02380 {
02381
02382 for (int32_t i=0; i<N; i++)
02383 for (int32_t j=0; j<seq_len; j++)
02384 seq.element(i,j) = 0 ;
02385
02386 for (int32_t i=0; i<N; i++)
02387 for (int32_t j=0; j<seq_len; j++)
02388 for (int32_t k=0; k<max_num_signals; k++)
02389 {
02390 if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
02391 {
02392
02393 seq.element(i,j) = seq_input.element(i,j,k) ;
02394 break ;
02395 }
02396 if (PEN_state_signals.element(i,k)!=NULL)
02397 {
02398
02399 if (finite(seq_input.element(i,j,k)))
02400 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input.element(i,j,k), svm_value) ;
02401 else
02402
02403 seq.element(i,j) = seq_input.element(i, j, k) ;
02404 }
02405 else
02406 break ;
02407 }
02408 }
02409
02410
02411 {
02412 for (int32_t i=0; i<N; i++)
02413 {
02414
02415 const T_STATES num_elem = trans_list_forward_cnt[i] ;
02416 const T_STATES *elem_list = trans_list_forward[i] ;
02417
02418 for (int32_t jj=0; jj<num_elem; jj++)
02419 {
02420 T_STATES j = elem_list[jj] ;
02421
02422 CPlifBase *penij=PEN.element(i,j) ;
02423 if (penij==NULL)
02424 continue ;
02425 if (penij->get_max_value()>max_look_back)
02426 {
02427 SG_DEBUG( "%d %d -> value: %f\n", i,j,penij->get_max_value());
02428
02429 max_look_back=(int32_t) (CMath::ceil(penij->get_max_value()));
02430 }
02431 if (penij->uses_svm_values())
02432 use_svm=true ;
02433 }
02434 }
02435 }
02436
02437
02438 max_look_back = CMath::min(m_genestr_len, max_look_back) ;
02439 SG_DEBUG("use_svm=%i\n", use_svm) ;
02440
02441 SG_DEBUG("maxlook: %d N: %d nbest: %d \n", max_look_back, N, nbest);
02442 const int32_t look_back_buflen = (max_look_back*N+1)*nbest ;
02443 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
02444 const float64_t mem_use = (float64_t)(seq_len*N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
02445 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
02446 seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
02447 m_genestr_len*sizeof(bool))/(1024*1024);
02448
02449 bool is_big = (mem_use>200) || (seq_len>5000) ;
02450
02451 if (1)
02452 {
02453 SG_DEBUG("calling best_path_trans: seq_len=%i, N=%i, lookback=%i nbest=%i\n",
02454 seq_len, N, max_look_back, nbest) ;
02455 SG_DEBUG("allocating %1.2fMB of memory\n",
02456 mem_use) ;
02457 }
02458 ASSERT(nbest<32000) ;
02459
02460
02461
02462 CArray3<float64_t> delta(seq_len, N, nbest) ;
02463 delta.set_name("delta");
02464 float64_t* delta_array = delta.get_array() ;
02465
02466
02467 CArray3<T_STATES> psi(seq_len, N, nbest) ;
02468 psi.set_name("psi");
02469
02470
02471 CArray3<int16_t> ktable(seq_len, N, nbest) ;
02472 ktable.set_name("ktable");
02473
02474
02475 CArray3<int32_t> ptable(seq_len, N, nbest) ;
02476 ptable.set_name("ptable");
02477
02478
02479 CArray<float64_t> delta_end(nbest) ;
02480 delta_end.set_name("delta_end");
02481
02482
02483 CArray<T_STATES> path_ends(nbest) ;
02484 path_ends.set_name("path_ends");
02485
02486
02487 CArray<int16_t> ktable_end(nbest) ;
02488 ktable_end.set_name("ktable_end");
02489
02490
02491 float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
02492 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
02493 int32_t * fixedtempii=new int32_t[look_back_buflen] ;
02494 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
02495
02496 CArray<float64_t> oldtempvv(look_back_buflen) ;
02497 oldtempvv.set_name("oldtempvv");
02498 CArray<float64_t> oldtempvv2(look_back_buflen) ;
02499 oldtempvv2.set_name("oldtempvv2");
02500
02501
02502
02503 CArray<int32_t> oldtempii(look_back_buflen) ;
02504 oldtempii.set_name("oldtempii");
02505 CArray<int32_t> oldtempii2(look_back_buflen) ;
02506 oldtempii2.set_name("oldtempii2");
02507
02508
02509 CArray<T_STATES> state_seq(seq_len) ;
02510 state_seq.set_name("state_seq");
02511
02512
02513 CArray<int32_t> pos_seq(seq_len) ;
02514 pos_seq.set_name("pos_seq");
02515
02516
02517
02518
02519 word_degree.set_name("word_degree") ;
02520 cum_num_words.set_name("cum_num_words") ;
02521 num_words.set_name("num_words") ;
02522
02523
02524 svm_pos_start.set_name("svm_pos_start") ;
02525 num_unique_words.set_name("num_unique_words") ;
02526
02527 PEN.set_name("PEN") ;
02528 seq.set_name("seq") ;
02529 orf_info.set_name("orf_info") ;
02530
02531 delta.set_name("delta") ;
02532 psi.set_name("psi") ;
02533 ktable.set_name("ktable") ;
02534 ptable.set_name("ptable") ;
02535 delta_end.set_name("delta_end") ;
02536 path_ends.set_name("path_ends") ;
02537 ktable_end.set_name("ktable_end") ;
02538
02539 #ifdef USE_TMP_ARRAYCLASS
02540 fixedtempvv.set_name("fixedtempvv") ;
02541 fixedtempii.set_name("fixedtempvv") ;
02542 #endif
02543
02544 oldtempvv.set_name("oldtempvv") ;
02545 oldtempvv2.set_name("oldtempvv2") ;
02546 oldtempii.set_name("oldtempii") ;
02547 oldtempii2.set_name("oldtempii2") ;
02548
02549
02551
02552 #ifdef DYNPROG_DEBUG
02553 state_seq.display_size() ;
02554 pos_seq.display_size() ;
02555
02556 dict_weights.display_size() ;
02557 word_degree.display_array() ;
02558 cum_num_words.display_array() ;
02559 num_words.display_array() ;
02560
02561
02562 svm_pos_start.display_array() ;
02563 num_unique_words.display_array() ;
02564
02565 PEN.display_size() ;
02566 PEN_state_signals.display_size() ;
02567 seq.display_size() ;
02568 orf_info.display_size() ;
02569
02570
02571 delta.display_size() ;
02572 psi.display_size() ;
02573 ktable.display_size() ;
02574 ptable.display_size() ;
02575 delta_end.display_size() ;
02576 path_ends.display_size() ;
02577 ktable_end.display_size() ;
02578
02579 #ifdef USE_TMP_ARRAYCLASS
02580 fixedtempvv.display_size() ;
02581 fixedtempii.display_size() ;
02582 #endif
02583
02584
02585
02586
02587 state_seq.display_size() ;
02588 pos_seq.display_size() ;
02589
02590 CArray<int32_t>pp = CArray<int32_t>(pos, seq_len) ;
02591 pp.set_name("pp");
02592 pp.display_array() ;
02593
02594
02595
02596
02597 #endif //DYNPROG_DEBUG
02598
02600
02601
02602
02603 {
02604 for (int32_t s=0; s<num_svms; s++)
02605 ASSERT(string_words_array[s]<genestr_num) ;
02606 }
02607
02608
02609
02610
02611
02612 {
02613
02614 for (T_STATES i=0; i<N; i++)
02615 {
02616
02617 delta.element(delta_array, 0, i, 0, seq_len, N) = get_p(i) + seq.element(i,0) ;
02618 psi.element(0,i,0) = 0 ;
02619 if (nbest>1)
02620 ktable.element(0,i,0) = 0 ;
02621 ptable.element(0,i,0) = 0 ;
02622 for (int16_t k=1; k<nbest; k++)
02623 {
02624 int32_t dim1, dim2, dim3 ;
02625 delta.get_array_size(dim1, dim2, dim3) ;
02626
02627
02628 delta.element(delta_array, 0, i, k, seq_len, N) = -CMath::INFTY ;
02629 psi.element(0,i,0) = 0 ;
02630 if (nbest>1)
02631 ktable.element(0,i,k) = 0 ;
02632 ptable.element(0,i,k) = 0 ;
02633 }
02634
02635
02636
02637
02638
02639
02640
02641
02642
02643
02644
02645
02646
02647
02648
02649
02650
02651
02652 }
02653 }
02654
02655 struct svm_values_struct svs;
02656 svs.num_unique_words = NULL;
02657 svs.svm_values = NULL;
02658 svs.svm_values_unnormalized = NULL;
02659 svs.word_used = NULL;
02660
02661 struct segment_loss_struct loss;
02662 loss.segments_changed = NULL;
02663 loss.num_segment_id = NULL;
02664
02665 SG_DEBUG("START_RECURSION \n\n");
02666
02667
02668 for (int32_t t=1; t<seq_len; t++)
02669 {
02670 if (is_big && t%(1+(seq_len/1000))==1)
02671 SG_PROGRESS(t, 0, seq_len);
02672
02673
02674
02675
02676
02677
02678
02679
02680 if (with_loss)
02681 {
02682 init_segment_loss(loss, seq_len, max_look_back);
02683 find_segment_loss_till_pos(pos, t, m_segment_ids, m_segment_mask, loss);
02684 }
02685
02686 for (T_STATES j=0; j<N; j++)
02687 {
02688 if (seq.element(j,t)<=-1e20)
02689 {
02690 for (int16_t k=0; k<nbest; k++)
02691 {
02692 delta.element(delta_array, t, j, k, seq_len, N) = seq.element(j,t) ;
02693 psi.element(t,j,k) = 0 ;
02694 if (nbest>1)
02695 ktable.element(t,j,k) = 0 ;
02696 ptable.element(t,j,k) = 0 ;
02697 }
02698 }
02699 else
02700 {
02701 const T_STATES num_elem = trans_list_forward_cnt[j] ;
02702 const T_STATES *elem_list = trans_list_forward[j] ;
02703 const float64_t *elem_val = trans_list_forward_val[j] ;
02704 const int32_t *elem_id = trans_list_forward_id[j] ;
02705
02706 int32_t fixed_list_len = 0 ;
02707 float64_t fixedtempvv_ = CMath::INFTY ;
02708 int32_t fixedtempii_ = 0 ;
02709
02710
02711 for (int32_t i=0; i<num_elem; i++)
02712 {
02713 T_STATES ii = elem_list[i] ;
02714
02715 const CPlifBase * penalty = PEN.element(j,ii) ;
02716 int32_t look_back = max_look_back ;
02717 {
02718 CPlifBase *pen = (CPlifBase*) penalty ;
02719 if (pen!=NULL)
02720 look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
02721 if (look_back>=1e6)
02722 fprintf(stderr, "%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
02723 ASSERT(look_back<1e6);
02724 }
02725
02726
02727
02728
02729
02730
02731 int32_t orf_from = orf_info.element(ii,0) ;
02732 int32_t orf_to = orf_info.element(j,1) ;
02733 if((orf_from!=-1)!=(orf_to!=-1))
02734 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
02735 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
02736
02737 int32_t orf_target = -1 ;
02738 if (orf_from!=-1)
02739 {
02740 orf_target=orf_to-orf_from ;
02741 if (orf_target<0)
02742 orf_target+=3 ;
02743 ASSERT(orf_target>=0 && orf_target<3) ;
02744 }
02745
02746 int32_t orf_last_pos = pos[t] ;
02747 int32_t loss_last_pos = t ;
02748 float64_t last_loss = 0.0 ;
02749
02750 for (int32_t ts=t-1; ts>=0 && pos[t]-pos[ts]<=look_back; ts--)
02751 {
02752 bool ok ;
02753
02754
02755
02756
02757
02758
02759
02760
02761
02762 if (orf_target==-1)
02763 ok=true ;
02764 else if (pos[ts]!=-1 && (pos[t]-pos[ts])%3==orf_target)
02765 ok=(!use_orf) || extend_orf(orf_from, orf_to, pos[ts], orf_last_pos, pos[t]) ;
02766 else
02767 ok=false ;
02768
02769 if (ok)
02770 {
02771
02772 float64_t segment_loss = 0.0 ;
02773 if (with_loss)
02774 segment_loss = extend_segment_loss(loss, pos, elem_id[i], ts, loss_last_pos, last_loss) ;
02775
02777
02779 int32_t frame = orf_info.element(ii,0);
02780 lookup_content_svm_values(ts, t, pos[ts], pos[t], svm_value, frame);
02781 if (m_use_tiling)
02782 lookup_tiling_plif_values(ts, t, pos[t]-pos[ts], svm_value);
02783
02784
02785
02786
02787
02788
02789
02790
02791
02792
02793 float64_t pen_val = 0.0 ;
02794 if (penalty)
02795 pen_val = penalty->lookup_penalty(pos[t]-pos[ts], svm_value) ;
02796 if (nbest==1)
02797 {
02798 float64_t val = elem_val[i] + pen_val ;
02799 if (with_loss)
02800 val += segment_loss ;
02801
02802 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, seq_len, N)) ;
02803 if (mval<fixedtempvv_)
02804 {
02805 fixedtempvv_ = mval ;
02806 fixedtempii_ = ii + ts*N;
02807 fixed_list_len = 1 ;
02808 }
02809 }
02810 else
02811 {
02812 for (int16_t diff=0; diff<nbest; diff++)
02813 {
02814 float64_t val = elem_val[i] ;
02815 val += pen_val ;
02816 if (with_loss)
02817 val += segment_loss ;
02818
02819 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, seq_len, N)) ;
02820
02821
02822
02823
02824
02825 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
02826 {
02827 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
02828 {
02829 fixedtempvv[fixed_list_len] = mval ;
02830 fixedtempii[fixed_list_len] = ii + diff*N + ts*N*nbest;
02831 fixed_list_len++ ;
02832 }
02833 else
02834 {
02835 int32_t addhere = fixed_list_len;
02836 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
02837 addhere--;
02838
02839
02840 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
02841 {
02842 fixedtempvv[jj] = fixedtempvv[jj-1];
02843 fixedtempii[jj] = fixedtempii[jj-1];
02844 }
02845
02846 fixedtempvv[addhere] = mval;
02847 fixedtempii[addhere] = ii + diff*N + ts*N*nbest;
02848
02849 if (fixed_list_len < nbest)
02850 fixed_list_len++;
02851 }
02852 }
02853 }
02854 }
02855 }
02856 }
02857 }
02858
02859
02860 int32_t numEnt = fixed_list_len;
02861
02862 float64_t minusscore;
02863 int64_t fromtjk;
02864
02865 for (int16_t k=0; k<nbest; k++)
02866 {
02867 if (k<numEnt)
02868 {
02869 if (nbest==1)
02870 {
02871 minusscore = fixedtempvv_ ;
02872 fromtjk = fixedtempii_ ;
02873 }
02874 else
02875 {
02876 minusscore = fixedtempvv[k];
02877 fromtjk = fixedtempii[k];
02878 }
02879
02880 delta.element(delta_array, t, j, k, seq_len, N) = -minusscore + seq.element(j,t);
02881 psi.element(t,j,k) = (fromtjk%N) ;
02882 if (nbest>1)
02883 ktable.element(t,j,k) = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
02884 ptable.element(t,j,k) = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
02885 }
02886 else
02887 {
02888 delta.element(delta_array, t, j, k, seq_len, N) = -CMath::INFTY ;
02889 psi.element(t,j,k) = 0 ;
02890 if (nbest>1)
02891 ktable.element(t,j,k) = 0 ;
02892 ptable.element(t,j,k) = 0 ;
02893 }
02894 }
02895 }
02896 }
02897 }
02898
02899 clear_svm_values(svs);
02900 if (with_loss)
02901 clear_segment_loss(loss);
02902
02903 {
02904 int32_t list_len = 0 ;
02905 for (int16_t diff=0; diff<nbest; diff++)
02906 {
02907 for (T_STATES i=0; i<N; i++)
02908 {
02909 oldtempvv[list_len] = -(delta.element(delta_array, (seq_len-1), i, diff, seq_len, N)+get_q(i)) ;
02910 oldtempii[list_len] = i + diff*N ;
02911 list_len++ ;
02912 }
02913 }
02914
02915 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02916
02917 for (int16_t k=0; k<nbest; k++)
02918 {
02919 delta_end.element(k) = -oldtempvv[k] ;
02920 path_ends.element(k) = (oldtempii[k]%N) ;
02921 if (nbest>1)
02922 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
02923 }
02924 }
02925
02926 {
02927 for (int16_t k=0; k<nbest; k++)
02928 {
02929 prob_nbest[k]= delta_end.element(k) ;
02930
02931 int32_t i = 0 ;
02932 state_seq[i] = path_ends.element(k) ;
02933 int16_t q = 0 ;
02934 if (nbest>1)
02935 q=ktable_end.element(k) ;
02936 pos_seq[i] = seq_len-1 ;
02937
02938 while (pos_seq[i]>0)
02939 {
02940
02941 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02942 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
02943 if (nbest>1)
02944 q = ktable.element(pos_seq[i], state_seq[i], q) ;
02945 i++ ;
02946 }
02947
02948 int32_t num_states = i+1 ;
02949 for (i=0; i<num_states;i++)
02950 {
02951 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
02952 my_pos_seq[i+k*seq_len] = pos_seq[num_states-i-1] ;
02953 }
02954 my_state_seq[num_states+k*seq_len]=-1 ;
02955 my_pos_seq[num_states+k*seq_len]=-1 ;
02956 }
02957 }
02958
02959 if (is_big)
02960 SG_PRINT( "DONE. \n") ;
02961
02962
02963 #ifdef DYNPROG_TIMING
02964 MyTime2.stop() ;
02965
02966 if (is_big)
02967 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, MyTime2.time_diff_sec()) ;
02968 #endif
02969
02970 delete[] fixedtempvv ;
02971 delete[] fixedtempii ;
02972 }
02973
02974 void CDynProg::best_path_trans_deriv(
02975 int32_t *my_state_seq, int32_t *my_pos_seq, float64_t *my_scores,
02976 float64_t* my_losses,int32_t my_seq_len, const float64_t *seq_array,
02977 int32_t seq_len, const int32_t *pos, CPlifBase **Plif_matrix,
02978 CPlifBase **Plif_state_signals, int32_t max_num_signals,int32_t genestr_num)
02979 {
02980 if (!svm_arrays_clean)
02981 {
02982 SG_ERROR( "SVM arrays not clean") ;
02983 return ;
02984 } ;
02985
02986
02987
02988
02989
02990 bool use_svm = false ;
02991
02992
02993
02994
02995 CArray2<CPlifBase*> PEN(Plif_matrix, N, N, false, false) ;
02996 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, N, max_num_signals, false, false) ;
02997 CArray3<float64_t> seq_input(seq_array, N, seq_len, max_num_signals) ;
02998
02999 {
03000 for (int32_t i=0; i<N; i++)
03001 for (int32_t j=0; j<N; j++)
03002 {
03003 CPlifBase *penij=PEN.element(i,j) ;
03004 if (penij==NULL)
03005 continue ;
03006
03007 if (penij->uses_svm_values())
03008 use_svm=true ;
03009 penij->penalty_clear_derivative() ;
03010 }
03011 for (int32_t i=0; i<N; i++)
03012 for (int32_t j=0; j<max_num_signals; j++)
03013 {
03014 CPlifBase *penij=PEN_state_signals.element(i,j) ;
03015 if (penij==NULL)
03016 continue ;
03017 if (penij->uses_svm_values())
03018 use_svm=true ;
03019 penij->penalty_clear_derivative() ;
03020 }
03021 }
03022
03023 {
03024 for (int32_t i=0; i<N; i++)
03025 {
03026 initial_state_distribution_p_deriv.element(i)=0 ;
03027 end_state_distribution_q_deriv.element(i)=0 ;
03028 for (int32_t j=0; j<N; j++)
03029 transition_matrix_a_deriv.element(i,j)=0 ;
03030 }
03031 }
03032
03033 {
03034 for (int32_t i=0; i<my_seq_len; i++)
03035 {
03036 my_scores[i]=0.0 ;
03037 my_losses[i]=0.0 ;
03038 }
03039 }
03040
03041
03042
03043
03044
03045
03046 {
03047 float64_t svm_value[2*num_svms] ;
03048 for (int32_t s=0; s<2*num_svms; s++)
03049 svm_value[s]=0 ;
03050
03051 for (int32_t i=0; i<my_seq_len; i++)
03052 {
03053 my_scores[i]=0.0 ;
03054 my_losses[i]=0.0 ;
03055 }
03056
03057
03058 float64_t total_score = 0.0 ;
03059 float64_t total_loss = 0.0 ;
03060
03061
03062 ASSERT(my_state_seq[0]>=0) ;
03063 initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
03064 my_scores[0] += initial_state_distribution_p.element(my_state_seq[0]) ;
03065
03066 ASSERT(my_state_seq[my_seq_len-1]>=0) ;
03067 end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
03068 my_scores[my_seq_len-1] += end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
03069
03070
03071 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
03072
03073 struct svm_values_struct svs;
03074 svs.num_unique_words = NULL;
03075 svs.svm_values = NULL;
03076 svs.svm_values_unnormalized = NULL;
03077 svs.word_used = NULL;
03078
03079
03080 struct segment_loss_struct loss;
03081 loss.segments_changed = NULL;
03082 loss.num_segment_id = NULL;
03083
03084 for (int32_t i=0; i<my_seq_len-1; i++)
03085 {
03086 if (my_state_seq[i+1]==-1)
03087 break ;
03088 int32_t from_state = my_state_seq[i] ;
03089 int32_t to_state = my_state_seq[i+1] ;
03090 int32_t from_pos = my_pos_seq[i] ;
03091 int32_t to_pos = my_pos_seq[i+1] ;
03092
03093
03094 init_segment_loss(loss, seq_len, pos[to_pos]-pos[from_pos]+10);
03095 find_segment_loss_till_pos(pos, to_pos,m_segment_ids, m_segment_mask, loss);
03096
03097 int32_t loss_last_pos = to_pos ;
03098 float64_t last_loss = 0.0 ;
03099 int32_t elem_id = transition_matrix_a_id.element(from_state, to_state) ;
03100 my_losses[i] = extend_segment_loss(loss, pos, elem_id, from_pos, loss_last_pos, last_loss) ;
03101 #ifdef DYNPROG_DEBUG
03102 io.set_loglevel(M_DEBUG) ;
03103 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
03104 #endif
03105
03106 transition_matrix_a_deriv.element(from_state, to_state)++ ;
03107 my_scores[i] += transition_matrix_a.element(from_state, to_state) ;
03108
03109 #ifdef DYNPROG_DEBUG
03110 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03111 #endif
03112
03113
03114
03115
03116
03117 if (use_svm)
03118 {
03119
03120 int32_t frame = m_orf_info.element(from_state,0);
03121 if (false)
03122 {
03123 int32_t num_current_svms=0;
03124 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
03125 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame);
03126 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
03127 SG_PRINT("\n");
03128 }
03129
03130
03131 lookup_content_svm_values(from_pos, to_pos, pos[from_pos],pos[to_pos], svm_value, frame);
03132 if (false)
03133 SG_PRINT("svm_values: %f, %f, %f \n", svm_value[4], svm_value[5], svm_value[6]);
03134 if (m_use_tiling)
03135 lookup_tiling_plif_values(from_pos, to_pos, pos[to_pos]-pos[from_pos], svm_value);
03136
03137 #ifdef DYNPROG_DEBUG
03138 SG_DEBUG( "svm[%i]: %f\n", ss, svm_value[ss]) ;
03139 #endif
03140 }
03141
03142 if (PEN.element(to_state, from_state)!=NULL)
03143 {
03144 float64_t nscore = PEN.element(to_state, from_state)->lookup_penalty(pos[to_pos]-pos[from_pos], svm_value) ;
03145 my_scores[i] += nscore ;
03146
03147 if (m_use_tiling)
03148 for (int32_t s=0;s<num_svms;s++)
03149 svm_value[num_svms+s]=-CMath::INFTY;
03150
03151 #ifdef DYNPROG_DEBUG
03152
03153
03154
03155
03156
03157
03158
03159
03160
03161
03162
03163 #endif
03164 PEN.element(to_state, from_state)->penalty_add_derivative(pos[to_pos]-pos[from_pos], svm_value) ;
03165 if (m_use_tiling)
03166 {
03167 for (int32_t s=0;s<num_svms;s++)
03168 svm_value[s]=-CMath::INFTY;
03169 float64_t intensities[m_num_probes];
03170 int32_t num_intensities = raw_intensities_interval_query(pos[from_pos], pos[to_pos],intensities);
03171
03172 for (int32_t k=0;k<num_intensities;k++)
03173 {
03174 for (int32_t j=0;j<num_svms;j++)
03175 svm_value[num_svms+j]=intensities[k];
03176 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value) ;
03177
03178 }
03179 }
03180 }
03181 #ifdef DYNPROG_DEBUG
03182 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
03183 #endif
03184
03185
03186 for (int32_t k=0; k<max_num_signals; k++)
03187 {
03188 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
03189 {
03190 #ifdef DYNPROG_DEBUG
03191 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
03192 #endif
03193 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
03194
03195 break ;
03196 }
03197 if (PEN_state_signals.element(to_state, k)!=NULL)
03198 {
03199 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
03200 my_scores[i] += nscore ;
03201
03202
03203
03204
03205
03206
03207
03208 #ifdef DYNPROG_DEBUG
03209 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
03210 #endif
03211 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value) ;
03212 } else
03213 break ;
03214 }
03215
03216
03217
03218
03219 total_score += my_scores[i] ;
03220 total_loss += my_losses[i] ;
03221
03222 }
03223
03224
03225 SG_PRINT( "total loss = %f \n", total_loss) ;
03226
03227 clear_svm_values(svs);
03228 clear_segment_loss(loss);
03229 }
03230
03231
03232 }
03233
03234
03235 void CDynProg::best_path_trans_simple(
03236 const float64_t *seq_array, int32_t seq_len, int16_t nbest,
03237 float64_t *prob_nbest, int32_t *my_state_seq)
03238 {
03239 if (!svm_arrays_clean)
03240 {
03241 SG_ERROR( "SVM arrays not clean") ;
03242 return ;
03243 } ;
03244
03245 int32_t max_look_back = 2 ;
03246 const int32_t look_back_buflen = max_look_back*nbest*N ;
03247 ASSERT(nbest<32000) ;
03248
03249 CArray2<float64_t> seq((float64_t *)seq_array, N, seq_len, false) ;
03250
03251 CArray3<float64_t> delta(max_look_back, N, nbest) ;
03252 CArray3<T_STATES> psi(seq_len, N, nbest) ;
03253 CArray3<int16_t> ktable(seq_len,N,nbest) ;
03254 CArray3<int32_t> ptable(seq_len,N,nbest) ;
03255
03256 CArray<float64_t> delta_end(nbest) ;
03257 CArray<T_STATES> path_ends(nbest) ;
03258 CArray<int16_t> ktable_end(nbest) ;
03259
03260 CArray<float64_t> oldtempvv(look_back_buflen) ;
03261 CArray<int32_t> oldtempii(look_back_buflen) ;
03262
03263 CArray<T_STATES> state_seq(seq_len) ;
03264 CArray<int32_t> pos_seq(seq_len) ;
03265
03266 {
03267
03268 for (T_STATES i=0; i<N; i++)
03269 {
03270 delta.element(0,i,0) = get_p(i) + seq.element(i,0) ;
03271 psi.element(0,i,0) = 0 ;
03272 ktable.element(0,i,0) = 0 ;
03273 ptable.element(0,i,0) = 0 ;
03274 for (int16_t k=1; k<nbest; k++)
03275 {
03276 delta.element(0,i,k) = -CMath::INFTY ;
03277 psi.element(0,i,0) = 0 ;
03278 ktable.element(0,i,k) = 0 ;
03279 ptable.element(0,i,k) = 0 ;
03280 }
03281 }
03282 }
03283
03284
03285 for (int32_t t=1; t<seq_len; t++)
03286 {
03287 for (T_STATES j=0; j<N; j++)
03288 {
03289 if (seq.element(j,t)<-1e20)
03290 {
03291 for (int16_t k=0; k<nbest; k++)
03292 {
03293 delta.element(t%max_look_back,j,k) = seq.element(j,t) ;
03294 psi.element(t,j,k) = 0 ;
03295 ktable.element(t,j,k) = 0 ;
03296 ptable.element(t,j,k) = 0 ;
03297 }
03298 }
03299 else
03300 {
03301 const T_STATES num_elem = trans_list_forward_cnt[j] ;
03302 const T_STATES *elem_list = trans_list_forward[j] ;
03303 const float64_t *elem_val = trans_list_forward_val[j] ;
03304
03305 int32_t old_list_len = 0 ;
03306
03307 for (int32_t i=0; i<num_elem; i++)
03308 {
03309 T_STATES ii = elem_list[i] ;
03310
03311 int32_t ts=t-1;
03312 if (ts>=0)
03313 {
03314 bool ok=true ;
03315
03316 if (ok)
03317 {
03318
03319
03320 for (int16_t diff=0; diff<nbest; diff++)
03321 {
03322 float64_t val = delta.element(ts%max_look_back,ii,diff) + elem_val[i] ;
03323 float64_t mval = -val;
03324
03325 oldtempvv[old_list_len] = mval ;
03326 oldtempii[old_list_len] = ii + diff*N + ts*N*nbest;
03327 old_list_len++ ;
03328 }
03329 }
03330 }
03331 }
03332
03333 CMath::nmin<int32_t>(oldtempvv.get_array(), oldtempii.get_array(), old_list_len, nbest) ;
03334
03335 int32_t numEnt = 0;
03336 numEnt = old_list_len;
03337
03338 float64_t minusscore;
03339 int64_t fromtjk;
03340
03341 for (int16_t k=0; k<nbest; k++)
03342 {
03343 if (k<numEnt)
03344 {
03345 minusscore = oldtempvv[k];
03346 fromtjk = oldtempii[k];
03347
03348 delta.element(t%max_look_back,j,k) = -minusscore + seq.element(j,t);
03349 psi.element(t,j,k) = (fromtjk%N) ;
03350 ktable.element(t,j,k) = (fromtjk%(N*nbest)-psi.element(t,j,k))/N ;
03351 ptable.element(t,j,k) = (fromtjk-(fromtjk%(N*nbest)))/(N*nbest) ;
03352 }
03353 else
03354 {
03355 delta.element(t%max_look_back,j,k) = -CMath::INFTY ;
03356 psi.element(t,j,k) = 0 ;
03357 ktable.element(t,j,k) = 0 ;
03358 ptable.element(t,j,k) = 0 ;
03359 }
03360 }
03361
03362 }
03363 }
03364 }
03365
03366
03367 {
03368 int32_t list_len = 0 ;
03369 for (int16_t diff=0; diff<nbest; diff++)
03370 {
03371 for (T_STATES i=0; i<N; i++)
03372 {
03373 oldtempvv[list_len] = -(delta.element((seq_len-1)%max_look_back,i,diff)+get_q(i)) ;
03374 oldtempii[list_len] = i + diff*N ;
03375 list_len++ ;
03376 }
03377 }
03378
03379 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
03380
03381 for (int16_t k=0; k<nbest; k++)
03382 {
03383 delta_end.element(k) = -oldtempvv[k] ;
03384 path_ends.element(k) = (oldtempii[k]%N) ;
03385 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/N ;
03386 }
03387 }
03388
03389 {
03390 for (int16_t k=0; k<nbest; k++)
03391 {
03392 prob_nbest[k]= delta_end.element(k) ;
03393
03394 int32_t i = 0 ;
03395 state_seq[i] = path_ends.element(k) ;
03396 int16_t q = ktable_end.element(k) ;
03397 pos_seq[i] = seq_len-1 ;
03398
03399 while (pos_seq[i]>0)
03400 {
03401
03402 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
03403 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
03404 q = ktable.element(pos_seq[i], state_seq[i], q) ;
03405 i++ ;
03406 }
03407
03408 int32_t num_states = i+1 ;
03409 for (i=0; i<num_states;i++)
03410 {
03411 my_state_seq[i+k*seq_len] = state_seq[num_states-i-1] ;
03412 }
03413
03414 }
03415
03416 }
03417 }
03418