00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021
00022
00023 #define NO_CHILD ((INT)-1073741824)
00024
00025 #define WEIGHTS_IN_TRIE
00026
00027
00028 #ifdef TRIE_CHECK_EVERYTHING
00029 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00030 #else
00031 #define TRIE_ASSERT_EVERYTHING(x)
00032 #endif
00033
00034
00035 #define TRIE_ASSERT(x)
00036
00037 #define TRIE_TERMINAL_CHARACTER 7
00038
00040 struct ConsensusEntry
00041 {
00043 ULONG string;
00045 SHORTREAL score;
00047 INT bt;
00048 };
00049
00051 struct POIMTrie
00052 {
00054 DREAL weight;
00055 #ifdef TRIE_CHECK_EVERYTHING
00056
00057 bool has_seq;
00059 bool has_floats;
00060 #endif
00061 union
00062 {
00064 SHORTREAL child_weights[4];
00066 INT children[4];
00068 BYTE seq[16] ;
00069 };
00070
00072 DREAL S;
00074 DREAL L;
00076 DREAL R;
00077 };
00078
00080 struct DNATrie
00081 {
00083 DREAL weight;
00084 #ifdef TRIE_CHECK_EVERYTHING
00085
00086 bool has_seq;
00088 bool has_floats;
00089 #endif
00090 union
00091 {
00093 SHORTREAL child_weights[4];
00095 INT children[4];
00097 BYTE seq[16] ;
00098 };
00099 };
00100
00102 struct TreeParseInfo {
00104 INT num_sym;
00106 INT num_feat;
00108 INT p;
00110 INT k;
00112 INT* nofsKmers;
00114 DREAL* margFactors;
00116 INT* x;
00118 INT* substrs;
00120 INT y0;
00122 DREAL* C_k;
00124 DREAL* L_k;
00126 DREAL* R_k;
00127 };
00128
00129
00130 template <class Trie> class CTrie;
00131
00132 template <class Trie>
00134 class CTrie : public CSGObject
00135 {
00136 public:
00143 CTrie(INT d, bool p_use_compact_terminal_nodes=true);
00144
00146 CTrie(const CTrie & to_copy);
00147 ~CTrie();
00148
00150 const CTrie & operator=(const CTrie & to_copy);
00151
00159 bool compare_traverse(INT node, const CTrie & other, INT other_node);
00160
00166 bool compare(const CTrie & other);
00167
00174 bool find_node(INT node, INT * trace, INT &trace_len) const;
00175
00182 INT find_deepest_node(INT start_node, INT &deepest_node) const;
00183
00188 void display_node(INT node) const;
00189
00191 void destroy();
00192
00197 void set_degree(INT d);
00198
00205 void create(INT len, bool p_use_compact_terminal_nodes=true);
00206
00212 void delete_trees(bool p_use_compact_terminal_nodes=true);
00213
00224 void add_to_trie(int i, INT seq_offset, INT * vec, float alpha, DREAL *weights, bool degree_times_position_weights);
00225
00232 DREAL compute_abs_weights_tree(INT tree, INT depth);
00233
00239 DREAL* compute_abs_weights(int &len);
00240
00253 DREAL compute_by_tree_helper(INT* vec, INT len, INT seq_pos, INT tree_pos, INT weight_pos, DREAL * weights, bool degree_times_position_weights) ;
00254
00269 void compute_by_tree_helper(INT* vec, INT len, INT seq_pos, INT tree_pos, INT weight_pos, DREAL* LevelContrib, DREAL factor, INT mkl_stepsize, DREAL * weights, bool degree_times_position_weights);
00270
00285 void compute_scoring_helper(INT tree, INT i, INT j, DREAL weight, INT d, INT max_degree, INT num_feat, INT num_sym, INT sym_offset, INT offs, DREAL* result);
00286
00299 void add_example_to_tree_mismatch_recursion(INT tree, INT i, DREAL alpha, INT *vec, INT len_rem, INT degree_rec, INT mismatch_rec, INT max_mismatch, DREAL * weights);
00300
00310 void traverse(INT tree, const INT p, struct TreeParseInfo info, const INT depth, INT* const x, const INT k);
00311
00321 void count(const DREAL w, const INT depth, const struct TreeParseInfo info, const INT p, INT* x, const INT k);
00322
00329 INT compact_nodes(INT start_node, INT depth, DREAL * weights);
00330
00339 DREAL get_cumulative_score(INT pos, ULONG seq, INT deg, DREAL* weights);
00340
00350 void fill_backtracking_table_recursion(Trie* tree, INT depth, ULONG seq, DREAL value, CDynamicArray<ConsensusEntry>* table, DREAL* weights);
00351
00360 void fill_backtracking_table(INT pos, CDynamicArray<ConsensusEntry>* prev, CDynamicArray<ConsensusEntry>* cur, bool cumulative, DREAL* weights);
00361
00367 void POIMs_extract_W(DREAL* const* const W, const INT K);
00368
00373 void POIMs_precalc_SLR(const DREAL* const distrib);
00374
00384 void POIMs_get_SLR(const INT parentIdx, const INT sym, const int depth, DREAL* S, DREAL* L, DREAL* R);
00385
00392 void POIMs_add_SLR(DREAL* const* const poims, const INT K, const INT debug);
00393
00398 inline bool get_use_compact_terminal_nodes()
00399 {
00400 return use_compact_terminal_nodes ;
00401 }
00402
00408 inline void set_use_compact_terminal_nodes(bool p_use_compact_terminal_nodes)
00409 {
00410 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00411 }
00412
00417 inline INT get_num_used_nodes()
00418 {
00419 return TreeMemPtr;
00420 }
00421
00426 inline void set_position_weights(const DREAL * p_position_weights)
00427 {
00428 position_weights=p_position_weights;
00429 }
00430
00435 inline INT get_node(bool last_node=false)
00436 {
00437 INT ret = TreeMemPtr++;
00438 check_treemem() ;
00439
00440 if (last_node)
00441 {
00442 for (INT q=0; q<4; q++)
00443 TreeMem[ret].child_weights[q]=0.0;
00444 }
00445 else
00446 {
00447 for (INT q=0; q<4; q++)
00448 TreeMem[ret].children[q]=NO_CHILD;
00449 }
00450 #ifdef TRIE_CHECK_EVERYTHING
00451 TreeMem[ret].has_seq=false ;
00452 TreeMem[ret].has_floats=false ;
00453 #endif
00454 TreeMem[ret].weight=0.0;
00455 return ret ;
00456 }
00457
00459 inline void check_treemem()
00460 {
00461 if (TreeMemPtr+10 < TreeMemPtrMax)
00462 return;
00463 SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00464 TreeMemPtrMax, (INT) ((double)TreeMemPtrMax*1.2));
00465 TreeMemPtrMax = (INT) ((double)TreeMemPtrMax*1.2);
00466 TreeMem = (Trie*) realloc(TreeMem,
00467 TreeMemPtrMax*sizeof(Trie));
00468 if (!TreeMem)
00469 SG_ERROR( "out of memory\n");
00470 }
00471
00476 inline void set_weights_in_tree(bool weights_in_tree_)
00477 {
00478 weights_in_tree = weights_in_tree_;
00479 }
00480
00485 inline bool get_weights_in_tree()
00486 {
00487 return weights_in_tree;
00488 }
00489
00499 void POIMs_extract_W_helper(const INT nodeIdx, const int depth, const INT offset, const INT y0, DREAL* const* const W, const INT K);
00500
00513 void POIMs_calc_SLR_helper1(const DREAL* const distrib, const INT i, const INT nodeIdx, INT left_tries_idx[4], const int depth, INT const lastSym, DREAL* S, DREAL* L, DREAL* R);
00514
00525 void POIMs_calc_SLR_helper2(const DREAL* const distrib, const INT i, const INT nodeIdx, INT left_tries_idx[4], const int depth, DREAL* S, DREAL* L, DREAL* R);
00526
00537 void POIMs_add_SLR_helper1(const INT nodeIdx, const int depth, const INT i, const INT y0, DREAL* const* const poims, const INT K, const INT debug);
00538
00552 void POIMs_add_SLR_helper2(DREAL* const* const poims, const int K, const int k, const INT i, const INT y, const DREAL valW, const DREAL valS, const DREAL valL, const DREAL valR, const INT debug);
00553
00554 public:
00556 INT NUM_SYMS;
00557
00558 protected:
00560 INT length;
00562 INT * trees;
00563
00565 INT degree;
00567 DREAL const * position_weights;
00568
00570 Trie* TreeMem;
00572 INT TreeMemPtr;
00574 INT TreeMemPtrMax;
00576 bool use_compact_terminal_nodes;
00577
00579 bool weights_in_tree;
00580
00582 INT* nofsKmers;
00583 };
00584
00585 template <class Trie>
00586 CTrie<Trie>::CTrie(INT d, bool p_use_compact_terminal_nodes)
00587 : CSGObject(), degree(d), position_weights(NULL),
00588 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00589 weights_in_tree(true)
00590 {
00591 TreeMemPtrMax=1024*1024/sizeof(Trie);
00592 TreeMemPtr=0;
00593 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00594
00595 length=0;
00596 trees=NULL;
00597
00598 NUM_SYMS=4;
00599 }
00600
00601 template <class Trie>
00602 CTrie<Trie>::CTrie(const CTrie & to_copy)
00603 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00604 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00605 {
00606 if (to_copy.position_weights!=NULL)
00607 {
00608 position_weights = to_copy.position_weights;
00609
00610
00611
00612 }
00613 else
00614 position_weights=NULL;
00615
00616 TreeMemPtrMax=to_copy.TreeMemPtrMax;
00617 TreeMemPtr=to_copy.TreeMemPtr;
00618 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00619 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00620
00621 length=to_copy.length;
00622 trees=new INT[length];
00623 for (INT i=0; i<length; i++)
00624 trees[i]=to_copy.trees[i];
00625
00626 NUM_SYMS=4;
00627 }
00628
00629 template <class Trie>
00630 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00631 {
00632 degree=to_copy.degree ;
00633 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00634
00635 delete[] position_weights ;
00636 position_weights=NULL ;
00637 if (to_copy.position_weights!=NULL)
00638 {
00639 position_weights=to_copy.position_weights ;
00640
00641
00642
00643 }
00644 else
00645 position_weights=NULL ;
00646
00647 TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00648 TreeMemPtr=to_copy.TreeMemPtr ;
00649 free(TreeMem) ;
00650 TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00651 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00652
00653 length = to_copy.length ;
00654 if (trees)
00655 delete[] trees ;
00656 trees=new INT[length] ;
00657 for (INT i=0; i<length; i++)
00658 trees[i]=to_copy.trees[i] ;
00659
00660 return *this ;
00661 }
00662
00663 template <class Trie>
00664 INT CTrie<Trie>::find_deepest_node(INT start_node, INT& deepest_node) const
00665 {
00666 #ifdef TRIE_CHECK_EVERYTHING
00667 INT ret=0 ;
00668 SG_DEBUG("start_node=%i\n", start_node) ;
00669
00670 if (start_node==NO_CHILD)
00671 {
00672 for (INT i=0; i<length; i++)
00673 {
00674 INT my_deepest_node ;
00675 INT depth=find_deepest_node(i, my_deepest_node) ;
00676 SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00677 if (depth>ret)
00678 {
00679 deepest_node=my_deepest_node ;
00680 ret=depth ;
00681 }
00682 }
00683 return ret ;
00684 }
00685
00686 if (TreeMem[start_node].has_seq)
00687 {
00688 for (INT q=0; q<16; q++)
00689 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00690 ret++ ;
00691 deepest_node=start_node ;
00692 return ret ;
00693 }
00694 if (TreeMem[start_node].has_floats)
00695 {
00696 deepest_node=start_node ;
00697 return 1 ;
00698 }
00699
00700 for (INT q=0; q<4; q++)
00701 {
00702 INT my_deepest_node ;
00703 if (TreeMem[start_node].children[q]==NO_CHILD)
00704 continue ;
00705 INT depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00706 if (depth>ret)
00707 {
00708 deepest_node=my_deepest_node ;
00709 ret=depth ;
00710 }
00711 }
00712 return ret ;
00713 #else
00714 SG_ERROR( "not implemented\n") ;
00715 return 0 ;
00716 #endif
00717 }
00718
00719 template <class Trie>
00720 INT CTrie<Trie>::compact_nodes(INT start_node, INT depth, DREAL * weights)
00721 {
00722 SG_ERROR( "code buggy\n") ;
00723
00724 INT ret=0 ;
00725
00726 if (start_node==NO_CHILD)
00727 {
00728 for (INT i=0; i<length; i++)
00729 compact_nodes(i,1, weights) ;
00730 return 0 ;
00731 }
00732 if (start_node<0)
00733 return -1 ;
00734
00735 if (depth==degree-1)
00736 {
00737 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00738 INT num_used=0 ;
00739 for (INT q=0; q<4; q++)
00740 if (TreeMem[start_node].child_weights[q]!=0.0)
00741 num_used++ ;
00742 if (num_used>1)
00743 return -1 ;
00744 return 1 ;
00745 }
00746 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00747
00748 INT num_used = 0 ;
00749 INT q_used=-1 ;
00750
00751 for (INT q=0; q<4; q++)
00752 {
00753 if (TreeMem[start_node].children[q]==NO_CHILD)
00754 continue ;
00755 num_used++ ;
00756 q_used=q ;
00757 }
00758 if (num_used>1)
00759 {
00760 if (depth>=degree-2)
00761 return -1 ;
00762 for (INT q=0; q<4; q++)
00763 {
00764 if (TreeMem[start_node].children[q]==NO_CHILD)
00765 continue ;
00766 INT num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00767 if (num<=2)
00768 continue ;
00769 INT node=get_node() ;
00770
00771 INT last_node=TreeMem[start_node].children[q] ;
00772 if (weights_in_tree)
00773 {
00774 ASSERT(weights[depth]!=0.0) ;
00775 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00776 }
00777 else
00778 TreeMem[node].weight=TreeMem[last_node].weight ;
00779
00780 #ifdef TRIE_CHECK_EVERYTHING
00781 TreeMem[node].has_seq=true ;
00782 #endif
00783 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00784 for (INT n=0; n<num; n++)
00785 {
00786 ASSERT(depth+n+1<=degree-1) ;
00787 ASSERT(last_node!=NO_CHILD) ;
00788 if (depth+n+1==degree-1)
00789 {
00790 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00791 INT k ;
00792 for (k=0; k<4; k++)
00793 if (TreeMem[last_node].child_weights[k]!=0.0)
00794 break ;
00795 if (k==4)
00796 break ;
00797 TreeMem[node].seq[n]=k ;
00798 break ;
00799 }
00800 else
00801 {
00802 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00803 INT k ;
00804 for (k=0; k<4; k++)
00805 if (TreeMem[last_node].children[k]!=NO_CHILD)
00806 break ;
00807 if (k==4)
00808 break ;
00809 TreeMem[node].seq[n]=k ;
00810 last_node=TreeMem[last_node].children[k] ;
00811 }
00812 }
00813 TreeMem[start_node].children[q]=-node ;
00814 }
00815 return -1 ;
00816 }
00817 if (num_used==0)
00818 return 0 ;
00819
00820 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00821 if (ret<0)
00822 return ret ;
00823 return ret+1 ;
00824 }
00825
00826
00827 template <class Trie>
00828 bool CTrie<Trie>::compare_traverse(INT node, const CTrie<Trie> & other, INT other_node)
00829 {
00830 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00831 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00832 {
00833 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00834 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00835 display_node(node) ;
00836 SG_DEBUG( "============================================================\n") ;
00837 other.display_node(other_node) ;
00838 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00839 return false ;
00840 }
00841
00842 #ifdef TRIE_CHECK_EVERYTHING
00843 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00844 {
00845 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00846 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00847 display_node(node) ;
00848 SG_DEBUG( "============================================================\n") ;
00849 other.display_node(other_node) ;
00850 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00851 return false ;
00852 }
00853 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00854 {
00855 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00856 return false ;
00857 }
00858 if (other.TreeMem[other_node].has_floats)
00859 {
00860 for (INT q=0; q<4; q++)
00861 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00862 {
00863 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00864 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00865 display_node(node) ;
00866 SG_DEBUG( "============================================================\n") ;
00867 other.display_node(other_node) ;
00868 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00869 return false ;
00870 }
00871 }
00872 if (other.TreeMem[other_node].has_seq)
00873 {
00874 for (INT q=0; q<16; q++)
00875 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00876 {
00877 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00878 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00879 display_node(node) ;
00880 SG_DEBUG( "============================================================\n") ;
00881 other.display_node(other_node) ;
00882 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00883 return false ;
00884 }
00885 }
00886 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00887 {
00888 for (INT q=0; q<4; q++)
00889 {
00890 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00891 continue ;
00892 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00893 {
00894 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00895 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00896 display_node(node) ;
00897 SG_DEBUG( "============================================================\n") ;
00898 other.display_node(other_node) ;
00899 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00900 return false ;
00901 }
00902 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00903 return false ;
00904 }
00905 }
00906 #else
00907 SG_ERROR( "not implemented\n") ;
00908 #endif
00909
00910 return true ;
00911 }
00912
00913 template <class Trie>
00914 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00915 {
00916 bool ret=true ;
00917 for (INT i=0; i<length; i++)
00918 if (!compare_traverse(trees[i], other, other.trees[i]))
00919 return false ;
00920 else
00921 SG_DEBUG("two tries at %i identical\n", i) ;
00922
00923 return ret ;
00924 }
00925
00926 template <class Trie>
00927 bool CTrie<Trie>::find_node(INT node, INT * trace, INT& trace_len) const
00928 {
00929 #ifdef TRIE_CHECK_EVERYTHING
00930 ASSERT(trace_len-1>=0) ;
00931 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
00932 if (TreeMem[trace[trace_len-1]].has_seq)
00933 return false ;
00934 if (TreeMem[trace[trace_len-1]].has_floats)
00935 return false ;
00936
00937 for (INT q=0; q<4; q++)
00938 {
00939 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
00940 continue ;
00941 INT tl=trace_len+1 ;
00942 if (TreeMem[trace[trace_len-1]].children[q]>=0)
00943 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
00944 else
00945 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
00946
00947 if (trace[trace_len]==node)
00948 {
00949 trace_len=tl ;
00950 return true ;
00951 }
00952 if (find_node(node, trace, tl))
00953 {
00954 trace_len=tl ;
00955 return true ;
00956 }
00957 }
00958 trace_len=0 ;
00959 return false ;
00960 #else
00961 SG_ERROR( "not implemented\n") ;
00962 return false ;
00963 #endif
00964 }
00965
00966 template <class Trie>
00967 void CTrie<Trie>::display_node(INT node) const
00968 {
00969 #ifdef TRIE_CHECK_EVERYTHING
00970 INT * trace=new INT[2*degree] ;
00971 INT trace_len=-1 ;
00972 bool found = false ;
00973 INT tree=-1 ;
00974 for (tree=0; tree<length; tree++)
00975 {
00976 trace[0]=trees[tree] ;
00977 trace_len=1 ;
00978 found=find_node(node, trace, trace_len) ;
00979 if (found)
00980 break ;
00981 }
00982 ASSERT(found) ;
00983 SG_PRINT( "position %i trace: ", tree) ;
00984
00985 for (INT i=0; i<trace_len-1; i++)
00986 {
00987 INT branch=-1 ;
00988 for (INT q=0; q<4; q++)
00989 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
00990 {
00991 branch=q;
00992 break ;
00993 }
00994 ASSERT(branch!=-1) ;
00995 char acgt[5]="ACGT" ;
00996 SG_PRINT( "%c", acgt[branch]) ;
00997 }
00998 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
00999 if (TreeMem[node].has_floats)
01000 {
01001 for (INT q=0; q<4; q++)
01002 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01003 }
01004 if (TreeMem[node].has_seq)
01005 {
01006 for (INT q=0; q<16; q++)
01007 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01008 }
01009 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01010 {
01011 for (INT q=0; q<4; q++)
01012 {
01013 if (TreeMem[node].children[q]!=NO_CHILD)
01014 {
01015 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01016 display_node(abs(TreeMem[node].children[q])) ;
01017 }
01018 else
01019 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01020 }
01021
01022 }
01023
01024 delete[] trace ;
01025 #else
01026 SG_ERROR( "not implemented\n") ;
01027 #endif
01028 }
01029
01030
01031 template <class Trie> CTrie<Trie>::~CTrie()
01032 {
01033 destroy() ;
01034
01035 free(TreeMem) ;
01036 }
01037
01038 template <class Trie> void CTrie<Trie>::destroy()
01039 {
01040 if (trees!=NULL)
01041 {
01042 delete_trees();
01043 for (INT i=0; i<length; i++)
01044 trees[i] = NO_CHILD;
01045 delete[] trees;
01046
01047 TreeMemPtr=0;
01048 length=0;
01049 trees=NULL;
01050 }
01051 }
01052
01053 template <class Trie> void CTrie<Trie>::set_degree(INT d)
01054 {
01055 delete_trees(get_use_compact_terminal_nodes());
01056 degree=d;
01057 }
01058
01059 template <class Trie> void CTrie<Trie>::create(INT len, bool p_use_compact_terminal_nodes)
01060 {
01061 destroy();
01062
01063 trees=new INT[len] ;
01064 TreeMemPtr=0 ;
01065 for (INT i=0; i<len; i++)
01066 trees[i]=get_node(degree==1);
01067 length = len ;
01068
01069 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01070 }
01071
01072
01073 template <class Trie> void CTrie<Trie>::delete_trees(bool p_use_compact_terminal_nodes)
01074 {
01075 if (trees==NULL)
01076 return;
01077
01078 TreeMemPtr=0 ;
01079 for (INT i=0; i<length; i++)
01080 trees[i]=get_node(degree==1);
01081
01082 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01083 }
01084
01085 template <class Trie>
01086 DREAL CTrie<Trie>::compute_abs_weights_tree(INT tree, INT depth)
01087 {
01088 DREAL ret=0 ;
01089
01090 if (tree==NO_CHILD)
01091 return 0 ;
01092 TRIE_ASSERT(tree>=0) ;
01093
01094 if (depth==degree-2)
01095 {
01096 ret+=(TreeMem[tree].weight) ;
01097
01098 for (INT k=0; k<4; k++)
01099 ret+=(TreeMem[tree].child_weights[k]) ;
01100
01101 return ret ;
01102 }
01103
01104 ret+=(TreeMem[tree].weight) ;
01105
01106 for (INT i=0; i<4; i++)
01107 if (TreeMem[tree].children[i]!=NO_CHILD)
01108 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
01109
01110 return ret ;
01111 }
01112
01113
01114 template <class Trie>
01115 DREAL *CTrie<Trie>::compute_abs_weights(int &len)
01116 {
01117 DREAL * sum=new DREAL[length*4] ;
01118 for (INT i=0; i<length*4; i++)
01119 sum[i]=0 ;
01120 len=length ;
01121
01122 for (INT i=0; i<length; i++)
01123 {
01124 TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01125 for (INT k=0; k<4; k++)
01126 {
01127 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01128 }
01129 }
01130
01131 return sum ;
01132 }
01133
01134 template <class Trie>
01135 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(INT tree, INT i, DREAL alpha,
01136 INT *vec, INT len_rem,
01137 INT degree_rec, INT mismatch_rec,
01138 INT max_mismatch, DREAL * weights)
01139 {
01140 if (tree==NO_CHILD)
01141 tree=trees[i] ;
01142 TRIE_ASSERT(tree!=NO_CHILD) ;
01143
01144 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01145 return ;
01146 const INT other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01147
01148 INT subtree = NO_CHILD ;
01149
01150 if (degree_rec==degree-1)
01151 {
01152 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01153 if (weights_in_tree)
01154 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01155 else
01156 if (weights[degree_rec]!=0.0)
01157 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01158 if (mismatch_rec+1<=max_mismatch)
01159 for (INT o=0; o<3; o++)
01160 {
01161 if (weights_in_tree)
01162 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01163 else
01164 if (weights[degree_rec]!=0.0)
01165 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01166 }
01167 return ;
01168 }
01169 else
01170 {
01171 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01172 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01173 {
01174 subtree=TreeMem[tree].children[vec[0]] ;
01175 if (weights_in_tree)
01176 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01177 else
01178 if (weights[degree_rec]!=0.0)
01179 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01180 }
01181 else
01182 {
01183 INT tmp = get_node(degree_rec==degree-2);
01184 ASSERT(tmp>=0) ;
01185 TreeMem[tree].children[vec[0]]=tmp ;
01186 subtree=tmp ;
01187 #ifdef TRIE_CHECK_EVERYTHING
01188 if (degree_rec==degree-2)
01189 TreeMem[subtree].has_floats=true ;
01190 #endif
01191 if (weights_in_tree)
01192 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01193 else
01194 if (weights[degree_rec]!=0.0)
01195 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01196 else
01197 TreeMem[subtree].weight = 0.0 ;
01198 }
01199 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01200 &vec[1], len_rem-1,
01201 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01202
01203 if (mismatch_rec+1<=max_mismatch)
01204 {
01205 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01206 for (INT o=0; o<3; o++)
01207 {
01208 INT ot = other[vec[0]][o] ;
01209 if (TreeMem[tree].children[ot]!=NO_CHILD)
01210 {
01211 subtree=TreeMem[tree].children[ot] ;
01212 if (weights_in_tree)
01213 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01214 else
01215 if (weights[degree_rec]!=0.0)
01216 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01217 }
01218 else
01219 {
01220 INT tmp = get_node(degree_rec==degree-2);
01221 ASSERT(tmp>=0) ;
01222 TreeMem[tree].children[ot]=tmp ;
01223 subtree=tmp ;
01224 #ifdef TRIE_CHECK_EVERYTHING
01225 if (degree_rec==degree-2)
01226 TreeMem[subtree].has_floats=true ;
01227 #endif
01228
01229 if (weights_in_tree)
01230 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01231 else
01232 if (weights[degree_rec]!=0.0)
01233 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01234 else
01235 TreeMem[subtree].weight = 0.0 ;
01236 }
01237
01238 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01239 &vec[1], len_rem-1,
01240 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01241 }
01242 }
01243 }
01244 }
01245
01246 template <class Trie>
01247 void CTrie<Trie>::compute_scoring_helper(INT tree, INT i, INT j, DREAL weight, INT d, INT max_degree, INT num_feat, INT num_sym, INT sym_offset, INT offs, DREAL* result)
01248 {
01249 if (i+j<num_feat)
01250 {
01251 DREAL decay=1.0;
01252
01253
01254
01255 if (j<degree-1)
01256 {
01257 for (INT k=0; k<num_sym; k++)
01258 {
01259 if (TreeMem[tree].children[k]!=NO_CHILD)
01260 {
01261 INT child=TreeMem[tree].children[k];
01262
01263 if (d<max_degree-1)
01264 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01265 else
01266 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01267
01269 if (d==0)
01270 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01271 }
01272 }
01273 }
01274 else if (j==degree-1)
01275 {
01276 for (INT k=0; k<num_sym; k++)
01277 {
01278
01279 if (d<max_degree-1 && i<num_feat-1)
01280 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01281 else
01282 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01283 }
01284 }
01285 }
01286 }
01287
01288 template <class Trie>
01289 void CTrie<Trie>::traverse( INT tree, const INT p, struct TreeParseInfo info, const INT depth, INT* const x, const INT k )
01290 {
01291 const INT num_sym = info.num_sym;
01292 const INT y0 = info.y0;
01293 const INT y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01294
01295
01296
01297
01298
01299 INT sym;
01300 ASSERT( depth < degree );
01301
01302 if (depth<degree-1)
01303 {
01304 for( sym=0; sym<num_sym; ++sym ) {
01305 const INT childNum = TreeMem[tree].children[ sym ];
01306 if( childNum != NO_CHILD ) {
01307 INT child = childNum ;
01308 x[depth] = sym;
01309 info.substrs[depth+1] = y0 + sym;
01310 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01311
01312 count( TreeMem[child].weight, depth, info, p, x, k );
01313 traverse( child, p, info, depth+1, x, k );
01314 x[depth] = -1;
01315 }
01316 }
01317 }
01318 else if( depth == degree-1 )
01319 {
01320 for( sym=0; sym<num_sym; ++sym ) {
01321 const DREAL w = TreeMem[tree].child_weights[ sym ];
01322 if( w != 0.0 ) {
01323 x[depth] = sym;
01324 info.substrs[depth+1] = y0 + sym;
01325 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01326
01327 count( w, depth, info, p, x, k );
01328 x[depth] = -1;
01329 }
01330 }
01331 }
01332
01333
01334 }
01335
01336 template <class Trie>
01337 void CTrie<Trie>::count( const DREAL w, const INT depth, const struct TreeParseInfo info, const INT p, INT* x, const INT k )
01338 {
01339 ASSERT( fabs(w) < 1e10 );
01340 ASSERT( x[depth] >= 0 );
01341 ASSERT( x[depth+1] < 0 );
01342 if ( depth < k ) {
01343 return;
01344 }
01345
01346 const INT nofKmers = info.nofsKmers[k];
01347 const DREAL margWeight = w * info.margFactors[ depth-k ];
01348 const INT m_a = depth - k + 1;
01349 const INT m_b = info.num_feat - p;
01350 const INT m = ( m_a < m_b ) ? m_a : m_b;
01351
01352 const INT offset0 = nofKmers * p;
01353 register INT i;
01354 register INT offset;
01355 offset = offset0;
01356 for( i = 0; i < m; ++i ) {
01357 const INT y = info.substrs[i+k+1];
01358 info.C_k[ y + offset ] += margWeight;
01359 offset += nofKmers;
01360 }
01361 if( depth > k ) {
01362
01363 const INT offsR = info.substrs[k+1] + offset0;
01364 info.R_k[offsR] += margWeight;
01365
01366 if( p+depth-k < info.num_feat ) {
01367 const INT offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01368 info.L_k[offsL] += margWeight;
01369 }
01370 }
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386 }
01387
01388 template <class Trie>
01389 void CTrie<Trie>::add_to_trie(int i, INT seq_offset, INT * vec, float alpha, DREAL *weights, bool degree_times_position_weights)
01390 {
01391 INT tree = trees[i] ;
01392
01393
01394 INT max_depth = 0 ;
01395 DREAL* weights_column ;
01396 if (degree_times_position_weights)
01397 weights_column = &weights[(i+seq_offset)*degree] ;
01398 else
01399 weights_column = weights ;
01400
01401 if (weights_in_tree)
01402 {
01403 for (INT j=0; (j<degree) && (i+j<length); j++)
01404 if (CMath::abs(weights_column[j]*alpha)>0)
01405 max_depth = j+1 ;
01406 }
01407 else
01408
01409 max_depth=degree ;
01410
01411 for (INT j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01412 {
01413 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01414 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01415 {
01416 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01417 {
01418
01419 TRIE_ASSERT(j >= degree-16) ;
01420
01421 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01422 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01423 INT node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01424
01425 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01426 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01427 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01428
01429
01430 INT mismatch_pos = -1 ;
01431 {
01432 INT k ;
01433 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01434 {
01435 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01436
01437 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01438 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01439 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01440 TRIE_ASSERT(k<16) ;
01441 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01442 {
01443 mismatch_pos=k ;
01444 break ;
01445 }
01446 }
01447 }
01448
01449
01450
01451 if (mismatch_pos==-1)
01452
01453 TreeMem[node].weight+=alpha ;
01454 else
01455
01456
01457
01458
01459 {
01460
01461 INT last_node=tree ;
01462
01463
01464 INT k ;
01465 for (k=0; k<mismatch_pos; k++)
01466 {
01467 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01468 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01469
01470 INT tmp=get_node();
01471 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01472 last_node=tmp ;
01473 if (weights_in_tree)
01474 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01475 else
01476 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01477 TRIE_ASSERT(j+k!=degree-1) ;
01478 }
01479 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01480 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01481 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01482 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01483
01484 if (j+k==degree-1)
01485 {
01486
01487
01488
01489 for (INT q=0; q<4; q++)
01490 TreeMem[last_node].child_weights[q]=0.0 ;
01491 if (weights_in_tree)
01492 {
01493 if (TreeMem[node].seq[mismatch_pos]<4)
01494 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01495 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01496 }
01497 else
01498 {
01499 if (TreeMem[node].seq[mismatch_pos]<4)
01500 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01501 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01502 }
01503
01504 #ifdef TRIE_CHECK_EVERYTHING
01505 TreeMem[last_node].has_floats=true ;
01506 #endif
01507 }
01508 else
01509 {
01510
01511 if (TreeMem[node].seq[mismatch_pos]<4)
01512 {
01513 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01514
01515
01516 for (INT q=0; q<16; q++)
01517 {
01518 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01519 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01520 else
01521 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01522 }
01523 #ifdef TRIE_CHECK_EVERYTHING
01524 TreeMem[node].has_seq=true ;
01525 #endif
01526 }
01527
01528
01529 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01530 INT tmp = get_node() ;
01531 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01532 last_node=tmp ;
01533
01534 TreeMem[last_node].weight = alpha ;
01535 #ifdef TRIE_CHECK_EVERYTHING
01536 TreeMem[last_node].has_seq = true ;
01537 #endif
01538 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01539 for (INT q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01540 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01541 }
01542 }
01543 break ;
01544 }
01545 else
01546 {
01547 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01548 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01549 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01550 if (weights_in_tree)
01551 TreeMem[tree].weight += alpha*weights_column[j];
01552 else
01553 TreeMem[tree].weight += alpha ;
01554 }
01555 }
01556 else if (j==degree-1)
01557 {
01558
01559 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01560 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01561 if (weights_in_tree)
01562 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01563 else
01564 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01565
01566 break;
01567 }
01568 else
01569 {
01570 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01571 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01572 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01573
01574 INT tmp = get_node((j==degree-2) && (!use_seq));
01575 if (use_seq)
01576 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01577 else
01578 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01579 tree=tmp ;
01580
01581 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01582 #ifdef TRIE_CHECK_EVERYTHING
01583 TreeMem[tree].has_seq = use_seq ;
01584 #endif
01585 if (use_seq)
01586 {
01587 TreeMem[tree].weight = alpha ;
01588
01589 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01590 for (INT q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01591 {
01592 TRIE_ASSERT(q<16) ;
01593 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01594 }
01595 break ;
01596 }
01597 else
01598 {
01599 if (weights_in_tree)
01600 TreeMem[tree].weight = alpha*weights_column[j] ;
01601 else
01602 TreeMem[tree].weight = alpha ;
01603 #ifdef TRIE_CHECK_EVERYTHING
01604 if (j==degree-2)
01605 TreeMem[tree].has_floats = true ;
01606 #endif
01607 }
01608 }
01609 }
01610 }
01611
01612 template <class Trie>
01613 DREAL CTrie<Trie>::compute_by_tree_helper(INT* vec, INT len, INT seq_pos,
01614 INT tree_pos,
01615 INT weight_pos, DREAL* weights,
01616 bool degree_times_position_weights)
01617 {
01618 INT tree = trees[tree_pos] ;
01619
01620 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01621 return 0.0;
01622
01623 DREAL *weights_column=NULL ;
01624 if (degree_times_position_weights)
01625 weights_column=&weights[weight_pos*degree] ;
01626 else
01627 weights_column=weights ;
01628
01629 DREAL sum=0 ;
01630 for (INT j=0; seq_pos+j < len; j++)
01631 {
01632 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01633
01634 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01635 {
01636 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01637 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01638 {
01639 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01640 TRIE_ASSERT(tree>=0) ;
01641 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01642 DREAL this_weight=0.0 ;
01643 for (INT k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01644 {
01645 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01646 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01647 break ;
01648 this_weight += weights_column[j+k] ;
01649 }
01650 sum += TreeMem[tree].weight * this_weight ;
01651 break ;
01652 }
01653 else
01654 {
01655 tree=TreeMem[tree].children[vec[seq_pos+j]];
01656 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01657 if (weights_in_tree)
01658 sum += TreeMem[tree].weight ;
01659 else
01660 sum += TreeMem[tree].weight * weights_column[j] ;
01661 } ;
01662 }
01663 else
01664 {
01665 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01666 if (j==degree-1)
01667 {
01668 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01669 if (weights_in_tree)
01670 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01671 else
01672 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01673 }
01674 else
01675 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01676
01677 break;
01678 }
01679 }
01680
01681 if (position_weights!=NULL)
01682 return sum*position_weights[weight_pos] ;
01683 else
01684 return sum ;
01685 }
01686
01687 template <class Trie>
01688 void CTrie<Trie>::compute_by_tree_helper(INT* vec, INT len,
01689 INT seq_pos, INT tree_pos,
01690 INT weight_pos,
01691 DREAL* LevelContrib, DREAL factor,
01692 INT mkl_stepsize,
01693 DREAL * weights,
01694 bool degree_times_position_weights)
01695 {
01696 INT tree = trees[tree_pos] ;
01697 if (factor==0)
01698 return ;
01699
01700 if (position_weights!=NULL)
01701 {
01702 factor *= position_weights[weight_pos] ;
01703 if (factor==0)
01704 return ;
01705 if (!degree_times_position_weights)
01706 {
01707 for (INT j=0; seq_pos+j<len; j++)
01708 {
01709 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01710 {
01711 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01712 {
01713 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01714 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01715 for (INT k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01716 {
01717 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01718 break ;
01719 if (weights_in_tree)
01720 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01721 else
01722 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01723 }
01724 break ;
01725 }
01726 else
01727 {
01728 tree=TreeMem[tree].children[vec[seq_pos+j]];
01729 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01730 if (weights_in_tree)
01731 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01732 else
01733 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01734 }
01735 }
01736 else
01737 {
01738 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01739 if (j==degree-1)
01740 {
01741 if (weights_in_tree)
01742 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01743 else
01744 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01745 }
01746 }
01747 }
01748 }
01749 else
01750 {
01751 for (INT j=0; seq_pos+j<len; j++)
01752 {
01753 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01754 {
01755 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01756 {
01757 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01758 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01759 for (INT k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01760 {
01761 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01762 break ;
01763 if (weights_in_tree)
01764 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01765 else
01766 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01767 }
01768 break ;
01769 }
01770 else
01771 {
01772 tree=TreeMem[tree].children[vec[seq_pos+j]];
01773 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01774 if (weights_in_tree)
01775 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01776 else
01777 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01778 }
01779 }
01780 else
01781 {
01782 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01783 if (j==degree-1)
01784 {
01785 if (weights_in_tree)
01786 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01787 else
01788 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01789 }
01790 break ;
01791 }
01792 }
01793 }
01794 }
01795 else if (!degree_times_position_weights)
01796 {
01797 for (INT j=0; seq_pos+j<len; j++)
01798 {
01799 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01800 {
01801 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01802 {
01803 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01804 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01805 for (INT k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01806 {
01807 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01808 break ;
01809 if (weights_in_tree)
01810 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01811 else
01812 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01813 }
01814 break ;
01815 }
01816 else
01817 {
01818 tree=TreeMem[tree].children[vec[seq_pos+j]];
01819 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01820 if (weights_in_tree)
01821 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01822 else
01823 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01824 }
01825 }
01826 else
01827 {
01828 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01829 if (j==degree-1)
01830 {
01831 if (weights_in_tree)
01832 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01833 else
01834 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01835 }
01836 break ;
01837 }
01838 }
01839 }
01840 else
01841 {
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860 for (INT j=0; seq_pos+j<len; j++)
01861 {
01862 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01863 {
01864 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01865 {
01866 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01867 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01868 for (INT k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01869 {
01870 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01871 break ;
01872 if (weights_in_tree)
01873 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01874 else
01875 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01876 }
01877 break ;
01878 }
01879 else
01880 {
01881 tree=TreeMem[tree].children[vec[seq_pos+j]];
01882 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01883 if (weights_in_tree)
01884 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01885 else
01886 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01887 }
01888 }
01889 else
01890 {
01891 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01892 if (j==degree-1)
01893 {
01894 if (weights_in_tree)
01895 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01896 else
01897 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01898 }
01899 break ;
01900 }
01901 }
01902 }
01903 }
01904
01905 template <class Trie>
01906 void CTrie<Trie>::fill_backtracking_table_recursion(Trie* tree, INT depth, ULONG seq, DREAL value, CDynamicArray<ConsensusEntry>* table, DREAL* weights)
01907 {
01908 DREAL w=1.0;
01909
01910 if (weights_in_tree || depth==0)
01911 value+=tree->weight;
01912 else
01913 {
01914 w=weights[degree-1];
01915 value+=weights[depth-1]*tree->weight;
01916 }
01917
01918 if (degree-1==depth)
01919 {
01920 for (INT sym=0; sym<4; sym++)
01921 {
01922 DREAL v=w*tree->child_weights[sym];
01923 if (v!=0.0)
01924 {
01925 ConsensusEntry entry;
01926 entry.bt=-1;
01927 entry.score=value+v;
01928 entry.string=seq | ((ULONG) sym) << (2*(degree-depth-1));
01929
01930 table->append_element(entry);
01931 }
01932 }
01933 }
01934 else
01935 {
01936 for (INT sym=0; sym<4; sym++)
01937 {
01938 ULONG str=seq | ((ULONG) sym) << (2*(degree-depth-1));
01939 if (tree->children[sym] != NO_CHILD)
01940 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
01941 }
01942 }
01943 }
01944
01945 template <class Trie>
01946 DREAL CTrie<Trie>::get_cumulative_score(INT pos, ULONG seq, INT deg, DREAL* weights)
01947 {
01948 DREAL result=0.0;
01949
01950
01951
01952 for (INT i=pos; i<pos+deg && i<length; i++)
01953 {
01954
01955 Trie* tree = &TreeMem[trees[i]];
01956
01957 for (INT d=0; d<deg-i+pos; d++)
01958 {
01959
01960 ASSERT(d-1<degree);
01961 INT sym = (INT) (seq >> (2*(deg-1-d-i+pos)) & 3);
01962
01963 DREAL w=1.0;
01964 if (!weights_in_tree)
01965 w=weights[d];
01966
01967 ASSERT(tree->children[sym] != NO_CHILD);
01968 tree=&TreeMem[tree->children[sym]];
01969 result+=w*tree->weight;
01970 }
01971 }
01972
01973 return result;
01974 }
01975
01976 template <class Trie>
01977 void CTrie<Trie>::fill_backtracking_table(INT pos, CDynamicArray<ConsensusEntry>* prev, CDynamicArray<ConsensusEntry>* cur, bool cumulative, DREAL* weights)
01978 {
01979 ASSERT(pos>=0 && pos<length);
01980 ASSERT(!use_compact_terminal_nodes);
01981
01982 Trie* t = &TreeMem[trees[pos]];
01983
01984 fill_backtracking_table_recursion(t, 0, (ULONG) 0, 0.0, cur, weights);
01985
01986
01987 if (cumulative)
01988 {
01989 INT num_cur=cur->get_num_elements();
01990 for (INT i=0; i<num_cur; i++)
01991 {
01992 ConsensusEntry entry=cur->get_element(i);
01993 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
01994 cur->set_element(entry,i);
01995
01996 }
01997 }
01998
01999
02000
02001 if (prev)
02002 {
02003 INT num_cur=cur->get_num_elements();
02004 INT num_prev=prev->get_num_elements();
02005
02006 for (INT i=0; i<num_cur; i++)
02007 {
02008
02009 ULONG str_cur= cur->get_element(i).string >> 2;
02010
02011
02012 INT bt=-1;
02013 DREAL max_score=0.0;
02014
02015 for (INT j=0; j<num_prev; j++)
02016 {
02017
02018 ULONG mask=((((ULONG)0)-1) ^ (((ULONG) 3) << (2*(degree-1))));
02019 ULONG str_prev= mask & prev->get_element(j).string;
02020
02021
02022 if (str_cur == str_prev)
02023 {
02024 DREAL sc=prev->get_element(j).score+cur->get_element(i).score;
02025 if (bt==-1 || sc>max_score)
02026 {
02027 bt=j;
02028 max_score=sc;
02029
02030
02031 }
02032 }
02033 }
02034
02035 ASSERT(bt!=-1);
02036 ConsensusEntry entry;
02037 entry.bt=bt;
02038 entry.score=max_score;
02039 entry.string=cur->get_element(i).string;
02040 cur->set_element(entry, i);
02041
02042 }
02043 }
02044 }
02045 #endif // _TRIE_H___