00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021
00022
00023 #define NO_CHILD ((int32_t)-1073741824)
00024
00025 #define WEIGHTS_IN_TRIE
00026
00027
00028 #ifdef TRIE_CHECK_EVERYTHING
00029 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00030 #else
00031 #define TRIE_ASSERT_EVERYTHING(x)
00032 #endif
00033
00034
00035 #define TRIE_ASSERT(x)
00036
00037 #define TRIE_TERMINAL_CHARACTER 7
00038
00040 struct ConsensusEntry
00041 {
00043 uint64_t string;
00045 float32_t score;
00047 int32_t bt;
00048 };
00049
00051 struct POIMTrie
00052 {
00054 float64_t weight;
00055 #ifdef TRIE_CHECK_EVERYTHING
00056
00057 bool has_seq;
00059 bool has_floats;
00060 #endif
00061 union
00062 {
00064 float32_t child_weights[4];
00066 int32_t children[4];
00068 uint8_t seq[16] ;
00069 };
00070
00072 float64_t S;
00074 float64_t L;
00076 float64_t R;
00077 };
00078
00080 struct DNATrie
00081 {
00083 float64_t weight;
00084 #ifdef TRIE_CHECK_EVERYTHING
00085
00086 bool has_seq;
00088 bool has_floats;
00089 #endif
00090 union
00091 {
00093 float32_t child_weights[4];
00095 int32_t children[4];
00097 uint8_t seq[16] ;
00098 };
00099 };
00100
00102 struct TreeParseInfo {
00104 int32_t num_sym;
00106 int32_t num_feat;
00108 int32_t p;
00110 int32_t k;
00112 int32_t* nofsKmers;
00114 float64_t* margFactors;
00116 int32_t* x;
00118 int32_t* substrs;
00120 int32_t y0;
00122 float64_t* C_k;
00124 float64_t* L_k;
00126 float64_t* R_k;
00127 };
00128
00129
00130 template <class Trie> class CTrie;
00131
00148 template <class Trie> class CTrie : public CSGObject
00149 {
00150 public:
00157 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00158
00160 CTrie(const CTrie & to_copy);
00161 ~CTrie();
00162
00164 const CTrie & operator=(const CTrie & to_copy);
00165
00173 bool compare_traverse(
00174 int32_t node, const CTrie & other, int32_t other_node);
00175
00181 bool compare(const CTrie & other);
00182
00189 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00190
00197 int32_t find_deepest_node(
00198 int32_t start_node, int32_t &deepest_node) const;
00199
00204 void display_node(int32_t node) const;
00205
00207 void destroy();
00208
00213 void set_degree(int32_t d);
00214
00221 void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00222
00228 void delete_trees(bool p_use_compact_terminal_nodes=true);
00229
00240 void add_to_trie(
00241 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00242 float64_t *weights, bool degree_times_position_weights);
00243
00250 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00251
00257 float64_t* compute_abs_weights(int32_t &len);
00258
00271 float64_t compute_by_tree_helper(
00272 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00273 int32_t weight_pos, float64_t * weights,
00274 bool degree_times_position_weights) ;
00275
00290 void compute_by_tree_helper(
00291 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00292 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00293 int32_t mkl_stepsize, float64_t * weights,
00294 bool degree_times_position_weights);
00295
00310 void compute_scoring_helper(
00311 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00312 int32_t max_degree, int32_t num_feat, int32_t num_sym,
00313 int32_t sym_offset, int32_t offs, float64_t* result);
00314
00327 void add_example_to_tree_mismatch_recursion(
00328 int32_t tree, int32_t i, float64_t alpha, int32_t *vec,
00329 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00330 int32_t max_mismatch, float64_t * weights);
00331
00341 void traverse(
00342 int32_t tree, const int32_t p, struct TreeParseInfo info,
00343 const int32_t depth, int32_t* const x, const int32_t k);
00344
00354 void count(
00355 const float64_t w, const int32_t depth,
00356 const struct TreeParseInfo info, const int32_t p, int32_t* x,
00357 const int32_t k);
00358
00365 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00366
00375 float64_t get_cumulative_score(
00376 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00377
00387 void fill_backtracking_table_recursion(
00388 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00389 CDynamicArray<ConsensusEntry>* table, float64_t* weights);
00390
00399 void fill_backtracking_table(
00400 int32_t pos, CDynamicArray<ConsensusEntry>* prev,
00401 CDynamicArray<ConsensusEntry>* cur, bool cumulative,
00402 float64_t* weights);
00403
00409 void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00410
00415 void POIMs_precalc_SLR(const float64_t* const distrib);
00416
00426 void POIMs_get_SLR(
00427 const int32_t parentIdx, const int32_t sym, const int32_t depth,
00428 float64_t* S, float64_t* L, float64_t* R);
00429
00436 void POIMs_add_SLR(
00437 float64_t* const* const poims, const int32_t K,
00438 const int32_t debug);
00439
00444 inline bool get_use_compact_terminal_nodes()
00445 {
00446 return use_compact_terminal_nodes ;
00447 }
00448
00454 inline void set_use_compact_terminal_nodes(
00455 bool p_use_compact_terminal_nodes)
00456 {
00457 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00458 }
00459
00464 inline int32_t get_num_used_nodes()
00465 {
00466 return TreeMemPtr;
00467 }
00468
00473 inline void set_position_weights(const float64_t * p_position_weights)
00474 {
00475 position_weights=p_position_weights;
00476 }
00477
00482 inline int32_t get_node(bool last_node=false)
00483 {
00484 int32_t ret = TreeMemPtr++;
00485 check_treemem() ;
00486
00487 if (last_node)
00488 {
00489 for (int32_t q=0; q<4; q++)
00490 TreeMem[ret].child_weights[q]=0.0;
00491 }
00492 else
00493 {
00494 for (int32_t q=0; q<4; q++)
00495 TreeMem[ret].children[q]=NO_CHILD;
00496 }
00497 #ifdef TRIE_CHECK_EVERYTHING
00498 TreeMem[ret].has_seq=false ;
00499 TreeMem[ret].has_floats=false ;
00500 #endif
00501 TreeMem[ret].weight=0.0;
00502 return ret ;
00503 }
00504
00506 inline void check_treemem()
00507 {
00508 if (TreeMemPtr+10 < TreeMemPtrMax)
00509 return;
00510 SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00511 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00512 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00513 TreeMem = (Trie*) realloc(TreeMem,
00514 TreeMemPtrMax*sizeof(Trie));
00515 if (!TreeMem)
00516 SG_ERROR( "out of memory\n");
00517 }
00518
00523 inline void set_weights_in_tree(bool weights_in_tree_)
00524 {
00525 weights_in_tree = weights_in_tree_;
00526 }
00527
00532 inline bool get_weights_in_tree()
00533 {
00534 return weights_in_tree;
00535 }
00536
00546 void POIMs_extract_W_helper(
00547 const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00548 const int32_t y0, float64_t* const* const W, const int32_t K);
00549
00562 void POIMs_calc_SLR_helper1(
00563 const float64_t* const distrib, const int32_t i,
00564 const int32_t nodeIdx, int32_t left_tries_idx[4],
00565 const int32_t depth, int32_t const lastSym, float64_t* S,
00566 float64_t* L, float64_t* R);
00567
00568
00579 void POIMs_calc_SLR_helper2(
00580 const float64_t* const distrib, const int32_t i,
00581 const int32_t nodeIdx, int32_t left_tries_idx[4],
00582 const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00583
00594 void POIMs_add_SLR_helper1(
00595 const int32_t nodeIdx, const int32_t depth,const int32_t i,
00596 const int32_t y0, float64_t* const* const poims, const int32_t K,
00597 const int32_t debug);
00598
00612 void POIMs_add_SLR_helper2(
00613 float64_t* const* const poims, const int32_t K, const int32_t k,
00614 const int32_t i, const int32_t y, const float64_t valW,
00615 const float64_t valS, const float64_t valL, const float64_t valR,
00616 const int32_t debug);
00617
00618 public:
00620 int32_t NUM_SYMS;
00621
00622 protected:
00624 int32_t length;
00626 int32_t * trees;
00627
00629 int32_t degree;
00631 float64_t const * position_weights;
00632
00634 Trie* TreeMem;
00636 int32_t TreeMemPtr;
00638 int32_t TreeMemPtrMax;
00640 bool use_compact_terminal_nodes;
00641
00643 bool weights_in_tree;
00644
00646 int32_t* nofsKmers;
00647 };
00648
00649 template <class Trie>
00650 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00651 : CSGObject(), degree(d), position_weights(NULL),
00652 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00653 weights_in_tree(true)
00654 {
00655 TreeMemPtrMax=1024*1024/sizeof(Trie);
00656 TreeMemPtr=0;
00657 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00658
00659 length=0;
00660 trees=NULL;
00661
00662 NUM_SYMS=4;
00663 }
00664
00665 template <class Trie>
00666 CTrie<Trie>::CTrie(const CTrie & to_copy)
00667 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00668 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00669 {
00670 if (to_copy.position_weights!=NULL)
00671 {
00672 position_weights = to_copy.position_weights;
00673
00674
00675
00676 }
00677 else
00678 position_weights=NULL;
00679
00680 TreeMemPtrMax=to_copy.TreeMemPtrMax;
00681 TreeMemPtr=to_copy.TreeMemPtr;
00682 TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00683 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00684
00685 length=to_copy.length;
00686 trees=new int32_t[length];
00687 for (int32_t i=0; i<length; i++)
00688 trees[i]=to_copy.trees[i];
00689
00690 NUM_SYMS=4;
00691 }
00692
00693 template <class Trie>
00694 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00695 {
00696 degree=to_copy.degree ;
00697 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00698
00699 delete[] position_weights ;
00700 position_weights=NULL ;
00701 if (to_copy.position_weights!=NULL)
00702 {
00703 position_weights=to_copy.position_weights ;
00704
00705
00706
00707 }
00708 else
00709 position_weights=NULL ;
00710
00711 TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00712 TreeMemPtr=to_copy.TreeMemPtr ;
00713 free(TreeMem) ;
00714 TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00715 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00716
00717 length = to_copy.length ;
00718 if (trees)
00719 delete[] trees ;
00720 trees=new int32_t[length] ;
00721 for (int32_t i=0; i<length; i++)
00722 trees[i]=to_copy.trees[i] ;
00723
00724 return *this ;
00725 }
00726
00727 template <class Trie>
00728 int32_t CTrie<Trie>::find_deepest_node(
00729 int32_t start_node, int32_t& deepest_node) const
00730 {
00731 #ifdef TRIE_CHECK_EVERYTHING
00732 int32_t ret=0 ;
00733 SG_DEBUG("start_node=%i\n", start_node) ;
00734
00735 if (start_node==NO_CHILD)
00736 {
00737 for (int32_t i=0; i<length; i++)
00738 {
00739 int32_t my_deepest_node ;
00740 int32_t depth=find_deepest_node(i, my_deepest_node) ;
00741 SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00742 if (depth>ret)
00743 {
00744 deepest_node=my_deepest_node ;
00745 ret=depth ;
00746 }
00747 }
00748 return ret ;
00749 }
00750
00751 if (TreeMem[start_node].has_seq)
00752 {
00753 for (int32_t q=0; q<16; q++)
00754 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00755 ret++ ;
00756 deepest_node=start_node ;
00757 return ret ;
00758 }
00759 if (TreeMem[start_node].has_floats)
00760 {
00761 deepest_node=start_node ;
00762 return 1 ;
00763 }
00764
00765 for (int32_t q=0; q<4; q++)
00766 {
00767 int32_t my_deepest_node ;
00768 if (TreeMem[start_node].children[q]==NO_CHILD)
00769 continue ;
00770 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00771 if (depth>ret)
00772 {
00773 deepest_node=my_deepest_node ;
00774 ret=depth ;
00775 }
00776 }
00777 return ret ;
00778 #else
00779 SG_ERROR( "not implemented\n") ;
00780 return 0 ;
00781 #endif
00782 }
00783
00784 template <class Trie>
00785 int32_t CTrie<Trie>::compact_nodes(
00786 int32_t start_node, int32_t depth, float64_t * weights)
00787 {
00788 SG_ERROR( "code buggy\n") ;
00789
00790 int32_t ret=0 ;
00791
00792 if (start_node==NO_CHILD)
00793 {
00794 for (int32_t i=0; i<length; i++)
00795 compact_nodes(i,1, weights) ;
00796 return 0 ;
00797 }
00798 if (start_node<0)
00799 return -1 ;
00800
00801 if (depth==degree-1)
00802 {
00803 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00804 int32_t num_used=0 ;
00805 for (int32_t q=0; q<4; q++)
00806 if (TreeMem[start_node].child_weights[q]!=0.0)
00807 num_used++ ;
00808 if (num_used>1)
00809 return -1 ;
00810 return 1 ;
00811 }
00812 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00813
00814 int32_t num_used = 0 ;
00815 int32_t q_used=-1 ;
00816
00817 for (int32_t q=0; q<4; q++)
00818 {
00819 if (TreeMem[start_node].children[q]==NO_CHILD)
00820 continue ;
00821 num_used++ ;
00822 q_used=q ;
00823 }
00824 if (num_used>1)
00825 {
00826 if (depth>=degree-2)
00827 return -1 ;
00828 for (int32_t q=0; q<4; q++)
00829 {
00830 if (TreeMem[start_node].children[q]==NO_CHILD)
00831 continue ;
00832 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00833 if (num<=2)
00834 continue ;
00835 int32_t node=get_node() ;
00836
00837 int32_t last_node=TreeMem[start_node].children[q] ;
00838 if (weights_in_tree)
00839 {
00840 ASSERT(weights[depth]!=0.0) ;
00841 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00842 }
00843 else
00844 TreeMem[node].weight=TreeMem[last_node].weight ;
00845
00846 #ifdef TRIE_CHECK_EVERYTHING
00847 TreeMem[node].has_seq=true ;
00848 #endif
00849 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00850 for (int32_t n=0; n<num; n++)
00851 {
00852 ASSERT(depth+n+1<=degree-1) ;
00853 ASSERT(last_node!=NO_CHILD) ;
00854 if (depth+n+1==degree-1)
00855 {
00856 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00857 int32_t k ;
00858 for (k=0; k<4; k++)
00859 if (TreeMem[last_node].child_weights[k]!=0.0)
00860 break ;
00861 if (k==4)
00862 break ;
00863 TreeMem[node].seq[n]=k ;
00864 break ;
00865 }
00866 else
00867 {
00868 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00869 int32_t k ;
00870 for (k=0; k<4; k++)
00871 if (TreeMem[last_node].children[k]!=NO_CHILD)
00872 break ;
00873 if (k==4)
00874 break ;
00875 TreeMem[node].seq[n]=k ;
00876 last_node=TreeMem[last_node].children[k] ;
00877 }
00878 }
00879 TreeMem[start_node].children[q]=-node ;
00880 }
00881 return -1 ;
00882 }
00883 if (num_used==0)
00884 return 0 ;
00885
00886 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00887 if (ret<0)
00888 return ret ;
00889 return ret+1 ;
00890 }
00891
00892
00893 template <class Trie>
00894 bool CTrie<Trie>::compare_traverse(
00895 int32_t node, const CTrie<Trie> & other, int32_t other_node)
00896 {
00897 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00898 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00899 {
00900 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00901 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00902 display_node(node) ;
00903 SG_DEBUG( "============================================================\n") ;
00904 other.display_node(other_node) ;
00905 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00906 return false ;
00907 }
00908
00909 #ifdef TRIE_CHECK_EVERYTHING
00910 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00911 {
00912 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00913 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00914 display_node(node) ;
00915 SG_DEBUG( "============================================================\n") ;
00916 other.display_node(other_node) ;
00917 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00918 return false ;
00919 }
00920 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00921 {
00922 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00923 return false ;
00924 }
00925 if (other.TreeMem[other_node].has_floats)
00926 {
00927 for (int32_t q=0; q<4; q++)
00928 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00929 {
00930 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00931 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00932 display_node(node) ;
00933 SG_DEBUG( "============================================================\n") ;
00934 other.display_node(other_node) ;
00935 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00936 return false ;
00937 }
00938 }
00939 if (other.TreeMem[other_node].has_seq)
00940 {
00941 for (int32_t q=0; q<16; q++)
00942 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00943 {
00944 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00945 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00946 display_node(node) ;
00947 SG_DEBUG( "============================================================\n") ;
00948 other.display_node(other_node) ;
00949 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00950 return false ;
00951 }
00952 }
00953 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00954 {
00955 for (int32_t q=0; q<4; q++)
00956 {
00957 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00958 continue ;
00959 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00960 {
00961 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00962 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00963 display_node(node) ;
00964 SG_DEBUG( "============================================================\n") ;
00965 other.display_node(other_node) ;
00966 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00967 return false ;
00968 }
00969 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00970 return false ;
00971 }
00972 }
00973 #else
00974 SG_ERROR( "not implemented\n") ;
00975 #endif
00976
00977 return true ;
00978 }
00979
00980 template <class Trie>
00981 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00982 {
00983 bool ret=true ;
00984 for (int32_t i=0; i<length; i++)
00985 if (!compare_traverse(trees[i], other, other.trees[i]))
00986 return false ;
00987 else
00988 SG_DEBUG("two tries at %i identical\n", i) ;
00989
00990 return ret ;
00991 }
00992
00993 template <class Trie>
00994 bool CTrie<Trie>::find_node(
00995 int32_t node, int32_t * trace, int32_t& trace_len) const
00996 {
00997 #ifdef TRIE_CHECK_EVERYTHING
00998 ASSERT(trace_len-1>=0) ;
00999 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01000 if (TreeMem[trace[trace_len-1]].has_seq)
01001 return false ;
01002 if (TreeMem[trace[trace_len-1]].has_floats)
01003 return false ;
01004
01005 for (int32_t q=0; q<4; q++)
01006 {
01007 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01008 continue ;
01009 int32_t tl=trace_len+1 ;
01010 if (TreeMem[trace[trace_len-1]].children[q]>=0)
01011 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01012 else
01013 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01014
01015 if (trace[trace_len]==node)
01016 {
01017 trace_len=tl ;
01018 return true ;
01019 }
01020 if (find_node(node, trace, tl))
01021 {
01022 trace_len=tl ;
01023 return true ;
01024 }
01025 }
01026 trace_len=0 ;
01027 return false ;
01028 #else
01029 SG_ERROR( "not implemented\n") ;
01030 return false ;
01031 #endif
01032 }
01033
01034 template <class Trie>
01035 void CTrie<Trie>::display_node(int32_t node) const
01036 {
01037 #ifdef TRIE_CHECK_EVERYTHING
01038 int32_t * trace=new int32_t[2*degree] ;
01039 int32_t trace_len=-1 ;
01040 bool found = false ;
01041 int32_t tree=-1 ;
01042 for (tree=0; tree<length; tree++)
01043 {
01044 trace[0]=trees[tree] ;
01045 trace_len=1 ;
01046 found=find_node(node, trace, trace_len) ;
01047 if (found)
01048 break ;
01049 }
01050 ASSERT(found) ;
01051 SG_PRINT( "position %i trace: ", tree) ;
01052
01053 for (int32_t i=0; i<trace_len-1; i++)
01054 {
01055 int32_t branch=-1 ;
01056 for (int32_t q=0; q<4; q++)
01057 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01058 {
01059 branch=q;
01060 break ;
01061 }
01062 ASSERT(branch!=-1) ;
01063 char acgt[5]="ACGT" ;
01064 SG_PRINT( "%c", acgt[branch]) ;
01065 }
01066 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01067 if (TreeMem[node].has_floats)
01068 {
01069 for (int32_t q=0; q<4; q++)
01070 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01071 }
01072 if (TreeMem[node].has_seq)
01073 {
01074 for (int32_t q=0; q<16; q++)
01075 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01076 }
01077 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01078 {
01079 for (int32_t q=0; q<4; q++)
01080 {
01081 if (TreeMem[node].children[q]!=NO_CHILD)
01082 {
01083 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01084 display_node(abs(TreeMem[node].children[q])) ;
01085 }
01086 else
01087 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01088 }
01089
01090 }
01091
01092 delete[] trace ;
01093 #else
01094 SG_ERROR( "not implemented\n") ;
01095 #endif
01096 }
01097
01098
01099 template <class Trie> CTrie<Trie>::~CTrie()
01100 {
01101 destroy() ;
01102
01103 free(TreeMem) ;
01104 }
01105
01106 template <class Trie> void CTrie<Trie>::destroy()
01107 {
01108 if (trees!=NULL)
01109 {
01110 delete_trees();
01111 for (int32_t i=0; i<length; i++)
01112 trees[i] = NO_CHILD;
01113 delete[] trees;
01114
01115 TreeMemPtr=0;
01116 length=0;
01117 trees=NULL;
01118 }
01119 }
01120
01121 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01122 {
01123 delete_trees(get_use_compact_terminal_nodes());
01124 degree=d;
01125 }
01126
01127 template <class Trie> void CTrie<Trie>::create(
01128 int32_t len, bool p_use_compact_terminal_nodes)
01129 {
01130 destroy();
01131
01132 trees=new int32_t[len] ;
01133 TreeMemPtr=0 ;
01134 for (int32_t i=0; i<len; i++)
01135 trees[i]=get_node(degree==1);
01136 length = len ;
01137
01138 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01139 }
01140
01141
01142 template <class Trie> void CTrie<Trie>::delete_trees(
01143 bool p_use_compact_terminal_nodes)
01144 {
01145 if (trees==NULL)
01146 return;
01147
01148 TreeMemPtr=0 ;
01149 for (int32_t i=0; i<length; i++)
01150 trees[i]=get_node(degree==1);
01151
01152 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01153 }
01154
01155 template <class Trie>
01156 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01157 {
01158 float64_t ret=0 ;
01159
01160 if (tree==NO_CHILD)
01161 return 0 ;
01162 TRIE_ASSERT(tree>=0) ;
01163
01164 if (depth==degree-2)
01165 {
01166 ret+=(TreeMem[tree].weight) ;
01167
01168 for (int32_t k=0; k<4; k++)
01169 ret+=(TreeMem[tree].child_weights[k]) ;
01170
01171 return ret ;
01172 }
01173
01174 ret+=(TreeMem[tree].weight) ;
01175
01176 for (int32_t i=0; i<4; i++)
01177 if (TreeMem[tree].children[i]!=NO_CHILD)
01178 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
01179
01180 return ret ;
01181 }
01182
01183
01184 template <class Trie>
01185 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01186 {
01187 float64_t * sum=new float64_t[length*4] ;
01188 for (int32_t i=0; i<length*4; i++)
01189 sum[i]=0 ;
01190 len=length ;
01191
01192 for (int32_t i=0; i<length; i++)
01193 {
01194 TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01195 for (int32_t k=0; k<4; k++)
01196 {
01197 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01198 }
01199 }
01200
01201 return sum ;
01202 }
01203
01204 template <class Trie>
01205 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01206 int32_t tree, int32_t i, float64_t alpha,
01207 int32_t *vec, int32_t len_rem,
01208 int32_t degree_rec, int32_t mismatch_rec,
01209 int32_t max_mismatch, float64_t * weights)
01210 {
01211 if (tree==NO_CHILD)
01212 tree=trees[i] ;
01213 TRIE_ASSERT(tree!=NO_CHILD) ;
01214
01215 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01216 return ;
01217 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01218
01219 int32_t subtree = NO_CHILD ;
01220
01221 if (degree_rec==degree-1)
01222 {
01223 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01224 if (weights_in_tree)
01225 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01226 else
01227 if (weights[degree_rec]!=0.0)
01228 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01229 if (mismatch_rec+1<=max_mismatch)
01230 for (int32_t o=0; o<3; o++)
01231 {
01232 if (weights_in_tree)
01233 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01234 else
01235 if (weights[degree_rec]!=0.0)
01236 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01237 }
01238 return ;
01239 }
01240 else
01241 {
01242 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01243 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01244 {
01245 subtree=TreeMem[tree].children[vec[0]] ;
01246 if (weights_in_tree)
01247 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01248 else
01249 if (weights[degree_rec]!=0.0)
01250 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01251 }
01252 else
01253 {
01254 int32_t tmp = get_node(degree_rec==degree-2);
01255 ASSERT(tmp>=0) ;
01256 TreeMem[tree].children[vec[0]]=tmp ;
01257 subtree=tmp ;
01258 #ifdef TRIE_CHECK_EVERYTHING
01259 if (degree_rec==degree-2)
01260 TreeMem[subtree].has_floats=true ;
01261 #endif
01262 if (weights_in_tree)
01263 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01264 else
01265 if (weights[degree_rec]!=0.0)
01266 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01267 else
01268 TreeMem[subtree].weight = 0.0 ;
01269 }
01270 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01271 &vec[1], len_rem-1,
01272 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01273
01274 if (mismatch_rec+1<=max_mismatch)
01275 {
01276 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01277 for (int32_t o=0; o<3; o++)
01278 {
01279 int32_t ot = other[vec[0]][o] ;
01280 if (TreeMem[tree].children[ot]!=NO_CHILD)
01281 {
01282 subtree=TreeMem[tree].children[ot] ;
01283 if (weights_in_tree)
01284 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01285 else
01286 if (weights[degree_rec]!=0.0)
01287 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01288 }
01289 else
01290 {
01291 int32_t tmp = get_node(degree_rec==degree-2);
01292 ASSERT(tmp>=0) ;
01293 TreeMem[tree].children[ot]=tmp ;
01294 subtree=tmp ;
01295 #ifdef TRIE_CHECK_EVERYTHING
01296 if (degree_rec==degree-2)
01297 TreeMem[subtree].has_floats=true ;
01298 #endif
01299
01300 if (weights_in_tree)
01301 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01302 else
01303 if (weights[degree_rec]!=0.0)
01304 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01305 else
01306 TreeMem[subtree].weight = 0.0 ;
01307 }
01308
01309 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01310 &vec[1], len_rem-1,
01311 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01312 }
01313 }
01314 }
01315 }
01316
01317 template <class Trie>
01318 void CTrie<Trie>::compute_scoring_helper(
01319 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01320 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01321 int32_t offs, float64_t* result)
01322 {
01323 if (i+j<num_feat)
01324 {
01325 float64_t decay=1.0;
01326
01327
01328
01329 if (j<degree-1)
01330 {
01331 for (int32_t k=0; k<num_sym; k++)
01332 {
01333 if (TreeMem[tree].children[k]!=NO_CHILD)
01334 {
01335 int32_t child=TreeMem[tree].children[k];
01336
01337 if (d<max_degree-1)
01338 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01339 else
01340 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01341
01343 if (d==0)
01344 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01345 }
01346 }
01347 }
01348 else if (j==degree-1)
01349 {
01350 for (int32_t k=0; k<num_sym; k++)
01351 {
01352
01353 if (d<max_degree-1 && i<num_feat-1)
01354 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01355 else
01356 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01357 }
01358 }
01359 }
01360 }
01361
01362 template <class Trie>
01363 void CTrie<Trie>::traverse(
01364 int32_t tree, const int32_t p, struct TreeParseInfo info,
01365 const int32_t depth, int32_t* const x, const int32_t k)
01366 {
01367 const int32_t num_sym = info.num_sym;
01368 const int32_t y0 = info.y0;
01369 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01370
01371
01372
01373
01374
01375 int32_t sym;
01376 ASSERT( depth < degree );
01377
01378 if (depth<degree-1)
01379 {
01380 for( sym=0; sym<num_sym; ++sym ) {
01381 const int32_t childNum = TreeMem[tree].children[ sym ];
01382 if( childNum != NO_CHILD ) {
01383 int32_t child = childNum ;
01384 x[depth] = sym;
01385 info.substrs[depth+1] = y0 + sym;
01386 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01387
01388 count( TreeMem[child].weight, depth, info, p, x, k );
01389 traverse( child, p, info, depth+1, x, k );
01390 x[depth] = -1;
01391 }
01392 }
01393 }
01394 else if( depth == degree-1 )
01395 {
01396 for( sym=0; sym<num_sym; ++sym ) {
01397 const float64_t w = TreeMem[tree].child_weights[ sym ];
01398 if( w != 0.0 ) {
01399 x[depth] = sym;
01400 info.substrs[depth+1] = y0 + sym;
01401 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01402
01403 count( w, depth, info, p, x, k );
01404 x[depth] = -1;
01405 }
01406 }
01407 }
01408
01409
01410 }
01411
01412 template <class Trie>
01413 void CTrie<Trie>::count(
01414 const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01415 const int32_t p, int32_t* x, const int32_t k)
01416 {
01417 ASSERT( fabs(w) < 1e10 );
01418 ASSERT( x[depth] >= 0 );
01419 ASSERT( x[depth+1] < 0 );
01420 if ( depth < k ) {
01421 return;
01422 }
01423
01424 const int32_t nofKmers = info.nofsKmers[k];
01425 const float64_t margWeight = w * info.margFactors[ depth-k ];
01426 const int32_t m_a = depth - k + 1;
01427 const int32_t m_b = info.num_feat - p;
01428 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01429
01430 const int32_t offset0 = nofKmers * p;
01431 register int32_t i;
01432 register int32_t offset;
01433 offset = offset0;
01434 for( i = 0; i < m; ++i ) {
01435 const int32_t y = info.substrs[i+k+1];
01436 info.C_k[ y + offset ] += margWeight;
01437 offset += nofKmers;
01438 }
01439 if( depth > k ) {
01440
01441 const int32_t offsR = info.substrs[k+1] + offset0;
01442 info.R_k[offsR] += margWeight;
01443
01444 if( p+depth-k < info.num_feat ) {
01445 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01446 info.L_k[offsL] += margWeight;
01447 }
01448 }
01449
01450
01451
01452
01453
01454
01455
01456
01457
01458
01459
01460
01461
01462
01463
01464 }
01465
01466 template <class Trie>
01467 void CTrie<Trie>::add_to_trie(
01468 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01469 float64_t *weights, bool degree_times_position_weights)
01470 {
01471 int32_t tree = trees[i] ;
01472
01473
01474 int32_t max_depth = 0 ;
01475 float64_t* weights_column ;
01476 if (degree_times_position_weights)
01477 weights_column = &weights[(i+seq_offset)*degree] ;
01478 else
01479 weights_column = weights ;
01480
01481 if (weights_in_tree)
01482 {
01483 for (int32_t j=0; (j<degree) && (i+j<length); j++)
01484 if (CMath::abs(weights_column[j]*alpha)>0)
01485 max_depth = j+1 ;
01486 }
01487 else
01488
01489 max_depth=degree ;
01490
01491 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01492 {
01493 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01494 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01495 {
01496 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01497 {
01498
01499 TRIE_ASSERT(j >= degree-16) ;
01500
01501 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01502 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01503 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01504
01505 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01506 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01507 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01508
01509
01510 int32_t mismatch_pos = -1 ;
01511 {
01512 int32_t k ;
01513 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01514 {
01515 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01516
01517 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01518 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01519 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01520 TRIE_ASSERT(k<16) ;
01521 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01522 {
01523 mismatch_pos=k ;
01524 break ;
01525 }
01526 }
01527 }
01528
01529
01530
01531 if (mismatch_pos==-1)
01532
01533 TreeMem[node].weight+=alpha ;
01534 else
01535
01536
01537
01538
01539 {
01540
01541 int32_t last_node=tree ;
01542
01543
01544 int32_t k ;
01545 for (k=0; k<mismatch_pos; k++)
01546 {
01547 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01548 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01549
01550 int32_t tmp=get_node();
01551 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01552 last_node=tmp ;
01553 if (weights_in_tree)
01554 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01555 else
01556 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01557 TRIE_ASSERT(j+k!=degree-1) ;
01558 }
01559 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01560 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01561 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01562 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01563
01564 if (j+k==degree-1)
01565 {
01566
01567
01568
01569 for (int32_t q=0; q<4; q++)
01570 TreeMem[last_node].child_weights[q]=0.0 ;
01571 if (weights_in_tree)
01572 {
01573 if (TreeMem[node].seq[mismatch_pos]<4)
01574 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01575 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01576 }
01577 else
01578 {
01579 if (TreeMem[node].seq[mismatch_pos]<4)
01580 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01581 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01582 }
01583
01584 #ifdef TRIE_CHECK_EVERYTHING
01585 TreeMem[last_node].has_floats=true ;
01586 #endif
01587 }
01588 else
01589 {
01590
01591 if (TreeMem[node].seq[mismatch_pos]<4)
01592 {
01593 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01594
01595
01596 for (int32_t q=0; q<16; q++)
01597 {
01598 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01599 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01600 else
01601 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01602 }
01603 #ifdef TRIE_CHECK_EVERYTHING
01604 TreeMem[node].has_seq=true ;
01605 #endif
01606 }
01607
01608
01609 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01610 int32_t tmp = get_node() ;
01611 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01612 last_node=tmp ;
01613
01614 TreeMem[last_node].weight = alpha ;
01615 #ifdef TRIE_CHECK_EVERYTHING
01616 TreeMem[last_node].has_seq = true ;
01617 #endif
01618 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01619 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01620 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01621 }
01622 }
01623 break ;
01624 }
01625 else
01626 {
01627 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01628 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01629 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01630 if (weights_in_tree)
01631 TreeMem[tree].weight += alpha*weights_column[j];
01632 else
01633 TreeMem[tree].weight += alpha ;
01634 }
01635 }
01636 else if (j==degree-1)
01637 {
01638
01639 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01640 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01641 if (weights_in_tree)
01642 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01643 else
01644 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01645
01646 break;
01647 }
01648 else
01649 {
01650 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01651 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01652 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01653
01654 int32_t tmp = get_node((j==degree-2) && (!use_seq));
01655 if (use_seq)
01656 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01657 else
01658 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01659 tree=tmp ;
01660
01661 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01662 #ifdef TRIE_CHECK_EVERYTHING
01663 TreeMem[tree].has_seq = use_seq ;
01664 #endif
01665 if (use_seq)
01666 {
01667 TreeMem[tree].weight = alpha ;
01668
01669 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01670 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01671 {
01672 TRIE_ASSERT(q<16) ;
01673 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01674 }
01675 break ;
01676 }
01677 else
01678 {
01679 if (weights_in_tree)
01680 TreeMem[tree].weight = alpha*weights_column[j] ;
01681 else
01682 TreeMem[tree].weight = alpha ;
01683 #ifdef TRIE_CHECK_EVERYTHING
01684 if (j==degree-2)
01685 TreeMem[tree].has_floats = true ;
01686 #endif
01687 }
01688 }
01689 }
01690 }
01691
01692 template <class Trie>
01693 float64_t CTrie<Trie>::compute_by_tree_helper(
01694 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01695 int32_t weight_pos, float64_t* weights,
01696 bool degree_times_position_weights)
01697 {
01698 int32_t tree = trees[tree_pos] ;
01699
01700 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01701 return 0.0;
01702
01703 float64_t *weights_column=NULL ;
01704 if (degree_times_position_weights)
01705 weights_column=&weights[weight_pos*degree] ;
01706 else
01707 weights_column=weights ;
01708
01709 float64_t sum=0 ;
01710 for (int32_t j=0; seq_pos+j < len; j++)
01711 {
01712 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01713
01714 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01715 {
01716 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01717 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01718 {
01719 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01720 TRIE_ASSERT(tree>=0) ;
01721 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01722 float64_t this_weight=0.0 ;
01723 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01724 {
01725 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01726 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01727 break ;
01728 this_weight += weights_column[j+k] ;
01729 }
01730 sum += TreeMem[tree].weight * this_weight ;
01731 break ;
01732 }
01733 else
01734 {
01735 tree=TreeMem[tree].children[vec[seq_pos+j]];
01736 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01737 if (weights_in_tree)
01738 sum += TreeMem[tree].weight ;
01739 else
01740 sum += TreeMem[tree].weight * weights_column[j] ;
01741 } ;
01742 }
01743 else
01744 {
01745 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01746 if (j==degree-1)
01747 {
01748 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01749 if (weights_in_tree)
01750 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01751 else
01752 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01753 }
01754 else
01755 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01756
01757 break;
01758 }
01759 }
01760
01761 if (position_weights!=NULL)
01762 return sum*position_weights[weight_pos] ;
01763 else
01764 return sum ;
01765 }
01766
01767 template <class Trie>
01768 void CTrie<Trie>::compute_by_tree_helper(
01769 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01770 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01771 int32_t mkl_stepsize, float64_t * weights,
01772 bool degree_times_position_weights)
01773 {
01774 int32_t tree = trees[tree_pos] ;
01775 if (factor==0)
01776 return ;
01777
01778 if (position_weights!=NULL)
01779 {
01780 factor *= position_weights[weight_pos] ;
01781 if (factor==0)
01782 return ;
01783 if (!degree_times_position_weights)
01784 {
01785 for (int32_t j=0; seq_pos+j<len; j++)
01786 {
01787 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01788 {
01789 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01790 {
01791 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01792 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01793 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01794 {
01795 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01796 break ;
01797 if (weights_in_tree)
01798 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01799 else
01800 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01801 }
01802 break ;
01803 }
01804 else
01805 {
01806 tree=TreeMem[tree].children[vec[seq_pos+j]];
01807 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01808 if (weights_in_tree)
01809 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01810 else
01811 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01812 }
01813 }
01814 else
01815 {
01816 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01817 if (j==degree-1)
01818 {
01819 if (weights_in_tree)
01820 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01821 else
01822 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01823 }
01824 }
01825 }
01826 }
01827 else
01828 {
01829 for (int32_t j=0; seq_pos+j<len; j++)
01830 {
01831 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01832 {
01833 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01834 {
01835 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01836 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01837 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01838 {
01839 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01840 break ;
01841 if (weights_in_tree)
01842 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01843 else
01844 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01845 }
01846 break ;
01847 }
01848 else
01849 {
01850 tree=TreeMem[tree].children[vec[seq_pos+j]];
01851 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01852 if (weights_in_tree)
01853 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01854 else
01855 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01856 }
01857 }
01858 else
01859 {
01860 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01861 if (j==degree-1)
01862 {
01863 if (weights_in_tree)
01864 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01865 else
01866 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01867 }
01868 break ;
01869 }
01870 }
01871 }
01872 }
01873 else if (!degree_times_position_weights)
01874 {
01875 for (int32_t j=0; seq_pos+j<len; j++)
01876 {
01877 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01878 {
01879 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01880 {
01881 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01882 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01883 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01884 {
01885 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01886 break ;
01887 if (weights_in_tree)
01888 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01889 else
01890 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01891 }
01892 break ;
01893 }
01894 else
01895 {
01896 tree=TreeMem[tree].children[vec[seq_pos+j]];
01897 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01898 if (weights_in_tree)
01899 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01900 else
01901 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01902 }
01903 }
01904 else
01905 {
01906 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01907 if (j==degree-1)
01908 {
01909 if (weights_in_tree)
01910 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01911 else
01912 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01913 }
01914 break ;
01915 }
01916 }
01917 }
01918 else
01919 {
01920
01921
01922
01923
01924
01925
01926
01927
01928
01929
01930
01931
01932
01933
01934
01935
01936
01937
01938 for (int32_t j=0; seq_pos+j<len; j++)
01939 {
01940 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01941 {
01942 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01943 {
01944 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01945 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01946 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01947 {
01948 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01949 break ;
01950 if (weights_in_tree)
01951 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01952 else
01953 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01954 }
01955 break ;
01956 }
01957 else
01958 {
01959 tree=TreeMem[tree].children[vec[seq_pos+j]];
01960 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01961 if (weights_in_tree)
01962 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01963 else
01964 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01965 }
01966 }
01967 else
01968 {
01969 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01970 if (j==degree-1)
01971 {
01972 if (weights_in_tree)
01973 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01974 else
01975 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01976 }
01977 break ;
01978 }
01979 }
01980 }
01981 }
01982
01983 template <class Trie>
01984 void CTrie<Trie>::fill_backtracking_table_recursion(
01985 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
01986 CDynamicArray<ConsensusEntry>* table, float64_t* weights)
01987 {
01988 float64_t w=1.0;
01989
01990 if (weights_in_tree || depth==0)
01991 value+=tree->weight;
01992 else
01993 {
01994 w=weights[degree-1];
01995 value+=weights[depth-1]*tree->weight;
01996 }
01997
01998 if (degree-1==depth)
01999 {
02000 for (int32_t sym=0; sym<4; sym++)
02001 {
02002 float64_t v=w*tree->child_weights[sym];
02003 if (v!=0.0)
02004 {
02005 ConsensusEntry entry;
02006 entry.bt=-1;
02007 entry.score=value+v;
02008 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02009
02010 table->append_element(entry);
02011 }
02012 }
02013 }
02014 else
02015 {
02016 for (int32_t sym=0; sym<4; sym++)
02017 {
02018 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02019 if (tree->children[sym] != NO_CHILD)
02020 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02021 }
02022 }
02023 }
02024
02025 template <class Trie>
02026 float64_t CTrie<Trie>::get_cumulative_score(
02027 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02028 {
02029 float64_t result=0.0;
02030
02031
02032
02033 for (int32_t i=pos; i<pos+deg && i<length; i++)
02034 {
02035
02036 Trie* tree = &TreeMem[trees[i]];
02037
02038 for (int32_t d=0; d<deg-i+pos; d++)
02039 {
02040
02041 ASSERT(d-1<degree);
02042 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02043
02044 float64_t w=1.0;
02045 if (!weights_in_tree)
02046 w=weights[d];
02047
02048 ASSERT(tree->children[sym] != NO_CHILD);
02049 tree=&TreeMem[tree->children[sym]];
02050 result+=w*tree->weight;
02051 }
02052 }
02053
02054 return result;
02055 }
02056
02057 template <class Trie>
02058 void CTrie<Trie>::fill_backtracking_table(
02059 int32_t pos, CDynamicArray<ConsensusEntry>* prev,
02060 CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02061 {
02062 ASSERT(pos>=0 && pos<length);
02063 ASSERT(!use_compact_terminal_nodes);
02064
02065 Trie* t = &TreeMem[trees[pos]];
02066
02067 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02068
02069
02070 if (cumulative)
02071 {
02072 int32_t num_cur=cur->get_num_elements();
02073 for (int32_t i=0; i<num_cur; i++)
02074 {
02075 ConsensusEntry entry=cur->get_element(i);
02076 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02077 cur->set_element(entry,i);
02078
02079 }
02080 }
02081
02082
02083
02084 if (prev)
02085 {
02086 int32_t num_cur=cur->get_num_elements();
02087 int32_t num_prev=prev->get_num_elements();
02088
02089 for (int32_t i=0; i<num_cur; i++)
02090 {
02091
02092 uint64_t str_cur= cur->get_element(i).string >> 2;
02093
02094
02095 int32_t bt=-1;
02096 float64_t max_score=0.0;
02097
02098 for (int32_t j=0; j<num_prev; j++)
02099 {
02100
02101 uint64_t mask=
02102 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02103 uint64_t str_prev= mask & prev->get_element(j).string;
02104
02105
02106 if (str_cur == str_prev)
02107 {
02108 float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02109 if (bt==-1 || sc>max_score)
02110 {
02111 bt=j;
02112 max_score=sc;
02113
02114
02115 }
02116 }
02117 }
02118
02119 ASSERT(bt!=-1);
02120 ConsensusEntry entry;
02121 entry.bt=bt;
02122 entry.score=max_score;
02123 entry.string=cur->get_element(i).string;
02124 cur->set_element(entry, i);
02125
02126 }
02127 }
02128 }
02129 #endif // _TRIE_H___