Trie.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "lib/DynamicArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021 
00022 // sentinel is 0xFFFFFFFC or float -2
00023 #define NO_CHILD ((int32_t)-1073741824)
00024 
00025 #define WEIGHTS_IN_TRIE 
00026 //#define TRIE_CHECK_EVERYTHING
00027 
00028 #ifdef TRIE_CHECK_EVERYTHING
00029 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00030 #else
00031 #define TRIE_ASSERT_EVERYTHING(x) 
00032 #endif
00033 
00034 //#define TRIE_ASSERT(x) ASSERT(x)
00035 #define TRIE_ASSERT(x) 
00036 
00037 #define TRIE_TERMINAL_CHARACTER  7
00038 
00040 struct ConsensusEntry
00041 {
00043     uint64_t string;
00045     float32_t score;
00047     int32_t bt;
00048 };
00049 
00051 struct POIMTrie
00052 {
00054     float64_t weight;
00055 #ifdef TRIE_CHECK_EVERYTHING
00056 
00057     bool has_seq;
00059     bool has_floats;
00060 #endif      
00061     union
00062     {
00064         float32_t child_weights[4];
00066         int32_t children[4];
00068         uint8_t seq[16] ;
00069     }; 
00070 
00072     float64_t S;
00074     float64_t L;
00076     float64_t R;
00077 };
00078 
00080 struct DNATrie
00081 {
00083     float64_t weight;
00084 #ifdef TRIE_CHECK_EVERYTHING
00085 
00086     bool has_seq;
00088     bool has_floats;
00089 #endif
00090     union
00091     {
00093         float32_t child_weights[4];
00095         int32_t children[4];
00097         uint8_t seq[16] ;
00098     }; 
00099 };
00100 
00102 struct TreeParseInfo {
00104     int32_t num_sym;
00106     int32_t num_feat;
00108     int32_t p;
00110     int32_t k;
00112     int32_t* nofsKmers;
00114     float64_t* margFactors;
00116     int32_t* x;
00118     int32_t* substrs;
00120     int32_t y0;
00122     float64_t* C_k;
00124     float64_t* L_k;
00126     float64_t* R_k;
00127 };
00128 
00129 
00130 template <class Trie> class CTrie;
00131 
00148 template <class Trie> class CTrie : public CSGObject
00149 {
00150     public:
00157         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00158 
00160         CTrie(const CTrie & to_copy);
00161         ~CTrie();
00162 
00164         const CTrie & operator=(const CTrie & to_copy);
00165 
00173         bool compare_traverse(
00174             int32_t node, const CTrie & other, int32_t other_node);
00175 
00181         bool compare(const CTrie & other);
00182 
00189         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00190 
00197         int32_t find_deepest_node(
00198             int32_t start_node, int32_t &deepest_node) const;
00199 
00204         void display_node(int32_t node) const;
00205 
00207         void destroy();
00208 
00213         void set_degree(int32_t d);
00214 
00221         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00222 
00228         void delete_trees(bool p_use_compact_terminal_nodes=true);
00229 
00240         void add_to_trie(
00241             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00242             float64_t *weights, bool degree_times_position_weights);
00243 
00250         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00251 
00257         float64_t* compute_abs_weights(int32_t &len);
00258 
00271         float64_t compute_by_tree_helper(
00272             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00273             int32_t weight_pos, float64_t * weights,
00274             bool degree_times_position_weights) ;
00275 
00290         void compute_by_tree_helper(
00291             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00292             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00293             int32_t mkl_stepsize, float64_t * weights,
00294             bool degree_times_position_weights);
00295 
00310         void compute_scoring_helper(
00311             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00312             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00313             int32_t sym_offset, int32_t offs, float64_t* result);
00314 
00327         void add_example_to_tree_mismatch_recursion(
00328             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00329             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00330             int32_t max_mismatch, float64_t * weights);
00331 
00341         void traverse(
00342             int32_t tree, const int32_t p, struct TreeParseInfo info,
00343             const int32_t depth, int32_t* const x, const int32_t k);
00344 
00354         void count(
00355             const float64_t w, const int32_t depth,
00356             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00357             const int32_t k);
00358 
00365         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00366 
00375         float64_t get_cumulative_score(
00376             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00377 
00387         void fill_backtracking_table_recursion(
00388             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00389             CDynamicArray<ConsensusEntry>* table, float64_t* weights);
00390 
00399         void fill_backtracking_table(
00400             int32_t pos, CDynamicArray<ConsensusEntry>* prev,
00401             CDynamicArray<ConsensusEntry>* cur, bool cumulative,
00402             float64_t* weights);
00403 
00409         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00410 
00415         void POIMs_precalc_SLR(const float64_t* const distrib);
00416 
00426         void POIMs_get_SLR(
00427             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00428             float64_t* S, float64_t* L, float64_t* R);
00429 
00436         void POIMs_add_SLR(
00437             float64_t* const* const poims, const int32_t K,
00438             const int32_t debug);
00439 
00444         inline bool get_use_compact_terminal_nodes()
00445         {
00446             return use_compact_terminal_nodes ;
00447         }
00448 
00454         inline void set_use_compact_terminal_nodes(
00455             bool p_use_compact_terminal_nodes)
00456         {
00457             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00458         }
00459 
00464         inline int32_t get_num_used_nodes()
00465         {
00466             return TreeMemPtr;
00467         }
00468 
00473         inline void set_position_weights(const float64_t * p_position_weights)
00474         {
00475             position_weights=p_position_weights;
00476         }
00477 
00482         inline int32_t get_node(bool last_node=false)
00483         {
00484             int32_t ret = TreeMemPtr++;
00485             check_treemem() ;
00486 
00487             if (last_node)
00488             {
00489                 for (int32_t q=0; q<4; q++)
00490                     TreeMem[ret].child_weights[q]=0.0;
00491             }
00492             else
00493             {
00494                 for (int32_t q=0; q<4; q++)
00495                     TreeMem[ret].children[q]=NO_CHILD;
00496             }
00497 #ifdef TRIE_CHECK_EVERYTHING
00498             TreeMem[ret].has_seq=false ;
00499             TreeMem[ret].has_floats=false ;
00500 #endif
00501             TreeMem[ret].weight=0.0;
00502             return ret ;
00503         }
00504 
00506         inline void check_treemem()
00507         {
00508             if (TreeMemPtr+10 < TreeMemPtrMax)
00509                 return;
00510             SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00511                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00512             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00513             TreeMem = (Trie*) realloc(TreeMem,
00514                     TreeMemPtrMax*sizeof(Trie));
00515             if (!TreeMem)
00516                 SG_ERROR( "out of memory\n");
00517         }
00518 
00523         inline void set_weights_in_tree(bool weights_in_tree_)
00524         {
00525             weights_in_tree = weights_in_tree_;
00526         }
00527 
00532         inline bool get_weights_in_tree()
00533         {
00534             return weights_in_tree;
00535         }
00536 
00546         void POIMs_extract_W_helper(
00547             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00548             const int32_t y0, float64_t* const* const W, const int32_t K);
00549 
00562         void POIMs_calc_SLR_helper1(
00563             const float64_t* const distrib, const int32_t i,
00564             const int32_t nodeIdx, int32_t left_tries_idx[4],
00565             const int32_t depth, int32_t const lastSym, float64_t* S,
00566             float64_t* L, float64_t* R);
00567 
00568 
00579         void POIMs_calc_SLR_helper2(
00580             const float64_t* const distrib, const int32_t i,
00581             const int32_t nodeIdx, int32_t left_tries_idx[4],
00582             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00583 
00594         void POIMs_add_SLR_helper1(
00595             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00596             const int32_t y0, float64_t* const* const poims, const int32_t K,
00597             const int32_t debug);
00598 
00612         void POIMs_add_SLR_helper2(
00613             float64_t* const* const poims, const int32_t K, const int32_t k,
00614             const int32_t i, const int32_t y, const float64_t valW,
00615             const float64_t valS, const float64_t valL, const float64_t valR,
00616             const int32_t debug);
00617 
00618     public:
00620         int32_t NUM_SYMS;
00621 
00622     protected:
00624         int32_t length;
00626         int32_t * trees;
00627 
00629         int32_t degree;
00631         float64_t const *  position_weights;
00632 
00634         Trie* TreeMem;
00636         int32_t TreeMemPtr;
00638         int32_t TreeMemPtrMax;
00640         bool use_compact_terminal_nodes;
00641 
00643         bool weights_in_tree;
00644 
00646         int32_t* nofsKmers;
00647 };
00648 
00649     template <class Trie>
00650     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00651     : CSGObject(), degree(d), position_weights(NULL),
00652         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00653         weights_in_tree(true)
00654     {
00655         TreeMemPtrMax=1024*1024/sizeof(Trie);
00656         TreeMemPtr=0;
00657         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00658 
00659         length=0;
00660         trees=NULL;
00661 
00662         NUM_SYMS=4;
00663     }
00664 
00665     template <class Trie>
00666     CTrie<Trie>::CTrie(const CTrie & to_copy)
00667     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00668         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00669     {
00670         if (to_copy.position_weights!=NULL)
00671         {
00672             position_weights = to_copy.position_weights;
00673             /*new float64_t[to_copy.length];
00674               for (int32_t i=0; i<to_copy.length; i++)
00675               position_weights[i]=to_copy.position_weights[i]; */
00676         }
00677         else
00678             position_weights=NULL;
00679 
00680         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00681         TreeMemPtr=to_copy.TreeMemPtr;
00682         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00683         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00684 
00685         length=to_copy.length;
00686         trees=new int32_t[length];
00687         for (int32_t i=0; i<length; i++)
00688             trees[i]=to_copy.trees[i];
00689 
00690         NUM_SYMS=4;
00691     }
00692 
00693     template <class Trie>
00694 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00695 {
00696     degree=to_copy.degree ;
00697     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00698 
00699     delete[] position_weights ;
00700     position_weights=NULL ;
00701     if (to_copy.position_weights!=NULL)
00702     {
00703         position_weights=to_copy.position_weights ;
00704         /*position_weights = new float64_t[to_copy.length] ;
00705           for (int32_t i=0; i<to_copy.length; i++)
00706           position_weights[i]=to_copy.position_weights[i] ;*/
00707     }
00708     else
00709         position_weights=NULL ;
00710 
00711     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00712     TreeMemPtr=to_copy.TreeMemPtr ;
00713     free(TreeMem) ;
00714     TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00715     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00716 
00717     length = to_copy.length ;
00718     if (trees)
00719         delete[] trees ;
00720     trees=new int32_t[length] ;     
00721     for (int32_t i=0; i<length; i++)
00722         trees[i]=to_copy.trees[i] ;
00723 
00724     return *this ;
00725 }
00726 
00727 template <class Trie>
00728 int32_t CTrie<Trie>::find_deepest_node(
00729     int32_t start_node, int32_t& deepest_node) const
00730 {
00731 #ifdef TRIE_CHECK_EVERYTHING
00732     int32_t ret=0 ;
00733     SG_DEBUG("start_node=%i\n", start_node) ;
00734 
00735     if (start_node==NO_CHILD) 
00736     {
00737         for (int32_t i=0; i<length; i++)
00738         {
00739             int32_t my_deepest_node ;
00740             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00741             SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00742             if (depth>ret)
00743             {
00744                 deepest_node=my_deepest_node ;
00745                 ret=depth ;
00746             }
00747         }
00748         return ret ;
00749     }
00750 
00751     if (TreeMem[start_node].has_seq)
00752     {
00753         for (int32_t q=0; q<16; q++)
00754             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00755                 ret++ ;
00756         deepest_node=start_node ;
00757         return ret ;
00758     }
00759     if (TreeMem[start_node].has_floats)
00760     {
00761         deepest_node=start_node ;
00762         return 1 ;
00763     }
00764 
00765     for (int32_t q=0; q<4; q++)
00766     {
00767         int32_t my_deepest_node ;
00768         if (TreeMem[start_node].children[q]==NO_CHILD)
00769             continue ;
00770         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00771         if (depth>ret)
00772         {
00773             deepest_node=my_deepest_node ;
00774             ret=depth ;
00775         }
00776     }
00777     return ret ;
00778 #else
00779     SG_ERROR( "not implemented\n") ;
00780     return 0 ;
00781 #endif
00782 }
00783 
00784     template <class Trie>
00785 int32_t CTrie<Trie>::compact_nodes(
00786     int32_t start_node, int32_t depth, float64_t * weights) 
00787 {
00788     SG_ERROR( "code buggy\n") ;
00789 
00790     int32_t ret=0 ;
00791 
00792     if (start_node==NO_CHILD) 
00793     {
00794         for (int32_t i=0; i<length; i++)
00795             compact_nodes(i,1, weights) ;
00796         return 0 ;
00797     }
00798     if (start_node<0)
00799         return -1 ;
00800 
00801     if (depth==degree-1)
00802     {
00803         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00804         int32_t num_used=0 ;
00805         for (int32_t q=0; q<4; q++)
00806             if (TreeMem[start_node].child_weights[q]!=0.0)
00807                 num_used++ ;
00808         if (num_used>1)
00809             return -1 ;
00810         return 1 ;
00811     }
00812     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00813 
00814     int32_t num_used = 0 ;
00815     int32_t q_used=-1 ;
00816 
00817     for (int32_t q=0; q<4; q++)
00818     {
00819         if (TreeMem[start_node].children[q]==NO_CHILD)
00820             continue ;
00821         num_used++ ;
00822         q_used=q ;
00823     }
00824     if (num_used>1)
00825     {
00826         if (depth>=degree-2)
00827             return -1 ;
00828         for (int32_t q=0; q<4; q++)
00829         {
00830             if (TreeMem[start_node].children[q]==NO_CHILD)
00831                 continue ;
00832             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00833             if (num<=2)
00834                 continue ;
00835             int32_t node=get_node() ;
00836 
00837             int32_t last_node=TreeMem[start_node].children[q] ;
00838             if (weights_in_tree)
00839             {
00840                 ASSERT(weights[depth]!=0.0) ;
00841                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00842             }
00843             else
00844                 TreeMem[node].weight=TreeMem[last_node].weight ;
00845 
00846 #ifdef TRIE_CHECK_EVERYTHING
00847             TreeMem[node].has_seq=true ;
00848 #endif
00849             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00850             for (int32_t n=0; n<num; n++)
00851             {
00852                 ASSERT(depth+n+1<=degree-1) ;
00853                 ASSERT(last_node!=NO_CHILD) ;
00854                 if (depth+n+1==degree-1)
00855                 {
00856                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00857                     int32_t  k ;
00858                     for (k=0; k<4; k++)
00859                         if (TreeMem[last_node].child_weights[k]!=0.0)
00860                             break ;
00861                     if (k==4)
00862                         break ;
00863                     TreeMem[node].seq[n]=k ;
00864                     break ;
00865                 }
00866                 else
00867                 {
00868                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00869                     int32_t k ;
00870                     for (k=0; k<4; k++)
00871                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00872                             break ;
00873                     if (k==4)
00874                         break ;
00875                     TreeMem[node].seq[n]=k ;
00876                     last_node=TreeMem[last_node].children[k] ;
00877                 }
00878             }
00879             TreeMem[start_node].children[q]=-node ;
00880         }
00881         return -1 ;
00882     }
00883     if (num_used==0)
00884         return 0 ;
00885 
00886     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00887     if (ret<0)
00888         return ret ;
00889     return ret+1 ;
00890 }
00891 
00892 
00893     template <class Trie>
00894 bool CTrie<Trie>::compare_traverse(
00895     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00896 {
00897     SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00898     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00899     {
00900         SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00901         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00902         display_node(node) ;
00903         SG_DEBUG( "============================================================\n") ;           
00904         other.display_node(other_node) ;
00905         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00906         return false ;
00907     }
00908 
00909 #ifdef TRIE_CHECK_EVERYTHING
00910     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00911     {
00912         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00913         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00914         display_node(node) ;
00915         SG_DEBUG( "============================================================\n") ;           
00916         other.display_node(other_node) ;
00917         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00918         return false ;
00919     }
00920     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00921     {
00922         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00923         return false ;
00924     }
00925     if (other.TreeMem[other_node].has_floats)
00926     {
00927         for (int32_t q=0; q<4; q++)
00928             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00929             {
00930                 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00931                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00932                 display_node(node) ;
00933                 SG_DEBUG( "============================================================\n") ;           
00934                 other.display_node(other_node) ;
00935                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00936                 return false ;
00937             }
00938     }
00939     if (other.TreeMem[other_node].has_seq)
00940     {
00941         for (int32_t q=0; q<16; q++)
00942             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00943             {
00944                 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00945                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00946                 display_node(node) ;
00947                 SG_DEBUG( "============================================================\n") ;           
00948                 other.display_node(other_node) ;
00949                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00950                 return false ;
00951             }
00952     }
00953     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00954     {
00955         for (int32_t q=0; q<4; q++)
00956         {
00957             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00958                 continue ;
00959             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00960             {
00961                 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00962                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00963                 display_node(node) ;
00964                 SG_DEBUG( "============================================================\n") ;           
00965                 other.display_node(other_node) ;
00966                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00967                 return false ;
00968             }
00969             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00970                 return false ;
00971         }
00972     }
00973 #else
00974     SG_ERROR( "not implemented\n") ;
00975 #endif
00976 
00977     return true ;
00978 }
00979 
00980     template <class Trie>
00981 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
00982 {
00983     bool ret=true ;
00984     for (int32_t i=0; i<length; i++)
00985         if (!compare_traverse(trees[i], other, other.trees[i]))
00986             return false ;
00987         else
00988             SG_DEBUG("two tries at %i identical\n", i) ;
00989 
00990     return ret ;
00991 }
00992 
00993 template <class Trie>
00994 bool CTrie<Trie>::find_node(
00995     int32_t node, int32_t * trace, int32_t& trace_len) const 
00996 {
00997 #ifdef TRIE_CHECK_EVERYTHING
00998     ASSERT(trace_len-1>=0) ;
00999     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01000         if (TreeMem[trace[trace_len-1]].has_seq)
01001             return false ;
01002     if (TreeMem[trace[trace_len-1]].has_floats)
01003         return false ;
01004 
01005     for (int32_t q=0; q<4; q++)
01006     {
01007         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01008             continue ;
01009         int32_t tl=trace_len+1 ;
01010         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01011             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01012         else
01013             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01014 
01015         if (trace[trace_len]==node)
01016         {
01017             trace_len=tl ;
01018             return true ;
01019         }
01020         if (find_node(node, trace, tl))
01021         {
01022             trace_len=tl ;
01023             return true ;
01024         }
01025     }
01026     trace_len=0 ;
01027     return false ;
01028 #else
01029     SG_ERROR( "not implemented\n") ;
01030     return false ;
01031 #endif
01032 }
01033 
01034 template <class Trie>
01035 void CTrie<Trie>::display_node(int32_t node) const
01036 {
01037 #ifdef TRIE_CHECK_EVERYTHING
01038     int32_t * trace=new int32_t[2*degree] ;
01039     int32_t trace_len=-1 ;
01040     bool found = false ;
01041     int32_t tree=-1 ;
01042     for (tree=0; tree<length; tree++)
01043     {
01044         trace[0]=trees[tree] ;
01045         trace_len=1 ;
01046         found=find_node(node, trace, trace_len) ;
01047         if (found)
01048             break ;
01049     }
01050     ASSERT(found) ;
01051     SG_PRINT( "position %i  trace: ", tree) ;
01052 
01053     for (int32_t i=0; i<trace_len-1; i++)
01054     {
01055         int32_t branch=-1 ;
01056         for (int32_t q=0; q<4; q++)
01057             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01058             {
01059                 branch=q;
01060                 break ;
01061             }
01062         ASSERT(branch!=-1) ;
01063         char acgt[5]="ACGT" ;
01064         SG_PRINT( "%c", acgt[branch]) ;
01065     }
01066     SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01067     if (TreeMem[node].has_floats)
01068     {
01069         for (int32_t q=0; q<4; q++)
01070             SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01071     }
01072     if (TreeMem[node].has_seq)
01073     {
01074         for (int32_t q=0; q<16; q++)
01075             SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01076     }
01077     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01078     {
01079         for (int32_t q=0; q<4; q++)
01080         {
01081             if (TreeMem[node].children[q]!=NO_CHILD)
01082             {
01083                 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01084                 display_node(abs(TreeMem[node].children[q])) ;
01085             }
01086             else
01087                 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01088         }
01089 
01090     }
01091 
01092     delete[] trace ;
01093 #else
01094     SG_ERROR( "not implemented\n") ;
01095 #endif
01096 }
01097 
01098 
01099 template <class Trie> CTrie<Trie>::~CTrie()
01100 {
01101     destroy() ;
01102 
01103     free(TreeMem) ;
01104 }
01105 
01106 template <class Trie> void CTrie<Trie>::destroy()
01107 {
01108     if (trees!=NULL)
01109     {
01110         delete_trees();
01111         for (int32_t i=0; i<length; i++)
01112             trees[i] = NO_CHILD;
01113         delete[] trees;
01114 
01115         TreeMemPtr=0;
01116         length=0;
01117         trees=NULL;
01118     }
01119 }
01120 
01121 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01122 {
01123     delete_trees(get_use_compact_terminal_nodes());
01124     degree=d;
01125 }
01126 
01127 template <class Trie> void CTrie<Trie>::create(
01128     int32_t len, bool p_use_compact_terminal_nodes)
01129 {
01130     destroy();
01131 
01132     trees=new int32_t[len] ;        
01133     TreeMemPtr=0 ;
01134     for (int32_t i=0; i<len; i++)
01135         trees[i]=get_node(degree==1);
01136     length = len ;
01137 
01138     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01139 }
01140 
01141 
01142 template <class Trie> void CTrie<Trie>::delete_trees(
01143     bool p_use_compact_terminal_nodes)
01144 {
01145     if (trees==NULL)
01146         return;
01147 
01148     TreeMemPtr=0 ;
01149     for (int32_t i=0; i<length; i++)
01150         trees[i]=get_node(degree==1);
01151 
01152     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01153 } 
01154 
01155     template <class Trie>
01156 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01157 {
01158     float64_t ret=0 ;
01159 
01160     if (tree==NO_CHILD)
01161         return 0 ;
01162     TRIE_ASSERT(tree>=0) ;
01163 
01164     if (depth==degree-2)
01165     {
01166         ret+=(TreeMem[tree].weight) ;
01167 
01168         for (int32_t k=0; k<4; k++)
01169             ret+=(TreeMem[tree].child_weights[k]) ;
01170 
01171         return ret ;
01172     }
01173 
01174     ret+=(TreeMem[tree].weight) ;
01175 
01176     for (int32_t i=0; i<4; i++)
01177         if (TreeMem[tree].children[i]!=NO_CHILD)
01178             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01179 
01180     return ret ;
01181 }
01182 
01183 
01184     template <class Trie>
01185 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01186 {
01187     float64_t * sum=new float64_t[length*4] ;
01188     for (int32_t i=0; i<length*4; i++)
01189         sum[i]=0 ;
01190     len=length ;
01191 
01192     for (int32_t i=0; i<length; i++)
01193     {
01194         TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01195         for (int32_t k=0; k<4; k++)
01196         {
01197             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01198         }
01199     }
01200 
01201     return sum ;
01202 }
01203 
01204     template <class Trie>
01205 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01206     int32_t tree,  int32_t i, float64_t alpha,
01207         int32_t *vec, int32_t len_rem, 
01208         int32_t degree_rec, int32_t mismatch_rec, 
01209         int32_t max_mismatch, float64_t * weights) 
01210 {
01211     if (tree==NO_CHILD)
01212         tree=trees[i] ;
01213     TRIE_ASSERT(tree!=NO_CHILD) ;
01214 
01215     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01216         return ;
01217     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01218 
01219     int32_t subtree = NO_CHILD ;
01220 
01221     if (degree_rec==degree-1)
01222     {
01223         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01224         if (weights_in_tree)
01225             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01226         else
01227             if (weights[degree_rec]!=0.0)
01228                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01229         if (mismatch_rec+1<=max_mismatch)
01230             for (int32_t o=0; o<3; o++)
01231             {
01232                 if (weights_in_tree)
01233                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01234                 else
01235                     if (weights[degree_rec]!=0.0)
01236                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01237             }
01238         return ;
01239     }
01240     else
01241     {
01242         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01243         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01244         {
01245             subtree=TreeMem[tree].children[vec[0]] ;
01246             if (weights_in_tree)
01247                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01248             else
01249                 if (weights[degree_rec]!=0.0)
01250                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01251         }
01252         else 
01253         {
01254             int32_t tmp = get_node(degree_rec==degree-2);
01255             ASSERT(tmp>=0) ;
01256             TreeMem[tree].children[vec[0]]=tmp ;
01257             subtree=tmp ;
01258 #ifdef TRIE_CHECK_EVERYTHING
01259             if (degree_rec==degree-2)
01260                 TreeMem[subtree].has_floats=true ;
01261 #endif
01262             if (weights_in_tree)
01263                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01264             else
01265                 if (weights[degree_rec]!=0.0)
01266                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01267                 else
01268                     TreeMem[subtree].weight = 0.0 ;
01269         }
01270         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01271                 &vec[1], len_rem-1, 
01272                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01273 
01274         if (mismatch_rec+1<=max_mismatch)
01275         {
01276             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01277             for (int32_t o=0; o<3; o++)
01278             {
01279                 int32_t ot = other[vec[0]][o] ;
01280                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01281                 {
01282                     subtree=TreeMem[tree].children[ot] ;
01283                     if (weights_in_tree)
01284                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01285                     else
01286                         if (weights[degree_rec]!=0.0)
01287                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01288                 }
01289                 else 
01290                 {
01291                     int32_t tmp = get_node(degree_rec==degree-2);
01292                     ASSERT(tmp>=0) ;
01293                     TreeMem[tree].children[ot]=tmp ;
01294                     subtree=tmp ;
01295 #ifdef TRIE_CHECK_EVERYTHING
01296                     if (degree_rec==degree-2)
01297                         TreeMem[subtree].has_floats=true ;
01298 #endif
01299 
01300                     if (weights_in_tree)
01301                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01302                     else
01303                         if (weights[degree_rec]!=0.0)
01304                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01305                         else
01306                             TreeMem[subtree].weight = 0.0 ;
01307                 }
01308 
01309                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01310                         &vec[1], len_rem-1, 
01311                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01312             }
01313         }
01314     }
01315 }
01316 
01317     template <class Trie>
01318 void CTrie<Trie>::compute_scoring_helper(
01319     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01320     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01321     int32_t offs, float64_t* result)
01322 {
01323     if (i+j<num_feat)
01324     {
01325         float64_t decay=1.0; //no decay by default
01326         //if (j>d)
01327         //  decay=pow(0.5,j); //marginalize out lower order matches
01328 
01329         if (j<degree-1)
01330         {
01331             for (int32_t k=0; k<num_sym; k++)
01332             {
01333                 if (TreeMem[tree].children[k]!=NO_CHILD)
01334                 {
01335                     int32_t child=TreeMem[tree].children[k];
01336                     //continue recursion if not yet at max_degree, else add to result
01337                     if (d<max_degree-1)
01338                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01339                     else
01340                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01341 
01343                     if (d==0)
01344                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01345                 }
01346             }
01347         }
01348         else if (j==degree-1)
01349         {
01350             for (int32_t k=0; k<num_sym; k++)
01351             {
01352                 //continue recursion if not yet at max_degree, else add to result
01353                 if (d<max_degree-1 && i<num_feat-1)
01354                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01355                 else
01356                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01357             }
01358         }
01359     }
01360 }
01361 
01362     template <class Trie>
01363 void CTrie<Trie>::traverse(
01364     int32_t tree, const int32_t p, struct TreeParseInfo info,
01365     const int32_t depth, int32_t* const x, const int32_t k)
01366 {
01367     const int32_t num_sym = info.num_sym;
01368     const int32_t y0 = info.y0;
01369     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01370     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01371     //if( !( info.y0 == temp ) ) {
01372     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01373     //}
01374     //ASSERT( info.y0 == temp );
01375     int32_t sym;
01376     ASSERT( depth < degree );
01377     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] );
01378     if (depth<degree-1)
01379     {
01380         for( sym=0; sym<num_sym; ++sym ) {
01381             const int32_t childNum = TreeMem[tree].children[ sym ];
01382             if( childNum != NO_CHILD ) {
01383                 int32_t child = childNum ;
01384                 x[depth] = sym;
01385                 info.substrs[depth+1] = y0 + sym;
01386                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01387                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01388                 count( TreeMem[child].weight, depth, info, p, x, k );
01389                 traverse( child, p, info, depth+1, x, k );
01390                 x[depth] = -1;
01391             }
01392         }
01393     }
01394     else if( depth == degree-1 )
01395     {
01396         for( sym=0; sym<num_sym; ++sym ) {
01397             const float64_t w = TreeMem[tree].child_weights[ sym ];
01398             if( w != 0.0 ) {
01399                 x[depth] = sym;
01400                 info.substrs[depth+1] = y0 + sym;
01401                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01402                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01403                 count( w, depth, info, p, x, k );
01404                 x[depth] = -1;
01405             }
01406         }
01407     }
01408     //info.substrs[depth+1] = -1;
01409     //info.y0 = temp;
01410 }
01411 
01412     template <class Trie>
01413 void CTrie<Trie>::count(
01414     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01415     const int32_t p, int32_t* x, const int32_t k)
01416 {
01417     ASSERT( fabs(w) < 1e10 );
01418     ASSERT( x[depth] >= 0 );
01419     ASSERT( x[depth+1] < 0 );
01420     if ( depth < k ) {
01421         return;
01422     }
01423     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) );
01424     const int32_t nofKmers = info.nofsKmers[k];
01425     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01426     const int32_t m_a = depth - k + 1;
01427     const int32_t m_b = info.num_feat - p;
01428     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01429     // all proper k-substrings
01430     const int32_t offset0 = nofKmers * p;
01431     register int32_t i;
01432     register int32_t offset;
01433     offset = offset0;
01434     for( i = 0; i < m; ++i ) {
01435         const int32_t y = info.substrs[i+k+1];
01436         info.C_k[ y + offset ] += margWeight;
01437         offset += nofKmers;
01438     }
01439     if( depth > k ) {
01440         // k-prefix
01441         const int32_t offsR = info.substrs[k+1] + offset0;
01442         info.R_k[offsR] += margWeight;
01443         // k-suffix
01444         if( p+depth-k < info.num_feat ) {
01445             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01446             info.L_k[offsL] += margWeight; 
01447         }
01448     }
01449     //    # N.x = substring represented by N
01450     //    # N.d = length of N.x
01451     //    # N.s = starting position of N.x
01452     //    # N.w = weight for feature represented by N
01453     //    if( N.d >= k )
01454     //      margContrib = w / 4^(N.d-k)
01455     //      for i = 1 to (N.d-k+1)
01456     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01457     //        C_k[ N.s+i-1, y ] += margContrib
01458     //      end;
01459     //      if( N.d > k )
01460     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01461     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01462     //      end;
01463     //    end;
01464 }
01465 
01466     template <class Trie>
01467 void CTrie<Trie>::add_to_trie(
01468     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01469     float64_t *weights, bool degree_times_position_weights)
01470 {
01471     int32_t tree = trees[i] ;
01472     //ASSERT(seq_offset==0) ;
01473 
01474     int32_t max_depth = 0 ;
01475     float64_t* weights_column ;
01476     if (degree_times_position_weights)
01477         weights_column = &weights[(i+seq_offset)*degree] ;
01478     else
01479         weights_column = weights ;
01480 
01481     if (weights_in_tree)
01482     {
01483         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01484             if (CMath::abs(weights_column[j]*alpha)>0)
01485                 max_depth = j+1 ;
01486     }
01487     else
01488         // don't use the weights
01489         max_depth=degree ;
01490 
01491     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01492     {
01493         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01494         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01495         {
01496             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01497             {
01498                 // special treatment of the next nodes
01499                 TRIE_ASSERT(j >= degree-16) ;
01500                 // get the right element
01501                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01502                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01503                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01504 
01505                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01506                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01507                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01508 
01509                 // check whether the same string is stored
01510                 int32_t mismatch_pos = -1 ;
01511                 {
01512                     int32_t k ;
01513                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01514                     {
01515                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01516                         // ###
01517                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01518                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01519                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01520                         TRIE_ASSERT(k<16) ;
01521                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01522                         {
01523                             mismatch_pos=k ;
01524                             break ;
01525                         }
01526                     }
01527                 }
01528 
01529                 // what happens when the .seq sequence is longer than vec? should we branch???
01530 
01531                 if (mismatch_pos==-1)
01532                     // if so, then just increase the weight by alpha and stop
01533                     TreeMem[node].weight+=alpha ;
01534                 else
01535                     // otherwise
01536                     // 1. replace current node with new node
01537                     // 2. create new nodes until mismatching positon
01538                     // 2. add a branch with old string (old node) and the new string (new node)
01539                 {
01540                     // replace old node
01541                     int32_t last_node=tree ;
01542 
01543                     // create new nodes until mismatch
01544                     int32_t k ;
01545                     for (k=0; k<mismatch_pos; k++)
01546                     {
01547                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01548                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01549 
01550                         int32_t tmp=get_node();
01551                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01552                         last_node=tmp ;
01553                         if (weights_in_tree)
01554                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01555                         else
01556                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01557                         TRIE_ASSERT(j+k!=degree-1) ;
01558                     }
01559                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01560                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01561                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01562                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01563 
01564                     if (j+k==degree-1)
01565                     {
01566                         // init child weights with zero if after dropping out
01567                         // of the k<mismatch_pos loop we are one level below degree
01568                         // (keep this even after get_node() change!)
01569                         for (int32_t q=0; q<4; q++)
01570                             TreeMem[last_node].child_weights[q]=0.0 ;
01571                         if (weights_in_tree)
01572                         {
01573                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01574                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01575                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01576                         }
01577                         else
01578                         {
01579                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01580                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01581                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01582                         }
01583 
01584 #ifdef TRIE_CHECK_EVERYTHING
01585                         TreeMem[last_node].has_floats=true ;
01586 #endif
01587                     }
01588                     else
01589                     {
01590                         // the branch for the existing string
01591                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01592                         {
01593                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01594 
01595                             // move string by mismatch_pos positions
01596                             for (int32_t q=0; q<16; q++)
01597                             {
01598                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01599                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01600                                 else
01601                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01602                             }
01603 #ifdef TRIE_CHECK_EVERYTHING
01604                             TreeMem[node].has_seq=true ;
01605 #endif
01606                         }
01607 
01608                         // the new branch
01609                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01610                         int32_t tmp = get_node() ;
01611                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01612                         last_node=tmp ;
01613 
01614                         TreeMem[last_node].weight = alpha ;
01615 #ifdef TRIE_CHECK_EVERYTHING
01616                         TreeMem[last_node].has_seq = true ;
01617 #endif
01618                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01619                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01620                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01621                     }
01622                 }
01623                 break ;
01624             } 
01625             else
01626             {
01627                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01628                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01629                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01630                 if (weights_in_tree)
01631                     TreeMem[tree].weight += alpha*weights_column[j];
01632                 else
01633                     TreeMem[tree].weight += alpha ;
01634             }
01635         }
01636         else if (j==degree-1)
01637         {
01638             // special treatment of the last node
01639             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01640             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01641             if (weights_in_tree)
01642                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01643             else
01644                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01645 
01646             break;
01647         }
01648         else
01649         {
01650             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01651             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01652             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01653 
01654             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01655             if (use_seq)
01656                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01657             else
01658                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01659             tree=tmp ;
01660 
01661             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01662 #ifdef TRIE_CHECK_EVERYTHING
01663             TreeMem[tree].has_seq = use_seq ;
01664 #endif
01665             if (use_seq)
01666             {
01667                 TreeMem[tree].weight = alpha ;
01668                 // important to have the terminal characters (see ###)
01669                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01670                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01671                 {
01672                     TRIE_ASSERT(q<16) ;
01673                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01674                 }
01675                 break ;
01676             }
01677             else
01678             {
01679                 if (weights_in_tree)
01680                     TreeMem[tree].weight = alpha*weights_column[j] ;
01681                 else
01682                     TreeMem[tree].weight = alpha ;
01683 #ifdef TRIE_CHECK_EVERYTHING
01684                 if (j==degree-2)
01685                     TreeMem[tree].has_floats = true ;
01686 #endif
01687             }
01688         }
01689     }
01690 }
01691 
01692     template <class Trie>
01693 float64_t CTrie<Trie>::compute_by_tree_helper(
01694     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01695         int32_t weight_pos, float64_t* weights, 
01696         bool degree_times_position_weights)
01697 {
01698     int32_t tree = trees[tree_pos] ;
01699 
01700     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01701         return 0.0;
01702 
01703     float64_t *weights_column=NULL ;
01704     if (degree_times_position_weights)
01705         weights_column=&weights[weight_pos*degree] ;
01706     else // weights is a vector (1 x degree)
01707         weights_column=weights ;
01708 
01709     float64_t sum=0 ;
01710     for (int32_t j=0; seq_pos+j < len; j++)
01711     {
01712         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01713 
01714         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01715         {
01716             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01717             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01718             {
01719                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01720                 TRIE_ASSERT(tree>=0) ;
01721                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01722                 float64_t this_weight=0.0 ;
01723                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01724                 {
01725                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01726                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01727                         break ;
01728                     this_weight += weights_column[j+k] ;
01729                 }
01730                 sum += TreeMem[tree].weight * this_weight ;
01731                 break ;
01732             }
01733             else
01734             {
01735                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01736                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01737                 if (weights_in_tree)
01738                     sum += TreeMem[tree].weight ;
01739                 else
01740                     sum += TreeMem[tree].weight * weights_column[j] ;
01741             } ;
01742         }
01743         else
01744         {
01745             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01746             if (j==degree-1)
01747             {
01748                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01749                 if (weights_in_tree)
01750                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01751                 else
01752                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01753             }
01754             else
01755                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01756 
01757             break;
01758         }
01759     } 
01760 
01761     if (position_weights!=NULL)
01762         return sum*position_weights[weight_pos] ;
01763     else
01764         return sum ;
01765 }
01766 
01767     template <class Trie>
01768 void CTrie<Trie>::compute_by_tree_helper(
01769     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01770     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01771     int32_t mkl_stepsize, float64_t * weights,
01772     bool degree_times_position_weights)
01773 {
01774     int32_t tree = trees[tree_pos] ;
01775     if (factor==0)
01776         return ;
01777 
01778     if (position_weights!=NULL)
01779     {
01780         factor *= position_weights[weight_pos] ;
01781         if (factor==0)
01782             return ;
01783         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01784         {
01785             for (int32_t j=0; seq_pos+j<len; j++)
01786             {
01787                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01788                 {
01789                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01790                     {
01791                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01792                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01793                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01794                         {
01795                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01796                                 break ;
01797                             if (weights_in_tree)
01798                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01799                             else
01800                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01801                         }
01802                         break ;
01803                     }
01804                     else
01805                     {
01806                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01807                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01808                         if (weights_in_tree)
01809                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01810                         else
01811                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01812                     }
01813                 } 
01814                 else
01815                 {
01816                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01817                     if (j==degree-1)
01818                     {
01819                         if (weights_in_tree)
01820                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01821                         else
01822                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01823                     }
01824                 }
01825             }
01826         } 
01827         else // with position_weigths, weights is a matrix (len x degree)
01828         {
01829             for (int32_t j=0; seq_pos+j<len; j++)
01830             {
01831                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01832                 {
01833                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01834                     {
01835                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01836                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01837                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01838                         {
01839                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01840                                 break ;
01841                             if (weights_in_tree)
01842                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01843                             else
01844                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01845                         }
01846                         break ;
01847                     }
01848                     else
01849                     {
01850                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01851                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01852                         if (weights_in_tree)
01853                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01854                         else
01855                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01856                     }
01857                 } 
01858                 else
01859                 {
01860                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01861                     if (j==degree-1)
01862                     {
01863                         if (weights_in_tree)
01864                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01865                         else
01866                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01867                     }         
01868                     break ;
01869                 }
01870             }
01871         } 
01872     }
01873     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01874     {
01875         for (int32_t j=0; seq_pos+j<len; j++)
01876         {
01877             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01878             {
01879                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01880                 {
01881                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01882                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01883                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01884                     {
01885                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01886                             break ;
01887                         if (weights_in_tree)
01888                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01889                         else
01890                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01891                     }
01892                     break ;
01893                 }
01894                 else
01895                 {
01896                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01897                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01898                     if (weights_in_tree)
01899                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01900                     else
01901                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01902                 }
01903             }
01904             else
01905             {
01906                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01907                 if (j==degree-1)
01908                 {
01909                     if (weights_in_tree)
01910                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01911                     else
01912                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01913                 }
01914                 break ;
01915             }
01916         } 
01917     } 
01918     else // no position_weigths, weights is a matrix (len x degree)
01919     {
01920         /*if (!position_mask)
01921           {     
01922           position_mask = new bool[len] ;
01923           for (int32_t i=0; i<len; i++)
01924           {
01925           position_mask[i]=false ;
01926 
01927           for (int32_t j=0; j<degree; j++)
01928           if (weights[i*degree+j]!=0.0)
01929           {
01930           position_mask[i]=true ;
01931           break ;
01932           }
01933           }
01934           }
01935           if (position_mask[weight_pos]==0)
01936           return ;*/
01937 
01938         for (int32_t j=0; seq_pos+j<len; j++)
01939         {
01940             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01941             {
01942                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01943                 {
01944                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01945                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01946                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01947                     {
01948                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01949                             break ;
01950                         if (weights_in_tree)
01951                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01952                         else
01953                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01954                     }
01955                     break ;
01956                 }
01957                 else
01958                 {
01959                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01960                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01961                     if (weights_in_tree)
01962                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01963                     else
01964                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01965                 } 
01966             }
01967             else
01968             {
01969                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01970                 if (j==degree-1)
01971                 {
01972                     if (weights_in_tree)
01973                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01974                     else
01975                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
01976                 }
01977                 break ;
01978             }
01979         } 
01980     }
01981 }
01982 
01983     template <class Trie>
01984 void CTrie<Trie>::fill_backtracking_table_recursion(
01985     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
01986     CDynamicArray<ConsensusEntry>* table, float64_t* weights)
01987 {
01988     float64_t w=1.0;
01989 
01990     if (weights_in_tree || depth==0)
01991         value+=tree->weight;
01992     else
01993     {
01994         w=weights[degree-1];
01995         value+=weights[depth-1]*tree->weight;
01996     }
01997 
01998     if (degree-1==depth)
01999     {
02000         for (int32_t sym=0; sym<4; sym++)
02001         {
02002             float64_t v=w*tree->child_weights[sym];
02003             if (v!=0.0)
02004             {
02005                 ConsensusEntry entry;
02006                 entry.bt=-1;
02007                 entry.score=value+v;
02008                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02009 
02010                 table->append_element(entry);
02011             }
02012         }
02013     }
02014     else
02015     {
02016         for (int32_t sym=0; sym<4; sym++)
02017         {
02018             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02019             if (tree->children[sym] != NO_CHILD)
02020                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02021         }
02022     }
02023 }
02024 
02025     template <class Trie>
02026 float64_t CTrie<Trie>::get_cumulative_score(
02027     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02028 {
02029     float64_t result=0.0;
02030 
02031     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq);
02032 
02033     for (int32_t i=pos; i<pos+deg && i<length; i++)
02034     {
02035         //SG_PRINT("loop %d\n", i);
02036         Trie* tree = &TreeMem[trees[i]];
02037 
02038         for (int32_t d=0; d<deg-i+pos; d++)
02039         {
02040             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)));
02041             ASSERT(d-1<degree);
02042             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02043 
02044             float64_t w=1.0;
02045             if (!weights_in_tree)
02046                 w=weights[d];
02047 
02048             ASSERT(tree->children[sym] != NO_CHILD);
02049             tree=&TreeMem[tree->children[sym]];
02050             result+=w*tree->weight;
02051         }
02052     }
02053     //SG_PRINT("cum: %f\n", result);
02054     return result;
02055 }
02056 
02057     template <class Trie>
02058 void CTrie<Trie>::fill_backtracking_table(
02059     int32_t pos, CDynamicArray<ConsensusEntry>* prev,
02060     CDynamicArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02061 {
02062     ASSERT(pos>=0 && pos<length);
02063     ASSERT(!use_compact_terminal_nodes);
02064 
02065     Trie* t = &TreeMem[trees[pos]];
02066 
02067     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02068 
02069 
02070     if (cumulative)
02071     {
02072         int32_t num_cur=cur->get_num_elements();
02073         for (int32_t i=0; i<num_cur; i++)
02074         {
02075             ConsensusEntry entry=cur->get_element(i);
02076             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02077             cur->set_element(entry,i);
02078             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt);
02079         }
02080     }
02081 
02082     //if previous tree exists find maximum scoring path
02083     //for each element in cur and update bt table
02084     if (prev)
02085     {
02086         int32_t num_cur=cur->get_num_elements();
02087         int32_t num_prev=prev->get_num_elements();
02088 
02089         for (int32_t i=0; i<num_cur; i++)
02090         {
02091             //uint64_t str_cur_old= cur->get_element(i).string;
02092             uint64_t str_cur= cur->get_element(i).string >> 2;
02093             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur);
02094 
02095             int32_t bt=-1;
02096             float64_t max_score=0.0;
02097 
02098             for (int32_t j=0; j<num_prev; j++)
02099             {
02100                 //uint64_t str_prev_old= prev->get_element(j).string;
02101                 uint64_t mask=
02102                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02103                 uint64_t str_prev=  mask & prev->get_element(j).string;
02104                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask);
02105 
02106                 if (str_cur == str_prev)
02107                 {
02108                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02109                     if (bt==-1 || sc>max_score)
02110                     {
02111                         bt=j;
02112                         max_score=sc;
02113 
02114                         //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score);
02115                     }
02116                 }
02117             }
02118 
02119             ASSERT(bt!=-1);
02120             ConsensusEntry entry;
02121             entry.bt=bt;
02122             entry.score=max_score;
02123             entry.string=cur->get_element(i).string;
02124             cur->set_element(entry, i);
02125             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt);
02126         }
02127     }
02128 }
02129 #endif // _TRIE_H___

SHOGUN Machine Learning Toolbox - Documentation