Trie.cpp

Go to the documentation of this file.
00001 #include "lib/common.h"
00002 #include "lib/io.h"
00003 #include "lib/Trie.h"
00004 #include "lib/Mathematics.h"
00005 
00006 template <>
00007 void CTrie<POIMTrie>::POIMs_extract_W_helper(
00008     const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00009     const int32_t y0, float64_t* const* const W, const int32_t K )
00010 {
00011     ASSERT(nodeIdx!=NO_CHILD);
00012     ASSERT(depth<K);
00013     float64_t* const W_kiy = & W[ depth ][ offset + y0 ];
00014     POIMTrie* const node = &TreeMem[ nodeIdx ];
00015     int32_t sym;
00016 
00017     if( depth < degree-1 )
00018     {
00019         const int32_t offset1 = offset * NUM_SYMS;
00020         for( sym = 0; sym < NUM_SYMS; ++sym )
00021         {
00022             ASSERT(W_kiy[sym]==0);
00023             const int32_t childIdx = node->children[ sym ];
00024             if( childIdx != NO_CHILD )
00025             {
00026                 W_kiy[ sym ] = TreeMem[ childIdx ].weight;
00027 
00028                 if (depth < K-1)
00029                 {
00030                     const int32_t y1 = ( y0 + sym ) * NUM_SYMS;
00031                     POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K );
00032                 }
00033             }
00034         }
00035     }
00036     else
00037     {
00038         ASSERT(depth==degree-1);
00039         for( sym = 0; sym < NUM_SYMS; ++sym )
00040         {
00041             ASSERT(W_kiy[sym]==0);
00042             W_kiy[ sym ] = node->child_weights[ sym ];
00043         }
00044     }
00045 }
00046 
00047 template <>
00048 void CTrie<POIMTrie>::POIMs_extract_W(
00049     float64_t* const* const W, const int32_t K)
00050 {
00051   ASSERT(degree>=1);
00052   ASSERT(K>=1);
00053   const int32_t N = length;
00054   int32_t i;
00055   for( i = 0; i < N; ++i ) {
00056     //printf( "W_helper( %d )\n", i );
00057     POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K );
00058   }
00059 }
00060 
00061 template <>
00062 void CTrie<POIMTrie>::POIMs_calc_SLR_helper1(
00063     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00064     int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym,
00065     float64_t* S, float64_t* L, float64_t* R )
00066 {
00067   ASSERT(depth==degree-1);
00068   ASSERT(nodeIdx!=NO_CHILD);
00069 
00070   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00071   POIMTrie* const node = &TreeMem[ nodeIdx ];
00072   int32_t symRight;
00073   int32_t symLeft;
00074 
00075   // --- init
00076   node->S = 0;
00077   node->L = 0;
00078   node->R = 0;
00079 
00080   if (i+depth<length)
00081   {
00082       const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00083 
00084       // --- go thru direct children
00085       for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00086           const float64_t w1 = node->child_weights[ symRight ];
00087           const float64_t pRight = distribRight[ symRight ];
00088           const float64_t incr1 = pRight * w1;
00089           node->S += incr1;
00090           node->R += incr1;
00091       }
00092   }
00093 
00094   // --- collect precalced values from left neighbor tree
00095   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00096   {
00097       if (left_tries_idx[symLeft] != NO_CHILD)
00098       {
00099           POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00100 
00101           ASSERT(nodeLeft);
00102           const float64_t w2 = nodeLeft->child_weights[ lastSym ];
00103           const float64_t pLeft = distribLeft[ symLeft ];
00104           const float64_t incr2 = pLeft * w2;
00105           node->S += incr2;
00106           node->L += incr2;
00107       }
00108   }
00109 
00110   // --- add w and return results
00111   const float64_t w0 = node->weight;
00112   node->S += w0;
00113   node->L += w0;
00114   node->R += w0;
00115   *S = node->S;
00116   *L = node->L;
00117   *R = node->R;
00118 }
00119 
00120 
00121 template <>
00122 void CTrie<POIMTrie>::POIMs_calc_SLR_helper2(
00123     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00124     int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L,
00125     float64_t* R )
00126 {
00127   ASSERT(0<=depth && depth<=degree-2);
00128   ASSERT(nodeIdx!=NO_CHILD);
00129 
00130   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00131   POIMTrie* const node = &TreeMem[ nodeIdx ];
00132   float64_t dummy;
00133   float64_t auxS;
00134   float64_t auxR;
00135   int32_t symRight;
00136   int32_t symLeft;
00137 
00138   // --- init
00139   node->S = 0;
00140   node->L = 0;
00141   node->R = 0;
00142 
00143   // --- recurse thru direct children
00144   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00145   {
00146       const int32_t childIdx = node->children[ symRight ];
00147       if( childIdx != NO_CHILD )
00148       {
00149           if( depth < degree-2 ) 
00150           {
00151               int32_t new_left_tries_idx[4];
00152 
00153               for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00154               {
00155                   new_left_tries_idx[symLeft]=NO_CHILD;
00156 
00157                   if (left_tries_idx[symLeft] != NO_CHILD)
00158                   {
00159                       POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00160                       ASSERT(nodeLeft);
00161                       new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ];
00162                   }
00163               }
00164               POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR );
00165           }
00166           else 
00167               POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR );
00168 
00169           if (i+depth<length)
00170           {
00171               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00172               const float64_t pRight = distribRight[ symRight ];
00173               node->S += pRight * auxS;
00174               node->R += pRight * auxR;
00175           }
00176       }
00177   }
00178 
00179   // --- collect precalced values from left neighbor tree
00180   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00181   {
00182       if (left_tries_idx[symLeft] != NO_CHILD)
00183       {
00184           const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00185           ASSERT(nodeLeft);
00186           const float64_t pLeft = distribLeft[ symLeft ];
00187 
00188           node->S += pLeft * nodeLeft->S;
00189           node->L += pLeft * nodeLeft->L;
00190 
00191           if (i+depth<length)
00192           {
00193               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00194               // - second order correction for S
00195               auxS = 0;
00196               if( depth < degree-2 )
00197               {
00198                   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00199                   {
00200                       const int32_t childIdxLeft = nodeLeft->children[ symRight ];
00201                       if( childIdxLeft != NO_CHILD )
00202                       {
00203                           const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ];
00204                           auxS += distribRight[symRight] * childLeft->S;
00205                       }
00206                   }
00207               }
00208               else
00209               {
00210                   for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00211                       auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ];
00212                   }
00213               }
00214               node->S -= pLeft* auxS;
00215           }
00216       }
00217   }
00218 
00219   // --- add w and return results
00220   const float64_t w0 = node->weight;
00221   //printf( "  d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 );
00222   node->S += w0;
00223   node->L += w0;
00224   node->R += w0;
00225   *S = node->S;
00226   *L = node->L;
00227   *R = node->R;
00228 }
00229 
00230 
00231 
00232 template <>
00233 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib )
00234 {
00235     if( degree == 1 ) {
00236         return;
00237     }
00238 
00239     ASSERT(degree>=2);
00240     const int32_t N = length;
00241     float64_t dummy;
00242     int32_t symLeft;
00243     int32_t leftSubtrees[4];
00244     for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00245         leftSubtrees[ symLeft ] = NO_CHILD;
00246 
00247     for(int32_t i = 0; i < N; ++i )
00248     {
00249         POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy );
00250 
00251         const POIMTrie* const node = &TreeMem[ trees[i] ];
00252         ASSERT(trees[i]!=NO_CHILD);
00253 
00254         for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00255             leftSubtrees[ symLeft ] = node->children[ symLeft ];
00256     }
00257 }
00258 
00259 template <>
00260 void CTrie<POIMTrie>::POIMs_get_SLR(
00261     const int32_t parentIdx, const int32_t sym, const int32_t depth,
00262     float64_t* S, float64_t* L, float64_t* R )
00263 {
00264   ASSERT(parentIdx!=NO_CHILD);
00265   const POIMTrie* const parent = &TreeMem[ parentIdx ];
00266   if( depth < degree ) {
00267     const int32_t nodeIdx = parent->children[ sym ];
00268     const POIMTrie* const node = &TreeMem[ nodeIdx ];
00269     *S = node->S;
00270     *L = node->L;
00271     *R = node->R;
00272   } else {
00273     ASSERT(depth==degree);
00274     const float64_t w = parent->child_weights[ sym ];
00275     *S = w;
00276     *L = w;
00277     *R = w;
00278   }
00279 }
00280 
00281 template <>
00282 void CTrie<POIMTrie>::POIMs_add_SLR_helper2(
00283     float64_t* const* const poims, const int32_t K, const int32_t k,
00284     const int32_t i, const int32_t y, const float64_t valW,
00285     const float64_t valS, const float64_t valL, const float64_t valR,
00286     const int32_t debug)
00287 {
00288     //printf( "i=%d, d=%d, y=%d:  w=%.3f \n", i, k, y, valW );
00289     const int32_t nk = nofsKmers[ k ];
00290     ASSERT(1<=k && k<=K);
00291     ASSERT(0<=y && y<nk);
00292     int32_t z;
00293     int32_t j;
00294 
00295     // --- add superstring score; subtract "w", as it was counted twice
00296     if( debug==0 || debug==2 )
00297     {
00298         poims[ k-1 ][ i*nk + y ] += valS - valW;
00299     }
00300 
00301     // --- left partial overlaps
00302     if( debug==0 || debug==3 )
00303     {
00304         int32_t r;
00305         for( r = 1; k+r <= K; ++r )
00306         {
00307             const int32_t nr = nofsKmers[ r ];
00308             const int32_t nz = nofsKmers[ k+r ];
00309             float64_t* const poim = & poims[ k+r-1 ][ i*nz ];
00310             z = y * nr;
00311             for( j = 0; j < nr; ++j )
00312             {
00313                 if( !( 0 <= z && z < nz ) ) {
00314                     printf( "k=%d, nk=%d,  r=%d, nr=%d,  nz=%d \n", k, nk, r, nr, nz );
00315                     printf( "  j=%d, y=%d, z=%d \n", j, y, z );
00316                 }
00317                 ASSERT(0<=z && z<nz);
00318                 poim[ z ] += valL - valW;
00319                 ++z;
00320             }
00321         }
00322     }
00323     // --- right partial overlaps
00324     if( debug==0 || debug==4 )
00325     {
00326         int32_t l;
00327         for( l = 1; k+l <= K && l <= i; ++l )
00328         {
00329             const int32_t nl = nofsKmers[ l ];
00330             const int32_t nz = nofsKmers[ k+l ];
00331             float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ];
00332             z = y;
00333             for( j = 0; j < nl; ++j ) {
00334                 ASSERT(0<=z && z<nz);
00335                 poim[ z ] += valR - valW;
00336                 z += nk;
00337             }
00338         }
00339     }
00340 }
00341 
00342 template <>
00343 void CTrie<POIMTrie>::POIMs_add_SLR_helper1(
00344     const int32_t nodeIdx, const int32_t depth, const int32_t i,
00345     const int32_t y0, float64_t* const* const poims, const int32_t K,
00346     const int32_t debug)
00347 {
00348     ASSERT(nodeIdx!=NO_CHILD);
00349     ASSERT(depth<K);
00350     POIMTrie* const node = &TreeMem[ nodeIdx ];
00351     int32_t sym;
00352     if( depth < degree-1 )
00353     {
00354         if( depth < K-1 )
00355         {
00356             for( sym = 0; sym < NUM_SYMS; ++sym )
00357             {
00358                 const int32_t childIdx = node->children[ sym ];
00359                 if( childIdx != NO_CHILD )
00360                 {
00361                     POIMTrie* const child = &TreeMem[ childIdx ];
00362                     const int32_t y = y0 + sym;
00363                     const int32_t y1 = y * NUM_SYMS;
00364                     const float64_t w = child->weight;
00365                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00366                     POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug );
00367                 }
00368             }
00369         }
00370         else
00371         {
00372             ASSERT(depth==K-1);
00373             for( sym = 0; sym < NUM_SYMS; ++sym )
00374             {
00375                 const int32_t childIdx = node->children[ sym ];
00376                 if( childIdx != NO_CHILD ) {
00377                     POIMTrie* const child = &TreeMem[ childIdx ];
00378                     const int32_t y = y0 + sym;
00379                     const float64_t w = child->weight;
00380                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00381                 }
00382             }
00383         }
00384     }
00385     else
00386     {
00387         ASSERT(depth==degree-1);
00388         for( sym = 0; sym < NUM_SYMS; ++sym )
00389         {
00390             const float64_t w = node->child_weights[ sym ];
00391             const int32_t y = y0 + sym;
00392             POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug );
00393         }
00394     }
00395 }
00396 
00397 
00398 
00399 template <>
00400 void CTrie<POIMTrie>::POIMs_add_SLR(
00401     float64_t* const* const poims, const int32_t K, const int32_t debug)
00402 {
00403   ASSERT(degree>=1);
00404   ASSERT(K>=1);
00405   {
00406     const int32_t m = ( degree > K ) ? degree : K;
00407     nofsKmers = new int32_t[ m+1 ];
00408     int32_t n;
00409     int32_t k;
00410     n = 1;
00411     for( k = 0; k < m+1; ++k ) {
00412       nofsKmers[ k ] = n;
00413       n *= NUM_SYMS;
00414     }
00415   }
00416   const int32_t N = length;
00417   int32_t i;
00418   for( i = 0; i < N; ++i ) {
00419     POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug );
00420   }
00421   delete[] nofsKmers;
00422 }

SHOGUN Machine Learning Toolbox - Documentation