WeightedDegreePositionStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017 
00018 #include "kernel/WeightedDegreePositionStringKernel.h"
00019 #include "kernel/SqrtDiagKernelNormalizer.h"
00020 #include "features/Features.h"
00021 #include "features/StringFeatures.h"
00022 
00023 #include "classifier/svm/SVM.h"
00024 
00025 #ifndef WIN32
00026 #include <pthread.h>
00027 #endif
00028 
00029 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X))
00030 
00031 template <class Trie> struct S_THREAD_PARAM 
00032 {
00033     int32_t* vec;
00034     float64_t* result;
00035     float64_t* weights;
00036     CWeightedDegreePositionStringKernel* kernel;
00037     CTrie<Trie>* tries;
00038     float64_t factor;
00039     int32_t j;
00040     int32_t start;
00041     int32_t end;
00042     int32_t length;
00043     int32_t max_shift;
00044     int32_t* shift;
00045     int32_t* vec_idx;
00046 };
00047 
00048 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00049     int32_t size, int32_t d, int32_t mm, int32_t mkls)
00050 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00051     position_weights_lhs(NULL), position_weights_rhs(NULL),
00052     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00053     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00054     num_block_weights_external(0), block_weights_external(NULL),
00055     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00056     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00057     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00058     alphabet(NULL)
00059 {
00060     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00061     set_wd_weights();
00062     ASSERT(weights);
00063 
00064     set_normalizer(new CSqrtDiagKernelNormalizer());
00065 }
00066 
00067 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00068     int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl,
00069     int32_t mkls)
00070 : CStringKernel<char>(size), weights(NULL), position_weights(NULL),
00071     position_weights_lhs(NULL), position_weights_rhs(NULL),
00072     weights_buffer(NULL), mkl_stepsize(mkls), degree(d), length(0),
00073     max_mismatch(mm), seq_length(0), shift(NULL), shift_len(0),
00074     num_block_weights_external(0), block_weights_external(NULL),
00075     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00076     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00077     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00078     alphabet(NULL)
00079 {
00080     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00081 
00082     weights=new float64_t[d*(1+max_mismatch)];
00083     for (int32_t i=0; i<d*(1+max_mismatch); i++)
00084         weights[i]=w[i];
00085 
00086     set_shifts(s, sl);
00087 
00088     set_normalizer(new CSqrtDiagKernelNormalizer());
00089 }
00090 
00091 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00092     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00093 : CStringKernel<char>(10), weights(NULL), position_weights(NULL),
00094     position_weights_lhs(NULL), position_weights_rhs(NULL),
00095     weights_buffer(NULL), mkl_stepsize(1), degree(d), length(0),
00096     max_mismatch(0), seq_length(0), shift(NULL), shift_len(0),
00097     num_block_weights_external(0), block_weights_external(NULL),
00098     block_weights(NULL), type(E_EXTERNAL), tries(d), poim_tries(d),
00099     tree_initialized(false), use_poim_tries(false), m_poim_distrib(NULL),
00100     m_poim(NULL), m_poim_num_sym(0), m_poim_num_feat(0), m_poim_result_len(0),
00101     alphabet(NULL)
00102 {
00103     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00104     set_wd_weights();
00105     ASSERT(weights);
00106     set_normalizer(new CSqrtDiagKernelNormalizer());
00107 
00108     init(l, r);
00109 }
00110 
00111 
00112 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel()
00113 {
00114     cleanup();
00115     cleanup_POIM2();
00116 
00117     delete[] shift;
00118     shift=NULL;
00119 
00120     delete[] weights;
00121     weights=NULL ;
00122 
00123     delete[] block_weights;
00124     block_weights=NULL;
00125 
00126     delete[] position_weights;
00127     position_weights=NULL;
00128 
00129     delete[] position_weights_lhs;
00130     position_weights_lhs=NULL;
00131 
00132     delete[] position_weights_rhs;
00133     position_weights_rhs=NULL;
00134 
00135     delete[] weights_buffer;
00136     weights_buffer=NULL;
00137 }
00138 
00139 void CWeightedDegreePositionStringKernel::remove_lhs()
00140 {
00141     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00142     delete_optimization();
00143 
00144     tries.destroy();
00145     poim_tries.destroy();
00146 
00147     CKernel::remove_lhs();
00148 }
00149 
00150 void CWeightedDegreePositionStringKernel::create_empty_tries()
00151 {
00152     ASSERT(lhs);
00153     seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length();
00154 
00155     if (opt_type==SLOWBUTMEMEFFICIENT)
00156     {
00157         tries.create(seq_length, true); 
00158         poim_tries.create(seq_length, true); 
00159     }
00160     else if (opt_type==FASTBUTMEMHUNGRY)
00161     {
00162         tries.create(seq_length, false);  // still buggy
00163         poim_tries.create(seq_length, false);  // still buggy
00164     }
00165     else
00166         SG_ERROR( "unknown optimization type\n");
00167 }
00168 
00169 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r)
00170 {
00171     int32_t lhs_changed = (lhs!=l) ;
00172     int32_t rhs_changed = (rhs!=r) ;
00173 
00174     CStringKernel<char>::init(l,r);
00175 
00176     SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ;
00177     SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ;
00178 
00179     CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00180     CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00181 
00182     /* set shift */
00183     if (shift_len==0) {
00184         shift_len=sf_l->get_vector_length(0);
00185         int32_t *shifts=new int32_t[shift_len];
00186         for (int32_t i=0; i<shift_len; i++) {
00187             shifts[i]=1;
00188         }
00189         set_shifts(shifts, shift_len);
00190         delete[] shifts;
00191     }
00192 
00193 
00194     int32_t len=sf_l->get_max_vector_length();
00195     if (lhs_changed && !sf_l->have_same_length(len))
00196         SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00197 
00198     if (rhs_changed && !sf_r->have_same_length(len))
00199         SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00200 
00201     delete alphabet;
00202     alphabet= new CAlphabet(sf_l->get_alphabet());
00203     CAlphabet* ralphabet=sf_r->get_alphabet();
00204     if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00205         properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00206 
00207     ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00208     SG_UNREF(ralphabet);
00209 
00210     //whenever init is called also init tries and block weights
00211     create_empty_tries();
00212     init_block_weights();
00213 
00214     return init_normalizer();
00215 }
00216 
00217 void CWeightedDegreePositionStringKernel::cleanup()
00218 {
00219     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00220     delete_optimization();
00221 
00222     delete[] block_weights;
00223     block_weights=NULL;
00224 
00225     tries.destroy();
00226     poim_tries.destroy();
00227 
00228     seq_length = 0;
00229     tree_initialized = false;
00230 
00231     delete alphabet;
00232     alphabet=NULL;
00233 
00234     CKernel::cleanup();
00235 }
00236 
00237 bool CWeightedDegreePositionStringKernel::load_init(FILE* src)
00238 {
00239     return false;
00240 }
00241 
00242 bool CWeightedDegreePositionStringKernel::save_init(FILE* dest)
00243 {
00244     return false;
00245 }
00246 
00247 bool CWeightedDegreePositionStringKernel::init_optimization(
00248     int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num,
00249     int32_t upto_tree)
00250 {
00251     ASSERT(position_weights_lhs==NULL);
00252     ASSERT(position_weights_rhs==NULL);
00253 
00254     if (upto_tree<0)
00255         upto_tree=tree_num;
00256 
00257     if (max_mismatch!=0)
00258     {
00259         SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n");
00260         return false ;
00261     }
00262 
00263     if (tree_num<0)
00264         SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00265 
00266     delete_optimization();
00267 
00268     if (tree_num<0)
00269         SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ;
00270 
00271     for (int32_t i=0; i<p_count; i++)
00272     {
00273         if (tree_num<0)
00274         {
00275             if ( (i % (p_count/10+1)) == 0)
00276                 SG_PROGRESS(i,0,p_count);
00277             add_example_to_tree(IDX[i], alphas[i]);
00278         }
00279         else
00280         {
00281             for (int32_t t=tree_num; t<=upto_tree; t++)
00282                 add_example_to_single_tree(IDX[i], alphas[i], t);
00283         }
00284     }
00285 
00286     if (tree_num<0)
00287         SG_DONE();
00288 
00289     set_is_initialized(true) ;
00290     return true ;
00291 }
00292 
00293 bool CWeightedDegreePositionStringKernel::delete_optimization() 
00294 { 
00295     if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 
00296     {
00297         tries.set_use_compact_terminal_nodes(false) ;
00298         SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00299     }
00300 
00301     if (get_is_initialized())
00302     {
00303         if (opt_type==SLOWBUTMEMEFFICIENT)
00304             tries.delete_trees(true); 
00305         else if (opt_type==FASTBUTMEMHUNGRY)
00306             tries.delete_trees(false);  // still buggy
00307         else {
00308             SG_ERROR( "unknown optimization type\n");
00309         }
00310         set_is_initialized(false);
00311 
00312         return true;
00313     }
00314 
00315     return false;
00316 }
00317 
00318 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch(
00319     char* avec, int32_t alen, char* bvec, int32_t blen)
00320 {
00321     float64_t max_shift_vec[max_shift];
00322     float64_t sum0=0 ;
00323     for (int32_t i=0; i<max_shift; i++)
00324         max_shift_vec[i]=0 ;
00325     
00326     // no shift
00327     for (int32_t i=0; i<alen; i++)
00328     {
00329         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00330             continue ;
00331         
00332         int32_t mismatches=0;
00333         float64_t sumi = 0.0 ;
00334         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00335         {
00336             if (avec[i+j]!=bvec[i+j])
00337             {
00338                 mismatches++ ;
00339                 if (mismatches>max_mismatch)
00340                     break ;
00341             } ;
00342             sumi += weights[j+degree*mismatches];
00343         }
00344         if (position_weights!=NULL)
00345             sum0 += position_weights[i]*sumi ;
00346         else
00347             sum0 += sumi ;
00348     } ;
00349     
00350     for (int32_t i=0; i<alen; i++)
00351     {
00352         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00353         {
00354             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00355                 continue ;
00356             
00357             float64_t sumi1 = 0.0 ;
00358             // shift in sequence a
00359             int32_t mismatches=0;
00360             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00361             {
00362                 if (avec[i+j+k]!=bvec[i+j])
00363                 {
00364                     mismatches++ ;
00365                     if (mismatches>max_mismatch)
00366                         break ;
00367                 } ;
00368                 sumi1 += weights[j+degree*mismatches];
00369             }
00370             float64_t sumi2 = 0.0 ;
00371             // shift in sequence b
00372             mismatches=0;
00373             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00374             {
00375                 if (avec[i+j]!=bvec[i+j+k])
00376                 {
00377                     mismatches++ ;
00378                     if (mismatches>max_mismatch)
00379                         break ;
00380                 } ;
00381                 sumi2 += weights[j+degree*mismatches];
00382             }
00383             if (position_weights!=NULL)
00384                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00385             else
00386                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00387         } ;
00388     }
00389     
00390     float64_t result = sum0 ;
00391     for (int32_t i=0; i<max_shift; i++)
00392         result += max_shift_vec[i]/(2*(i+1)) ;
00393     
00394     return result ;
00395 }
00396 
00397 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch(
00398     char* avec, int32_t alen, char* bvec, int32_t blen) 
00399 {
00400     float64_t max_shift_vec[max_shift];
00401     float64_t sum0=0 ;
00402     for (int32_t i=0; i<max_shift; i++)
00403         max_shift_vec[i]=0 ;
00404 
00405     // no shift
00406     for (int32_t i=0; i<alen; i++)
00407     {
00408         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00409             continue ;
00410 
00411         float64_t sumi = 0.0 ;
00412         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00413         {
00414             if (avec[i+j]!=bvec[i+j])
00415                 break ;
00416             sumi += weights[j];
00417         }
00418         if (position_weights!=NULL)
00419             sum0 += position_weights[i]*sumi ;
00420         else
00421             sum0 += sumi ;
00422     } ;
00423 
00424     for (int32_t i=0; i<alen; i++)
00425     {
00426         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00427         {
00428             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00429                 continue ;
00430 
00431             float64_t sumi1 = 0.0 ;
00432             // shift in sequence a
00433             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00434             {
00435                 if (avec[i+j+k]!=bvec[i+j])
00436                     break ;
00437                 sumi1 += weights[j];
00438             }
00439             float64_t sumi2 = 0.0 ;
00440             // shift in sequence b
00441             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00442             {
00443                 if (avec[i+j]!=bvec[i+j+k])
00444                     break ;
00445                 sumi2 += weights[j];
00446             }
00447             if (position_weights!=NULL)
00448                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00449             else
00450                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00451         } ;
00452     }
00453 
00454     float64_t result = sum0 ;
00455     for (int32_t i=0; i<max_shift; i++)
00456         result += max_shift_vec[i]/(2*(i+1)) ;
00457 
00458     return result ;
00459 }
00460 
00461 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix(
00462     char* avec, int32_t alen, char* bvec, int32_t blen) 
00463 {
00464     float64_t max_shift_vec[max_shift];
00465     float64_t sum0=0 ;
00466     for (int32_t i=0; i<max_shift; i++)
00467         max_shift_vec[i]=0 ;
00468 
00469     // no shift
00470     for (int32_t i=0; i<alen; i++)
00471     {
00472         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00473             continue ;
00474         float64_t sumi = 0.0 ;
00475         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00476         {
00477             if (avec[i+j]!=bvec[i+j])
00478                 break ;
00479             sumi += weights[i*degree+j];
00480         }
00481         if (position_weights!=NULL)
00482             sum0 += position_weights[i]*sumi ;
00483         else
00484             sum0 += sumi ;
00485     } ;
00486 
00487     for (int32_t i=0; i<alen; i++)
00488     {
00489         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00490         {
00491             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00492                 continue ;
00493 
00494             float64_t sumi1 = 0.0 ;
00495             // shift in sequence a
00496             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00497             {
00498                 if (avec[i+j+k]!=bvec[i+j])
00499                     break ;
00500                 sumi1 += weights[i*degree+j];
00501             }
00502             float64_t sumi2 = 0.0 ;
00503             // shift in sequence b
00504             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00505             {
00506                 if (avec[i+j]!=bvec[i+j+k])
00507                     break ;
00508                 sumi2 += weights[i*degree+j];
00509             }
00510             if (position_weights!=NULL)
00511                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00512             else
00513                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00514         } ;
00515     }
00516 
00517     float64_t result = sum0 ;
00518     for (int32_t i=0; i<max_shift; i++)
00519         result += max_shift_vec[i]/(2*(i+1)) ;
00520 
00521     return result ;
00522 }
00523 
00524 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights(
00525     char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec,
00526     float64_t* pos_weights_rhs, int32_t blen)
00527 {
00528     float64_t max_shift_vec[max_shift];
00529     float64_t sum0=0 ;
00530     for (int32_t i=0; i<max_shift; i++)
00531         max_shift_vec[i]=0 ;
00532 
00533     // no shift
00534     for (int32_t i=0; i<alen; i++)
00535     {
00536         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00537             continue ;
00538 
00539         float64_t sumi = 0.0 ;
00540         float64_t posweight_lhs = 0.0 ;
00541         float64_t posweight_rhs = 0.0 ;
00542         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00543         {
00544             posweight_lhs += pos_weights_lhs[i+j] ;
00545             posweight_rhs += pos_weights_rhs[i+j] ;
00546             
00547             if (avec[i+j]!=bvec[i+j])
00548                 break ;
00549             sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00550         }
00551         if (position_weights!=NULL)
00552             sum0 += position_weights[i]*sumi ;
00553         else
00554             sum0 += sumi ;
00555     } ;
00556 
00557     for (int32_t i=0; i<alen; i++)
00558     {
00559         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00560         {
00561             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00562                 continue ;
00563 
00564             // shift in sequence a  
00565             float64_t sumi1 = 0.0 ;
00566             float64_t posweight_lhs = 0.0 ;
00567             float64_t posweight_rhs = 0.0 ;
00568             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00569             {
00570                 posweight_lhs += pos_weights_lhs[i+j+k] ;
00571                 posweight_rhs += pos_weights_rhs[i+j] ;
00572                 if (avec[i+j+k]!=bvec[i+j])
00573                     break ;
00574                 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00575             }
00576             // shift in sequence b
00577             float64_t sumi2 = 0.0 ;
00578             posweight_lhs = 0.0 ;
00579             posweight_rhs = 0.0 ;
00580             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00581             {
00582                 posweight_lhs += pos_weights_lhs[i+j] ;
00583                 posweight_rhs += pos_weights_rhs[i+j+k] ;
00584                 if (avec[i+j]!=bvec[i+j+k])
00585                     break ;
00586                 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00587             }
00588             if (position_weights!=NULL)
00589                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00590             else
00591                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00592         } ;
00593     }
00594 
00595     float64_t result = sum0 ;
00596     for (int32_t i=0; i<max_shift; i++)
00597         result += max_shift_vec[i]/(2*(i+1)) ;
00598 
00599     return result ;
00600 }
00601 
00602 
00603 float64_t CWeightedDegreePositionStringKernel::compute(
00604     int32_t idx_a, int32_t idx_b)
00605 {
00606     int32_t alen, blen;
00607 
00608     char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen);
00609     char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen);
00610     // can only deal with strings of same length
00611     ASSERT(alen==blen);
00612     ASSERT(shift_len==alen);
00613 
00614     float64_t result = 0 ;
00615     if (position_weights_lhs!=NULL || position_weights_rhs!=NULL)
00616     {
00617         ASSERT(max_mismatch==0);
00618         result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs[idx_b*blen], blen) ;
00619     }
00620     else if (max_mismatch > 0)
00621         result = compute_with_mismatch(avec, alen, bvec, blen) ;
00622     else if (length==0)
00623         result = compute_without_mismatch(avec, alen, bvec, blen) ;
00624     else
00625         result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ;
00626 
00627     return result ;
00628 }
00629 
00630 
00631 void CWeightedDegreePositionStringKernel::add_example_to_tree(
00632     int32_t idx, float64_t alpha)
00633 {
00634     ASSERT(position_weights_lhs==NULL);
00635     ASSERT(position_weights_rhs==NULL);
00636     ASSERT(alphabet);
00637     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00638 
00639     int32_t len=0;
00640     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00641     ASSERT(max_mismatch==0);
00642     int32_t *vec = new int32_t[len] ;
00643 
00644     for (int32_t i=0; i<len; i++)
00645         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00646 
00647     if (opt_type==FASTBUTMEMHUNGRY)
00648     {
00649         //TRIES(set_use_compact_terminal_nodes(false)) ;
00650         ASSERT(!TRIES(get_use_compact_terminal_nodes()));
00651     }
00652 
00653     for (int32_t i=0; i<len; i++)
00654     {
00655         int32_t max_s=-1;
00656 
00657         if (opt_type==SLOWBUTMEMEFFICIENT)
00658             max_s=0;
00659         else if (opt_type==FASTBUTMEMHUNGRY)
00660             max_s=shift[i];
00661         else {
00662             SG_ERROR( "unknown optimization type\n");
00663         }
00664 
00665         for (int32_t s=max_s; s>=0; s--)
00666         {
00667             float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00668             TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ;
00669             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i, s, idx, alpha_pw) ;
00670 
00671             if ((s==0) || (i+s>=len))
00672                 continue;
00673 
00674             TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ;
00675             //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", i+s, -s, idx, alpha_pw) ;
00676         }
00677     }
00678 
00679     delete[] vec ;
00680     tree_initialized=true ;
00681 }
00682 
00683 void CWeightedDegreePositionStringKernel::add_example_to_single_tree(
00684     int32_t idx, float64_t alpha, int32_t tree_num) 
00685 {
00686     ASSERT(position_weights_lhs==NULL);
00687     ASSERT(position_weights_rhs==NULL);
00688     ASSERT(alphabet);
00689     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00690 
00691     int32_t len=0;
00692     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len);
00693     ASSERT(max_mismatch==0);
00694     int32_t *vec=new int32_t[len];
00695     int32_t max_s=-1;
00696 
00697     if (opt_type==SLOWBUTMEMEFFICIENT)
00698         max_s=0;
00699     else if (opt_type==FASTBUTMEMHUNGRY)
00700     {
00701         ASSERT(!tries.get_use_compact_terminal_nodes());
00702         max_s=shift[tree_num];
00703     }
00704     else {
00705         SG_ERROR( "unknown optimization type\n");
00706     }
00707     for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+degree+max_shift); i++)
00708         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00709 
00710     for (int32_t s=max_s; s>=0; s--)
00711     {
00712         float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00713         tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ;
00714         //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, s, idx, alpha_pw) ;
00715     } 
00716 
00717     if (opt_type==FASTBUTMEMHUNGRY)
00718     {
00719         for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++)
00720         {
00721             int32_t s=tree_num-i;
00722             if ((i+s<len) && (s>=1) && (s<=shift[i]))
00723             {
00724                 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00725                 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 
00726                 //fprintf(stderr, "tree=%i, s=%i, example=%i, alpha_pw=%1.2f\n", tree_num, -s, idx, alpha_pw) ;
00727             }
00728         }
00729     }
00730     delete[] vec ;
00731     tree_initialized=true ;
00732 }
00733 
00734 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx)
00735 {
00736     ASSERT(position_weights_lhs==NULL);
00737     ASSERT(position_weights_rhs==NULL);
00738     ASSERT(alphabet);
00739     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00740 
00741     float64_t sum=0;
00742     int32_t len=0;
00743     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00744     ASSERT(max_mismatch==0);
00745     int32_t *vec=new int32_t[len];
00746 
00747     for (int32_t i=0; i<len; i++)
00748         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00749 
00750     for (int32_t i=0; i<len; i++)
00751         sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ;
00752 
00753     if (opt_type==SLOWBUTMEMEFFICIENT)
00754     {
00755         for (int32_t i=0; i<len; i++)
00756         {
00757             for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++)
00758             {
00759                 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ;
00760                 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ;
00761             }
00762         }
00763     }
00764 
00765     delete[] vec ;
00766 
00767     return normalizer->normalize_rhs(sum, idx);
00768 }
00769 
00770 void CWeightedDegreePositionStringKernel::compute_by_tree(
00771     int32_t idx, float64_t* LevelContrib)
00772 {
00773     ASSERT(position_weights_lhs==NULL);
00774     ASSERT(position_weights_rhs==NULL);
00775     ASSERT(alphabet);
00776     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00777 
00778     int32_t len=0;
00779     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len);
00780     ASSERT(max_mismatch==0);
00781     int32_t *vec=new int32_t[len];
00782 
00783     for (int32_t i=0; i<len; i++)
00784         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00785 
00786     for (int32_t i=0; i<len; i++)
00787     {
00788         tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00789                 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights,
00790                 (length!=0));
00791     }
00792 
00793     if (opt_type==SLOWBUTMEMEFFICIENT)
00794     {
00795         for (int32_t i=0; i<len; i++)
00796             for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++)
00797             {
00798                 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib,
00799                         normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize,
00800                         weights, (length!=0)) ;
00801                 tries.compute_by_tree_helper(vec, len, i+k, i, i+k,
00802                         LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx),
00803                         mkl_stepsize, weights, (length!=0)) ;
00804             }
00805     }
00806 
00807     delete[] vec ;
00808 }
00809 
00810 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights(
00811     int32_t &len)
00812 {
00813     return tries.compute_abs_weights(len);
00814 }
00815 
00816 bool CWeightedDegreePositionStringKernel::set_shifts(
00817     int32_t* shift_, int32_t shift_len_)
00818 {
00819     delete[] shift;
00820 
00821     shift_len = shift_len_ ;
00822     shift = new int32_t[shift_len] ;
00823 
00824     if (shift)
00825     {
00826         max_shift = 0 ;
00827 
00828         for (int32_t i=0; i<shift_len; i++)
00829         {
00830             shift[i] = shift_[i] ;
00831             if (shift[i]>max_shift)
00832                 max_shift = shift[i] ;
00833         }
00834 
00835         ASSERT(max_shift>=0 && max_shift<=shift_len);
00836     }
00837     
00838     return false;
00839 }
00840 
00841 bool CWeightedDegreePositionStringKernel::set_wd_weights()
00842 {
00843     ASSERT(degree>0);
00844 
00845     delete[] weights;
00846     weights=new float64_t[degree];
00847     if (weights)
00848     {
00849         int32_t i;
00850         float64_t sum=0;
00851         for (i=0; i<degree; i++)
00852         {
00853             weights[i]=degree-i;
00854             sum+=weights[i];
00855         }
00856         for (i=0; i<degree; i++)
00857             weights[i]/=sum;
00858 
00859         for (i=0; i<degree; i++)
00860         {
00861             for (int32_t j=1; j<=max_mismatch; j++)
00862             {
00863                 if (j<i+1)
00864                 {
00865                     int32_t nk=CMath::nchoosek(i+1, j);
00866                     weights[i+j*degree]=weights[i]/(nk*pow(3,j));
00867                 }
00868                 else
00869                     weights[i+j*degree]= 0;
00870             }
00871         }
00872 
00873         return true;
00874     }
00875     else
00876         return false;
00877 }
00878 
00879 bool CWeightedDegreePositionStringKernel::set_weights(
00880     float64_t* ws, int32_t d, int32_t len)
00881 {
00882     SG_DEBUG("degree = %i  d=%i\n", degree, d);
00883     degree=d;
00884     length=len;
00885 
00886     if (len <= 0)
00887         len=1;
00888 
00889     delete[] weights;
00890     weights=new float64_t[d*len];
00891 
00892     if (weights)
00893     {
00894         for (int32_t i=0; i<degree*len; i++)
00895             weights[i]=ws[i];
00896         return true;
00897     }
00898     else
00899         return false;
00900 }
00901 
00902 bool CWeightedDegreePositionStringKernel::set_position_weights(
00903     float64_t* pws, int32_t len)
00904 {
00905     //fprintf(stderr, "len=%i\n", len);
00906 
00907     if (len==0)
00908     {
00909         delete[] position_weights;
00910         position_weights = NULL;
00911         tries.set_position_weights(position_weights);
00912         return true;
00913     }
00914     if (seq_length==0)
00915         seq_length=len;
00916 
00917     if (seq_length!=len)
00918     {
00919         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00920         return false;
00921     }
00922     delete[] position_weights;
00923     position_weights=new float64_t[len];
00924     tries.set_position_weights(position_weights);
00925 
00926     if (position_weights)
00927     {
00928         for (int32_t i=0; i<len; i++)
00929             position_weights[i]=pws[i];
00930         return true;
00931     }
00932     else
00933         return false;
00934 }
00935 
00936 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num)
00937 {
00938     //fprintf(stderr, "lhs %i %i %i\n", len, num, seq_length);
00939 
00940     if (position_weights_rhs==position_weights_lhs)
00941         position_weights_rhs=NULL;
00942     else
00943         delete_position_weights_rhs();
00944 
00945     if (len==0)
00946     {
00947         return delete_position_weights_lhs();
00948     }
00949     
00950     if (seq_length!=len)
00951     {
00952         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00953         return false;
00954     }
00955     if (!lhs)
00956     {
00957         SG_ERROR("lhs=NULL\n");
00958         return false ;
00959     }
00960     if (lhs->get_num_vectors()!=num)
00961     {
00962         SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
00963         return false;
00964     }
00965     
00966     delete[] position_weights_lhs;
00967     position_weights_lhs=new float64_t[len*num];
00968     if (position_weights_lhs)
00969     {
00970         for (int32_t i=0; i<len*num; i++)
00971             position_weights_lhs[i]=pws[i];
00972         return true;
00973     }
00974     else
00975         return false;
00976 }
00977 
00978 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs(
00979     float64_t* pws, int32_t len, int32_t num)
00980 {
00981     //fprintf(stderr, "rhs %i %i %i\n", len, num, seq_length);
00982     if (len==0)
00983     {
00984         if (position_weights_rhs==position_weights_lhs)
00985         {
00986             position_weights_rhs=NULL;
00987             return true;
00988         }
00989         return delete_position_weights_rhs();
00990     }
00991 
00992     if (seq_length!=len)
00993     {
00994         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00995         return false;
00996     }
00997     if (!rhs)
00998     {
00999         if (!lhs)
01000         {
01001             SG_ERROR("rhs==0 and lhs=NULL\n");
01002             return false;
01003         }
01004         if (lhs->get_num_vectors()!=num)
01005         {
01006             SG_ERROR("lhs->get_num_vectors()=%i, num=%i\n", lhs->get_num_vectors(), num);
01007             return false;
01008         }
01009     } else
01010         if (rhs->get_num_vectors()!=num)
01011         {
01012             SG_ERROR("rhs->get_num_vectors()=%i, num=%i\n", rhs->get_num_vectors(), num);
01013             return false;
01014         }
01015 
01016     delete[] position_weights_rhs;
01017     position_weights_rhs=new float64_t[len*num];
01018     if (position_weights_rhs)
01019     {
01020         for (int32_t i=0; i<len*num; i++)
01021             position_weights_rhs[i]=pws[i];
01022         return true;
01023     }
01024     else
01025         return false;
01026 }
01027 
01028 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd()
01029 {
01030     delete[] block_weights;
01031     block_weights=new float64_t[CMath::max(seq_length,degree)];
01032 
01033     if (block_weights)
01034     {
01035         int32_t k;
01036         float64_t d=degree; // use float to evade rounding errors below
01037 
01038         for (k=0; k<degree; k++)
01039             block_weights[k]=
01040                 (-pow(k, 3)+(3*d-3)*pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
01041         for (k=degree; k<seq_length; k++)
01042             block_weights[k]=(-d+3*k+4)/3;
01043     }
01044 
01045     return (block_weights!=NULL);
01046 }
01047 
01048 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external()
01049 {
01050     ASSERT(weights);
01051     delete[] block_weights;
01052     block_weights=new float64_t[CMath::max(seq_length,degree)];
01053 
01054     if (block_weights)
01055     {
01056         int32_t i=0;
01057         block_weights[0]=weights[0];
01058         for (i=1; i<CMath::max(seq_length,degree); i++)
01059             block_weights[i]=0;
01060 
01061         for (i=1; i<CMath::max(seq_length,degree); i++)
01062         {
01063             block_weights[i]=block_weights[i-1];
01064 
01065             float64_t contrib=0;
01066             for (int32_t j=0; j<CMath::min(degree,i+1); j++)
01067                 contrib+=weights[j];
01068 
01069             block_weights[i]+=contrib;
01070         }
01071     }
01072 
01073     return (block_weights!=NULL);
01074 }
01075 
01076 bool CWeightedDegreePositionStringKernel::init_block_weights_const()
01077 {
01078     delete[] block_weights;
01079     block_weights=new float64_t[seq_length];
01080 
01081     if (block_weights)
01082     {
01083         for (int32_t i=1; i<seq_length+1 ; i++)
01084             block_weights[i-1]=1.0/seq_length;
01085     }
01086 
01087     return (block_weights!=NULL);
01088 }
01089 
01090 bool CWeightedDegreePositionStringKernel::init_block_weights_linear()
01091 {
01092     delete[] block_weights;
01093     block_weights=new float64_t[seq_length];
01094 
01095     if (block_weights)
01096     {
01097         for (int32_t i=1; i<seq_length+1 ; i++)
01098             block_weights[i-1]=degree*i;
01099     }
01100 
01101     return (block_weights!=NULL);
01102 }
01103 
01104 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly()
01105 {
01106     delete[] block_weights;
01107     block_weights=new float64_t[seq_length];
01108 
01109     if (block_weights)
01110     {
01111         for (int32_t i=1; i<degree+1 ; i++)
01112             block_weights[i-1]=((float64_t) i)*i;
01113 
01114         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01115             block_weights[i-1]=i;
01116     }
01117 
01118     return (block_weights!=NULL);
01119 }
01120 
01121 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly()
01122 {
01123     delete[] block_weights;
01124     block_weights=new float64_t[seq_length];
01125 
01126     if (block_weights)
01127     {
01128         for (int32_t i=1; i<degree+1 ; i++)
01129             block_weights[i-1]=((float64_t) i)*i*i;
01130 
01131         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01132             block_weights[i-1]=i;
01133     }
01134 
01135     return (block_weights!=NULL);
01136 }
01137 
01138 bool CWeightedDegreePositionStringKernel::init_block_weights_exp()
01139 {
01140     delete[] block_weights;
01141     block_weights=new float64_t[seq_length];
01142 
01143     if (block_weights)
01144     {
01145         for (int32_t i=1; i<degree+1 ; i++)
01146             block_weights[i-1]=exp(((float64_t) i/10.0));
01147 
01148         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01149             block_weights[i-1]=i;
01150     }
01151 
01152     return (block_weights!=NULL);
01153 }
01154 
01155 bool CWeightedDegreePositionStringKernel::init_block_weights_log()
01156 {
01157     delete[] block_weights;
01158     block_weights=new float64_t[seq_length];
01159 
01160     if (block_weights)
01161     {
01162         for (int32_t i=1; i<degree+1 ; i++)
01163             block_weights[i-1]=pow(log(i),2);
01164 
01165         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01166             block_weights[i-1]=i-degree+1+pow(log(degree+1),2);
01167     }
01168 
01169     return (block_weights!=NULL);
01170 }
01171 
01172 bool CWeightedDegreePositionStringKernel::init_block_weights_external()
01173 {
01174     if (block_weights_external && (seq_length == num_block_weights_external) )
01175     {
01176         delete[] block_weights;
01177         block_weights=new float64_t[seq_length];
01178 
01179         if (block_weights)
01180         {
01181             for (int32_t i=0; i<seq_length; i++)
01182                 block_weights[i]=block_weights_external[i];
01183         }
01184     }
01185     else {
01186       SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external);
01187    }
01188     return (block_weights!=NULL);
01189 }
01190 
01191 bool CWeightedDegreePositionStringKernel::init_block_weights()
01192 {
01193     switch (type)
01194     {
01195         case E_WD:
01196             return init_block_weights_from_wd();
01197         case E_EXTERNAL:
01198             return init_block_weights_from_wd_external();
01199         case E_BLOCK_CONST:
01200             return init_block_weights_const();
01201         case E_BLOCK_LINEAR:
01202             return init_block_weights_linear();
01203         case E_BLOCK_SQPOLY:
01204             return init_block_weights_sqpoly();
01205         case E_BLOCK_CUBICPOLY:
01206             return init_block_weights_cubicpoly();
01207         case E_BLOCK_EXP:
01208             return init_block_weights_exp();
01209         case E_BLOCK_LOG:
01210             return init_block_weights_log();
01211         case E_BLOCK_EXTERNAL:
01212             return init_block_weights_external();
01213         default:
01214             return false;
01215     };
01216 }
01217 
01218 
01219 
01220 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p)
01221 {
01222     S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p;
01223     int32_t j=params->j;
01224     CWeightedDegreePositionStringKernel* wd=params->kernel;
01225     CTrie<DNATrie>* tries=params->tries;
01226     float64_t* weights=params->weights;
01227     int32_t length=params->length;
01228     int32_t max_shift=params->max_shift;
01229     int32_t* vec=params->vec;
01230     float64_t* result=params->result;
01231     float64_t factor=params->factor;
01232     int32_t* shift=params->shift;
01233     int32_t* vec_idx=params->vec_idx;
01234 
01235     for (int32_t i=params->start; i<params->end; i++)
01236     {
01237         int32_t len=0;
01238         CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
01239         CAlphabet* alpha=wd->alphabet;
01240 
01241         char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len);
01242         for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++)
01243             vec[k]=alpha->remap_to_bin(char_vec[k]);
01244 
01245         SG_UNREF(rhs_feat);
01246 
01247         result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
01248 
01249         if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT)
01250         {
01251             for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++)
01252             {
01253                 int32_t s=j-q ;
01254                 if ((s>=1) && (s<=shift[q]) && (q+s<len))
01255                 {
01256                     result[i] +=
01257                         wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01258                                 len, q, q+s, q, weights, (length!=0)),
01259                                 vec_idx[i])/(2.0*s);
01260                 }
01261             }
01262 
01263             for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++)
01264             {
01265                 result[i] +=
01266                     wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01267                                 len, j+s, j, j+s, weights, (length!=0)),
01268                                 vec_idx[i])/(2.0*s);
01269             }
01270         }
01271     }
01272 
01273     return NULL;
01274 }
01275 
01276 void CWeightedDegreePositionStringKernel::compute_batch(
01277     int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
01278     int32_t* IDX, float64_t* alphas, float64_t factor)
01279 {
01280     ASSERT(alphabet);
01281     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01282     ASSERT(position_weights_lhs==NULL);
01283     ASSERT(position_weights_rhs==NULL);
01284     ASSERT(rhs);
01285     ASSERT(num_vec<=rhs->get_num_vectors());
01286     ASSERT(num_vec>0);
01287     ASSERT(vec_idx);
01288     ASSERT(result);
01289     create_empty_tries();
01290 
01291     int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01292     ASSERT(num_feat>0);
01293     int32_t num_threads=parallel.get_num_threads();
01294     ASSERT(num_threads>0);
01295     int32_t* vec=new int32_t[num_threads*num_feat];
01296 
01297     if (num_threads < 2)
01298     {
01299 #ifdef WIN32
01300        for (int32_t j=0; j<num_feat; j++)
01301 #else
01302        CSignal::clear_cancel();
01303        for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01304 #endif
01305             {
01306                 init_optimization(num_suppvec, IDX, alphas, j);
01307                 S_THREAD_PARAM<DNATrie> params;
01308                 params.vec=vec;
01309                 params.result=result;
01310                 params.weights=weights;
01311                 params.kernel=this;
01312                 params.tries=&tries;
01313                 params.factor=factor;
01314                 params.j=j;
01315                 params.start=0;
01316                 params.end=num_vec;
01317                 params.length=length;
01318                 params.max_shift=max_shift;
01319                 params.shift=shift;
01320                 params.vec_idx=vec_idx;
01321                 compute_batch_helper((void*) &params);
01322 
01323                 SG_PROGRESS(j,0,num_feat);
01324             }
01325     }
01326 #ifndef WIN32
01327     else
01328     {
01329 
01330         CSignal::clear_cancel();
01331         for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01332         {
01333             init_optimization(num_suppvec, IDX, alphas, j);
01334             pthread_t threads[num_threads-1];
01335             S_THREAD_PARAM<DNATrie> params[num_threads];
01336             int32_t step= num_vec/num_threads;
01337             int32_t t;
01338 
01339             for (t=0; t<num_threads-1; t++)
01340             {
01341                 params[t].vec=&vec[num_feat*t];
01342                 params[t].result=result;
01343                 params[t].weights=weights;
01344                 params[t].kernel=this;
01345                 params[t].tries=&tries;
01346                 params[t].factor=factor;
01347                 params[t].j=j;
01348                 params[t].start = t*step;
01349                 params[t].end = (t+1)*step;
01350                 params[t].length=length;
01351                 params[t].max_shift=max_shift;
01352                 params[t].shift=shift;
01353                 params[t].vec_idx=vec_idx;
01354                 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)&params[t]);
01355             }
01356 
01357             params[t].vec=&vec[num_feat*t];
01358             params[t].result=result;
01359             params[t].weights=weights;
01360             params[t].kernel=this;
01361             params[t].tries=&tries;
01362             params[t].factor=factor;
01363             params[t].j=j;
01364             params[t].start=t*step;
01365             params[t].end=num_vec;
01366             params[t].length=length;
01367             params[t].max_shift=max_shift;
01368             params[t].shift=shift;
01369             params[t].vec_idx=vec_idx;
01370             compute_batch_helper((void*) &params[t]);
01371 
01372             for (t=0; t<num_threads-1; t++)
01373                 pthread_join(threads[t], NULL);
01374             SG_PROGRESS(j,0,num_feat);
01375         }
01376     }
01377 #endif
01378 
01379     delete[] vec;
01380 
01381     //really also free memory as this can be huge on testing especially when
01382     //using the combined kernel
01383     create_empty_tries();
01384 }
01385 
01386 float64_t* CWeightedDegreePositionStringKernel::compute_scoring(
01387     int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result,
01388     int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01389 {
01390     ASSERT(position_weights_lhs==NULL);
01391     ASSERT(position_weights_rhs==NULL);
01392 
01393     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01394     ASSERT(num_feat>0);
01395     ASSERT(alphabet);
01396     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01397 
01398     num_sym=4; //for now works only w/ DNA
01399 
01400     ASSERT(max_degree>0);
01401 
01402     // === variables
01403     int32_t* nofsKmers=new int32_t[max_degree];
01404     float64_t** C=new float64_t*[max_degree];
01405     float64_t** L=new float64_t*[max_degree];
01406     float64_t** R=new float64_t*[max_degree];
01407 
01408     int32_t i;
01409     int32_t k;
01410 
01411     // --- return table
01412     int32_t bigtabSize=0;
01413     for (k=0; k<max_degree; ++k )
01414     {
01415         nofsKmers[k]=(int32_t) pow(num_sym, k+1);
01416         const int32_t tabSize=nofsKmers[k]*num_feat;
01417         bigtabSize+=tabSize;
01418     }
01419     result=new float64_t[bigtabSize];
01420 
01421     // --- auxilliary tables
01422     int32_t tabOffs=0;
01423     for( k = 0; k < max_degree; ++k )
01424     {
01425         const int32_t tabSize = nofsKmers[k] * num_feat;
01426         C[k] = &result[tabOffs];
01427         L[k] = new float64_t[ tabSize ];
01428         R[k] = new float64_t[ tabSize ];
01429         tabOffs+=tabSize;
01430         for(i = 0; i < tabSize; i++ )
01431         {
01432             C[k][i] = 0.0;
01433             L[k][i] = 0.0;
01434             R[k][i] = 0.0;
01435         }
01436     }
01437 
01438     // --- tree parsing info
01439     float64_t* margFactors=new float64_t[degree];
01440 
01441     int32_t* x = new int32_t[ degree+1 ];
01442     int32_t* substrs = new int32_t[ degree+1 ];
01443     // - fill arrays
01444     margFactors[0] = 1.0;
01445     substrs[0] = 0;
01446     for( k=1; k < degree; ++k ) {
01447         margFactors[k] = 0.25 * margFactors[k-1];
01448         substrs[k] = -1;
01449     }
01450     substrs[degree] = -1;
01451     // - fill struct
01452     struct TreeParseInfo info;
01453     info.num_sym = num_sym;
01454     info.num_feat = num_feat;
01455     info.p = -1;
01456     info.k = -1;
01457     info.nofsKmers = nofsKmers;
01458     info.margFactors = margFactors;
01459     info.x = x;
01460     info.substrs = substrs;
01461     info.y0 = 0;
01462     info.C_k = NULL;
01463     info.L_k = NULL;
01464     info.R_k = NULL;
01465 
01466     // === main loop
01467     i = 0; // total progress
01468     for( k = 0; k < max_degree; ++k )
01469     {
01470         const int32_t nofKmers = nofsKmers[ k ];
01471         info.C_k = C[k];
01472         info.L_k = L[k];
01473         info.R_k = R[k];
01474 
01475         // --- run over all trees
01476         for(int32_t p = 0; p < num_feat; ++p )
01477         {
01478             init_optimization( num_suppvec, IDX, alphas, p );
01479             int32_t tree = p ;
01480             for(int32_t j = 0; j < degree+1; j++ ) {
01481                 x[j] = -1;
01482             }
01483             tries.traverse( tree, p, info, 0, x, k );
01484             SG_PROGRESS(i++,0,num_feat*max_degree);
01485         }
01486 
01487         // --- add partial overlap scores
01488         if( k > 0 ) {
01489             const int32_t j = k - 1;
01490             const int32_t nofJmers = (int32_t) pow( num_sym, j+1 );
01491             for(int32_t p = 0; p < num_feat; ++p ) {
01492                 const int32_t offsetJ = nofJmers * p;
01493                 const int32_t offsetJ1 = nofJmers * (p+1);
01494                 const int32_t offsetK = nofKmers * p;
01495                 int32_t y;
01496                 int32_t sym;
01497                 for( y = 0; y < nofJmers; ++y ) {
01498                     for( sym = 0; sym < num_sym; ++sym ) {
01499                         const int32_t y_sym = num_sym*y + sym;
01500                         const int32_t sym_y = nofJmers*sym + y;
01501                         ASSERT(0<=y_sym && y_sym<nofKmers);
01502                         ASSERT(0<=sym_y && sym_y<nofKmers);
01503                         C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ];
01504                         if( p < num_feat-1 ) {
01505                             C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ];
01506                         }
01507                     }
01508                 }
01509             }
01510         }
01511         //   if( k > 1 )
01512         //     j = k-1
01513         //     for all positions p
01514         //       for all j-mers y
01515         //          for n in {A,C,G,T}
01516         //            C_k[ p, [y,n] ] += L_j[ p, y ]
01517         //            C_k[ p, [n,y] ] += R_j[ p+1, y ]
01518         //          end;
01519         //       end;
01520         //     end;
01521         //   end;
01522     }
01523 
01524     // === return a vector
01525     num_feat=1;
01526     num_sym = bigtabSize;
01527     // --- clean up
01528     delete[] nofsKmers;
01529     delete[] margFactors;
01530     delete[] substrs;
01531     delete[] x;
01532     delete[] C;
01533     for( k = 0; k < max_degree; ++k ) {
01534         delete[] L[k];
01535         delete[] R[k];
01536     }
01537     delete[] L;
01538     delete[] R;
01539     return result;
01540 }
01541 
01542 char* CWeightedDegreePositionStringKernel::compute_consensus(
01543     int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01544 {
01545     ASSERT(position_weights_lhs==NULL);
01546     ASSERT(position_weights_rhs==NULL);
01547     //only works for order <= 32
01548     ASSERT(degree<=32);
01549     ASSERT(!tries.get_use_compact_terminal_nodes());
01550     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01551     ASSERT(num_feat>0);
01552     ASSERT(alphabet);
01553     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01554 
01555     //consensus
01556     char* result=new char[num_feat];
01557 
01558     //backtracking and scoring table
01559     int32_t num_tables=CMath::max(1,num_feat-degree+1);
01560     CDynamicArray<ConsensusEntry>** table=new CDynamicArray<ConsensusEntry>*[num_tables];
01561 
01562     for (int32_t i=0; i<num_tables; i++)
01563         table[i]=new CDynamicArray<ConsensusEntry>(num_suppvec/10);
01564 
01565     //compute consensus via dynamic programming
01566     for (int32_t i=0; i<num_tables; i++)
01567     {
01568         bool cumulative=false;
01569 
01570         if (i<num_tables-1)
01571             init_optimization(num_suppvec, IDX, alphas, i);
01572         else
01573         {
01574             init_optimization(num_suppvec, IDX, alphas, i, num_feat-1);
01575             cumulative=true;
01576         }
01577 
01578         if (i==0)
01579             tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights);
01580         else
01581             tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights);
01582 
01583         SG_PROGRESS(i,0,num_feat);
01584     }
01585 
01586 
01587     //int32_t n=table[0]->get_num_elements();
01588 
01589     //for (int32_t i=0; i<n; i++)
01590     //{
01591     //  ConsensusEntry e= table[0]->get_element(i);
01592     //  SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01593     //}
01594 
01595     //n=table[num_tables-1]->get_num_elements();
01596     //for (int32_t i=0; i<n; i++)
01597     //{
01598     //  ConsensusEntry e= table[num_tables-1]->get_element(i);
01599     //  SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01600     //}
01601     //n=table[num_tables-2]->get_num_elements();
01602     //for (int32_t i=0; i<n; i++)
01603     //{
01604     //  ConsensusEntry e= table[num_tables-2]->get_element(i);
01605     //  SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01606     //}
01607 
01608     const char* acgt="ACGT";
01609 
01610     //backtracking start
01611     int32_t max_idx=-1;
01612     float32_t max_score=0;
01613     int32_t num_elements=table[num_tables-1]->get_num_elements();
01614 
01615     for (int32_t i=0; i<num_elements; i++)
01616     {
01617         float64_t sc=table[num_tables-1]->get_element(i).score;
01618         if (sc>max_score || max_idx==-1)
01619         {
01620             max_idx=i;
01621             max_score=sc;
01622         }
01623     }
01624     uint64_t endstr=table[num_tables-1]->get_element(max_idx).string;
01625 
01626     SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score);
01627 
01628     for (int32_t i=0; i<degree; i++)
01629         result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3];
01630 
01631     if (num_tables>1)
01632     {
01633         for (int32_t i=num_tables-1; i>=0; i--)
01634         {
01635             //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i);
01636             result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3];
01637             max_idx=table[i]->get_element(max_idx).bt;
01638         }
01639     }
01640 
01641     //for (int32_t t=0; t<num_tables; t++)
01642     //{
01643     //  n=table[t]->get_num_elements();
01644     //  for (int32_t i=0; i<n; i++)
01645     //  {
01646     //      ConsensusEntry e= table[t]->get_element(i);
01647     //      SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt);
01648     //  }
01649     //}
01650 
01651     for (int32_t i=0; i<num_tables; i++)
01652         delete table[i];
01653 
01654     delete[] table;
01655     return result;
01656 }
01657 
01658 
01659 float64_t* CWeightedDegreePositionStringKernel::extract_w(
01660     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01661     float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01662 {
01663   delete_optimization();
01664   use_poim_tries=true;
01665   poim_tries.delete_trees(false);
01666 
01667   // === check
01668   ASSERT(position_weights_lhs==NULL);
01669   ASSERT(position_weights_rhs==NULL);
01670   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01671   ASSERT(num_feat>0);
01672   ASSERT(alphabet->get_alphabet()==DNA);
01673   ASSERT(max_degree>0);
01674 
01675   // === general variables
01676   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01677   const int32_t seqLen = num_feat;
01678   float64_t** subs;
01679   int32_t i;
01680   int32_t k;
01681   //int32_t y;
01682 
01683   // === init tables "subs" for substring scores / POIMs
01684   // --- compute table sizes
01685   int32_t* offsets;
01686   int32_t offset;
01687   offsets = new int32_t[ max_degree ];
01688   offset = 0;
01689   for( k = 0; k < max_degree; ++k ) {
01690     offsets[k] = offset;
01691     const int32_t nofsKmers = (int32_t) pow( NUM_SYMS, k+1 );
01692     const int32_t tabSize = nofsKmers * seqLen;
01693     offset += tabSize;
01694   }
01695   // --- allocate memory
01696   const int32_t bigTabSize = offset;
01697   w_result=new float64_t[bigTabSize];
01698   for (i=0; i<bigTabSize; ++i)
01699     w_result[i]=0;
01700 
01701   // --- set pointers for tables
01702   subs = new float64_t*[ max_degree ];
01703   ASSERT( subs != NULL );
01704   for( k = 0; k < max_degree; ++k ) {
01705     subs[k] = &w_result[ offsets[k] ];
01706   }
01707   delete[] offsets;
01708 
01709   // === init trees; extract "w"
01710   init_optimization( num_suppvec, IDX, alphas, -1);
01711   poim_tries.POIMs_extract_W( subs, max_degree );
01712 
01713   // === clean; return "subs" as vector
01714   delete[] subs;
01715   num_feat = 1;
01716   num_sym = bigTabSize;
01717   use_poim_tries=false;
01718   poim_tries.delete_trees(false);
01719   return w_result;
01720 }
01721 
01722 float64_t* CWeightedDegreePositionStringKernel::compute_POIM(
01723     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01724     float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
01725     float64_t* alphas, float64_t* distrib )
01726 {
01727   delete_optimization();
01728   use_poim_tries=true;
01729   poim_tries.delete_trees(false);
01730 
01731   // === check
01732   ASSERT(position_weights_lhs==NULL);
01733   ASSERT(position_weights_rhs==NULL);
01734   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01735   ASSERT(num_feat>0);
01736   ASSERT(alphabet->get_alphabet()==DNA);
01737   ASSERT(max_degree!=0);
01738   ASSERT(distrib);
01739 
01740   // === general variables
01741   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01742   const int32_t seqLen = num_feat;
01743   float64_t** subs;
01744   int32_t i;
01745   int32_t k;
01746 
01747   // === DEBUGGING mode
01748   //
01749   // Activated if "max_degree" < 0.
01750   // Allows to output selected partial score.
01751   //
01752   // |max_degree| mod 4
01753   //   0: substring
01754   //   1: superstring
01755   //   2: left overlap
01756   //   3: right overlap
01757   //
01758   const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0;
01759   if( debug ) {
01760     max_degree = abs(max_degree) / 4;
01761     switch( debug ) {
01762     case 1: {
01763       printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree );
01764       break;
01765     }
01766     case 2: {
01767       printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree );
01768       break;
01769     }
01770     case 3: {
01771       printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree );
01772       break;
01773     }
01774     case 4: {
01775       printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree );
01776       break;
01777     }
01778     default: {
01779       printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree );
01780       ASSERT(0);
01781       break;
01782     }
01783     }
01784   }
01785 
01786   // --- compute table sizes
01787   int32_t* offsets;
01788   int32_t offset;
01789   offsets = new int32_t[ max_degree ];
01790   offset = 0;
01791   for( k = 0; k < max_degree; ++k ) {
01792     offsets[k] = offset;
01793     const int32_t nofsKmers = (int32_t) pow( NUM_SYMS, k+1 );
01794     const int32_t tabSize = nofsKmers * seqLen;
01795     offset += tabSize;
01796   }
01797   // --- allocate memory
01798   const int32_t bigTabSize=offset;
01799   poim_result=new float64_t[bigTabSize];
01800   for (i=0; i<bigTabSize; ++i )
01801     poim_result[i]=0;
01802 
01803   // --- set pointers for tables
01804   subs=new float64_t*[max_degree];
01805   for (k=0; k<max_degree; ++k)
01806     subs[k]=&poim_result[offsets[k]];
01807 
01808   delete[] offsets;
01809 
01810   // === init trees; precalc S, L and R
01811   init_optimization( num_suppvec, IDX, alphas, -1);
01812   poim_tries.POIMs_precalc_SLR( distrib );
01813 
01814   // === compute substring scores
01815   if( debug==0 || debug==1 ) {
01816     poim_tries.POIMs_extract_W( subs, max_degree );
01817     for( k = 1; k < max_degree; ++k ) {
01818       const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) pow(NUM_SYMS,k-1) : 0;
01819       const int32_t nofKmers1 = (int32_t) pow( NUM_SYMS, k );
01820       const int32_t nofKmers0 = nofKmers1 * NUM_SYMS;
01821       for( i = 0; i < seqLen; ++i ) {
01822     float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL;
01823     float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL;
01824     float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ];
01825     float64_t* const subs_k0i  = & subs[ k-0 ][ i*nofKmers0 ];
01826     int32_t y0;
01827     for( y0 = 0; y0 < nofKmers0; ++y0 ) {
01828       const int32_t y1l = y0 / NUM_SYMS;
01829       const int32_t y1r = y0 % nofKmers1;
01830       const int32_t y2 = y1r / NUM_SYMS;
01831       subs_k0i[ y0 ] += subs_k1i0[ y1l ];
01832       if( i < seqLen-1 ) {
01833         subs_k0i[ y0 ] += subs_k1i1[ y1r ];
01834         if( k > 1 ) {
01835           subs_k0i[ y0 ] -= subs_k2i1[ y2 ];
01836         }
01837       }
01838     }
01839       }
01840     }
01841   }
01842 
01843   // === compute POIMs
01844   poim_tries.POIMs_add_SLR( subs, max_degree, debug );
01845 
01846   // === clean; return "subs" as vector
01847   delete[] subs;
01848   num_feat = 1;
01849   num_sym = bigTabSize;
01850 
01851   use_poim_tries=false;
01852   poim_tries.delete_trees(false);
01853   
01854   return poim_result;
01855 }
01856 
01857 
01858 void CWeightedDegreePositionStringKernel::prepare_POIM2(
01859     float64_t* distrib, int32_t num_sym, int32_t num_feat)
01860 {
01861     free(m_poim_distrib);
01862     m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t));
01863     ASSERT(m_poim_distrib);
01864 
01865     memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t));
01866     m_poim_num_sym=num_sym;
01867     m_poim_num_feat=num_feat;
01868 }
01869 
01870 void CWeightedDegreePositionStringKernel::compute_POIM2(
01871     int32_t max_degree, CSVM* svm)
01872 {
01873     ASSERT(svm);
01874     int32_t num_suppvec=svm->get_num_support_vectors();
01875     int32_t* sv_idx=new int32_t[num_suppvec];
01876     float64_t* sv_weight=new float64_t[num_suppvec];
01877 
01878     for (int32_t i=0; i<num_suppvec; i++)
01879     {
01880         sv_idx[i]=svm->get_support_vector(i);
01881         sv_weight[i]=svm->get_alpha(i);
01882     }
01883     
01884     if ((max_degree < 1) || (max_degree > 12))
01885     {
01886         //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree);
01887         SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree);
01888         max_degree=1;
01889     }
01890     
01891     int32_t num_feat = m_poim_num_feat;
01892     int32_t num_sym = m_poim_num_sym;
01893     free(m_poim);
01894 
01895     m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL,  num_suppvec, sv_idx, 
01896                           sv_weight, m_poim_distrib);
01897 
01898     ASSERT(num_feat==1);
01899     m_poim_result_len=num_sym;
01900     
01901     delete[] sv_weight;
01902     delete[] sv_idx;
01903 }
01904 
01905 void CWeightedDegreePositionStringKernel::get_POIM2(
01906     float64_t** poim, int32_t* result_len)
01907 {
01908     *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t));
01909     ASSERT(*poim);
01910     memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ;
01911     *result_len=m_poim_result_len ;
01912 }
01913 
01914 void CWeightedDegreePositionStringKernel::cleanup_POIM2()
01915 {
01916     free(m_poim) ;
01917     m_poim=NULL ;
01918     free(m_poim_distrib) ;
01919     m_poim_distrib=NULL ;
01920     m_poim_num_sym=0 ;
01921     m_poim_num_sym=0 ;
01922     m_poim_result_len=0 ;
01923 }
01924 

SHOGUN Machine Learning Toolbox - Documentation