00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017
00018 #include "kernel/WeightedDegreeStringKernel.h"
00019 #include "features/Features.h"
00020 #include "features/StringFeatures.h"
00021
00022 #ifndef WIN32
00023 #include <pthread.h>
00024 #endif
00025
00026 struct S_THREAD_PARAM
00027 {
00028 INT* vec;
00029 DREAL* result;
00030 DREAL* weights;
00031 CWeightedDegreeStringKernel* kernel;
00032 CTrie<DNATrie>* tries;
00033 DREAL factor;
00034 INT j;
00035 INT start;
00036 INT end;
00037 INT length;
00038 INT* vec_idx;
00039 };
00040
00041 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel (
00042 INT degree_, EWDKernType type_)
00043 : CStringKernel<CHAR>(10),weights(NULL),position_weights(NULL),
00044 weights_buffer(NULL), mkl_stepsize(1),degree(degree_), length(0),
00045 max_mismatch(0), seq_length(0), initialized(false),
00046 use_normalization(true),block_computation(true),
00047 normalization_const(1.0), num_block_weights_external(0),
00048 block_weights_external(NULL), block_weights(NULL), type(type_),
00049 which_degree(-1), tries(NULL), tree_initialized(false), alphabet(NULL)
00050 {
00051 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00052 lhs=NULL;
00053 rhs=NULL;
00054
00055 if (type!=E_EXTERNAL)
00056 set_wd_weights_by_type(type);
00057 }
00058
00059 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel (
00060 DREAL *weights_, INT degree_)
00061 : CStringKernel<CHAR>(10), weights(NULL), position_weights(NULL),
00062 weights_buffer(NULL), mkl_stepsize(1), degree(degree_), length(0),
00063 max_mismatch(0), seq_length(0), initialized(false),
00064 use_normalization(true), block_computation(true),
00065 normalization_const(1.0), num_block_weights_external(0),
00066 block_weights_external(NULL), block_weights(NULL), type(E_EXTERNAL),
00067 which_degree(-1), tries(NULL), tree_initialized(false), alphabet(NULL)
00068 {
00069 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00070 lhs=NULL;
00071 rhs=NULL;
00072
00073 weights=new DREAL[degree*(1+max_mismatch)];
00074 for (INT i=0; i<degree*(1+max_mismatch); i++)
00075 weights[i]=weights_[i];
00076 }
00077
00078 CWeightedDegreeStringKernel::CWeightedDegreeStringKernel(
00079 CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT degree_)
00080 : CStringKernel<CHAR>(10), weights(NULL), position_weights(NULL),
00081 weights_buffer(NULL), mkl_stepsize(1), degree(degree_), length(0),
00082 max_mismatch(0), seq_length(0), initialized(false),
00083 use_normalization(true), block_computation(true),
00084 normalization_const(1.0), num_block_weights_external(0),
00085 block_weights_external(NULL), block_weights(NULL), type(E_WD),
00086 which_degree(-1), tries(NULL), tree_initialized(false), alphabet(NULL)
00087 {
00088 properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
00089
00090 set_wd_weights_by_type(type);
00091 init(l, r);
00092 }
00093
00094 CWeightedDegreeStringKernel::~CWeightedDegreeStringKernel()
00095 {
00096 cleanup();
00097
00098 delete[] weights;
00099 weights=NULL;
00100
00101 delete[] block_weights;
00102 block_weights=NULL;
00103
00104 delete[] position_weights;
00105 position_weights=NULL;
00106
00107 delete[] weights_buffer;
00108 weights_buffer=NULL;
00109 }
00110
00111
00112 void CWeightedDegreeStringKernel::remove_lhs()
00113 {
00114 SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
00115 delete_optimization();
00116 initialized = false;
00117
00118 if (tries!=NULL)
00119 tries->destroy();
00120
00121 CKernel::remove_lhs();
00122 }
00123
00124 void CWeightedDegreeStringKernel::create_empty_tries()
00125 {
00126 seq_length=((CStringFeatures<CHAR>*) lhs)->get_max_vector_length();
00127
00128 if (tries!=NULL)
00129 {
00130 tries->destroy() ;
00131 tries->create(seq_length, max_mismatch==0) ;
00132 }
00133 }
00134
00135 bool CWeightedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
00136 {
00137 INT lhs_changed=(lhs!=l);
00138 INT rhs_changed=(rhs!=r);
00139
00140 bool result=CStringKernel<CHAR>::init(l,r);
00141 initialized=false;
00142
00143 SG_DEBUG("lhs_changed: %i\n", lhs_changed);
00144 SG_DEBUG("rhs_changed: %i\n", rhs_changed);
00145
00146 CStringFeatures<CHAR>* sf_l=(CStringFeatures<CHAR>*) l;
00147 CStringFeatures<CHAR>* sf_r=(CStringFeatures<CHAR>*) r;
00148
00149 INT len=sf_l->get_max_vector_length();
00150 if (lhs_changed && !sf_l->have_same_length(len))
00151 SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00152
00153 if (rhs_changed && !sf_r->have_same_length(len))
00154 SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00155
00156 delete alphabet;
00157 alphabet=new CAlphabet(sf_l->get_alphabet());
00158 CAlphabet* ralphabet=(sf_r->get_alphabet());
00159 if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00160 properties &= ((ULONG) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00161
00162 ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00163 SG_UNREF(ralphabet);
00164
00165 if (tries!=NULL) {
00166 tries->delete_trees(max_mismatch==0);
00167 delete tries;
00168 }
00169 tries=new CTrie<DNATrie>(degree, max_mismatch==0);
00170 create_empty_tries();
00171
00172 init_block_weights();
00173
00174 if (use_normalization)
00175 normalization_const=block_weights[seq_length-1];
00176 else
00177 normalization_const=1.0;
00178
00179 initialized=true;
00180 return result;
00181 }
00182
00183 void CWeightedDegreeStringKernel::cleanup()
00184 {
00185 SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n");
00186 delete_optimization();
00187
00188 delete[] block_weights;
00189 block_weights=NULL;
00190
00191 if (tries!=NULL)
00192 {
00193 tries->destroy();
00194 delete tries;
00195 tries=NULL;
00196 }
00197
00198 seq_length=0;
00199 initialized=false;
00200 tree_initialized = false;
00201
00202 delete alphabet;
00203 alphabet=NULL;
00204
00205 CKernel::cleanup();
00206 }
00207
00208 bool CWeightedDegreeStringKernel::load_init(FILE* src)
00209 {
00210 return false;
00211 }
00212
00213 bool CWeightedDegreeStringKernel::save_init(FILE* dest)
00214 {
00215 return false;
00216 }
00217
00218
00219 bool CWeightedDegreeStringKernel::init_optimization(INT count, INT* IDX, DREAL* alphas, INT tree_num)
00220 {
00221 if (tree_num<0)
00222 SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
00223
00224 delete_optimization();
00225
00226 if (tree_num<0)
00227 SG_DEBUG( "initializing CWeightedDegreeStringKernel optimization\n") ;
00228
00229 int i=0;
00230 for (i=0; i<count; i++)
00231 {
00232 if (tree_num<0)
00233 {
00234 if ( (i % (count/10+1)) == 0)
00235 SG_PROGRESS(i, 0, count);
00236
00237 if (max_mismatch==0)
00238 add_example_to_tree(IDX[i], alphas[i]) ;
00239 else
00240 add_example_to_tree_mismatch(IDX[i], alphas[i]) ;
00241
00242
00243 }
00244 else
00245 {
00246 if (max_mismatch==0)
00247 add_example_to_single_tree(IDX[i], alphas[i], tree_num) ;
00248 else
00249 add_example_to_single_tree_mismatch(IDX[i], alphas[i], tree_num) ;
00250 }
00251 }
00252
00253 if (tree_num<0)
00254 SG_DONE();
00255
00256
00257
00258 set_is_initialized(true) ;
00259 return true ;
00260 }
00261
00262 bool CWeightedDegreeStringKernel::delete_optimization()
00263 {
00264 if (get_is_initialized())
00265 {
00266 if (tries!=NULL)
00267 tries->delete_trees(max_mismatch==0);
00268 set_is_initialized(false);
00269 return true;
00270 }
00271
00272 return false;
00273 }
00274
00275
00276 DREAL CWeightedDegreeStringKernel::compute_with_mismatch(CHAR* avec, INT alen, CHAR* bvec, INT blen)
00277 {
00278 DREAL sum = 0.0 ;
00279
00280 for (INT i=0; i<alen; i++)
00281 {
00282 DREAL sumi = 0.0 ;
00283 INT mismatches=0 ;
00284
00285 for (INT j=0; (i+j<alen) && (j<degree); j++)
00286 {
00287 if (avec[i+j]!=bvec[i+j])
00288 {
00289 mismatches++ ;
00290 if (mismatches>max_mismatch)
00291 break ;
00292 } ;
00293 sumi += weights[j+degree*mismatches];
00294 }
00295 if (position_weights!=NULL)
00296 sum+=position_weights[i]*sumi ;
00297 else
00298 sum+=sumi ;
00299 }
00300 return sum ;
00301 }
00302
00303 DREAL CWeightedDegreeStringKernel::compute_using_block(CHAR* avec, INT alen, CHAR* bvec, INT blen)
00304 {
00305 ASSERT(alen==blen);
00306
00307 DREAL sum=0;
00308 INT match_len=-1;
00309
00310 for (INT i=0; i<alen; i++)
00311 {
00312 if (avec[i]==bvec[i])
00313 match_len++;
00314 else
00315 {
00316 if (match_len>=0)
00317 sum+=block_weights[match_len];
00318 match_len=-1;
00319 }
00320 }
00321
00322 if (match_len>=0)
00323 sum+=block_weights[match_len];
00324
00325 return sum;
00326 }
00327
00328 DREAL CWeightedDegreeStringKernel::compute_without_mismatch(CHAR* avec, INT alen, CHAR* bvec, INT blen)
00329 {
00330 DREAL sum = 0.0 ;
00331
00332 for (INT i=0; i<alen; i++)
00333 {
00334 DREAL sumi = 0.0 ;
00335
00336 for (INT j=0; (i+j<alen) && (j<degree); j++)
00337 {
00338 if (avec[i+j]!=bvec[i+j])
00339 break ;
00340 sumi += weights[j];
00341 }
00342 if (position_weights!=NULL)
00343 sum+=position_weights[i]*sumi ;
00344 else
00345 sum+=sumi ;
00346 }
00347 return sum ;
00348 }
00349
00350 DREAL CWeightedDegreeStringKernel::compute_without_mismatch_matrix(CHAR* avec, INT alen, CHAR* bvec, INT blen)
00351 {
00352 DREAL sum = 0.0 ;
00353
00354 for (INT i=0; i<alen; i++)
00355 {
00356 DREAL sumi=0.0 ;
00357 for (INT j=0; (i+j<alen) && (j<degree); j++)
00358 {
00359 if (avec[i+j]!=bvec[i+j])
00360 break;
00361 sumi += weights[i*degree+j];
00362 }
00363 if (position_weights!=NULL)
00364 sum += position_weights[i]*sumi ;
00365 else
00366 sum += sumi ;
00367 }
00368
00369 return sum ;
00370 }
00371
00372
00373 DREAL CWeightedDegreeStringKernel::compute(INT idx_a, INT idx_b)
00374 {
00375 INT alen, blen;
00376 CHAR* avec=((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00377 CHAR* bvec=((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00378 DREAL result=0;
00379
00380 if (max_mismatch==0 && length==0 && block_computation)
00381 result=compute_using_block(avec, alen, bvec, blen);
00382 else
00383 {
00384 if (max_mismatch>0)
00385 result=compute_with_mismatch(avec, alen, bvec, blen);
00386 else if (length==0)
00387 result=compute_without_mismatch(avec, alen, bvec, blen);
00388 else
00389 result=compute_without_mismatch_matrix(avec, alen, bvec, blen);
00390 }
00391
00392 return result/normalization_const;
00393 }
00394
00395
00396 void CWeightedDegreeStringKernel::add_example_to_tree(INT idx, DREAL alpha)
00397 {
00398 ASSERT(alphabet);
00399 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00400
00401 INT len=0;
00402 CHAR* char_vec=((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx, len);
00403 ASSERT(max_mismatch==0);
00404 INT *vec=new INT[len];
00405
00406 for (INT i=0; i<len; i++)
00407 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00408
00409 if (length == 0 || max_mismatch > 0)
00410 {
00411 for (INT i=0; i<len; i++)
00412 {
00413 DREAL alpha_pw=alpha;
00414
00415
00416 if (alpha_pw==0.0)
00417 continue;
00418 ASSERT(tries);
00419 tries->add_to_trie(i, 0, vec, alpha_pw, weights, (length!=0));
00420 }
00421 }
00422 else
00423 {
00424 for (INT i=0; i<len; i++)
00425 {
00426 DREAL alpha_pw=alpha;
00427
00428
00429 if (alpha_pw==0.0)
00430 continue ;
00431 ASSERT(tries);
00432 tries->add_to_trie(i, 0, vec, alpha_pw, weights, (length!=0));
00433 }
00434 }
00435 delete[] vec ;
00436 tree_initialized=true ;
00437 }
00438
00439 void CWeightedDegreeStringKernel::add_example_to_single_tree(INT idx, DREAL alpha, INT tree_num)
00440 {
00441 ASSERT(alphabet);
00442 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00443
00444 INT len ;
00445 CHAR* char_vec=((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx, len);
00446 ASSERT(max_mismatch==0);
00447 INT *vec = new INT[len] ;
00448
00449 for (INT i=tree_num; i<tree_num+degree && i<len; i++)
00450 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00451
00452 ASSERT(tries);
00453 if (alpha!=0.0)
00454 tries->add_to_trie(tree_num, 0, vec, alpha, weights, (length!=0));
00455
00456 delete[] vec ;
00457 tree_initialized=true ;
00458 }
00459
00460 void CWeightedDegreeStringKernel::add_example_to_tree_mismatch(INT idx, DREAL alpha)
00461 {
00462 ASSERT(tries);
00463 ASSERT(alphabet);
00464 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00465
00466 INT len ;
00467 CHAR* char_vec=((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx, len);
00468
00469 INT *vec = new INT[len] ;
00470
00471 for (INT i=0; i<len; i++)
00472 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00473
00474 for (INT i=0; i<len; i++)
00475 {
00476 if (alpha!=0.0)
00477 tries->add_example_to_tree_mismatch_recursion(NO_CHILD, i, alpha, &vec[i], len-i, 0, 0, max_mismatch, weights);
00478 }
00479
00480 delete[] vec ;
00481 tree_initialized=true ;
00482 }
00483
00484 void CWeightedDegreeStringKernel::add_example_to_single_tree_mismatch(INT idx, DREAL alpha, INT tree_num)
00485 {
00486 ASSERT(tries);
00487 ASSERT(alphabet);
00488 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00489
00490 INT len=0;
00491 CHAR* char_vec=((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx, len);
00492 INT *vec=new INT[len];
00493
00494 for (INT i=tree_num; i<len && i<tree_num+degree; i++)
00495 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00496
00497 if (alpha!=0.0)
00498 {
00499 tries->add_example_to_tree_mismatch_recursion(
00500 NO_CHILD, tree_num, alpha, &vec[tree_num], len-tree_num,
00501 0, 0, max_mismatch, weights);
00502 }
00503
00504 delete[] vec;
00505 tree_initialized=true;
00506 }
00507
00508
00509 DREAL CWeightedDegreeStringKernel::compute_by_tree(INT idx)
00510 {
00511 ASSERT(alphabet);
00512 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00513
00514 INT len=0;
00515 CHAR* char_vec=((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx, len);
00516 ASSERT(char_vec && len>0);
00517 INT *vec=new INT[len];
00518
00519 for (INT i=0; i<len; i++)
00520 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00521
00522 DREAL sum=0;
00523 ASSERT(tries);
00524 for (INT i=0; i<len; i++)
00525 sum+=tries->compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0));
00526
00527 delete[] vec;
00528 return sum/normalization_const;
00529 }
00530
00531 void CWeightedDegreeStringKernel::compute_by_tree(INT idx, DREAL* LevelContrib)
00532 {
00533 ASSERT(alphabet);
00534 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00535
00536 INT len ;
00537 CHAR* char_vec=((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx, len);
00538
00539 INT *vec = new INT[len] ;
00540
00541 for (INT i=0; i<len; i++)
00542 vec[i]=alphabet->remap_to_bin(char_vec[i]);
00543
00544 ASSERT(tries);
00545 for (INT i=0; i<len; i++)
00546 tries->compute_by_tree_helper(vec, len, i, i, i, LevelContrib, 1.0/normalization_const, mkl_stepsize, weights, (length!=0));
00547
00548 delete[] vec ;
00549 }
00550
00551 DREAL *CWeightedDegreeStringKernel::compute_abs_weights(int &len)
00552 {
00553 ASSERT(tries);
00554 return tries->compute_abs_weights(len);
00555 }
00556
00557 bool CWeightedDegreeStringKernel::set_wd_weights_by_type(EWDKernType p_type)
00558 {
00559 ASSERT(degree>0);
00560 ASSERT(p_type==E_WD);
00561
00562 delete[] weights;
00563 weights=new DREAL[degree];
00564 if (weights)
00565 {
00566 INT i;
00567 DREAL sum=0;
00568 for (i=0; i<degree; i++)
00569 {
00570 weights[i]=degree-i;
00571 sum+=weights[i];
00572 }
00573 for (i=0; i<degree; i++)
00574 weights[i]/=sum;
00575
00576 for (i=0; i<degree; i++)
00577 {
00578 for (INT j=1; j<=max_mismatch; j++)
00579 {
00580 if (j<i+1)
00581 {
00582 INT nk=CMath::nchoosek(i+1, j);
00583 weights[i+j*degree]=weights[i]/(nk*pow(3,j));
00584 }
00585 else
00586 weights[i+j*degree]= 0;
00587 }
00588 }
00589
00590 if (which_degree>=0)
00591 {
00592 ASSERT(which_degree<degree);
00593 for (i=0; i<degree; i++)
00594 {
00595 if (i!=which_degree)
00596 weights[i]=0;
00597 else
00598 weights[i]=1;
00599 }
00600 }
00601 return true;
00602 }
00603 else
00604 return false;
00605 }
00606
00607 bool CWeightedDegreeStringKernel::set_weights(DREAL* ws, INT d, INT len)
00608 {
00609 SG_DEBUG("degree = %i d=%i\n", degree, d);
00610 degree=d;
00611 ASSERT(tries);
00612 tries->set_degree(degree);
00613 length=len;
00614
00615 if (length==0) length=1;
00616 INT num_weights=degree*(length+max_mismatch);
00617 delete[] weights;
00618 weights=new DREAL[num_weights];
00619 if (weights)
00620 {
00621 for (INT i=0; i<num_weights; i++) {
00622 if (ws[i])
00623 weights[i]=ws[i];
00624 }
00625 return true;
00626 }
00627 else
00628 return false;
00629 }
00630
00631 bool CWeightedDegreeStringKernel::set_position_weights(DREAL* pws, INT len)
00632 {
00633 if (len==0)
00634 {
00635 delete[] position_weights;
00636 position_weights=NULL;
00637 ASSERT(tries);
00638 tries->set_position_weights(position_weights);
00639 }
00640
00641 if (seq_length!=len)
00642 SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00643
00644 delete[] position_weights;
00645 position_weights=new DREAL[len];
00646 ASSERT(tries);
00647 tries->set_position_weights(position_weights);
00648
00649 if (position_weights)
00650 {
00651 for (int i=0; i<len; i++)
00652 position_weights[i]=pws[i];
00653 return true;
00654 }
00655 else
00656 return false;
00657 }
00658
00659 bool CWeightedDegreeStringKernel::init_block_weights_from_wd()
00660 {
00661 delete[] block_weights;
00662 block_weights=new DREAL[CMath::max(seq_length,degree)];
00663
00664 if (block_weights)
00665 {
00666 double deg=degree;
00667 INT k;
00668 for (k=0; k<degree ; k++)
00669 block_weights[k]=(-pow(k,3) + (3*deg-3)*pow(k,2) + (9*deg-2)*k + 6*deg) / (3*deg*(deg+1));
00670 for (k=degree; k<seq_length ; k++)
00671 block_weights[k]=(-deg+3*k+4)/3;
00672 }
00673
00674 return (block_weights!=NULL);
00675 }
00676
00677 bool CWeightedDegreeStringKernel::init_block_weights_from_wd_external()
00678 {
00679 ASSERT(weights);
00680 delete[] block_weights;
00681 block_weights=new DREAL[CMath::max(seq_length,degree)];
00682
00683 if (block_weights)
00684 {
00685 INT i=0;
00686 block_weights[0]=weights[0];
00687 for (i=1; i<CMath::max(seq_length,degree); i++)
00688 block_weights[i]=0;
00689
00690 for (i=1; i<CMath::max(seq_length,degree); i++)
00691 {
00692 block_weights[i]=block_weights[i-1];
00693
00694 DREAL contrib=0;
00695 for (INT j=0; j<CMath::min(degree,i+1); j++)
00696 contrib+=weights[j];
00697
00698 block_weights[i]+=contrib;
00699 }
00700 }
00701
00702 return (block_weights!=NULL);
00703 }
00704
00705 bool CWeightedDegreeStringKernel::init_block_weights_const()
00706 {
00707 delete[] block_weights;
00708 block_weights=new DREAL[seq_length];
00709
00710 if (block_weights)
00711 {
00712 for (int i=1; i<seq_length+1 ; i++)
00713 block_weights[i-1]=1.0/seq_length;
00714 }
00715
00716 return (block_weights!=NULL);
00717 }
00718
00719 bool CWeightedDegreeStringKernel::init_block_weights_linear()
00720 {
00721 delete[] block_weights;
00722 block_weights=new DREAL[seq_length];
00723
00724 if (block_weights)
00725 {
00726 for (int i=1; i<seq_length+1 ; i++)
00727 block_weights[i-1]=degree*i;
00728 }
00729
00730 return (block_weights!=NULL);
00731 }
00732
00733 bool CWeightedDegreeStringKernel::init_block_weights_sqpoly()
00734 {
00735 delete[] block_weights;
00736 block_weights=new DREAL[seq_length];
00737
00738 if (block_weights)
00739 {
00740 for (int i=1; i<degree+1 ; i++)
00741 block_weights[i-1]=((double) i)*i;
00742
00743 for (int i=degree+1; i<seq_length+1 ; i++)
00744 block_weights[i-1]=i;
00745 }
00746
00747 return (block_weights!=NULL);
00748 }
00749
00750 bool CWeightedDegreeStringKernel::init_block_weights_cubicpoly()
00751 {
00752 delete[] block_weights;
00753 block_weights=new DREAL[seq_length];
00754
00755 if (block_weights)
00756 {
00757 for (int i=1; i<degree+1 ; i++)
00758 block_weights[i-1]=((double) i)*i*i;
00759
00760 for (int i=degree+1; i<seq_length+1 ; i++)
00761 block_weights[i-1]=i;
00762 }
00763
00764 return (block_weights!=NULL);
00765 }
00766
00767 bool CWeightedDegreeStringKernel::init_block_weights_exp()
00768 {
00769 delete[] block_weights;
00770 block_weights=new DREAL[seq_length];
00771
00772 if (block_weights)
00773 {
00774 for (int i=1; i<degree+1 ; i++)
00775 block_weights[i-1]=exp(((double) i/10.0));
00776
00777 for (int i=degree+1; i<seq_length+1 ; i++)
00778 block_weights[i-1]=i;
00779 }
00780
00781 return (block_weights!=NULL);
00782 }
00783
00784 bool CWeightedDegreeStringKernel::init_block_weights_log()
00785 {
00786 delete[] block_weights;
00787 block_weights=new DREAL[seq_length];
00788
00789 if (block_weights)
00790 {
00791 for (int i=1; i<degree+1 ; i++)
00792 block_weights[i-1]=pow(log(i),2);
00793
00794 for (int i=degree+1; i<seq_length+1 ; i++)
00795 block_weights[i-1]=i-degree+1+pow(log(degree+1),2);
00796 }
00797
00798 return (block_weights!=NULL);
00799 }
00800
00801 bool CWeightedDegreeStringKernel::init_block_weights_external()
00802 {
00803 if (block_weights_external && (seq_length == num_block_weights_external) )
00804 {
00805 delete[] block_weights;
00806 block_weights=new DREAL[seq_length];
00807
00808 if (block_weights)
00809 {
00810 for (int i=0; i<seq_length; i++)
00811 block_weights[i]=block_weights_external[i];
00812 }
00813 }
00814 else {
00815 SG_ERROR( "sequence longer then weights (seqlen:%d, wlen:%d)\n", seq_length, block_weights_external);
00816 }
00817 return (block_weights!=NULL);
00818 }
00819
00820 bool CWeightedDegreeStringKernel::init_block_weights()
00821 {
00822 switch (type)
00823 {
00824 case E_WD:
00825 return init_block_weights_from_wd();
00826 case E_EXTERNAL:
00827 return init_block_weights_from_wd_external();
00828 case E_BLOCK_CONST:
00829 return init_block_weights_const();
00830 case E_BLOCK_LINEAR:
00831 return init_block_weights_linear();
00832 case E_BLOCK_SQPOLY:
00833 return init_block_weights_sqpoly();
00834 case E_BLOCK_CUBICPOLY:
00835 return init_block_weights_cubicpoly();
00836 case E_BLOCK_EXP:
00837 return init_block_weights_exp();
00838 case E_BLOCK_LOG:
00839 return init_block_weights_log();
00840 case E_BLOCK_EXTERNAL:
00841 return init_block_weights_external();
00842 default:
00843 return false;
00844 };
00845 }
00846
00847
00848 void* CWeightedDegreeStringKernel::compute_batch_helper(void* p)
00849 {
00850 S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
00851 INT j=params->j;
00852 CWeightedDegreeStringKernel* wd=params->kernel;
00853 CTrie<DNATrie>* tries=params->tries;
00854 DREAL* weights=params->weights;
00855 INT length=params->length;
00856 INT* vec=params->vec;
00857 DREAL* result=params->result;
00858 DREAL factor=params->factor;
00859 INT* vec_idx=params->vec_idx;
00860
00861 CStringFeatures<CHAR>* rhs_feat=((CStringFeatures<CHAR>*) wd->get_rhs());
00862 CAlphabet* alpha=wd->alphabet;
00863
00864 for (INT i=params->start; i<params->end; i++)
00865 {
00866 INT len=0;
00867 CHAR* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len);
00868 for (INT k=j; k<CMath::min(len,j+wd->get_degree()); k++)
00869 vec[k]=alpha->remap_to_bin(char_vec[k]);
00870
00871 ASSERT(tries);
00872
00873 result[i]+=factor*
00874 tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0))/
00875 wd->get_normalization_const();
00876 }
00877
00878 SG_UNREF(rhs_feat);
00879
00880 return NULL;
00881 }
00882
00883 void CWeightedDegreeStringKernel::compute_batch(INT num_vec, INT* vec_idx, DREAL* result, INT num_suppvec, INT* IDX, DREAL* alphas, DREAL factor)
00884 {
00885 ASSERT(tries);
00886 ASSERT(alphabet);
00887 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00888 ASSERT(rhs);
00889 ASSERT(num_vec<=rhs->get_num_vectors());
00890 ASSERT(num_vec>0);
00891 ASSERT(vec_idx);
00892 ASSERT(result);
00893 create_empty_tries();
00894
00895 INT num_feat=((CStringFeatures<CHAR>*) rhs)->get_max_vector_length();
00896 ASSERT(num_feat>0);
00897 INT num_threads=parallel.get_num_threads();
00898 ASSERT(num_threads>0);
00899 INT* vec=new INT[num_threads*num_feat];
00900
00901 if (num_threads < 2)
00902 {
00903 #ifdef CYGWIN
00904 for (INT j=0; j<num_feat; j++)
00905 #else
00906 CSignal::clear() ;
00907 for (INT j=0; j<num_feat && !CSignal::cancel_computations(); j++)
00908 #endif
00909 {
00910 init_optimization(num_suppvec, IDX, alphas, j);
00911 S_THREAD_PARAM params;
00912 params.vec=vec;
00913 params.result=result;
00914 params.weights=weights;
00915 params.kernel=this;
00916 params.tries=tries;
00917 params.factor=factor;
00918 params.j=j;
00919 params.start=0;
00920 params.end=num_vec;
00921 params.length=length;
00922 params.vec_idx=vec_idx;
00923 compute_batch_helper((void*) ¶ms);
00924
00925 SG_PROGRESS(j,0,num_feat);
00926 }
00927 }
00928 #ifndef WIN32
00929 else
00930 {
00931 CSignal::clear() ;
00932 for (INT j=0; j<num_feat && !CSignal::cancel_computations(); j++)
00933 {
00934 init_optimization(num_suppvec, IDX, alphas, j);
00935 pthread_t threads[num_threads-1];
00936 S_THREAD_PARAM params[num_threads];
00937 INT step= num_vec/num_threads;
00938 INT t;
00939
00940 for (t=0; t<num_threads-1; t++)
00941 {
00942 params[t].vec=&vec[num_feat*t];
00943 params[t].result=result;
00944 params[t].weights=weights;
00945 params[t].kernel=this;
00946 params[t].tries=tries;
00947 params[t].factor=factor;
00948 params[t].j=j;
00949 params[t].start = t*step;
00950 params[t].end = (t+1)*step;
00951 params[t].length=length;
00952 params[t].vec_idx=vec_idx;
00953 pthread_create(&threads[t], NULL, CWeightedDegreeStringKernel::compute_batch_helper, (void*)¶ms[t]);
00954 }
00955 params[t].vec=&vec[num_feat*t];
00956 params[t].result=result;
00957 params[t].weights=weights;
00958 params[t].kernel=this;
00959 params[t].tries=tries;
00960 params[t].factor=factor;
00961 params[t].j=j;
00962 params[t].start=t*step;
00963 params[t].end=num_vec;
00964 params[t].length=length;
00965 params[t].vec_idx=vec_idx;
00966 compute_batch_helper((void*) ¶ms[t]);
00967
00968 for (t=0; t<num_threads-1; t++)
00969 pthread_join(threads[t], NULL);
00970 SG_PROGRESS(j,0,num_feat);
00971 }
00972 }
00973 #endif
00974
00975 delete[] vec;
00976
00977
00978
00979 create_empty_tries();
00980 }
00981
00982 bool CWeightedDegreeStringKernel::set_max_mismatch(INT max)
00983 {
00984 if (type==E_EXTERNAL && max!=0) {
00985 return false;
00986 }
00987
00988 max_mismatch=max;
00989
00990 if (lhs!=NULL && rhs!=NULL)
00991 return init(lhs, rhs);
00992 else
00993 return true;
00994 }
00995