Kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "kernel/Kernel.h"
00021 #include "features/Features.h"
00022 
00023 #include "classifier/svm/SVM.h"
00024 
00025 #include <string.h>
00026 #include <unistd.h>
00027 #include <math.h>
00028 
00029 #ifndef WIN32
00030 #include <pthread.h>
00031 #endif
00032 
00033 CKernel::CKernel(INT size)
00034 : CSGObject(), kernel_matrix(NULL), precomputed_matrix(NULL),
00035     precompute_subkernel_matrix(false), precompute_matrix(false), lhs(NULL),
00036     rhs(NULL), combined_kernel_weight(1), optimization_initialized(false),
00037     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE)
00038 {
00039     if (size<10)
00040         size=10;
00041 
00042     cache_size=size;
00043 
00044     if (get_is_initialized())
00045         SG_ERROR( "COptimizableKernel still initialized on destruction");
00046 }
00047 
00048 
00049 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, INT size)
00050 : CSGObject(), kernel_matrix(NULL), precomputed_matrix(NULL),
00051     precompute_subkernel_matrix(false), precompute_matrix(false),
00052     lhs(NULL), rhs(NULL), combined_kernel_weight(1), optimization_initialized(false),
00053     opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE)
00054 {
00055     if (size<10)
00056         size=10;
00057 
00058     cache_size=size;
00059 
00060     if (get_is_initialized())
00061         SG_ERROR("Kernel initialized on construction.\n");
00062 
00063     init(p_lhs, p_rhs);
00064 }
00065 
00066 CKernel::~CKernel()
00067 {
00068     if (get_is_initialized())
00069         SG_ERROR("Kernel still initialized on destruction.\n");
00070 
00071     remove_lhs_and_rhs();
00072 
00073     delete[] precomputed_matrix;
00074     precomputed_matrix=NULL;
00075 
00076     SG_INFO("Kernel deleted (%p).\n", this);
00077 }
00078 
00079 void CKernel::get_kernel_matrix(DREAL** dst, INT* m, INT* n)
00080 {
00081     ASSERT(dst && m && n);
00082 
00083     DREAL* result = NULL;
00084     CFeatures* f1 = lhs;
00085     CFeatures* f2 = rhs;
00086 
00087     if (f1 && f2)
00088     {
00089         INT num_vec1=f1->get_num_vectors();
00090         INT num_vec2=f2->get_num_vectors();
00091         *m=num_vec1;
00092         *n=num_vec2;
00093 
00094         LONG total_num = num_vec1 * num_vec2;
00095         INT num_done = 0;
00096         SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00097 
00098         result=(DREAL*) malloc(sizeof(DREAL)*total_num);
00099         ASSERT(result);
00100 
00101         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00102         {
00103             for (INT i=0; i<num_vec1; i++)
00104             {
00105                 for (INT j=i; j<num_vec1; j++)
00106                 {
00107                     double v=kernel(i,j);
00108 
00109                     result[i+j*num_vec1]=v;
00110                     result[j+i*num_vec1]=v;
00111 
00112                     if (num_done%100000)
00113                         SG_PROGRESS(num_done, 0, total_num-1);
00114 
00115                     if (i!=j)
00116                         num_done+=2;
00117                     else
00118                         num_done+=1;
00119                 }
00120             }
00121         }
00122         else
00123         {
00124             for (INT i=0; i<num_vec1; i++)
00125             {
00126                 for (INT j=0; j<num_vec2; j++)
00127                 {
00128                     result[i+j*num_vec1]=kernel(i,j) ;
00129 
00130                     if (num_done%100000)
00131                         SG_PROGRESS(num_done, 0, total_num-1);
00132 
00133                     num_done++;
00134                 }
00135             }
00136         }
00137 
00138         SG_DONE();
00139     }
00140     else
00141       SG_ERROR( "no features assigned to kernel\n");
00142 
00143     *dst=result;
00144 }
00145 
00146 SHORTREAL* CKernel::get_kernel_matrix_shortreal(int &num_vec1, int &num_vec2,
00147         SHORTREAL* target)
00148 {
00149     SHORTREAL* result = NULL;
00150     CFeatures* f1 = lhs;
00151     CFeatures* f2 = rhs;
00152 
00153     if (f1 && f2)
00154     {
00155         if (target && (num_vec1!=f1->get_num_vectors() ||
00156                     num_vec2!=f2->get_num_vectors()) )
00157             SG_ERROR( "kernel matrix does not fit into target\n");
00158 
00159         num_vec1=f1->get_num_vectors();
00160         num_vec2=f2->get_num_vectors();
00161         LONG total_num = num_vec1 * num_vec2;
00162         int num_done = 0;
00163 
00164         SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00165 
00166         if (target)
00167             result=target;
00168         else
00169             result=new SHORTREAL[total_num];
00170 
00171         if (f1==f2 && num_vec1==num_vec2)
00172         {
00173             for (int i=0; i<num_vec1; i++)
00174             {
00175                 for (int j=i; j<num_vec1; j++)
00176                 {
00177                     double v=kernel(i,j);
00178 
00179                     result[i+j*num_vec1]=v;
00180                     result[j+i*num_vec1]=v;
00181 
00182                     if (num_done%100000)
00183                         SG_PROGRESS(num_done, 0, total_num-1);
00184 
00185                     if (i!=j)
00186                         num_done+=2;
00187                     else
00188                         num_done+=1;
00189                 }
00190             }
00191         }
00192         else
00193         {
00194             for (int i=0; i<num_vec1; i++)
00195             {
00196                 for (int j=0; j<num_vec2; j++)
00197                 {
00198                     result[i+j*num_vec1]=kernel(i,j) ;
00199 
00200                     if (num_done%100000)
00201                         SG_PROGRESS(num_done, 0, total_num-1);
00202 
00203                     num_done++;
00204                 }
00205             }
00206         }
00207 
00208         SG_DONE();
00209     }
00210     else
00211       SG_ERROR( "no features assigned to kernel\n");
00212 
00213     return result;
00214 }
00215 
00216 DREAL* CKernel::get_kernel_matrix_real(int &num_vec1, int &num_vec2, DREAL* target)
00217 {
00218     DREAL* result = NULL;
00219     CFeatures* f1 = lhs;
00220     CFeatures* f2 = rhs;
00221 
00222     if (f1 && f2)
00223     {
00224         if (target && (num_vec1!=f1->get_num_vectors() ||
00225                     num_vec2!=f2->get_num_vectors()) )
00226             SG_ERROR( "kernel matrix does not fit into target\n");
00227 
00228         num_vec1=f1->get_num_vectors();
00229         num_vec2=f2->get_num_vectors();
00230         LONG total_num = num_vec1 * num_vec2;
00231         int num_done = 0;
00232 
00233         SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00234 
00235         if (target)
00236             result=target;
00237         else
00238             result=new DREAL[total_num];
00239 
00240         if (f1==f2 && num_vec1==num_vec2)
00241         {
00242             for (int i=0; i<num_vec1; i++)
00243             {
00244                 for (int j=i; j<num_vec1; j++)
00245                 {
00246                     double v=kernel(i,j);
00247 
00248                     result[i+j*num_vec1]=v;
00249                     result[j+i*num_vec1]=v;
00250 
00251                     if (num_done%100000)
00252                         SG_PROGRESS(num_done, 0, total_num-1);
00253 
00254                     if (i!=j)
00255                         num_done+=2;
00256                     else
00257                         num_done+=1;
00258                 }
00259             }
00260         }
00261         else
00262         {
00263             for (int i=0; i<num_vec1; i++)
00264             {
00265                 for (int j=0; j<num_vec2; j++)
00266                 {
00267                     result[i+j*num_vec1]=kernel(i,j) ;
00268 
00269                     if (num_done%100000)
00270                         SG_PROGRESS(num_done, 0, total_num-1);
00271 
00272                     num_done++;
00273                 }
00274             }
00275         }
00276 
00277         SG_DONE();
00278     }
00279     else
00280       SG_ERROR( "no features assigned to kernel\n");
00281 
00282     return result;
00283 }
00284 
00285 
00286 
00287 
00288 bool CKernel::init(CFeatures* l, CFeatures* r)
00289 {
00290     //make sure features were indeed supplied
00291     ASSERT(l);
00292     ASSERT(r);
00293 
00294     //make sure features are compatible
00295     ASSERT(l->get_feature_class()==r->get_feature_class());
00296     ASSERT(l->get_feature_type()==r->get_feature_type());
00297 
00298     //remove references to previous features
00299     remove_lhs_and_rhs();
00300 
00301     //increase reference counts
00302     SG_REF(l);
00303     if (l!=r)
00304         SG_REF(r);
00305 
00306     lhs=l;
00307     rhs=r;
00308 
00309     delete[] precomputed_matrix ;
00310     precomputed_matrix=NULL ;
00311 
00312     return true;
00313 }
00314 
00315 void CKernel::cleanup()
00316 {
00317     remove_lhs_and_rhs();
00318 }
00319 
00320 
00321 
00322 bool CKernel::load(CHAR* fname)
00323 {
00324     return false;
00325 }
00326 
00327 bool CKernel::save(CHAR* fname)
00328 {
00329     INT i=0;
00330     INT num_left=lhs->get_num_vectors();
00331     INT num_right=rhs->get_num_vectors();
00332     KERNELCACHE_IDX num_total=num_left*num_right;
00333 
00334     CFile f(fname, 'w', F_DREAL);
00335 
00336     for (INT l=0; l< (INT) num_left && f.is_ok(); l++)
00337     {
00338         for (INT r=0; r< (INT) num_right && f.is_ok(); r++)
00339         {
00340             if (!(i % (num_total/10+1)))
00341                 SG_PRINT("%02d%%.", (int) (100.0*i/num_total));
00342             else if (!(i % (num_total/200+1)))
00343                 SG_PRINT(".");
00344 
00345             double k=kernel(l,r);
00346             f.save_real_data(&k, 1);
00347 
00348             i++;
00349         }
00350     }
00351 
00352     if (f.is_ok())
00353         SG_INFO( "kernel matrix of size %ld x %ld written (filesize: %ld)\n", num_left, num_right, num_total*sizeof(KERNELCACHE_ELEM));
00354 
00355     return (f.is_ok());
00356 }
00357 
00358 void CKernel::remove_lhs_and_rhs()
00359 {
00360     if (rhs!=lhs)
00361         SG_UNREF(rhs);
00362     rhs = NULL;
00363 
00364     SG_UNREF(lhs);
00365     lhs = NULL;
00366 
00367 
00368 }
00369 
00370 void CKernel::remove_lhs()
00371 { 
00372     SG_UNREF(lhs);
00373     lhs = NULL;
00374 
00375 
00376 }
00377 
00379 void CKernel::remove_rhs()
00380 {
00381     if (rhs!=lhs)
00382         SG_UNREF(rhs);
00383     rhs = NULL;
00384 
00385 
00386 }
00387 
00388 
00389 void CKernel::list_kernel()
00390 {
00391     SG_INFO( "0x%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00392             get_combined_kernel_weight(),
00393             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00394             "SLOWBUTMEMEFFICIENT");
00395 
00396     switch (get_kernel_type())
00397     {
00398         case K_UNKNOWN:
00399             SG_INFO( "K_UNKNOWN ");
00400             break;
00401         case K_LINEAR:
00402             SG_INFO( "K_LINEAR ");
00403             break;
00404         case K_SPARSELINEAR:
00405             SG_INFO( "K_SPARSELINEAR ");
00406             break;
00407         case K_POLY:
00408             SG_INFO( "K_POLY ");
00409             break;
00410         case K_GAUSSIAN:
00411             SG_INFO( "K_GAUSSIAN ");
00412             break;
00413         case K_SPARSEGAUSSIAN:
00414             SG_INFO( "K_SPARSEGAUSSIAN ");
00415             break;
00416         case K_GAUSSIANSHIFT:
00417             SG_INFO( "K_GAUSSIANSHIFT ");
00418             break;
00419         case K_HISTOGRAM:
00420             SG_INFO( "K_HISTOGRAM ");
00421             break;
00422         case K_SALZBERG:
00423             SG_INFO( "K_SALZBERG ");
00424             break;
00425         case K_LOCALITYIMPROVED:
00426             SG_INFO( "K_LOCALITYIMPROVED ");
00427             break;
00428         case K_SIMPLELOCALITYIMPROVED:
00429             SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00430             break;
00431         case K_FIXEDDEGREE:
00432             SG_INFO( "K_FIXEDDEGREE ");
00433             break;
00434         case K_WEIGHTEDDEGREE:
00435             SG_INFO( "K_WEIGHTEDDEGREE ");
00436             break;
00437         case K_WEIGHTEDDEGREEPOS:
00438             SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00439             break;
00440         case K_WEIGHTEDCOMMWORDSTRING:
00441             SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00442             break;
00443         case K_POLYMATCH:
00444             SG_INFO( "K_POLYMATCH ");
00445             break;
00446         case K_ALIGNMENT:
00447             SG_INFO( "K_ALIGNMENT ");
00448             break;
00449         case K_COMMWORDSTRING:
00450             SG_INFO( "K_COMMWORDSTRING ");
00451             break;
00452         case K_COMMULONGSTRING:
00453             SG_INFO( "K_COMMULONGSTRING ");
00454             break;
00455         case K_COMBINED:
00456             SG_INFO( "K_COMBINED ");
00457             break;
00458         case K_AUC:
00459             SG_INFO( "K_AUC ");
00460             break;
00461         case K_CUSTOM:
00462             SG_INFO( "K_CUSTOM ");
00463             break;
00464         case K_SIGMOID:
00465             SG_INFO( "K_SIGMOID ");
00466             break;
00467         case K_CHI2:
00468             SG_INFO( "K_CHI2 ");
00469             break;
00470         case K_DIAG:
00471             SG_INFO( "K_DIAG ");
00472             break;
00473         case K_CONST:
00474             SG_INFO( "K_CONST ");
00475             break;
00476         case K_MINDYGRAM:
00477             SG_INFO( "K_MINDYGRAM ");
00478             break;
00479         case K_DISTANCE:
00480             SG_INFO( "K_DISTANCE ");
00481             break;
00482         case K_LOCALALIGNMENT:
00483             SG_INFO( "K_LOCALALIGNMENT ");
00484             break;
00485         default:
00486          SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00487             break;
00488     }
00489 
00490     switch (get_feature_class())
00491     {
00492         case C_UNKNOWN:
00493             SG_INFO( "C_UNKNOWN ");
00494             break;
00495         case C_SIMPLE:
00496             SG_INFO( "C_SIMPLE ");
00497             break;
00498         case C_SPARSE:
00499             SG_INFO( "C_SPARSE ");
00500             break;
00501         case C_STRING:
00502             SG_INFO( "C_STRING ");
00503             break;
00504         case C_COMBINED:
00505             SG_INFO( "C_COMBINED ");
00506             break;
00507         case C_ANY:
00508             SG_INFO( "C_ANY ");
00509             break;
00510         default:
00511          SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00512     }
00513 
00514     switch (get_feature_type())
00515     {
00516         case F_UNKNOWN:
00517             SG_INFO( "F_UNKNOWN ");
00518             break;
00519         case F_DREAL:
00520             SG_INFO( "F_REAL ");
00521             break;
00522         case F_SHORT:
00523             SG_INFO( "F_SHORT ");
00524             break;
00525         case F_CHAR:
00526             SG_INFO( "F_CHAR ");
00527             break;
00528         case F_INT:
00529             SG_INFO( "F_INT ");
00530             break;
00531         case F_BYTE:
00532             SG_INFO( "F_BYTE ");
00533             break;
00534         case F_WORD:
00535             SG_INFO( "F_WORD ");
00536             break;
00537         case F_ULONG:
00538             SG_INFO( "F_ULONG ");
00539             break;
00540         case F_ANY:
00541             SG_INFO( "F_ANY ");
00542             break;
00543         default:
00544          SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00545             break;
00546     }
00547     SG_INFO( "\n");
00548 }
00549 
00550 bool CKernel::init_optimization(INT count, INT *IDX, DREAL * weights)
00551 {
00552    SG_ERROR( "kernel does not support linadd optimization\n");
00553     return false ;
00554 }
00555 
00556 bool CKernel::delete_optimization() 
00557 {
00558    SG_ERROR( "kernel does not support linadd optimization\n");
00559     return false;
00560 }
00561 
00562 DREAL CKernel::compute_optimized(INT vector_idx)
00563 {
00564    SG_ERROR( "kernel does not support linadd optimization\n");
00565     return 0;
00566 }
00567 
00568 void CKernel::compute_batch(INT num_vec, INT* vec_idx, DREAL* target, INT num_suppvec, INT* IDX, DREAL* weights, DREAL factor)
00569 {
00570    SG_ERROR( "kernel does not support batch computation\n");
00571 }
00572 
00573 void CKernel::add_to_normal(INT vector_idx, DREAL weight)
00574 {
00575    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00576 }
00577 
00578 void CKernel::clear_normal()
00579 {
00580    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00581 }
00582 
00583 INT CKernel::get_num_subkernels()
00584 {
00585     return 1;
00586 }
00587 
00588 void CKernel::compute_by_subkernel(INT vector_idx, DREAL * subkernel_contrib)
00589 {
00590    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00591 }
00592 
00593 const DREAL* CKernel::get_subkernel_weights(INT &num_weights)
00594 {
00595     num_weights=1 ;
00596     return &combined_kernel_weight ;
00597 }
00598 
00599 void CKernel::set_subkernel_weights(DREAL* weights, INT num_weights)
00600 {
00601     combined_kernel_weight = weights[0] ;
00602     if (num_weights!=1)
00603       SG_ERROR( "number of subkernel weights should be one ...\n");
00604 }
00605 
00606 void CKernel::do_precompute_matrix()
00607 {
00608     INT num_left=lhs->get_num_vectors();
00609     INT num_right=rhs->get_num_vectors();
00610     SG_INFO( "precomputing kernel matrix (%ix%i)\n", num_left, num_right) ;
00611 
00612     ASSERT(num_left==num_right);
00613     ASSERT(lhs==rhs);
00614     INT num=num_left;
00615     
00616     delete[] precomputed_matrix;
00617     precomputed_matrix=new SHORTREAL[num*(num+1)/2];
00618 
00619     for (INT i=0; i<num; i++)
00620     {
00621         SG_PROGRESS(i*i,0,num*num);
00622         for (INT j=0; j<=i; j++)
00623             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00624     }
00625 
00626     SG_PROGRESS(num*num,0,num*num);
00627     SG_INFO( "\ndone.\n") ;
00628 }
00629 
00630 /*
00631  * compute the vector containing the square root of the diagonal elements
00632  * of this kernel.
00633  */
00634 void CKernel::init_sqrt_diag(DREAL *v, INT num)
00635 {
00636     for (INT i = 0; i<num; i++)
00637     {
00638         v[i] = sqrt(this->compute(i,i));
00639         if (!v[i])
00640             v[i] = 1e-16; /* avoid divide by zero exception */
00641     }
00642 }
00643 
00644 bool CKernel::init_optimization_svm(CSVM * svm)
00645 {
00646     INT num_suppvec=svm->get_num_support_vectors();
00647     INT* sv_idx=new INT[num_suppvec];
00648     DREAL* sv_weight=new DREAL[num_suppvec];
00649 
00650     for (INT i=0; i<num_suppvec; i++)
00651     {
00652         sv_idx[i]    = svm->get_support_vector(i);
00653         sv_weight[i] = svm->get_alpha(i);
00654     }
00655     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00656 
00657     delete[] sv_idx;
00658     delete[] sv_weight;
00659     return ret;
00660 }
00661 

SHOGUN Machine Learning Toolbox - Documentation