00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019
00020 #include "kernel/Kernel.h"
00021 #include "features/Features.h"
00022
00023 #include "classifier/svm/SVM.h"
00024
00025 #include <string.h>
00026 #include <unistd.h>
00027 #include <math.h>
00028
00029 #ifndef WIN32
00030 #include <pthread.h>
00031 #endif
00032
00033 CKernel::CKernel(INT size)
00034 : CSGObject(), kernel_matrix(NULL), precomputed_matrix(NULL),
00035 precompute_subkernel_matrix(false), precompute_matrix(false), lhs(NULL),
00036 rhs(NULL), combined_kernel_weight(1), optimization_initialized(false),
00037 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE)
00038 {
00039 if (size<10)
00040 size=10;
00041
00042 cache_size=size;
00043
00044 if (get_is_initialized())
00045 SG_ERROR( "COptimizableKernel still initialized on destruction");
00046 }
00047
00048
00049 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, INT size)
00050 : CSGObject(), kernel_matrix(NULL), precomputed_matrix(NULL),
00051 precompute_subkernel_matrix(false), precompute_matrix(false),
00052 lhs(NULL), rhs(NULL), combined_kernel_weight(1), optimization_initialized(false),
00053 opt_type(FASTBUTMEMHUNGRY), properties(KP_NONE)
00054 {
00055 if (size<10)
00056 size=10;
00057
00058 cache_size=size;
00059
00060 if (get_is_initialized())
00061 SG_ERROR("Kernel initialized on construction.\n");
00062
00063 init(p_lhs, p_rhs);
00064 }
00065
00066 CKernel::~CKernel()
00067 {
00068 if (get_is_initialized())
00069 SG_ERROR("Kernel still initialized on destruction.\n");
00070
00071 remove_lhs_and_rhs();
00072
00073 delete[] precomputed_matrix;
00074 precomputed_matrix=NULL;
00075
00076 SG_INFO("Kernel deleted (%p).\n", this);
00077 }
00078
00079 void CKernel::get_kernel_matrix(DREAL** dst, INT* m, INT* n)
00080 {
00081 ASSERT(dst && m && n);
00082
00083 DREAL* result = NULL;
00084 CFeatures* f1 = lhs;
00085 CFeatures* f2 = rhs;
00086
00087 if (f1 && f2)
00088 {
00089 INT num_vec1=f1->get_num_vectors();
00090 INT num_vec2=f2->get_num_vectors();
00091 *m=num_vec1;
00092 *n=num_vec2;
00093
00094 LONG total_num = num_vec1 * num_vec2;
00095 INT num_done = 0;
00096 SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00097
00098 result=(DREAL*) malloc(sizeof(DREAL)*total_num);
00099 ASSERT(result);
00100
00101 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00102 {
00103 for (INT i=0; i<num_vec1; i++)
00104 {
00105 for (INT j=i; j<num_vec1; j++)
00106 {
00107 double v=kernel(i,j);
00108
00109 result[i+j*num_vec1]=v;
00110 result[j+i*num_vec1]=v;
00111
00112 if (num_done%100000)
00113 SG_PROGRESS(num_done, 0, total_num-1);
00114
00115 if (i!=j)
00116 num_done+=2;
00117 else
00118 num_done+=1;
00119 }
00120 }
00121 }
00122 else
00123 {
00124 for (INT i=0; i<num_vec1; i++)
00125 {
00126 for (INT j=0; j<num_vec2; j++)
00127 {
00128 result[i+j*num_vec1]=kernel(i,j) ;
00129
00130 if (num_done%100000)
00131 SG_PROGRESS(num_done, 0, total_num-1);
00132
00133 num_done++;
00134 }
00135 }
00136 }
00137
00138 SG_DONE();
00139 }
00140 else
00141 SG_ERROR( "no features assigned to kernel\n");
00142
00143 *dst=result;
00144 }
00145
00146 SHORTREAL* CKernel::get_kernel_matrix_shortreal(int &num_vec1, int &num_vec2,
00147 SHORTREAL* target)
00148 {
00149 SHORTREAL* result = NULL;
00150 CFeatures* f1 = lhs;
00151 CFeatures* f2 = rhs;
00152
00153 if (f1 && f2)
00154 {
00155 if (target && (num_vec1!=f1->get_num_vectors() ||
00156 num_vec2!=f2->get_num_vectors()) )
00157 SG_ERROR( "kernel matrix does not fit into target\n");
00158
00159 num_vec1=f1->get_num_vectors();
00160 num_vec2=f2->get_num_vectors();
00161 LONG total_num = num_vec1 * num_vec2;
00162 int num_done = 0;
00163
00164 SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00165
00166 if (target)
00167 result=target;
00168 else
00169 result=new SHORTREAL[total_num];
00170
00171 if (f1==f2 && num_vec1==num_vec2)
00172 {
00173 for (int i=0; i<num_vec1; i++)
00174 {
00175 for (int j=i; j<num_vec1; j++)
00176 {
00177 double v=kernel(i,j);
00178
00179 result[i+j*num_vec1]=v;
00180 result[j+i*num_vec1]=v;
00181
00182 if (num_done%100000)
00183 SG_PROGRESS(num_done, 0, total_num-1);
00184
00185 if (i!=j)
00186 num_done+=2;
00187 else
00188 num_done+=1;
00189 }
00190 }
00191 }
00192 else
00193 {
00194 for (int i=0; i<num_vec1; i++)
00195 {
00196 for (int j=0; j<num_vec2; j++)
00197 {
00198 result[i+j*num_vec1]=kernel(i,j) ;
00199
00200 if (num_done%100000)
00201 SG_PROGRESS(num_done, 0, total_num-1);
00202
00203 num_done++;
00204 }
00205 }
00206 }
00207
00208 SG_DONE();
00209 }
00210 else
00211 SG_ERROR( "no features assigned to kernel\n");
00212
00213 return result;
00214 }
00215
00216 DREAL* CKernel::get_kernel_matrix_real(int &num_vec1, int &num_vec2, DREAL* target)
00217 {
00218 DREAL* result = NULL;
00219 CFeatures* f1 = lhs;
00220 CFeatures* f2 = rhs;
00221
00222 if (f1 && f2)
00223 {
00224 if (target && (num_vec1!=f1->get_num_vectors() ||
00225 num_vec2!=f2->get_num_vectors()) )
00226 SG_ERROR( "kernel matrix does not fit into target\n");
00227
00228 num_vec1=f1->get_num_vectors();
00229 num_vec2=f2->get_num_vectors();
00230 LONG total_num = num_vec1 * num_vec2;
00231 int num_done = 0;
00232
00233 SG_DEBUG( "returning kernel matrix of size %dx%d\n", num_vec1, num_vec2);
00234
00235 if (target)
00236 result=target;
00237 else
00238 result=new DREAL[total_num];
00239
00240 if (f1==f2 && num_vec1==num_vec2)
00241 {
00242 for (int i=0; i<num_vec1; i++)
00243 {
00244 for (int j=i; j<num_vec1; j++)
00245 {
00246 double v=kernel(i,j);
00247
00248 result[i+j*num_vec1]=v;
00249 result[j+i*num_vec1]=v;
00250
00251 if (num_done%100000)
00252 SG_PROGRESS(num_done, 0, total_num-1);
00253
00254 if (i!=j)
00255 num_done+=2;
00256 else
00257 num_done+=1;
00258 }
00259 }
00260 }
00261 else
00262 {
00263 for (int i=0; i<num_vec1; i++)
00264 {
00265 for (int j=0; j<num_vec2; j++)
00266 {
00267 result[i+j*num_vec1]=kernel(i,j) ;
00268
00269 if (num_done%100000)
00270 SG_PROGRESS(num_done, 0, total_num-1);
00271
00272 num_done++;
00273 }
00274 }
00275 }
00276
00277 SG_DONE();
00278 }
00279 else
00280 SG_ERROR( "no features assigned to kernel\n");
00281
00282 return result;
00283 }
00284
00285
00286
00287
00288 bool CKernel::init(CFeatures* l, CFeatures* r)
00289 {
00290
00291 ASSERT(l);
00292 ASSERT(r);
00293
00294
00295 ASSERT(l->get_feature_class()==r->get_feature_class());
00296 ASSERT(l->get_feature_type()==r->get_feature_type());
00297
00298
00299 remove_lhs_and_rhs();
00300
00301
00302 SG_REF(l);
00303 if (l!=r)
00304 SG_REF(r);
00305
00306 lhs=l;
00307 rhs=r;
00308
00309 delete[] precomputed_matrix ;
00310 precomputed_matrix=NULL ;
00311
00312 return true;
00313 }
00314
00315 void CKernel::cleanup()
00316 {
00317 remove_lhs_and_rhs();
00318 }
00319
00320
00321
00322 bool CKernel::load(CHAR* fname)
00323 {
00324 return false;
00325 }
00326
00327 bool CKernel::save(CHAR* fname)
00328 {
00329 INT i=0;
00330 INT num_left=lhs->get_num_vectors();
00331 INT num_right=rhs->get_num_vectors();
00332 KERNELCACHE_IDX num_total=num_left*num_right;
00333
00334 CFile f(fname, 'w', F_DREAL);
00335
00336 for (INT l=0; l< (INT) num_left && f.is_ok(); l++)
00337 {
00338 for (INT r=0; r< (INT) num_right && f.is_ok(); r++)
00339 {
00340 if (!(i % (num_total/10+1)))
00341 SG_PRINT("%02d%%.", (int) (100.0*i/num_total));
00342 else if (!(i % (num_total/200+1)))
00343 SG_PRINT(".");
00344
00345 double k=kernel(l,r);
00346 f.save_real_data(&k, 1);
00347
00348 i++;
00349 }
00350 }
00351
00352 if (f.is_ok())
00353 SG_INFO( "kernel matrix of size %ld x %ld written (filesize: %ld)\n", num_left, num_right, num_total*sizeof(KERNELCACHE_ELEM));
00354
00355 return (f.is_ok());
00356 }
00357
00358 void CKernel::remove_lhs_and_rhs()
00359 {
00360 if (rhs!=lhs)
00361 SG_UNREF(rhs);
00362 rhs = NULL;
00363
00364 SG_UNREF(lhs);
00365 lhs = NULL;
00366
00367
00368 }
00369
00370 void CKernel::remove_lhs()
00371 {
00372 SG_UNREF(lhs);
00373 lhs = NULL;
00374
00375
00376 }
00377
00379 void CKernel::remove_rhs()
00380 {
00381 if (rhs!=lhs)
00382 SG_UNREF(rhs);
00383 rhs = NULL;
00384
00385
00386 }
00387
00388
00389 void CKernel::list_kernel()
00390 {
00391 SG_INFO( "0x%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00392 get_combined_kernel_weight(),
00393 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00394 "SLOWBUTMEMEFFICIENT");
00395
00396 switch (get_kernel_type())
00397 {
00398 case K_UNKNOWN:
00399 SG_INFO( "K_UNKNOWN ");
00400 break;
00401 case K_LINEAR:
00402 SG_INFO( "K_LINEAR ");
00403 break;
00404 case K_SPARSELINEAR:
00405 SG_INFO( "K_SPARSELINEAR ");
00406 break;
00407 case K_POLY:
00408 SG_INFO( "K_POLY ");
00409 break;
00410 case K_GAUSSIAN:
00411 SG_INFO( "K_GAUSSIAN ");
00412 break;
00413 case K_SPARSEGAUSSIAN:
00414 SG_INFO( "K_SPARSEGAUSSIAN ");
00415 break;
00416 case K_GAUSSIANSHIFT:
00417 SG_INFO( "K_GAUSSIANSHIFT ");
00418 break;
00419 case K_HISTOGRAM:
00420 SG_INFO( "K_HISTOGRAM ");
00421 break;
00422 case K_SALZBERG:
00423 SG_INFO( "K_SALZBERG ");
00424 break;
00425 case K_LOCALITYIMPROVED:
00426 SG_INFO( "K_LOCALITYIMPROVED ");
00427 break;
00428 case K_SIMPLELOCALITYIMPROVED:
00429 SG_INFO( "K_SIMPLELOCALITYIMPROVED ");
00430 break;
00431 case K_FIXEDDEGREE:
00432 SG_INFO( "K_FIXEDDEGREE ");
00433 break;
00434 case K_WEIGHTEDDEGREE:
00435 SG_INFO( "K_WEIGHTEDDEGREE ");
00436 break;
00437 case K_WEIGHTEDDEGREEPOS:
00438 SG_INFO( "K_WEIGHTEDDEGREEPOS ");
00439 break;
00440 case K_WEIGHTEDCOMMWORDSTRING:
00441 SG_INFO( "K_WEIGHTEDCOMMWORDSTRING ");
00442 break;
00443 case K_POLYMATCH:
00444 SG_INFO( "K_POLYMATCH ");
00445 break;
00446 case K_ALIGNMENT:
00447 SG_INFO( "K_ALIGNMENT ");
00448 break;
00449 case K_COMMWORDSTRING:
00450 SG_INFO( "K_COMMWORDSTRING ");
00451 break;
00452 case K_COMMULONGSTRING:
00453 SG_INFO( "K_COMMULONGSTRING ");
00454 break;
00455 case K_COMBINED:
00456 SG_INFO( "K_COMBINED ");
00457 break;
00458 case K_AUC:
00459 SG_INFO( "K_AUC ");
00460 break;
00461 case K_CUSTOM:
00462 SG_INFO( "K_CUSTOM ");
00463 break;
00464 case K_SIGMOID:
00465 SG_INFO( "K_SIGMOID ");
00466 break;
00467 case K_CHI2:
00468 SG_INFO( "K_CHI2 ");
00469 break;
00470 case K_DIAG:
00471 SG_INFO( "K_DIAG ");
00472 break;
00473 case K_CONST:
00474 SG_INFO( "K_CONST ");
00475 break;
00476 case K_MINDYGRAM:
00477 SG_INFO( "K_MINDYGRAM ");
00478 break;
00479 case K_DISTANCE:
00480 SG_INFO( "K_DISTANCE ");
00481 break;
00482 case K_LOCALALIGNMENT:
00483 SG_INFO( "K_LOCALALIGNMENT ");
00484 break;
00485 default:
00486 SG_ERROR( "ERROR UNKNOWN KERNEL TYPE");
00487 break;
00488 }
00489
00490 switch (get_feature_class())
00491 {
00492 case C_UNKNOWN:
00493 SG_INFO( "C_UNKNOWN ");
00494 break;
00495 case C_SIMPLE:
00496 SG_INFO( "C_SIMPLE ");
00497 break;
00498 case C_SPARSE:
00499 SG_INFO( "C_SPARSE ");
00500 break;
00501 case C_STRING:
00502 SG_INFO( "C_STRING ");
00503 break;
00504 case C_COMBINED:
00505 SG_INFO( "C_COMBINED ");
00506 break;
00507 case C_ANY:
00508 SG_INFO( "C_ANY ");
00509 break;
00510 default:
00511 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
00512 }
00513
00514 switch (get_feature_type())
00515 {
00516 case F_UNKNOWN:
00517 SG_INFO( "F_UNKNOWN ");
00518 break;
00519 case F_DREAL:
00520 SG_INFO( "F_REAL ");
00521 break;
00522 case F_SHORT:
00523 SG_INFO( "F_SHORT ");
00524 break;
00525 case F_CHAR:
00526 SG_INFO( "F_CHAR ");
00527 break;
00528 case F_INT:
00529 SG_INFO( "F_INT ");
00530 break;
00531 case F_BYTE:
00532 SG_INFO( "F_BYTE ");
00533 break;
00534 case F_WORD:
00535 SG_INFO( "F_WORD ");
00536 break;
00537 case F_ULONG:
00538 SG_INFO( "F_ULONG ");
00539 break;
00540 case F_ANY:
00541 SG_INFO( "F_ANY ");
00542 break;
00543 default:
00544 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE");
00545 break;
00546 }
00547 SG_INFO( "\n");
00548 }
00549
00550 bool CKernel::init_optimization(INT count, INT *IDX, DREAL * weights)
00551 {
00552 SG_ERROR( "kernel does not support linadd optimization\n");
00553 return false ;
00554 }
00555
00556 bool CKernel::delete_optimization()
00557 {
00558 SG_ERROR( "kernel does not support linadd optimization\n");
00559 return false;
00560 }
00561
00562 DREAL CKernel::compute_optimized(INT vector_idx)
00563 {
00564 SG_ERROR( "kernel does not support linadd optimization\n");
00565 return 0;
00566 }
00567
00568 void CKernel::compute_batch(INT num_vec, INT* vec_idx, DREAL* target, INT num_suppvec, INT* IDX, DREAL* weights, DREAL factor)
00569 {
00570 SG_ERROR( "kernel does not support batch computation\n");
00571 }
00572
00573 void CKernel::add_to_normal(INT vector_idx, DREAL weight)
00574 {
00575 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00576 }
00577
00578 void CKernel::clear_normal()
00579 {
00580 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00581 }
00582
00583 INT CKernel::get_num_subkernels()
00584 {
00585 return 1;
00586 }
00587
00588 void CKernel::compute_by_subkernel(INT vector_idx, DREAL * subkernel_contrib)
00589 {
00590 SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00591 }
00592
00593 const DREAL* CKernel::get_subkernel_weights(INT &num_weights)
00594 {
00595 num_weights=1 ;
00596 return &combined_kernel_weight ;
00597 }
00598
00599 void CKernel::set_subkernel_weights(DREAL* weights, INT num_weights)
00600 {
00601 combined_kernel_weight = weights[0] ;
00602 if (num_weights!=1)
00603 SG_ERROR( "number of subkernel weights should be one ...\n");
00604 }
00605
00606 void CKernel::do_precompute_matrix()
00607 {
00608 INT num_left=lhs->get_num_vectors();
00609 INT num_right=rhs->get_num_vectors();
00610 SG_INFO( "precomputing kernel matrix (%ix%i)\n", num_left, num_right) ;
00611
00612 ASSERT(num_left==num_right);
00613 ASSERT(lhs==rhs);
00614 INT num=num_left;
00615
00616 delete[] precomputed_matrix;
00617 precomputed_matrix=new SHORTREAL[num*(num+1)/2];
00618
00619 for (INT i=0; i<num; i++)
00620 {
00621 SG_PROGRESS(i*i,0,num*num);
00622 for (INT j=0; j<=i; j++)
00623 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00624 }
00625
00626 SG_PROGRESS(num*num,0,num*num);
00627 SG_INFO( "\ndone.\n") ;
00628 }
00629
00630
00631
00632
00633
00634 void CKernel::init_sqrt_diag(DREAL *v, INT num)
00635 {
00636 for (INT i = 0; i<num; i++)
00637 {
00638 v[i] = sqrt(this->compute(i,i));
00639 if (!v[i])
00640 v[i] = 1e-16;
00641 }
00642 }
00643
00644 bool CKernel::init_optimization_svm(CSVM * svm)
00645 {
00646 INT num_suppvec=svm->get_num_support_vectors();
00647 INT* sv_idx=new INT[num_suppvec];
00648 DREAL* sv_weight=new DREAL[num_suppvec];
00649
00650 for (INT i=0; i<num_suppvec; i++)
00651 {
00652 sv_idx[i] = svm->get_support_vector(i);
00653 sv_weight[i] = svm->get_alpha(i);
00654 }
00655 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00656
00657 delete[] sv_idx;
00658 delete[] sv_weight;
00659 return ret;
00660 }
00661