#include "lib/common.h"
#include "lib/io.h"
#include "lib/Signal.h"
#include "base/Parallel.h"

#include "classifier/svm/SVM.h"

#include <string.h>

#ifndef WIN32
#include <pthread.h>
#endif

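// Work unit handed to each classification thread: the SVM to evaluate, the
// label object to fill and the half-open range [start,end) of example indices.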
struct S_THREAD_PARAM
{
	CSVM* svm;
	CLabels* result;
	INT start;
	INT end;
	bool verbose;
};

CSVM::CSVM(INT num_sv)
: CKernelMachine()
{
	set_defaults(num_sv);
}

CSVM::CSVM(DREAL C, CKernel* k, CLabels* lab)
: CKernelMachine()
{
	set_defaults();
	set_C(C,C);
	set_labels(lab);
	set_kernel(k);
}

CSVM::~CSVM()
{
	delete[] svm_model.alpha;
	delete[] svm_model.svs;

	SG_DEBUG("SVM object destroyed\n");
}

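// Reset the model and all training parameters to their default values; if
// num_sv>0, additionally allocate an empty model with num_sv support vectors.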
void CSVM::set_defaults(INT num_sv)
{
	svm_model.b=0.0;
	svm_model.alpha=NULL;
	svm_model.svs=NULL;
	svm_model.num_svs=0;
	svm_loaded=false;

	weight_epsilon=1e-5;
	epsilon=1e-5;
	tube_epsilon=1e-2;

	nu=0.5;
	C1=1;
	C2=1;
	C_mkl=0;

	objective=0;

	qpsize=41;
	use_bias=true;
	use_shrinking=true;
	use_mkl=false;
	use_batch_computation=true;
	use_linadd=true;
	use_precomputed_subkernels=false;

	if (num_sv>0)
		create_new_model(num_sv);
}

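// Read a model from a plain text file in the format written by CSVM::save():
// a "%SVM" header, the number of support vectors, the kernel name, the bias b
// and a list of [alpha,support_vector_index] pairs.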
bool CSVM::load(FILE* modelfl)
{
	bool result=true;
	CHAR char_buffer[1024];
	int int_buffer;
	double double_buffer;
	int line_number=1;

	if (fscanf(modelfl,"%4s\n", char_buffer)==EOF)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}
	else
	{
		char_buffer[4]='\0';
		if (strcmp("%SVM", char_buffer)!=0)
		{
			result=false;
			SG_ERROR( "error in svm file, line nr:%d\n", line_number);
		}
		line_number++;
	}

	int_buffer=0;
	if (fscanf(modelfl," numsv=%d; \n", &int_buffer) != 1)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}

	if (!feof(modelfl))
		line_number++;

	SG_INFO( "loading %d support vectors\n", int_buffer);
	create_new_model(int_buffer);

	if (fscanf(modelfl," kernel='%s'; \n", char_buffer) != 1)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}

	if (!feof(modelfl))
		line_number++;

	double_buffer=0;

	if (fscanf(modelfl," b=%lf; \n", &double_buffer) != 1)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}

	if (!feof(modelfl))
		line_number++;

	set_bias(double_buffer);

	if (fscanf(modelfl,"%8s\n", char_buffer) == EOF)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}
	else
	{
		char_buffer[8]='\0';
		if (strcmp("alphas=[", char_buffer)!=0)
		{
			result=false;
			SG_ERROR( "error in svm file, line nr:%d\n", line_number);
		}
		line_number++;
	}

	for (INT i=0; i<get_num_support_vectors(); i++)
	{
		double_buffer=0;
		int_buffer=0;

		if (fscanf(modelfl," [%lf,%d]; \n", &double_buffer, &int_buffer) != 2)
		{
			result=false;
			SG_ERROR( "error in svm file, line nr:%d\n", line_number);
		}

		if (!feof(modelfl))
			line_number++;

		set_support_vector(i, int_buffer);
		set_alpha(i, double_buffer);
	}

	if (fscanf(modelfl,"%2s", char_buffer) == EOF)
	{
		result=false;
		SG_ERROR( "error in svm file, line nr:%d\n", line_number);
	}
	else
	{
		char_buffer[2]='\0';
		if (strcmp("];", char_buffer)!=0)
		{
			result=false;
			SG_ERROR( "error in svm file, line nr:%d\n", line_number);
		}
		line_number++;
	}

	svm_loaded=result;
	return result;
}

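// Write the model (number of support vectors, kernel name, bias and
// [alpha,support_vector_index] pairs) in the plain text format read by load().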
bool CSVM::save(FILE* modelfl)
{
	SG_INFO( "Writing model file...");
	fprintf(modelfl,"%%SVM\n");
	fprintf(modelfl,"numsv=%d;\n", get_num_support_vectors());
	fprintf(modelfl,"kernel='%s';\n", kernel->get_name());
	fprintf(modelfl,"b=%+10.16e;\n", get_bias());

	fprintf(modelfl, "alphas=[\n");

	for (INT i=0; i<get_num_support_vectors(); i++)
		fprintf(modelfl,"\t[%+10.16e,%d];\n",
				CSVM::get_alpha(i), get_support_vector(i));

	fprintf(modelfl, "];\n");

	SG_DONE();
	return true;
}

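// Pass the support vectors and their alphas to the kernel so that kernels with
// the LINADD property can prepare their optimized (fast) classification path.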
bool CSVM::init_kernel_optimization()
{
	INT num_sv=get_num_support_vectors();

	if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
	{
		INT* sv_idx=new INT[num_sv];
		DREAL* sv_weight=new DREAL[num_sv];

		for (INT i=0; i<num_sv; i++)
		{
			sv_idx[i]=get_support_vector(i);
			sv_weight[i]=get_alpha(i);
		}

		bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight);

		delete[] sv_idx;
		delete[] sv_weight;

		if (!ret)
			SG_ERROR( "initialization of kernel optimization failed\n");

		return ret;
	}
	else
		SG_ERROR( "kernel optimization requires a kernel with the LINADD property and at least one support vector\n");

	return false;
}

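// Thread entry point used by classify(): compute the SVM output for every
// example in [start,end) and store it in the result labels; threads with
// verbose set additionally report progress.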
void* CSVM::classify_example_helper(void* p)
{
	S_THREAD_PARAM* params=(S_THREAD_PARAM*) p;
	CLabels* result=params->result;
	CSVM* svm=params->svm;

#ifdef WIN32
	for (INT vec=params->start; vec<params->end; vec++)
#else
	CSignal::clear();
	for (INT vec=params->start; vec<params->end &&
			!CSignal::cancel_computations(); vec++)
#endif
	{
		if (params->verbose)
		{
			INT num_vectors=params->end - params->start;
			INT v=vec-params->start;
			if ( (v % (num_vectors/100+1)) == 0)
				SG_SPROGRESS(v, 0.0, num_vectors-1);
		}

		result->set_label(vec, svm->classify_example(vec));
	}

	return NULL;
}

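// Classify all examples on the right-hand side of the kernel. If the kernel
// supports batch evaluation it is used directly; otherwise the outputs are
// computed example by example, possibly distributed over several threads.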
CLabels* CSVM::classify(CLabels* lab)
{
	if (!kernel)
	{
		SG_ERROR( "SVM cannot proceed without kernel!\n");
		return NULL;
	}

	if ( kernel && kernel->get_num_vec_rhs()>0 )
	{
		INT num_vectors=kernel->get_num_vec_rhs();

		if (!lab)
			lab=new CLabels(num_vectors);
		SG_DEBUG( "computing output on %d test examples\n", num_vectors);

		if (this->io.get_show_progress())
			this->io.enable_progress();
		else
			this->io.disable_progress();

		if (kernel->has_property(KP_BATCHEVALUATION) &&
				get_batch_computation_enabled())
		{
			ASSERT(get_num_support_vectors()>0);
			INT* sv_idx=new INT[get_num_support_vectors()];
			DREAL* sv_weight=new DREAL[get_num_support_vectors()];
			INT* idx=new INT[num_vectors];
			DREAL* output=new DREAL[num_vectors];
			memset(output, 0, sizeof(DREAL)*num_vectors);

			for (INT i=0; i<num_vectors; i++)
				idx[i]=i;

			for (INT i=0; i<get_num_support_vectors(); i++)
			{
				sv_idx[i]=get_support_vector(i);
				sv_weight[i]=get_alpha(i);
			}

			kernel->compute_batch(num_vectors, idx,
					output, get_num_support_vectors(), sv_idx, sv_weight);

			for (INT i=0; i<num_vectors; i++)
				lab->set_label(i, get_bias()+output[i]);

			delete[] sv_idx;
			delete[] sv_weight;
			delete[] idx;
			delete[] output;
		}
		else
		{
			INT num_threads=parallel.get_num_threads();
			ASSERT(num_threads>0);

			if (num_threads < 2)
			{
				S_THREAD_PARAM params;
				params.svm=this;
				params.result=lab;
				params.start=0;
				params.end=num_vectors;
				params.verbose=true;
				classify_example_helper((void*) &params);
			}
#ifndef WIN32
			else
			{
				pthread_t threads[num_threads-1];
				S_THREAD_PARAM params[num_threads];
				INT step=num_vectors/num_threads;

				INT t;

				for (t=0; t<num_threads-1; t++)
				{
					params[t].svm = this;
					params[t].result = lab;
					params[t].start = t*step;
					params[t].end = (t+1)*step;
					params[t].verbose = false;
					pthread_create(&threads[t], NULL,
							CSVM::classify_example_helper, (void*) &params[t]);
				}

				params[t].svm = this;
				params[t].result = lab;
				params[t].start = t*step;
				params[t].end = num_vectors;
				params[t].verbose = true;
				classify_example_helper((void*) &params[t]);

				for (t=0; t<num_threads-1; t++)
					pthread_join(threads[t], NULL);
			}
#endif
		}

#ifndef WIN32
		if ( CSignal::cancel_computations() )
			SG_INFO( "prematurely stopped.\n");
		else
#endif
			SG_DONE();
	}
	else
		return NULL;

	return lab;
}

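// Compute the output for a single example: f(x) = sum_i alpha_i*k(x_i,x) + b,
// either via the kernel's optimized (LINADD) path or by summing explicitly
// over all support vectors.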
DREAL CSVM::classify_example(INT num)
{
	ASSERT(kernel);

	if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
	{
		DREAL dist=kernel->compute_optimized(num);
		return (dist+get_bias());
	}
	else
	{
		DREAL dist=0;
		for (INT i=0; i<get_num_support_vectors(); i++)
			dist+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);

		return (dist+get_bias());
	}
}

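// Compute objective = -sum_i alpha_i*y_i + 0.5*sum_{i,j} alpha_i*alpha_j*k(x_i,x_j)
// over the support vectors, looking up labels and kernel entries by the
// support vectors' original example indices.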
DREAL CSVM::compute_objective()
{
	INT n=get_num_support_vectors();

	if (labels && kernel)
	{
		objective=0;
		for (INT i=0; i<n; i++)
		{
			INT ii=get_support_vector(i);
			objective-=get_alpha(i)*labels->get_label(ii);

			for (INT j=0; j<n; j++)
			{
				INT jj=get_support_vector(j);
				objective+=0.5*get_alpha(i)*get_alpha(j)*kernel->kernel(ii,jj);
			}
		}
	}
	else
		SG_ERROR( "cannot compute objective, labels or kernel not set\n");

	return objective;
}