#include "lib/config.h"
#include "lib/Mathematics.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "classifier/SparseLinearClassifier.h"
#include "classifier/svm/SubGradientSVM.h"
#include "classifier/svm/qpbsvmlib.h"
#include "features/SparseFeatures.h"
#include "features/Labels.h"

#undef DEBUG_SUBGRADIENTSVM

extern double sparsity; // defined elsewhere; its average over iterations is reported after training
double tim;             // accumulated QP solver time in seconds

CSubGradientSVM::CSubGradientSVM()
: CSparseLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
    qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
}

CSubGradientSVM::CSubGradientSVM(DREAL C, CSparseFeatures<DREAL>* traindat, CLabels* trainlab)
: CSparseLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
    qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
{
    set_features(traindat);
    set_labels(trainlab);
}

CSubGradientSVM::~CSubGradientSVM()
{
}
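
// Partition the training examples by their current margin value proj[i]=y_i*(w'x_i+b):
// examples with proj[i] < 1-autoselected_epsilon become 'active' (margin violators),
// examples with |proj[i]-1| <= autoselected_epsilon are 'bound' (on the margin).
// If nothing changed, the working epsilon is tightened and the tolerance is re-selected
// from the sorted |proj[i]-1| values so that roughly qpsize (and at most qpsize_max)
// examples end up on the margin. Returns the number of examples whose status changed.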
INT CSubGradientSVM::find_active(INT num_feat, INT num_vec, INT& num_active, INT& num_bound)
{
    delta_bound=0;
    delta_active=0;
    num_active=0;
    num_bound=0;

    for (INT i=0; i<num_vec; i++)
    {
        active[i]=0;

        // margin violator: strictly inside the margin or misclassified
        if (proj[i] < 1-autoselected_epsilon)
        {
            idx_active[num_active++]=i;
            active[i]=1;
        }

        // on the margin (within the current epsilon tube)
        if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
        {
            idx_bound[num_bound++]=i;
            active[i]=2;
        }

        if (active[i]!=old_active[i])
            delta_active++;

        if (active[i]==2 && old_active[i]==2)
            delta_bound++;
    }

    if (delta_active==0 && work_epsilon<=epsilon)
        return 0;
    else if (delta_active==0)
    {
        work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
        work_epsilon=CMath::max(work_epsilon, epsilon);
        num_bound=qpsize;
    }

    delta_bound=0;
    delta_active=0;
    num_active=0;
    num_bound=0;

    for (INT i=0; i<num_vec; i++)
    {
        tmp_proj[i]=CMath::abs(proj[i]-1);
        tmp_proj_idx[i]=i;
    }

    CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);

    autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];

    if (autoselected_epsilon>work_epsilon)
        autoselected_epsilon=work_epsilon;

    if (autoselected_epsilon<epsilon)
    {
        autoselected_epsilon=epsilon;

        INT i=0;
        while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
            i++;

        if (i>=qpsize_max && autoselected_epsilon>epsilon)
        {
            SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
            INT num_in_qp=i;
            while (--i>=0 && num_in_qp>=qpsize_max)
            {
                if (tmp_proj[i] < autoselected_epsilon)
                {
                    autoselected_epsilon=tmp_proj[i];
                    num_in_qp--;
                }
            }
        }
    }

    for (INT i=0; i<num_vec; i++)
    {
        active[i]=0;

        // margin violator
        if (proj[i] < 1-autoselected_epsilon)
        {
            idx_active[num_active++]=i;
            active[i]=1;
        }

        // on the margin
        if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
        {
            idx_bound[num_bound++]=i;
            active[i]=2;
        }

        if (active[i]!=old_active[i])
            delta_active++;

        if (active[i]==2 && old_active[i]==2)
            delta_bound++;
    }

    return delta_active;
}
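
// Incrementally maintain sum_CXy_active = C*sum_{i active} y_i*x_i (and sum_Cy_active for
// the bias) by adding vectors that just entered the active set and removing those that
// left it; finally swap active/old_active for the next iteration.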
void CSubGradientSVM::update_active(INT num_feat, INT num_vec)
{
    for (INT i=0; i<num_vec; i++)
    {
        if (active[i]==1 && old_active[i]!=1)
        {
            features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
            if (use_bias)
                sum_Cy_active+=C1*get_label(i);
        }
        else if (old_active[i]==1 && active[i]!=1)
        {
            features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
            if (use_bias)
                sum_Cy_active-=C1*get_label(i);
        }
    }

    CMath::swap(active,old_active);
}
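
// Exact line search along the direction -grad_w (and -grad_b for the bias): the objective
// is piecewise quadratic in the step size, so the hinge points (proj[i]-1)/p_i are sorted
// and traversed until the derivative 2*A_zero*alpha + B_zero + sum_B changes sign; the
// zero crossing is then located by linear interpolation. Returns the step size alpha.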
DREAL CSubGradientSVM::line_search(INT num_feat, INT num_vec)
{
    DREAL sum_B = 0;
    DREAL A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
    DREAL B_zero = -CMath::dot(w, grad_w, num_feat);

    INT num_hinge=0;

    for (INT i=0; i<num_vec; i++)
    {
        DREAL p=get_label(i)*features->dense_dot(1.0, i, grad_w, num_feat, grad_b);
        grad_proj[i]=p;
        if (p!=0)
        {
            hinge_point[num_hinge]=(proj[i]-1)/p;
            hinge_idx[num_hinge]=i;
            num_hinge++;

            if (p<0)
                sum_B+=p;
        }
    }
    sum_B*=C1;

    CMath::qsort_index(hinge_point, hinge_idx, num_hinge);

    DREAL alpha = hinge_point[0];
    DREAL grad_val = 2*A_zero*alpha + B_zero + sum_B;

    DREAL old_grad_val = grad_val;
    DREAL old_alpha = alpha;

    for (INT i=1; i < num_hinge && grad_val < 0; i++)
    {
        alpha = hinge_point[i];
        grad_val = 2*A_zero*alpha + B_zero + sum_B;

        if (grad_val > 0)
        {
            ASSERT(old_grad_val-grad_val != 0);
            DREAL gamma = -grad_val/(old_grad_val-grad_val);
            alpha = old_alpha*gamma + (1-gamma)*alpha;
        }
        else
        {
            old_grad_val = grad_val;
            old_alpha = alpha;

            sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
            grad_val = 2*A_zero*alpha + B_zero + sum_B;
        }
    }

    return alpha;
}
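
// Compute an (approximately) minimum-norm subgradient of the objective. The gradient is
// w - sum_CXy_active plus a combination, with coefficients beta in [0,1], of the
// subgradients of the examples on the margin; beta is obtained from a small quadratic
// program over the bound examples (solved with CPLEX if available, SCAS otherwise), or
// chosen randomly when that QP would exceed qpsize_max. Returns the directional
// derivative along the resulting direction.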
DREAL CSubGradientSVM::compute_min_subgradient(INT num_feat, INT num_vec, INT num_active, INT num_bound)
{
    DREAL dir_deriv=0;

    if (num_bound > 0)
    {
        CTime t2;
        CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);

        if (num_bound>=qpsize_max && num_it_noimprovement!=10)
        {
            // the QP over the bound vectors would be too large:
            // fall back to random coefficients in [0,1]
            for (INT i=0; i<num_bound; i++)
                beta[i]=CMath::random(0.0,1.0);
        }
        else
        {
            memset(beta, 0, sizeof(DREAL)*num_bound);

            DREAL bias_const=0;

            if (use_bias)
                bias_const=1;

            // set up the QP: Z is the (scaled) Gram matrix of the bound vectors,
            // Zv the corresponding linear term
            for (INT i=0; i<num_bound; i++)
            {
                for (INT j=i; j<num_bound; j++)
                {
                    INT alen=0;
                    INT blen=0;
                    bool afree=false;
                    bool bfree=false;

                    TSparseEntry<DREAL>* avec=features->get_sparse_feature_vector(idx_bound[i], alen, afree);
                    TSparseEntry<DREAL>* bvec=features->get_sparse_feature_vector(idx_bound[j], blen, bfree);

                    Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
                        (features->sparse_dot(1.0, avec,alen, bvec,blen) + bias_const);

                    Z[j*num_bound+i]=Z[i*num_bound+j];

                    features->free_feature_vector(avec, idx_bound[i], afree);
                    features->free_feature_vector(bvec, idx_bound[j], bfree);
                }

                Zv[i]=-2.0*C1*get_label(idx_bound[i])*
                    features->dense_dot(1.0, idx_bound[i], v, num_feat, -sum_Cy_active);
            }

            t2.stop();
            t2.time_diff_sec(true);

            CTime t;
            CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);

#ifdef USE_CPLEX
            solver.set_solver(QPB_SOLVER_CPLEX);
#else
            solver.set_solver(QPB_SOLVER_SCAS);
#endif

            solver.solve_qp(beta, num_bound);

            t.stop();
            tim+=t.time_diff_sec(true);
        }

        CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
        grad_b = -sum_Cy_active;

        for (INT i=0; i<num_bound; i++)
        {
            features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
            if (use_bias)
                grad_b -= C1 * get_label(idx_bound[i])*beta[i];
        }

        dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
        for (INT i=0; i<num_bound; i++)
        {
            DREAL val= C1*get_label(idx_bound[i])*features->dense_dot(1.0, idx_bound[i], grad_w, num_feat, grad_b);
            dir_deriv += CMath::max(0.0, val);
        }
    }
    else
    {
        CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
        grad_b = -sum_Cy_active;

        dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
    }

    return dir_deriv;
}
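
// Primal objective: 0.5*||w||^2 + C1*sum_i max(0, 1-proj[i]).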
DREAL CSubGradientSVM::compute_objective(INT num_feat, INT num_vec)
{
    DREAL result= 0.5 * CMath::dot(w,w, num_feat);

    for (INT i=0; i<num_vec; i++)
    {
        if (proj[i]<1.0)
            result += C1 * (1.0-proj[i]);
    }

    return result;
}
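
// Recompute proj[i] = y_i*(w'x_i + bias) for all examples.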
void CSubGradientSVM::compute_projection(INT num_feat, INT num_vec)
{
    for (INT i=0; i<num_vec; i++)
        proj[i]=get_label(i)*features->dense_dot(1.0, i, w, num_feat, bias);
}
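
// After a step of size alpha along -grad_w, the projections change by -alpha*grad_proj.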
void CSubGradientSVM::update_projection(DREAL alpha, INT num_vec)
{
    CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
}
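
// Allocate and zero all work buffers for num_vec examples and num_feat features;
// the QP-related buffers are sized by qpsize_limit.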
void CSubGradientSVM::init(INT num_vec, INT num_feat)
{
    delete[] w;
    w=new DREAL[num_feat];
    memset(w,0,sizeof(DREAL)*num_feat);

    bias=0;
    num_it_noimprovement=0;
    grad_b=0;
    set_w(w, num_feat);
    qpsize_limit=5000;

    grad_w=new DREAL[num_feat];
    memset(grad_w,0,sizeof(DREAL)*num_feat);

    sum_CXy_active=new DREAL[num_feat];
    memset(sum_CXy_active,0,sizeof(DREAL)*num_feat);

    v=new DREAL[num_feat];
    memset(v,0,sizeof(DREAL)*num_feat);

    old_v=new DREAL[num_feat];
    memset(old_v,0,sizeof(DREAL)*num_feat);

    sum_Cy_active=0;

    proj= new DREAL[num_vec];
    memset(proj,0,sizeof(DREAL)*num_vec);

    tmp_proj=new DREAL[num_vec];
    memset(tmp_proj,0,sizeof(DREAL)*num_vec); // zero the newly allocated buffer (not proj again)

    tmp_proj_idx= new INT[num_vec];
    memset(tmp_proj_idx,0,sizeof(INT)*num_vec);

    grad_proj= new DREAL[num_vec];
    memset(grad_proj,0,sizeof(DREAL)*num_vec);

    hinge_point= new DREAL[num_vec];
    memset(hinge_point,0,sizeof(DREAL)*num_vec);

    hinge_idx= new INT[num_vec];
    memset(hinge_idx,0,sizeof(INT)*num_vec);

    active=new BYTE[num_vec];
    memset(active,0,sizeof(BYTE)*num_vec);

    old_active=new BYTE[num_vec];
    memset(old_active,0,sizeof(BYTE)*num_vec);

    idx_bound=new INT[num_vec];
    memset(idx_bound,0,sizeof(INT)*num_vec);

    idx_active=new INT[num_vec];
    memset(idx_active,0,sizeof(INT)*num_vec);

    Z=new DREAL[qpsize_limit*qpsize_limit];
    memset(Z,0,sizeof(DREAL)*qpsize_limit*qpsize_limit);

    Zv=new DREAL[qpsize_limit];
    memset(Zv,0,sizeof(DREAL)*qpsize_limit);

    beta=new DREAL[qpsize_limit];
    memset(beta,0,sizeof(DREAL)*qpsize_limit);

    old_Z=new DREAL[qpsize_limit*qpsize_limit];
    memset(old_Z,0,sizeof(DREAL)*qpsize_limit*qpsize_limit);

    old_Zv=new DREAL[qpsize_limit];
    memset(old_Zv,0,sizeof(DREAL)*qpsize_limit);

    old_beta=new DREAL[qpsize_limit];
    memset(old_beta,0,sizeof(DREAL)*qpsize_limit);
}
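
// Free all work buffers allocated in init().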
void CSubGradientSVM::cleanup()
{
    delete[] hinge_idx;
    delete[] hinge_point;
    delete[] grad_proj;
    delete[] proj;
    delete[] tmp_proj;
    delete[] tmp_proj_idx;
    delete[] active;
    delete[] old_active;
    delete[] idx_bound;
    delete[] idx_active;
    delete[] sum_CXy_active;
    delete[] grad_w;
    delete[] v;
    delete[] Z;
    delete[] Zv;
    delete[] beta;
    delete[] old_v;
    delete[] old_Z;
    delete[] old_Zv;
    delete[] old_beta;

    // reset all pointers so a repeated cleanup() cannot double-free
    hinge_idx=NULL;
    hinge_point=NULL;
    grad_proj=NULL;
    proj=NULL;
    tmp_proj=NULL;
    tmp_proj_idx=NULL;
    active=NULL;
    old_active=NULL;
    idx_bound=NULL;
    idx_active=NULL;
    sum_CXy_active=NULL;
    grad_w=NULL;
    v=NULL;
    Z=NULL;
    Zv=NULL;
    beta=NULL;
    old_v=NULL;
    old_Z=NULL;
    old_Zv=NULL;
    old_beta=NULL;
}
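
// Main training loop: starting from w=0, repeatedly determine the active/bound sets,
// compute the minimum-norm subgradient, perform an exact line search and update w, bias
// and the cached projections, until the optimality conditions hold at the target epsilon
// (or the maximum training time is exceeded).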
bool CSubGradientSVM::train()
{
    tim=0;
    SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
    ASSERT(labels);
    ASSERT(get_features());

    INT num_iterations=0;
    INT num_train_labels=labels->get_num_labels();
    INT num_feat=features->get_num_features();
    INT num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);

    init(num_vec, num_feat);

    INT num_active=0;
    INT num_bound=0;
    DREAL alpha=0;
    DREAL dir_deriv=0;
    DREAL obj=0;
    delta_active=num_vec;
    last_it_noimprovement=-1;

    work_epsilon=0.99;
    autoselected_epsilon=work_epsilon;

    compute_projection(num_feat, num_vec);

    CTime time;
    double loop_time=0;
    while (!(CSignal::cancel_computations()))
    {
        CTime t;

        // determine which examples violate the margin or lie on it,
        // then refresh the cached sums over the active set
        delta_active=find_active(num_feat, num_vec, num_active, num_bound);

        update_active(num_feat, num_vec);

#ifdef DEBUG_SUBGRADIENTSVM
        SG_PRINT("==================================================\niteration: %d ", num_iterations);
        obj=compute_objective(num_feat, num_vec);
        SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
                obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
#else
        SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

        // compute the minimum-norm subgradient and an exact line search along it
        dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);

        alpha=line_search(num_feat, num_vec);

        // check optimality conditions
        if (num_it_noimprovement==10 || num_bound<qpsize_max)
        {
            DREAL norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
                grad_b*grad_b;

#ifdef DEBUG_SUBGRADIENTSVM
            SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
                    "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
                    work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
#else
            SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
#endif

            if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
                break;
            else
                num_it_noimprovement=0;
        }

        if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
        {
            if (last_it_noimprovement==num_iterations-1)
            {
                SG_PRINT("no improvement...\n");
                num_it_noimprovement++;
            }
            else
                num_it_noimprovement=0;

            last_it_noimprovement=num_iterations;
        }

        // take the step and keep the per-example projections in sync
        CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
        bias-=alpha*grad_b;

        update_projection(alpha, num_vec);

        t.stop();
        loop_time=t.time_diff_sec();
        num_iterations++;

        if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
            break;
    }

    SG_INFO("converged after %d iterations\n", num_iterations);

    obj=compute_objective(num_feat, num_vec);
    SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d sparsity: %f\n",
            obj, alpha, dir_deriv, num_bound, num_active, sparsity/num_iterations);

#ifdef DEBUG_SUBGRADIENTSVM
    CMath::display_vector(w, w_dim, "w");
    SG_PRINT("bias: %f\n", bias);
#endif
    SG_INFO("solver time:%f s\n", tim);

    cleanup();

    return true;
}