00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Time.h"
00016 #include "classifier/SparseLinearClassifier.h"
00017 #include "classifier/svm/SubGradientSVM.h"
00018 #include "classifier/svm/qpbsvmlib.h"
00019 #include "features/SparseFeatures.h"
00020 #include "features/Labels.h"
00021
00022 #undef DEBUG_SUBGRADIENTSVM
00023
00024 extern float64_t sparsity;
00025 float64_t tim;
00026
00027 CSubGradientSVM::CSubGradientSVM()
00028 : CSparseLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
00029 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00030 {
00031 }
00032
00033 CSubGradientSVM::CSubGradientSVM(
00034 float64_t C, CSparseFeatures<float64_t>* traindat, CLabels* trainlab)
00035 : CSparseLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
00036 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00037 {
00038 set_features(traindat);
00039 set_labels(trainlab);
00040 }
00041
00042
00043 CSubGradientSVM::~CSubGradientSVM()
00044 {
00045 }
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080 int32_t CSubGradientSVM::find_active(
00081 int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
00082 {
00083 delta_bound=0;
00084 delta_active=0;
00085 num_active=0;
00086 num_bound=0;
00087
00088 for (int32_t i=0; i<num_vec; i++)
00089 {
00090 active[i]=0;
00091
00092
00093 if (proj[i] < 1-autoselected_epsilon)
00094 {
00095 idx_active[num_active++]=i;
00096 active[i]=1;
00097 }
00098
00099
00100 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00101 {
00102 idx_bound[num_bound++]=i;
00103 active[i]=2;
00104 }
00105
00106 if (active[i]!=old_active[i])
00107 delta_active++;
00108
00109 if (active[i]==2 && old_active[i]==2)
00110 delta_bound++;
00111 }
00112
00113
00114 if (delta_active==0 && work_epsilon<=epsilon)
00115 return 0;
00116 else if (delta_active==0)
00117 {
00118 work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
00119 work_epsilon=CMath::max(work_epsilon, epsilon);
00120 num_bound=qpsize;
00121 }
00122
00123 delta_bound=0;
00124 delta_active=0;
00125 num_active=0;
00126 num_bound=0;
00127
00128 for (int32_t i=0; i<num_vec; i++)
00129 {
00130 tmp_proj[i]=CMath::abs(proj[i]-1);
00131 tmp_proj_idx[i]=i;
00132 }
00133
00134 CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);
00135
00136 autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)];
00137
00138 #ifdef DEBUG_SUBGRADIENTSVM
00139
00140 #endif
00141
00142 if (autoselected_epsilon>work_epsilon)
00143 autoselected_epsilon=work_epsilon;
00144
00145 if (autoselected_epsilon<epsilon)
00146 {
00147 autoselected_epsilon=epsilon;
00148
00149 int32_t i=0;
00150 while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
00151 i++;
00152
00153
00154
00155 if (i>=qpsize_max && autoselected_epsilon>epsilon)
00156 {
00157 SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
00158 int32_t num_in_qp=i;
00159 while (--i>=0 && num_in_qp>=qpsize_max)
00160 {
00161 if (tmp_proj[i] < autoselected_epsilon)
00162 {
00163 autoselected_epsilon=tmp_proj[i];
00164 num_in_qp--;
00165 }
00166 }
00167
00168
00169 }
00170 }
00171
00172 for (int32_t i=0; i<num_vec; i++)
00173 {
00174 active[i]=0;
00175
00176
00177 if (proj[i] < 1-autoselected_epsilon)
00178 {
00179 idx_active[num_active++]=i;
00180 active[i]=1;
00181 }
00182
00183
00184 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00185 {
00186 idx_bound[num_bound++]=i;
00187 active[i]=2;
00188 }
00189
00190 if (active[i]!=old_active[i])
00191 delta_active++;
00192
00193 if (active[i]==2 && old_active[i]==2)
00194 delta_bound++;
00195 }
00196
00197
00198 return delta_active;
00199 }
00200
00201
00202 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
00203 {
00204 for (int32_t i=0; i<num_vec; i++)
00205 {
00206 if (active[i]==1 && old_active[i]!=1)
00207 {
00208 features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
00209 if (use_bias)
00210 sum_Cy_active+=C1*get_label(i);
00211 }
00212 else if (old_active[i]==1 && active[i]!=1)
00213 {
00214 features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
00215 if (use_bias)
00216 sum_Cy_active-=C1*get_label(i);
00217 }
00218 }
00219
00220 CMath::swap(active,old_active);
00221 }
00222
00223 float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
00224 {
00225 float64_t sum_B = 0;
00226 float64_t A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
00227 float64_t B_zero = -CMath::dot(w, grad_w, num_feat);
00228
00229 int32_t num_hinge=0;
00230
00231 for (int32_t i=0; i<num_vec; i++)
00232 {
00233 float64_t p=get_label(i)*features->dense_dot(1.0, i, grad_w, num_feat, grad_b);
00234 grad_proj[i]=p;
00235 if (p!=0)
00236 {
00237 hinge_point[num_hinge]=(proj[i]-1)/p;
00238 hinge_idx[num_hinge]=i;
00239 num_hinge++;
00240
00241 if (p<0)
00242 sum_B+=p;
00243 }
00244 }
00245 sum_B*=C1;
00246
00247 CMath::qsort_index(hinge_point, hinge_idx, num_hinge);
00248
00249
00250 float64_t alpha = hinge_point[0];
00251 float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262 float64_t old_grad_val = grad_val;
00263 float64_t old_alpha = alpha;
00264
00265 for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
00266 {
00267 alpha = hinge_point[i];
00268 grad_val = 2*A_zero*alpha + B_zero + sum_B;
00269
00270 if (grad_val > 0)
00271 {
00272 ASSERT(old_grad_val-grad_val != 0);
00273 float64_t gamma = -grad_val/(old_grad_val-grad_val);
00274 alpha = old_alpha*gamma + (1-gamma)*alpha;
00275 }
00276 else
00277 {
00278 old_grad_val = grad_val;
00279 old_alpha = alpha;
00280
00281 sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
00282 grad_val = 2*A_zero*alpha + B_zero + sum_B;
00283 }
00284 }
00285
00286 return alpha;
00287 }
00288
00289 float64_t CSubGradientSVM::compute_min_subgradient(
00290 int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
00291 {
00292 float64_t dir_deriv=0;
00293
00294 if (num_bound > 0)
00295 {
00296
00297 CTime t2;
00298 CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);
00299
00300 if (num_bound>=qpsize_max && num_it_noimprovement!=10)
00301 {
00302
00303 for (int32_t i=0; i<num_bound; i++)
00304 beta[i]=CMath::random(0.0,1.0);
00305 }
00306 else
00307 {
00308 memset(beta, 0, sizeof(float64_t)*num_bound);
00309
00310 float64_t bias_const=0;
00311
00312 if (use_bias)
00313 bias_const=1;
00314
00315 for (int32_t i=0; i<num_bound; i++)
00316 {
00317 for (int32_t j=i; j<num_bound; j++)
00318 {
00319 int32_t alen=0;
00320 int32_t blen=0;
00321 bool afree=false;
00322 bool bfree=false;
00323
00324 TSparseEntry<float64_t>* avec=features->get_sparse_feature_vector(idx_bound[i], alen, afree);
00325 TSparseEntry<float64_t>* bvec=features->get_sparse_feature_vector(idx_bound[j], blen, bfree);
00326
00327 Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
00328 (features->sparse_dot(1.0, avec,alen, bvec,blen) + bias_const);
00329
00330 Z[j*num_bound+i]=Z[i*num_bound+j];
00331
00332 features->free_feature_vector(avec, idx_bound[i], afree);
00333 features->free_feature_vector(bvec, idx_bound[j], bfree);
00334 }
00335
00336 Zv[i]=-2.0*C1*get_label(idx_bound[i])*
00337 features->dense_dot(1.0, idx_bound[i], v, num_feat, -sum_Cy_active);
00338 }
00339
00340
00341
00342 t2.stop();
00343 t2.time_diff_sec(true);
00344
00345 CTime t;
00346 CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
00347
00348
00349 #ifdef USE_CPLEX
00350 solver.set_solver(QPB_SOLVER_CPLEX);
00351 #else
00352 solver.set_solver(QPB_SOLVER_SCAS);
00353 #endif
00354
00355 solver.solve_qp(beta, num_bound);
00356
00357 t.stop();
00358 tim+=t.time_diff_sec(true);
00359
00360
00361
00362
00363
00364
00365
00366
00367 }
00368
00369 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00370 grad_b = -sum_Cy_active;
00371
00372 for (int32_t i=0; i<num_bound; i++)
00373 {
00374 features->add_to_dense_vec(-C1*beta[i]*get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat);
00375 if (use_bias)
00376 grad_b -= C1 * get_label(idx_bound[i])*beta[i];
00377 }
00378
00379 dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
00380 for (int32_t i=0; i<num_bound; i++)
00381 {
00382 float64_t val= C1*get_label(idx_bound[i])*features->dense_dot(1.0, idx_bound[i], grad_w, num_feat, grad_b);
00383 dir_deriv += CMath::max(0.0, val);
00384 }
00385 }
00386 else
00387 {
00388 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00389 grad_b = -sum_Cy_active;
00390
00391 dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
00392 }
00393
00394 return dir_deriv;
00395 }
00396
00397 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
00398 {
00399 float64_t result= 0.5 * CMath::dot(w,w, num_feat);
00400
00401 for (int32_t i=0; i<num_vec; i++)
00402 {
00403 if (proj[i]<1.0)
00404 result += C1 * (1.0-proj[i]);
00405 }
00406
00407 return result;
00408 }
00409
00410 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
00411 {
00412 for (int32_t i=0; i<num_vec; i++)
00413 proj[i]=get_label(i)*features->dense_dot(1.0, i, w, num_feat, bias);
00414 }
00415
00416 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
00417 {
00418 CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
00419 }
00420
00421 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
00422 {
00423
00424 delete[] w;
00425 w=new float64_t[num_feat];
00426 memset(w,0,sizeof(float64_t)*num_feat);
00427
00428 bias=0;
00429 num_it_noimprovement=0;
00430 grad_b=0;
00431 set_w(w, num_feat);
00432 qpsize_limit=5000;
00433
00434 grad_w=new float64_t[num_feat];
00435 memset(grad_w,0,sizeof(float64_t)*num_feat);
00436
00437 sum_CXy_active=new float64_t[num_feat];
00438 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
00439
00440 v=new float64_t[num_feat];
00441 memset(v,0,sizeof(float64_t)*num_feat);
00442
00443 old_v=new float64_t[num_feat];
00444 memset(old_v,0,sizeof(float64_t)*num_feat);
00445
00446 sum_Cy_active=0;
00447
00448 proj= new float64_t[num_vec];
00449 memset(proj,0,sizeof(float64_t)*num_vec);
00450
00451 tmp_proj=new float64_t[num_vec];
00452 memset(proj,0,sizeof(float64_t)*num_vec);
00453
00454 tmp_proj_idx= new int32_t[num_vec];
00455 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
00456
00457 grad_proj= new float64_t[num_vec];
00458 memset(grad_proj,0,sizeof(float64_t)*num_vec);
00459
00460 hinge_point= new float64_t[num_vec];
00461 memset(hinge_point,0,sizeof(float64_t)*num_vec);
00462
00463 hinge_idx= new int32_t[num_vec];
00464 memset(hinge_idx,0,sizeof(int32_t)*num_vec);
00465
00466 active=new uint8_t[num_vec];
00467 memset(active,0,sizeof(uint8_t)*num_vec);
00468
00469 old_active=new uint8_t[num_vec];
00470 memset(old_active,0,sizeof(uint8_t)*num_vec);
00471
00472 idx_bound=new int32_t[num_vec];
00473 memset(idx_bound,0,sizeof(int32_t)*num_vec);
00474
00475 idx_active=new int32_t[num_vec];
00476 memset(idx_active,0,sizeof(int32_t)*num_vec);
00477
00478 Z=new float64_t[qpsize_limit*qpsize_limit];
00479 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00480
00481 Zv=new float64_t[qpsize_limit];
00482 memset(Zv,0,sizeof(float64_t)*qpsize_limit);
00483
00484 beta=new float64_t[qpsize_limit];
00485 memset(beta,0,sizeof(float64_t)*qpsize_limit);
00486
00487 old_Z=new float64_t[qpsize_limit*qpsize_limit];
00488 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
00489
00490 old_Zv=new float64_t[qpsize_limit];
00491 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
00492
00493 old_beta=new float64_t[qpsize_limit];
00494 memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
00495
00496 }
00497
00498 void CSubGradientSVM::cleanup()
00499 {
00500 delete[] hinge_idx;
00501 delete[] hinge_point;
00502 delete[] grad_proj;
00503 delete[] proj;
00504 delete[] tmp_proj;
00505 delete[] tmp_proj_idx;
00506 delete[] active;
00507 delete[] old_active;
00508 delete[] idx_bound;
00509 delete[] idx_active;
00510 delete[] sum_CXy_active;
00511 delete[] grad_w;
00512 delete[] v;
00513 delete[] Z;
00514 delete[] Zv;
00515 delete[] beta;
00516 delete[] old_v;
00517 delete[] old_Z;
00518 delete[] old_Zv;
00519 delete[] old_beta;
00520
00521 hinge_idx=NULL;
00522 proj=NULL;
00523 active=NULL;
00524 old_active=NULL;
00525 idx_bound=NULL;
00526 idx_active=NULL;
00527 sum_CXy_active=NULL;
00528 grad_w=NULL;
00529 v=NULL;
00530 Z=NULL;
00531 Zv=NULL;
00532 beta=NULL;
00533 }
00534
00535 bool CSubGradientSVM::train()
00536 {
00537 tim=0;
00538 SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
00539 ASSERT(labels);
00540 ASSERT(get_features());
00541
00542 int32_t num_iterations=0;
00543 int32_t num_train_labels=labels->get_num_labels();
00544 int32_t num_feat=features->get_num_features();
00545 int32_t num_vec=features->get_num_vectors();
00546
00547 ASSERT(num_vec==num_train_labels);
00548
00549 init(num_vec, num_feat);
00550
00551 int32_t num_active=0;
00552 int32_t num_bound=0;
00553 float64_t alpha=0;
00554 float64_t dir_deriv=0;
00555 float64_t obj=0;
00556 delta_active=num_vec;
00557 last_it_noimprovement=-1;
00558
00559 work_epsilon=0.99;
00560 autoselected_epsilon=work_epsilon;
00561
00562 compute_projection(num_feat, num_vec);
00563
00564 CTime time;
00565 float64_t loop_time=0;
00566 while (!(CSignal::cancel_computations()))
00567 {
00568 CTime t;
00569 delta_active=find_active(num_feat, num_vec, num_active, num_bound);
00570
00571 update_active(num_feat, num_vec);
00572
00573 #ifdef DEBUG_SUBGRADIENTSVM
00574 SG_PRINT("==================================================\niteration: %d ", num_iterations);
00575 obj=compute_objective(num_feat, num_vec);
00576 SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
00577 obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
00578 #else
00579 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00580 #endif
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593 dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
00594
00595 alpha=line_search(num_feat, num_vec);
00596
00597 if (num_it_noimprovement==10 || num_bound<qpsize_max)
00598 {
00599 float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
00600 grad_b*grad_b;
00601
00602 #ifdef DEBUG_SUBGRADIENTSVM
00603 SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
00604 "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
00605 work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
00606 #else
00607 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00608 #endif
00609
00610 if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
00611 break;
00612 else
00613 num_it_noimprovement=0;
00614 }
00615
00616 if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
00617 {
00618 if (last_it_noimprovement==num_iterations-1)
00619 {
00620 SG_PRINT("no improvement...\n");
00621 num_it_noimprovement++;
00622 }
00623 else
00624 num_it_noimprovement=0;
00625
00626 last_it_noimprovement=num_iterations;
00627 }
00628
00629 CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
00630 bias-=alpha*grad_b;
00631
00632 update_projection(alpha, num_vec);
00633
00634
00635
00636
00637
00638 t.stop();
00639 loop_time=t.time_diff_sec();
00640 num_iterations++;
00641
00642 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00643 break;
00644 }
00645
00646 SG_INFO("converged after %d iterations\n", num_iterations);
00647
00648 obj=compute_objective(num_feat, num_vec);
00649 SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d sparsity: %f\n",
00650 obj, alpha, dir_deriv, num_bound, num_active, sparsity/num_iterations);
00651
00652 #ifdef DEBUG_SUBGRADIENTSVM
00653 CMath::display_vector(w, w_dim, "w");
00654 SG_PRINT("bias: %f\n", bias);
00655 #endif
00656 SG_INFO("solver time:%f s\n", tim);
00657
00658 cleanup();
00659
00660 return true;
00661 }