00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #ifdef USE_CPLEX
00015
00016 #include "lib/Mathematics.h"
00017 #include "lib/Signal.h"
00018 #include "lib/Time.h"
00019 #include "classifier/SparseLinearClassifier.h"
00020 #include "classifier/SubGradientLPM.h"
00021 #include "classifier/svm/qpbsvmlib.h"
00022 #include "features/SparseFeatures.h"
00023 #include "features/Labels.h"
00024
00025 #define DEBUG_SUBGRADIENTLPM
00026
00027 extern double sparsity;
00028 double lpmtim;
00029
00030 CSubGradientLPM::CSubGradientLPM()
00031 : CSparseLinearClassifier(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
00032 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00033 {
00034 }
00035
00036 CSubGradientLPM::CSubGradientLPM(DREAL C, CSparseFeatures<DREAL>* traindat, CLabels* trainlab)
00037 : CSparseLinearClassifier(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
00038 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
00039 {
00040 CSparseLinearClassifier::features=traindat;
00041 CClassifier::labels=trainlab;
00042 }
00043
00044
00045 CSubGradientLPM::~CSubGradientLPM()
00046 {
00047 cleanup();
00048 }
00049
00050 INT CSubGradientLPM::find_active(INT num_feat, INT num_vec, INT& num_active, INT& num_bound)
00051 {
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078 delta_bound=0;
00079 delta_active=0;
00080 num_active=0;
00081 num_bound=0;
00082
00083 for (INT i=0; i<num_vec; i++)
00084 {
00085 active[i]=0;
00086
00087
00088 if (proj[i] < 1-autoselected_epsilon)
00089 {
00090 idx_active[num_active++]=i;
00091 active[i]=1;
00092 }
00093
00094
00095 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00096 {
00097 idx_bound[num_bound++]=i;
00098 active[i]=2;
00099 }
00100
00101 if (active[i]!=old_active[i])
00102 delta_active++;
00103
00104 if (active[i]==2 && old_active[i]==2)
00105 delta_bound++;
00106 }
00107
00108
00109 if (delta_active==0 && work_epsilon<=epsilon)
00110 return 0;
00111 else if (delta_active==0)
00112 {
00113 work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon);
00114 work_epsilon=CMath::max(work_epsilon, epsilon);
00115 num_bound=qpsize;
00116 }
00117
00118 delta_bound=0;
00119 delta_active=0;
00120 num_active=0;
00121 num_bound=0;
00122
00123 for (INT i=0; i<num_vec; i++)
00124 {
00125 tmp_proj[i]=CMath::abs(proj[i]-1);
00126 tmp_proj_idx[i]=i;
00127 }
00128
00129 CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec);
00130
00131 autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec)];
00132
00133 #ifdef DEBUG_SUBGRADIENTSVM
00134
00135 #endif
00136
00137 if (autoselected_epsilon>work_epsilon)
00138 autoselected_epsilon=work_epsilon;
00139
00140 if (autoselected_epsilon<epsilon)
00141 {
00142 autoselected_epsilon=epsilon;
00143
00144 INT i=0;
00145 while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
00146 i++;
00147
00148
00149
00150 if (i>=qpsize_max && autoselected_epsilon>epsilon)
00151 {
00152 SG_PRINT("qpsize limit (%d) reached\n", qpsize_max);
00153 INT num_in_qp=i;
00154 while (--i>=0 && num_in_qp>=qpsize_max)
00155 {
00156 if (tmp_proj[i] < autoselected_epsilon)
00157 {
00158 autoselected_epsilon=tmp_proj[i];
00159 num_in_qp--;
00160 }
00161 }
00162
00163
00164 }
00165 }
00166
00167 for (INT i=0; i<num_vec; i++)
00168 {
00169 active[i]=0;
00170
00171
00172 if (proj[i] < 1-autoselected_epsilon)
00173 {
00174 idx_active[num_active++]=i;
00175 active[i]=1;
00176 }
00177
00178
00179 if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
00180 {
00181 idx_bound[num_bound++]=i;
00182 active[i]=2;
00183 }
00184
00185 if (active[i]!=old_active[i])
00186 delta_active++;
00187
00188 if (active[i]==2 && old_active[i]==2)
00189 delta_bound++;
00190 }
00191
00192 pos_idx=0;
00193 neg_idx=0;
00194 zero_idx=0;
00195
00196 for (INT i=0; i<num_feat; i++)
00197 {
00198 if (w[i]>work_epsilon)
00199 {
00200 w_pos[pos_idx++]=i;
00201 grad_w[i]=1;
00202 }
00203 else if (w[i]<-work_epsilon)
00204 {
00205 w_neg[neg_idx++]=i;
00206 grad_w[i]=-1;
00207 }
00208
00209 if (CMath::abs(w[i])<=work_epsilon)
00210 {
00211 w_zero[zero_idx++]=i;
00212 grad_w[i]=-1;
00213 }
00214 }
00215
00216 return delta_active;
00217 }
00218
00219
00220 void CSubGradientLPM::update_active(INT num_feat, INT num_vec)
00221 {
00222 for (INT i=0; i<num_vec; i++)
00223 {
00224 if (active[i]==1 && old_active[i]!=1)
00225 {
00226 features->add_to_dense_vec(C1*get_label(i), i, sum_CXy_active, num_feat);
00227 if (use_bias)
00228 sum_Cy_active+=C1*get_label(i);
00229 }
00230 else if (old_active[i]==1 && active[i]!=1)
00231 {
00232 features->add_to_dense_vec(-C1*get_label(i), i, sum_CXy_active, num_feat);
00233 if (use_bias)
00234 sum_Cy_active-=C1*get_label(i);
00235 }
00236 }
00237
00238 CMath::swap(active,old_active);
00239 }
00240
00241 DREAL CSubGradientLPM::line_search(INT num_feat, INT num_vec)
00242 {
00243 INT num_hinge=0;
00244 DREAL alpha=0;
00245 DREAL sgrad=0;
00246
00247 DREAL* A=new DREAL[num_feat+num_vec];
00248 DREAL* B=new DREAL[num_feat+num_vec];
00249 DREAL* C=new DREAL[num_feat+num_vec];
00250 DREAL* D=new DREAL[num_feat+num_vec];
00251
00252 for (INT i=0; i<num_feat+num_vec; i++)
00253 {
00254 if (i<num_feat)
00255 {
00256 A[i]=-grad_w[i];
00257 B[i]=w[i];
00258 C[i]=+grad_w[i];
00259 D[i]=-w[i];
00260 }
00261 else
00262 {
00263 DREAL p=get_label(i-num_feat)*features->dense_dot(1.0, i-num_feat, grad_w, num_feat, grad_b);
00264 grad_proj[i-num_feat]=p;
00265
00266 A[i]=0;
00267 B[i]=0;
00268 C[i]=C1*p;
00269 D[i]=C1*(1-proj[i-num_feat]);
00270 }
00271
00272 if (A[i]==C[i] && B[i]>D[i])
00273 sgrad+=A[i]+C[i];
00274 else if (A[i]==C[i] && B[i]==D[i])
00275 sgrad+=CMath::max(A[i],C[i]);
00276 else if (A[i]!=C[i])
00277 {
00278 hinge_point[num_hinge]=(D[i]-B[i])/(A[i]-C[i]);
00279 hinge_idx[num_hinge]=i;
00280 num_hinge++;
00281
00282 if (A[i]>C[i])
00283 sgrad+=C[i];
00284 if (A[i]<C[i])
00285 sgrad+=A[i];
00286 }
00287 }
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298 CMath::qsort_index(hinge_point, hinge_idx, num_hinge);
00299
00300
00301
00302 INT i=-1;
00303 while (i < num_hinge-1 && sgrad < 0)
00304 {
00305 i+=1;
00306
00307 if (A[hinge_idx[i]] > C[hinge_idx[i]])
00308 sgrad += A[hinge_idx[i]] - C[hinge_idx[i]];
00309 else
00310 sgrad += C[hinge_idx[i]] - A[hinge_idx[i]];
00311 }
00312
00313 alpha = hinge_point[i];
00314
00315 delete[] D;
00316 delete[] C;
00317 delete[] B;
00318 delete[] A;
00319
00320
00321 return alpha;
00322 }
00323
00324 DREAL CSubGradientLPM::compute_min_subgradient(INT num_feat, INT num_vec, INT num_active, INT num_bound)
00325 {
00326 DREAL dir_deriv=0;
00327 solver->init(E_QP);
00328
00329 if (zero_idx+num_bound > 0)
00330 {
00331
00332
00333 CMath::add(grad_w, 1.0, grad_w, -1.0, sum_CXy_active, num_feat);
00334 grad_w[num_feat]= -sum_Cy_active;
00335
00336 grad_b = -sum_Cy_active;
00337
00338
00339
00340
00341
00342
00343 solver->setup_subgradientlpm_QP(C1, labels, features, idx_bound, num_bound,
00344 w_zero, zero_idx,
00345 grad_w, num_feat+1,
00346 use_bias);
00347
00348 solver->optimize(beta);
00349
00350
00351
00352
00353 dir_deriv = CMath::dot(beta, grad_w, num_feat);
00354 dir_deriv-=beta[num_feat]*sum_Cy_active;
00355
00356 for (INT i=0; i<num_bound; i++)
00357 {
00358 DREAL val= C1*get_label(idx_bound[i])*features->dense_dot(1.0, idx_bound[i], beta, num_feat, beta[num_feat]);
00359 dir_deriv += CMath::max(0.0, val);
00360 }
00361
00362 for (INT i=0; i<num_feat; i++)
00363 grad_w[i]=beta[i];
00364
00365 if (use_bias)
00366 grad_b=beta[num_feat];
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391 }
00392 else
00393 {
00394 CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
00395 grad_b = -sum_Cy_active;
00396
00397 dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
00398 }
00399
00400 solver->cleanup();
00401
00402
00403
00404
00405
00406 return dir_deriv;
00407 }
00408
00409 DREAL CSubGradientLPM::compute_objective(INT num_feat, INT num_vec)
00410 {
00411 DREAL result= CMath::sum_abs(w, num_feat);
00412
00413 for (INT i=0; i<num_vec; i++)
00414 {
00415 if (proj[i]<1.0)
00416 result += C1 * (1.0-proj[i]);
00417 }
00418
00419 return result;
00420 }
00421
00422 void CSubGradientLPM::compute_projection(INT num_feat, INT num_vec)
00423 {
00424 for (INT i=0; i<num_vec; i++)
00425 proj[i]=get_label(i)*features->dense_dot(1.0, i, w, num_feat, bias);
00426 }
00427
00428 void CSubGradientLPM::update_projection(DREAL alpha, INT num_vec)
00429 {
00430 CMath::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec);
00431 }
00432
00433 void CSubGradientLPM::init(INT num_vec, INT num_feat)
00434 {
00435
00436 delete[] w;
00437 w=new DREAL[num_feat];
00438 for (INT i=0; i<num_feat; i++)
00439 w[i]=1.0;
00440
00441 bias=0;
00442 num_it_noimprovement=0;
00443 grad_b=0;
00444 set_w(w, num_feat);
00445
00446 w_pos=new INT[num_feat];
00447 memset(w_pos,0,sizeof(INT)*num_feat);
00448
00449 w_zero=new INT[num_feat];
00450 memset(w_zero,0,sizeof(INT)*num_feat);
00451
00452 w_neg=new INT[num_feat];
00453 memset(w_neg,0,sizeof(INT)*num_feat);
00454
00455 grad_w=new DREAL[num_feat+1];
00456 memset(grad_w,0,sizeof(DREAL)*(num_feat+1));
00457
00458 sum_CXy_active=new DREAL[num_feat];
00459 memset(sum_CXy_active,0,sizeof(DREAL)*num_feat);
00460
00461 sum_Cy_active=0;
00462
00463 proj=new DREAL[num_vec];
00464 memset(proj,0,sizeof(DREAL)*num_vec);
00465
00466 tmp_proj=new DREAL[num_vec];
00467 memset(proj,0,sizeof(DREAL)*num_vec);
00468
00469 tmp_proj_idx=new INT[num_vec];
00470 memset(tmp_proj_idx,0,sizeof(INT)*num_vec);
00471
00472 grad_proj=new DREAL[num_vec];
00473 memset(grad_proj,0,sizeof(DREAL)*num_vec);
00474
00475 hinge_point=new DREAL[num_vec+num_feat];
00476 memset(hinge_point,0,sizeof(DREAL)*(num_vec+num_feat));
00477
00478 hinge_idx=new INT[num_vec+num_feat];
00479 memset(hinge_idx,0,sizeof(INT)*(num_vec+num_feat));
00480
00481 active=new BYTE[num_vec];
00482 memset(active,0,sizeof(BYTE)*num_vec);
00483
00484 old_active=new BYTE[num_vec];
00485 memset(old_active,0,sizeof(BYTE)*num_vec);
00486
00487 idx_bound=new INT[num_vec];
00488 memset(idx_bound,0,sizeof(INT)*num_vec);
00489
00490 idx_active=new INT[num_vec];
00491 memset(idx_active,0,sizeof(INT)*num_vec);
00492
00493 beta=new DREAL[num_feat+1+num_feat+num_vec];
00494 memset(beta,0,sizeof(DREAL)*num_feat+1+num_feat+num_vec);
00495
00496 solver=new CCplex();
00497 }
00498
00499 void CSubGradientLPM::cleanup()
00500 {
00501 delete[] hinge_idx;
00502 delete[] hinge_point;
00503 delete[] grad_proj;
00504 delete[] proj;
00505 delete[] tmp_proj;
00506 delete[] tmp_proj_idx;
00507 delete[] active;
00508 delete[] old_active;
00509 delete[] idx_bound;
00510 delete[] idx_active;
00511 delete[] sum_CXy_active;
00512 delete[] w_pos;
00513 delete[] w_zero;
00514 delete[] w_neg;
00515 delete[] grad_w;
00516 delete[] beta;
00517
00518 hinge_idx=NULL;
00519 hinge_point=NULL;
00520 grad_proj=NULL;
00521 proj=NULL;
00522 tmp_proj=NULL;
00523 tmp_proj_idx=NULL;
00524 active=NULL;
00525 old_active=NULL;
00526 idx_bound=NULL;
00527 idx_active=NULL;
00528 sum_CXy_active=NULL;
00529 w_pos=NULL;
00530 w_zero=NULL;
00531 w_neg=NULL;
00532 grad_w=NULL;
00533 beta=NULL;
00534
00535 delete solver;
00536 solver=NULL;
00537 }
00538
00539 bool CSubGradientLPM::train()
00540 {
00541 lpmtim=0;
00542 SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
00543 ASSERT(labels);
00544 ASSERT(features);
00545
00546 INT num_iterations=0;
00547 INT num_train_labels=labels->get_num_labels();
00548 INT num_feat=features->get_num_features();
00549 INT num_vec=features->get_num_vectors();
00550
00551 ASSERT(num_vec==num_train_labels);
00552
00553 init(num_vec, num_feat);
00554
00555 INT num_active=0;
00556 INT num_bound=0;
00557 DREAL alpha=0;
00558 DREAL dir_deriv=0;
00559 DREAL obj=0;
00560 delta_active=num_vec;
00561 last_it_noimprovement=-1;
00562
00563 work_epsilon=0.99;
00564 autoselected_epsilon=work_epsilon;
00565
00566 compute_projection(num_feat, num_vec);
00567
00568 CTime time;
00569 double loop_time=0;
00570 while (!(CSignal::cancel_computations()))
00571 {
00572 CTime t;
00573 delta_active=find_active(num_feat, num_vec, num_active, num_bound);
00574
00575 update_active(num_feat, num_vec);
00576
00577 #ifdef DEBUG_SUBGRADIENTLPM
00578 SG_PRINT("==================================================\niteration: %d ", num_iterations);
00579 obj=compute_objective(num_feat, num_vec);
00580 SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
00581 obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
00582 #else
00583 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6);
00584 #endif
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597 dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
00598
00599 alpha=line_search(num_feat, num_vec);
00600
00601 if (num_it_noimprovement==10 || num_bound<qpsize_max)
00602 {
00603 DREAL norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
00604 grad_b*grad_b;
00605
00606 SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
00607 "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
00608 work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
00609
00610 if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
00611 break;
00612 else
00613 num_it_noimprovement=0;
00614 }
00615
00616
00617 if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
00618 {
00619 if (last_it_noimprovement==num_iterations-1)
00620 {
00621 SG_PRINT("no improvement...\n");
00622 num_it_noimprovement++;
00623 }
00624 else
00625 num_it_noimprovement=0;
00626
00627 last_it_noimprovement=num_iterations;
00628 }
00629
00630 CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
00631 bias-=alpha*grad_b;
00632
00633 update_projection(alpha, num_vec);
00634
00635 t.stop();
00636 loop_time=t.time_diff_sec();
00637 num_iterations++;
00638
00639 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00640 break;
00641 }
00642
00643 SG_INFO("converged after %d iterations\n", num_iterations);
00644
00645 obj=compute_objective(num_feat, num_vec);
00646 SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d sparsity: %f\n",
00647 obj, alpha, dir_deriv, num_bound, num_active, sparsity/num_iterations);
00648
00649 #ifdef DEBUG_SUBGRADIENTLPM
00650 CMath::display_vector(w, w_dim, "w");
00651 SG_PRINT("bias: %f\n", bias);
00652 #endif
00653 SG_PRINT("solver time:%f s\n", lpmtim);
00654
00655 cleanup();
00656
00657 return true;
00658 }
00659 #endif //USE_CPLEX