#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <algorithm>

#include "lib/io.h"
#include "lib/Mathematics.h"
#include "features/SparseFeatures.h"
#include "classifier/svm/ssl.h"

#define VERBOSE 1

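/* Entry point for the semi-supervised linear solvers: initializes Weights
   (dimension Data->n) and Outputs (dimension Data->m) and dispatches on
   Options->algo to CGLS regression/classification, L2-SVM-MFN, transductive
   SVM, or deterministic-annealing S3VM. */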
void ssl_train(struct data *Data,
               struct options *Options,
               struct vector_double *Weights,
               struct vector_double *Outputs)
{
    initialize(Weights,Data->n,0.0);
    initialize(Outputs,Data->m,0.0);
    vector_int *Subset = new vector_int[1];
    initialize(Subset,Data->m);

    int optimality = 0;
    switch(Options->algo)
    {
        case -1:
            SG_SINFO("Regularized Least Squares Regression (CGLS)\n");
            optimality=CGLS(Data,Options,Subset,Weights,Outputs);
            break;
        case RLS:
            SG_SINFO("Regularized Least Squares Classification (CGLS)\n");
            optimality=CGLS(Data,Options,Subset,Weights,Outputs);
            break;
        case SVM:
            SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n");
            optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0);
            break;
        case TSVM:
            SG_SINFO("Transductive L2-SVM (TSVM)\n");
            optimality=TSVM_MFN(Data,Options,Weights,Outputs);
            break;
        case DA_SVM:
            SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n");
            optimality=DA_S3VM(Data,Options,Weights,Outputs);
            break;
        default:
            SG_SERROR("Algorithm unspecified\n");
    }

    delete[] Subset->vec;
    delete[] Subset;
}
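
/* Conjugate Gradient solver for regularized Least Squares (CGLS).
   Minimizes lambda*||beta||^2 + sum_{i in Subset} C[i]*(Y[i] - X_i.beta)^2,
   warm-started from the current Weights/Outputs; the outputs o are kept in
   sync with beta. Returns 1 if the residual stopping criterion was met
   within Options->cgitermax iterations, 0 otherwise. */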
int CGLS(const struct data *Data,
         const struct options *Options,
         const struct vector_int *Subset,
         struct vector_double *Weights,
         struct vector_double *Outputs)
{
    SG_SDEBUG("CGLS starting...");

    /* problem data restricted to the active subset J */
    int active = Subset->d;
    int *J = Subset->vec;
    CSparseFeatures<DREAL>* features=Data->features;
    double *Y = Data->Y;
    double *C = Data->C;
    int n = Data->n;
    double lambda = Options->lambda;
    int cgitermax = Options->cgitermax;
    double epsilon = Options->epsilon;
    double *beta = Weights->vec;
    double *o = Outputs->vec;

    /* z = C*(Y - o): weighted residuals on the active subset */
    double *z = new double[active];
    double *q = new double[active];
    int ii=0;
    for(int i = active ; i-- ;)
    {
        ii=J[i];
        z[i] = C[ii]*(Y[ii] - o[ii]);
    }
    /* r = X'z - lambda*beta: negative gradient; p: search direction */
    double *r = new double[n];
    for(int i = n ; i-- ;)
        r[i] = 0.0;
    for(int j=0; j < active; j++)
    {
        ii=J[j];
        INT num_entries=0;
        bool free_vec=false;

        TSparseEntry<DREAL>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
        for (int i=0; i<num_entries; i++)
            r[vec[i].feat_index]+= vec[i].entry*z[j];
        features->free_sparse_feature_vector(vec, num_entries, free_vec);
        r[n-1]+=Options->bias*z[j];
    }
    double *p = new double[n];
    double omega1 = 0.0;
    for(int i = n ; i-- ;)
    {
        r[i] -= lambda*beta[i];
        p[i] = r[i];
        omega1 += r[i]*r[i];
    }
    double omega_p = omega1;
    double omega_q = 0.0;
    double inv_omega2 = 1/omega1;
    double scale = 0.0;
    double omega_z=0.0;
    double gamma = 0.0;
    int cgiter = 0;
    int optimality = 0;
    double epsilon2 = epsilon*epsilon;

    while(cgiter < cgitermax)
    {
        cgiter++;
        omega_q=0.0;
        double t=0.0;
        int i,j;
        /* q = X*p (with bias) on the active subset */
        for(i=0; i < active; i++)
        {
            ii=J[i];
            t=0.0;
            INT num_entries=0;
            bool free_vec=false;
            TSparseEntry<DREAL>* vec=features->get_sparse_feature_vector(ii,
                num_entries, free_vec);
            for (j=0; j<num_entries; j++)
                t+=vec[j].entry*p[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*p[n-1];
            q[i]=t;
            omega_q += C[ii]*t*t;
        }
        /* step length along the conjugate direction */
        gamma = omega1/(lambda*omega_p + omega_q);
        inv_omega2 = 1/omega1;
        /* update beta, the outputs and the weighted residuals */
        for(i = n ; i-- ;)
        {
            r[i] = 0.0;
            beta[i] += gamma*p[i];
        }
        omega_z=0.0;
        for(i = active ; i-- ;)
        {
            ii=J[i];
            o[ii] += gamma*q[i];
            z[i] -= gamma*C[ii]*q[i];
            omega_z+=z[i]*z[i];
        }
        /* recompute the gradient r = X'z - lambda*beta */
        for(j=0; j < active; j++)
        {
            ii=J[j];
            t=z[j];
            INT num_entries=0;
            bool free_vec=false;

            TSparseEntry<DREAL>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (i=0; i<num_entries; i++)
                r[vec[i].feat_index]+= vec[i].entry*t;
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            r[n-1]+=Options->bias*t;
        }
        omega1 = 0.0;
        for(i = n ; i-- ;)
        {
            r[i] -= lambda*beta[i];
            omega1 += r[i]*r[i];
        }
        if(omega1 < epsilon2*omega_z)
        {
            optimality=1;
            break;
        }
        /* new conjugate direction */
        omega_p=0.0;
        scale=omega1*inv_omega2;
        for(i = n ; i-- ;)
        {
            p[i] = r[i] + p[i]*scale;
            omega_p += p[i]*p[i];
        }
    }
    SG_SDEBUG("...Done.");
    SG_SINFO("CGLS finished in %d iteration(s)", cgiter);

    delete[] z;
    delete[] q;
    delete[] r;
    delete[] p;
    return optimality;
}
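
/* Modified Finite Newton method for the L2-SVM (L2-SVM-MFN). Repeatedly
   (i) solves a regularized least-squares problem on the current active set
   (the margin violators) via CGLS, (ii) takes an exact line search step,
   and (iii) re-partitions the active set, until the epsilon-optimality
   conditions hold. ini!=0 indicates a warm start. Returns 1 (optimality),
   2 (relative objective criterion) or 0 (iteration limit). */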
int L2_SVM_MFN(const struct data *Data,
               struct options *Options,
               struct vector_double *Weights,
               struct vector_double *Outputs,
               int ini)
{
    CSparseFeatures<DREAL>* features=Data->features;
    double *Y = Data->Y;
    double *C = Data->C;
    int n = Data->n;
    int m = Data->m;
    double lambda = Options->lambda;
    double epsilon;
    double *w = Weights->vec;
    double *o = Outputs->vec;
    double F_old = 0.0;
    double F = 0.0;
    double diff=0.0;
    vector_int *ActiveSubset = new vector_int[1];
    ActiveSubset->vec = new int[m];
    ActiveSubset->d = m;

    /* cold start: loose tolerance and few CG iterations at first (speedup heuristic) */
    if(ini==0)
    {
        epsilon=BIG_EPSILON;
        Options->cgitermax=SMALL_CGITERMAX;
        Options->epsilon=BIG_EPSILON;
    }
    else
        epsilon = Options->epsilon;

    /* objective F and initial active set (margin violators: 1 - y*o > 0) */
    for(int i=0;i<n;i++)
        F+=w[i]*w[i];
    F=0.5*lambda*F;
    int active=0;
    int inactive=m-1;
    for(int i=0; i<m ; i++)
    {
        diff=1-Y[i]*o[i];
        if(diff>0)
        {
            ActiveSubset->vec[active]=i;
            active++;
            F+=0.5*C[i]*diff*diff;
        }
        else
        {
            ActiveSubset->vec[inactive]=i;
            inactive--;
        }
    }
    ActiveSubset->d=active;
    int iter=0;
    int opt=0;
    int opt2=0;
    vector_double *Weights_bar = new vector_double[1];
    vector_double *Outputs_bar = new vector_double[1];
    double *w_bar = new double[n];
    double *o_bar = new double[m];
    Weights_bar->vec=w_bar;
    Outputs_bar->vec=o_bar;
    Weights_bar->d=n;
    Outputs_bar->d=m;
    double delta=0.0;
    double t=0.0;
    int ii = 0;
    while(iter<MFNITERMAX)
    {
        iter++;
        SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F);
        for(int i=n; i-- ;)
            w_bar[i]=w[i];
        for(int i=m; i-- ;)
            o_bar[i]=o[i];

        /* Newton step: solve the LS problem on the active set */
        opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
        /* recompute the outputs of the inactive examples under w_bar */
        for(int i=active; i < m; i++)
        {
            ii=ActiveSubset->vec[i];
            t=0.0;

            INT num_entries=0;
            bool free_vec=false;

            TSparseEntry<DREAL>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (int j=0; j<num_entries; j++)
                t+=vec[j].entry*w_bar[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*w_bar[n-1];

            o_bar[ii]=t;
        }
        if(ini==0)
        {
            Options->cgitermax=CGITERMAX;
            ini=1;
        }
        /* check the optimality conditions up to epsilon */
        opt2=1;
        for(int i=0;i<m;i++)
        {
            ii=ActiveSubset->vec[i];
            if(i<active)
                opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+epsilon));
            else
                opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-epsilon));
            if(opt2==0)
                break;
        }
        if(opt && opt2)
        {
            if(epsilon==BIG_EPSILON)
            {
                epsilon=EPSILON;
                Options->epsilon=EPSILON;
                SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON, EPSILON);
                continue;
            }
            else
            {
                for(int i=n; i-- ;)
                    w[i]=w_bar[i];
                for(int i=m; i-- ;)
                    o[i]=o_bar[i];
                delete[] ActiveSubset->vec;
                delete[] ActiveSubset;
                delete[] o_bar;
                delete[] w_bar;
                delete[] Weights_bar;
                delete[] Outputs_bar;
                SG_SINFO("L2_SVM_MFN converged (optimality) in %d iterations", iter);
                return 1;
            }
        }
        /* otherwise take a damped step via exact line search */
        delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m);
        SG_SDEBUG("LINE_SEARCH delta = %f\n", delta);
        F_old=F;
        F=0.0;
        for(int i=n; i-- ;)
        {
            w[i]+=delta*(w_bar[i]-w[i]);
            F+=w[i]*w[i];
        }
        F=0.5*lambda*F;
        active=0;
        inactive=m-1;
        for(int i=0; i<m ; i++)
        {
            o[i]+=delta*(o_bar[i]-o[i]);
            diff=1-Y[i]*o[i];
            if(diff>0)
            {
                ActiveSubset->vec[active]=i;
                active++;
                F+=0.5*C[i]*diff*diff;
            }
            else
            {
                ActiveSubset->vec[inactive]=i;
                inactive--;
            }
        }
        ActiveSubset->d=active;
        if(CMath::abs(F-F_old)<RELATIVE_STOP_EPS*CMath::abs(F_old))
        {
            delete[] ActiveSubset->vec;
            delete[] ActiveSubset;
            delete[] o_bar;
            delete[] w_bar;
            delete[] Weights_bar;
            delete[] Outputs_bar;
            SG_SINFO("L2_SVM_MFN converged (rel. criterion) in %d iterations", iter);
            return 2;
        }
    }
    delete[] ActiveSubset->vec;
    delete[] ActiveSubset;
    delete[] o_bar;
    delete[] w_bar;
    delete[] Weights_bar;
    delete[] Outputs_bar;
    SG_SINFO("L2_SVM_MFN stopped (max iter exceeded) after %d iterations", iter);
    return 0;
}

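/* Exact line search for the MFN step. The objective along w + delta*(w_bar-w)
   is piecewise quadratic in delta, so its derivative is linear between the
   breakpoints where an example crosses the margin. The breakpoints are
   sorted and the derivative coefficients L, R updated until the derivative
   L + delta*(R-L) turns non-negative; the minimizer -L/(R-L) is returned.
   d is the number of weights, l the number of outputs. */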
double line_search(double *w,
                   double *w_bar,
                   double lambda,
                   double *o,
                   double *o_bar,
                   double *Y,
                   double *C,
                   int d,
                   int l)
{
    double omegaL = 0.0;
    double omegaR = 0.0;
    double diff=0.0;
    for(int i=d; i--; )
    {
        diff=w_bar[i]-w[i];
        omegaL+=w[i]*diff;
        omegaR+=w_bar[i]*diff;
    }
    omegaL=lambda*omegaL;
    omegaR=lambda*omegaR;
    double L=0.0;
    double R=0.0;
    int ii=0;
    for(int i=0;i<l;i++)
    {
        if(Y[i]*o[i]<1)
        {
            diff=C[i]*(o_bar[i]-o[i]);
            L+=(o[i]-Y[i])*diff;
            R+=(o_bar[i]-Y[i])*diff;
        }
    }
    L+=omegaL;
    R+=omegaR;
    Delta* deltas=new Delta[l];
    int p=0;
    for(int i=0;i<l;i++)
    {
        diff=Y[i]*(o_bar[i]-o[i]);

        if(Y[i]*o[i]<1)
        {
            if(diff>0)
            {
                deltas[p].delta=(1-Y[i]*o[i])/diff;
                deltas[p].index=i;
                deltas[p].s=-1;
                p++;
            }
        }
        else
        {
            if(diff<0)
            {
                deltas[p].delta=(1-Y[i]*o[i])/diff;
                deltas[p].index=i;
                deltas[p].s=1;
                p++;
            }
        }
    }
    std::sort(deltas,deltas+p);
    double delta_prime=0.0;
    for(int i=0;i<p;i++)
    {
        delta_prime = L + deltas[i].delta*(R-L);
        if(delta_prime>=0)
            break;
        ii=deltas[i].index;
        diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]);
        L+=diff*(o[ii]-Y[ii]);
        R+=diff*(o_bar[ii]-Y[ii]);
    }
    delete [] deltas;
    return (-L/(R-L));
}
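
/* Transductive L2-SVM via label switching. Trains on the labeled examples
   alone, thresholds the unlabeled outputs so that a fraction Options->R is
   labeled positive, then alternates L2-SVM-MFN retraining with pairwise
   label switches while annealing the unlabeled cost lambda_0 up to
   Options->lambda_u. Unlabeled examples are marked by Y[i]==0. Returns the
   total number of label switches. */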
int TSVM_MFN(const struct data *Data,
             struct options *Options,
             struct vector_double *Weights,
             struct vector_double *Outputs)
{
    /* supervised warm start on the labeled examples only */
    struct data *Data_Labeled = new data[1];
    struct vector_double *Outputs_Labeled = new vector_double[1];
    initialize(Outputs_Labeled,Data->l,0.0);
    SG_SDEBUG("Initializing weights, unknown labels");
    GetLabeledData(Data_Labeled,Data);
    L2_SVM_MFN(Data_Labeled, Options, Weights, Outputs_Labeled, 0);

    int p=0,q=0;
    double t=0.0;
    int *JU = new int[Data->u];
    double *ou = new double[Data->u];
    double lambda_0 = TSVM_LAMBDA_SMALL;
    for(int i=0;i<Data->m;i++)
    {
        if(Data->Y[i]==0.0)
        {
            /* unlabeled example: compute its output under the warm-start weights */
            t=0.0;
            INT num_entries=0;
            bool free_vec=false;

            TSparseEntry<DREAL>* vec=Data->features->get_sparse_feature_vector(i, num_entries, free_vec);
            for (int j=0; j<num_entries; j++)
                t+=vec[j].entry*Weights->vec[vec[j].feat_index];
            Data->features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*Weights->vec[Data->n-1];

            Outputs->vec[i]=t;
            Data->C[i]=lambda_0*1.0/Data->u;
            JU[q]=i;
            ou[q]=t;
            q++;
        }
        else
        {
            Outputs->vec[i]=Outputs_Labeled->vec[p];
            Data->C[i]=1.0/Data->l;
            p++;
        }
    }
    delete[] Outputs_Labeled->vec;
    delete[] Outputs_Labeled;
    delete[] Data_Labeled;
    /* threshold so that a fraction R of the unlabeled examples becomes positive */
    int idx=int((1-Options->R)*Data->u - 1);
    std::nth_element(ou,ou+idx,ou+Data->u);
    double thresh=ou[idx];
    delete [] ou;
    for(int i=0;i<Data->u;i++)
    {
        if(Outputs->vec[JU[i]]>thresh)
            Data->Y[JU[i]]=1.0;
        else
            Data->Y[JU[i]]=-1.0;
    }
    for(int i=0;i<Data->n;i++)
        Weights->vec[i]=0.0;
    for(int i=0;i<Data->m;i++)
        Outputs->vec[i]=0.0;
    L2_SVM_MFN(Data,Options,Weights,Outputs,0);
    int num_switches=0;
    int s=0;
    int last_round=0;
    /* anneal the unlabeled cost lambda_0 up to lambda_u */
    while(lambda_0 <= Options->lambda_u)
    {
        int iter2=0;
        while(1)
        {
            s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S);
            if(s==0)
                break;
            iter2++;
            SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2);
            SG_SDEBUG("Optimizing unknown labels. switched %d labels.\n", s);
            num_switches+=s;
            SG_SDEBUG("Optimizing weights\n");
            L2_SVM_MFN(Data,Options,Weights,Outputs,1);
        }
        if(last_round==1)
            break;
        lambda_0=TSVM_ANNEALING_RATE*lambda_0;
        if(lambda_0 >= Options->lambda_u)
        {
            lambda_0 = Options->lambda_u;
            last_round=1;
        }
        for(int i=0;i<Data->u;i++)
            Data->C[JU[i]]=lambda_0*1.0/Data->u;
        SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u);
        SG_SDEBUG("Optimizing weights\n");
        L2_SVM_MFN(Data,Options,Weights,Outputs,1);
    }
    SG_SDEBUG("Total Number of Switches = %d\n", num_switches);
    /* reset the labels of the unlabeled examples */
    for(int i=0;i<Data->u;i++)
        Data->Y[JU[i]] = 0.0;
    double F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
    SG_SDEBUG("Objective Value = %f\n",F);
    delete [] JU;
    return num_switches;
}
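
/* Swap the labels of up to S pairs of unlabeled examples inside the margin:
   the positively labeled example with the lowest output is paired with the
   negatively labeled one with the highest output, as long as the swap
   lowers the objective. Returns the number of pairs switched. */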
int switch_labels(double* Y, double* o, int* JU, int u, int S)
{
    /* count the margin-violating positives and negatives among the unlabeled */
    int npos=0;
    int nneg=0;
    for(int i=0;i<u;i++)
    {
        if((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++;
        if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++;
    }
    Delta* positive=new Delta[npos];
    Delta* negative=new Delta[nneg];
    int p=0;
    int n=0;
    int ii=0;
    for(int i=0;i<u;i++)
    {
        ii=JU[i];
        if((Y[ii]>0.0) && (o[ii]<1.0))
        {
            positive[p].delta=o[ii];
            positive[p].index=ii;
            positive[p].s=0;
            p++;
        }
        if((Y[ii]<0.0) && (-o[ii]<1.0))
        {
            negative[n].delta=-o[ii];
            negative[n].index=ii;
            negative[n].s=0;
            n++;
        }
    }
    std::sort(positive,positive+npos);
    std::sort(negative,negative+nneg);
    int s=-1;
    while(1)
    {
        s++;
        /* bounds checks must come before the array accesses */
        if((s>=S) || (s>=npos) || (s>=nneg) || (positive[s].delta>=-negative[s].delta))
            break;
        Y[positive[s].index]=-1.0;
        Y[negative[s].index]= 1.0;
    }
    delete [] positive;
    delete [] negative;
    return s;
}

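/* Deterministic Annealing S3VM. p[i] is the belief that unlabeled example i
   is positive (initialized to Options->R). At each temperature T the inner
   loop alternates optimize_p (belief update via root finding) and
   optimize_w (belief-weighted L2-SVM-MFN) until the KL divergence between
   successive beliefs is small; T is then divided by DA_ANNEALING_RATE. The
   weights/outputs with the best transductive cost seen are returned. */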
int DA_S3VM(struct data *Data,
            struct options *Options,
            struct vector_double *Weights,
            struct vector_double *Outputs)
{
    double T = DA_INIT_TEMP*Options->lambda_u;
    int iter1 = 0, iter2 = 0;
    double *p = new double[Data->u];
    double *q = new double[Data->u];
    double *g = new double[Data->u];
    double F,F_min;
    double *w_min = new double[Data->n];
    double *o_min = new double[Data->m];
    double *w = Weights->vec;
    double *o = Outputs->vec;
    double kl_divergence = 1.0;

    SG_SDEBUG("Initializing weights, p");
    for(int i=0;i<Data->u; i++)
        p[i] = Options->R;

    /* JU: indices of the unlabeled examples (Y==0) */
    int *JU = new int[Data->u];
    int j=0;
    for(int i=0;i<Data->m;i++)
    {
        if(Data->Y[i]==0.0)
        {
            JU[j]=i;
            j++;
        }
    }
    double H = entropy(p,Data->u);
    optimize_w(Data,p,Options,Weights,Outputs,0);
    F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
    F_min = F;
    for(int i=0;i<Weights->d;i++)
        w_min[i]=w[i];
    for(int i=0;i<Outputs->d;i++)
        o_min[i]=o[i];
    while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon))
    {
        iter1++;
        iter2=0;
        kl_divergence=1.0;
        while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon))
        {
            iter2++;
            for(int i=0;i<Data->u;i++)
            {
                q[i]=p[i];
                /* g[i]: squared-hinge loss of labeling example JU[i] as +1
                   minus that of labeling it as -1 */
                g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]] < -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]])));
            }
            SG_SDEBUG("Optimizing p.\n");
            optimize_p(g,Data->u,T,Options->R,p);
            kl_divergence=KL(p,q,Data->u);
            SG_SDEBUG("Optimizing weights\n");
            optimize_w(Data,p,Options,Weights,Outputs,1);
            F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
            /* keep the best iterate seen so far */
            if(F < F_min)
            {
                F_min = F;
                for(int i=0;i<Weights->d;i++)
                    w_min[i]=w[i];
                for(int i=0;i<Outputs->d;i++)
                    o_min[i]=o[i];
            }
            SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F);
        }
        H = entropy(p,Data->u);
        SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H);
        T = T/DA_ANNEALING_RATE;
    }
    for(int i=0;i<Weights->d;i++)
        w[i]=w_min[i];
    for(int i=0;i<Outputs->d;i++)
        o[i]=o_min[i];

    delete [] p;
    delete [] q;
    delete [] g;
    delete [] JU;
    delete [] w_min;
    delete [] o_min;
    SG_SINFO("(min) Objective Value = %f", F_min);
    return 1;
}
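
/* Weight step of DA-S3VM: L2-SVM-MFN on an augmented problem where each
   unlabeled example appears twice, once with label +1 and cost
   (lambda_u/u)*p[j] and once with label -1 and cost (lambda_u/u)*(1-p[j]),
   so its effective loss inside the margin is p*(1-o)^2 + (1-p)*(1+o)^2.
   Returns 1 on epsilon-optimality, 0 otherwise. */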
int optimize_w(const struct data *Data,
               const double *p,
               struct options *Options,
               struct vector_double *Weights,
               struct vector_double *Outputs,
               int ini)
{
    int i,j;
    CSparseFeatures<DREAL>* features=Data->features;
    int n = Data->n;
    int m = Data->m;
    int u = Data->u;
    double lambda = Options->lambda;
    double epsilon;
    double *w = Weights->vec;
    /* each unlabeled example gets two slots: +1 at index i, -1 at index m+j */
    double *o = new double[m+u];
    double *Y = new double[m+u];
    double *C = new double[m+u];
    int *labeled_indices = new int[m];
    double F_old = 0.0;
    double F = 0.0;
    double diff=0.0;
    double lambda_u_by_u = Options->lambda_u/u;
    vector_int *ActiveSubset = new vector_int[1];
    ActiveSubset->vec = new int[m];
    ActiveSubset->d = m;

    if(ini==0)
    {
        epsilon=BIG_EPSILON;
        Options->cgitermax=SMALL_CGITERMAX;
        Options->epsilon=BIG_EPSILON;
    }
    else
        epsilon = Options->epsilon;

    for(i=0;i<n;i++)
        F+=w[i]*w[i];
    F=lambda*F;
    int active=0;
    int inactive=m-1;
    double temp1;
    double temp2;

    j = 0;
    for(i=0; i<m ; i++)
    {
        o[i]=Outputs->vec[i];
        if(Data->Y[i]==0.0)
        {
            /* unlabeled: cost split between the +1 and -1 copies by belief p[j] */
            labeled_indices[i]=0;
            o[m+j]=o[i];
            Y[i]=1;
            Y[m+j]=-1;
            C[i]=lambda_u_by_u*p[j];
            C[m+j]=lambda_u_by_u*(1-p[j]);
            ActiveSubset->vec[active]=i;
            active++;
            diff = 1 - CMath::abs(o[i]);
            if(diff>0)
            {
                Data->Y[i] = 2*p[j]-1;
                Data->C[i] = lambda_u_by_u;
                temp1 = (1 - o[i]);
                temp2 = (1 + o[i]);
                F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
            }
            else
            {
                if(o[i]>0)
                {
                    Data->Y[i] = -1.0;
                    Data->C[i] = C[m+j];
                }
                else
                {
                    Data->Y[i] = 1.0;
                    Data->C[i] = C[i];
                }
                temp1 = (1-Data->Y[i]*o[i]);
                F+= Data->C[i]*temp1*temp1;
            }
            j++;
        }
        else
        {
            labeled_indices[i]=1;
            Y[i]=Data->Y[i];
            C[i]=1.0/Data->l;
            Data->C[i]=1.0/Data->l;
            diff=1-Data->Y[i]*o[i];
            if(diff>0)
            {
                ActiveSubset->vec[active]=i;
                active++;
                F+=Data->C[i]*diff*diff;
            }
            else
            {
                ActiveSubset->vec[inactive]=i;
                inactive--;
            }
        }
    }
    F=0.5*F;
    ActiveSubset->d=active;
    int iter=0;
    int opt=0;
    int opt2=0;
    vector_double *Weights_bar = new vector_double[1];
    vector_double *Outputs_bar = new vector_double[1];
    double *w_bar = new double[n];
    double *o_bar = new double[m+u];
    Weights_bar->vec=w_bar;
    Outputs_bar->vec=o_bar;
    Weights_bar->d=n;
    Outputs_bar->d=m;
    double delta=0.0;
    double t=0.0;
    int ii = 0;
    while(iter<MFNITERMAX)
    {
        iter++;
        SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F);
        for(i=n; i-- ;)
            w_bar[i]=w[i];

        for(i=m+u; i-- ;)
            o_bar[i]=o[i];
        opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
        for(i=active; i < m; i++)
        {
            ii=ActiveSubset->vec[i];
            t=0.0;
            INT num_entries=0;
            bool free_vec=false;

            TSparseEntry<DREAL>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (j=0; j<num_entries; j++)
                t+=vec[j].entry*w_bar[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*w_bar[n-1];

            o_bar[ii]=t;
        }

        /* keep the duplicated unlabeled outputs in sync */
        j=0;
        for(i=0; i<m;i++)
        {
            if(labeled_indices[i]==0)
            {
                o_bar[m+j]=o_bar[i];
                j++;
            }
        }
        if(ini==0)
        {
            Options->cgitermax=CGITERMAX;
            ini=1;
        }
        opt2=1;
        for(i=0; i < m ;i++)
        {
            ii=ActiveSubset->vec[i];
            if(i<active)
            {
                if(labeled_indices[ii]==1)
                    opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+epsilon));
                else
                {
                    if(CMath::abs(o[ii])<1)
                        opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon));
                    else
                        opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon));
                }
            }
            else
                opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon));
            if(opt2==0)
                break;
        }
        if(opt && opt2)
        {
            if(epsilon==BIG_EPSILON)
            {
                epsilon=EPSILON;
                Options->epsilon=EPSILON;
                SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON);
                continue;
            }
            else
            {
                for(i=n; i-- ;)
                    w[i]=w_bar[i];
                for(i=m; i-- ;)
                    Outputs->vec[i]=o_bar[i];
                for(i=m; i-- ;)
                {
                    if(labeled_indices[i]==0)
                        Data->Y[i]=0.0;
                }
                delete[] ActiveSubset->vec;
                delete[] ActiveSubset;
                delete[] o_bar;
                delete[] w_bar;
                delete[] o;
                delete[] Weights_bar;
                delete[] Outputs_bar;
                delete[] Y;
                delete[] C;
                delete[] labeled_indices;
                SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter);
                return 1;
            }
        }

        delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u);
        SG_SDEBUG("LINE_SEARCH delta = %f", delta);
        F_old=F;
        F=0.0;
        for(i=0;i<n;i++)
        {
            w[i]+=delta*(w_bar[i]-w[i]);
            F+=w[i]*w[i];
        }
        F=lambda*F;
        j=0;
        active=0;
        inactive=m-1;
        for(i=0; i<m ; i++)
        {
            o[i]+=delta*(o_bar[i]-o[i]);
            if(labeled_indices[i]==0)
            {
                o[m+j]=o[i];
                ActiveSubset->vec[active]=i;
                active++;
                diff = 1 - CMath::abs(o[i]);
                if(diff>0)
                {
                    Data->Y[i] = 2*p[j]-1;
                    Data->C[i] = lambda_u_by_u;
                    temp1 = (1 - o[i]);
                    temp2 = (1 + o[i]);
                    F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
                }
                else
                {
                    if(o[i]>0)
                    {
                        Data->Y[i] = -1.0;
                        Data->C[i] = C[m+j];
                    }
                    else
                    {
                        Data->Y[i] = 1.0;
                        Data->C[i] = C[i];
                    }
                    temp1=(1-Data->Y[i]*o[i]);
                    F+= Data->C[i]*temp1*temp1;
                }
                j++;
            }
            else
            {
                diff=1-Data->Y[i]*o[i];
                if(diff>0)
                {
                    ActiveSubset->vec[active]=i;
                    active++;
                    F+=Data->C[i]*diff*diff;
                }
                else
                {
                    ActiveSubset->vec[inactive]=i;
                    inactive--;
                }
            }
        }
        F=0.5*F;
        ActiveSubset->d=active;
        if(CMath::abs(F-F_old)<EPSILON)
            break;
    }
    for(i=m; i-- ;)
    {
        Outputs->vec[i]=o[i];
        if(labeled_indices[i]==0)
            Data->Y[i]=0.0;
    }
    delete[] ActiveSubset->vec;
    delete[] labeled_indices;
    delete[] ActiveSubset;
    delete[] o_bar;
    delete[] w_bar;
    delete[] o;
    delete[] Weights_bar;
    delete[] Outputs_bar;
    delete[] Y;
    delete[] C;
    SG_SINFO("L2_SVM_MFN finished in %d iterations", iter);
    return 0;
}
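
/* Belief update of DA-S3VM: minimizes <p,g> - T*entropy(p) subject to
   mean(p) = r. The optimum has the form p[i] = 1/(1+exp((g[i]-nu)/T)),
   where the multiplier nu is found by a bracketed Newton/bisection root
   search on B(nu) = mean_i p_i(nu) - r. */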
void optimize_p(const double* g, int u, double T, double r, double* p)
{
    int iter=0;
    double epsilon=1e-10;
    int maxiter=500;
    /* bracket the root */
    double nu_minus=g[0];
    double nu_plus=g[0];
    for(int i=0;i<u;i++)
    {
        if(g[i]<nu_minus) nu_minus=g[i];
        if(g[i]>nu_plus) nu_plus=g[i];
    }

    double b=T*log((1-r)/r);
    nu_minus-=b;
    nu_plus-=b;
    double nu=(nu_plus+nu_minus)/2;
    double Bnu=0.0;
    double BnuPrime=0.0;
    double s=0.0;
    double tmp=0.0;
    for(int i=0;i<u;i++)
    {
        s=exp((g[i]-nu)/T);
        if(!(isinf(s)))
        {
            tmp=1.0/(1.0+s);
            Bnu+=tmp;
            BnuPrime+=s*tmp*tmp;
        }
    }
    Bnu=Bnu/u;
    Bnu-=r;
    BnuPrime=BnuPrime/(T*u);
    double nuHat=0.0;
    while((CMath::abs(Bnu)>epsilon) && (iter < maxiter))
    {
        iter++;
        if(CMath::abs(BnuPrime)>0.0)
            nuHat=nu-Bnu/BnuPrime;
        /* safeguarded Newton: bisect when the Newton step is undefined or
           leaves the bracket */
        if((CMath::abs(BnuPrime) == 0.0) || (nuHat>nu_plus) || (nuHat < nu_minus))
            nu=(nu_minus+nu_plus)/2.0;
        else
            nu=nuHat;
        Bnu=0.0;
        BnuPrime=0.0;
        for(int i=0;i<u;i++)
        {
            s=exp((g[i]-nu)/T);
            if(!(isinf(s)))
            {
                tmp=1.0/(1.0+s);
                Bnu+=tmp;
                BnuPrime+=s*tmp*tmp;
            }
        }
        Bnu=Bnu/u;
        Bnu-=r;
        BnuPrime=BnuPrime/(T*u);
        if(Bnu<0)
            nu_minus=nu;
        else
            nu_plus=nu;
        if(CMath::abs(nu_minus-nu_plus)<epsilon)
            break;
    }
    if(CMath::abs(Bnu)>epsilon)
        SG_SWARNING("Warning (Root): root not found to required precision\n");

    for(int i=0;i<u;i++)
    {
        s=exp((g[i]-nu)/T);
        if(isinf(s))
            p[i]=0.0;
        else
            p[i]=1.0/(1.0+s);
    }
    SG_SINFO("root (nu) = %f B(nu) = %f", nu, Bnu);
}
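
/* Transductive objective: 0.5*( lambda*||w||^2 + lambda_u * mean squared
   hinge loss over the unlabeled examples (Y==0, using |o| as the margin)
   + mean squared hinge loss over the labeled examples ). */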
double transductive_cost(double normWeights, double *Y, double *Outputs, int m, double lambda, double lambda_u)
{
    double F1=0.0, F2=0.0, o=0.0, y=0.0;
    int u=0, l=0;
    for(int i=0;i<m;i++)
    {
        o=Outputs[i];
        y=Y[i];
        if(y==0.0)
        {
            F1 += CMath::abs(o) > 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o));
            u++;
        }
        else
        {
            F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o);
            l++;
        }
    }
    double F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l);
    return F;
}

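/* Mean binary entropy (in bits) of the belief vector p. */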
double entropy(const double *p, int u)
{
    double h=0.0;
    double q=0.0;
    for(int i=0;i<u;i++)
    {
        q=p[i];
        if(q>0 && q<1)
            h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q));
    }
    return h/u;
}
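
/* Mean binary KL divergence (in bits) between belief vectors p and q. */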
double KL(const double *p, const double *q, int u)
{
    double h=0.0;
    double p1=0.0;
    double q1=0.0;
    double g=0.0;
    for(int i=0;i<u;i++)
    {
        p1=p[i];
        q1=q[i];
        /* clamp away from 0 and 1 to keep the logs finite */
        if(p1>1-1e-8) p1-=1e-8;
        if(p1<1e-8) p1+=1e-8;
        if(q1>1-1e-8) q1-=1e-8;
        if(q1<1e-8) q1+=1e-8;
        g = p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1));
        if(CMath::abs(g)<1e-12 || isnan(g))
            g=0.0;
        h+=g;
    }
    return h/u;
}

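/* ||A||^2: squared Euclidean norm of a vector_double. */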
double norm_square(const vector_double *A)
{
    double x=0.0, t=0.0;
    for(int i=0;i<A->d;i++)
    {
        t=A->vec[i];
        x+=t*t;
    }
    return x;
}

/* Allocate A->vec with k entries, all set to a. */
void initialize(struct vector_double *A, int k, double a)
{
    double *vec = new double[k];
    for(int i=0;i<k;i++)
        vec[i]=a;
    A->vec = vec;
    A->d = k;
}

/* Allocate A->vec with the identity permutation 0..k-1. */
void initialize(struct vector_int *A, int k)
{
    int *vec = new int[k];
    for(int i=0;i<k;i++)
        vec[i]=i;
    A->vec = vec;
    A->d = k;
}
void GetLabeledData(struct data *D, const struct data *Data)
{
    /* FIXME: should copy the Data->l labeled examples (Y[i]!=0.0) of Data
       into D; TSVM_MFN calls this before its supervised warm start. */
}