#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h> /* exp, log, isinf, isnan */
#include <algorithm>

#include "lib/io.h"
#include "lib/Mathematics.h"
#include "features/SparseFeatures.h"
#include "classifier/svm/ssl.h"

#define VERBOSE 1

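/* Entry point for semi-supervised training.  Allocates and fills Weights
   (length Data->n) and Outputs (length Data->m), then dispatches on
   Options->algo; Subset initially indexes all m training examples. */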
void ssl_train(struct data *Data,
    struct options *Options,
    struct vector_double *Weights,
    struct vector_double *Outputs)
{
    initialize(Weights,Data->n,0.0);
    initialize(Outputs,Data->m,0.0);
    vector_int *Subset = new vector_int[1];
    initialize(Subset,Data->m);

    int32_t optimality = 0;
    switch (Options->algo)
    {
        case -1:
            SG_SINFO("Regularized Least Squares Regression (CGLS)\n");
            optimality=CGLS(Data,Options,Subset,Weights,Outputs);
            break;
        case RLS:
            SG_SINFO("Regularized Least Squares Classification (CGLS)\n");
            optimality=CGLS(Data,Options,Subset,Weights,Outputs);
            break;
        case SVM:
            SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n");
            optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0);
            break;
        case TSVM:
            SG_SINFO("Transductive L2-SVM (TSVM)\n");
            optimality=TSVM_MFN(Data,Options,Weights,Outputs);
            break;
        case DA_SVM:
            SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n");
            optimality=DA_S3VM(Data,Options,Weights,Outputs);
            break;
        default:
            SG_SERROR("Algorithm unspecified");
    }

    delete[] Subset->vec;
    delete[] Subset;
}

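/* Conjugate Gradient for Least Squares (CGLS): solves the regularized
   least-squares subproblem restricted to the active set Subset,
   min_beta 0.5*lambda*|beta|^2 + 0.5*sum_{i in Subset} C[i]*(Y[i]-o[i])^2,
   updating Weights and Outputs in place.  Returns 1 if the residual
   tolerance was reached within Options->cgitermax iterations, else 0. */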
int32_t CGLS(
    const struct data *Data, const struct options *Options,
    const struct vector_int *Subset, struct vector_double *Weights,
    struct vector_double *Outputs)
{
    SG_SDEBUG("CGLS starting...");

    /* disassemble the structures */
    int32_t active = Subset->d;
    int32_t *J = Subset->vec;
    CSparseFeatures<float64_t>* features=Data->features;
    float64_t *Y = Data->Y;
    float64_t *C = Data->C;
    int32_t n = Data->n;
    float64_t lambda = Options->lambda;
    int32_t cgitermax = Options->cgitermax;
    float64_t epsilon = Options->epsilon;
    float64_t *beta = Weights->vec;
    float64_t *o = Outputs->vec;

    /* z = C*(Y - o) on the active set */
    float64_t *z = new float64_t[active];
    float64_t *q = new float64_t[active];
    int32_t ii=0;
    for (int32_t i=active; i--;)
    {
        ii=J[i];
        z[i] = C[ii]*(Y[ii] - o[ii]);
    }
    /* r = X'*z - lambda*beta (negative gradient) */
    float64_t *r = new float64_t[n];
    for (int32_t i=n; i--;)
        r[i] = 0.0;
    for (int32_t j=0; j<active; j++)
    {
        ii=J[j];
        int32_t num_entries=0;
        bool free_vec=false;

        TSparseEntry<float64_t>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
        for (int32_t i=0; i<num_entries; i++)
            r[vec[i].feat_index]+= vec[i].entry*z[j];
        features->free_sparse_feature_vector(vec, num_entries, free_vec);
        r[n-1]+=Options->bias*z[j];
    }
    float64_t *p = new float64_t[n];
    float64_t omega1 = 0.0;
    for (int32_t i=n; i--;)
    {
        r[i] -= lambda*beta[i];
        p[i] = r[i];
        omega1 += r[i]*r[i];
    }
    float64_t omega_p = omega1;
    float64_t omega_q = 0.0;
    float64_t inv_omega2 = 1/omega1;
    float64_t scale = 0.0;
    float64_t omega_z = 0.0;
    float64_t gamma = 0.0;
    int32_t cgiter = 0;
    int32_t optimality = 0;
    float64_t epsilon2 = epsilon*epsilon;
    /* CG iterations */
    while (cgiter < cgitermax)
    {
        cgiter++;
        omega_q=0.0;
        float64_t t=0.0;
        int32_t i,j;
        /* q = X*p */
        for (i=0; i<active; i++)
        {
            ii=J[i];
            t=0.0;
            int32_t num_entries=0;
            bool free_vec=false;
            TSparseEntry<float64_t>* vec=features->get_sparse_feature_vector(ii,
                num_entries, free_vec);
            for (j=0; j<num_entries; j++)
                t+=vec[j].entry*p[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*p[n-1];
            q[i]=t;
            omega_q += C[ii]*t*t;
        }
        gamma = omega1/(lambda*omega_p + omega_q);
        inv_omega2 = 1/omega1;
        /* beta += gamma*p */
        for (i=n; i--;)
        {
            r[i] = 0.0;
            beta[i] += gamma*p[i];
        }
        /* o += gamma*q, z -= gamma*C*q */
        omega_z=0.0;
        for (i=active; i--;)
        {
            ii=J[i];
            o[ii] += gamma*q[i];
            z[i] -= gamma*C[ii]*q[i];
            omega_z+=z[i]*z[i];
        }
        /* recompute r = X'*z - lambda*beta */
        for (j=0; j<active; j++)
        {
            ii=J[j];
            t=z[j];
            int32_t num_entries=0;
            bool free_vec=false;

            TSparseEntry<float64_t>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (i=0; i<num_entries; i++)
                r[vec[i].feat_index]+= vec[i].entry*t;
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            r[n-1]+=Options->bias*t;
        }
        omega1 = 0.0;
        for (i=n; i--;)
        {
            r[i] -= lambda*beta[i];
            omega1 += r[i]*r[i];
        }
        if (omega1 < epsilon2*omega_z)
        {
            optimality=1;
            break;
        }
        /* p = r + (omega1/omega1_old)*p */
        omega_p=0.0;
        scale=omega1*inv_omega2;
        for (i=n; i--;)
        {
            p[i] = r[i] + p[i]*scale;
            omega_p += p[i]*p[i];
        }
    }
    SG_SDEBUG("...Done.");
    SG_SINFO("CGLS finished in %d iteration(s)", cgiter);

    delete[] z;
    delete[] q;
    delete[] r;
    delete[] p;
    return optimality;
}

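/* L2-SVM-MFN: modified finite Newton method for the primal L2-SVM.
   Alternates a CGLS solve on the current active set with an exact line
   search; ini!=0 warm-starts from the current Weights.  Returns 1 on
   optimality, 2 on the relative objective-decrease criterion, and 0 if
   MFNITERMAX is exceeded. */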
int32_t L2_SVM_MFN(
    const struct data *Data, struct options *Options,
    struct vector_double *Weights, struct vector_double *Outputs,
    int32_t ini)
{
    /* disassemble the structures */
    CSparseFeatures<float64_t>* features=Data->features;
    float64_t *Y = Data->Y;
    float64_t *C = Data->C;
    int32_t n = Data->n;
    int32_t m = Data->m;
    float64_t lambda = Options->lambda;
    float64_t epsilon;
    float64_t *w = Weights->vec;
    float64_t *o = Outputs->vec;
    float64_t F_old = 0.0;
    float64_t F = 0.0;
    float64_t diff=0.0;
    vector_int *ActiveSubset = new vector_int[1];
    ActiveSubset->vec = new int32_t[m];
    ActiveSubset->d = m;

    /* Cold start: begin with a loose tolerance and few CG iterations
       (speedup heuristic); both are tightened once that case converges. */
    if (ini==0)
    {
        epsilon=BIG_EPSILON;
        Options->cgitermax=SMALL_CGITERMAX;
        Options->epsilon=BIG_EPSILON;
    }
    else
        epsilon = Options->epsilon;

    for (int32_t i=0; i<n; i++)
        F+=w[i]*w[i];
    F=0.5*lambda*F;
    int32_t active=0;
    int32_t inactive=m-1;
    for (int32_t i=0; i<m; i++)
    {
        diff=1-Y[i]*o[i];
        if (diff>0)
        {
            ActiveSubset->vec[active]=i;
            active++;
            F+=0.5*C[i]*diff*diff;
        }
        else
        {
            ActiveSubset->vec[inactive]=i;
            inactive--;
        }
    }
    ActiveSubset->d=active;
    int32_t iter=0;
    int32_t opt=0;
    int32_t opt2=0;
    vector_double *Weights_bar = new vector_double[1];
    vector_double *Outputs_bar = new vector_double[1];
    float64_t *w_bar = new float64_t[n];
    float64_t *o_bar = new float64_t[m];
    Weights_bar->vec=w_bar;
    Outputs_bar->vec=o_bar;
    Weights_bar->d=n;
    Outputs_bar->d=m;
    float64_t delta=0.0;
    float64_t t=0.0;
    int32_t ii = 0;
    while (iter<MFNITERMAX)
    {
        iter++;
        SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F);
        for (int32_t i=n; i--;)
            w_bar[i]=w[i];
        for (int32_t i=m; i--;)
            o_bar[i]=o[i];

        opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
        /* compute outputs of the inactive examples at w_bar */
        for (int32_t i=active; i<m; i++)
        {
            ii=ActiveSubset->vec[i];
            t=0.0;

            int32_t num_entries=0;
            bool free_vec=false;

            TSparseEntry<float64_t>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (int32_t j=0; j<num_entries; j++)
                t+=vec[j].entry*w_bar[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*w_bar[n-1];

            o_bar[ii]=t;
        }
        if (ini==0)
        {
            Options->cgitermax=CGITERMAX;
            ini=1;
        }
        /* optimality check: active examples must satisfy Y*o<=1+eps,
           inactive ones Y*o>=1-eps */
        opt2=1;
        for (int32_t i=0; i<m; i++)
        {
            ii=ActiveSubset->vec[i];
            if (i<active)
                opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+epsilon));
            else
                opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-epsilon));
            if (opt2==0)
                break;
        }
        if (opt && opt2)
        {
            if (epsilon==BIG_EPSILON)
            {
                epsilon=EPSILON;
                Options->epsilon=EPSILON;
                SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON, EPSILON);
                continue;
            }
            else
            {
                for (int32_t i=n; i--;)
                    w[i]=w_bar[i];
                for (int32_t i=m; i--;)
                    o[i]=o_bar[i];
                delete[] ActiveSubset->vec;
                delete[] ActiveSubset;
                delete[] o_bar;
                delete[] w_bar;
                delete[] Weights_bar;
                delete[] Outputs_bar;
                SG_SINFO("L2_SVM_MFN converged (optimality) in %d iteration(s)", iter);
                return 1;
            }
        }
        delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m);
        SG_SDEBUG("LINE_SEARCH delta = %f\n", delta);
        F_old=F;
        F=0.0;
        for (int32_t i=n; i--;)
        {
            w[i]+=delta*(w_bar[i]-w[i]);
            F+=w[i]*w[i];
        }
        F=0.5*lambda*F;
        active=0;
        inactive=m-1;
        for (int32_t i=0; i<m; i++)
        {
            o[i]+=delta*(o_bar[i]-o[i]);
            diff=1-Y[i]*o[i];
            if (diff>0)
            {
                ActiveSubset->vec[active]=i;
                active++;
                F+=0.5*C[i]*diff*diff;
            }
            else
            {
                ActiveSubset->vec[inactive]=i;
                inactive--;
            }
        }
        ActiveSubset->d=active;
        if (CMath::abs(F-F_old)<RELATIVE_STOP_EPS*CMath::abs(F_old))
        {
            delete[] ActiveSubset->vec;
            delete[] ActiveSubset;
            delete[] o_bar;
            delete[] w_bar;
            delete[] Weights_bar;
            delete[] Outputs_bar;
            SG_SINFO("L2_SVM_MFN converged (rel. criterion) in %d iterations", iter);
            return 2;
        }
    }
    delete[] ActiveSubset->vec;
    delete[] ActiveSubset;
    delete[] o_bar;
    delete[] w_bar;
    delete[] Weights_bar;
    delete[] Outputs_bar;
    SG_SINFO("L2_SVM_MFN stopped (max iter exceeded) after %d iterations", iter);
    return 0;
}

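/* Exact line search from (w,o) towards (w_bar,o_bar).  The restriction of
   the L2-SVM objective to this segment is piecewise quadratic; the code
   sorts the breakpoints where examples enter or leave the active set and
   returns the step at which the directional derivative L+delta*(R-L)
   changes sign. */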
float64_t line_search(float64_t *w,
    float64_t *w_bar,
    float64_t lambda,
    float64_t *o,
    float64_t *o_bar,
    float64_t *Y,
    float64_t *C,
    int32_t d, /* data dimensionality, 'n' */
    int32_t l) /* number of examples */
{
    float64_t omegaL = 0.0;
    float64_t omegaR = 0.0;
    float64_t diff=0.0;
    for (int32_t i=d; i--;)
    {
        diff=w_bar[i]-w[i];
        omegaL+=w[i]*diff;
        omegaR+=w_bar[i]*diff;
    }
    omegaL=lambda*omegaL;
    omegaR=lambda*omegaR;
    float64_t L=0.0;
    float64_t R=0.0;
    int32_t ii=0;
    for (int32_t i=0; i<l; i++)
    {
        if (Y[i]*o[i]<1)
        {
            diff=C[i]*(o_bar[i]-o[i]);
            L+=(o[i]-Y[i])*diff;
            R+=(o_bar[i]-Y[i])*diff;
        }
    }
    L+=omegaL;
    R+=omegaR;
    /* breakpoints where an example crosses the margin along the segment */
    Delta* deltas=new Delta[l];
    int32_t p=0;
    for (int32_t i=0; i<l; i++)
    {
        diff=Y[i]*(o_bar[i]-o[i]);

        if (Y[i]*o[i]<1)
        {
            if (diff>0)
            {
                deltas[p].delta=(1-Y[i]*o[i])/diff;
                deltas[p].index=i;
                deltas[p].s=-1;
                p++;
            }
        }
        else
        {
            if (diff<0)
            {
                deltas[p].delta=(1-Y[i]*o[i])/diff;
                deltas[p].index=i;
                deltas[p].s=1;
                p++;
            }
        }
    }
    std::sort(deltas,deltas+p);
    float64_t delta_prime=0.0;
    for (int32_t i=0; i<p; i++)
    {
        delta_prime = L + deltas[i].delta*(R-L);
        if (delta_prime>=0)
            break;
        ii=deltas[i].index;
        diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]);
        L+=diff*(o[ii]-Y[ii]);
        R+=diff*(o_bar[ii]-Y[ii]);
    }
    delete[] deltas;
    return (-L/(R-L));
}

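/* Transductive L2-SVM.  Trains on the labeled examples first, labels a
   fraction R of the unlabeled examples positive based on the resulting
   outputs, then alternates label switching with L2-SVM-MFN retraining
   while annealing the unlabeled-loss weight lambda_0 up to
   Options->lambda_u.  Returns the total number of label switches. */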
int32_t TSVM_MFN(
    const struct data *Data, struct options *Options,
    struct vector_double *Weights, struct vector_double *Outputs)
{
    /* train on the labeled examples only */
    struct data *Data_Labeled = new data[1];
    struct vector_double *Outputs_Labeled = new vector_double[1];
    initialize(Outputs_Labeled,Data->l,0.0);
    SG_SDEBUG("Initializing weights, unknown labels");
    GetLabeledData(Data_Labeled,Data);
    L2_SVM_MFN(Data_Labeled, Options, Weights, Outputs_Labeled, 0);

    /* use this weight vector to obtain outputs for the unlabeled examples */
    int32_t p=0,q=0;
    float64_t t=0.0;
    int32_t *JU = new int32_t[Data->u];
    float64_t *ou = new float64_t[Data->u];
    float64_t lambda_0 = TSVM_LAMBDA_SMALL;
    for (int32_t i=0; i<Data->m; i++)
    {
        if (Data->Y[i]==0.0) /* unlabeled */
        {
            t=0.0;
            int32_t num_entries=0;
            bool free_vec=false;

            TSparseEntry<float64_t>* vec=Data->features->get_sparse_feature_vector(i, num_entries, free_vec);
            for (int32_t j=0; j<num_entries; j++)
                t+=vec[j].entry*Weights->vec[vec[j].feat_index];
            Data->features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*Weights->vec[Data->n-1];

            Outputs->vec[i]=t;
            Data->C[i]=lambda_0*1.0/Data->u;
            JU[q]=i;
            ou[q]=t;
            q++;
        }
        else
        {
            Outputs->vec[i]=Outputs_Labeled->vec[p];
            Data->C[i]=1.0/Data->l;
            p++;
        }
    }
    delete[] Outputs_Labeled->vec;
    delete[] Outputs_Labeled;
    delete[] Data_Labeled;
    /* threshold so that a fraction R of the unlabeled examples is positive */
    int32_t nth = int32_t((1-Options->R)*Data->u)-1;
    std::nth_element(ou,ou+nth,ou+Data->u);
    float64_t thresh=ou[nth];
    delete[] ou;
    for (int32_t i=0; i<Data->u; i++)
    {
        if (Outputs->vec[JU[i]]>thresh)
            Data->Y[JU[i]]=1.0;
        else
            Data->Y[JU[i]]=-1.0;
    }
    for (int32_t i=0; i<Data->n; i++)
        Weights->vec[i]=0.0;
    for (int32_t i=0; i<Data->m; i++)
        Outputs->vec[i]=0.0;
    L2_SVM_MFN(Data,Options,Weights,Outputs,0);
    /* anneal lambda_0 up to lambda_u, switching labels until stable at
       each stage */
    int32_t num_switches=0;
    int32_t s=0;
    int32_t last_round=0;
    while (lambda_0 <= Options->lambda_u)
    {
        int32_t iter2=0;
        while (1)
        {
            s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S);
            if (s==0)
                break;
            iter2++;
            SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2);
            SG_SDEBUG("Optimizing unknown labels. switched %d labels.\n", s);
            num_switches+=s;
            SG_SDEBUG("Optimizing weights\n");
            L2_SVM_MFN(Data,Options,Weights,Outputs,1);
        }
        if (last_round==1)
            break;
        lambda_0=TSVM_ANNEALING_RATE*lambda_0;
        if (lambda_0 >= Options->lambda_u)
        {
            lambda_0 = Options->lambda_u;
            last_round=1;
        }
        for (int32_t i=0; i<Data->u; i++)
            Data->C[JU[i]]=lambda_0*1.0/Data->u;
        SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u);
        SG_SDEBUG("Optimizing weights\n");
        L2_SVM_MFN(Data,Options,Weights,Outputs,1);
    }
    SG_SDEBUG("Total Number of Switches = %d\n", num_switches);
    /* reset the labels of the unlabeled examples and report the objective */
    for (int32_t i=0; i<Data->u; i++)
        Data->Y[JU[i]] = 0.0;
    float64_t F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
    SG_SDEBUG("Objective Value = %f\n",F);
    delete[] JU;
    return num_switches;
}

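/* Pairs the most violating tentatively-positive unlabeled examples with
   the most violating tentatively-negative ones and swaps their labels,
   up to S pairs per call.  Returns the number of pairs switched. */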
int32_t switch_labels(float64_t* Y, float64_t* o, int32_t* JU, int32_t u, int32_t S)
{
    int32_t npos=0;
    int32_t nneg=0;
    for (int32_t i=0; i<u; i++)
    {
        if ((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++;
        if ((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++;
    }
    Delta* positive=new Delta[npos];
    Delta* negative=new Delta[nneg];
    int32_t p=0;
    int32_t n=0;
    int32_t ii=0;
    for (int32_t i=0; i<u; i++)
    {
        ii=JU[i];
        if ((Y[ii]>0.0) && (o[ii]<1.0))
        {
            positive[p].delta=o[ii];
            positive[p].index=ii;
            positive[p].s=0;
            p++;
        }
        if ((Y[ii]<0.0) && (-o[ii]<1.0))
        {
            negative[n].delta=-o[ii];
            negative[n].index=ii;
            negative[n].s=0;
            n++;
        }
    }
    std::sort(positive,positive+npos);
    std::sort(negative,negative+nneg);
    int32_t s=-1;
    while (1)
    {
        s++;
        /* check the bounds before indexing positive[s]/negative[s] */
        if ((s>=S) || (s>=npos) || (s>=nneg) ||
            (positive[s].delta>=-negative[s].delta))
            break;
        Y[positive[s].index]=-1.0;
        Y[negative[s].index]= 1.0;
    }
    delete[] positive;
    delete[] negative;
    return s;
}

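/* Deterministic annealing S3VM.  Maintains a probability p[i] that each
   unlabeled example is positive; at each temperature T it alternates
   optimize_p (closed-form update of p) with optimize_w (weighted
   L2-SVM-MFN solve) until the KL divergence between successive p stalls,
   then lowers T.  The best (w,o) found by transductive cost is kept. */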
int32_t DA_S3VM(
    struct data *Data, struct options *Options, struct vector_double *Weights,
    struct vector_double *Outputs)
{
    float64_t T = DA_INIT_TEMP*Options->lambda_u;
    int32_t iter1 = 0, iter2 = 0;
    float64_t *p = new float64_t[Data->u];
    float64_t *q = new float64_t[Data->u];
    float64_t *g = new float64_t[Data->u];
    float64_t F,F_min;
    float64_t *w_min = new float64_t[Data->n];
    float64_t *o_min = new float64_t[Data->m];
    float64_t *w = Weights->vec;
    float64_t *o = Outputs->vec;
    float64_t kl_divergence = 1.0;

    SG_SDEBUG("Initializing weights, p");
    for (int32_t i=0; i<Data->u; i++)
        p[i] = Options->R;

    /* record the indices of the unlabeled examples (Y==0) */
    int32_t *JU = new int32_t[Data->u];
    int32_t j=0;
    for (int32_t i=0; i<Data->m; i++)
    {
        if (Data->Y[i]==0.0)
        {
            JU[j]=i;
            j++;
        }
    }
    float64_t H = entropy(p,Data->u);
    optimize_w(Data,p,Options,Weights,Outputs,0);
    F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
    F_min = F;
    for (int32_t i=0; i<Weights->d; i++)
        w_min[i]=w[i];
    for (int32_t i=0; i<Outputs->d; i++)
        o_min[i]=o[i];
    while ((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon))
    {
        iter1++;
        iter2=0;
        kl_divergence=1.0;
        while ((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon))
        {
            iter2++;
            /* gradient of the unlabeled loss at the current outputs */
            for (int32_t i=0; i<Data->u; i++)
            {
                q[i]=p[i];
                g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]] < -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]])));
            }
            SG_SDEBUG("Optimizing p.\n");
            optimize_p(g,Data->u,T,Options->R,p);
            kl_divergence=KL(p,q,Data->u);
            SG_SDEBUG("Optimizing weights\n");
            optimize_w(Data,p,Options,Weights,Outputs,1);
            F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
            if (F < F_min)
            {
                F_min = F;
                for (int32_t i=0; i<Weights->d; i++)
                    w_min[i]=w[i];
                for (int32_t i=0; i<Outputs->d; i++)
                    o_min[i]=o[i];
            }
            SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F);
        }
        H = entropy(p,Data->u);
        SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H);
        T = T/DA_ANNEALING_RATE;
    }
    /* keep the best iterate found */
    for (int32_t i=0; i<Weights->d; i++)
        w[i]=w_min[i];
    for (int32_t i=0; i<Outputs->d; i++)
        o[i]=o_min[i];

    delete[] p;
    delete[] q;
    delete[] g;
    delete[] JU;
    delete[] w_min;
    delete[] o_min;
    SG_SINFO("(min) Objective Value = %f", F_min);
    return 1;
}

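/* Weighted L2-SVM-MFN step used inside DA_S3VM.  Each unlabeled example
   enters the problem twice, once with label +1 and weight p[j] and once
   with label -1 and weight 1-p[j] (both scaled by lambda_u/u); the m+u
   outputs of this augmented problem are optimized with the same MFN
   scheme as L2_SVM_MFN. */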
int32_t optimize_w(
    const struct data *Data, const float64_t *p, struct options *Options,
    struct vector_double *Weights, struct vector_double *Outputs, int32_t ini)
{
    int32_t i,j;
    CSparseFeatures<float64_t>* features=Data->features;
    int32_t n = Data->n;
    int32_t m = Data->m;
    int32_t u = Data->u;
    float64_t lambda = Options->lambda;
    float64_t epsilon;
    float64_t *w = Weights->vec;
    /* augmented problem: each unlabeled example appears twice */
    float64_t *o = new float64_t[m+u];
    float64_t *Y = new float64_t[m+u];
    float64_t *C = new float64_t[m+u];
    int32_t *labeled_indices = new int32_t[m];
    float64_t F_old = 0.0;
    float64_t F = 0.0;
    float64_t diff=0.0;
    float64_t lambda_u_by_u = Options->lambda_u/u;
    vector_int *ActiveSubset = new vector_int[1];
    ActiveSubset->vec = new int32_t[m];
    ActiveSubset->d = m;

    if (ini==0)
    {
        epsilon=BIG_EPSILON;
        Options->cgitermax=SMALL_CGITERMAX;
        Options->epsilon=BIG_EPSILON;
    }
    else
        epsilon = Options->epsilon;

    for (i=0; i<n; i++)
        F+=w[i]*w[i];
    F=lambda*F;
    int32_t active=0;
    int32_t inactive=m-1;
    float64_t temp1;
    float64_t temp2;

    j = 0;
    for (i=0; i<m; i++)
    {
        o[i]=Outputs->vec[i];
        if (Data->Y[i]==0.0) /* unlabeled */
        {
            labeled_indices[i]=0;
            o[m+j]=o[i];
            Y[i]=1;
            Y[m+j]=-1;
            C[i]=lambda_u_by_u*p[j];
            C[m+j]=lambda_u_by_u*(1-p[j]);
            ActiveSubset->vec[active]=i;
            active++;
            diff = 1 - CMath::abs(o[i]);
            if (diff>0)
            {
                Data->Y[i] = 2*p[j]-1;
                Data->C[i] = lambda_u_by_u;
                temp1 = (1 - o[i]);
                temp2 = (1 + o[i]);
                F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
            }
            else
            {
                if (o[i]>0)
                {
                    Data->Y[i] = -1.0;
                    Data->C[i] = C[m+j];
                }
                else
                {
                    Data->Y[i] = 1.0;
                    Data->C[i] = C[i];
                }
                temp1 = (1-Data->Y[i]*o[i]);
                F+= Data->C[i]*temp1*temp1;
            }
            j++;
        }
        else /* labeled */
        {
            labeled_indices[i]=1;
            Y[i]=Data->Y[i];
            C[i]=1.0/Data->l;
            Data->C[i]=1.0/Data->l;
            diff=1-Data->Y[i]*o[i];
            if (diff>0)
            {
                ActiveSubset->vec[active]=i;
                active++;
                F+=Data->C[i]*diff*diff;
            }
            else
            {
                ActiveSubset->vec[inactive]=i;
                inactive--;
            }
        }
    }
    F=0.5*F;
    ActiveSubset->d=active;
    int32_t iter=0;
    int32_t opt=0;
    int32_t opt2=0;
    vector_double *Weights_bar = new vector_double[1];
    vector_double *Outputs_bar = new vector_double[1];
    float64_t *w_bar = new float64_t[n];
    float64_t *o_bar = new float64_t[m+u];
    Weights_bar->vec=w_bar;
    Outputs_bar->vec=o_bar;
    Weights_bar->d=n;
    Outputs_bar->d=m; /* only the top m outputs are solved for; the bottom
                         u entries mirror the unlabeled ones */
    float64_t delta=0.0;
    float64_t t=0.0;
    int32_t ii = 0;
    while (iter<MFNITERMAX)
    {
        iter++;
        SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F);
        for (i=n; i--;)
            w_bar[i]=w[i];

        for (i=m+u; i--;)
            o_bar[i]=o[i];
        opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
        for (i=active; i<m; i++)
        {
            ii=ActiveSubset->vec[i];
            t=0.0;
            int32_t num_entries=0;
            bool free_vec=false;

            TSparseEntry<float64_t>* vec=features->get_sparse_feature_vector(ii, num_entries, free_vec);
            for (j=0; j<num_entries; j++)
                t+=vec[j].entry*w_bar[vec[j].feat_index];
            features->free_sparse_feature_vector(vec, num_entries, free_vec);
            t+=Options->bias*w_bar[n-1];

            o_bar[ii]=t;
        }

        /* mirror the unlabeled outputs into the augmented block */
        j=0;
        for (i=0; i<m; i++)
        {
            if (labeled_indices[i]==0)
            {
                o_bar[m+j]=o_bar[i];
                j++;
            }
        }
        if (ini==0)
        {
            Options->cgitermax=CGITERMAX;
            ini=1;
        }
        opt2=1;
        for (i=0; i<m; i++)
        {
            ii=ActiveSubset->vec[i];
            if (i<active)
            {
                if (labeled_indices[ii]==1)
                    opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+epsilon));
                else
                {
                    if (CMath::abs(o[ii])<1)
                        opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon));
                    else
                        opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon));
                }
            }
            else
                opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon));
            if (opt2==0)
                break;
        }
        if (opt && opt2)
        {
            if (epsilon==BIG_EPSILON)
            {
                epsilon=EPSILON;
                Options->epsilon=EPSILON;
                SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON);
                continue;
            }
            else
            {
                for (i=n; i--;)
                    w[i]=w_bar[i];
                for (i=m; i--;)
                    Outputs->vec[i]=o_bar[i];
                for (i=m; i--;)
                {
                    if (labeled_indices[i]==0)
                        Data->Y[i]=0.0;
                }
                delete[] ActiveSubset->vec;
                delete[] ActiveSubset;
                delete[] o_bar;
                delete[] w_bar;
                delete[] o;
                delete[] Weights_bar;
                delete[] Outputs_bar;
                delete[] Y;
                delete[] C;
                delete[] labeled_indices;
                SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter);
                return 1;
            }
        }

        delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u);
        SG_SDEBUG("LINE_SEARCH delta = %f", delta);
        F_old=F;
        F=0.0;
        for (i=0; i<n; i++)
        {
            w[i]+=delta*(w_bar[i]-w[i]);
            F+=w[i]*w[i];
        }
        F=lambda*F;
        j=0;
        active=0;
        inactive=m-1;
        for (i=0; i<m; i++)
        {
            o[i]+=delta*(o_bar[i]-o[i]);
            if (labeled_indices[i]==0)
            {
                o[m+j]=o[i];
                ActiveSubset->vec[active]=i;
                active++;
                diff = 1 - CMath::abs(o[i]);
                if (diff>0)
                {
                    Data->Y[i] = 2*p[j]-1;
                    Data->C[i] = lambda_u_by_u;
                    temp1 = (1 - o[i]);
                    temp2 = (1 + o[i]);
                    F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
                }
                else
                {
                    if (o[i]>0)
                    {
                        Data->Y[i] = -1;
                        Data->C[i] = C[m+j];
                    }
                    else
                    {
                        Data->Y[i] = +1;
                        Data->C[i] = C[i];
                    }
                    temp1=(1-Data->Y[i]*o[i]);
                    F+= Data->C[i]*temp1*temp1;
                }
                j++;
            }
            else
            {
                diff=1-Data->Y[i]*o[i];
                if (diff>0)
                {
                    ActiveSubset->vec[active]=i;
                    active++;
                    F+=Data->C[i]*diff*diff;
                }
                else
                {
                    ActiveSubset->vec[inactive]=i;
                    inactive--;
                }
            }
        }
        F=0.5*F;
        ActiveSubset->d=active;
        if (CMath::abs(F-F_old)<EPSILON)
            break;
    }
    for (i=m; i--;)
    {
        Outputs->vec[i]=o[i];
        if (labeled_indices[i]==0)
            Data->Y[i]=0.0;
    }
    delete[] ActiveSubset->vec;
    delete[] labeled_indices;
    delete[] ActiveSubset;
    delete[] o_bar;
    delete[] w_bar;
    delete[] o;
    delete[] Weights_bar;
    delete[] Outputs_bar;
    delete[] Y;
    delete[] C;
    SG_SINFO("L2_SVM_MFN finished in %d iteration(s)", iter);
    return 0;
}

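/* Solves the entropy-regularized subproblem for p in closed form:
   p[i] = 1/(1+exp((g[i]-nu)/T)), where the multiplier nu is chosen by
   root-finding (Newton with bisection fallback on [nu_minus,nu_plus])
   so that the mean of p equals r. */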
void optimize_p(
    const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t* p)
{
    int32_t iter=0;
    float64_t epsilon=1e-10;
    int32_t maxiter=500;
    /* bracket the root of B(nu) = mean_i 1/(1+exp((g[i]-nu)/T)) - r */
    float64_t nu_minus=g[0];
    float64_t nu_plus=g[0];
    for (int32_t i=0; i<u; i++)
    {
        if (g[i]<nu_minus) nu_minus=g[i];
        if (g[i]>nu_plus) nu_plus=g[i];
    }

    float64_t b=T*log((1-r)/r);
    nu_minus-=b;
    nu_plus-=b;
    float64_t nu=(nu_plus+nu_minus)/2;
    float64_t Bnu=0.0;
    float64_t BnuPrime=0.0;
    float64_t s=0.0;
    float64_t tmp=0.0;
    for (int32_t i=0; i<u; i++)
    {
        s=exp((g[i]-nu)/T);
        if (!(isinf(s)))
        {
            tmp=1.0/(1.0+s);
            Bnu+=tmp;
            BnuPrime+=s*tmp*tmp;
        }
    }
    Bnu=Bnu/u;
    Bnu-=r;
    BnuPrime=BnuPrime/(T*u);
    float64_t nuHat=0.0;
    while ((CMath::abs(Bnu)>epsilon) && (iter < maxiter))
    {
        iter++;
        if (CMath::abs(BnuPrime)>0.0)
            nuHat=nu-Bnu/BnuPrime;
        /* fall back to bisection when the Newton step is unavailable or
           leaves the bracket */
        if ((CMath::abs(BnuPrime) <= 0.0) || (nuHat>nu_plus) || (nuHat<nu_minus))
            nu=(nu_minus+nu_plus)/2.0;
        else
            nu=nuHat;
        Bnu=0.0;
        BnuPrime=0.0;
        for (int32_t i=0; i<u; i++)
        {
            s=exp((g[i]-nu)/T);
            if (!(isinf(s)))
            {
                tmp=1.0/(1.0+s);
                Bnu+=tmp;
                BnuPrime+=s*tmp*tmp;
            }
        }
        Bnu=Bnu/u;
        Bnu-=r;
        BnuPrime=BnuPrime/(T*u);
        if (Bnu<0)
            nu_minus=nu;
        else
            nu_plus=nu;
        if (CMath::abs(nu_minus-nu_plus)<epsilon)
            break;
    }
    if (CMath::abs(Bnu)>epsilon)
        SG_SWARNING("Warning (Root): root not found to required precision\n");

    for (int32_t i=0; i<u; i++)
    {
        s=exp((g[i]-nu)/T);
        if (isinf(s))
            p[i]=0.0;
        else
            p[i]=1.0/(1.0+s);
    }
    SG_SINFO(" root (nu) = %f B(nu) = %f", nu, Bnu);
}

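/* Transductive objective: 0.5*(lambda*|w|^2 + lambda_u*F1/u + F2/l),
   where F1 is the symmetric squared hinge loss sum(max(0,1-|o|)^2) over
   the unlabeled examples (Y==0) and F2 is the squared hinge loss
   sum(max(0,1-y*o)^2) over the labeled ones. */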
float64_t transductive_cost(
    float64_t normWeights, float64_t *Y, float64_t *Outputs, int32_t m,
    float64_t lambda, float64_t lambda_u)
{
    float64_t F1=0.0, F2=0.0, o=0.0, y=0.0;
    int32_t u=0, l=0;
    for (int32_t i=0; i<m; i++)
    {
        o=Outputs[i];
        y=Y[i];
        if (y==0.0)
        {   /* unlabeled: symmetric squared hinge on |o| */
            F1 += CMath::abs(o) > 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o));
            u++;
        }
        else
        {   /* labeled: squared hinge on y*o */
            F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o);
            l++;
        }
    }
    float64_t F;
    F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l);
    return F;
}

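/* Mean binary entropy (in bits) of the probabilities p[0..u-1]. */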
float64_t entropy(const float64_t *p, int32_t u)
{
    float64_t h=0.0;
    float64_t q=0.0;
    for (int32_t i=0; i<u; i++)
    {
        q=p[i];
        if (q>0 && q<1)
            h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q));
    }
    return h/u;
}

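/* Mean binary KL divergence (in bits) between p and q, with both clamped
   away from 0 and 1 so the logarithms stay finite. */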
float64_t KL(const float64_t *p, const float64_t *q, int32_t u)
{
    float64_t h=0.0;
    float64_t p1=0.0;
    float64_t q1=0.0;
    float64_t g=0.0;
    for (int32_t i=0; i<u; i++)
    {
        p1=p[i];
        q1=q[i];
        /* clamp away from 0 and 1 */
        if (p1>1-1e-8) p1-=1e-8;
        if (p1<1e-8) p1+=1e-8;
        if (q1>1-1e-8) q1-=1e-8;
        if (q1<1e-8) q1+=1e-8;
        g = (p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1)));
        if (CMath::abs(g)<1e-12 || isnan(g))
            g=0.0;
        h+=g;
    }
    return h/u;
}

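/* Squared Euclidean norm of A->vec. */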
float64_t norm_square(const vector_double *A)
{
    float64_t x=0.0, t=0.0;
    for (int32_t i=0; i<A->d; i++)
    {
        t=A->vec[i];
        x+=t*t;
    }
    return x;
}

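/* initialize: allocate A->vec of length k and fill it with the constant a
   (vector_double overload) or with the identity permutation 0..k-1
   (vector_int overload). */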
void initialize(struct vector_double *A, int32_t k, float64_t a)
{
    float64_t *vec = new float64_t[k];
    for (int32_t i=0; i<k; i++)
        vec[i]=a;
    A->vec = vec;
    A->d = k;
}

void initialize(struct vector_int *A, int32_t k)
{
    int32_t *vec = new int32_t[k];
    for (int32_t i=0; i<k; i++)
        vec[i]=i;
    A->vec = vec;
    A->d = k;
}

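/* Extracts the Data->l labeled examples of Data into D before the
   labeled-only training phase of TSVM_MFN. */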
void GetLabeledData(struct data *D, const struct data *Data)
{
    /* The body of this function is missing from this snapshot.  It is
       expected to copy the labeled examples (features, labels and costs)
       of Data into D; until it is restored, D is left untouched and the
       TSVM code path cannot run correctly. */
}