LPBoost.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2008 Soeren Sonnenburg
00008  * Copyright (C) 2007-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/config.h"
00012 
00013 #ifdef USE_CPLEX
00014 
00015 #include "classifier/LPBoost.h"
00016 #include "features/Labels.h"
00017 #include "lib/Mathematics.h"
00018 #include "lib/Cplex.h"
00019 #include "lib/DynamicArray.h"
00020 #include "lib/Signal.h"
00021 #include "lib/Time.h"
00022 
00023 CLPBoost::CLPBoost()
00024 : CSparseLinearClassifier(), C1(1), C2(1), use_bias(true), epsilon(1e-3)
00025 {
00026     u=NULL;
00027     dim=NULL;
00028     num_sfeat=0;
00029     num_svec=0;
00030     sfeat=NULL;
00031 }
00032 
00033 
00034 CLPBoost::~CLPBoost()
00035 {
00036     cleanup();
00037 }
00038 
00039 bool CLPBoost::init(int32_t num_vec)
00040 {
00041     u=new float64_t[num_vec];
00042     for (int32_t i=0; i<num_vec; i++)
00043         u[i]=1.0/num_vec;
00044 
00045     dim=new CDynamicArray<int32_t>(100000);
00046 
00047     sfeat= features->get_transposed(num_sfeat, num_svec);
00048 
00049     if (sfeat)
00050         return true;
00051     else
00052         return false;
00053 }
00054 
00055 void CLPBoost::cleanup()
00056 {
00057     delete[] u;
00058     u=NULL;
00059 
00060     features->clean_tsparse(sfeat, num_svec);
00061     sfeat=NULL;
00062 
00063     delete dim;
00064     dim=NULL;
00065 }
00066 
00067 float64_t CLPBoost::find_max_violator(int32_t& max_dim)
00068 {
00069     float64_t max_val=0;
00070     max_dim=-1;
00071 
00072     for (int32_t i=0; i<num_svec; i++)
00073     {
00074         float64_t valplus=0;
00075         float64_t valminus=0;
00076 
00077         for (int32_t j=0; j<sfeat[i].num_feat_entries; j++)
00078         {
00079             int32_t idx=sfeat[i].features[j].feat_index;
00080             float64_t v=u[idx]*labels->get_label(idx)*sfeat[i].features[j].entry;
00081             valplus+=v;
00082             valminus-=v;
00083         }
00084 
00085         if (valplus>max_val || max_dim==-1)
00086         {
00087             max_dim=i;
00088             max_val=valplus;
00089         }
00090 
00091         if (valminus>max_val)
00092         {
00093             max_dim=num_svec+i;
00094             max_val=valminus;
00095         }
00096     }
00097 
00098     dim->append_element(max_dim);
00099     return max_val;
00100 }
00101 
00102 bool CLPBoost::train()
00103 {
00104     ASSERT(labels);
00105     ASSERT(features);
00106     int32_t num_train_labels=labels->get_num_labels();
00107     int32_t num_feat=features->get_num_features();
00108     int32_t num_vec=features->get_num_vectors();
00109 
00110     ASSERT(num_vec==num_train_labels);
00111     delete[] w;
00112     w=new float64_t[num_feat];
00113     memset(w,0,sizeof(float64_t)*num_feat);
00114     w_dim=num_feat;
00115 
00116     CCplex solver;
00117     solver.init(E_LINEAR);
00118     SG_PRINT("setting up lpboost\n");
00119     solver.setup_lpboost(C1, num_vec);
00120     SG_PRINT("finished setting up lpboost\n");
00121 
00122     float64_t result=init(num_vec);
00123     ASSERT(result);
00124 
00125     int32_t num_hypothesis=0;
00126     CTime time;
00127 
00128     while (!(CSignal::cancel_computations()))
00129     {
00130         int32_t max_dim=0;
00131         float64_t violator=find_max_violator(max_dim);
00132         SG_PRINT("iteration:%06d violator: %10.17f (>1.0) chosen: %d\n", num_hypothesis, violator, max_dim);
00133         if (violator <= 1.0+epsilon && num_hypothesis>1) //no constraint violated
00134         {
00135             SG_PRINT("converged after %d iterations!\n", num_hypothesis);
00136             break;
00137         }
00138 
00139         float64_t factor=+1.0;
00140         if (max_dim>=num_svec)
00141         {
00142             factor=-1.0;
00143             max_dim-=num_svec;
00144         }
00145 
00146         TSparseEntry<float64_t>* h=sfeat[max_dim].features;
00147         int32_t len=sfeat[max_dim].num_feat_entries;
00148         solver.add_lpboost_constraint(factor, h, len, num_vec, labels);
00149         solver.optimize(u);
00150         //CMath::display_vector(u, num_vec, "u");
00151         num_hypothesis++;
00152 
00153         if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00154             break;
00155     }
00156     float64_t* lambda=new float64_t[num_hypothesis];
00157     solver.optimize(u, lambda);
00158 
00159     //CMath::display_vector(lambda, num_hypothesis, "lambda");
00160     for (int32_t i=0; i<num_hypothesis; i++)
00161     {
00162         int32_t d=dim->get_element(i);
00163         if (d>=num_svec)
00164             w[d-num_svec]+=lambda[i];
00165         else
00166             w[d]-=lambda[i];
00167 
00168     }
00169     //solver.write_problem("problem.lp");
00170     solver.cleanup();
00171 
00172     cleanup();
00173     
00174     return true;
00175 }
00176 #endif

SHOGUN Machine Learning Toolbox - Documentation