LPBoost.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/config.h"
00012
00013 #ifdef USE_CPLEX
00014
00015 #include "classifier/LPBoost.h"
00016 #include "features/Labels.h"
00017 #include "lib/Mathematics.h"
00018 #include "lib/Cplex.h"
00019 #include "lib/DynamicArray.h"
00020 #include "lib/Signal.h"
00021 #include "lib/Time.h"
00022
00023 CLPBoost::CLPBoost()
00024 : CSparseLinearClassifier(), C1(1), C2(1), use_bias(true), epsilon(1e-3)
00025 {
00026 u=NULL;
00027 dim=NULL;
00028 num_sfeat=0;
00029 num_svec=0;
00030 sfeat=NULL;
00031 }
00032
00033
00034 CLPBoost::~CLPBoost()
00035 {
00036 cleanup();
00037 }
00038
00039 bool CLPBoost::init(int32_t num_vec)
00040 {
00041 u=new float64_t[num_vec];
00042 for (int32_t i=0; i<num_vec; i++)
00043 u[i]=1.0/num_vec;
00044
00045 dim=new CDynamicArray<int32_t>(100000);
00046
00047 sfeat= features->get_transposed(num_sfeat, num_svec);
00048
00049 if (sfeat)
00050 return true;
00051 else
00052 return false;
00053 }
00054
00055 void CLPBoost::cleanup()
00056 {
00057 delete[] u;
00058 u=NULL;
00059
00060 features->clean_tsparse(sfeat, num_svec);
00061 sfeat=NULL;
00062
00063 delete dim;
00064 dim=NULL;
00065 }
00066
00067 float64_t CLPBoost::find_max_violator(int32_t& max_dim)
00068 {
00069 float64_t max_val=0;
00070 max_dim=-1;
00071
00072 for (int32_t i=0; i<num_svec; i++)
00073 {
00074 float64_t valplus=0;
00075 float64_t valminus=0;
00076
00077 for (int32_t j=0; j<sfeat[i].num_feat_entries; j++)
00078 {
00079 int32_t idx=sfeat[i].features[j].feat_index;
00080 float64_t v=u[idx]*labels->get_label(idx)*sfeat[i].features[j].entry;
00081 valplus+=v;
00082 valminus-=v;
00083 }
00084
00085 if (valplus>max_val || max_dim==-1)
00086 {
00087 max_dim=i;
00088 max_val=valplus;
00089 }
00090
00091 if (valminus>max_val)
00092 {
00093 max_dim=num_svec+i;
00094 max_val=valminus;
00095 }
00096 }
00097
00098 dim->append_element(max_dim);
00099 return max_val;
00100 }
00101
00102 bool CLPBoost::train()
00103 {
00104 ASSERT(labels);
00105 ASSERT(features);
00106 int32_t num_train_labels=labels->get_num_labels();
00107 int32_t num_feat=features->get_num_features();
00108 int32_t num_vec=features->get_num_vectors();
00109
00110 ASSERT(num_vec==num_train_labels);
00111 delete[] w;
00112 w=new float64_t[num_feat];
00113 memset(w,0,sizeof(float64_t)*num_feat);
00114 w_dim=num_feat;
00115
00116 CCplex solver;
00117 solver.init(E_LINEAR);
00118 SG_PRINT("setting up lpboost\n");
00119 solver.setup_lpboost(C1, num_vec);
00120 SG_PRINT("finished setting up lpboost\n");
00121
00122 float64_t result=init(num_vec);
00123 ASSERT(result);
00124
00125 int32_t num_hypothesis=0;
00126 CTime time;
00127
00128 while (!(CSignal::cancel_computations()))
00129 {
00130 int32_t max_dim=0;
00131 float64_t violator=find_max_violator(max_dim);
00132 SG_PRINT("iteration:%06d violator: %10.17f (>1.0) chosen: %d\n", num_hypothesis, violator, max_dim);
00133 if (violator <= 1.0+epsilon && num_hypothesis>1)
00134 {
00135 SG_PRINT("converged after %d iterations!\n", num_hypothesis);
00136 break;
00137 }
00138
00139 float64_t factor=+1.0;
00140 if (max_dim>=num_svec)
00141 {
00142 factor=-1.0;
00143 max_dim-=num_svec;
00144 }
00145
00146 TSparseEntry<float64_t>* h=sfeat[max_dim].features;
00147 int32_t len=sfeat[max_dim].num_feat_entries;
00148 solver.add_lpboost_constraint(factor, h, len, num_vec, labels);
00149 solver.optimize(u);
00150
00151 num_hypothesis++;
00152
00153 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time())
00154 break;
00155 }
00156 float64_t* lambda=new float64_t[num_hypothesis];
00157 solver.optimize(u, lambda);
00158
00159
00160 for (int32_t i=0; i<num_hypothesis; i++)
00161 {
00162 int32_t d=dim->get_element(i);
00163 if (d>=num_svec)
00164 w[d-num_svec]+=lambda[i];
00165 else
00166 w[d]-=lambda[i];
00167
00168 }
00169
00170 solver.cleanup();
00171
00172 cleanup();
00173
00174 return true;
00175 }
00176 #endif