00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "features/Labels.h"
00013 #include "lib/Mathematics.h"
00014 #include "lib/Time.h"
00015 #include "base/Parallel.h"
00016 #include "classifier/SparseLinearClassifier.h"
00017 #include "classifier/svm/SVMOcas.h"
00018 #include "classifier/svm/libocas.h"
00019 #include "features/SparseFeatures.h"
00020 #include "features/Labels.h"
00021
00022 CSVMOcas::CSVMOcas(E_SVM_TYPE type)
00023 : CSparseLinearClassifier(), use_bias(false), bufsize(3000), C1(1), C2(1),
00024 epsilon(1e-3), method(type)
00025 {
00026 w=NULL;
00027 old_w=NULL;
00028 }
00029
00030 CSVMOcas::CSVMOcas(DREAL C, CSparseFeatures<DREAL>* traindat, CLabels* trainlab)
00031 : CSparseLinearClassifier(), use_bias(false), bufsize(3000), C1(C), C2(C),
00032 epsilon(1e-3)
00033 {
00034 w=NULL;
00035 old_w=NULL;
00036 method=SVM_OCAS;
00037 set_features(traindat);
00038 set_labels(trainlab);
00039 }
00040
00041
00042 CSVMOcas::~CSVMOcas()
00043 {
00044 }
00045
00046 bool CSVMOcas::train()
00047 {
00048 SG_INFO("C=%f, epsilon=%f, bufsize=%d\n", get_C1(), get_epsilon(), bufsize);
00049
00050 ASSERT(labels);
00051 ASSERT(get_features());
00052 ASSERT(labels->is_two_class_labeling());
00053
00054 INT num_train_labels=0;
00055 lab=labels->get_labels(num_train_labels);
00056 w_dim=features->get_num_features();
00057 INT num_vec=features->get_num_vectors();
00058
00059 ASSERT(num_vec==num_train_labels);
00060 ASSERT(num_vec>0);
00061
00062 delete[] w;
00063 w=new DREAL[w_dim];
00064 memset(w, 0, w_dim*sizeof(DREAL));
00065
00066 delete[] old_w;
00067 old_w=new DREAL[w_dim];
00068 memset(old_w, 0, w_dim*sizeof(DREAL));
00069 bias=0;
00070
00071 tmp_a_buf=new DREAL[w_dim];
00072 cp_value=new DREAL*[bufsize];
00073 cp_index=new uint32_t*[bufsize];
00074 cp_nz_dims=new uint32_t[bufsize];
00075
00076 double TolAbs=0;
00077 double QPBound=0;
00078 int Method=0;
00079 if (method == SVM_OCAS)
00080 Method = 1;
00081 ocas_return_value_T result = svm_ocas_solver( get_C1(), num_vec, get_epsilon(),
00082 TolAbs, QPBound, bufsize, Method,
00083 &CSVMOcas::compute_W,
00084 &CSVMOcas::update_W,
00085 &CSVMOcas::add_new_cut,
00086 &CSVMOcas::compute_output,
00087 &CSVMOcas::sort,
00088 this);
00089
00090 SG_INFO("Ocas Converged after %d iterations\n"
00091 "==================================\n"
00092 "timing statistics:\n"
00093 "output_time: %f s\n"
00094 "sort_time: %f s\n"
00095 "add_time: %f s\n"
00096 "w_time: %f s\n"
00097 "solver_time %f s\n"
00098 "ocas_time %f s\n\n", result.nIter, result.output_time, result.sort_time,
00099 result.add_time, result.w_time, result.solver_time, result.ocas_time);
00100
00101 delete[] tmp_a_buf;
00102
00103 uint32_t num_cut_planes = result.nCutPlanes;
00104
00105 for (uint32_t i=0; i<num_cut_planes; i++)
00106 {
00107 delete[] cp_value[i];
00108 delete[] cp_index[i];
00109 }
00110
00111 delete[] cp_value;
00112 cp_value=NULL;
00113 delete[] cp_index;
00114 cp_index=NULL;
00115 delete[] cp_nz_dims;
00116 cp_nz_dims=NULL;
00117
00118 delete[] lab;
00119 lab=NULL;
00120
00121 delete[] old_w;
00122 old_w=NULL;
00123
00124 return true;
00125 }
00126
00127
00128
00129
00130
00131
00132
00133
00134 double CSVMOcas::update_W( double t, void* ptr )
00135 {
00136 double sq_norm_W = 0;
00137 CSVMOcas* o = (CSVMOcas*) ptr;
00138 uint32_t nDim = (uint32_t) o->w_dim;
00139 double* W=o->w;
00140 double* oldW=o->old_w;
00141
00142 for(uint32_t j=0; j <nDim; j++)
00143 {
00144 W[j] = oldW[j]*(1-t) + t*W[j];
00145 sq_norm_W += W[j]*W[j];
00146 }
00147
00148 return( sq_norm_W );
00149 }
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159 void CSVMOcas::add_new_cut( double *new_col_H,
00160 uint32_t *new_cut,
00161 uint32_t cut_length,
00162 uint32_t nSel,
00163 void* ptr)
00164 {
00165 CSVMOcas* o = (CSVMOcas*) ptr;
00166 CSparseFeatures<DREAL>* f = o->get_features();
00167 uint32_t nDim=(uint32_t) o->w_dim;
00168 DREAL* y = o->lab;
00169
00170 DREAL** c_val = o->cp_value;
00171 uint32_t** c_idx = o->cp_index;
00172 uint32_t* c_nzd = o->cp_nz_dims;
00173
00174 double sq_norm_a;
00175 uint32_t i, j, nz_dims;
00176
00177
00178 double* new_a = o->tmp_a_buf;
00179 memset(new_a, 0, sizeof(double)*nDim);
00180
00181 for(i=0; i < cut_length; i++)
00182 f->add_to_dense_vec(y[new_cut[i]], new_cut[i], new_a, nDim);
00183
00184
00185 nz_dims = 0;
00186 sq_norm_a = 0;
00187 for(j=0; j < nDim; j++ ) {
00188 if(new_a[j] != 0) {
00189 nz_dims++;
00190 sq_norm_a += new_a[j]*new_a[j];
00191 }
00192 }
00193
00194
00195 c_nzd[nSel] = nz_dims;
00196 if(nz_dims > 0)
00197 {
00198 c_idx[nSel]=new uint32_t[nz_dims];
00199 c_val[nSel]=new double[nz_dims];
00200
00201 uint32_t idx=0;
00202 for(j=0; j < nDim; j++ )
00203 {
00204 if(new_a[j] != 0)
00205 {
00206 c_idx[nSel][idx] = j;
00207 c_val[nSel][idx++] = new_a[j];
00208 }
00209 }
00210 }
00211
00212 new_col_H[nSel] = sq_norm_a;
00213
00214 for(i=0; i < nSel; i++)
00215 {
00216 double tmp = 0;
00217 for(j=0; j < c_nzd[i]; j++)
00218 tmp += new_a[c_idx[i][j]]*c_val[i][j];
00219
00220 new_col_H[i] = tmp;
00221 }
00222
00223
00224
00225 }
00226
00227 void CSVMOcas::sort( double* vals, uint32_t* idx, uint32_t size)
00228 {
00229 CMath::qsort_index(vals, idx, size);
00230 }
00231
00232
00233
00234
00235
00236
00237 void CSVMOcas::compute_output( double *output, void* ptr )
00238 {
00239 CSVMOcas* o = (CSVMOcas*) ptr;
00240 CSparseFeatures<DREAL>* f=o->get_features();
00241 INT nData=f->get_num_vectors();
00242
00243 DREAL* y = o->lab;
00244 f->dense_dot_range(output, 0, nData, y, o->w, o->w_dim, 0.0);
00245
00246
00247 }
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258 void CSVMOcas::compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* ptr )
00259 {
00260 CSVMOcas* o = (CSVMOcas*) ptr;
00261 uint32_t nDim= (uint32_t) o->w_dim;
00262 CMath::swap(o->w, o->old_w);
00263 double* W=o->w;
00264 double* oldW=o->old_w;
00265 memset(W, 0, sizeof(double)*nDim);
00266
00267 DREAL** c_val = o->cp_value;
00268 uint32_t** c_idx = o->cp_index;
00269 uint32_t* c_nzd = o->cp_nz_dims;
00270
00271 for(uint32_t i=0; i<nSel; i++)
00272 {
00273 uint32_t nz_dims = c_nzd[i];
00274
00275 if(nz_dims > 0 && alpha[i] > 0)
00276 {
00277 for(uint32_t j=0; j < nz_dims; j++)
00278 W[c_idx[i][j]] += alpha[i]*c_val[i][j];
00279 }
00280 }
00281
00282 *sq_norm_W = CMath::dot(W,W, nDim);
00283 *dp_WoldW = CMath::dot(W,oldW, nDim);;
00284
00285 }