WDSVMOcas.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WDSVMOCAS_H___
00013 #define _WDSVMOCAS_H___
00014
00015 #include "lib/common.h"
00016 #include "classifier/Classifier.h"
00017 #include "classifier/svm/SVMOcas.h"
00018 #include "features/StringFeatures.h"
00019 #include "features/Labels.h"
00020
00022 class CWDSVMOcas : public CClassifier
00023 {
00024 public:
00029 CWDSVMOcas(E_SVM_TYPE type);
00030
00039 CWDSVMOcas(
00040 float64_t C, int32_t d, int32_t from_d,
00041 CStringFeatures<uint8_t>* traindat, CLabels* trainlab);
00042 virtual ~CWDSVMOcas();
00043
00048 virtual inline EClassifierType get_classifier_type() { return CT_WDSVMOCAS; }
00049
00054 virtual bool train();
00055
00061 inline void set_C(float64_t c1, float64_t c2) { C1=c1; C2=c2; }
00062
00067 inline float64_t get_C1() { return C1; }
00068
00073 inline float64_t get_C2() { return C2; }
00074
00079 inline void set_epsilon(float64_t eps) { epsilon=eps; }
00080
00085 inline float64_t get_epsilon() { return epsilon; }
00086
00091 inline void set_features(CStringFeatures<uint8_t>* feat) { features=feat; }
00092
00097 inline CStringFeatures<uint8_t>* get_features() { return features; }
00098
00103 inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
00104
00109 inline bool get_bias_enabled() { return use_bias; }
00110
00115 inline void set_bufsize(int32_t sz) { bufsize=sz; }
00116
00121 inline int32_t get_bufsize() { return bufsize; }
00122
00128 inline void set_degree(int32_t d, int32_t from_d)
00129 {
00130 degree=d;
00131 from_degree=from_d;
00132 }
00133
00138 inline int32_t get_degree() { return degree; }
00139
00145 CLabels* classify(CLabels* output=NULL);
00146
00152 inline virtual float64_t classify_example(int32_t num)
00153 {
00154 ASSERT(features);
00155 if (!wd_weights)
00156 set_wd_weights();
00157
00158 int32_t len=0;
00159 float64_t sum=0;
00160 uint8_t* vec=features->get_feature_vector(num, len);
00161 SG_INFO("len %d, string_length %d\n", len, string_length);
00162 ASSERT(len==string_length);
00163
00164 for (int32_t j=0; j<string_length; j++)
00165 {
00166 int32_t offs=w_dim_single_char*j;
00167 int32_t val=0;
00168 for (int32_t k=0; (j+k<string_length) && (k<degree); k++)
00169 {
00170 val=val*alphabet_size + vec[j+k];
00171 sum+=wd_weights[k] * w[offs+val];
00172 offs+=w_offsets[k];
00173 }
00174 }
00175 return sum/normalization_const;
00176 }
00177
00179 inline void set_normalization_const()
00180 {
00181 ASSERT(features);
00182 normalization_const=0;
00183 for (int32_t i=0; i<degree; i++)
00184 normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
00185
00186 normalization_const=CMath::sqrt(normalization_const);
00187 SG_DEBUG("normalization_const:%f\n", normalization_const);
00188 }
00189
00194 inline float64_t get_normalization_const() { return normalization_const; }
00195
00196
00197 protected:
00202 int32_t set_wd_weights();
00203
00212 static void compute_W(
00213 float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha,
00214 uint32_t nSel, void* ptr );
00215
00222 static float64_t update_W(float64_t t, void* ptr );
00223
00229 static void* add_new_cut_helper(void* ptr);
00230
00239 static void add_new_cut(
00240 float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length,
00241 uint32_t nSel, void* ptr );
00242
00248 static void* compute_output_helper(void* ptr);
00249
00255 static void compute_output( float64_t *output, void* ptr );
00256
00263 static void sort( float64_t* vals, uint32_t* idx, uint32_t size);
00264
00265
00266 protected:
00268 CStringFeatures<uint8_t>* features;
00270 bool use_bias;
00272 int32_t bufsize;
00274 float64_t C1;
00276 float64_t C2;
00278 float64_t epsilon;
00280 E_SVM_TYPE method;
00281
00283 int32_t degree;
00285 int32_t from_degree;
00287 float32_t* wd_weights;
00289 int32_t num_vec;
00291 int32_t string_length;
00293 int32_t alphabet_size;
00294
00296 float64_t normalization_const;
00297
00299 float64_t bias;
00301 int32_t* w_offsets;
00303 int32_t w_dim;
00305 int32_t w_dim_single_char;
00307 float32_t* w;
00309 float32_t* old_w;
00311 float64_t* tmp_a_buf;
00313 float64_t* lab;
00314
00316 float32_t** cuts;
00317 };
00318 #endif