WDSVMOcas.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2008 Vojtech Franc
00008  * Written (W) 2007-2008 Soeren Sonnenburg
00009  * Copyright (C) 2007-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WDSVMOCAS_H___
00013 #define _WDSVMOCAS_H___
00014 
00015 #include "lib/common.h"
00016 #include "classifier/Classifier.h"
00017 #include "classifier/svm/SVMOcas.h"
00018 #include "features/StringFeatures.h"
00019 #include "features/Labels.h"
00020 
00022 class CWDSVMOcas : public CClassifier
00023 {
00024     public:
00029         CWDSVMOcas(E_SVM_TYPE type);
00030 
00039         CWDSVMOcas(DREAL C, INT d, INT from_d, CStringFeatures<BYTE>* traindat, CLabels* trainlab);
00040         virtual ~CWDSVMOcas();
00041 
00046         virtual inline EClassifierType get_classifier_type() { return CT_WDSVMOCAS; }
00047 
00052         virtual bool train();
00053 
00059         inline void set_C(DREAL c1, DREAL c2) { C1=c1; C2=c2; }
00060 
00065         inline DREAL get_C1() { return C1; }
00066 
00071         inline DREAL get_C2() { return C2; }
00072 
00077         inline void set_epsilon(DREAL eps) { epsilon=eps; }
00078 
00083         inline DREAL get_epsilon() { return epsilon; }
00084 
00089         inline void set_features(CStringFeatures<BYTE>* feat) { features=feat; }
00090 
00095         inline CStringFeatures<BYTE>* get_features() { return features; }
00096 
00101         inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; }
00102 
00107         inline bool get_bias_enabled() { return use_bias; }
00108 
00113         inline void set_bufsize(INT sz) { bufsize=sz; }
00114 
00119         inline INT get_bufsize() { return bufsize; }
00120 
00126         inline void set_degree(INT d, INT from_d) { degree=d; from_degree=from_d;}
00127 
00132         inline INT get_degree() { return degree; }
00133 
00139         CLabels* classify(CLabels* output);
00140 
00146         inline virtual DREAL classify_example(INT num)
00147         {
00148             ASSERT(features);
00149             if (!wd_weights)
00150                 set_wd_weights();
00151 
00152             INT len=0;
00153             DREAL sum=0;
00154             BYTE* vec=features->get_feature_vector(num, len);
00155             ASSERT(len==string_length);
00156 
00157             for (INT j=0; j<string_length; j++)
00158             {
00159                 INT offs=w_dim_single_char*j;
00160                 INT val=0;
00161                 for (INT k=0; (j+k<string_length) && (k<degree); k++)
00162                 {
00163                     val=val*alphabet_size + vec[j+k];
00164                     sum+=wd_weights[k] * w[offs+val];
00165                     offs+=w_offsets[k];
00166                 }
00167             }
00168             return sum/normalization_const;
00169         }
00170 
00172         inline void set_normalization_const()
00173         {
00174             ASSERT(features);
00175             normalization_const=0;
00176             for (INT i=0; i<degree; i++)
00177                 normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i];
00178 
00179             normalization_const=CMath::sqrt(normalization_const);
00180             SG_DEBUG("normalization_const:%f\n", normalization_const);
00181         }
00182 
00187         inline DREAL get_normalization_const() { return normalization_const; }
00188 
00189 
00190     protected:
00195         INT set_wd_weights();
00196 
00205         static void compute_W( double *sq_norm_W, double *dp_WoldW, double *alpha, uint32_t nSel, void* ptr );
00206 
00213         static double update_W(double t, void* ptr );
00214 
00220         static void* add_new_cut_helper(void* ptr);
00221 
00230         static void add_new_cut( double *new_col_H, uint32_t *new_cut, uint32_t cut_length, uint32_t nSel, void* ptr );
00231 
00237         static void* compute_output_helper(void* ptr);
00238 
00244         static void compute_output( double *output, void* ptr );
00245 
00252         static void sort( double* vals, uint32_t* idx, uint32_t size);
00253 
00254 
00255     protected:
00257         CStringFeatures<BYTE>* features;
00259         bool use_bias;
00261         INT bufsize;
00263         DREAL C1;
00265         DREAL C2;
00267         DREAL epsilon;
00269         E_SVM_TYPE method;
00270 
00272         INT degree;
00274         INT from_degree;
00276         SHORTREAL* wd_weights;
00278         INT num_vec;
00280         INT string_length;
00282         INT alphabet_size;
00283 
00285         DREAL normalization_const;
00286 
00288         DREAL bias;
00290         INT* w_offsets;
00292         INT w_dim;
00294         INT w_dim_single_char;
00296         SHORTREAL* w;
00298         SHORTREAL* old_w;
00300         DREAL* tmp_a_buf;
00302         DREAL* lab;
00303 
00305         SHORTREAL** cuts;
00306 };
00307 #endif

SHOGUN Machine Learning Toolbox - Documentation