00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "base/SGObject.h"
00017 #include "features/Features.h"
00018 #include "kernel/KernelNormalizer.h"
00019
00020
/**
 * Selects which optimization strategy a kernel should use.
 *
 * The value is stored in CKernel::opt_type and accessed through
 * CKernel::get_optimization_type() / CKernel::set_optimization_type().
 * How each strategy is realised is up to the concrete kernel.
 */
enum EOptimizationType
{
    /* As the name says: favour speed, accept higher memory use. */
    FASTBUTMEMHUNGRY = 0,

    /* As the name says: favour low memory use, accept slower evaluation. */
    SLOWBUTMEMEFFICIENT = 1
};
00026
/**
 * Numeric identifiers for the available kernel implementations.
 *
 * This is the type returned by CKernel::get_kernel_type(). Every value is
 * assigned explicitly, and closely related kernels share a decade
 * (e.g. 30..32 for the Gaussian variants), so new entries must not
 * renumber existing ones.
 */
enum EKernelType
{
    K_UNKNOWN                = 0,

    /* 10..19: linear kernels */
    K_LINEAR                 = 10,
    K_SPARSELINEAR           = 11,

    /* 20..29: polynomial */
    K_POLY                   = 20,

    /* 30..39: Gaussian variants */
    K_GAUSSIAN               = 30,
    K_SPARSEGAUSSIAN         = 31,
    K_GAUSSIANSHIFT          = 32,

    /* 40..49 */
    K_HISTOGRAM              = 40,
    K_SALZBERG               = 41,

    /* 50..79 */
    K_LOCALITYIMPROVED       = 50,
    K_SIMPLELOCALITYIMPROVED = 60,
    K_FIXEDDEGREE            = 70,

    /* 80..89: weighted degree variants */
    K_WEIGHTEDDEGREE         = 80,
    K_WEIGHTEDDEGREEPOS      = 81,

    /* 90..129 */
    K_WEIGHTEDCOMMWORDSTRING = 90,
    K_POLYMATCH              = 100,
    K_ALIGNMENT              = 110,
    K_COMMWORDSTRING         = 120,
    K_COMMULONGSTRING        = 121,

    /* 140 and up (130 is not assigned) */
    K_COMBINED               = 140,
    K_AUC                    = 150,
    K_CUSTOM                 = 160,
    K_SIGMOID                = 170,
    K_CHI2                   = 180,
    K_DIAG                   = 190,
    K_CONST                  = 200,
    K_MINDYGRAM              = 210,
    K_DISTANCE               = 220,
    K_LOCALALIGNMENT         = 230,
    K_PYRAMIDCHI2            = 240,
    K_OLIGO                  = 250,
    K_MATCHWORD              = 260
};
00062
/**
 * Capability flags of a kernel.
 *
 * Each non-zero enumerator occupies its own bit, so several can be OR-ed
 * together into one mask. CKernel keeps such a mask in its `properties`
 * member and manipulates it via set_property() / unset_property() /
 * has_property().
 */
enum EKernelProperty
{
    KP_NONE            = 0,       /* no bit set */
    KP_LINADD          = 1 << 0,  /* == 1 */
    KP_KERNCOMBINATION = 1 << 1,  /* == 2 */
    KP_BATCHEVALUATION = 1 << 2   /* == 4 */
};
00070
00071
00072 class CSVM;
00073
00097 class CKernel : public CSGObject
00098 {
00099 friend class CSqrtDiagKernelNormalizer;
00100 friend class CAvgDiagKernelNormalizer;
00101 friend class CFirstElementKernelNormalizer;
00102
00103 public:
00108 CKernel(int32_t size);
00109
00116 CKernel(CFeatures* l, CFeatures* r, int32_t size);
00117
00118 virtual ~CKernel();
00119
00127 inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00128 {
00129 if (idx_a < 0 || idx_b <0)
00130 return 0;
00131
00132 ASSERT(lhs);
00133 ASSERT(rhs);
00134
00135 if (lhs==rhs)
00136 {
00137 int32_t num_vectors = lhs->get_num_vectors();
00138
00139 if (idx_a>=num_vectors)
00140 idx_a=2*num_vectors-1-idx_a;
00141
00142 if (idx_b>=num_vectors)
00143 idx_b=2*num_vectors-1-idx_b;
00144 }
00145
00146 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00147 }
00148
00155 void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00156
00164 virtual float64_t* get_kernel_matrix_real(
00165 int32_t &m, int32_t &n, float64_t* target);
00166
00174 virtual float32_t* get_kernel_matrix_shortreal(
00175 int32_t &m, int32_t &n, float32_t* target);
00176
00187 virtual bool init(CFeatures* lhs, CFeatures* rhs);
00188
00193 virtual bool set_normalizer(CKernelNormalizer* normalizer);
00194
00199 virtual CKernelNormalizer* get_normalizer();
00200
00204 virtual bool init_normalizer();
00205
00212 virtual void cleanup();
00213
00219 bool load(char* fname);
00220
00226 bool save(char* fname);
00227
00235 virtual bool load_init(FILE* src)=0;
00236
00244 virtual bool save_init(FILE* dest)=0;
00245
00250 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00251
00256 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00257
00262 inline int32_t get_num_vec_lhs()
00263 {
00264 if (!lhs)
00265 return 0;
00266 else
00267 return lhs->get_num_vectors();
00268 }
00269
00274 inline int32_t get_num_vec_rhs()
00275 {
00276 if (!rhs)
00277 return 0;
00278 else
00279 return rhs->get_num_vectors();
00280 }
00281
00286 inline bool has_features()
00287 {
00288 return lhs && rhs;
00289 }
00290
00295 inline bool lhs_equals_rhs()
00296 {
00297 return lhs==rhs;
00298 }
00299
00301 virtual void remove_lhs_and_rhs();
00302
00304 virtual void remove_lhs();
00305
00307 virtual void remove_rhs();
00308
00316 virtual EKernelType get_kernel_type()=0 ;
00317
00324 virtual EFeatureType get_feature_type()=0;
00325
00332 virtual EFeatureClass get_feature_class()=0;
00333
00338 virtual const char* get_name()=0 ;
00339
00344 inline void set_cache_size(int32_t size)
00345 {
00346 cache_size = size;
00347
00348 }
00349
00354 inline int32_t get_cache_size() { return cache_size; }
00355
00356
00357
00359 void list_kernel();
00360
00366 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00367
00371 virtual void clear_normal();
00372
00378 virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00379
00384 inline EOptimizationType get_optimization_type() { return opt_type; }
00385
00390 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00391
00396 inline bool get_is_initialized() { return optimization_initialized; }
00397
00405 virtual bool init_optimization(
00406 int32_t count, int32_t *IDX, float64_t *weights);
00407
00412 virtual bool delete_optimization();
00413
00419 bool init_optimization_svm(CSVM * svm) ;
00420
00426 virtual float64_t compute_optimized(int32_t vector_idx);
00427
00436 virtual void compute_batch(
00437 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00438 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00439 float64_t factor=1.0);
00440
00445 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00446
00451 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00452
00457 virtual int32_t get_num_subkernels();
00458
00464 virtual void compute_by_subkernel(
00465 int32_t vector_idx, float64_t * subkernel_contrib);
00466
00472 virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00473
00479 virtual void set_subkernel_weights(
00480 float64_t* weights, int32_t num_weights);
00481
00482 protected:
00487 inline void set_property(EKernelProperty p)
00488 {
00489 properties |= p;
00490 }
00491
00496 inline void unset_property(EKernelProperty p)
00497 {
00498 properties &= (properties | p) ^ p;
00499 }
00500
00505 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00506
00517 virtual float64_t compute(int32_t x, int32_t y)=0;
00518
00519
00521
00522
00523 protected:
00525 int32_t cache_size;
00526
00527
00528
00531 KERNELCACHE_ELEM* kernel_matrix;
00532
00534 CFeatures* lhs;
00536 CFeatures* rhs;
00537
00539 float64_t combined_kernel_weight;
00540
00542 bool optimization_initialized;
00546 EOptimizationType opt_type;
00547
00549 uint64_t properties;
00550
00553 CKernelNormalizer* normalizer;
00554 };
00555
00556 #endif