Kernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KERNEL_H___
00013 #define _KERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "base/SGObject.h"
00017 #include "features/Features.h"
00018 #include "kernel/KernelNormalizer.h"
00019 
00020 
/** Optimization strategy selector.
 *
 * Stored in CKernel::opt_type and accessed through
 * CKernel::get_optimization_type() / CKernel::set_optimization_type().
 * As the enumerator names indicate, the two values trade speed against
 * memory use; the code that interprets them is not part of this header.
 * No explicit values are given, so the enumerators are 0 and 1 in
 * declaration order.
 */
00021 enum EOptimizationType
00022 {
      /* per its name: faster, at the price of higher memory use */
00023     FASTBUTMEMHUNGRY,
      /* per its name: slower, but with lower memory use */
00024     SLOWBUTMEMEFFICIENT
00025 };
00026 
/** Identifier of a concrete kernel implementation.
 *
 * This is the return type of the pure virtual CKernel::get_kernel_type().
 * Every enumerator carries an explicit numeric value; related kernels share
 * a decade (e.g. K_LINEAR = 10, K_SPARSELINEAR = 11). Note that the value
 * 130 is unused in this listing.
 * NOTE(review): whether these numbers are persisted or exchanged with
 * external interfaces is not visible here -- treat them as fixed until
 * confirmed otherwise.
 */
00027 enum EKernelType
00028 {
      /* 0: type not known / not set */
00029     K_UNKNOWN = 0,
00030     K_LINEAR = 10,
00031     K_SPARSELINEAR = 11,
00032     K_POLY = 20,
00033     K_GAUSSIAN = 30,
00034     K_SPARSEGAUSSIAN = 31,
00035     K_GAUSSIANSHIFT = 32,
00036     K_HISTOGRAM = 40,
00037     K_SALZBERG = 41,
00038     K_LOCALITYIMPROVED = 50,
00039     K_SIMPLELOCALITYIMPROVED = 60,
00040     K_FIXEDDEGREE = 70,
00041     K_WEIGHTEDDEGREE =    80,
00042     K_WEIGHTEDDEGREEPOS = 81,
00043     K_WEIGHTEDCOMMWORDSTRING = 90,
00044     K_POLYMATCH = 100,
00045     K_ALIGNMENT = 110,
00046     K_COMMWORDSTRING = 120,
00047     K_COMMULONGSTRING = 121,
00048     K_COMBINED = 140,
00049     K_AUC = 150,
00050     K_CUSTOM = 160,
00051     K_SIGMOID = 170,
00052     K_CHI2 = 180,
00053     K_DIAG = 190,
00054     K_CONST = 200,
00055     K_MINDYGRAM = 210,
00056     K_DISTANCE = 220,
00057     K_LOCALALIGNMENT = 230,
00058     K_PYRAMIDCHI2 = 240,
00059     K_OLIGO = 250,
00060     K_MATCHWORD = 260
00061 };
00062 
/** Capability flags of a kernel.
 *
 * The non-zero values are distinct powers of two, so they can be combined
 * in the CKernel::properties bit mask; they are set, cleared and queried
 * via CKernel::set_property(), CKernel::unset_property() and
 * CKernel::has_property().
 */
00063 enum EKernelProperty
00064 {
00065     KP_NONE = 0,
00066     KP_LINADD = 1,  // kernel can be optimized by normal-vector updates of the form w + dw
00067     KP_KERNCOMBINATION = 2, // kernel is in fact a linear combination of subkernels, K=\sum_i b_i*K_i
00068     KP_BATCHEVALUATION = 4  // kernel can generate normals on the fly in linadd and process batches faster / more memory-efficiently than single examples
00069 };
00070 
00071 
00072 class CSVM;
00073 
/** Abstract base class of all kernels.
 *
 * A kernel holds a left-hand side (lhs) and a right-hand side (rhs) feature
 * object and exposes kernel(i, j), which evaluates the subclass-provided
 * compute(i, j) and passes the result through the attached
 * CKernelNormalizer. Subclasses must implement the pure virtual members
 * compute(), get_kernel_type(), get_feature_type(), get_feature_class(),
 * get_name(), load_init() and save_init().
 *
 * Only the inline members are defined in this header; every other member
 * is defined elsewhere, so the notes on those members describe what the
 * signature shows and mark anything beyond that as an assumption.
 */
00097 class CKernel : public CSGObject
00098 {
              /* These normalizers are friends and thus may access the
               * protected/private members of CKernel (presumably compute()
               * -- confirm in the normalizer sources). */
00099     friend class CSqrtDiagKernelNormalizer;
00100     friend class CAvgDiagKernelNormalizer;
00101     friend class CFirstElementKernelNormalizer;
00102 
00103     public:
              /** Constructor.
               *
               * @param size presumably the cache size (cf. cache_size /
               *             set_cache_size()) -- definition not visible here
               */
00108         CKernel(int32_t size);
00109 
              /** Constructor taking both feature objects.
               *
               * @param l features for the left-hand side
               * @param r features for the right-hand side
               * @param size presumably the cache size -- definition not
               *             visible here
               */
00116         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00117 
              /** Virtual destructor (class is used polymorphically). */
00118         virtual ~CKernel();
00119 
              /** Evaluate the normalized kernel for a pair of vectors.
               *
               * Requires lhs and rhs to be set (enforced with ASSERT).
               *
               * @param idx_a index into lhs
               * @param idx_b index into rhs
               * @return 0 if either index is negative; otherwise
               *         normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b)
               *
               * NOTE(review): normalizer is dereferenced without a NULL
               * check -- confirm it is always set before kernel() is called.
               */
00127         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00128         {
                  /* negative indices short-circuit before any feature access */
00129             if (idx_a < 0 || idx_b <0)
00130                 return 0;
00131 
00132             ASSERT(lhs);
00133             ASSERT(rhs);
00134 
                  /* Only when both sides are the same feature object:
                   * an index i >= num_vectors is mirrored back to
                   * 2*num_vectors-1-i (num_vectors -> num_vectors-1,
                   * num_vectors+1 -> num_vectors-2, ...).
                   * NOTE(review): indices >= 2*num_vectors would become
                   * negative here; no check follows. */
00135             if (lhs==rhs)
00136             {
00137                 int32_t num_vectors = lhs->get_num_vectors();
00138 
00139                 if (idx_a>=num_vectors)
00140                     idx_a=2*num_vectors-1-idx_a;
00141 
00142                 if (idx_b>=num_vectors)
00143                     idx_b=2*num_vectors-1-idx_b;
00144             }
00145 
00146             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00147         }
00148 
              /** Get the kernel matrix through output pointers.
               *
               * @param dst receives a pointer to the matrix data
               * @param m receives one matrix dimension
               * @param n receives the other matrix dimension
               *
               * NOTE(review): allocation/ownership of *dst and which of m/n
               * is rows vs. columns are not visible here -- confirm in the
               * definition.
               */
00155         void get_kernel_matrix(float64_t** dst, int32_t* m, int32_t* n);
00156 
              /** Get the kernel matrix as float64_t values.
               *
               * @param m output: matrix dimension
               * @param n output: matrix dimension
               * @param target buffer argument; presumably an optional
               *               pre-allocated destination -- TODO confirm
               * @return pointer to the matrix data
               */
00164         virtual float64_t* get_kernel_matrix_real(
00165             int32_t &m, int32_t &n, float64_t* target);
00166 
              /** Get the kernel matrix as float32_t values.
               *
               * Same parameter layout as get_kernel_matrix_real(), but with
               * a float32_t element type.
               */
00174         virtual float32_t* get_kernel_matrix_shortreal(
00175             int32_t &m, int32_t &n, float32_t* target);
00176 
              /** Initialize the kernel with feature objects.
               *
               * @param lhs features for the left-hand side
               * @param rhs features for the right-hand side
               * @return bool status; presumably true on success -- confirm
               */
00187         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00188 
              /** Set the normalizer used by kernel().
               *
               * @param normalizer normalizer to use
               * @return bool status; presumably true on success -- confirm
               */
00193         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00194 
              /** Get the kernel normalizer.
               *
               * @return the normalizer (reference-count handling, if any,
               *         is not visible here)
               */
00199         virtual CKernelNormalizer* get_normalizer();
00200 
              /** Initialize the current normalizer.
               *
               * @return bool status; presumably true on success -- confirm
               */
00204         virtual bool init_normalizer();
00205 
              /** Clean up the kernel; what exactly is released is defined
               * outside this header. */
00212         virtual void cleanup();
00213 
              /** Load the kernel from a file.
               *
               * @param fname file name
               * @return bool status; presumably true on success -- confirm
               */
00219         bool load(char* fname);
00220 
              /** Save the kernel to a file.
               *
               * @param fname file name
               * @return bool status; presumably true on success -- confirm
               */
00226         bool save(char* fname);
00227 
              /** Load kernel-specific initialization data (pure virtual).
               *
               * @param src stream to read from
               * @return bool status; presumably true on success -- confirm
               */
00235         virtual bool load_init(FILE* src)=0;
00236 
              /** Save kernel-specific initialization data (pure virtual).
               *
               * @param dest stream to write to
               * @return bool status; presumably true on success -- confirm
               */
00244         virtual bool save_init(FILE* dest)=0;
00245 
              /** Get the left-hand side features.
               *
               * SG_REF(lhs) is invoked before returning; presumably the
               * caller owns that extra reference and must release it --
               * confirm against the SG_REF definition.
               */
00250         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00251 
              /** Get the right-hand side features.
               *
               * SG_REF(rhs) is invoked before returning; see get_lhs().
               */
00256         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00257 
              /** Number of vectors on the left-hand side.
               *
               * @return 0 if lhs is not set, else lhs->get_num_vectors()
               */
00262         inline int32_t get_num_vec_lhs()
00263         {
00264             if (!lhs)
00265                 return 0;
00266             else
00267                 return lhs->get_num_vectors();
00268         }
00269 
              /** Number of vectors on the right-hand side.
               *
               * @return 0 if rhs is not set, else rhs->get_num_vectors()
               */
00274         inline int32_t get_num_vec_rhs()
00275         {
00276             if (!rhs)
00277                 return 0;
00278             else
00279                 return rhs->get_num_vectors();
00280         }
00281 
              /** @return true only if both lhs and rhs are set */
00286         inline bool has_features()
00287         {
00288             return lhs && rhs;
00289         }
00290 
              /** @return true if lhs and rhs are the same pointer (this is
               *          also the case when both are NULL) */
00295         inline bool lhs_equals_rhs()
00296         {
00297             return lhs==rhs;
00298         }
00299 
              /** Remove both the lhs and the rhs features from the kernel. */
00301         virtual void remove_lhs_and_rhs();
00302 
              /** Remove the lhs features from the kernel. */
00304         virtual void remove_lhs();
00305 
              /** Remove the rhs features from the kernel. */
00307         virtual void remove_rhs();
00308 
              /** @return the EKernelType identifying the concrete kernel
               *          (pure virtual) */
00316         virtual EKernelType get_kernel_type()=0 ;
00317 
              /** @return the feature type associated with this kernel
               *          (pure virtual); presumably the type it can process */
00324         virtual EFeatureType get_feature_type()=0;
00325 
              /** @return the feature class associated with this kernel
               *          (pure virtual); presumably the class it can process */
00332         virtual EFeatureClass get_feature_class()=0;
00333 
              /** @return the name of the kernel (pure virtual) */
00338         virtual const char* get_name()=0 ;
00339 
              /** Store a new cache size.
               *
               * Only the cache_size member is updated here; nothing is
               * (re)allocated by this call.
               *
               * @param size new cache size (unit not visible in this header)
               */
00344         inline void set_cache_size(int32_t size)
00345         {
00346             cache_size = size;
00347 
00348         }
00349 
              /** @return the stored cache size */
00354         inline int32_t get_cache_size() { return cache_size; }
00355 
00356 
00357 
              /** List the kernel; presumably prints a human-readable
               * description -- definition not visible here. */
00359         void list_kernel();
00360 
              /** Test a property flag.
               *
               * @param p property (bit mask) to test
               * @return true if at least one bit of p is set in properties
               */
00366         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00367 
              /** Clear the normal vector (cf. KP_LINADD); definition not
               * visible here. */
00371         virtual void clear_normal();
00372 
              /** Add one vector, scaled by a weight, to the normal.
               *
               * @param vector_idx index of the vector to add
               * @param weight weight applied to that vector
               */
00378         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00379 
              /** @return the currently stored optimization type */
00384         inline EOptimizationType get_optimization_type() { return opt_type; }
00385 
              /** Store a new optimization type.
               *
               * @param t optimization type to store in opt_type
               */
00390         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00391 
              /** @return the optimization_initialized flag */
00396         inline bool get_is_initialized() { return optimization_initialized; }
00397 
              /** Initialize the optimization.
               *
               * @param count number of entries; presumably the length of
               *              both IDX and weights -- TODO confirm
               * @param IDX array of indices
               * @param weights array of weights
               * @return bool status; presumably true on success -- confirm
               */
00405         virtual bool init_optimization(
00406             int32_t count, int32_t *IDX, float64_t *weights);
00407 
              /** Delete the optimization again.
               *
               * @return bool status; presumably true on success -- confirm
               */
00412         virtual bool delete_optimization();
00413 
              /** Initialize the optimization from an SVM.
               *
               * @param svm SVM to take the optimization data from
               *            (presumably its support vectors and alphas)
               * @return bool status; presumably true on success -- confirm
               */
00419         bool init_optimization_svm(CSVM * svm) ;
00420 
              /** Compute the optimized kernel output for one vector.
               *
               * @param vector_idx index of the vector to evaluate
               * @return the computed value
               */
00426         virtual float64_t compute_optimized(int32_t vector_idx);
00427 
              /** Compute outputs for a batch of vectors.
               *
               * @param num_vec number of vectors; presumably the length of
               *                vec_idx and target -- TODO confirm
               * @param vec_idx indices of the vectors to evaluate
               * @param target output buffer for the results
               * @param num_suppvec number of support vectors; presumably
               *                    the length of IDX and alphas
               * @param IDX support vector indices
               * @param alphas support vector weights
               * @param factor scaling factor, defaults to 1.0
               */
00436         virtual void compute_batch(
00437             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00438             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00439             float64_t factor=1.0);
00440 
              /** @return the stored combined kernel weight */
00445         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00446 
              /** Store a new combined kernel weight.
               *
               * @param nw new weight
               */
00451         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00452 
              /** @return the number of subkernels (cf. KP_KERNCOMBINATION) */
00457         virtual int32_t get_num_subkernels();
00458 
              /** Compute the per-subkernel contributions for one vector.
               *
               * @param vector_idx index of the vector to evaluate
               * @param subkernel_contrib output buffer; presumably one
               *                          entry per subkernel -- TODO confirm
               */
00464         virtual void compute_by_subkernel(
00465             int32_t vector_idx, float64_t * subkernel_contrib);
00466 
              /** Get the subkernel weights.
               *
               * @param num_weights output: number of weights
               * @return read-only pointer to the weights
               */
00472         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00473 
              /** Set the subkernel weights.
               *
               * @param weights new weights
               * @param num_weights number of entries in weights
               */
00479         virtual void set_subkernel_weights(
00480             float64_t* weights, int32_t num_weights);
00481 
00482     protected:
              /** Set a property flag.
               *
               * @param p property bit(s) to OR into properties
               */
00487         inline void set_property(EKernelProperty p)
00488         {
00489             properties |= p;
00490         }
00491 
              /** Clear a property flag.
               *
               * @param p property bit(s) to remove from properties
               */
00496         inline void unset_property(EKernelProperty p)
00497         {
                  /* (properties | p) ^ p forces the bits of p on and then
                   * flips them off, leaving all other bits untouched; the
                   * result is properties with the bits of p cleared. */
00498             properties &= (properties | p) ^ p;
00499         }
00500 
              /** Store the optimization-initialized flag.
               *
               * @param p_init new value of optimization_initialized
               */
00505         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00506 
              /** Compute the raw kernel value for a pair of indices.
               *
               * Pure virtual; called by kernel(), which hands the result
               * to the normalizer.
               *
               * @param x index into lhs (kernel() passes idx_a here)
               * @param y index into rhs (kernel() passes idx_b here)
               * @return the kernel value before normalization
               */
00517         virtual float64_t compute(int32_t x, int32_t y)=0;
00518 
00519 
00521 
00522 
00523     protected:
              /** cache size, as set via set_cache_size() */
00525         int32_t cache_size;
00526 
00527 
00528 
              /** kernel matrix storage; presumably a precomputed or cached
               * matrix -- its use is not visible in this header */
00531         KERNELCACHE_ELEM* kernel_matrix;
00532 
              /** features of the left-hand side (may be NULL) */
00534         CFeatures* lhs;
              /** features of the right-hand side (may be NULL) */
00536         CFeatures* rhs;
00537 
              /** weight accessed via get/set_combined_kernel_weight() */
00539         float64_t combined_kernel_weight;
00540 
              /** flag accessed via get_is_initialized() / set_is_initialized() */
00542         bool optimization_initialized;
              /** optimization type accessed via get/set_optimization_type() */
00546         EOptimizationType opt_type;
00547 
              /** bit mask of EKernelProperty flags */
00549         uint64_t  properties;
00550 
              /** normalizer applied to compute() results in kernel() */
00553         CKernelNormalizer* normalizer;
00554 };
00555 
00556 #endif /* _KERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation