WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019 
00020 
/**
 * Weighting type selector for CWeightedDegreeStringKernel.
 *
 * A value of this type is accepted by the kernel's
 * (degree, type) constructor and by set_wd_weights_by_type(), and is
 * stored in the kernel's `type` member. The numeric values are spelled
 * out explicitly (0..8).
 *
 * NOTE(review): the per-enumerator descriptions below are inferred from
 * the enumerator names and from the similarly named
 * init_block_weights_*() members of the kernel class; confirm against
 * the implementation file.
 */
00021 enum EWDKernType
00022 {
          /* presumably the plain weighted degree weighting
           * (cf. init_block_weights_from_wd()) -- confirm */
00023     E_WD=0,
          /* presumably externally supplied weights
           * (cf. init_block_weights_from_wd_external()) -- confirm */
00024     E_EXTERNAL=1,
00025 
          /* block weighting variants; each name mirrors an
           * init_block_weights_<suffix>() member of the kernel class */
00026     E_BLOCK_CONST=2,
00027     E_BLOCK_LINEAR=3,
00028     E_BLOCK_SQPOLY=4,
00029     E_BLOCK_CUBICPOLY=5,
00030     E_BLOCK_EXP=6,
00031     E_BLOCK_LOG=7,
00032     E_BLOCK_EXTERNAL=8
00033 };
00034 
00035 
/**
 * Weighted degree string kernel on char sequences.
 *
 * Derives from CStringKernel<char>; reports itself as kernel type
 * K_WEIGHTEDDEGREE with the name "WeightedDegree".
 *
 * State visible in this header:
 *  - `weights`: per-degree weights (`degree` entries when `length` is 0,
 *    otherwise `degree*length` entries, see get_weights()).
 *  - `position_weights`: optional array (NULL when unset) reported with
 *    `seq_length` entries.
 *  - `tries`: a CTrie<DNATrie> used by the "optimized" code path
 *    (add_to_normal() / clear_normal() / compute_optimized()).
 *  - `mkl_stepsize`: how many consecutive weights are grouped into one
 *    subkernel for the *_subkernel* accessors.
 *
 * Most members are only declared here; their definitions live in the
 * implementation file, so their documentation below is limited to what
 * the signatures show.
 */
00048 class CWeightedDegreeStringKernel: public CStringKernel<char>
00049 {
00050     public:
              /**
               * Construct a kernel from a degree and a weighting type.
               *
               * @param degree kernel degree
               * @param type   weighting type; defaults to E_WD
               */
00056         CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00057 
              /**
               * Construct a kernel from a caller-supplied weights array.
               *
               * NOTE(review): whether the array is copied or adopted is
               * not visible in this header -- confirm in the .cpp.
               *
               * @param weights weights array
               * @param degree  kernel degree
               */
00063         CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00064 
              /**
               * Construct a kernel from left/right string features.
               *
               * @param l      features of the left-hand side
               * @param r      features of the right-hand side
               * @param degree kernel degree
               */
00071         CWeightedDegreeStringKernel(
00072             CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00073 
              /** Virtual destructor (defined in the implementation file). */
00074         virtual ~CWeightedDegreeStringKernel();
00075 
              /**
               * Initialise the kernel with left- and right-hand side features.
               *
               * @param l features of the left-hand side
               * @param r features of the right-hand side
               * @return status flag (presumably true on success -- confirm)
               */
00082         virtual bool init(CFeatures* l, CFeatures* r);
00083 
              /** Clean up the kernel (defined in the implementation file). */
00085         virtual void cleanup();
00086 
              /**
               * Load kernel initialisation data from a stream.
               *
               * @param src stream to read from
               * @return status flag
               */
00092         bool load_init(FILE* src);
00093 
              /**
               * Save kernel initialisation data to a stream.
               *
               * @param dest stream to write to
               * @return status flag
               */
00099         bool save_init(FILE* dest);
00100 
              /** @return the kernel type identifier, K_WEIGHTEDDEGREE */
00105         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00106 
              /**
               * @return the kernel name, the string literal "WeightedDegree"
               *
               * The ';' after the body is redundant but harmless.
               */
00111         virtual const char* get_name() { return "WeightedDegree"; } ;
00112 
              /**
               * Initialise the optimization; forwards to the four-argument
               * overload with tree_num = -1.
               *
               * @param count  number of entries in IDX / alphas
               * @param IDX    example indices
               * @param alphas coefficients, one per index
               * @return result of the four-argument overload
               */
00120         inline virtual bool init_optimization(
00121             int32_t count, int32_t *IDX, float64_t* alphas)
00122         {
00123             return init_optimization(count, IDX, alphas, -1);
00124         }
00125 
              /**
               * Initialise the optimization, optionally for a specific tree.
               *
               * @param count    number of entries in IDX / alphas
               * @param IDX      example indices
               * @param alphas   coefficients, one per index
               * @param tree_num tree selector; the three-argument overload
               *                 passes -1 (presumably "all trees" -- confirm)
               * @return status flag
               */
00136         virtual bool init_optimization(
00137             int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00138 
              /**
               * Delete the optimization.
               *
               * @return status flag
               */
00143         virtual bool delete_optimization();
00144 
              /**
               * Compute the optimized kernel output for one example.
               *
               * Returns compute_by_tree(idx) when get_is_initialized() is
               * true. Otherwise the failure is reported through SG_ERROR and,
               * if that macro returns, 0 is returned.
               *
               * @param idx example index
               * @return value from compute_by_tree(idx), or 0 on the error path
               */
00150         virtual float64_t compute_optimized(int32_t idx)
00151         { 
00152             if (get_is_initialized())
00153                 return compute_by_tree(idx);
00154 
00155             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00156             return 0;
00157         }
00158 
              /**
               * Static helper for compute_batch().
               *
               * NOTE(review): the void*(void*) signature looks like a thread
               * entry point; the layout behind `p` is defined in the .cpp.
               *
               * @param p opaque parameter block
               * @return opaque result pointer
               */
00163         static void* compute_batch_helper(void* p);
00164 
              /**
               * Compute outputs for a batch of vectors.
               *
               * @param num_vec     number of entries in vec_idx / target
               * @param vec_idx     indices of the vectors to evaluate
               * @param target      output array
               * @param num_suppvec number of entries in IDX / alphas
               * @param IDX         support vector indices
               * @param alphas      coefficients, one per support vector
               * @param factor      scaling factor; defaults to 1.0
               */
00175         virtual void compute_batch(
00176             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00177             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00178             float64_t factor=1.0);
00179 
              /**
               * Reset the accumulated state held in the tries.
               *
               * Does nothing unless get_is_initialized() is true; in that
               * case the trees are deleted (passing max_mismatch==0 to
               * delete_trees) and the initialised flag is cleared.
               *
               * NOTE(review): `tries` is dereferenced without a NULL check.
               */
00183         inline virtual void clear_normal()
00184         {
00185             if (get_is_initialized())
00186             {
00187                 tries->delete_trees(max_mismatch==0);
00188                 set_is_initialized(false);
00189             }
00190         }
00191 
              /**
               * Add one weighted example to the tree structure and mark the
               * kernel as initialised.
               *
               * Uses add_example_to_tree() when max_mismatch is 0 and
               * add_example_to_tree_mismatch() otherwise.
               *
               * @param idx    example index
               * @param weight weight of the example
               */
00197         inline virtual void add_to_normal(int32_t idx, float64_t weight)
00198         {
00199             if (max_mismatch==0)
00200                 add_example_to_tree(idx, weight);
00201             else
00202                 add_example_to_tree_mismatch(idx, weight);
00203 
00204             set_is_initialized(true);
00205         }
00206 
              /**
               * Number of subkernels: ceil(n / mkl_stepsize), where n is
               *  - seq_length       if position_weights is set,
               *  - degree           if length is 0,
               *  - degree * length  otherwise.
               *
               * NOTE(review): set_mkl_stepsize() does not validate its
               * argument. With mkl_stepsize == 0 the floating-point division
               * is by zero and the non-finite result is cast to int32_t.
               *
               * @return number of subkernels
               */
00211         inline virtual int32_t get_num_subkernels()
00212         {
00213             if (position_weights!=NULL)
00214                 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00215             if (length==0)
00216                 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00217             return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00218         }
00219 
              /**
               * Compute per-subkernel contributions for one example.
               *
               * Delegates to compute_by_tree(idx, subkernel_contrib) when
               * get_is_initialized() is true; otherwise reports the failure
               * through SG_ERROR.
               *
               * @param idx               example index
               * @param subkernel_contrib output array, passed through to
               *                          compute_by_tree()
               */
00225         inline void compute_by_subkernel(
00226             int32_t idx, float64_t * subkernel_contrib)
00227         { 
00228             if (get_is_initialized())
00229             {
00230                 compute_by_tree(idx, subkernel_contrib);
00231                 return ;
00232             }
00233 
00234             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00235         }
00236 
              /**
               * Get one weight per subkernel.
               *
               * Entry i is position_weights[i*mkl_stepsize] when
               * position_weights is set, otherwise weights[i*mkl_stepsize].
               *
               * The returned array is the internal weights_buffer. It is
               * freed and reallocated on every call, so a pointer from an
               * earlier call becomes invalid and the caller must not free it.
               *
               * @param num_weights receives get_num_subkernels()
               * @return pointer to the internal buffer with num_weights entries
               */
00242         inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00243         {
00244             num_weights = get_num_subkernels();
00245 
00246             delete[] weights_buffer ;
00247             weights_buffer = new float64_t[num_weights];
00248 
00249             if (position_weights!=NULL)
00250                 for (int32_t i=0; i<num_weights; i++)
00251                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00252             else
00253                 for (int32_t i=0; i<num_weights; i++)
00254                     weights_buffer[i] = weights[i*mkl_stepsize];
00255 
00256             return weights_buffer;
00257         }
00258 
              /**
               * Set one weight per subkernel.
               *
               * weights2[i] is written to up to mkl_stepsize consecutive
               * entries starting at index i*mkl_stepsize. The target is
               * position_weights (bounded by seq_length) when it is set;
               * otherwise it is weights, bounded by degree when length is 0
               * and by degree*length when it is not.
               *
               * A count mismatch is reported through SG_ERROR.
               * NOTE(review): if SG_ERROR returns, the loops below still run
               * with the kernel's own count and may read past weights2.
               *
               * @param weights2     new subkernel weights
               * @param num_weights2 number of entries in weights2; must equal
               *                     get_num_subkernels()
               */
00264         inline void set_subkernel_weights(
00265             float64_t* weights2, int32_t num_weights2)
00266         {
00267             int32_t num_weights = get_num_subkernels();
00268             if (num_weights!=num_weights2)
00269                 SG_ERROR( "number of weights do not match\n");
00270 
00271             if (position_weights!=NULL)
00272             {
00273                 for (int32_t i=0; i<num_weights; i++)
00274                 {
00275                     for (int32_t j=0; j<mkl_stepsize; j++)
00276                     {
00277                         if (i*mkl_stepsize+j<seq_length)
00278                             position_weights[i*mkl_stepsize+j] = weights2[i];
00279                     }
00280                 }
00281             }
00282             else if (length==0)
00283             {
00284                 for (int32_t i=0; i<num_weights; i++)
00285                 {
00286                     for (int32_t j=0; j<mkl_stepsize; j++)
00287                     {
00288                         if (i*mkl_stepsize+j<get_degree())
00289                             weights[i*mkl_stepsize+j] = weights2[i];
00290                     }
00291                 }
00292             }
00293             else
00294             {
00295                 for (int32_t i=0; i<num_weights; i++)
00296                 {
00297                     for (int32_t j=0; j<mkl_stepsize; j++)
00298                     {
00299                         if (i*mkl_stepsize+j<get_degree()*length)
00300                             weights[i*mkl_stepsize+j] = weights2[i];
00301                     }
00302                 }
00303             }
00304         }
00305 
00306         // other kernel tree operations
              /**
               * Compute absolute weights.
               *
               * NOTE(review): ownership of the returned array is not visible
               * in this header -- confirm in the .cpp.
               *
               * @param len receives a length (presumably of the returned array)
               * @return pointer to the computed weights
               */
00312         float64_t *compute_abs_weights(int32_t & len);
00313 
              /**
               * Tree-based computation that writes per-level contributions.
               *
               * @param idx          example index
               * @param LevelContrib output array for the contributions
               */
00320         void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00321 
              /** @return the tree_initialized flag */
00326         bool is_tree_initialized() { return tree_initialized; }
00327 
              /**
               * Get the degree weights.
               *
               * @param d   receives degree
               * @param len receives length
               * @return the internal weights pointer (not a copy)
               */
00333         inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00334         {
00335             d=degree;
00336             len=length;
00337             return weights;
00338         }
00339 
              /**
               * Get the weights currently in effect.
               *
               * Returns position_weights (num_weights = seq_length) when it
               * is set. Otherwise returns weights, with num_weights = degree
               * when length is 0 and degree*length when it is not.
               *
               * @param num_weights receives the number of entries
               * @return internal pointer (not a copy)
               */
00345         inline float64_t *get_weights(int32_t& num_weights)
00346         {
00347             if (position_weights!=NULL)
00348             {
00349                 num_weights = seq_length ;
00350                 return position_weights ;
00351             }
00352             if (length==0)
00353                 num_weights = degree ;
00354             else
00355                 num_weights = degree*length ;
00356             return weights;
00357         }
00358 
              /**
               * Get the position weights.
               *
               * @param len receives seq_length
               * @return the internal position_weights pointer; NULL when no
               *         position weights are set
               */
00364         inline float64_t *get_position_weights(int32_t& len)
00365         {
00366             len=seq_length;
00367             return position_weights;
00368         }
00369 
              /**
               * Set the weights according to a weighting type.
               *
               * @param type weighting type
               * @return status flag
               */
00375         bool set_wd_weights_by_type(EWDKernType type);
00376 
              /**
               * Set the degree weights; forwards to
               * set_weights(p_weights, d, 0).
               *
               * NOTE(review): the bool result of set_weights() is discarded.
               *
               * @param p_weights new weights
               * @param d         degree
               */
00383         void set_wd_weights(float64_t* p_weights, int32_t d)
00384         {
00385             set_weights(p_weights,d,0);
00386         }
00387 
              /**
               * Set the weights.
               *
               * @param weights new weights
               * @param d       degree
               * @param len     length (set_wd_weights() passes 0)
               * @return status flag
               */
00394         bool set_weights(float64_t* weights, int32_t d, int32_t len);
00395 
              /**
               * Set the position weights.
               *
               * @param position_weights new position weights
               * @param len              number of entries; defaults to 0
               * @return status flag
               */
00402         bool set_position_weights(float64_t* position_weights, int32_t len=0);
00403 
              /*
               * Block weight initialisers. All are defined in the
               * implementation file and return a status flag. The suffixes
               * mirror the EWDKernType enumerators (presumably one
               * initialiser per weighting type -- confirm in the .cpp).
               */

              /** Initialise the block weights. @return status flag */
00408         bool init_block_weights();
00409 
              /** Initialise block weights "from wd". @return status flag */
00414         bool init_block_weights_from_wd();
00415 
              /** Initialise block weights "from wd external". @return status flag */
00420         bool init_block_weights_from_wd_external();
00421 
              /** Initialise block weights, "const" variant. @return status flag */
00426         bool init_block_weights_const();
00427 
              /** Initialise block weights, "linear" variant. @return status flag */
00432         bool init_block_weights_linear();
00433 
              /** Initialise block weights, "sqpoly" variant. @return status flag */
00438         bool init_block_weights_sqpoly();
00439 
              /** Initialise block weights, "cubicpoly" variant. @return status flag */
00444         bool init_block_weights_cubicpoly();
00445 
              /** Initialise block weights, "exp" variant. @return status flag */
00450         bool init_block_weights_exp();
00451 
              /** Initialise block weights, "log" variant. @return status flag */
00456         bool init_block_weights_log();
00457 
              /** Initialise block weights, "external" variant. @return status flag */
00462         bool init_block_weights_external();
00463 
              /**
               * Free position_weights and reset the pointer to NULL.
               *
               * @return always true
               */
00468         bool delete_position_weights()
00469         {
00470             delete[] position_weights;
00471             position_weights=NULL;
00472             return true;
00473         }
00474 
              /**
               * Set the maximum number of mismatches.
               *
               * @param max new maximum
               * @return status flag
               */
00480         bool set_max_mismatch(int32_t max);
00481 
              /** @return the current max_mismatch value */
00486         inline int32_t get_max_mismatch() { return max_mismatch; }
00487 
              /**
               * Set the degree. Only the member is assigned; no validation
               * or weight update happens here.
               *
               * @param deg new degree
               * @return always true
               */
00493         inline bool set_degree(int32_t deg) { degree=deg; return true; }
00494 
              /** @return the current degree */
00499         inline int32_t get_degree() { return degree; }
00500 
              /**
               * Enable or disable block computation.
               *
               * @param block new flag value
               * @return always true
               */
00506         inline bool set_use_block_computation(bool block)
00507         {
00508             block_computation=block;
00509             return true;
00510         }
00511 
              /** @return the block_computation flag */
00516         inline bool get_use_block_computation() { return block_computation; }
00517 
              /**
               * Set the MKL step size (number of consecutive weights grouped
               * into one subkernel).
               *
               * NOTE(review): the value is stored unchecked. Values < 1
               * break get_num_subkernels() and the loops in
               * set_subkernel_weights(); consider rejecting them here.
               *
               * @param step new step size
               * @return always true
               */
00523         inline bool set_mkl_stepsize(int32_t step)
00524         {
00525             mkl_stepsize=step;
00526             return true;
00527         }
00528 
              /** @return the current MKL step size */
00533         inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00534 
              /**
               * Set which_degree.
               *
               * @param which new value
               * @return always true
               */
00540         inline bool set_which_degree(int32_t which)
00541         {
00542             which_degree=which;
00543             return true;
00544         }
00545 
              /** @return the current which_degree value */
00550         inline int32_t get_which_degree() { return which_degree; }
00551 
00552     protected:
              /** Create empty tries (defined in the implementation file). */
00554         void create_empty_tries();
00555 
              /**
               * Add an example to the tree; used by add_to_normal() when
               * max_mismatch is 0.
               *
               * @param idx    example index
               * @param weight weight of the example
               */
00561         void add_example_to_tree(int32_t idx, float64_t weight);
00562 
              /**
               * Add an example to a single tree.
               *
               * @param idx      example index
               * @param weight   weight of the example
               * @param tree_num tree selector
               */
00569         void add_example_to_single_tree(
00570             int32_t idx, float64_t weight, int32_t tree_num);
00571 
              /**
               * Add an example to the tree; used by add_to_normal() when
               * max_mismatch is non-zero.
               *
               * @param idx    example index
               * @param weight weight of the example
               */
00577         void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00578 
              /**
               * Mismatch variant of add_example_to_single_tree().
               *
               * @param idx      example index
               * @param weight   weight of the example
               * @param tree_num tree selector
               */
00585         void add_example_to_single_tree_mismatch(
00586             int32_t idx, float64_t weight, int32_t tree_num);
00587 
              /**
               * Tree-based computation for one example; its result is what
               * compute_optimized() returns.
               *
               * @param idx example index
               * @return computed value
               */
00593         float64_t compute_by_tree(int32_t idx);
00594 
              /**
               * Compute the kernel value for a pair of indices.
               *
               * @param idx_a first index
               * @param idx_b second index
               * @return computed kernel value
               */
00603         float64_t compute(int32_t idx_a, int32_t idx_b);
00604 
              /**
               * Kernel computation with mismatches.
               *
               * @param avec first char vector
               * @param alen length of avec
               * @param bvec second char vector
               * @param blen length of bvec
               * @return computed value
               */
00613         float64_t compute_with_mismatch(
00614             char* avec, int32_t alen, char* bvec, int32_t blen);
00615 
              /**
               * Kernel computation without mismatches.
               *
               * @param avec first char vector
               * @param alen length of avec
               * @param bvec second char vector
               * @param blen length of bvec
               * @return computed value
               */
00624         float64_t compute_without_mismatch(
00625             char* avec, int32_t alen, char* bvec, int32_t blen);
00626 
              /**
               * Kernel computation without mismatches, "matrix" variant.
               *
               * @param avec first char vector
               * @param alen length of avec
               * @param bvec second char vector
               * @param blen length of bvec
               * @return computed value
               */
00635         float64_t compute_without_mismatch_matrix(
00636             char* avec, int32_t alen, char* bvec, int32_t blen);
00637 
              /**
               * Kernel computation using blocks.
               *
               * @param avec first char vector
               * @param alen length of avec
               * @param bvec second char vector
               * @param blen length of bvec
               * @return computed value
               */
00646         float64_t compute_using_block(char* avec, int32_t alen,
00647             char* bvec, int32_t blen);
00648 
              /** Remove the left-hand side features. */
00650         virtual void remove_lhs();
00651 
00652     protected:
              /** degree weights: `degree` entries when `length` is 0,
               *  otherwise `degree*length` entries (see get_weights()) */
00656         float64_t* weights;
              /** optional position weights, NULL when unset; reported with
               *  `seq_length` entries */
00658         float64_t* position_weights;
              /** buffer handed out by get_subkernel_weights(); reallocated
               *  on every call */
00660         float64_t* weights_buffer;
              /** number of consecutive weights grouped into one subkernel */
00662         int32_t mkl_stepsize;
              /** kernel degree */
00664         int32_t degree;
              /** length; 0 means `weights` holds only `degree` entries */
00666         int32_t length;
00667 
              /** maximum mismatches; 0 selects the non-mismatch tree routines */
00669         int32_t max_mismatch;
              /** sequence length; number of entries reported for
               *  position_weights */
00671         int32_t seq_length;
00672 
              /** NOTE(review): not referenced by the inline code in this
               *  header, which uses get_is_initialized()/set_is_initialized() */
00674         bool initialized;
00675 
              /** flag accessed via set/get_use_block_computation() */
00677         bool block_computation;
00678 
              /** presumably the number of entries in block_weights_external
               *  -- confirm */
00680         int32_t num_block_weights_external;
              /** externally supplied block weights (not used in this header) */
00682         float64_t* block_weights_external;
00683 
              /** block weights (presumably filled by init_block_weights*()
               *  -- confirm) */
00685         float64_t* block_weights;
              /** weighting type */
00687         EWDKernType type;
              /** value accessed via set/get_which_degree() */
00689         int32_t which_degree;
00690 
              /** tries used by the optimized code path; see clear_normal() */
00692         CTrie<DNATrie>* tries;
00693 
              /** flag returned by is_tree_initialized() */
00695         bool tree_initialized;
00696 
              /** alphabet (not used by the inline code in this header) */
00698         CAlphabet* alphabet;
00699 };
00700 
00701 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation