WeightedDegreeStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019 
/**
 * Weighted-degree string kernel operating on CHAR string features.
 *
 * The class keeps a vector of degree weights, an optional vector of
 * per-position weights, and a set of tries that back the "optimized"
 * evaluation path (compute_optimized() / compute_by_subkernel()).
 * Only the inline members are defined in this header; every other member
 * is a declaration whose behaviour lives in the implementation file.
 */
00021 class CWeightedDegreeStringKernel: public CStringKernel<CHAR>
00022 {
00023     public:
              /**
               * Construct a kernel from a degree and a weighting type.
               *
               * @param degree kernel degree
               * @param type weighting type, E_WD when omitted
               */
00029         CWeightedDegreeStringKernel(INT degree, EWDKernType type=E_WD);
00030 
              /**
               * Construct a kernel from caller-supplied weights.
               *
               * @param weights weight array (ownership/copy semantics are decided
               *        in the implementation -- not visible here)
               * @param degree kernel degree
               */
00036         CWeightedDegreeStringKernel(DREAL* weights, INT degree);
00037 
              /**
               * Construct a kernel from two string feature objects and a degree.
               *
               * @param l features of the left-hand side
               * @param r features of the right-hand side
               * @param degree kernel degree
               */
00044         CWeightedDegreeStringKernel(
00045             CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT degree);
00046 
              /** Virtual destructor (defined in the implementation file). */
00047         virtual ~CWeightedDegreeStringKernel();
00048 
              /**
               * Initialise the kernel with left- and right-hand side features.
               *
               * @param l features of the left-hand side
               * @param r features of the right-hand side
               * @return bool status; presumably true on success -- confirm in the
               *         implementation
               */
00055         virtual bool init(CFeatures* l, CFeatures* r);
00056 
              /** Clean-up hook; body is in the implementation file. */
00058         virtual void cleanup();
00059 
              /**
               * Load kernel initialisation data from an open file.
               *
               * @param src file to read from
               * @return bool status (semantics defined in the implementation)
               */
00065         bool load_init(FILE* src);
00066 
              /**
               * Save kernel initialisation data to an open file.
               *
               * @param dest file to write to
               * @return bool status (semantics defined in the implementation)
               */
00072         bool save_init(FILE* dest);
00073 
              /** @return the kernel type tag, always K_WEIGHTEDDEGREE */
00078         virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00079 
              /** @return the kernel name, the literal "WeightedDegree" */
00084         virtual const CHAR* get_name() { return "WeightedDegree" ; } ;
00085 
              /**
               * Initialise the optimized (tree based) evaluation.
               *
               * Forwards to the four-argument overload with tree_num = -1.
               * NOTE(review): -1 presumably selects all trees -- confirm in the
               * implementation of the four-argument overload.
               *
               * @param count number of entries in IDX and alphas
               * @param IDX example indices
               * @param alphas weights belonging to the indices
               * @return result of the four-argument overload
               */
00093         inline virtual bool init_optimization(INT count, INT *IDX, DREAL* alphas)
00094         {
00095             return init_optimization(count, IDX, alphas, -1);
00096         }
00097 
              /**
               * Initialise the optimized evaluation, optionally for one tree.
               *
               * @param count number of entries in IDX and alphas
               * @param IDX example indices
               * @param alphas weights belonging to the indices
               * @param tree_num tree selector; the three-argument overload passes -1
               * @return bool status (semantics defined in the implementation)
               */
00108         virtual bool init_optimization(INT count, INT *IDX, DREAL* alphas,
00109             INT tree_num);
00110 
              /**
               * Tear down the optimized evaluation state.
               *
               * @return bool status (semantics defined in the implementation)
               */
00115         virtual bool delete_optimization();
00116 
              /**
               * Evaluate the kernel for one example through the trees.
               *
               * @param idx index of the example
               * @return compute_by_tree(idx) when get_is_initialized() is true;
               *         otherwise an error is reported through SG_ERROR and, if
               *         SG_ERROR returns control, 0 is returned
               */
00122         virtual DREAL compute_optimized(INT idx)
00123         { 
00124             if (get_is_initialized())
00125                 return compute_by_tree(idx);
00126 
00127             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00128             return 0;
00129         }
00130 
              /**
               * Static helper taking and returning an untyped pointer.
               *
               * NOTE(review): the signature matches a thread entry point and the
               * name suggests it is the worker of compute_batch() -- confirm in the
               * implementation.
               *
               * @param p untyped argument
               * @return untyped result
               */
00135         static void* compute_batch_helper(void* p);
00136 
              /**
               * Batch evaluation for a set of vectors against a set of weighted
               * support vectors (declaration only).
               *
               * @param num_vec number of entries in vec_idx
               * @param vec_idx indices of the vectors to evaluate
               * @param target output array
               * @param num_suppvec number of entries in IDX and alphas
               * @param IDX support vector indices
               * @param alphas support vector weights
               * @param factor scaling factor, 1.0 when omitted
               */
00147         virtual void compute_batch(INT num_vec, INT* vec_idx, DREAL* target,
00148             INT num_suppvec, INT* IDX, DREAL* alphas, DREAL factor=1.0);
00149 
              /**
               * Discard the accumulated tree contents.
               *
               * Does nothing unless get_is_initialized() is true. Otherwise calls
               * tries->delete_trees() with the flag (max_mismatch==0) and marks the
               * kernel as not initialized. The meaning of the flag is defined by
               * CTrie and is not visible here.
               */
00153         inline virtual void clear_normal()
00154         {
00155             if (get_is_initialized())
00156             {
00157                 tries->delete_trees(max_mismatch==0);
00158                 set_is_initialized(false);
00159             }
00160         }
00161 
              /**
               * Add one weighted example to the trees.
               *
               * Uses add_example_to_tree() when max_mismatch is 0 and
               * add_example_to_tree_mismatch() otherwise, then marks the kernel as
               * initialized.
               *
               * @param idx index of the example
               * @param weight weight of the example
               */
00167         inline virtual void add_to_normal(INT idx, DREAL weight)
00168         {
00169             if (max_mismatch==0)
00170                 add_example_to_tree(idx, weight);
00171             else
00172                 add_example_to_tree_mismatch(idx, weight);
00173 
00174             set_is_initialized(true);
00175         }
00176 
              /**
               * Number of subkernels, i.e. number of weight groups of size
               * mkl_stepsize (rounded up).
               *
               * The number of underlying weights is seq_length when
               * position_weights is set, get_degree() when length is 0, and
               * get_degree()*length otherwise.
               *
               * NOTE(review): mkl_stepsize is used as a divisor and
               * set_mkl_stepsize() accepts any value; a step of 0 is not guarded.
               *
               * @return ceil(number of weights / mkl_stepsize)
               */
00181         inline virtual INT get_num_subkernels()
00182         {
00183             if (position_weights!=NULL)
00184                 return (INT) ceil(1.0*seq_length/mkl_stepsize) ;
00185             if (length==0)
00186                 return (INT) ceil(1.0*get_degree()/mkl_stepsize);
00187             return (INT) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00188         }
00189 
              /**
               * Evaluate the per-subkernel contributions of one example.
               *
               * When get_is_initialized() is true the work is delegated to
               * compute_by_tree(idx, subkernel_contrib); otherwise an error is
               * reported through SG_ERROR and subkernel_contrib is left untouched
               * by this function.
               *
               * @param idx index of the example
               * @param subkernel_contrib output array handed to compute_by_tree()
               */
00195         inline void compute_by_subkernel(INT idx, DREAL * subkernel_contrib)
00196         { 
00197             if (get_is_initialized())
00198             {
00199                 compute_by_tree(idx, subkernel_contrib);
00200                 return ;
00201             }
00202 
00203             SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00204         }
00205 
              /**
               * Return one weight per subkernel.
               *
               * The result is written into weights_buffer, which is freed and
               * re-allocated on every call; a pointer obtained from an earlier call
               * is therefore invalid after the next call. Entry i is taken from
               * index i*mkl_stepsize of position_weights (when set) or of weights.
               *
               * @param num_weights receives get_num_subkernels()
               * @return pointer to the internal buffer (not to be freed by the
               *         caller, since the kernel deletes it itself on the next call)
               */
00211         inline const DREAL* get_subkernel_weights(INT& num_weights)
00212         {
00213             num_weights = get_num_subkernels();
00214 
                  // drop the buffer of the previous call before allocating anew
00215             delete[] weights_buffer ;
00216             weights_buffer = new DREAL[num_weights];
00217 
00218             if (position_weights!=NULL)
00219                 for (INT i=0; i<num_weights; i++)
00220                     weights_buffer[i] = position_weights[i*mkl_stepsize];
00221             else
00222                 for (INT i=0; i<num_weights; i++)
00223                     weights_buffer[i] = weights[i*mkl_stepsize];
00224 
00225             return weights_buffer;
00226         }
00227 
              /**
               * Set the weights from one value per subkernel.
               *
               * num_weights2 must equal get_num_subkernels(); a mismatch is
               * reported through SG_ERROR. Each weights2[i] is then copied into the
               * (up to) mkl_stepsize consecutive entries starting at index
               * i*mkl_stepsize of the target array. The target is position_weights
               * (bounded by seq_length) when it is set, otherwise weights (bounded
               * by get_degree() when length is 0, else by get_degree()*length).
               *
               * NOTE(review): whether execution continues past SG_ERROR on a count
               * mismatch is not visible here.
               *
               * @param weights2 one weight per subkernel
               * @param num_weights2 number of entries in weights2
               */
00233         inline void set_subkernel_weights(DREAL* weights2, INT num_weights2)
00234         {
00235             INT num_weights = get_num_subkernels();
00236             if (num_weights!=num_weights2)
00237                 SG_ERROR( "number of weights do not match\n");
00238 
00239             if (position_weights!=NULL)
00240             {
00241                 for (INT i=0; i<num_weights; i++)
00242                 {
00243                     for (INT j=0; j<mkl_stepsize; j++)
00244                     {
                              // the last group may be shorter than mkl_stepsize
00245                         if (i*mkl_stepsize+j<seq_length)
00246                             position_weights[i*mkl_stepsize+j] = weights2[i];
00247                     }
00248                 }
00249             }
00250             else if (length==0)
00251             {
00252                 for (INT i=0; i<num_weights; i++)
00253                 {
00254                     for (INT j=0; j<mkl_stepsize; j++)
00255                     {
00256                         if (i*mkl_stepsize+j<get_degree())
00257                             weights[i*mkl_stepsize+j] = weights2[i];
00258                     }
00259                 }
00260             }
00261             else
00262             {
00263                 for (INT i=0; i<num_weights; i++)
00264                 {
00265                     for (INT j=0; j<mkl_stepsize; j++)
00266                     {
00267                         if (i*mkl_stepsize+j<get_degree()*length)
00268                             weights[i*mkl_stepsize+j] = weights2[i];
00269                     }
00270                 }
00271             }
00272         }
00273 
00274         // other kernel tree operations
              /**
               * Compute absolute weights (declaration only).
               *
               * @param len receives a length
               * @return pointer to the computed values; ownership is defined in the
               *         implementation -- TODO confirm
               */
00280         DREAL *compute_abs_weights(INT & len);
00281 
              /**
               * Tree based evaluation writing per-level contributions
               * (declaration only).
               *
               * @param idx index of the example
               * @param LevelContrib output array
               */
00288         void compute_by_tree(INT idx, DREAL *LevelContrib);
00289 
              /** @return the tree_initialized flag */
00294         bool is_tree_initialized() { return tree_initialized; }
00295 
              /** @return the stored normalization constant */
00300         inline DREAL get_normalization_const() { return normalization_const; }
00301 
              /**
               * Expose the degree weights.
               *
               * @param d receives degree
               * @param len receives length
               * @return the internal weights pointer (no copy is made)
               */
00307         inline DREAL *get_degree_weights(INT& d, INT& len)
00308         {
00309             d=degree;
00310             len=length;
00311             return weights;
00312         }
00313 
              /**
               * Expose whichever weight vector is active.
               *
               * When position_weights is set it is returned with num_weights =
               * seq_length. Otherwise weights is returned with num_weights = degree
               * (length == 0) or degree*length.
               *
               * @param num_weights receives the number of entries
               * @return the internal pointer (no copy is made)
               */
00319         inline DREAL *get_weights(INT& num_weights)
00320         {
00321             if (position_weights!=NULL)
00322             {
00323                 num_weights = seq_length ;
00324                 return position_weights ;
00325             }
00326             if (length==0)
00327                 num_weights = degree ;
00328             else
00329                 num_weights = degree*length ;
00330             return weights;
00331         }
00332 
              /**
               * Expose the position weights.
               *
               * @param len receives seq_length
               * @return the internal position_weights pointer; it can be NULL, as
               *         other members of this class test it against NULL
               */
00338         inline DREAL *get_position_weights(INT& len)
00339         {
00340             len=seq_length;
00341             return position_weights;
00342         }
00343 
              /**
               * Set the degree weights according to a weighting type
               * (declaration only).
               *
               * @param type weighting type
               * @return bool status (semantics defined in the implementation)
               */
00349         bool set_wd_weights_by_type(EWDKernType type);
00350 
              /**
               * Set the degree weights; shorthand for set_weights(p_weights, d, 0).
               * The bool returned by set_weights() is discarded.
               *
               * @param p_weights new weights
               * @param d degree
               */
00357         void set_wd_weights(DREAL* p_weights, INT d)
00358         {
00359             set_weights(p_weights,d,0);
00360         }
00361 
              /**
               * Set the weights (declaration only).
               *
               * @param weights new weights
               * @param d degree
               * @param len length; set_wd_weights() passes 0
               * @return bool status (semantics defined in the implementation)
               */
00368         bool set_weights(DREAL* weights, INT d, INT len);
00369 
              /**
               * Set the position weights (declaration only).
               *
               * @param position_weights new position weights
               * @param len number of entries, 0 when omitted
               * @return bool status (semantics defined in the implementation)
               */
00376         bool set_position_weights(DREAL* position_weights, INT len=0);
00377 
              /*
               * Block-weight initialisers. Only the declarations are visible in
               * this header; each returns a bool status. The name suffixes suggest
               * one initialiser per weighting scheme -- confirm in the
               * implementation.
               */
              /** Initialise block weights (declaration only). */
00382         bool init_block_weights();
00383 
              /** Block-weight initialiser, "from_wd" variant (declaration only). */
00388         bool init_block_weights_from_wd();
00389 
              /** Block-weight initialiser, "from_wd_external" variant (declaration only). */
00394         bool init_block_weights_from_wd_external();
00395 
              /** Block-weight initialiser, "const" variant (declaration only). */
00400         bool init_block_weights_const();
00401 
              /** Block-weight initialiser, "linear" variant (declaration only). */
00406         bool init_block_weights_linear();
00407 
              /** Block-weight initialiser, "sqpoly" variant (declaration only). */
00412         bool init_block_weights_sqpoly();
00413 
              /** Block-weight initialiser, "cubicpoly" variant (declaration only). */
00418         bool init_block_weights_cubicpoly();
00419 
              /** Block-weight initialiser, "exp" variant (declaration only). */
00424         bool init_block_weights_exp();
00425 
              /** Block-weight initialiser, "log" variant (declaration only). */
00430         bool init_block_weights_log();
00431 
              /** Block-weight initialiser, "external" variant (declaration only). */
00436         bool init_block_weights_external();
00437 
              /**
               * Free the position weights and reset the pointer to NULL.
               *
               * @return always true
               */
00442         bool delete_position_weights() { delete[] position_weights; position_weights=NULL; return true; }
00443 
              /**
               * Set the maximum number of mismatches (declaration only).
               *
               * @param max new maximum
               * @return bool status (semantics defined in the implementation)
               */
00449         bool set_max_mismatch(INT max);
00450 
              /** @return the stored maximum number of mismatches */
00455         inline INT get_max_mismatch() { return max_mismatch; }
00456 
              /**
               * Store a new degree. Only the degree member is assigned; the weight
               * arrays are not touched by this function.
               *
               * @param deg new degree
               * @return always true
               */
00462         inline bool set_degree(INT deg) { degree=deg; return true; }
00463 
              /** @return the stored degree */
00468         inline INT get_degree() { return degree; }
00469 
              /**
               * Store the use_normalization flag.
               *
               * @param opt new flag value
               * @return always true
               */
00475         inline bool set_use_normalization(bool opt) { use_normalization=opt; return true; }
00476 
              /** @return the use_normalization flag */
00481         inline bool get_use_normalization() { return use_normalization; }
00482 
              /**
               * Store the block_computation flag.
               *
               * @param block new flag value
               * @return always true
               */
00488         inline bool set_use_block_computation(bool block) { block_computation=block; return true; }
00489 
              /** @return the block_computation flag */
00494         inline bool get_use_block_computation() { return block_computation; }
00495 
              /**
               * Store the subkernel step size. The value is not validated here
               * although get_num_subkernels() divides by it.
               *
               * @param step new step size
               * @return always true
               */
00501         inline bool set_mkl_stepsize(INT step) { mkl_stepsize=step; return true; }
00502 
              /** @return the stored subkernel step size */
00507         inline INT get_mkl_stepsize() { return mkl_stepsize; }
00508 
              /**
               * Store the which_degree selector.
               *
               * @param which new selector value
               * @return always true
               */
00514         inline bool set_which_degree(INT which) { which_degree=which; return true; }
00515 
              /** @return the stored which_degree selector */
00520         inline INT get_which_degree() { return which_degree; }
00521 
00522     protected:
              /** Create empty tries (declaration only). */
00524         void create_empty_tries();
00525 
              /**
               * Add a weighted example to the trees; selected by add_to_normal()
               * when max_mismatch is 0 (declaration only).
               *
               * @param idx index of the example
               * @param weight weight of the example
               */
00531         void add_example_to_tree(INT idx, DREAL weight);
00532 
              /**
               * Add a weighted example to a single tree (declaration only).
               *
               * @param idx index of the example
               * @param weight weight of the example
               * @param tree_num tree selector
               */
00539         void add_example_to_single_tree(INT idx, DREAL weight, INT tree_num);
00540 
              /**
               * Add a weighted example to the trees; selected by add_to_normal()
               * when max_mismatch is not 0 (declaration only).
               *
               * @param idx index of the example
               * @param weight weight of the example
               */
00546         void add_example_to_tree_mismatch(INT idx, DREAL weight);
00547 
              /**
               * Mismatch variant of add_example_to_single_tree()
               * (declaration only).
               *
               * @param idx index of the example
               * @param weight weight of the example
               * @param tree_num tree selector
               */
00554         void add_example_to_single_tree_mismatch(INT idx, DREAL weight, INT tree_num);
00555 
              /**
               * Recursive helper for the mismatch insertion (declaration only).
               * Parameter meanings below are read off the names -- TODO confirm
               * against the implementation.
               *
               * @param tree trie node to work on
               * @param alpha weight
               * @param vec integer sequence
               * @param len_rem presumably the remaining length
               * @param depth_rec presumably the current recursion depth
               * @param mismatch_rec presumably the mismatches used so far
               */
00565         void add_example_to_tree_mismatch_recursion(DNATrie *tree,
00566             DREAL alpha, INT *vec, INT len_rem,
00567             INT depth_rec, INT mismatch_rec);
00568 
              /**
               * Tree based evaluation for one example; used by compute_optimized()
               * (declaration only).
               *
               * @param idx index of the example
               * @return computed value
               */
00574         DREAL compute_by_tree(INT idx);
00575 
              /**
               * Kernel evaluation for a pair of examples (declaration only).
               *
               * @param idx_a index of the first example
               * @param idx_b index of the second example
               * @return computed value
               */
00584         DREAL compute(INT idx_a, INT idx_b);
00585 
              /**
               * Evaluation on two raw sequences, "with_mismatch" variant
               * (declaration only).
               *
               * @param avec first sequence
               * @param alen length of avec
               * @param bvec second sequence
               * @param blen length of bvec
               * @return computed value
               */
00594         DREAL compute_with_mismatch(CHAR* avec, INT alen,
00595             CHAR* bvec, INT blen) ;
00596 
              /**
               * Evaluation on two raw sequences, "without_mismatch" variant
               * (declaration only).
               *
               * @param avec first sequence
               * @param alen length of avec
               * @param bvec second sequence
               * @param blen length of bvec
               * @return computed value
               */
00605         DREAL compute_without_mismatch(CHAR* avec, INT alen,
00606             CHAR* bvec, INT blen);
00607 
              /**
               * Evaluation on two raw sequences, "without_mismatch_matrix" variant
               * (declaration only).
               *
               * @param avec first sequence
               * @param alen length of avec
               * @param bvec second sequence
               * @param blen length of bvec
               * @return computed value
               */
00616         DREAL compute_without_mismatch_matrix(CHAR* avec, INT alen,
00617             CHAR* bvec, INT blen);
00618 
              /**
               * Evaluation on two raw sequences, "using_block" variant
               * (declaration only).
               *
               * @param avec first sequence
               * @param alen length of avec
               * @param bvec second sequence
               * @param blen length of bvec
               * @return computed value
               */
00627         DREAL compute_using_block(CHAR* avec, INT alen,
00628             CHAR* bvec, INT blen);
00629 
              /** Remove the left-hand side features (declaration only). */
00631         virtual void remove_lhs();
00632 
00633     protected:
              /** degree weights; degree entries when length is 0, else
               *  degree*length entries (see get_weights()) */
00637         DREAL* weights;
              /** optional per-position weights, NULL when unused; reported with
               *  seq_length entries by get_weights() */
00639         DREAL* position_weights;
              /** scratch buffer handed out by get_subkernel_weights() */
00641         DREAL* weights_buffer;
              /** number of consecutive weights grouped into one subkernel */
00643         INT mkl_stepsize;
              /** kernel degree */
00645         INT degree;
              /** length factor of the weights array; 0 means weights has degree
               *  entries */
00647         INT length;
00648 
              /** maximum number of mismatches; 0 selects the non-mismatch tree
               *  insertion in add_to_normal() */
00650         INT max_mismatch;
              /** sequence length; bounds the position_weights accesses */
00652         INT seq_length;
00653 
              /** initialisation flag; not referenced by the inline code in this
               *  header, which uses get_is_initialized()/set_is_initialized() */
00655         bool initialized;
              /** flag accessed through get/set_use_normalization() */
00657         bool use_normalization;
              /** flag accessed through get/set_use_block_computation() */
00659         bool block_computation;
00660 
              /** value returned by get_normalization_const() */
00662         DREAL normalization_const;
00663 
              /** presumably the number of entries in block_weights_external --
               *  TODO confirm */
00665         INT num_block_weights_external;
              /** externally supplied block weights (usage in the implementation) */
00667         DREAL* block_weights_external;
00668 
              /** block weights (usage in the implementation) */
00670         DREAL* block_weights;
              /** weighting type */
00672         EWDKernType type;
              /** selector accessed through get/set_which_degree() */
00674         INT which_degree;
00675 
              /** tries backing the optimized evaluation; cleared by clear_normal() */
00677         CTrie<DNATrie>* tries;
00678 
              /** flag returned by is_tree_initialized() */
00680         bool tree_initialized;
00681 
              /** alphabet object (usage in the implementation) */
00683         CAlphabet* alphabet;
00684 };
00685 
00686 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation