CommWordStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _COMMWORDSTRINGKERNEL_H___
00013 #define _COMMWORDSTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Mathematics.h"
00017 #include "kernel/StringKernel.h"
00018 
/**
 * String kernel operating on WORD-valued string features.
 *
 * Derives from CStringKernel<WORD>, identifies itself as kernel type
 * K_COMMWORDSTRING with the name "CommWordString", and reports F_WORD as
 * its feature type. Only the small inline members have bodies in this
 * header; every other member is declared here and defined out of line.
 *
 * NOTE(review): the name suggests a "common word" (spectrum-style) kernel
 * that compares word occurrences in two strings - confirm against the
 * implementation file, which is not visible from this header.
 */
00043 class CCommWordStringKernel : public CStringKernel<WORD>
00044 {
00045     public:
              /**
               * Construct a kernel without attaching features.
               *
               * @param size            presumably the kernel cache size - TODO confirm
               * @param use_sign        flag; presumably stored in the use_sign member - confirm
               * @param normalization_  normalization mode, FULL_NORMALIZATION by default
               */
00052         CCommWordStringKernel(INT size, bool use_sign,
00053             ENormalizationType normalization_=FULL_NORMALIZATION);
00054 
              /**
               * Construct a kernel from two WORD string feature objects.
               *
               * @param l               first feature object (presumably the left-hand side)
               * @param r               second feature object (presumably the right-hand side)
               * @param use_sign        flag, false by default
               * @param normalization_  normalization mode, FULL_NORMALIZATION by default
               * @param size            defaults to 10; presumably the cache size - TODO confirm
               */
00063         CCommWordStringKernel(
00064             CStringFeatures<WORD>* l, CStringFeatures<WORD>* r,
00065             bool use_sign=false,
00066             ENormalizationType normalization_=FULL_NORMALIZATION,
00067             INT size=10);
00068 
              /** Virtual destructor; defined out of line. */
00069         virtual ~CCommWordStringKernel();
00070 
              /**
               * Initialise the kernel with two feature objects.
               *
               * @param l  first feature object (presumably left-hand side)
               * @param r  second feature object (presumably right-hand side)
               * @return   bool status; presumably true on success - confirm
               */
00077         virtual bool init(CFeatures* l, CFeatures* r);
00078 
              /** Clean-up hook; defined out of line (what it releases is not visible here). */
00080         virtual void cleanup();
00081 
              /**
               * Load kernel initialisation data from an open C stream.
               *
               * @param src  stream to read from
               * @return     bool status; presumably true on success - confirm
               */
00087         bool load_init(FILE* src);
00088 
              /**
               * Save kernel initialisation data to an open C stream.
               *
               * @param dest  stream to write to
               * @return      bool status; presumably true on success - confirm
               */
00094         bool save_init(FILE* dest);
00095 
              /** @return the kernel type tag, always K_COMMWORDSTRING. */
00100         virtual EKernelType get_kernel_type() { return K_COMMWORDSTRING; }
00101 
              /** @return the kernel name, the string literal "CommWordString". */
00106         virtual const CHAR* get_name() { return "CommWordString"; }
00107 
              /**
               * Set up the dictionary for the given size.
               *
               * @param size  requested dictionary size (presumably the number of
               *              entries in dictionary_weights - confirm)
               * @return      bool status; presumably true on success - confirm
               */
00112         virtual bool init_dictionary(INT size);
00113 
              /**
               * Initialise the optimised evaluation from a set of weighted examples.
               *
               * @param count    number of entries in IDX / weights (presumably)
               * @param IDX      example indices
               * @param weights  one weight per index
               * @return         bool status; presumably true on success - confirm
               */
00121         virtual bool init_optimization(INT count, INT *IDX,
00122             DREAL* weights);
00123 
              /** Tear down the optimised evaluation; returns a bool status. */
00128         virtual bool delete_optimization();
00129 
              /**
               * Evaluate the optimised kernel expansion for one example.
               *
               * @param idx  example index (which feature side it refers to is not
               *             visible here - confirm)
               * @return     the computed value
               */
00135         virtual DREAL compute_optimized(INT idx);
00136 
              /**
               * Add one weighted example to the normal vector.
               *
               * @param idx     example index
               * @param weight  weight of the example
               */
00142         virtual void add_to_normal(INT idx, DREAL weight);
00143 
              /** Reset the normal vector; defined out of line. */
00145         virtual void clear_normal();
00146 
              /** Detach the left-hand side features (presumably) - defined out of line. */
00148         virtual void remove_lhs();
00149 
              /** Detach the right-hand side features (presumably) - defined out of line. */
00151         virtual void remove_rhs();
00152 
              /** @return the feature type this kernel works on, always F_WORD. */
00157         inline virtual EFeatureType get_feature_type() { return F_WORD; }
00158 
              /**
               * Expose the dictionary through two output parameters.
               *
               * No copy is made: dweights receives the dictionary_weights member
               * pointer itself, so the caller sees the kernel's own array.
               *
               * @param dsize     receives dictionary_size
               * @param dweights  receives the dictionary_weights pointer
               */
00164         void get_dictionary(INT& dsize, DREAL*& dweights)
00165         {
00166             dsize=dictionary_size;
00167             dweights = dictionary_weights;
00168         }
00169 
              /**
               * Compute a scoring table; defined out of line.
               *
               * NOTE(review): parameter roles below are inferred from their names
               * only - confirm against the implementation.
               *
               * @param max_degree   maximum degree considered
               * @param num_feat     output/in-out: number of features
               * @param num_sym      output/in-out: number of symbols
               * @param target       optional target buffer
               * @param num_suppvec  number of entries in IDX / alphas
               * @param IDX          example indices
               * @param alphas       one coefficient per index
               * @param do_init      true by default
               * @return             pointer to the result (ownership not visible here)
               */
00182         virtual DREAL* compute_scoring(INT max_degree, INT& num_feat,
00183             INT& num_sym, DREAL* target, INT num_suppvec, INT* IDX,
00184             DREAL* alphas, bool do_init=true);
00185 
              /**
               * Compute a consensus string; defined out of line.
               *
               * NOTE(review): parameter roles are inferred from their names only.
               *
               * @param num_feat     output/in-out: number of features
               * @param num_suppvec  number of entries in IDX / alphas
               * @param IDX          example indices
               * @param alphas       one coefficient per index
               * @return             CHAR buffer (ownership not visible here)
               */
00194         CHAR* compute_consensus(INT &num_feat, INT num_suppvec,
00195             INT* IDX, DREAL* alphas);
00196 
00197 
              /**
               * Store the "dictionary diagonal optimization" flag.
               *
               * @param flag  new value of use_dict_diagonal_optimization
               */
00202         void set_use_dict_diagonal_optimization(bool flag)
00203         {
00204             use_dict_diagonal_optimization=flag;
00205         }
00206 
              /** @return the current value of use_dict_diagonal_optimization. */
00211         bool get_use_dict_diagonal_optimization()
00212         {
00213             return use_dict_diagonal_optimization;
00214         }
00215         
00216     protected:
              /**
               * Kernel value for a pair of examples.
               *
               * Forwards to compute_helper() with do_sort fixed to false.
               *
               * @param idx_a  index of the first example
               * @param idx_b  index of the second example
               * @return       whatever compute_helper() returns
               */
00225         inline virtual DREAL compute(INT idx_a, INT idx_b)
00226         {
00227             return compute_helper(idx_a, idx_b, false);
00228         }
00229 
              /**
               * Worker behind compute(); defined out of line.
               *
               * @param idx_a    index of the first example
               * @param idx_b    index of the second example
               * @param do_sort  presumably requests sorting of the word arrays
               *                 before comparison - confirm
               * @return         the computed kernel value
               */
00237         virtual DREAL compute_helper(INT idx_a, INT idx_b, bool do_sort);
00238 
              /**
               * Diagonal kernel entry for one example; defined out of line.
               *
               * @param idx_a  example index
               * @return       the computed value
               */
00244         virtual DREAL compute_diag(INT idx_a);
00245 
              /**
               * Scale a value according to the requested normalization mode.
               *
               * Result per mode:
               *  - NO_NORMALIZATION:      value
               *  - SQRT_NORMALIZATION:    value / sqrt(weights[seq_num])
               *  - FULL_NORMALIZATION:    value / weights[seq_num]
               *  - SQRTLEN_NORMALIZATION: value / sqrt(sqrt(seq_len))
               *  - LEN_NORMALIZATION:     value / sqrt(seq_len)
               *  - SQLEN_NORMALIZATION:   value / seq_len
               *  - anything else:         ASSERT(0), then -CMath::INFTY
               *
               * This function does not check for a zero divisor.
               *
               * @param weights          array indexed by seq_num; read only in the
               *                         SQRT_ and FULL_NORMALIZATION modes
               * @param value            value to normalize
               * @param seq_num          index into weights
               * @param seq_len          length used by the three *LEN modes
               *                         (presumably the sequence length - confirm)
               * @param p_normalization  mode selector
               * @return                 the scaled value as listed above
               */
00254         inline DREAL normalize_weight(DREAL* weights, DREAL value,
00255             INT seq_num, INT seq_len,
00256             ENormalizationType p_normalization)
00257         {
                  /* Every case returns directly, so the break statements
                   * that follow the returns are never reached. */
00258             switch (p_normalization)
00259             {
00260                 case NO_NORMALIZATION:
00261                     return value;
00262                     break;
00263                 case SQRT_NORMALIZATION:
00264                     return value/sqrt(weights[seq_num]);
00265                     break;
00266                 case FULL_NORMALIZATION:
00267                     return value/weights[seq_num];
00268                     break;
00269                 case SQRTLEN_NORMALIZATION:
                          /* sqrt applied twice: divides by the fourth root of seq_len */
00270                     return value/sqrt(sqrt((double) seq_len));
00271                     break;
00272                 case LEN_NORMALIZATION:
00273                     return value/sqrt((double) seq_len);
00274                     break;
00275                 case SQLEN_NORMALIZATION:
00276                     return value/seq_len;
00277                     break;
00278                 default:
                          /* unknown mode: flagged as a programming error */
00279                     ASSERT(0);
00280             }
00281 
                  /* Reached only via the default case, and only if ASSERT(0)
                   * returns control (e.g. assertions disabled - confirm). */
00282             return -CMath::INFTY;
00283         }
00284 
00285     protected:
              /** presumably sqrt of the kernel diagonal for the lhs examples - confirm */
00287         DREAL *sqrtdiag_lhs;
              /** presumably sqrt of the kernel diagonal for the rhs examples - confirm */
00289         DREAL *sqrtdiag_rhs;
              /** initialisation flag (who sets it is not visible in this header) */
00291         bool initialized;
00292 
              /** dictionary size; handed out through get_dictionary() */
00294         INT dictionary_size;
              /** dictionary weight array; the raw pointer is handed out through
               *  get_dictionary() */
00297         DREAL* dictionary_weights;
00298 
              /** sign flag; presumably set from the constructor argument of the
               *  same name - confirm */
00300         bool use_sign;
              /** normalization mode; presumably set from the constructors'
               *  normalization_ argument - confirm */
00302         ENormalizationType normalization;
00303 
              /** flag read/written by get_/set_use_dict_diagonal_optimization() */
00305         bool use_dict_diagonal_optimization;
              /** work buffer for the dictionary diagonal optimization
               *  (presumably; allocation and use are not visible here) */
00307         INT* dict_diagonal_optimization;
00308 };
00309 
00310 #endif /* _COMMWORDSTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation