CommUlongStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _COMMULONGSTRINGKERNEL_H___
00013 #define _COMMULONGSTRINGKERNEL_H___
00014 
00015 #include "lib/common.h"
00016 #include "lib/Mathematics.h"
00017 #include "lib/DynamicArray.h"
00018 #include "kernel/StringKernel.h"
00019 
00043 class CCommUlongStringKernel: public CStringKernel<ULONG>
00044 {
00045     public:
00052         CCommUlongStringKernel(INT size=10, bool use_sign=false,
00053             ENormalizationType normalization_=FULL_NORMALIZATION);
00054 
00063         CCommUlongStringKernel(
00064             CStringFeatures<ULONG>* l, CStringFeatures<ULONG>* r,
00065             bool use_sign=false,
00066             ENormalizationType normalization_=FULL_NORMALIZATION,
00067             INT size=10);
00068 
00069         virtual ~CCommUlongStringKernel();
00070 
00077         virtual bool init(CFeatures* l, CFeatures* r);
00078 
00080         virtual void cleanup();
00081 
00087         bool load_init(FILE* src);
00088 
00094         bool save_init(FILE* dest);
00095 
00100         virtual EKernelType get_kernel_type() { return K_COMMULONGSTRING; }
00101 
00106         virtual const CHAR* get_name() { return "CommUlongString"; }
00107 
00115         virtual bool init_optimization(INT count, INT* IDX, DREAL* weights);
00116 
00121         virtual bool delete_optimization();
00122 
00128         virtual DREAL compute_optimized(INT idx);
00129 
00143         inline void merge_dictionaries(INT &t, INT j, INT &k,
00144             ULONG* vec, ULONG* dic, DREAL* dic_weights, DREAL weight,
00145             INT vec_idx, INT len, ENormalizationType p_normalization)
00146         {
00147             while (k<dictionary.get_num_elements() && dictionary[k] < vec[j-1])
00148             {
00149                 dic[t]=dictionary[k];
00150                 dic_weights[t]=dictionary_weights[k];
00151                 t++;
00152                 k++;
00153             }
00154 
00155             if (k<dictionary.get_num_elements() && dictionary[k]==vec[j-1])
00156             {
00157                 dic[t]=vec[j-1];
00158                 dic_weights[t]=dictionary_weights[k]+normalize_weight(weight, vec_idx, len, p_normalization);
00159                 k++;
00160             }
00161             else
00162             {
00163                 dic[t]=vec[j-1];
00164                 dic_weights[t]=normalize_weight(weight, vec_idx, len, p_normalization);
00165             }
00166             t++;
00167         }
00168 
00174         virtual void add_to_normal(INT idx, DREAL weight);
00175 
00177         virtual void clear_normal();
00178 
00180         virtual void remove_lhs();
00181 
00183         virtual void remove_rhs();
00184 
00189         inline virtual EFeatureType get_feature_type() { return F_ULONG; }
00190 
00197         void get_dictionary(INT &dsize, ULONG*& dict, DREAL*& dweights) 
00198         {
00199             dsize=dictionary.get_num_elements();
00200             dict=dictionary.get_array();
00201             dweights = dictionary_weights.get_array();
00202         }
00203 
00204     protected:
00213         DREAL compute(INT idx_a, INT idx_b);
00214 
00222         inline DREAL normalize_weight(DREAL value, INT seq_num,
00223             INT seq_len, ENormalizationType p_normalization)
00224         {
00225             switch (p_normalization)
00226             {
00227                 case NO_NORMALIZATION:
00228                     return value;
00229                     break;
00230                 case SQRT_NORMALIZATION:
00231                     return value/sqrt(sqrtdiag_lhs[seq_num]);
00232                     break;
00233                 case FULL_NORMALIZATION:
00234                     return value/sqrtdiag_lhs[seq_num];
00235                     break;
00236                 case SQRTLEN_NORMALIZATION:
00237                     return value/sqrt(sqrt(seq_len));
00238                     break;
00239                 case LEN_NORMALIZATION:
00240                     return value/sqrt(seq_len);
00241                     break;
00242                 case SQLEN_NORMALIZATION:
00243                     return value/seq_len;
00244                     break;
00245                 default:
00246                     ASSERT(0);
00247             }
00248             return -CMath::INFTY;
00249         }
00250 
00251     protected:
00253         DREAL *sqrtdiag_lhs;
00255         DREAL *sqrtdiag_rhs;
00257         bool initialized;
00258 
00260         CDynamicArray<ULONG> dictionary;
00262         CDynamicArray<DREAL> dictionary_weights;
00263 
00265         bool use_sign;
00267         ENormalizationType normalization;
00268 };
00269 
00270 #endif /* _COMMULONGSTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation