CommUlongStringKernel.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _COMMULONGSTRINGKERNEL_H___
00013 #define _COMMULONGSTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Mathematics.h"
00017 #include "lib/DynamicArray.h"
00018 #include "kernel/StringKernel.h"
00019
00043 class CCommUlongStringKernel: public CStringKernel<ULONG>
00044 {
00045 public:
00052 CCommUlongStringKernel(INT size=10, bool use_sign=false,
00053 ENormalizationType normalization_=FULL_NORMALIZATION);
00054
00063 CCommUlongStringKernel(
00064 CStringFeatures<ULONG>* l, CStringFeatures<ULONG>* r,
00065 bool use_sign=false,
00066 ENormalizationType normalization_=FULL_NORMALIZATION,
00067 INT size=10);
00068
00069 virtual ~CCommUlongStringKernel();
00070
00077 virtual bool init(CFeatures* l, CFeatures* r);
00078
00080 virtual void cleanup();
00081
00087 bool load_init(FILE* src);
00088
00094 bool save_init(FILE* dest);
00095
00100 virtual EKernelType get_kernel_type() { return K_COMMULONGSTRING; }
00101
00106 virtual const CHAR* get_name() { return "CommUlongString"; }
00107
00115 virtual bool init_optimization(INT count, INT* IDX, DREAL* weights);
00116
00121 virtual bool delete_optimization();
00122
00128 virtual DREAL compute_optimized(INT idx);
00129
00143 inline void merge_dictionaries(INT &t, INT j, INT &k,
00144 ULONG* vec, ULONG* dic, DREAL* dic_weights, DREAL weight,
00145 INT vec_idx, INT len, ENormalizationType p_normalization)
00146 {
00147 while (k<dictionary.get_num_elements() && dictionary[k] < vec[j-1])
00148 {
00149 dic[t]=dictionary[k];
00150 dic_weights[t]=dictionary_weights[k];
00151 t++;
00152 k++;
00153 }
00154
00155 if (k<dictionary.get_num_elements() && dictionary[k]==vec[j-1])
00156 {
00157 dic[t]=vec[j-1];
00158 dic_weights[t]=dictionary_weights[k]+normalize_weight(weight, vec_idx, len, p_normalization);
00159 k++;
00160 }
00161 else
00162 {
00163 dic[t]=vec[j-1];
00164 dic_weights[t]=normalize_weight(weight, vec_idx, len, p_normalization);
00165 }
00166 t++;
00167 }
00168
00174 virtual void add_to_normal(INT idx, DREAL weight);
00175
00177 virtual void clear_normal();
00178
00180 virtual void remove_lhs();
00181
00183 virtual void remove_rhs();
00184
00189 inline virtual EFeatureType get_feature_type() { return F_ULONG; }
00190
00197 void get_dictionary(INT &dsize, ULONG*& dict, DREAL*& dweights)
00198 {
00199 dsize=dictionary.get_num_elements();
00200 dict=dictionary.get_array();
00201 dweights = dictionary_weights.get_array();
00202 }
00203
00204 protected:
00213 DREAL compute(INT idx_a, INT idx_b);
00214
00222 inline DREAL normalize_weight(DREAL value, INT seq_num,
00223 INT seq_len, ENormalizationType p_normalization)
00224 {
00225 switch (p_normalization)
00226 {
00227 case NO_NORMALIZATION:
00228 return value;
00229 break;
00230 case SQRT_NORMALIZATION:
00231 return value/sqrt(sqrtdiag_lhs[seq_num]);
00232 break;
00233 case FULL_NORMALIZATION:
00234 return value/sqrtdiag_lhs[seq_num];
00235 break;
00236 case SQRTLEN_NORMALIZATION:
00237 return value/sqrt(sqrt(seq_len));
00238 break;
00239 case LEN_NORMALIZATION:
00240 return value/sqrt(seq_len);
00241 break;
00242 case SQLEN_NORMALIZATION:
00243 return value/seq_len;
00244 break;
00245 default:
00246 ASSERT(0);
00247 }
00248 return -CMath::INFTY;
00249 }
00250
00251 protected:
00253 DREAL *sqrtdiag_lhs;
00255 DREAL *sqrtdiag_rhs;
00257 bool initialized;
00258
00260 CDynamicArray<ULONG> dictionary;
00262 CDynamicArray<DREAL> dictionary_weights;
00263
00265 bool use_sign;
00267 ENormalizationType normalization;
00268 };
00269
00270 #endif