00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019
00021 class CWeightedDegreeStringKernel: public CStringKernel<CHAR>
00022 {
00023 public:
00029 CWeightedDegreeStringKernel(INT degree, EWDKernType type=E_WD);
00030
00036 CWeightedDegreeStringKernel(DREAL* weights, INT degree);
00037
00044 CWeightedDegreeStringKernel(
00045 CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT degree);
00046
00047 virtual ~CWeightedDegreeStringKernel();
00048
00055 virtual bool init(CFeatures* l, CFeatures* r);
00056
00058 virtual void cleanup();
00059
00065 bool load_init(FILE* src);
00066
00072 bool save_init(FILE* dest);
00073
00078 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00079
00084 virtual const CHAR* get_name() { return "WeightedDegree" ; } ;
00085
00093 inline virtual bool init_optimization(INT count, INT *IDX, DREAL* alphas)
00094 {
00095 return init_optimization(count, IDX, alphas, -1);
00096 }
00097
00108 virtual bool init_optimization(INT count, INT *IDX, DREAL* alphas,
00109 INT tree_num);
00110
00115 virtual bool delete_optimization();
00116
00122 virtual DREAL compute_optimized(INT idx)
00123 {
00124 if (get_is_initialized())
00125 return compute_by_tree(idx);
00126
00127 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00128 return 0;
00129 }
00130
00135 static void* compute_batch_helper(void* p);
00136
00147 virtual void compute_batch(INT num_vec, INT* vec_idx, DREAL* target,
00148 INT num_suppvec, INT* IDX, DREAL* alphas, DREAL factor=1.0);
00149
00153 inline virtual void clear_normal()
00154 {
00155 if (get_is_initialized())
00156 {
00157 tries->delete_trees(max_mismatch==0);
00158 set_is_initialized(false);
00159 }
00160 }
00161
00167 inline virtual void add_to_normal(INT idx, DREAL weight)
00168 {
00169 if (max_mismatch==0)
00170 add_example_to_tree(idx, weight);
00171 else
00172 add_example_to_tree_mismatch(idx, weight);
00173
00174 set_is_initialized(true);
00175 }
00176
00181 inline virtual INT get_num_subkernels()
00182 {
00183 if (position_weights!=NULL)
00184 return (INT) ceil(1.0*seq_length/mkl_stepsize) ;
00185 if (length==0)
00186 return (INT) ceil(1.0*get_degree()/mkl_stepsize);
00187 return (INT) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00188 }
00189
00195 inline void compute_by_subkernel(INT idx, DREAL * subkernel_contrib)
00196 {
00197 if (get_is_initialized())
00198 {
00199 compute_by_tree(idx, subkernel_contrib);
00200 return ;
00201 }
00202
00203 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00204 }
00205
00211 inline const DREAL* get_subkernel_weights(INT& num_weights)
00212 {
00213 num_weights = get_num_subkernels();
00214
00215 delete[] weights_buffer ;
00216 weights_buffer = new DREAL[num_weights];
00217
00218 if (position_weights!=NULL)
00219 for (INT i=0; i<num_weights; i++)
00220 weights_buffer[i] = position_weights[i*mkl_stepsize];
00221 else
00222 for (INT i=0; i<num_weights; i++)
00223 weights_buffer[i] = weights[i*mkl_stepsize];
00224
00225 return weights_buffer;
00226 }
00227
00233 inline void set_subkernel_weights(DREAL* weights2, INT num_weights2)
00234 {
00235 INT num_weights = get_num_subkernels();
00236 if (num_weights!=num_weights2)
00237 SG_ERROR( "number of weights do not match\n");
00238
00239 if (position_weights!=NULL)
00240 {
00241 for (INT i=0; i<num_weights; i++)
00242 {
00243 for (INT j=0; j<mkl_stepsize; j++)
00244 {
00245 if (i*mkl_stepsize+j<seq_length)
00246 position_weights[i*mkl_stepsize+j] = weights2[i];
00247 }
00248 }
00249 }
00250 else if (length==0)
00251 {
00252 for (INT i=0; i<num_weights; i++)
00253 {
00254 for (INT j=0; j<mkl_stepsize; j++)
00255 {
00256 if (i*mkl_stepsize+j<get_degree())
00257 weights[i*mkl_stepsize+j] = weights2[i];
00258 }
00259 }
00260 }
00261 else
00262 {
00263 for (INT i=0; i<num_weights; i++)
00264 {
00265 for (INT j=0; j<mkl_stepsize; j++)
00266 {
00267 if (i*mkl_stepsize+j<get_degree()*length)
00268 weights[i*mkl_stepsize+j] = weights2[i];
00269 }
00270 }
00271 }
00272 }
00273
00274
00280 DREAL *compute_abs_weights(INT & len);
00281
00288 void compute_by_tree(INT idx, DREAL *LevelContrib);
00289
00294 bool is_tree_initialized() { return tree_initialized; }
00295
00300 inline DREAL get_normalization_const() { return normalization_const; }
00301
00307 inline DREAL *get_degree_weights(INT& d, INT& len)
00308 {
00309 d=degree;
00310 len=length;
00311 return weights;
00312 }
00313
00319 inline DREAL *get_weights(INT& num_weights)
00320 {
00321 if (position_weights!=NULL)
00322 {
00323 num_weights = seq_length ;
00324 return position_weights ;
00325 }
00326 if (length==0)
00327 num_weights = degree ;
00328 else
00329 num_weights = degree*length ;
00330 return weights;
00331 }
00332
00338 inline DREAL *get_position_weights(INT& len)
00339 {
00340 len=seq_length;
00341 return position_weights;
00342 }
00343
00349 bool set_wd_weights_by_type(EWDKernType type);
00350
00357 void set_wd_weights(DREAL* p_weights, INT d)
00358 {
00359 set_weights(p_weights,d,0);
00360 }
00361
00368 bool set_weights(DREAL* weights, INT d, INT len);
00369
00376 bool set_position_weights(DREAL* position_weights, INT len=0);
00377
00382 bool init_block_weights();
00383
00388 bool init_block_weights_from_wd();
00389
00394 bool init_block_weights_from_wd_external();
00395
00400 bool init_block_weights_const();
00401
00406 bool init_block_weights_linear();
00407
00412 bool init_block_weights_sqpoly();
00413
00418 bool init_block_weights_cubicpoly();
00419
00424 bool init_block_weights_exp();
00425
00430 bool init_block_weights_log();
00431
00436 bool init_block_weights_external();
00437
00442 bool delete_position_weights() { delete[] position_weights; position_weights=NULL; return true; }
00443
00449 bool set_max_mismatch(INT max);
00450
00455 inline INT get_max_mismatch() { return max_mismatch; }
00456
00462 inline bool set_degree(INT deg) { degree=deg; return true; }
00463
00468 inline INT get_degree() { return degree; }
00469
00475 inline bool set_use_normalization(bool opt) { use_normalization=opt; return true; }
00476
00481 inline bool get_use_normalization() { return use_normalization; }
00482
00488 inline bool set_use_block_computation(bool block) { block_computation=block; return true; }
00489
00494 inline bool get_use_block_computation() { return block_computation; }
00495
00501 inline bool set_mkl_stepsize(INT step) { mkl_stepsize=step; return true; }
00502
00507 inline INT get_mkl_stepsize() { return mkl_stepsize; }
00508
00514 inline bool set_which_degree(INT which) { which_degree=which; return true; }
00515
00520 inline INT get_which_degree() { return which_degree; }
00521
00522 protected:
00524 void create_empty_tries();
00525
00531 void add_example_to_tree(INT idx, DREAL weight);
00532
00539 void add_example_to_single_tree(INT idx, DREAL weight, INT tree_num);
00540
00546 void add_example_to_tree_mismatch(INT idx, DREAL weight);
00547
00554 void add_example_to_single_tree_mismatch(INT idx, DREAL weight, INT tree_num);
00555
00565 void add_example_to_tree_mismatch_recursion(DNATrie *tree,
00566 DREAL alpha, INT *vec, INT len_rem,
00567 INT depth_rec, INT mismatch_rec);
00568
00574 DREAL compute_by_tree(INT idx);
00575
00584 DREAL compute(INT idx_a, INT idx_b);
00585
00594 DREAL compute_with_mismatch(CHAR* avec, INT alen,
00595 CHAR* bvec, INT blen) ;
00596
00605 DREAL compute_without_mismatch(CHAR* avec, INT alen,
00606 CHAR* bvec, INT blen);
00607
00616 DREAL compute_without_mismatch_matrix(CHAR* avec, INT alen,
00617 CHAR* bvec, INT blen);
00618
00627 DREAL compute_using_block(CHAR* avec, INT alen,
00628 CHAR* bvec, INT blen);
00629
00631 virtual void remove_lhs();
00632
00633 protected:
00637 DREAL* weights;
00639 DREAL* position_weights;
00641 DREAL* weights_buffer;
00643 INT mkl_stepsize;
00645 INT degree;
00647 INT length;
00648
00650 INT max_mismatch;
00652 INT seq_length;
00653
00655 bool initialized;
00657 bool use_normalization;
00659 bool block_computation;
00660
00662 DREAL normalization_const;
00663
00665 INT num_block_weights_external;
00667 DREAL* block_weights_external;
00668
00670 DREAL* block_weights;
00672 EWDKernType type;
00674 INT which_degree;
00675
00677 CTrie<DNATrie>* tries;
00678
00680 bool tree_initialized;
00681
00683 CAlphabet* alphabet;
00684 };
00685
00686 #endif