00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "lib/Trie.h"
00017 #include "kernel/StringKernel.h"
00018 #include "features/StringFeatures.h"
00019
00020
/**
 * Weighting type selector for the weighted-degree string kernel.
 *
 * Used as the `type` argument of the CWeightedDegreeStringKernel constructor
 * (default E_WD) and of set_wd_weights_by_type(). Every enumerator has an
 * explicitly assigned value. The E_BLOCK_* names parallel the
 * init_block_weights_*() members declared on the kernel class.
 *
 * NOTE(review): the exact weighting formula behind each enumerator is
 * implemented outside this header - confirm there before relying on it.
 */
enum EWDKernType
{
	/* plain weighted-degree / externally supplied weights */
	E_WD              = 0,
	E_EXTERNAL        = 1,

	/* block-weight variants */
	E_BLOCK_CONST     = 2,
	E_BLOCK_LINEAR    = 3,
	E_BLOCK_SQPOLY    = 4,
	E_BLOCK_CUBICPOLY = 5,
	E_BLOCK_EXP       = 6,
	E_BLOCK_LOG       = 7,
	E_BLOCK_EXTERNAL  = 8
};
00034
00035
00048 class CWeightedDegreeStringKernel: public CStringKernel<char>
00049 {
00050 public:
00056 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD);
00057
00063 CWeightedDegreeStringKernel(float64_t* weights, int32_t degree);
00064
00071 CWeightedDegreeStringKernel(
00072 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
00073
00074 virtual ~CWeightedDegreeStringKernel();
00075
00082 virtual bool init(CFeatures* l, CFeatures* r);
00083
00085 virtual void cleanup();
00086
00092 bool load_init(FILE* src);
00093
00099 bool save_init(FILE* dest);
00100
00105 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; }
00106
00111 virtual const char* get_name() { return "WeightedDegree"; } ;
00112
00120 inline virtual bool init_optimization(
00121 int32_t count, int32_t *IDX, float64_t* alphas)
00122 {
00123 return init_optimization(count, IDX, alphas, -1);
00124 }
00125
00136 virtual bool init_optimization(
00137 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
00138
00143 virtual bool delete_optimization();
00144
00150 virtual float64_t compute_optimized(int32_t idx)
00151 {
00152 if (get_is_initialized())
00153 return compute_by_tree(idx);
00154
00155 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00156 return 0;
00157 }
00158
00163 static void* compute_batch_helper(void* p);
00164
00175 virtual void compute_batch(
00176 int32_t num_vec, int32_t* vec_idx, float64_t* target,
00177 int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00178 float64_t factor=1.0);
00179
00183 inline virtual void clear_normal()
00184 {
00185 if (get_is_initialized())
00186 {
00187 tries->delete_trees(max_mismatch==0);
00188 set_is_initialized(false);
00189 }
00190 }
00191
00197 inline virtual void add_to_normal(int32_t idx, float64_t weight)
00198 {
00199 if (max_mismatch==0)
00200 add_example_to_tree(idx, weight);
00201 else
00202 add_example_to_tree_mismatch(idx, weight);
00203
00204 set_is_initialized(true);
00205 }
00206
00211 inline virtual int32_t get_num_subkernels()
00212 {
00213 if (position_weights!=NULL)
00214 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
00215 if (length==0)
00216 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
00217 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00218 }
00219
00225 inline void compute_by_subkernel(
00226 int32_t idx, float64_t * subkernel_contrib)
00227 {
00228 if (get_is_initialized())
00229 {
00230 compute_by_tree(idx, subkernel_contrib);
00231 return ;
00232 }
00233
00234 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
00235 }
00236
00242 inline const float64_t* get_subkernel_weights(int32_t& num_weights)
00243 {
00244 num_weights = get_num_subkernels();
00245
00246 delete[] weights_buffer ;
00247 weights_buffer = new float64_t[num_weights];
00248
00249 if (position_weights!=NULL)
00250 for (int32_t i=0; i<num_weights; i++)
00251 weights_buffer[i] = position_weights[i*mkl_stepsize];
00252 else
00253 for (int32_t i=0; i<num_weights; i++)
00254 weights_buffer[i] = weights[i*mkl_stepsize];
00255
00256 return weights_buffer;
00257 }
00258
00264 inline void set_subkernel_weights(
00265 float64_t* weights2, int32_t num_weights2)
00266 {
00267 int32_t num_weights = get_num_subkernels();
00268 if (num_weights!=num_weights2)
00269 SG_ERROR( "number of weights do not match\n");
00270
00271 if (position_weights!=NULL)
00272 {
00273 for (int32_t i=0; i<num_weights; i++)
00274 {
00275 for (int32_t j=0; j<mkl_stepsize; j++)
00276 {
00277 if (i*mkl_stepsize+j<seq_length)
00278 position_weights[i*mkl_stepsize+j] = weights2[i];
00279 }
00280 }
00281 }
00282 else if (length==0)
00283 {
00284 for (int32_t i=0; i<num_weights; i++)
00285 {
00286 for (int32_t j=0; j<mkl_stepsize; j++)
00287 {
00288 if (i*mkl_stepsize+j<get_degree())
00289 weights[i*mkl_stepsize+j] = weights2[i];
00290 }
00291 }
00292 }
00293 else
00294 {
00295 for (int32_t i=0; i<num_weights; i++)
00296 {
00297 for (int32_t j=0; j<mkl_stepsize; j++)
00298 {
00299 if (i*mkl_stepsize+j<get_degree()*length)
00300 weights[i*mkl_stepsize+j] = weights2[i];
00301 }
00302 }
00303 }
00304 }
00305
00306
00312 float64_t *compute_abs_weights(int32_t & len);
00313
00320 void compute_by_tree(int32_t idx, float64_t *LevelContrib);
00321
00326 bool is_tree_initialized() { return tree_initialized; }
00327
00333 inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
00334 {
00335 d=degree;
00336 len=length;
00337 return weights;
00338 }
00339
00345 inline float64_t *get_weights(int32_t& num_weights)
00346 {
00347 if (position_weights!=NULL)
00348 {
00349 num_weights = seq_length ;
00350 return position_weights ;
00351 }
00352 if (length==0)
00353 num_weights = degree ;
00354 else
00355 num_weights = degree*length ;
00356 return weights;
00357 }
00358
00364 inline float64_t *get_position_weights(int32_t& len)
00365 {
00366 len=seq_length;
00367 return position_weights;
00368 }
00369
00375 bool set_wd_weights_by_type(EWDKernType type);
00376
00383 void set_wd_weights(float64_t* p_weights, int32_t d)
00384 {
00385 set_weights(p_weights,d,0);
00386 }
00387
00394 bool set_weights(float64_t* weights, int32_t d, int32_t len);
00395
00402 bool set_position_weights(float64_t* position_weights, int32_t len=0);
00403
00408 bool init_block_weights();
00409
00414 bool init_block_weights_from_wd();
00415
00420 bool init_block_weights_from_wd_external();
00421
00426 bool init_block_weights_const();
00427
00432 bool init_block_weights_linear();
00433
00438 bool init_block_weights_sqpoly();
00439
00444 bool init_block_weights_cubicpoly();
00445
00450 bool init_block_weights_exp();
00451
00456 bool init_block_weights_log();
00457
00462 bool init_block_weights_external();
00463
00468 bool delete_position_weights()
00469 {
00470 delete[] position_weights;
00471 position_weights=NULL;
00472 return true;
00473 }
00474
00480 bool set_max_mismatch(int32_t max);
00481
00486 inline int32_t get_max_mismatch() { return max_mismatch; }
00487
00493 inline bool set_degree(int32_t deg) { degree=deg; return true; }
00494
00499 inline int32_t get_degree() { return degree; }
00500
00506 inline bool set_use_block_computation(bool block)
00507 {
00508 block_computation=block;
00509 return true;
00510 }
00511
00516 inline bool get_use_block_computation() { return block_computation; }
00517
00523 inline bool set_mkl_stepsize(int32_t step)
00524 {
00525 mkl_stepsize=step;
00526 return true;
00527 }
00528
00533 inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
00534
00540 inline bool set_which_degree(int32_t which)
00541 {
00542 which_degree=which;
00543 return true;
00544 }
00545
00550 inline int32_t get_which_degree() { return which_degree; }
00551
00552 protected:
00554 void create_empty_tries();
00555
00561 void add_example_to_tree(int32_t idx, float64_t weight);
00562
00569 void add_example_to_single_tree(
00570 int32_t idx, float64_t weight, int32_t tree_num);
00571
00577 void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
00578
00585 void add_example_to_single_tree_mismatch(
00586 int32_t idx, float64_t weight, int32_t tree_num);
00587
00593 float64_t compute_by_tree(int32_t idx);
00594
00603 float64_t compute(int32_t idx_a, int32_t idx_b);
00604
00613 float64_t compute_with_mismatch(
00614 char* avec, int32_t alen, char* bvec, int32_t blen);
00615
00624 float64_t compute_without_mismatch(
00625 char* avec, int32_t alen, char* bvec, int32_t blen);
00626
00635 float64_t compute_without_mismatch_matrix(
00636 char* avec, int32_t alen, char* bvec, int32_t blen);
00637
00646 float64_t compute_using_block(char* avec, int32_t alen,
00647 char* bvec, int32_t blen);
00648
00650 virtual void remove_lhs();
00651
00652 protected:
00656 float64_t* weights;
00658 float64_t* position_weights;
00660 float64_t* weights_buffer;
00662 int32_t mkl_stepsize;
00664 int32_t degree;
00666 int32_t length;
00667
00669 int32_t max_mismatch;
00671 int32_t seq_length;
00672
00674 bool initialized;
00675
00677 bool block_computation;
00678
00680 int32_t num_block_weights_external;
00682 float64_t* block_weights_external;
00683
00685 float64_t* block_weights;
00687 EWDKernType type;
00689 int32_t which_degree;
00690
00692 CTrie<DNATrie>* tries;
00693
00695 bool tree_initialized;
00696
00698 CAlphabet* alphabet;
00699 };
00700
00701 #endif