00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
00013 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
00014
00015 #include "lib/common.h"
00016 #include "kernel/StringKernel.h"
00017 #include "lib/Trie.h"
00018
00019 class CSVM ;
00020
00022 class CWeightedDegreePositionStringKernel: public CStringKernel<CHAR>
00023 {
00024 public:
00033 CWeightedDegreePositionStringKernel(INT size, INT degree,
00034 INT max_mismatch=0, bool use_norm=true, INT mkl_stepsize=1);
00035
00047 CWeightedDegreePositionStringKernel(INT size, DREAL* weights,
00048 INT degree, INT max_mismatch, INT* shift, INT shift_len,
00049 bool use_norm=true, INT mkl_stepsize=1);
00050
00057 CWeightedDegreePositionStringKernel(
00058 CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r,
00059 INT degree);
00060
00061 virtual ~CWeightedDegreePositionStringKernel();
00062
00069 virtual bool init(CFeatures* l, CFeatures* r);
00070
00072 virtual void cleanup();
00073
00079 bool load_init(FILE* src);
00080
00086 bool save_init(FILE* dest);
00087
00092 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREEPOS; }
00093
00098 virtual const CHAR* get_name() { return "WeightedDegreePos" ; } ;
00099
00107 inline virtual bool init_optimization(INT p_count, INT *IDX, DREAL * alphas)
00108 {
00109 return init_optimization(p_count, IDX, alphas, -1);
00110 }
00111
00123 virtual bool init_optimization(INT count, INT *IDX, DREAL * alphas,
00124 INT tree_num, INT upto_tree=-1);
00125
00130 virtual bool delete_optimization();
00131
00137 inline virtual DREAL compute_optimized(INT idx)
00138 {
00139 ASSERT(get_is_initialized());
00140 ASSERT(alphabet);
00141 ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00142 return compute_by_tree(idx);
00143 }
00144
00149 static void* compute_batch_helper(void* p);
00150
00161 virtual void compute_batch(INT num_vec, INT* vec_idx, DREAL* target,
00162 INT num_suppvec, INT* IDX, DREAL* alphas, DREAL factor=1.0);
00163
00167 inline virtual void clear_normal()
00168 {
00169 if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes()))
00170 {
00171 tries.set_use_compact_terminal_nodes(false) ;
00172 SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00173 }
00174
00175 if (get_is_initialized())
00176 {
00177 if (opt_type==SLOWBUTMEMEFFICIENT)
00178 tries.delete_trees(true);
00179 else if (opt_type==FASTBUTMEMHUNGRY)
00180 tries.delete_trees(false);
00181 else
00182 SG_ERROR( "unknown optimization type\n");
00183
00184 set_is_initialized(false);
00185 }
00186 }
00187
00193 inline virtual void add_to_normal(INT idx, DREAL weight)
00194 {
00195 add_example_to_tree(idx, weight);
00196 set_is_initialized(true);
00197 }
00198
00203 inline virtual INT get_num_subkernels()
00204 {
00205 if (position_weights!=NULL)
00206 return (INT) ceil(1.0*seq_length/mkl_stepsize) ;
00207 if (length==0)
00208 return (INT) ceil(1.0*get_degree()/mkl_stepsize);
00209 return (INT) ceil(1.0*get_degree()*length/mkl_stepsize) ;
00210 }
00211
00217 inline void compute_by_subkernel(INT idx, DREAL * subkernel_contrib)
00218 {
00219 if (get_is_initialized())
00220 {
00221 compute_by_tree(idx, subkernel_contrib);
00222 return ;
00223 }
00224
00225 SG_ERROR( "CWeightedDegreePositionStringKernel optimization not initialized\n") ;
00226 }
00227
00233 inline const DREAL* get_subkernel_weights(INT& num_weights)
00234 {
00235 num_weights = get_num_subkernels() ;
00236
00237 delete[] weights_buffer ;
00238 weights_buffer = new DREAL[num_weights] ;
00239
00240 if (position_weights!=NULL)
00241 for (INT i=0; i<num_weights; i++)
00242 weights_buffer[i] = position_weights[i*mkl_stepsize] ;
00243 else
00244 for (INT i=0; i<num_weights; i++)
00245 weights_buffer[i] = weights[i*mkl_stepsize] ;
00246
00247 return weights_buffer ;
00248 }
00249
00255 inline void set_subkernel_weights(DREAL* weights2, INT num_weights2)
00256 {
00257 INT num_weights = get_num_subkernels() ;
00258 if (num_weights!=num_weights2)
00259 SG_ERROR( "number of weights do not match\n") ;
00260
00261 if (position_weights!=NULL)
00262 for (INT i=0; i<num_weights; i++)
00263 for (INT j=0; j<mkl_stepsize; j++)
00264 {
00265 if (i*mkl_stepsize+j<seq_length)
00266 position_weights[i*mkl_stepsize+j] = weights2[i] ;
00267 }
00268 else if (length==0)
00269 {
00270 for (INT i=0; i<num_weights; i++)
00271 for (INT j=0; j<mkl_stepsize; j++)
00272 if (i*mkl_stepsize+j<get_degree())
00273 weights[i*mkl_stepsize+j] = weights2[i] ;
00274 }
00275 else
00276 {
00277 for (INT i=0; i<num_weights; i++)
00278 for (INT j=0; j<mkl_stepsize; j++)
00279 if (i*mkl_stepsize+j<get_degree()*length)
00280 weights[i*mkl_stepsize+j] = weights2[i] ;
00281 }
00282 }
00283
00284
00290 DREAL *compute_abs_weights(INT & len);
00291
00296 bool is_tree_initialized() { return tree_initialized; }
00297
00302 inline INT get_max_mismatch() { return max_mismatch; }
00303
00308 inline INT get_degree() { return degree; }
00309
00314 inline DREAL get_normalization_const() { return normalization_const; }
00315
00321 inline DREAL *get_degree_weights(INT& d, INT& len)
00322 {
00323 d=degree;
00324 len=length;
00325 return weights;
00326 }
00327
00333 inline DREAL *get_weights(INT& num_weights)
00334 {
00335 if (position_weights!=NULL)
00336 {
00337 num_weights = seq_length ;
00338 return position_weights ;
00339 }
00340 if (length==0)
00341 num_weights = degree ;
00342 else
00343 num_weights = degree*length ;
00344 return weights;
00345 }
00346
00352 inline DREAL *get_position_weights(INT& len)
00353 {
00354 len=seq_length;
00355 return position_weights;
00356 }
00357
00363 bool set_shifts(INT* shifts, INT len);
00364
00371 virtual bool set_weights(DREAL* weights, INT d, INT len=0);
00372
00377 virtual bool set_wd_weights();
00378
00385 virtual bool set_position_weights(DREAL* position_weights, INT len=0);
00386
00394 bool set_position_weights_lhs(DREAL* pws, INT len, INT num);
00395
00403 bool set_position_weights_rhs(DREAL* pws, INT len, INT num);
00404
00409 bool init_block_weights();
00410
00415 bool init_block_weights_from_wd();
00416
00421 bool init_block_weights_from_wd_external();
00422
00427 bool init_block_weights_const();
00428
00433 bool init_block_weights_linear();
00434
00439 bool init_block_weights_sqpoly();
00440
00445 bool init_block_weights_cubicpoly();
00446
00451 bool init_block_weights_exp();
00452
00457 bool init_block_weights_log();
00458
00463 bool init_block_weights_external();
00464
00469 bool delete_position_weights() { delete[] position_weights ; position_weights=NULL ; return true ; } ;
00470
00475 bool delete_position_weights_lhs() { delete[] position_weights_lhs ; position_weights_lhs=NULL ; return true ; } ;
00476
00481 bool delete_position_weights_rhs() { delete[] position_weights_rhs ; position_weights_rhs=NULL ; return true ; } ;
00482
00487 inline bool get_use_normalization() { return use_normalization; }
00488
00494 virtual DREAL compute_by_tree(INT idx);
00495
00501 virtual void compute_by_tree(INT idx, DREAL* LevelContrib);
00502
00515 DREAL* compute_scoring(INT max_degree, INT& num_feat, INT& num_sym,
00516 DREAL* target, INT num_suppvec, INT* IDX, DREAL* weights);
00517
00526 CHAR* compute_consensus(INT &num_feat, INT num_suppvec, INT* IDX,
00527 DREAL* alphas);
00528
00540 DREAL* extract_w( INT max_degree, INT& num_feat, INT& num_sym,
00541 DREAL* w_result, INT num_suppvec, INT* IDX, DREAL* alphas);
00542
00555 DREAL* compute_POIM( INT max_degree, INT& num_feat, INT& num_sym,
00556 DREAL* poim_result, INT num_suppvec, INT* IDX, DREAL* alphas, DREAL* distrib );
00557
00564 void prepare_POIM2(DREAL* distrib, INT num_sym, INT num_feat);
00565
00572 void compute_POIM2(INT max_degree, CSVM* svm);
00573
00579 void get_POIM2(DREAL** poim, INT* result_len);
00580
00582 void cleanup_POIM2();
00583
00584 protected:
00586 void create_empty_tries();
00587
00593 virtual void add_example_to_tree(INT idx, DREAL weight);
00594
00601 void add_example_to_single_tree(INT idx, DREAL weight, INT tree_num);
00602
00611 virtual DREAL compute(INT idx_a, INT idx_b);
00612
00621 DREAL compute_with_mismatch(CHAR* avec, INT alen,
00622 CHAR* bvec, INT blen);
00623
00632 DREAL compute_without_mismatch(CHAR* avec, INT alen,
00633 CHAR* bvec, INT blen);
00634
00643 DREAL compute_without_mismatch_matrix(CHAR* avec, INT alen,
00644 CHAR* bvec, INT blen);
00645
00656 DREAL compute_without_mismatch_position_weights(
00657 CHAR* avec, DREAL *posweights_lhs, INT alen,
00658 CHAR* bvec, DREAL *posweights_rhs, INT blen);
00659
00661 virtual void remove_lhs();
00662
00663 protected:
00665 DREAL* weights;
00667 DREAL* position_weights;
00669 DREAL* position_weights_lhs;
00671 DREAL* position_weights_rhs;
00673 bool* position_mask;
00674
00676 DREAL* weights_buffer;
00678 INT mkl_stepsize;
00679
00681 INT degree;
00683 INT length;
00684
00686 INT max_mismatch;
00688 INT seq_length;
00689
00691 INT *shift;
00693 INT shift_len;
00695 INT max_shift;
00696
00698 bool initialized;
00700 bool use_normalization;
00702 bool block_computation;
00703
00705 DREAL normalization_const;
00706
00708 INT num_block_weights_external;
00710 DREAL* block_weights_external;
00711
00713 DREAL* block_weights;
00715 EWDKernType type;
00717 INT which_degree;
00718
00720 CTrie<DNATrie> tries;
00722 CTrie<POIMTrie> poim_tries;
00723
00725 bool tree_initialized;
00727 bool use_poim_tries;
00728
00730 DREAL* m_poim_distrib;
00732 DREAL* m_poim;
00733
00735 INT m_poim_num_sym;
00737 INT m_poim_num_feat;
00739 INT m_poim_result_len;
00740
00742 CAlphabet* alphabet;
00743 };
00744 #endif