ShortFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/ShortFeatures.h"
00013 #include "features/CharFeatures.h"
00014 
00015 CShortFeatures::CShortFeatures(INT size)
00016 : CSimpleFeatures<SHORT>(size)
00017 {
00018 }
00019 
00020 CShortFeatures::CShortFeatures(const CShortFeatures & orig)
00021 : CSimpleFeatures<SHORT>(orig)
00022 {
00023 }
00024 
00025 CShortFeatures::CShortFeatures(CHAR* fname)
00026 : CSimpleFeatures<SHORT>(fname)
00027 {
00028 }
00029 
00030 bool CShortFeatures::obtain_from_char_features(CCharFeatures* cf, INT start, INT order, INT gap)
00031 {
00032     ASSERT(cf);
00033 
00034     num_vectors=cf->get_num_vectors();
00035     num_features=cf->get_num_features();
00036 
00037     CAlphabet* alpha=cf->get_alphabet();
00038     ASSERT(alpha);
00039 
00040     INT len=num_vectors*num_features;
00041     free_feature_matrix();
00042     feature_matrix=new SHORT[len];
00043     INT num_cf_feat=0;
00044     INT num_cf_vec=0;
00045     CHAR* fm=cf->get_feature_matrix(num_cf_feat, num_cf_vec);
00046 
00047     ASSERT(num_cf_vec==num_vectors);
00048     ASSERT(num_cf_feat==num_features);
00049 
00050     INT max_val=0;
00051     for (INT i=0; i<len; i++)
00052     {
00053         feature_matrix[i]=(SHORT) alpha->remap_to_bin(fm[i]);
00054         max_val=CMath::max((INT) feature_matrix[i],max_val);
00055     }
00056 
00057     for (INT line=0; line<num_vectors; line++)
00058         translate_from_single_order(&feature_matrix[line*num_features], num_features, start+gap, order+gap, max_val, gap);
00059 
00060     if (start+gap!=0)
00061     {
00062         // condensing feature matrix ...
00063         ASSERT(start+gap>=0);
00064         for (INT line=0; line<num_vectors; line++)
00065             for (INT j=0; j<num_features-start-gap; j++)
00066                 feature_matrix[line*(num_features-(start+gap))+j]=feature_matrix[line*num_features+j] ;
00067         num_features=num_features-(start+gap) ;
00068     }
00069     
00070     return true;
00071 }
00072 
00073 
00074 void CShortFeatures::translate_from_single_order(SHORT* obs, INT sequence_length, INT start, INT order, INT max_val, INT gap)
00075 {
00076     ASSERT(gap>=0);
00077 
00078     const INT start_gap = (order - gap)/2 ;
00079     const INT end_gap = start_gap + gap ;
00080 
00081     INT i,j;
00082     SHORT value=0;
00083 
00084     // almost all positions
00085     for (i=sequence_length-1; i>= ((int) order)-1; i--) //convert interval of size T
00086     {
00087         value=0;
00088         for (j=i; j>=i-((int) order)+1; j--)
00089         {
00090             if (i-j<start_gap)
00091                 value= (value >> max_val) | (obs[j] << (max_val * (order-1-gap)));
00092             else if (i-j>=end_gap)
00093                 value= (value >> max_val) | (obs[j] << (max_val * (order-1-gap)));
00094         }
00095         obs[i]=value;
00096     }
00097     
00098     // the remaining `order` positions
00099     for (i=order-2;i>=0;i--)
00100     {
00101         value=0;
00102         for (j=i; j>=i-order+1; j--)
00103         {
00104             if (i-j<start_gap)
00105             {
00106                 value= (value >> max_val);
00107                 if (j>=0)
00108                     value|=obs[j] << (max_val * (order-1-gap));
00109             } 
00110             else if (i-j>=end_gap)
00111                 {
00112                     value= (value >> max_val);
00113                     if (j>=0)
00114                         value|=obs[j] << (max_val * (order-1-gap));
00115                 }
00116         }
00117         obs[i]=value;
00118     }
00119 
00120     for (i=start; i<sequence_length; i++)   
00121         obs[i-start]=obs[i];
00122 }
00123 
00124 bool CShortFeatures::load(CHAR* fname)
00125 {
00126     return false;
00127 }
00128 
00129 bool CShortFeatures::save(CHAR* fname)
00130 {
00131     return false;
00132 }

SHOGUN Machine Learning Toolbox - Documentation