ShortFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/ShortFeatures.h"
00013 #include "features/CharFeatures.h"
00014 
00015 CShortFeatures::CShortFeatures(int32_t size)
00016 : CSimpleFeatures<int16_t>(size)
00017 {
00018 }
00019 
00020 CShortFeatures::CShortFeatures(const CShortFeatures & orig)
00021 : CSimpleFeatures<int16_t>(orig)
00022 {
00023 }
00024 
00025 CShortFeatures::CShortFeatures(char* fname)
00026 : CSimpleFeatures<int16_t>(fname)
00027 {
00028 }
00029 
00030 bool CShortFeatures::obtain_from_char_features(
00031     CCharFeatures* cf, int32_t start, int32_t order, int32_t gap)
00032 {
00033     ASSERT(cf);
00034 
00035     num_vectors=cf->get_num_vectors();
00036     num_features=cf->get_num_features();
00037 
00038     CAlphabet* alpha=cf->get_alphabet();
00039     ASSERT(alpha);
00040 
00041     int32_t len=num_vectors*num_features;
00042     free_feature_matrix();
00043     feature_matrix=new int16_t[len];
00044     int32_t num_cf_feat=0;
00045     int32_t num_cf_vec=0;
00046     char* fm=cf->get_feature_matrix(num_cf_feat, num_cf_vec);
00047 
00048     ASSERT(num_cf_vec==num_vectors);
00049     ASSERT(num_cf_feat==num_features);
00050 
00051     int32_t max_val=0;
00052     for (int32_t i=0; i<len; i++)
00053     {
00054         feature_matrix[i]=(int16_t) alpha->remap_to_bin(fm[i]);
00055         max_val=CMath::max((int32_t) feature_matrix[i],max_val);
00056     }
00057 
00058     for (int32_t line=0; line<num_vectors; line++)
00059         translate_from_single_order(&feature_matrix[line*num_features], num_features, start+gap, order+gap, max_val, gap);
00060 
00061     if (start+gap!=0)
00062     {
00063         // condensing feature matrix ...
00064         ASSERT(start+gap>=0);
00065         for (int32_t line=0; line<num_vectors; line++)
00066             for (int32_t j=0; j<num_features-start-gap; j++)
00067                 feature_matrix[line*(num_features-(start+gap))+j]=feature_matrix[line*num_features+j] ;
00068         num_features=num_features-(start+gap) ;
00069     }
00070     
00071     return true;
00072 }
00073 
00074 
00075 void CShortFeatures::translate_from_single_order(
00076     int16_t* obs, int32_t sequence_length, int32_t start, int32_t order,
00077     int32_t max_val, int32_t gap)
00078 {
00079     ASSERT(gap>=0);
00080 
00081     const int32_t start_gap = (order - gap)/2;
00082     const int32_t end_gap = start_gap + gap;
00083     int32_t i,j;
00084     int16_t value=0;
00085 
00086     // almost all positions
00087     for (i=sequence_length-1; i>=order-1; i--) //convert interval of size T
00088     {
00089         value=0;
00090         for (j=i; j>=i-order+1; j--)
00091         {
00092             if (i-j<start_gap)
00093                 value= (value >> max_val) | (obs[j] << (max_val * (order-1-gap)));
00094             else if (i-j>=end_gap)
00095                 value= (value >> max_val) | (obs[j] << (max_val * (order-1-gap)));
00096         }
00097         obs[i]=value;
00098     }
00099     
00100     // the remaining `order` positions
00101     for (i=order-2;i>=0;i--)
00102     {
00103         value=0;
00104         for (j=i; j>=i-order+1; j--)
00105         {
00106             if (i-j<start_gap)
00107             {
00108                 value= (value >> max_val);
00109                 if (j>=0)
00110                     value|=obs[j] << (max_val * (order-1-gap));
00111             } 
00112             else if (i-j>=end_gap)
00113                 {
00114                     value= (value >> max_val);
00115                     if (j>=0)
00116                         value|=obs[j] << (max_val * (order-1-gap));
00117                 }
00118         }
00119         obs[i]=value;
00120     }
00121 
00122     for (i=start; i<sequence_length; i++)   
00123         obs[i-start]=obs[i];
00124 }
00125 
00126 bool CShortFeatures::load(char* fname)
00127 {
00128     return false;
00129 }
00130 
00131 bool CShortFeatures::save(char* fname)
00132 {
00133     return false;
00134 }

SHOGUN Machine Learning Toolbox - Documentation