WordFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WORDFEATURES__H__
00013 #define _WORDFEATURES__H__
00014 
00015 #include "features/SimpleFeatures.h"
00016 #include "features/CharFeatures.h"
00017 #include "lib/common.h"
00018 
/**
 * @brief Feature container with 16-bit unsigned entries, derived from
 *        CSimpleFeatures<uint16_t>.
 *
 * On top of what the base class provides, this class carries two symbol
 * counts (num_symbols, original_num_symbols), an order and a pointer to a
 * symbol mask table.
 *
 * NOTE(review): most members are only declared in this header; anything
 * said below about their behaviour beyond the signature is an assumption
 * that should be checked against their definitions.
 */
00023 class CWordFeatures : public CSimpleFeatures<uint16_t>
00024 {
00025     public:
        /**
         * Constructor.
         *
         * @param size        defaults to 0; presumably passed on to the base
         *                    class - TODO confirm against the definition
         * @param num_symbols defaults to 1<<16 (65536); presumably the
         *                    initial value of the num_symbols member -
         *                    TODO confirm
         */
00031         CWordFeatures(int32_t size=0, int32_t num_symbols=(1<<16));
00032 
        /**
         * Copy constructor.
         *
         * @param orig object to copy from
         */
00034         CWordFeatures(const CWordFeatures & orig);
00035 
        /**
         * Constructor taking an existing feature matrix.
         *
         * Initialises the base class with 0, sets num_symbols and
         * original_num_symbols to 1<<16, order to 0 and symbol_mask_table
         * to NULL, then passes the arguments unchanged to
         * CSimpleFeatures<uint16_t>::copy_feature_matrix().
         *
         * @param src      feature matrix data
         * @param num_feat presumably the number of features per vector -
         *                 TODO confirm against the base class
         * @param num_vec  presumably the number of vectors - TODO confirm
         */
00043         inline CWordFeatures(uint16_t* src, int32_t num_feat, int32_t num_vec):
00044             CSimpleFeatures<uint16_t>(0), num_symbols(1<<16),
00045             original_num_symbols(1<<16), order(0), symbol_mask_table(NULL)
00046         {
            // Qualified call: invokes the base-class implementation
            // directly rather than dispatching virtually.
00047             CSimpleFeatures<uint16_t>::copy_feature_matrix(src, num_feat, num_vec);
00048         }
00049 
        /**
         * Constructor taking a file name.
         *
         * @param fname   file name; presumably the file the features are
         *                read from - TODO confirm against the definition
         * @param num_sym defaults to 1<<16 (65536); presumably the number
         *                of symbols - TODO confirm
         */
00055         CWordFeatures(char* fname, int32_t num_sym = (1<<16));
00056 
        /** Destructor (virtual). */
00057         virtual ~CWordFeatures();
00058 
        /**
         * Obtain word features from char features.
         *
         * NOTE(review): the parameter named order has the same name as the
         * order member of this class.
         *
         * @param cf    char features used as input
         * @param start presumably a start value/offset for the conversion -
         *              TODO confirm
         * @param order presumably the order of the conversion - TODO confirm
         * @param gap   defaults to 0; meaning not visible in this header
         * @return bool; presumably true on success - TODO confirm
         */
00067         bool obtain_from_char_features(CCharFeatures* cf, int32_t start, int32_t order, int32_t gap=0);
00068 
        /**
         * Get the feature matrix; forwards all arguments unchanged to
         * CSimpleFeatures<uint16_t>::get_fm().
         *
         * @param dst passed through to the base class
         * @param d1  passed through to the base class
         * @param d2  passed through to the base class
         */
00075         inline virtual void get_fm(uint16_t** dst, int32_t* d1, int32_t* d2)
00076         {
00077             CSimpleFeatures<uint16_t>::get_fm(dst, d1, d2);
00078         }
00079 
00080 
        /**
         * Copy a feature matrix; forwards all arguments unchanged to
         * CSimpleFeatures<uint16_t>::copy_feature_matrix().
         *
         * @param src      feature matrix data
         * @param num_feat passed through to the base class
         * @param num_vec  passed through to the base class
         */
00089         inline virtual void copy_feature_matrix(uint16_t* src, int32_t num_feat, int32_t num_vec)
00090         {
00091             CSimpleFeatures<uint16_t>::copy_feature_matrix(src, num_feat, num_vec);
00092         }
00093 
        /**
         * Load features (virtual).
         *
         * @param fname file name; presumably the file to read - TODO confirm
         * @return bool; presumably true on success - TODO confirm
         */
00099         virtual bool load(char* fname);
00100 
        /**
         * Save features (virtual).
         *
         * @param fname file name; presumably the file to write - TODO confirm
         * @return bool; presumably true on success - TODO confirm
         */
00106         virtual bool save(char* fname);
00107 
        /**
         * @return current value of the num_symbols member
         */
00112         inline int32_t get_num_symbols() { return num_symbols; }
00113 
00114         // these functions are necessary to find out about a former conversion process
00115 
        /**
         * @return current value of the original_num_symbols member
         */
00120         inline int32_t get_original_num_symbols() { return original_num_symbols; }
00121 
        /**
         * @return current value of the order member
         */
00126         inline int32_t get_order() { return order; }
00127 
        /**
         * Mask a symbol with an entry of the symbol mask table.
         *
         * Asserts that symbol_mask_table is non-NULL. Note that the
         * matrix-taking constructor above leaves it NULL, so the table has
         * to be set up by code not visible in this header before this is
         * called on such an object.
         *
         * NOTE(review): mask is used as an index without a range check
         * here; the table's length is not visible in this header.
         *
         * @param symbol value to be masked
         * @param mask   index into symbol_mask_table
         * @return symbol_mask_table[mask] & symbol
         */
00135         inline uint16_t get_masked_symbols(uint16_t symbol, uint8_t mask)
00136         {
00137             ASSERT(symbol_mask_table);
00138             return symbol_mask_table[mask] & symbol;
00139         }
00140 
00141     protected:
        /**
         * Translate from single order (declaration only; the behaviour is
         * defined outside this header).
         *
         * @param obs             buffer of uint16_t observations
         * @param sequence_length presumably the number of entries in obs -
         *                        TODO confirm
         * @param start           meaning not visible in this header
         * @param order           meaning not visible in this header
         * @param max_val         meaning not visible in this header
         * @param gap             defaults to 0; meaning not visible in this
         *                        header
         */
00152         void translate_from_single_order(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t order, int32_t max_val, int32_t gap=0);
00153 
00154     protected:
        /** number of symbols; returned by get_num_symbols() */
00156         int32_t num_symbols;
00157 
        /** original number of symbols; returned by get_original_num_symbols() */
00159         int32_t original_num_symbols;
00160 
        /** order; returned by get_order() */
00162         int32_t order;
00163 
        /** table indexed by mask in get_masked_symbols(); may be NULL */
00165         uint16_t* symbol_mask_table;
00166 };
00167 #endif

SHOGUN Machine Learning Toolbox - Documentation