WordFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _WORDFEATURES__H__
00013 #define _WORDFEATURES__H__
00014 
00015 #include "features/SimpleFeatures.h"
00016 #include "features/CharFeatures.h"
00017 #include "lib/common.h"
00018 
/** Feature class whose elements are of type WORD.
 *
 * Derives from CSimpleFeatures<WORD> and adds four pieces of state:
 * num_symbols, original_num_symbols, order and symbol_mask_table.
 * Only the inline members are defined in this header; everything else is
 * declared here and defined elsewhere.
 */
00020 class CWordFeatures : public CSimpleFeatures<WORD>
00021 {
00022     public:
              /** Constructor (defined out of line).
               *
               * @param size presumably the feature cache size handed to the
               *             base class -- TODO confirm against the definition
               * @param num_symbols symbol count; defaults to 1<<16 (65536)
               */
00028         CWordFeatures(INT size=0, INT num_symbols=(1<<16));
00029 
              /** Copy constructor (defined out of line).
               *
               * @param orig object to copy from
               */
00031         CWordFeatures(const CWordFeatures & orig);
00032 
              /** Constructor that takes an existing feature matrix.
               *
               * Constructs the base class with argument 0, sets num_symbols
               * and original_num_symbols to 1<<16, order to 0 and
               * symbol_mask_table to NULL, then hands the matrix to
               * CSimpleFeatures<WORD>::copy_feature_matrix().
               *
               * @param src feature matrix to take the values from
               * @param num_feat forwarded unchanged; presumably the number of
               *                 features per vector -- TODO confirm
               * @param num_vec forwarded unchanged; presumably the number of
               *                vectors -- TODO confirm
               */
00040         inline CWordFeatures(WORD* src, INT num_feat, INT num_vec):
00041             CSimpleFeatures<WORD>(0), num_symbols(1<<16),
00042             original_num_symbols(1<<16), order(0), symbol_mask_table(NULL)
00043         {
                  // Explicitly qualified, so the base-class implementation is
                  // called rather than going through virtual dispatch.
00044             CSimpleFeatures<WORD>::copy_feature_matrix(src, num_feat, num_vec);
00045         }
00046 
              /** Constructor taking a file name (defined out of line).
               *
               * @param fname file name; presumably the file the features are
               *              loaded from -- TODO confirm
               * @param num_sym symbol count; defaults to 1<<16 (65536)
               */
00052         CWordFeatures(CHAR* fname, INT num_sym = (1<<16));
00053 
              /** Virtual destructor (defined out of line). */
00054         virtual ~CWordFeatures();
00055 
              /** Fill this object from char features (defined out of line).
               *
               * NOTE(review): the meaning of the arguments below is inferred
               * from their names only; verify against the implementation.
               *
               * @param cf char features used as the source
               * @param start presumably the start offset of the conversion
               * @param order presumably the order of the resulting symbols
               * @param gap presumably a gap between symbols; defaults to 0
               * @return bool; presumably true on success
               */
00064         bool obtain_from_char_features(CCharFeatures* cf, INT start, INT order, INT gap=0);
00065 
              /** Copy a feature matrix into this object.
               *
               * Pure forwarder to CSimpleFeatures<WORD>::copy_feature_matrix();
               * no additional work is done here.
               *
               * @param src feature matrix to copy
               * @param num_feat forwarded unchanged to the base class
               * @param num_vec forwarded unchanged to the base class
               */
00074         inline virtual void copy_feature_matrix(WORD* src, INT num_feat, INT num_vec)
00075         {
00076             CSimpleFeatures<WORD>::copy_feature_matrix(src, num_feat, num_vec);
00077         }
00078 
              /** Load features from a file (virtual, defined out of line).
               *
               * @param fname name of the file to read
               * @return bool; presumably true if loading succeeded
               */
00084         virtual bool load(CHAR* fname);
00085 
              /** Save features to a file (virtual, defined out of line).
               *
               * @param fname name of the file to write
               * @return bool; presumably true if saving succeeded
               */
00091         virtual bool save(CHAR* fname);
00092 
              /** Get the number of symbols.
               *
               * @return current value of num_symbols
               */
00097         inline INT get_num_symbols() { return num_symbols; }
00098 
00099         // these functions are necessary to find out about a former conversion process
00100 
              /** Get the original number of symbols.
               *
               * @return current value of original_num_symbols
               */
00105         inline INT get_original_num_symbols() { return original_num_symbols; }
00106 
              /** Get the order.
               *
               * @return current value of order
               */
00111         inline INT get_order() { return order; }
00112 
              /** Apply an entry of the symbol mask table to a symbol.
               *
               * Asserts that symbol_mask_table is non-NULL, then returns the
               * bitwise AND of symbol with symbol_mask_table[mask].
               *
               * NOTE(review): mask is used as an index without a range check;
               * the size of the table is not visible in this header.
               *
               * @param symbol symbol to be masked
               * @param mask index into symbol_mask_table
               * @return symbol_mask_table[mask] & symbol
               */
00120         inline WORD get_masked_symbols(WORD symbol, BYTE mask)
00121         {
00122             ASSERT(symbol_mask_table);
00123             return symbol_mask_table[mask] & symbol;
00124         }
00125 
00126     protected:
              /** Helper declared here and defined out of line.
               *
               * NOTE(review): parameter meanings are inferred from their
               * names only; verify against the implementation.
               *
               * @param obs buffer of WORD observations
               * @param sequence_length presumably the number of entries in obs
               * @param start presumably the start offset
               * @param order presumably the target order
               * @param max_val presumably the largest admissible value
               * @param gap presumably a gap between symbols; defaults to 0
               */
00137         void translate_from_single_order(WORD* obs, INT sequence_length, INT start, INT order, INT max_val, INT gap=0);
00138 
00139     protected:
              /** Symbol count; returned by get_num_symbols(). */
00141         INT num_symbols;
00142 
              /** Original symbol count; returned by get_original_num_symbols(). */
00144         INT original_num_symbols;
00145 
              /** Order; returned by get_order(). */
00147         INT order;
00148 
              /** Mask table indexed by a BYTE in get_masked_symbols(); NULL
               *  after the inline matrix constructor. Allocation and
               *  ownership are not visible in this header. */
00150         WORD* symbol_mask_table;
00151 };
00152 #endif

SHOGUN Machine Learning Toolbox - Documentation