Alphabet.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef _CALPHABET__H__
00012 #define _CALPHABET__H__
00013
00014 #include "lib/Mathematics.h"
00015 #include "base/SGObject.h"
00016 #include "lib/common.h"
00017
00019 class CAlphabet : public CSGObject
00020 {
00021 public:
00027 CAlphabet(CHAR* alpha, INT len);
00028
00033 CAlphabet(E_ALPHABET alpha);
00034
00039 CAlphabet(CAlphabet* alpha);
00040 ~CAlphabet();
00041
00046 bool set_alphabet(E_ALPHABET alpha);
00047
00052 inline E_ALPHABET get_alphabet()
00053 {
00054 return alphabet;
00055 }
00056
00061 inline INT get_num_symbols()
00062 {
00063 return num_symbols;
00064 }
00065
00071 inline INT get_num_bits()
00072 {
00073 return num_bits;
00074 }
00075
00081 inline BYTE remap_to_bin(BYTE c)
00082 {
00083 return maptable_to_bin[c];
00084 }
00085
00091 inline BYTE remap_to_char(BYTE c)
00092 {
00093 return maptable_to_char[c];
00094 }
00095
00097 void clear_histogram();
00098
00104 void add_string_to_histogram(CHAR* p, LONG len);
00105
00111 void add_string_to_histogram(BYTE* p, LONG len);
00112
00118 void add_string_to_histogram(SHORT* p, LONG len);
00119
00125 void add_string_to_histogram(WORD* p, LONG len);
00126
00132 void add_string_to_histogram(INT* p, LONG len);
00133
00139 void add_string_to_histogram(UINT* p, LONG len);
00140
00146 void add_string_to_histogram(LONG* p, LONG len);
00147
00153 void add_string_to_histogram(ULONG* p, LONG len);
00154
00159 inline void add_byte_to_histogram(BYTE p)
00160 {
00161 histogram[(INT) p]++;
00162 }
00163
00165 void print_histogram();
00166
00172 inline void get_hist(LONG** h, INT* len)
00173 {
00174 INT hist_size=(1 << (sizeof(BYTE)*8));
00175 ASSERT(h && len);
00176 *h=(LONG*) malloc(sizeof(LONG)*hist_size);
00177 ASSERT(*h);
00178 *len=hist_size;
00179 ASSERT(*len);
00180 memcpy(*h, &histogram[0], sizeof(LONG)*hist_size);
00181 }
00182
00184 inline const LONG* get_histogram()
00185 {
00186 return &histogram[0];
00187 }
00188
00195 bool check_alphabet(bool print_error=true);
00196
00202 bool check_alphabet_size(bool print_error=true);
00203
00208 INT get_num_symbols_in_histogram();
00209
00214 INT get_max_value_in_histogram();
00215
00222 INT get_num_bits_in_histogram();
00223
00228 static const CHAR* get_alphabet_name(E_ALPHABET alphabet);
00229
00230 protected:
00232 void init_map_table();
00233
00238 void copy_histogram(CAlphabet* src);
00239
00240 public:
00242 static const BYTE B_A;
00244 static const BYTE B_C;
00246 static const BYTE B_G;
00248 static const BYTE B_T;
00250 static const BYTE MAPTABLE_UNDEF;
00252 static const CHAR* alphabet_names[11];
00253
00254 protected:
00256 E_ALPHABET alphabet;
00258 INT num_symbols;
00260 INT num_bits;
00262 BYTE valid_chars[1 << (sizeof(BYTE)*8)];
00264 BYTE maptable_to_bin[1 << (sizeof(BYTE)*8)];
00266 BYTE maptable_to_char[1 << (sizeof(BYTE)*8)];
00268 LONG histogram[1 << (sizeof(BYTE)*8)];
00269 };
00270 #endif