Alphabet.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2008 Soeren Sonnenburg
00008  * Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #ifndef _CALPHABET__H__
00012 #define _CALPHABET__H__
00013 
00014 #include "lib/Mathematics.h"
00015 #include "base/SGObject.h"
00016 #include "lib/common.h"
00017 
00019 class CAlphabet : public CSGObject
00020 {
00021     public:
00027         CAlphabet(CHAR* alpha, INT len);
00028 
00033         CAlphabet(E_ALPHABET alpha);
00034 
00039         CAlphabet(CAlphabet* alpha);
00040         ~CAlphabet();
00041 
00046         bool set_alphabet(E_ALPHABET alpha);
00047 
00052         inline E_ALPHABET get_alphabet()
00053         {
00054             return alphabet;
00055         }
00056 
00061         inline INT get_num_symbols()
00062         {
00063             return num_symbols;
00064         }
00065 
00071         inline INT get_num_bits()
00072         {
00073             return num_bits;
00074         }
00075 
00081         inline BYTE remap_to_bin(BYTE c)
00082         {
00083             return maptable_to_bin[c];
00084         }
00085 
00091         inline BYTE remap_to_char(BYTE c)
00092         {
00093             return maptable_to_char[c];
00094         }
00095 
00097         void clear_histogram();
00098 
00104         void add_string_to_histogram(CHAR* p, LONG len);
00105 
00111         void add_string_to_histogram(BYTE* p, LONG len);
00112 
00118         void add_string_to_histogram(SHORT* p, LONG len);
00119 
00125         void add_string_to_histogram(WORD* p, LONG len);
00126 
00132         void add_string_to_histogram(INT* p, LONG len);
00133 
00139         void add_string_to_histogram(UINT* p, LONG len);
00140 
00146         void add_string_to_histogram(LONG* p, LONG len);
00147 
00153         void add_string_to_histogram(ULONG* p, LONG len);
00154 
00159         inline void add_byte_to_histogram(BYTE p)
00160         {
00161             histogram[(INT) p]++;
00162         }
00163 
00165         void print_histogram();
00166 
00172         inline void get_hist(LONG** h, INT* len)
00173         {
00174             INT hist_size=(1 << (sizeof(BYTE)*8));
00175             ASSERT(h && len);
00176             *h=(LONG*) malloc(sizeof(LONG)*hist_size);
00177             ASSERT(*h);
00178             *len=hist_size;
00179             ASSERT(*len);
00180             memcpy(*h, &histogram[0], sizeof(LONG)*hist_size);
00181         }
00182 
00184         inline const LONG* get_histogram()
00185         {
00186             return &histogram[0];
00187         }
00188 
00195         bool check_alphabet(bool print_error=true);
00196 
00202         bool check_alphabet_size(bool print_error=true);
00203 
00208         INT get_num_symbols_in_histogram();
00209 
00214         INT get_max_value_in_histogram();
00215 
00222         INT get_num_bits_in_histogram();
00223 
00228         static const CHAR* get_alphabet_name(E_ALPHABET alphabet);
00229 
00230     protected:
00232         void init_map_table();
00233 
00238         void copy_histogram(CAlphabet* src);
00239 
00240     public:
00242         static const BYTE B_A;
00244         static const BYTE B_C;
00246         static const BYTE B_G;
00248         static const BYTE B_T;
00250         static const BYTE MAPTABLE_UNDEF;
00252         static const CHAR* alphabet_names[11];
00253 
00254     protected:
00256         E_ALPHABET alphabet;
00258         INT num_symbols;
00260         INT num_bits;
00262         BYTE valid_chars[1 << (sizeof(BYTE)*8)];
00264         BYTE maptable_to_bin[1 << (sizeof(BYTE)*8)];
00266         BYTE maptable_to_char[1 << (sizeof(BYTE)*8)];
00268         LONG histogram[1 << (sizeof(BYTE)*8)];
00269 };
00270 #endif

SHOGUN Machine Learning Toolbox - Documentation