CStringFeatures< ST > Class Template Reference

Inheritance diagram for CStringFeatures< ST >:

Inheritance graph
[legend]

List of all members.

Public Member Functions

 CStringFeatures (EAlphabet alpha)
 CStringFeatures (CAlphabet *alpha)
 CStringFeatures (const CStringFeatures &orig)
 CStringFeatures (char *fname, EAlphabet alpha=DNA)
virtual ~CStringFeatures ()
void cleanup ()
virtual EFeatureClass get_feature_class ()
virtual EFeatureType get_feature_type ()
CAlphabet * get_alphabet ()
virtual CFeatures * duplicate () const
void select_feature_vector (int32_t num)
void get_string (ST **dst, int32_t *len)
virtual ST * get_feature_vector (int32_t num, int32_t &len)
virtual void set_feature_vector (int32_t num, ST *string, int32_t len)
virtual ST get_feature (int32_t vec_num, int32_t feat_num)
virtual int32_t get_vector_length (int32_t vec_num)
virtual int32_t get_max_vector_length ()
virtual int32_t get_num_vectors ()
float128_t get_num_symbols ()
float128_t get_max_num_symbols ()
float128_t get_original_num_symbols ()
int32_t get_order ()
ST get_masked_symbols (ST symbol, uint8_t mask)
ST shift_offset (ST offset, int32_t amount)
ST shift_symbol (ST symbol, int32_t amount)
virtual bool load (char *fname)
bool load_dna_file (char *fname, bool remap_to_bin=true)
bool load_from_directory (char *dirname)
bool set_features (T_STRING< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length)
virtual T_STRING< ST > * get_features (int32_t &num_str, int32_t &max_str_len)
virtual bool save (char *dest)
virtual int32_t get_size ()
virtual bool apply_preproc (bool force_preprocessing=false)
int32_t obtain_by_sliding_window (int32_t window_size, int32_t step_size, int32_t skip=0)
int32_t obtain_by_position_list (int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0)
bool obtain_from_char (CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
template<class CT >
bool obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev)
bool have_same_length (int32_t len)
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()
template<>
EFeatureType get_feature_type ()

Protected Member Functions

void translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
void translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
void translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
void translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)

Protected Attributes

CAlphabet * alphabet
 alphabet
int32_t num_vectors
 number of string vectors
T_STRING< ST > * features
 this contains the array of features.
ST * single_string
 true when single string / created by sliding window
int32_t length_of_single_string
 length of prior single string
int32_t max_string_length
 length of longest string
float128_t num_symbols
 number of used symbols
float128_t original_num_symbols
 original number of used symbols (before higher order mapping)
int32_t order
 order used in higher order mapping
int32_t selected_vector
 vector to be obtained via get_string
ST * symbol_mask_table
 symbol mask table (cf. get_masked_symbols)


Detailed Description

template<class ST>
class CStringFeatures< ST >

Template class StringFeatures implements a list of strings. As this class is a template, the underlying storage type is quite arbitrary and not limited to character strings, but could also be sequences of floating point numbers etc. Strings differ from matrices (cf. CSimpleFeatures) in that the dimensionality of the feature vectors (i.e. the strings) is not fixed; it may vary between strings.

Most string kernels require StringFeatures, but a number of them actually require the strings to have the same length.

Note: StringFeatures do not support PreProcs

Definition at line 68 of file StringFeatures.h.


Constructor & Destructor Documentation

template<class ST>
CStringFeatures< ST >::CStringFeatures ( EAlphabet  alpha  ) 

constructor

Parameters:
alpha alphabet (type) to use for string features

Definition at line 75 of file StringFeatures.h.

template<class ST>
CStringFeatures< ST >::CStringFeatures ( CAlphabet *  alpha  ) 

constructor

Parameters:
alpha alphabet to use for string features

Definition at line 91 of file StringFeatures.h.

template<class ST>
CStringFeatures< ST >::CStringFeatures ( const CStringFeatures< ST > &  orig  ) 

copy constructor

Definition at line 104 of file StringFeatures.h.

template<class ST>
CStringFeatures< ST >::CStringFeatures ( char *  fname,
EAlphabet  alpha = DNA 
)

constructor

Parameters:
fname filename to load features from
alpha alphabet (type) to use for string features

Definition at line 144 of file StringFeatures.h.

template<class ST>
virtual CStringFeatures< ST >::~CStringFeatures (  )  [virtual]

Definition at line 157 of file StringFeatures.h.


Member Function Documentation

template<class ST>
virtual bool CStringFeatures< ST >::apply_preproc ( bool  force_preprocessing = false  )  [virtual]

apply preprocessor

Parameters:
force_preprocessing if preprocessing shall be forced
Returns:
if applying was successful

Definition at line 729 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::cleanup (  ) 

cleanup string features

Definition at line 169 of file StringFeatures.h.

template<class ST>
virtual CFeatures* CStringFeatures< ST >::duplicate (  )  const [virtual]

duplicate feature object

Returns:
feature object

Implements CFeatures.

Definition at line 217 of file StringFeatures.h.

template<class ST>
CAlphabet* CStringFeatures< ST >::get_alphabet (  ) 

get alphabet used in string features

Returns:
alphabet

Definition at line 207 of file StringFeatures.h.

template<class ST>
virtual ST CStringFeatures< ST >::get_feature ( int32_t  vec_num,
int32_t  feat_num 
) [virtual]

get feature

Parameters:
vec_num which vector
feat_num which feature
Returns:
feature

Definition at line 285 of file StringFeatures.h.

template<class ST>
virtual EFeatureClass CStringFeatures< ST >::get_feature_class (  )  [virtual]

get feature class

Returns:
feature class STRING

Implements CFeatures.

Definition at line 195 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< float64_t >::get_feature_type (  )  [virtual]

get feature type the DREAL feature can deal with

Returns:
feature type DREAL

Implements CFeatures.

Definition at line 1313 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< uint64_t >::get_feature_type (  )  [virtual]

get feature type the ULONG feature can deal with

Returns:
feature type ULONG

Implements CFeatures.

Definition at line 1304 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< int64_t >::get_feature_type (  )  [virtual]

get feature type the LONG feature can deal with

Returns:
feature type LONG

Implements CFeatures.

Definition at line 1295 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< uint32_t >::get_feature_type (  )  [virtual]

get feature type the INT feature can deal with

Returns:
feature type INT

Implements CFeatures.

Definition at line 1286 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< int32_t >::get_feature_type (  )  [virtual]

get feature type the INT feature can deal with

Returns:
feature type INT

Implements CFeatures.

Definition at line 1277 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< int16_t >::get_feature_type (  )  [virtual]

get feature type the SHORT feature can deal with

Returns:
feature type SHORT

Implements CFeatures.

Definition at line 1259 of file StringFeatures.h.

template<>
EFeatureType CStringFeatures< char >::get_feature_type (  )  [virtual]

get feature type the char feature can deal with

Returns:
feature type char

Implements CFeatures.

Definition at line 1241 of file StringFeatures.h.

template<class ST>
virtual EFeatureType CStringFeatures< ST >::get_feature_type (  )  [virtual]

get feature type

Returns:
templated feature type

Implements CFeatures.

template<class ST>
virtual ST* CStringFeatures< ST >::get_feature_vector ( int32_t  num,
int32_t &  len 
) [virtual]

get feature vector for sample num

Parameters:
num index of feature vector
len length is returned by reference
Returns:
feature vector for sample num

Definition at line 255 of file StringFeatures.h.

template<class ST>
virtual T_STRING<ST>* CStringFeatures< ST >::get_features ( int32_t &  num_str,
int32_t &  max_str_len 
) [virtual]

get_features

Parameters:
num_str number of strings (returned)
max_str_len maximal string length (returned)
Returns:
string features

Definition at line 701 of file StringFeatures.h.

template<class ST>
ST CStringFeatures< ST >::get_masked_symbols ( ST  symbol,
uint8_t  mask 
)

a higher order mapped symbol will be shaped such that the symbols specified by bits in the mask will be returned.

Parameters:
symbol symbol to mask
mask mask to apply
Returns:
masked symbol

Definition at line 356 of file StringFeatures.h.

template<class ST>
float128_t CStringFeatures< ST >::get_max_num_symbols (  ) 

get maximum number of symbols

Note: float128_t sounds weird, but LONG is not long enough

Returns:
maximum number of symbols

Definition at line 333 of file StringFeatures.h.

template<class ST>
virtual int32_t CStringFeatures< ST >::get_max_vector_length (  )  [virtual]

get maximum vector length

Returns:
maximum vector/string length

Definition at line 308 of file StringFeatures.h.

template<class ST>
float128_t CStringFeatures< ST >::get_num_symbols (  ) 

get number of symbols

Note: float128_t sounds weird, but LONG is not long enough

Returns:
number of symbols

Definition at line 325 of file StringFeatures.h.

template<class ST>
virtual int32_t CStringFeatures< ST >::get_num_vectors (  )  [virtual]

get number of vectors

Returns:
number of vectors

Implements CFeatures.

Definition at line 317 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::get_order (  ) 

order used for higher order mapping

Returns:
order

Definition at line 347 of file StringFeatures.h.

template<class ST>
float128_t CStringFeatures< ST >::get_original_num_symbols (  ) 

number of symbols before higher order mapping

Returns:
original number of symbols

Definition at line 341 of file StringFeatures.h.

template<class ST>
virtual int32_t CStringFeatures< ST >::get_size (  )  [virtual]

get memory footprint of one feature

Returns:
memory footprint of one feature

Implements CFeatures.

Definition at line 722 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::get_string ( ST **  dst,
int32_t *  len 
)

get feature vector for selected example

Parameters:
dst destination where vector will be stored
len number of features in vector

Definition at line 239 of file StringFeatures.h.

template<class ST>
virtual int32_t CStringFeatures< ST >::get_vector_length ( int32_t  vec_num  )  [virtual]

get vector length

Parameters:
vec_num which vector
Returns:
length of vector

Definition at line 298 of file StringFeatures.h.

template<class ST>
bool CStringFeatures< ST >::have_same_length ( int32_t  len  ) 

check if length of each vector in this feature object equals the given length.

Parameters:
len vector length to check against
Returns:
if length of each vector in this feature object equals the given length.

Definition at line 974 of file StringFeatures.h.

template<class ST>
virtual bool CStringFeatures< ST >::load ( char *  fname  )  [virtual]

load features from file

Parameters:
fname filename to load from
Returns:
if loading was successful

Reimplemented from CFeatures.

Definition at line 391 of file StringFeatures.h.

template<class ST>
bool CStringFeatures< ST >::load_dna_file ( char *  fname,
bool  remap_to_bin = true 
)

load DNA features from file

Parameters:
fname filename to load from
remap_to_bin if remap_to_bin
Returns:
if loading was successful

Definition at line 451 of file StringFeatures.h.

template<class ST>
bool CStringFeatures< ST >::load_from_directory ( char *  dirname  ) 

load features from directory

Parameters:
dirname directory name to load from
Returns:
if loading was successful

Definition at line 584 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::obtain_by_position_list ( int32_t  window_size,
CDynamicArray< int32_t > *  positions,
int32_t  skip = 0 
)

extracts windows of size window_size from first string using the positions in list

Parameters:
window_size window size
positions positions
skip skip
Returns:
something inty

Definition at line 800 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::obtain_by_sliding_window ( int32_t  window_size,
int32_t  step_size,
int32_t  skip = 0 
)

Slides a window of size window_size over the current single string; step_size is the amount by which the window is shifted. Creates (string_len-window_size)/step_size many feature objects. If skip is nonzero, the first 'skip' characters of each string are skipped.

Parameters:
window_size window size
step_size step size
skip skip
Returns:
something inty

Definition at line 757 of file StringFeatures.h.

template<class ST>
bool CStringFeatures< ST >::obtain_from_char ( CStringFeatures< char > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)

obtain string features from char features

wrapper for template method

Parameters:
sf string features
start start
p_order order
gap gap
rev reverse
Returns:
if obtaining was successful

Definition at line 866 of file StringFeatures.h.

template<class ST>
template<class CT >
bool CStringFeatures< ST >::obtain_from_char_features ( CStringFeatures< CT > *  sf,
int32_t  start,
int32_t  p_order,
int32_t  gap,
bool  rev 
)

template obtain from char features

Parameters:
sf string features
start start
p_order order
gap gap
rev reverse
Returns:
if obtaining was successful

Definition at line 881 of file StringFeatures.h.

template<class ST>
virtual bool CStringFeatures< ST >::save ( char *  dest  )  [virtual]

save features to file

Parameters:
dest filename to save to
Returns:
if saving was successful

Reimplemented from CFeatures.

Definition at line 713 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::select_feature_vector ( int32_t  num  ) 

select feature vector

Parameters:
num which feature vector to select

Definition at line 226 of file StringFeatures.h.

template<class ST>
virtual void CStringFeatures< ST >::set_feature_vector ( int32_t  num,
ST *  string,
int32_t  len 
) [virtual]

set feature vector for sample num

Parameters:
num index of feature vector
string string with the feature vector's content
len length of the string

Definition at line 270 of file StringFeatures.h.

template<class ST>
bool CStringFeatures< ST >::set_features ( T_STRING< ST > *  p_features,
int32_t  p_num_vectors,
int32_t  p_max_string_length 
)

set features

Parameters:
p_features new features
p_num_vectors number of vectors
p_max_string_length maximum string length
Returns:
if setting was successful

Definition at line 657 of file StringFeatures.h.

template<class ST>
ST CStringFeatures< ST >::shift_offset ( ST  offset,
int32_t  amount 
)

shift offset to the left by amount

Parameters:
offset offset to shift
amount amount to shift the offset
Returns:
shifted offset

Definition at line 368 of file StringFeatures.h.

template<class ST>
ST CStringFeatures< ST >::shift_symbol ( ST  symbol,
int32_t  amount 
)

shift symbol to the right by amount (taking care of custom symbol sizes)

Parameters:
symbol symbol to shift
amount amount to shift the symbol
Returns:
shifted symbol

Definition at line 380 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::translate_from_single_order ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
) [protected]

translate from single order

Parameters:
obs observation
sequence_length length of sequence
start start
p_order order
max_val maximum value
gap gap

Definition at line 1080 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::translate_from_single_order ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val 
) [protected]

translate from single order

Parameters:
obs observation
sequence_length length of sequence
start start
p_order order
max_val maximum value

Definition at line 997 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::translate_from_single_order_reversed ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
) [protected]

translate from single order reversed

Parameters:
obs observation
sequence_length length of sequence
start start
p_order order
max_val maximum value
gap gap

Definition at line 1147 of file StringFeatures.h.

template<class ST>
void CStringFeatures< ST >::translate_from_single_order_reversed ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val 
) [protected]

translate from single order reversed

Parameters:
obs observation
sequence_length length of sequence
start start
p_order order
max_val maximum value

Definition at line 1038 of file StringFeatures.h.


Member Data Documentation

template<class ST>
CAlphabet* CStringFeatures< ST >::alphabet [protected]

alphabet

Definition at line 1204 of file StringFeatures.h.

template<class ST>
T_STRING<ST>* CStringFeatures< ST >::features [protected]

this contains the array of features.

Definition at line 1210 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::length_of_single_string [protected]

length of prior single string

Definition at line 1216 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::max_string_length [protected]

length of longest string

Definition at line 1219 of file StringFeatures.h.

template<class ST>
float128_t CStringFeatures< ST >::num_symbols [protected]

number of used symbols

Definition at line 1222 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::num_vectors [protected]

number of string vectors

Definition at line 1207 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::order [protected]

order used in higher order mapping

Definition at line 1228 of file StringFeatures.h.

template<class ST>
float128_t CStringFeatures< ST >::original_num_symbols [protected]

original number of used symbols (before higher order mapping)

Definition at line 1225 of file StringFeatures.h.

template<class ST>
int32_t CStringFeatures< ST >::selected_vector [protected]

vector to be obtained via get_string

Definition at line 1231 of file StringFeatures.h.

template<class ST>
ST* CStringFeatures< ST >::single_string [protected]

true when single string / created by sliding window

Definition at line 1213 of file StringFeatures.h.

template<class ST>
ST* CStringFeatures< ST >::symbol_mask_table [protected]

symbol mask table (cf. get_masked_symbols)

Definition at line 1234 of file StringFeatures.h.


The documentation for this class was generated from the following file: StringFeatures.h

SHOGUN Machine Learning Toolbox - Documentation