Public Member Functions | |
CStringFeatures (EAlphabet alpha) | |
CStringFeatures (CAlphabet *alpha) | |
CStringFeatures (const CStringFeatures &orig) | |
CStringFeatures (char *fname, EAlphabet alpha=DNA) | |
virtual | ~CStringFeatures () |
void | cleanup () |
virtual EFeatureClass | get_feature_class () |
virtual EFeatureType | get_feature_type () |
CAlphabet * | get_alphabet () |
virtual CFeatures * | duplicate () const |
void | select_feature_vector (int32_t num) |
void | get_string (ST **dst, int32_t *len) |
virtual ST * | get_feature_vector (int32_t num, int32_t &len) |
virtual void | set_feature_vector (int32_t num, ST *string, int32_t len) |
virtual ST | get_feature (int32_t vec_num, int32_t feat_num) |
virtual int32_t | get_vector_length (int32_t vec_num) |
virtual int32_t | get_max_vector_length () |
virtual int32_t | get_num_vectors () |
float128_t | get_num_symbols () |
float128_t | get_max_num_symbols () |
float128_t | get_original_num_symbols () |
int32_t | get_order () |
ST | get_masked_symbols (ST symbol, uint8_t mask) |
ST | shift_offset (ST offset, int32_t amount) |
ST | shift_symbol (ST symbol, int32_t amount) |
virtual bool | load (char *fname) |
bool | load_dna_file (char *fname, bool remap_to_bin=true) |
bool | load_from_directory (char *dirname) |
bool | set_features (T_STRING< ST > *p_features, int32_t p_num_vectors, int32_t p_max_string_length) |
virtual T_STRING< ST > * | get_features (int32_t &num_str, int32_t &max_str_len) |
virtual bool | save (char *dest) |
virtual int32_t | get_size () |
virtual bool | apply_preproc (bool force_preprocessing=false) |
int32_t | obtain_by_sliding_window (int32_t window_size, int32_t step_size, int32_t skip=0) |
int32_t | obtain_by_position_list (int32_t window_size, CDynamicArray< int32_t > *positions, int32_t skip=0) |
bool | obtain_from_char (CStringFeatures< char > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
template<class CT > | |
bool | obtain_from_char_features (CStringFeatures< CT > *sf, int32_t start, int32_t p_order, int32_t gap, bool rev) |
bool | have_same_length (int32_t len) |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
template<> | |
EFeatureType | get_feature_type () |
Protected Member Functions | |
void | translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val) |
void | translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val) |
void | translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
void | translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap) |
Protected Attributes | |
CAlphabet * | alphabet |
alphabet | |
int32_t | num_vectors |
number of string vectors | |
T_STRING< ST > * | features |
this contains the array of features. | |
ST * | single_string |
true when single string / created by sliding window | |
int32_t | length_of_single_string |
length of prior single string | |
int32_t | max_string_length |
length of longest string | |
float128_t | num_symbols |
number of used symbols | |
float128_t | original_num_symbols |
original number of used symbols (before higher order mapping) | |
int32_t | order |
order used in higher order mapping | |
int32_t | selected_vector |
vector to be obtained via get_string | |
ST * | symbol_mask_table |
table of symbol masks (presumably the lookup used by get_masked_symbols; verify against StringFeatures.h) |
Most string kernels require StringFeatures, but a number of them actually require the strings to have the same length.
Note: StringFeatures do not support PreProcs
Definition at line 68 of file StringFeatures.h.
CStringFeatures< ST >::CStringFeatures | ( | EAlphabet | alpha | ) |
constructor
alpha | alphabet (type) to use for string features |
Definition at line 75 of file StringFeatures.h.
CStringFeatures< ST >::CStringFeatures | ( | CAlphabet * | alpha | ) |
constructor
alpha | alphabet to use for string features |
Definition at line 91 of file StringFeatures.h.
CStringFeatures< ST >::CStringFeatures | ( | const CStringFeatures< ST > & | orig | ) |
copy constructor
Definition at line 104 of file StringFeatures.h.
CStringFeatures< ST >::CStringFeatures | ( | char * | fname, | |
EAlphabet | alpha = DNA | |||
) |
constructor
fname | filename to load features from | |
alpha | alphabet (type) to use for string features |
Definition at line 144 of file StringFeatures.h.
virtual CStringFeatures< ST >::~CStringFeatures | ( | ) | [virtual] |
Definition at line 157 of file StringFeatures.h.
virtual bool CStringFeatures< ST >::apply_preproc | ( | bool | force_preprocessing = false |
) | [virtual] |
apply preprocessor
force_preprocessing | if preprocessing shall be forced |
Definition at line 729 of file StringFeatures.h.
void CStringFeatures< ST >::cleanup | ( | ) |
cleanup string features
Definition at line 169 of file StringFeatures.h.
virtual CFeatures* CStringFeatures< ST >::duplicate | ( | ) | const [virtual] |
duplicate feature object
Implements CFeatures.
Definition at line 217 of file StringFeatures.h.
CAlphabet* CStringFeatures< ST >::get_alphabet | ( | ) |
get alphabet used in string features
Definition at line 207 of file StringFeatures.h.
virtual ST CStringFeatures< ST >::get_feature | ( | int32_t | vec_num, | |
int32_t | feat_num | |||
) | [virtual] |
get feature
vec_num | which vector | |
feat_num | which feature |
Definition at line 285 of file StringFeatures.h.
virtual EFeatureClass CStringFeatures< ST >::get_feature_class | ( | ) | [virtual] |
get feature class
Implements CFeatures.
Definition at line 195 of file StringFeatures.h.
EFeatureType CStringFeatures< float64_t >::get_feature_type | ( | ) | [virtual] |
get feature type the DREAL feature can deal with
Implements CFeatures.
Definition at line 1313 of file StringFeatures.h.
EFeatureType CStringFeatures< uint64_t >::get_feature_type | ( | ) | [virtual] |
get feature type the ULONG feature can deal with
Implements CFeatures.
Definition at line 1304 of file StringFeatures.h.
EFeatureType CStringFeatures< int64_t >::get_feature_type | ( | ) | [virtual] |
get feature type the LONG feature can deal with
Implements CFeatures.
Definition at line 1295 of file StringFeatures.h.
EFeatureType CStringFeatures< uint32_t >::get_feature_type | ( | ) | [virtual] |
get feature type the UINT feature can deal with
Implements CFeatures.
Definition at line 1286 of file StringFeatures.h.
EFeatureType CStringFeatures< int32_t >::get_feature_type | ( | ) | [virtual] |
get feature type the INT feature can deal with
Implements CFeatures.
Definition at line 1277 of file StringFeatures.h.
EFeatureType CStringFeatures< int16_t >::get_feature_type | ( | ) | [virtual] |
get feature type the SHORT feature can deal with
Implements CFeatures.
Definition at line 1259 of file StringFeatures.h.
EFeatureType CStringFeatures< char >::get_feature_type | ( | ) | [virtual] |
get feature type the char feature can deal with
Implements CFeatures.
Definition at line 1241 of file StringFeatures.h.
virtual EFeatureType CStringFeatures< ST >::get_feature_type | ( | ) | [virtual] |
virtual ST* CStringFeatures< ST >::get_feature_vector | ( | int32_t | num, | |
int32_t & | len | |||
) | [virtual] |
get feature vector for sample num
num | index of feature vector | |
len | length is returned by reference |
Definition at line 255 of file StringFeatures.h.
virtual T_STRING<ST>* CStringFeatures< ST >::get_features | ( | int32_t & | num_str, | |
int32_t & | max_str_len | |||
) | [virtual] |
get_features
num_str | number of strings (returned) | |
max_str_len | maximal string length (returned) |
Definition at line 701 of file StringFeatures.h.
ST CStringFeatures< ST >::get_masked_symbols | ( | ST | symbol, | |
uint8_t | mask | |||
) |
masks a higher order mapped symbol such that only the symbols specified by the bits in the mask are returned.
symbol | symbol to mask | |
mask | mask to apply |
Definition at line 356 of file StringFeatures.h.
float128_t CStringFeatures< ST >::get_max_num_symbols | ( | ) |
get maximum number of symbols
Note: float128_t sounds weird, but LONG is not long enough
Definition at line 333 of file StringFeatures.h.
virtual int32_t CStringFeatures< ST >::get_max_vector_length | ( | ) | [virtual] |
get maximum vector length
Definition at line 308 of file StringFeatures.h.
float128_t CStringFeatures< ST >::get_num_symbols | ( | ) |
get number of symbols
Note: float128_t sounds weird, but LONG is not long enough
Definition at line 325 of file StringFeatures.h.
virtual int32_t CStringFeatures< ST >::get_num_vectors | ( | ) | [virtual] |
get number of vectors
Implements CFeatures.
Definition at line 317 of file StringFeatures.h.
int32_t CStringFeatures< ST >::get_order | ( | ) |
float128_t CStringFeatures< ST >::get_original_num_symbols | ( | ) |
number of symbols before higher order mapping
Definition at line 341 of file StringFeatures.h.
virtual int32_t CStringFeatures< ST >::get_size | ( | ) | [virtual] |
get memory footprint of one feature
Implements CFeatures.
Definition at line 722 of file StringFeatures.h.
void CStringFeatures< ST >::get_string | ( | ST ** | dst, | |
int32_t * | len | |||
) |
get feature vector for selected example
dst | destination where vector will be stored | |
len | number of features in vector |
Definition at line 239 of file StringFeatures.h.
virtual int32_t CStringFeatures< ST >::get_vector_length | ( | int32_t | vec_num | ) | [virtual] |
get vector length
vec_num | which vector |
Definition at line 298 of file StringFeatures.h.
bool CStringFeatures< ST >::have_same_length | ( | int32_t | len | ) |
check if length of each vector in this feature object equals the given length.
len | vector length to check against |
Definition at line 974 of file StringFeatures.h.
virtual bool CStringFeatures< ST >::load | ( | char * | fname | ) | [virtual] |
load features from file
fname | filename to load from |
Reimplemented from CFeatures.
Definition at line 391 of file StringFeatures.h.
bool CStringFeatures< ST >::load_dna_file | ( | char * | fname, | |
bool | remap_to_bin = true | |||
) |
load DNA features from file
fname | filename to load from | |
remap_to_bin | if remap_to_bin |
Definition at line 451 of file StringFeatures.h.
bool CStringFeatures< ST >::load_from_directory | ( | char * | dirname | ) |
load features from directory
dirname | directory name to load from |
Definition at line 584 of file StringFeatures.h.
int32_t CStringFeatures< ST >::obtain_by_position_list | ( | int32_t | window_size, | |
CDynamicArray< int32_t > * | positions, | |||
int32_t | skip = 0 | |||
) |
extracts windows of size window_size from the first string using the positions in the list
window_size | window size | |
positions | positions | |
skip | skip |
Definition at line 800 of file StringFeatures.h.
int32_t CStringFeatures< ST >::obtain_by_sliding_window | ( | int32_t | window_size, | |
int32_t | step_size, | |||
int32_t | skip = 0 | |||
) |
slides a window of size window_size over the current single string; step_size is the amount by which the window is shifted. Creates (string_len-window_size)/step_size many feature objects. If skip is nonzero, skips the first 'skip' characters of each string.
window_size | window size | |
step_size | step size | |
skip | skip |
Definition at line 757 of file StringFeatures.h.
bool CStringFeatures< ST >::obtain_from_char | ( | CStringFeatures< char > * | sf, | |
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | gap, | |||
bool | rev | |||
) |
obtain string features from char features
wrapper for template method
sf | string features | |
start | start | |
p_order | order | |
gap | gap | |
rev | reverse |
Definition at line 866 of file StringFeatures.h.
bool CStringFeatures< ST >::obtain_from_char_features | ( | CStringFeatures< CT > * | sf, | |
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | gap, | |||
bool | rev | |||
) |
template obtain from char features
sf | string features | |
start | start | |
p_order | order | |
gap | gap | |
rev | reverse |
Definition at line 881 of file StringFeatures.h.
virtual bool CStringFeatures< ST >::save | ( | char * | dest | ) | [virtual] |
save features to file
dest | filename to save to |
Reimplemented from CFeatures.
Definition at line 713 of file StringFeatures.h.
void CStringFeatures< ST >::select_feature_vector | ( | int32_t | num | ) |
select feature vector
num | which feature vector to select |
Definition at line 226 of file StringFeatures.h.
virtual void CStringFeatures< ST >::set_feature_vector | ( | int32_t | num, | |
ST * | string, | |||
int32_t | len | |||
) | [virtual] |
set feature vector for sample num
num | index of feature vector | |
string | string with the feature vector's content | |
len | length of the string |
Definition at line 270 of file StringFeatures.h.
bool CStringFeatures< ST >::set_features | ( | T_STRING< ST > * | p_features, | |
int32_t | p_num_vectors, | |||
int32_t | p_max_string_length | |||
) |
set features
p_features | new features | |
p_num_vectors | number of vectors | |
p_max_string_length | maximum string length |
Definition at line 657 of file StringFeatures.h.
ST CStringFeatures< ST >::shift_offset | ( | ST | offset, | |
int32_t | amount | |||
) |
shift offset to the left by amount
offset | offset to shift | |
amount | amount to shift the offset |
Definition at line 368 of file StringFeatures.h.
ST CStringFeatures< ST >::shift_symbol | ( | ST | symbol, | |
int32_t | amount | |||
) |
shift symbol to the right by amount (taking care of custom symbol sizes)
symbol | symbol to shift | |
amount | amount to shift the symbol |
Definition at line 380 of file StringFeatures.h.
void CStringFeatures< ST >::translate_from_single_order | ( | ST * | obs, | |
int32_t | sequence_length, | |||
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | max_val, | |||
int32_t | gap | |||
) | [protected] |
translate from single order
obs | observation | |
sequence_length | length of sequence | |
start | start | |
p_order | order | |
max_val | maximum value | |
gap | gap |
Definition at line 1080 of file StringFeatures.h.
void CStringFeatures< ST >::translate_from_single_order | ( | ST * | obs, | |
int32_t | sequence_length, | |||
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | max_val | |||
) | [protected] |
translate from single order
obs | observation | |
sequence_length | length of sequence | |
start | start | |
p_order | order | |
max_val | maximum value |
Definition at line 997 of file StringFeatures.h.
void CStringFeatures< ST >::translate_from_single_order_reversed | ( | ST * | obs, | |
int32_t | sequence_length, | |||
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | max_val, | |||
int32_t | gap | |||
) | [protected] |
translate from single order reversed
obs | observation | |
sequence_length | length of sequence | |
start | start | |
p_order | order | |
max_val | maximum value | |
gap | gap |
Definition at line 1147 of file StringFeatures.h.
void CStringFeatures< ST >::translate_from_single_order_reversed | ( | ST * | obs, | |
int32_t | sequence_length, | |||
int32_t | start, | |||
int32_t | p_order, | |||
int32_t | max_val | |||
) | [protected] |
translate from single order reversed
obs | observation | |
sequence_length | length of sequence | |
start | start | |
p_order | order | |
max_val | maximum value |
Definition at line 1038 of file StringFeatures.h.
CAlphabet* CStringFeatures< ST >::alphabet [protected] |
T_STRING<ST>* CStringFeatures< ST >::features [protected] |
int32_t CStringFeatures< ST >::length_of_single_string [protected] |
int32_t CStringFeatures< ST >::max_string_length [protected] |
float128_t CStringFeatures< ST >::num_symbols [protected] |
int32_t CStringFeatures< ST >::num_vectors [protected] |
int32_t CStringFeatures< ST >::order [protected] |
float128_t CStringFeatures< ST >::original_num_symbols [protected] |
original number of used symbols (before higher order mapping)
Definition at line 1225 of file StringFeatures.h.
int32_t CStringFeatures< ST >::selected_vector [protected] |
ST* CStringFeatures< ST >::single_string [protected] |
true when single string / created by sliding window
Definition at line 1213 of file StringFeatures.h.
ST* CStringFeatures< ST >::symbol_mask_table [protected] |