Hidden Markov Model. Structure and Function collection. This class implements a Hidden Markov Model. Several functions are supplied for tasks such as training, reading/writing models, reading observations, and calculation of derivatives.
|
Public Member Functions |
bool | alloc_state_dependend_arrays () |
| allocates memory that depends on N
|
void | free_state_dependend_arrays () |
| free memory that depends on N
|
bool | linear_train (bool right_align=false) |
| estimates linear model from observations.
|
bool | permutation_entropy (INT window_width, INT sequence_number) |
| compute permutation entropy
|
virtual INT | get_num_relevant_model_parameters () |
virtual DREAL | get_log_likelihood_sample () |
virtual void | get_log_likelihood (DREAL **dst, INT *num) |
virtual DREAL | get_model_parameter (INT num_param) |
virtual DREAL | get_derivative (INT num_param, INT num_example) |
virtual DREAL | get_likelihood_example (INT num_example) |
virtual void | set_features (CFeatures *f) |
virtual CFeatures * | get_features () |
virtual void | set_pseudo_count (DREAL pseudo) |
virtual DREAL | get_pseudo_count () |
|
Train definitions. Encapsulates model parameters that are either constant or shall be learned. Consists of structures and access functions for learning only defined transitions and constants.
|
| CHMM (INT N, INT M, CModel *model, DREAL PSEUDO) |
| CHMM (CStringFeatures< WORD > *obs, INT N, INT M, DREAL PSEUDO) |
| CHMM (INT N, double *p, double *q, double *a) |
| CHMM (INT N, double *p, double *q, int num_trans, double *a_trans) |
| CHMM (FILE *model_file, DREAL PSEUDO) |
| CHMM (CHMM *h) |
| Constructor - Clone model h.
|
virtual | ~CHMM () |
| Destructor - Cleanup.
|
virtual bool | train () |
virtual INT | get_num_model_parameters () |
virtual DREAL | get_log_model_parameter (INT num_param) |
virtual DREAL | get_log_derivative (INT num_param, INT num_example) |
virtual DREAL | get_log_likelihood_example (INT num_example) |
bool | initialize (CModel *model, DREAL PSEUDO, FILE *model_file=NULL) |
|
forward/backward/viterbi algorithm
|
DREAL | forward_comp (INT time, INT state, INT dimension) |
DREAL | forward_comp_old (INT time, INT state, INT dimension) |
DREAL | backward_comp (INT time, INT state, INT dimension) |
DREAL | backward_comp_old (INT time, INT state, INT dimension) |
DREAL | best_path (INT dimension) |
WORD | get_best_path_state (INT dim, INT t) |
DREAL | model_probability_comp () |
DREAL | model_probability (INT dimension=-1) |
| inline proxy for model probability.
|
DREAL | linear_model_probability (INT dimension) |
|
bool | set_iterations (INT num) |
INT | get_iterations () |
bool | set_epsilon (DREAL eps) |
DREAL | get_epsilon () |
bool | baum_welch_viterbi_train (BaumWelchViterbiType type) |
|
void | estimate_model_baum_welch (CHMM *train) |
void | estimate_model_baum_welch_old (CHMM *train) |
void | estimate_model_baum_welch_trans (CHMM *train) |
void | estimate_model_baum_welch_defined (CHMM *train) |
void | estimate_model_viterbi (CHMM *train) |
void | estimate_model_viterbi_defined (CHMM *train) |
|
void | output_model (bool verbose=false) |
void | output_model_defined (bool verbose=false) |
| performs output_model only for the defined transitions etc
|
|
void | normalize (bool keep_dead_states=false) |
| normalize the model to satisfy stochasticity
|
void | add_states (INT num_states, DREAL default_val=0) |
bool | append_model (CHMM *append_model, DREAL *cur_out, DREAL *app_out) |
bool | append_model (CHMM *append_model) |
void | chop (DREAL value) |
| set any model parameter with probability smaller than value to ZERO
|
void | convert_to_log () |
| convert model to log probabilities
|
void | init_model_random () |
| init model with random values
|
void | init_model_defined () |
void | clear_model () |
| initializes model with log(PSEUDO)
|
void | clear_model_defined () |
| initializes only parameters in learn_x with log(PSEUDO)
|
void | copy_model (CHMM *l) |
| copies the model parameters from l
|
void | invalidate_model () |
bool | get_status () const |
DREAL | get_pseudo () const |
| returns current pseudo value
|
void | set_pseudo (DREAL pseudo) |
| sets current pseudo value
|
|
void | set_observations (CStringFeatures< WORD > *obs, CHMM *lambda=NULL) |
void | set_observation_nocache (CStringFeatures< WORD > *obs) |
CStringFeatures< WORD > * | get_observations () |
| return observation pointer
|
|
for observations/model/traindefinitions
|
bool | load_definitions (FILE *file, bool verbose, bool initialize=true) |
bool | load_model (FILE *file) |
bool | save_model (FILE *file) |
bool | save_model_derivatives (FILE *file) |
bool | save_model_derivatives_bin (FILE *file) |
bool | save_model_bin (FILE *file) |
bool | check_model_derivatives () |
| numerically check whether derivatives were calculated correctly
|
bool | check_model_derivatives_combined () |
T_STATES * | get_path (INT dim, DREAL &prob) |
bool | save_path (FILE *file) |
bool | save_path_derivatives (FILE *file) |
bool | save_path_derivatives_bin (FILE *file) |
bool | save_likelihood_bin (FILE *file) |
bool | save_likelihood (FILE *file) |
|
for all the arrays a,b,p,q,A,B,psi and scalar model parameters like N,M
|
T_STATES | get_N () const |
| access function for number of states N
|
INT | get_M () const |
| access function for number of observation symbols M
|
void | set_q (T_STATES offset, DREAL value) |
void | set_p (T_STATES offset, DREAL value) |
void | set_A (T_STATES line_, T_STATES column, DREAL value) |
void | set_a (T_STATES line_, T_STATES column, DREAL value) |
void | set_B (T_STATES line_, WORD column, DREAL value) |
void | set_b (T_STATES line_, WORD column, DREAL value) |
void | set_psi (INT time, T_STATES state, T_STATES value, INT dimension) |
DREAL | get_q (T_STATES offset) const |
DREAL | get_p (T_STATES offset) const |
DREAL | get_A (T_STATES line_, T_STATES column) const |
DREAL | get_a (T_STATES line_, T_STATES column) const |
DREAL | get_B (T_STATES line_, WORD column) const |
DREAL | get_b (T_STATES line_, WORD column) const |
T_STATES | get_psi (INT time, T_STATES state, INT dimension) const |
|
management and access functions for observation matrix
|
DREAL | state_probability (INT time, INT state, INT dimension) |
| calculates probability of being in state i at time t for dimension
|
DREAL | transition_probability (INT time, INT state_i, INT state_j, INT dimension) |
| calculates probability of being in state i at time t and state j at time t+1 for dimension
|
|
computes log dp(lambda)/d lambda_i - Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
DREAL | linear_model_derivative (T_STATES i, WORD j, INT dimension) |
DREAL | model_derivative_p (T_STATES i, INT dimension) |
DREAL | model_derivative_q (T_STATES i, INT dimension) |
DREAL | model_derivative_a (T_STATES i, T_STATES j, INT dimension) |
| computes log dp(lambda)/d a_ij.
|
DREAL | model_derivative_b (T_STATES i, WORD j, INT dimension) |
| computes log dp(lambda)/d b_ij.
|
|
computes d log p(lambda,best_path)/d lambda_i - Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
DREAL | path_derivative_p (T_STATES i, INT dimension) |
| computes d log p(lambda,best_path)/d p_i
|
DREAL | path_derivative_q (T_STATES i, INT dimension) |
| computes d log p(lambda,best_path)/d q_i
|
DREAL | path_derivative_a (T_STATES i, T_STATES j, INT dimension) |
| computes d log p(lambda,best_path)/d a_ij
|
DREAL | path_derivative_b (T_STATES i, WORD j, INT dimension) |
| computes d log p(lambda,best_path)/d b_ij
|
Static Public Attributes |
static CParallel | parallel |
static CIO | io |
static CVersion | version |
Protected Member Functions |
void | prepare_path_derivative (INT dim) |
| initialization function that is called before path_derivatives are calculated
|
DREAL | forward (INT time, INT state, INT dimension) |
| inline proxies for forward pass
|
DREAL | backward (INT time, INT state, INT dimension) |
| inline proxies for backward pass
|
|
for reading model/definition/observation files
|
bool | get_numbuffer (FILE *file, CHAR *buffer, INT length) |
| put a sequence of numbers into the buffer
|
void | open_bracket (FILE *file) |
| expect open bracket.
|
void | close_bracket (FILE *file) |
| expect closing bracket
|
bool | comma_or_space (FILE *file) |
| expect comma or space.
|
void | error (INT p_line, const CHAR *str) |
| parse error messages
|
Protected Attributes |
DREAL * | arrayN1 |
DREAL * | arrayN2 |
T_ALPHA_BETA | alpha_cache |
| cache for forward variables; can be extremely large, O(T*N)
|
T_ALPHA_BETA | beta_cache |
| cache for backward variables; can be extremely large, O(T*N)
|
T_STATES * | states_per_observation_psi |
| backtracking table for viterbi; can be extremely large, O(T*N)
|
T_STATES * | path |
| best path (=state sequence) through model
|
bool | path_prob_updated |
| true if path probability is up to date
|
INT | path_prob_dimension |
| dimension for which path_prob was calculated
|
CFeatures * | features |
DREAL | pseudo_count |
|
these are p,q,a,b,N,M etc
|
INT | M |
| number of observation symbols eg. ACGT -> 0123
|
INT | N |
| number of states
|
DREAL | PSEUDO |
| define pseudocounts against overfitting
|
INT | line |
CStringFeatures< WORD > * | p_observations |
| observation matrix
|
CModel * | model |
DREAL * | transition_matrix_A |
| matrix of absolute counts of transitions
|
DREAL * | observation_matrix_B |
| matrix of absolute counts of observations within each state
|
DREAL * | transition_matrix_a |
| transition matrix
|
DREAL * | initial_state_distribution_p |
| initial distribution of states
|
DREAL * | end_state_distribution_q |
| distribution of end-states
|
DREAL * | observation_matrix_b |
| distribution of observations within each state
|
INT | iterations |
| convergence criterion iterations
|
INT | iteration_count |
DREAL | epsilon |
| convergence criterion epsilon
|
INT | conv_it |
DREAL | all_pat_prob |
| probability of best path
|
DREAL | pat_prob |
| probability of best path
|
DREAL | mod_prob |
| probability of model
|
bool | mod_prob_updated |
| true if model probability is up to date
|
bool | all_path_prob_updated |
| true if path probability is up to date
|
INT | path_deriv_dimension |
| dimension for which path_deriv was calculated
|
bool | path_deriv_updated |
| true if path derivative is up to date
|
bool | loglikelihood |
bool | status |
bool | reused_caches |
Static Protected Attributes |
static const INT | GOTN = (1<<1) |
static const INT | GOTM = (1<<2) |
static const INT | GOTO = (1<<3) |
static const INT | GOTa = (1<<4) |
static const INT | GOTb = (1<<5) |
static const INT | GOTp = (1<<6) |
static const INT | GOTq = (1<<7) |
static const INT | GOTlearn_a = (1<<1) |
static const INT | GOTlearn_b = (1<<2) |
static const INT | GOTlearn_p = (1<<3) |
static const INT | GOTlearn_q = (1<<4) |
static const INT | GOTconst_a = (1<<5) |
static const INT | GOTconst_b = (1<<6) |
static const INT | GOTconst_p = (1<<7) |
static const INT | GOTconst_q = (1<<8) |
bool CHMM::load_definitions |
( |
FILE * |
file, |
|
|
bool |
verbose, |
|
|
bool |
initialize = true | |
|
) |
| | |
read definitions file (learn_x,const_x) used for training. -format specs: definition_file (train.def) % HMM-TRAIN - specification % learn_a - elements in state_transition_matrix to be learned % learn_b - elements in observation_per_state_matrix to be learned % note: each line stands for % state, observation(0), observation(1)...observation(NOW) % learn_p - elements in initial distribution to be learned % learn_q - elements in the end-state distribution to be learned % % const_x - specifies initial values of elements % rest is assumed to be 0.0 % % NOTE: IMPLICIT DEFINES: % define A 0 % define C 1 % define G 2 % define T 3
learn_a=[ [INT,INT]; [INT,INT]; [INT,INT]; ........ [INT,INT]; [-1,-1]; ];
learn_b=[ [INT,INT,INT,...,INT]; [INT,INT,INT,...,INT]; [INT,INT,INT,...,INT]; ........ [INT,INT,INT,...,INT]; [-1,-1]; ];
learn_p= [ INT, ... , INT, -1 ];
learn_q= [ INT, ... , INT, -1 ];
const_a=[ [INT,INT,DREAL]; [INT,INT,DREAL]; [INT,INT,DREAL]; ........ [INT,INT,DREAL]; [-1,-1,-1]; ];
const_b=[ [INT,INT,INT,...,INT,DREAL]; [INT,INT,INT,...,INT,DREAL]; [INT,INT,INT,...,INT,DREAL]; ........ [INT,INT,INT,...,INT,DREAL]; [-1,-1,-1]; ];
const_p[]=[ [INT, DREAL], ... , [INT,DREAL], [-1,-1] ]; const_q[]=[ [INT, DREAL], ... , [INT,DREAL], [-1,-1] ];
- Parameters:
-
| file | filehandle to definitions file |
| verbose | true for verbose messages |
| initialize | true to initialize to underlying HMM |
Definition at line 3460 of file HMM.cpp.