LinearHMM.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "distributions/hmm/LinearHMM.h"
00013 #include "lib/common.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016 
00017 CLinearHMM::CLinearHMM(CStringFeatures<WORD>* f)
00018 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00019 {
00020     features=f;
00021     sequence_length = f->get_vector_length(0);
00022     num_symbols     = (INT) f->get_num_symbols();
00023     num_params      = sequence_length*num_symbols;
00024 }
00025 
00026 CLinearHMM::CLinearHMM(INT p_num_features, INT p_num_symbols)
00027 : CDistribution(), transition_probs(NULL), log_transition_probs(NULL)
00028 {
00029     sequence_length = p_num_features;
00030     num_symbols     = p_num_symbols;
00031     num_params      = sequence_length*num_symbols;
00032 }
00033 
00034 CLinearHMM::~CLinearHMM()
00035 {
00036     delete[] transition_probs;
00037     delete[] log_transition_probs;
00038 }
00039 
00040 bool CLinearHMM::train()
00041 {
00042     delete[] transition_probs;
00043     delete[] log_transition_probs;
00044     INT* int_transition_probs=new INT[num_params];
00045 
00046     INT vec;
00047     INT i;
00048 
00049     for (i=0; i< num_params; i++)
00050         int_transition_probs[i]=0;
00051 
00052     for (vec=0; vec<features->get_num_vectors(); vec++)
00053     {
00054         INT len;
00055 
00056         WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(vec, len);
00057 
00058         //just count the symbols per position -> transition_probsogram
00059         for (INT feat=0; feat<len ; feat++)
00060             int_transition_probs[feat*num_symbols+vector[feat]]++;
00061     }
00062 
00063     //trade memory for speed
00064     transition_probs=new DREAL[num_params];
00065     log_transition_probs=new DREAL[num_params];
00066 
00067     for (i=0;i<sequence_length;i++)
00068     {
00069         for (INT j=0; j<num_symbols; j++)
00070         {
00071             DREAL sum=0;
00072             INT offs=i*num_symbols+((CStringFeatures<WORD> *) features)->get_masked_symbols((WORD)j,(BYTE) 254);
00073             INT original_num_symbols=(INT) ((CStringFeatures<WORD> *) features)->get_original_num_symbols();
00074 
00075             for (INT k=0; k<original_num_symbols; k++)
00076                 sum+=int_transition_probs[offs+k];
00077 
00078             transition_probs[i*num_symbols+j]=(int_transition_probs[i*num_symbols+j]+pseudo_count)/(sum+((CStringFeatures<WORD> *) features)->get_original_num_symbols()*pseudo_count);
00079             log_transition_probs[i*num_symbols+j]=log(transition_probs[i*num_symbols+j]);
00080         }
00081     }
00082 
00083     delete[] int_transition_probs;
00084     return true;
00085 }
00086 
00087 bool CLinearHMM::train(const INT* indizes, INT num_indizes, DREAL pseudo)
00088 {
00089     delete[] transition_probs;
00090     delete[] log_transition_probs;
00091     INT* int_transition_probs=new INT[num_params];
00092     INT vec;
00093     INT i;
00094 
00095     for (i=0; i< num_params; i++)
00096         int_transition_probs[i]=0;
00097 
00098     for (vec=0; vec<num_indizes; vec++)
00099     {
00100         INT len;
00101 
00102         ASSERT(indizes[vec]>=0 && indizes[vec]<((CStringFeatures<WORD>*) features)->get_num_vectors());
00103         WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(indizes[vec], len);
00104 
00105         //just count the symbols per position -> transition_probsogram
00106         //
00107         for (INT feat=0; feat<len ; feat++)
00108             int_transition_probs[feat*num_symbols+vector[feat]]++;
00109     }
00110 
00111     //trade memory for speed
00112     transition_probs=new DREAL[num_params];
00113     log_transition_probs=new DREAL[num_params];
00114 
00115     for (i=0;i<sequence_length;i++)
00116     {
00117         for (INT j=0; j<num_symbols; j++)
00118         {
00119             DREAL sum=0;
00120             INT original_num_symbols= (INT) ((CStringFeatures<WORD> *) features)->get_original_num_symbols();
00121             for (INT k=0; k<original_num_symbols; k++)
00122             {
00123                 sum+=int_transition_probs[i*num_symbols+
00124                     ((CStringFeatures<WORD>*) features)->
00125                         get_masked_symbols((WORD)j,(BYTE) 254)+k];
00126             }
00127 
00128             transition_probs[i*num_symbols+j]=(int_transition_probs[i*num_symbols+j]+pseudo)/(sum+((CStringFeatures<WORD>*) features)->get_original_num_symbols()*pseudo);
00129             log_transition_probs[i*num_symbols+j]=log(transition_probs[i*num_symbols+j]);
00130         }
00131     }
00132 
00133     delete[] int_transition_probs;
00134     return true;
00135 }
00136 
00137 DREAL CLinearHMM::get_log_likelihood_example(WORD* vector, INT len)
00138 {
00139     DREAL result=log_transition_probs[vector[0]];
00140 
00141     for (INT i=1; i<len; i++)
00142         result+=log_transition_probs[i*num_symbols+vector[i]];
00143     
00144     return result;
00145 }
00146 
00147 DREAL CLinearHMM::get_log_likelihood_example(INT num_example)
00148 {
00149     INT len;
00150     WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00151     DREAL result=log_transition_probs[vector[0]];
00152 
00153     for (INT i=1; i<len; i++)
00154         result+=log_transition_probs[i*num_symbols+vector[i]];
00155 
00156     return result;
00157 }
00158 
00159 DREAL CLinearHMM::get_likelihood_example(WORD* vector, INT len)
00160 {
00161     DREAL result=transition_probs[vector[0]];
00162 
00163     for (INT i=1; i<len; i++)
00164         result*=transition_probs[i*num_symbols+vector[i]];
00165     
00166     return result;
00167 }
00168 
00169 DREAL CLinearHMM::get_log_derivative(INT num_param, INT num_example)
00170 {
00171     INT len;
00172     WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00173     DREAL result=0;
00174     INT position=num_param/num_symbols;
00175     ASSERT(position>=0 && position<len);
00176     WORD sym=(WORD) (num_param-position*num_symbols);
00177 
00178     if (vector[position]==sym && transition_probs[num_param]!=0)
00179         result=1.0/transition_probs[num_param];
00180 
00181     return result;
00182 }
00183 
00184 void CLinearHMM::get_transition_probs(DREAL** dst, INT* num)
00185 {
00186     *num=num_params;
00187     size_t sz=sizeof(*transition_probs)*(*num);
00188     *dst=(DREAL*) malloc(sz);
00189     ASSERT(dst);
00190 
00191     memcpy(*dst, transition_probs, sz);
00192 }
00193 
00194 bool CLinearHMM::set_transition_probs(const DREAL* src, INT num)
00195 {
00196     if (num!=-1)
00197         ASSERT(num==num_params);
00198 
00199     if (!log_transition_probs)
00200         log_transition_probs=new DREAL[num_params];
00201 
00202     if (!transition_probs)
00203         transition_probs=new DREAL[num_params];
00204 
00205     for (INT i=0; i<num_params; i++)
00206     {
00207         transition_probs[i]=src[i];
00208         log_transition_probs[i]=log(transition_probs[i]);
00209     }
00210 
00211     return true;
00212 }
00213 
00214 void CLinearHMM::get_log_transition_probs(DREAL** dst, INT* num)
00215 {
00216     *num=num_params;
00217     size_t sz=sizeof(*log_transition_probs)*(*num);
00218     *dst=(DREAL*) malloc(sz);
00219     ASSERT(dst);
00220 
00221     memcpy(*dst, log_transition_probs, sz);
00222 }
00223 
00224 bool CLinearHMM::set_log_transition_probs(const DREAL* src, INT num)
00225 {
00226     if (num!=-1)
00227         ASSERT(num==num_params);
00228 
00229     if (!log_transition_probs)
00230         log_transition_probs=new DREAL[num_params];
00231 
00232     if (!transition_probs)
00233         transition_probs=new DREAL[num_params];
00234 
00235     for (INT i=0; i< num_params; i++)
00236     {
00237         log_transition_probs[i]=src[i];
00238         transition_probs[i]=exp(log_transition_probs[i]);
00239     }
00240 
00241     return true;
00242 }
00243 
00244 
00245 
00246 

SHOGUN Machine Learning Toolbox - Documentation