Histogram.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "distributions/histogram/Histogram.h"
00013 #include "lib/common.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016 #include "lib/Mathematics.h"
00017 
00018 
00019 CHistogram::CHistogram()
00020 : CDistribution()
00021 {
00022     hist=new DREAL[1<<16];
00023 }
00024 
00025 CHistogram::CHistogram(CStringFeatures<WORD> *f)
00026 : CDistribution()
00027 {
00028     hist=new DREAL[1<<16];
00029     features=f;
00030 }
00031 
00032 CHistogram::~CHistogram()
00033 {
00034     delete[] hist;
00035 }
00036 
00037 bool CHistogram::train()
00038 {
00039     INT vec;
00040     INT feat;
00041     INT i;
00042 
00043     ASSERT(features);
00044     ASSERT(features->get_feature_class()==C_STRING);
00045     ASSERT(features->get_feature_type()==F_WORD);
00046 
00047     for (i=0; i< (INT) (1<<16); i++)
00048         hist[i]=0;
00049 
00050     for (vec=0; vec<features->get_num_vectors(); vec++)
00051     {
00052         INT len;
00053 
00054         WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(vec, len);
00055 
00056         for (feat=0; feat<len ; feat++)
00057             hist[vector[feat]]++;
00058     }
00059 
00060     for (i=0; i< (INT) (1<<16); i++)
00061         hist[i]=log(hist[i]);
00062 
00063     return true;
00064 }
00065 
00066 DREAL CHistogram::get_log_likelihood_example(INT num_example)
00067 {
00068     ASSERT(features);
00069     ASSERT(features->get_feature_class()==C_STRING);
00070     ASSERT(features->get_feature_type()==F_WORD);
00071 
00072     INT len;
00073     DREAL loglik=0;
00074 
00075     WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00076 
00077     for (INT i=0; i<len; i++)
00078         loglik+=hist[vector[i]];
00079 
00080     return loglik;
00081 }
00082 
00083 DREAL CHistogram::get_log_derivative(INT num_param, INT num_example)
00084 {
00085     if (hist[num_param] < CMath::ALMOST_NEG_INFTY)
00086         return -CMath::INFTY;
00087     else
00088     {
00089         ASSERT(features);
00090         ASSERT(features->get_feature_class()==C_STRING);
00091         ASSERT(features->get_feature_type()==F_WORD);
00092 
00093         INT len;
00094         DREAL deriv=0;
00095 
00096         WORD* vector=((CStringFeatures<WORD>*) features)->get_feature_vector(num_example, len);
00097 
00098         INT num_occurences=0;
00099 
00100         for (INT i=0; i<len; i++)
00101         {
00102             deriv+=hist[vector[i]];
00103 
00104             if (vector[i]==num_param)
00105                 num_occurences++;
00106         }
00107 
00108         if (num_occurences>0)
00109             deriv+=log(num_occurences)-hist[num_param];
00110         else
00111             deriv=-CMath::INFTY;
00112 
00113         return deriv;
00114     }
00115 }
00116 
00117 DREAL CHistogram::get_log_model_parameter(INT num_param)
00118 {
00119     return hist[num_param];
00120 }
00121 
00122 bool CHistogram::set_histogram(DREAL* src, INT num)
00123 {
00124     ASSERT(num==get_num_model_parameters());
00125 
00126     delete[] hist;
00127     hist=new DREAL[num];
00128     for (INT i=0; i<num; i++) {
00129         hist[i]=src[i];
00130     }
00131 
00132     return true;
00133 }
00134 
00135 void CHistogram::get_histogram(DREAL** dst, INT* num)
00136 {
00137     *num=get_num_model_parameters();
00138     size_t sz=sizeof(*hist)*(*num);
00139     *dst=(DREAL*) malloc(sz);
00140     ASSERT(dst);
00141 
00142     memcpy(*dst, hist, sz);
00143 }
00144 

SHOGUN Machine Learning Toolbox - Documentation