HammingWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007 Christian Gehl
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/HammingWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CHammingWordDistance::CHammingWordDistance(bool sign)
00019 : CStringDistance<WORD>(), use_sign(sign)
00020 {
00021     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00022     dictionary_size= 1<<(sizeof(WORD)*8);
00023     dictionary_weights = new DREAL[dictionary_size];
00024     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026 
00027 CHammingWordDistance::CHammingWordDistance(
00028     CStringFeatures<WORD>* l, CStringFeatures<WORD>* r, bool sign)
00029 : CStringDistance<WORD>(), use_sign(sign)
00030 {
00031     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00032     dictionary_size= 1<<(sizeof(WORD)*8);
00033     dictionary_weights = new DREAL[dictionary_size];
00034     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035 
00036     init(l, r);
00037 }
00038 
00039 CHammingWordDistance::~CHammingWordDistance()
00040 {
00041     cleanup();
00042 
00043     delete[] dictionary_weights;
00044 }
00045   
00046 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048     bool result=CStringDistance<WORD>::init(l,r);
00049     return result;
00050 }
00051 
00052 void CHammingWordDistance::cleanup()
00053 {
00054 }
00055 
00056 bool CHammingWordDistance::load_init(FILE* src)
00057 {
00058     return false;
00059 }
00060 
00061 bool CHammingWordDistance::save_init(FILE* dest)
00062 {
00063     return false;
00064 }
00065   
00066 DREAL CHammingWordDistance::compute(INT idx_a, INT idx_b)
00067 {
00068     INT alen, blen;
00069 
00070     WORD* avec=((CStringFeatures<WORD>*) lhs)->get_feature_vector(idx_a, alen);
00071     WORD* bvec=((CStringFeatures<WORD>*) rhs)->get_feature_vector(idx_b, blen);
00072 
00073     INT result=0;
00074 
00075     INT left_idx=0;
00076     INT right_idx=0;
00077 
00078     if (use_sign)
00079     {
00080         // hamming of: if words appear in both vectors 
00081         while (left_idx < alen && right_idx < blen)
00082         {
00083             WORD sym=avec[left_idx];
00084             if (avec[left_idx]==bvec[right_idx])
00085             {
00086                 while (left_idx< alen && avec[left_idx]==sym)
00087                     left_idx++;
00088 
00089                 while (right_idx< blen && bvec[right_idx]==sym)
00090                     right_idx++;
00091             }
00092             else if (avec[left_idx]<bvec[right_idx])
00093             {
00094                 result++;
00095 
00096                 while (left_idx< alen && avec[left_idx]==sym)
00097                     left_idx++;
00098             }
00099             else
00100             {
00101                 sym=bvec[right_idx];
00102                 result++;
00103 
00104                 while (right_idx< blen && bvec[right_idx]==sym)
00105                     right_idx++;
00106             }
00107         }
00108     }
00109     else
00110     {
00111         //hamming of: if words appear in both vectors _the same number_ of times
00112         while (left_idx < alen && right_idx < blen)
00113         {
00114             WORD sym=avec[left_idx];
00115             if (avec[left_idx]==bvec[right_idx])
00116             {
00117                 INT old_left_idx=left_idx;
00118                 INT old_right_idx=right_idx;
00119 
00120                 while (left_idx< alen && avec[left_idx]==sym)
00121                     left_idx++;
00122 
00123                 while (right_idx< blen && bvec[right_idx]==sym)
00124                     right_idx++;
00125 
00126                 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00127                     result++;
00128             }
00129             else if (avec[left_idx]<bvec[right_idx])
00130             {
00131                 result++;
00132 
00133                 while (left_idx< alen && avec[left_idx]==sym)
00134                     left_idx++;
00135             }
00136             else
00137             {
00138                 sym=bvec[right_idx];
00139                 result++;
00140 
00141                 while (right_idx< blen && bvec[right_idx]==sym)
00142                     right_idx++;
00143             }
00144         }
00145     }
00146 
00147     while (left_idx < alen)
00148     {
00149         WORD sym=avec[left_idx];
00150         result++;
00151 
00152         while (left_idx< alen && avec[left_idx]==sym)
00153             left_idx++;
00154     }
00155 
00156     while (right_idx < blen)
00157     {
00158         WORD sym=bvec[right_idx];
00159         result++;
00160 
00161         while (right_idx< blen && bvec[right_idx]==sym)
00162             right_idx++;
00163     }
00164 
00165     return result;
00166 }

SHOGUN Machine Learning Toolbox - Documentation