CanberraWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) Christian Gehl
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CCanberraWordDistance::CCanberraWordDistance()
00019 : CStringDistance<WORD>()
00020 {
00021     SG_DEBUG("CCanberraWordDistance created");
00022     dictionary_size= 1<<(sizeof(WORD)*8);
00023     dictionary_weights = new DREAL[dictionary_size];
00024     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026 
00027 CCanberraWordDistance::CCanberraWordDistance(CStringFeatures<WORD>* l, CStringFeatures<WORD>* r)
00028 : CStringDistance<WORD>()
00029 {
00030     SG_DEBUG("CCanberraWordDistance created");
00031     dictionary_size= 1<<(sizeof(WORD)*8);
00032     dictionary_weights = new DREAL[dictionary_size];
00033     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00034 
00035     init(l, r);
00036 }
00037 
00038 CCanberraWordDistance::~CCanberraWordDistance()
00039 {
00040     cleanup();
00041 
00042     delete[] dictionary_weights;
00043 }
00044   
00045 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00046 {
00047     return CStringDistance<WORD>::init(l,r);
00048 }
00049 
00050 void CCanberraWordDistance::cleanup()
00051 {
00052 }
00053 
00054 bool CCanberraWordDistance::load_init(FILE* src)
00055 {
00056     return false;
00057 }
00058 
00059 bool CCanberraWordDistance::save_init(FILE* dest)
00060 {
00061     return false;
00062 }
00063   
00064 DREAL CCanberraWordDistance::compute(INT idx_a, INT idx_b)
00065 {
00066     INT alen, blen;
00067 
00068     WORD* avec=((CStringFeatures<WORD>*) lhs)->get_feature_vector(idx_a, alen);
00069     WORD* bvec=((CStringFeatures<WORD>*) rhs)->get_feature_vector(idx_b, blen);
00070 
00071     DREAL result=0;
00072 
00073     INT left_idx=0;
00074     INT right_idx=0;
00075 
00076     while (left_idx < alen && right_idx < blen)
00077     {
00078         WORD sym=avec[left_idx];
00079         if (avec[left_idx]==bvec[right_idx])
00080         {
00081             INT old_left_idx=left_idx;
00082             INT old_right_idx=right_idx;
00083 
00084             while (left_idx< alen && avec[left_idx]==sym)
00085                 left_idx++;
00086 
00087             while (right_idx< blen && bvec[right_idx]==sym)
00088                 right_idx++;
00089 
00090             result += CMath::abs( (DREAL) ((left_idx-old_left_idx) - (right_idx-old_right_idx)) )/
00091                         ( (DREAL) ((left_idx-old_left_idx) + (right_idx-old_right_idx)) );
00092         }
00093         else if (avec[left_idx]<bvec[right_idx])
00094         {
00095             result++;
00096 
00097             while (left_idx< alen && avec[left_idx]==sym)
00098                 left_idx++;
00099         }
00100         else
00101         {
00102             sym=bvec[right_idx];
00103             result++;
00104 
00105             while (right_idx< blen && bvec[right_idx]==sym)
00106                 right_idx++;
00107         }
00108     }
00109     
00110     while (left_idx < alen)
00111     {
00112         WORD sym=avec[left_idx];
00113         result++;
00114 
00115         while (left_idx< alen && avec[left_idx]==sym)
00116             left_idx++;
00117     }
00118 
00119     while (right_idx < blen)
00120     {
00121         WORD sym=bvec[right_idx];
00122         result++;
00123 
00124         while (right_idx< blen && bvec[right_idx]==sym)
00125             right_idx++;
00126     }
00127 
00128     return result;
00129 }

SHOGUN Machine Learning Toolbox - Documentation