CanberraWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) Christian Gehl
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CCanberraWordDistance::CCanberraWordDistance()
00019 : CStringDistance<uint16_t>()
00020 {
00021     SG_DEBUG("CCanberraWordDistance created");
00022     dictionary_size= 1<<(sizeof(uint16_t)*8);
00023     dictionary_weights = new float64_t[dictionary_size];
00024     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026 
00027 CCanberraWordDistance::CCanberraWordDistance(
00028     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00029 : CStringDistance<uint16_t>()
00030 {
00031     SG_DEBUG("CCanberraWordDistance created");
00032     dictionary_size= 1<<(sizeof(uint16_t)*8);
00033     dictionary_weights = new float64_t[dictionary_size];
00034     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035 
00036     init(l, r);
00037 }
00038 
00039 CCanberraWordDistance::~CCanberraWordDistance()
00040 {
00041     cleanup();
00042 
00043     delete[] dictionary_weights;
00044 }
00045 
00046 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048     return CStringDistance<uint16_t>::init(l,r);
00049 }
00050 
00051 void CCanberraWordDistance::cleanup()
00052 {
00053 }
00054 
00055 bool CCanberraWordDistance::load_init(FILE* src)
00056 {
00057     return false;
00058 }
00059 
00060 bool CCanberraWordDistance::save_init(FILE* dest)
00061 {
00062     return false;
00063 }
00064 
00065 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00066 {
00067     int32_t alen, blen;
00068 
00069     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00070         get_feature_vector(idx_a, alen);
00071     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00072         get_feature_vector(idx_b, blen);
00073 
00074     float64_t result=0;
00075 
00076     int32_t left_idx=0;
00077     int32_t right_idx=0;
00078 
00079     while (left_idx < alen && right_idx < blen)
00080     {
00081         uint16_t sym=avec[left_idx];
00082         if (avec[left_idx]==bvec[right_idx])
00083         {
00084             int32_t old_left_idx=left_idx;
00085             int32_t old_right_idx=right_idx;
00086 
00087             while (left_idx< alen && avec[left_idx]==sym)
00088                 left_idx++;
00089 
00090             while (right_idx< blen && bvec[right_idx]==sym)
00091                 right_idx++;
00092 
00093             result +=
00094                 CMath::abs((float64_t)
00095                     ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00096                 ((float64_t)
00097                     ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00098         }
00099         else if (avec[left_idx]<bvec[right_idx])
00100         {
00101             result++;
00102 
00103             while (left_idx< alen && avec[left_idx]==sym)
00104                 left_idx++;
00105         }
00106         else
00107         {
00108             sym=bvec[right_idx];
00109             result++;
00110 
00111             while (right_idx< blen && bvec[right_idx]==sym)
00112                 right_idx++;
00113         }
00114     }
00115     
00116     while (left_idx < alen)
00117     {
00118         uint16_t sym=avec[left_idx];
00119         result++;
00120 
00121         while (left_idx< alen && avec[left_idx]==sym)
00122             left_idx++;
00123     }
00124 
00125     while (right_idx < blen)
00126     {
00127         uint16_t sym=bvec[right_idx];
00128         result++;
00129 
00130         while (right_idx< blen && bvec[right_idx]==sym)
00131             right_idx++;
00132     }
00133 
00134     return result;
00135 }

SHOGUN Machine Learning Toolbox - Documentation