CanberraWordDistance.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 CCanberraWordDistance::CCanberraWordDistance()
00019 : CStringDistance<uint16_t>()
00020 {
00021 SG_DEBUG("CCanberraWordDistance created");
00022 dictionary_size= 1<<(sizeof(uint16_t)*8);
00023 dictionary_weights = new float64_t[dictionary_size];
00024 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026
00027 CCanberraWordDistance::CCanberraWordDistance(
00028 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00029 : CStringDistance<uint16_t>()
00030 {
00031 SG_DEBUG("CCanberraWordDistance created");
00032 dictionary_size= 1<<(sizeof(uint16_t)*8);
00033 dictionary_weights = new float64_t[dictionary_size];
00034 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035
00036 init(l, r);
00037 }
00038
00039 CCanberraWordDistance::~CCanberraWordDistance()
00040 {
00041 cleanup();
00042
00043 delete[] dictionary_weights;
00044 }
00045
00046 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048 return CStringDistance<uint16_t>::init(l,r);
00049 }
00050
00051 void CCanberraWordDistance::cleanup()
00052 {
00053 }
00054
00055 bool CCanberraWordDistance::load_init(FILE* src)
00056 {
00057 return false;
00058 }
00059
00060 bool CCanberraWordDistance::save_init(FILE* dest)
00061 {
00062 return false;
00063 }
00064
00065 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00066 {
00067 int32_t alen, blen;
00068
00069 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00070 get_feature_vector(idx_a, alen);
00071 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00072 get_feature_vector(idx_b, blen);
00073
00074 float64_t result=0;
00075
00076 int32_t left_idx=0;
00077 int32_t right_idx=0;
00078
00079 while (left_idx < alen && right_idx < blen)
00080 {
00081 uint16_t sym=avec[left_idx];
00082 if (avec[left_idx]==bvec[right_idx])
00083 {
00084 int32_t old_left_idx=left_idx;
00085 int32_t old_right_idx=right_idx;
00086
00087 while (left_idx< alen && avec[left_idx]==sym)
00088 left_idx++;
00089
00090 while (right_idx< blen && bvec[right_idx]==sym)
00091 right_idx++;
00092
00093 result +=
00094 CMath::abs((float64_t)
00095 ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00096 ((float64_t)
00097 ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00098 }
00099 else if (avec[left_idx]<bvec[right_idx])
00100 {
00101 result++;
00102
00103 while (left_idx< alen && avec[left_idx]==sym)
00104 left_idx++;
00105 }
00106 else
00107 {
00108 sym=bvec[right_idx];
00109 result++;
00110
00111 while (right_idx< blen && bvec[right_idx]==sym)
00112 right_idx++;
00113 }
00114 }
00115
00116 while (left_idx < alen)
00117 {
00118 uint16_t sym=avec[left_idx];
00119 result++;
00120
00121 while (left_idx< alen && avec[left_idx]==sym)
00122 left_idx++;
00123 }
00124
00125 while (right_idx < blen)
00126 {
00127 uint16_t sym=bvec[right_idx];
00128 result++;
00129
00130 while (right_idx< blen && bvec[right_idx]==sym)
00131 right_idx++;
00132 }
00133
00134 return result;
00135 }