CanberraWordDistance.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/CanberraWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 CCanberraWordDistance::CCanberraWordDistance()
00019 : CStringDistance<WORD>()
00020 {
00021 SG_DEBUG("CCanberraWordDistance created");
00022 dictionary_size= 1<<(sizeof(WORD)*8);
00023 dictionary_weights = new DREAL[dictionary_size];
00024 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026
00027 CCanberraWordDistance::CCanberraWordDistance(CStringFeatures<WORD>* l, CStringFeatures<WORD>* r)
00028 : CStringDistance<WORD>()
00029 {
00030 SG_DEBUG("CCanberraWordDistance created");
00031 dictionary_size= 1<<(sizeof(WORD)*8);
00032 dictionary_weights = new DREAL[dictionary_size];
00033 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00034
00035 init(l, r);
00036 }
00037
00038 CCanberraWordDistance::~CCanberraWordDistance()
00039 {
00040 cleanup();
00041
00042 delete[] dictionary_weights;
00043 }
00044
00045 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00046 {
00047 return CStringDistance<WORD>::init(l,r);
00048 }
00049
00050 void CCanberraWordDistance::cleanup()
00051 {
00052 }
00053
00054 bool CCanberraWordDistance::load_init(FILE* src)
00055 {
00056 return false;
00057 }
00058
00059 bool CCanberraWordDistance::save_init(FILE* dest)
00060 {
00061 return false;
00062 }
00063
00064 DREAL CCanberraWordDistance::compute(INT idx_a, INT idx_b)
00065 {
00066 INT alen, blen;
00067
00068 WORD* avec=((CStringFeatures<WORD>*) lhs)->get_feature_vector(idx_a, alen);
00069 WORD* bvec=((CStringFeatures<WORD>*) rhs)->get_feature_vector(idx_b, blen);
00070
00071 DREAL result=0;
00072
00073 INT left_idx=0;
00074 INT right_idx=0;
00075
00076 while (left_idx < alen && right_idx < blen)
00077 {
00078 WORD sym=avec[left_idx];
00079 if (avec[left_idx]==bvec[right_idx])
00080 {
00081 INT old_left_idx=left_idx;
00082 INT old_right_idx=right_idx;
00083
00084 while (left_idx< alen && avec[left_idx]==sym)
00085 left_idx++;
00086
00087 while (right_idx< blen && bvec[right_idx]==sym)
00088 right_idx++;
00089
00090 result += CMath::abs( (DREAL) ((left_idx-old_left_idx) - (right_idx-old_right_idx)) )/
00091 ( (DREAL) ((left_idx-old_left_idx) + (right_idx-old_right_idx)) );
00092 }
00093 else if (avec[left_idx]<bvec[right_idx])
00094 {
00095 result++;
00096
00097 while (left_idx< alen && avec[left_idx]==sym)
00098 left_idx++;
00099 }
00100 else
00101 {
00102 sym=bvec[right_idx];
00103 result++;
00104
00105 while (right_idx< blen && bvec[right_idx]==sym)
00106 right_idx++;
00107 }
00108 }
00109
00110 while (left_idx < alen)
00111 {
00112 WORD sym=avec[left_idx];
00113 result++;
00114
00115 while (left_idx< alen && avec[left_idx]==sym)
00116 left_idx++;
00117 }
00118
00119 while (right_idx < blen)
00120 {
00121 WORD sym=bvec[right_idx];
00122 result++;
00123
00124 while (right_idx< blen && bvec[right_idx]==sym)
00125 right_idx++;
00126 }
00127
00128 return result;
00129 }