HammingWordDistance.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/HammingWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 CHammingWordDistance::CHammingWordDistance(bool sign)
00019 : CStringDistance<WORD>(), use_sign(sign)
00020 {
00021 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00022 dictionary_size= 1<<(sizeof(WORD)*8);
00023 dictionary_weights = new DREAL[dictionary_size];
00024 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026
00027 CHammingWordDistance::CHammingWordDistance(
00028 CStringFeatures<WORD>* l, CStringFeatures<WORD>* r, bool sign)
00029 : CStringDistance<WORD>(), use_sign(sign)
00030 {
00031 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00032 dictionary_size= 1<<(sizeof(WORD)*8);
00033 dictionary_weights = new DREAL[dictionary_size];
00034 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035
00036 init(l, r);
00037 }
00038
00039 CHammingWordDistance::~CHammingWordDistance()
00040 {
00041 cleanup();
00042
00043 delete[] dictionary_weights;
00044 }
00045
00046 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048 bool result=CStringDistance<WORD>::init(l,r);
00049 return result;
00050 }
00051
00052 void CHammingWordDistance::cleanup()
00053 {
00054 }
00055
00056 bool CHammingWordDistance::load_init(FILE* src)
00057 {
00058 return false;
00059 }
00060
00061 bool CHammingWordDistance::save_init(FILE* dest)
00062 {
00063 return false;
00064 }
00065
00066 DREAL CHammingWordDistance::compute(INT idx_a, INT idx_b)
00067 {
00068 INT alen, blen;
00069
00070 WORD* avec=((CStringFeatures<WORD>*) lhs)->get_feature_vector(idx_a, alen);
00071 WORD* bvec=((CStringFeatures<WORD>*) rhs)->get_feature_vector(idx_b, blen);
00072
00073 INT result=0;
00074
00075 INT left_idx=0;
00076 INT right_idx=0;
00077
00078 if (use_sign)
00079 {
00080
00081 while (left_idx < alen && right_idx < blen)
00082 {
00083 WORD sym=avec[left_idx];
00084 if (avec[left_idx]==bvec[right_idx])
00085 {
00086 while (left_idx< alen && avec[left_idx]==sym)
00087 left_idx++;
00088
00089 while (right_idx< blen && bvec[right_idx]==sym)
00090 right_idx++;
00091 }
00092 else if (avec[left_idx]<bvec[right_idx])
00093 {
00094 result++;
00095
00096 while (left_idx< alen && avec[left_idx]==sym)
00097 left_idx++;
00098 }
00099 else
00100 {
00101 sym=bvec[right_idx];
00102 result++;
00103
00104 while (right_idx< blen && bvec[right_idx]==sym)
00105 right_idx++;
00106 }
00107 }
00108 }
00109 else
00110 {
00111
00112 while (left_idx < alen && right_idx < blen)
00113 {
00114 WORD sym=avec[left_idx];
00115 if (avec[left_idx]==bvec[right_idx])
00116 {
00117 INT old_left_idx=left_idx;
00118 INT old_right_idx=right_idx;
00119
00120 while (left_idx< alen && avec[left_idx]==sym)
00121 left_idx++;
00122
00123 while (right_idx< blen && bvec[right_idx]==sym)
00124 right_idx++;
00125
00126 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00127 result++;
00128 }
00129 else if (avec[left_idx]<bvec[right_idx])
00130 {
00131 result++;
00132
00133 while (left_idx< alen && avec[left_idx]==sym)
00134 left_idx++;
00135 }
00136 else
00137 {
00138 sym=bvec[right_idx];
00139 result++;
00140
00141 while (right_idx< blen && bvec[right_idx]==sym)
00142 right_idx++;
00143 }
00144 }
00145 }
00146
00147 while (left_idx < alen)
00148 {
00149 WORD sym=avec[left_idx];
00150 result++;
00151
00152 while (left_idx< alen && avec[left_idx]==sym)
00153 left_idx++;
00154 }
00155
00156 while (right_idx < blen)
00157 {
00158 WORD sym=bvec[right_idx];
00159 result++;
00160
00161 while (right_idx< blen && bvec[right_idx]==sym)
00162 right_idx++;
00163 }
00164
00165 return result;
00166 }