HammingWordDistance.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/common.h"
00013 #include "distance/HammingWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017
00018 CHammingWordDistance::CHammingWordDistance(bool sign)
00019 : CStringDistance<uint16_t>(), use_sign(sign)
00020 {
00021 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00022 dictionary_size= 1<<(sizeof(uint16_t)*8);
00023 dictionary_weights = new float64_t[dictionary_size];
00024 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026
00027 CHammingWordDistance::CHammingWordDistance(
00028 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign)
00029 : CStringDistance<uint16_t>(), use_sign(sign)
00030 {
00031 SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00032 dictionary_size= 1<<(sizeof(uint16_t)*8);
00033 dictionary_weights = new float64_t[dictionary_size];
00034 SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035
00036 init(l, r);
00037 }
00038
00039 CHammingWordDistance::~CHammingWordDistance()
00040 {
00041 cleanup();
00042
00043 delete[] dictionary_weights;
00044 }
00045
00046 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048 bool result=CStringDistance<uint16_t>::init(l,r);
00049 return result;
00050 }
00051
00052 void CHammingWordDistance::cleanup()
00053 {
00054 }
00055
00056 bool CHammingWordDistance::load_init(FILE* src)
00057 {
00058 return false;
00059 }
00060
00061 bool CHammingWordDistance::save_init(FILE* dest)
00062 {
00063 return false;
00064 }
00065
00066 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b)
00067 {
00068 int32_t alen, blen;
00069
00070 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00071 get_feature_vector(idx_a, alen);
00072 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00073 get_feature_vector(idx_b, blen);
00074
00075 int32_t result=0;
00076
00077 int32_t left_idx=0;
00078 int32_t right_idx=0;
00079
00080 if (use_sign)
00081 {
00082
00083 while (left_idx < alen && right_idx < blen)
00084 {
00085 uint16_t sym=avec[left_idx];
00086 if (avec[left_idx]==bvec[right_idx])
00087 {
00088 while (left_idx< alen && avec[left_idx]==sym)
00089 left_idx++;
00090
00091 while (right_idx< blen && bvec[right_idx]==sym)
00092 right_idx++;
00093 }
00094 else if (avec[left_idx]<bvec[right_idx])
00095 {
00096 result++;
00097
00098 while (left_idx< alen && avec[left_idx]==sym)
00099 left_idx++;
00100 }
00101 else
00102 {
00103 sym=bvec[right_idx];
00104 result++;
00105
00106 while (right_idx< blen && bvec[right_idx]==sym)
00107 right_idx++;
00108 }
00109 }
00110 }
00111 else
00112 {
00113
00114 while (left_idx < alen && right_idx < blen)
00115 {
00116 uint16_t sym=avec[left_idx];
00117 if (avec[left_idx]==bvec[right_idx])
00118 {
00119 int32_t old_left_idx=left_idx;
00120 int32_t old_right_idx=right_idx;
00121
00122 while (left_idx< alen && avec[left_idx]==sym)
00123 left_idx++;
00124
00125 while (right_idx< blen && bvec[right_idx]==sym)
00126 right_idx++;
00127
00128 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00129 result++;
00130 }
00131 else if (avec[left_idx]<bvec[right_idx])
00132 {
00133 result++;
00134
00135 while (left_idx< alen && avec[left_idx]==sym)
00136 left_idx++;
00137 }
00138 else
00139 {
00140 sym=bvec[right_idx];
00141 result++;
00142
00143 while (right_idx< blen && bvec[right_idx]==sym)
00144 right_idx++;
00145 }
00146 }
00147 }
00148
00149 while (left_idx < alen)
00150 {
00151 uint16_t sym=avec[left_idx];
00152 result++;
00153
00154 while (left_idx< alen && avec[left_idx]==sym)
00155 left_idx++;
00156 }
00157
00158 while (right_idx < blen)
00159 {
00160 uint16_t sym=bvec[right_idx];
00161 result++;
00162
00163 while (right_idx< blen && bvec[right_idx]==sym)
00164 right_idx++;
00165 }
00166
00167 return result;
00168 }