ManhattanWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007 Christian Gehl
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/ManhattanWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CManhattanWordDistance::CManhattanWordDistance()
00019 : CStringDistance<uint16_t>()
00020 {
00021     SG_DEBUG("CManhattanWordDistance created");
00022     dictionary_size= 1<<(sizeof(uint16_t)*8);
00023     dictionary_weights = new float64_t[dictionary_size];
00024     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026 
00027 CManhattanWordDistance::CManhattanWordDistance(
00028     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00029 : CStringDistance<uint16_t>()
00030 {
00031     SG_DEBUG("CManhattanWordDistance created");
00032     dictionary_size= 1<<(sizeof(uint16_t)*8);
00033     dictionary_weights = new float64_t[dictionary_size];
00034     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035 
00036     init(l, r);
00037 }
00038 
00039 CManhattanWordDistance::~CManhattanWordDistance() 
00040 {
00041     cleanup();
00042 
00043     delete[] dictionary_weights;
00044 }
00045 
00046 bool CManhattanWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048     bool result=CStringDistance<uint16_t>::init(l,r);
00049     return result;
00050 }
00051 
00052 void CManhattanWordDistance::cleanup()
00053 {
00054 }
00055 
00056 bool CManhattanWordDistance::load_init(FILE* src)
00057 {
00058     return false;
00059 }
00060 
00061 bool CManhattanWordDistance::save_init(FILE* dest)
00062 {
00063     return false;
00064 }
00065 
00066 float64_t CManhattanWordDistance::compute(int32_t idx_a, int32_t idx_b)
00067 {
00068     int32_t alen, blen;
00069 
00070     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00071         get_feature_vector(idx_a, alen);
00072     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00073         get_feature_vector(idx_b, blen);
00074 
00075     int32_t result=0;
00076 
00077     int32_t left_idx=0;
00078     int32_t right_idx=0;
00079 
00080     while (left_idx < alen && right_idx < blen)
00081     {
00082         uint16_t sym=avec[left_idx];
00083         if (avec[left_idx]==bvec[right_idx])
00084         {
00085             int32_t old_left_idx=left_idx;
00086             int32_t old_right_idx=right_idx;
00087 
00088             while (left_idx< alen && avec[left_idx]==sym)
00089                 left_idx++;
00090 
00091             while (right_idx< blen && bvec[right_idx]==sym)
00092                 right_idx++;
00093 
00094             result += CMath::abs( (left_idx-old_left_idx) - (right_idx-old_right_idx) );
00095         }
00096         else if (avec[left_idx]<bvec[right_idx])
00097         {
00098 
00099             while (left_idx< alen && avec[left_idx]==sym)
00100             {
00101                 result++;
00102                 left_idx++;
00103             }
00104         }
00105         else
00106         {
00107             sym=bvec[right_idx];
00108 
00109             while (right_idx< blen && bvec[right_idx]==sym)
00110             {
00111                 result++;
00112                 right_idx++;
00113             }
00114         }
00115     }
00116 
00117     result+=blen-right_idx + alen-left_idx;
00118 
00119     return result;
00120 }
00121 

SHOGUN Machine Learning Toolbox - Documentation