ManhattanWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007 Christian Gehl
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "distance/ManhattanWordDistance.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CManhattanWordDistance::CManhattanWordDistance()
00019 : CStringDistance<WORD>()
00020 {
00021     SG_DEBUG("CManhattanWordDistance created");
00022     dictionary_size= 1<<(sizeof(WORD)*8);
00023     dictionary_weights = new DREAL[dictionary_size];
00024     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00025 }
00026 
00027 CManhattanWordDistance::CManhattanWordDistance(
00028     CStringFeatures<WORD>* l, CStringFeatures<WORD>* r)
00029 : CStringDistance<WORD>()
00030 {
00031     SG_DEBUG("CManhattanWordDistance created");
00032     dictionary_size= 1<<(sizeof(WORD)*8);
00033     dictionary_weights = new DREAL[dictionary_size];
00034     SG_DEBUG( "using dictionary of %d bytes\n", dictionary_size);
00035 
00036     init(l, r);
00037 }
00038 
00039 CManhattanWordDistance::~CManhattanWordDistance() 
00040 {
00041     cleanup();
00042 
00043     delete[] dictionary_weights;
00044 }
00045 
00046 bool CManhattanWordDistance::init(CFeatures* l, CFeatures* r)
00047 {
00048     bool result=CStringDistance<WORD>::init(l,r);
00049     return result;
00050 }
00051 
00052 void CManhattanWordDistance::cleanup()
00053 {
00054 }
00055 
00056 bool CManhattanWordDistance::load_init(FILE* src)
00057 {
00058     return false;
00059 }
00060 
00061 bool CManhattanWordDistance::save_init(FILE* dest)
00062 {
00063     return false;
00064 }
00065 
00066 DREAL CManhattanWordDistance::compute(INT idx_a, INT idx_b)
00067 {
00068     INT alen, blen;
00069 
00070     WORD* avec=((CStringFeatures<WORD>*) lhs)->get_feature_vector(idx_a, alen);
00071     WORD* bvec=((CStringFeatures<WORD>*) rhs)->get_feature_vector(idx_b, blen);
00072 
00073     INT result=0;
00074 
00075     INT left_idx=0;
00076     INT right_idx=0;
00077 
00078     while (left_idx < alen && right_idx < blen)
00079     {
00080         WORD sym=avec[left_idx];
00081         if (avec[left_idx]==bvec[right_idx])
00082         {
00083             INT old_left_idx=left_idx;
00084             INT old_right_idx=right_idx;
00085 
00086             while (left_idx< alen && avec[left_idx]==sym)
00087                 left_idx++;
00088 
00089             while (right_idx< blen && bvec[right_idx]==sym)
00090                 right_idx++;
00091 
00092             result += CMath::abs( (left_idx-old_left_idx) - (right_idx-old_right_idx) );
00093         }
00094         else if (avec[left_idx]<bvec[right_idx])
00095         {
00096 
00097             while (left_idx< alen && avec[left_idx]==sym)
00098             {
00099                 result++;
00100                 left_idx++;
00101             }
00102         }
00103         else
00104         {
00105             sym=bvec[right_idx];
00106 
00107             while (right_idx< blen && bvec[right_idx]==sym)
00108             {
00109                 result++;
00110                 right_idx++;
00111             }
00112         }
00113     }
00114 
00115     result+=blen-right_idx + alen-left_idx;
00116 
00117     return result;
00118 }
00119 

SHOGUN Machine Learning Toolbox - Documentation