LocalityImprovedStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "kernel/LocalityImprovedStringKernel.h"
00015 #include "features/StringFeatures.h"
00016 
00017 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel(
00018     INT size, INT l, INT id, INT od)
00019 : CStringKernel<CHAR>(size), length(l), inner_degree(id), outer_degree(od)
00020 {
00021     SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od);
00022 }
00023 
00024 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel(
00025     CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT len, INT id, INT od)
00026 : CStringKernel<CHAR>(10), length(len), inner_degree(id), outer_degree(od)
00027 {
00028     SG_INFO( "LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od);
00029 
00030     init(l, r);
00031 }
00032 
00033 CLocalityImprovedStringKernel::~CLocalityImprovedStringKernel()
00034 {
00035     cleanup();
00036 }
00037 
00038 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040     return CStringKernel<CHAR>::init(l,r);
00041 }
00042 
00043 bool CLocalityImprovedStringKernel::load_init(FILE* src)
00044 {
00045     return false;
00046 }
00047 
00048 bool CLocalityImprovedStringKernel::save_init(FILE* dest)
00049 {
00050     return false;
00051 }
00052 
00053 DREAL CLocalityImprovedStringKernel::compute(INT idx_a, INT idx_b)
00054 {
00055     INT alen, blen;
00056 
00057     CHAR* avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00058     CHAR* bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00059     // can only deal with strings of same length
00060     ASSERT(alen==blen && alen>0);
00061 
00062     INT i,t;
00063     DREAL* match=new DREAL[alen];
00064 
00065     // initialize match table 1 -> match;  0 -> no match
00066     for (i = 0; i<alen; i++)
00067         match[i] = (avec[i] == bvec[i])? 1 : 0;
00068 
00069     DREAL outer_sum = 0;
00070 
00071     for (t = 0; t<alen-length; t++)
00072     {
00073         DREAL sum = 0;
00074         for (i = 0; i<length && t+i+length+1<alen; i++)
00075             sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1];
00076         //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1)
00077         DREAL inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1));
00078         inner_sum = pow(inner_sum, inner_degree + 1);
00079         outer_sum += inner_sum;
00080     }
00081     delete[] match;
00082     return pow(outer_sum, outer_degree + 1);
00083 }

SHOGUN Machine Learning Toolbox - Documentation