WordMatchKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/Mathematics.h"
00013 #include "lib/io.h"
00014 #include "kernel/WordMatchKernel.h"
00015 #include "features/WordFeatures.h"
00016 
00017 CWordMatchKernel::CWordMatchKernel(INT size, INT d, bool dr, DREAL s)
00018 : CSimpleKernel<WORD>(size),scale(s),do_rescale(dr), initialized(false),
00019     degree(d)
00020 {
00021 }
00022 
00023 CWordMatchKernel::CWordMatchKernel(
00024     CWordFeatures* l, CWordFeatures* r, INT d, bool dr, DREAL s)
00025 : CSimpleKernel<WORD>(10), scale(s), do_rescale(dr), initialized(false),
00026     degree(d)
00027 {
00028     init(l, r);
00029 }
00030 
00031 CWordMatchKernel::~CWordMatchKernel()
00032 {
00033     cleanup();
00034 }
00035 
00036 bool CWordMatchKernel::init(CFeatures* l, CFeatures* r)
00037 {
00038     CSimpleKernel<WORD>::init(l, r);
00039 
00040     if (!initialized)
00041         init_rescale() ;
00042 
00043     SG_INFO( "rescaling kernel by %g (num:%d)\n",scale, CMath::min(l->get_num_vectors(), r->get_num_vectors()));
00044 
00045     return true;
00046 }
00047 
00048 void CWordMatchKernel::init_rescale()
00049 {
00050     if (!do_rescale)
00051         return ;
00052     LONGREAL sum=0;
00053     scale=1.0;
00054     for (INT i=0; (i<lhs->get_num_vectors() && i<rhs->get_num_vectors()); i++)
00055             sum+=compute(i, i);
00056 
00057     if ( sum > (pow((double) 2, (double) 8*sizeof(LONG))) ) {
00058       SG_ERROR( "the sum %lf does not fit into integer of %d bits expect bogus results.\n", sum, 8*sizeof(LONG));
00059    }
00060     scale=sum/CMath::min(lhs->get_num_vectors(), rhs->get_num_vectors());
00061     initialized=true;
00062 }
00063 
00064 bool CWordMatchKernel::load_init(FILE* src)
00065 {
00066     return false;
00067 }
00068 
00069 bool CWordMatchKernel::save_init(FILE* dest)
00070 {
00071     return false;
00072 }
00073   
00074 DREAL CWordMatchKernel::compute(INT idx_a, INT idx_b)
00075 {
00076   INT alen, blen;
00077   bool afree, bfree;
00078 
00079   WORD* avec=((CWordFeatures*) lhs)->get_feature_vector(idx_a, alen, afree);
00080   WORD* bvec=((CWordFeatures*) rhs)->get_feature_vector(idx_b, blen, bfree);
00081   ASSERT(alen==blen);
00082 
00083   double sum=0;
00084   for (INT i=0; i<alen; i++)
00085       sum+= (avec[i]==bvec[i]) ? 1 : 0;
00086 
00087   DREAL result=sum;
00088 
00089   for (INT j=1; j<degree; j++)
00090       result*=sum;
00091   sum/=scale;
00092 
00093   ((CWordFeatures*) lhs)->free_feature_vector(avec, idx_a, afree);
00094   ((CWordFeatures*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00095 
00096   return result;
00097 }

SHOGUN Machine Learning Toolbox - Documentation