WordMatchKernel.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/Mathematics.h"
00013 #include "lib/io.h"
00014 #include "kernel/WordMatchKernel.h"
00015 #include "features/WordFeatures.h"
00016
00017 CWordMatchKernel::CWordMatchKernel(INT size, INT d, bool dr, DREAL s)
00018 : CSimpleKernel<WORD>(size),scale(s),do_rescale(dr), initialized(false),
00019 degree(d)
00020 {
00021 }
00022
00023 CWordMatchKernel::CWordMatchKernel(
00024 CWordFeatures* l, CWordFeatures* r, INT d, bool dr, DREAL s)
00025 : CSimpleKernel<WORD>(10), scale(s), do_rescale(dr), initialized(false),
00026 degree(d)
00027 {
00028 init(l, r);
00029 }
00030
00031 CWordMatchKernel::~CWordMatchKernel()
00032 {
00033 cleanup();
00034 }
00035
00036 bool CWordMatchKernel::init(CFeatures* l, CFeatures* r)
00037 {
00038 CSimpleKernel<WORD>::init(l, r);
00039
00040 if (!initialized)
00041 init_rescale() ;
00042
00043 SG_INFO( "rescaling kernel by %g (num:%d)\n",scale, CMath::min(l->get_num_vectors(), r->get_num_vectors()));
00044
00045 return true;
00046 }
00047
00048 void CWordMatchKernel::init_rescale()
00049 {
00050 if (!do_rescale)
00051 return ;
00052 LONGREAL sum=0;
00053 scale=1.0;
00054 for (INT i=0; (i<lhs->get_num_vectors() && i<rhs->get_num_vectors()); i++)
00055 sum+=compute(i, i);
00056
00057 if ( sum > (pow((double) 2, (double) 8*sizeof(LONG))) ) {
00058 SG_ERROR( "the sum %lf does not fit into integer of %d bits expect bogus results.\n", sum, 8*sizeof(LONG));
00059 }
00060 scale=sum/CMath::min(lhs->get_num_vectors(), rhs->get_num_vectors());
00061 initialized=true;
00062 }
00063
00064 bool CWordMatchKernel::load_init(FILE* src)
00065 {
00066 return false;
00067 }
00068
00069 bool CWordMatchKernel::save_init(FILE* dest)
00070 {
00071 return false;
00072 }
00073
00074 DREAL CWordMatchKernel::compute(INT idx_a, INT idx_b)
00075 {
00076 INT alen, blen;
00077 bool afree, bfree;
00078
00079 WORD* avec=((CWordFeatures*) lhs)->get_feature_vector(idx_a, alen, afree);
00080 WORD* bvec=((CWordFeatures*) rhs)->get_feature_vector(idx_b, blen, bfree);
00081 ASSERT(alen==blen);
00082
00083 double sum=0;
00084 for (INT i=0; i<alen; i++)
00085 sum+= (avec[i]==bvec[i]) ? 1 : 0;
00086
00087 DREAL result=sum;
00088
00089 for (INT j=1; j<degree; j++)
00090 result*=sum;
00091 sum/=scale;
00092
00093 ((CWordFeatures*) lhs)->free_feature_vector(avec, idx_a, afree);
00094 ((CWordFeatures*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00095
00096 return result;
00097 }