PolyMatchWordKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/PolyMatchWordKernel.h"
00014 #include "features/Features.h"
00015 #include "features/WordFeatures.h"
00016 
00017 CPolyMatchWordKernel::CPolyMatchWordKernel(INT size, INT d, bool i, bool un)
00018 : CSimpleKernel<WORD>(size),degree(d),inhomogene(i),use_normalization(un),
00019     sqrtdiag_lhs(NULL), sqrtdiag_rhs(NULL), initialized(false)
00020 {
00021 }
00022 
00023 CPolyMatchWordKernel::CPolyMatchWordKernel(
00024     CWordFeatures* l, CWordFeatures* r, INT d, bool i, bool un)
00025 : CSimpleKernel<WORD>(10),degree(d),inhomogene(i), use_normalization(un),
00026     sqrtdiag_lhs(NULL), sqrtdiag_rhs(NULL), initialized(false)
00027 {
00028     init(l, r);
00029 }
00030 
00031 CPolyMatchWordKernel::~CPolyMatchWordKernel()
00032 {
00033     cleanup();
00034 }
00035 
00036 bool CPolyMatchWordKernel::init(CFeatures* l, CFeatures* r)
00037 {
00038     bool result=CSimpleKernel<WORD>::init(l,r);
00039 
00040     initialized = false ;
00041     INT i;
00042 
00043     if (sqrtdiag_lhs != sqrtdiag_rhs)
00044         delete[] sqrtdiag_rhs;
00045     sqrtdiag_rhs=NULL ;
00046     delete[] sqrtdiag_lhs;
00047     sqrtdiag_lhs=NULL ;
00048 
00049     if (use_normalization)
00050     {
00051         sqrtdiag_lhs= new DREAL[lhs->get_num_vectors()];
00052 
00053         for (i=0; i<lhs->get_num_vectors(); i++)
00054             sqrtdiag_lhs[i]=1;
00055 
00056         if (l==r)
00057             sqrtdiag_rhs=sqrtdiag_lhs;
00058         else
00059         {
00060             sqrtdiag_rhs= new DREAL[rhs->get_num_vectors()];
00061             for (i=0; i<rhs->get_num_vectors(); i++)
00062                 sqrtdiag_rhs[i]=1;
00063         }
00064 
00065         ASSERT(sqrtdiag_lhs);
00066         ASSERT(sqrtdiag_rhs);
00067 
00068         this->lhs=(CWordFeatures*) l;
00069         this->rhs=(CWordFeatures*) l;
00070 
00071         //compute normalize to 1 values
00072         for (i=0; i<lhs->get_num_vectors(); i++)
00073         {
00074             sqrtdiag_lhs[i]=sqrt(compute(i,i));
00075 
00076             //trap divide by zero exception
00077             if (sqrtdiag_lhs[i]==0)
00078                 sqrtdiag_lhs[i]=1e-16;
00079         }
00080 
00081         // if lhs is different from rhs (train/test data)
00082         // compute also the normalization for rhs
00083         if (sqrtdiag_lhs!=sqrtdiag_rhs)
00084         {
00085             this->lhs=(CWordFeatures*) r;
00086             this->rhs=(CWordFeatures*) r;
00087 
00088             //compute normalize to 1 values
00089             for (i=0; i<rhs->get_num_vectors(); i++)
00090             {
00091                 sqrtdiag_rhs[i]=sqrt(compute(i,i));
00092 
00093                 //trap divide by zero exception
00094                 if (sqrtdiag_rhs[i]==0)
00095                     sqrtdiag_rhs[i]=1e-16;
00096             }
00097         }
00098     }
00099 
00100     this->lhs=(CWordFeatures*) l;
00101     this->rhs=(CWordFeatures*) r;
00102 
00103     initialized = true ;
00104     return result;
00105 }
00106 
00107 void CPolyMatchWordKernel::cleanup()
00108 {
00109     if (sqrtdiag_lhs != sqrtdiag_rhs)
00110         delete[] sqrtdiag_rhs;
00111     sqrtdiag_rhs=NULL;
00112 
00113     delete[] sqrtdiag_lhs;
00114     sqrtdiag_lhs=NULL;
00115 
00116     initialized=false;
00117 
00118     CKernel::cleanup();
00119 }
00120 
00121 bool CPolyMatchWordKernel::load_init(FILE* src)
00122 {
00123     return false;
00124 }
00125 
00126 bool CPolyMatchWordKernel::save_init(FILE* dest)
00127 {
00128     return false;
00129 }
00130 
00131 DREAL CPolyMatchWordKernel::compute(INT idx_a, INT idx_b)
00132 {
00133     INT alen, blen;
00134     bool afree, bfree;
00135 
00136     //fprintf(stderr, "LinKernel.compute(%ld,%ld)\n", idx_a, idx_b) ;
00137     WORD* avec=((CWordFeatures*) lhs)->get_feature_vector(idx_a, alen, afree);
00138     WORD* bvec=((CWordFeatures*) rhs)->get_feature_vector(idx_b, blen, bfree);
00139 
00140     ASSERT(alen==blen);
00141 
00142     DREAL sqrt_a= 1 ;
00143     DREAL sqrt_b= 1 ;
00144 
00145     if (initialized && use_normalization)
00146     {
00147         sqrt_a=sqrtdiag_lhs[idx_a] ;
00148         sqrt_b=sqrtdiag_rhs[idx_b] ;
00149     } ;
00150 
00151     DREAL sqrt_both=sqrt_a*sqrt_b;
00152 
00153     INT sum=0;
00154     {
00155         for (INT i=0; i<alen; i++)
00156         {
00157             sum+= (avec[i]==bvec[i]) ? 1 : 0;
00158         }
00159 
00160     }
00161 
00162     if (inhomogene)
00163         sum+=1;
00164 
00165     DREAL result=sum;
00166 
00167     for (INT j=1; j<degree; j++)
00168         result*=sum;
00169 
00170     result/=sqrt_both;
00171 
00172     ((CWordFeatures*) lhs)->free_feature_vector(avec, idx_a, afree);
00173     ((CWordFeatures*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00174 
00175     return result;
00176 }

SHOGUN Machine Learning Toolbox - Documentation