SimpleLocalityImprovedStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016 
00017 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00018     INT size, INT l, INT id, INT od)
00019 : CStringKernel<CHAR>(size), length(l), inner_degree(id), outer_degree(od),
00020     pyramid_weights(NULL)
00021 {
00022 }
00023 
00024 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00025     CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r,
00026     INT len, INT id, INT od)
00027 : CStringKernel<CHAR>(10), length(len), inner_degree(id), outer_degree(od),
00028     pyramid_weights(NULL)
00029 {
00030     init(l, r);
00031 }
00032 
00033 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00034 {
00035     cleanup();
00036 }
00037 
00038 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040     bool result = CStringKernel<CHAR>::init(l,r);
00041 
00042     if (!result)
00043         return false;
00044     INT num_features = ((CStringFeatures<CHAR>*) l)->get_max_vector_length();
00045     pyramid_weights = new DREAL[num_features];
00046     ASSERT(pyramid_weights);
00047     SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00048         num_features, length);
00049 
00050     const INT PYRAL = 2 * length - 1; // total window length
00051     DREAL PYRAL_pot;
00052     INT DEGREE1_1  = (inner_degree & 0x1)==0;
00053     INT DEGREE1_1n = (inner_degree & ~0x1)!=0;
00054     INT DEGREE1_2  = (inner_degree & 0x2)!=0;
00055     INT DEGREE1_3  = (inner_degree & ~0x3)!=0;
00056     INT DEGREE1_4  = (inner_degree & 0x4)!=0;
00057     {
00058     DREAL PYRAL_ = PYRAL;
00059     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00060     if (DEGREE1_1n)
00061     {
00062         PYRAL_ *= PYRAL_;
00063         if (DEGREE1_2)
00064             PYRAL_pot *= PYRAL_;
00065         if (DEGREE1_3)
00066         {
00067             PYRAL_ *= PYRAL_;
00068             if (DEGREE1_4)
00069                 PYRAL_pot *= PYRAL_;
00070         }
00071     }
00072     }
00073 
00074     INT pyra_len  = num_features-PYRAL+1;
00075     INT pyra_len2 = (int) pyra_len/2;
00076     {
00077     INT j;
00078     for (j = 0; j < pyra_len; j++)
00079         pyramid_weights[j] = 4*((DREAL)((j < pyra_len2)? j+1 : pyra_len-j))/((DREAL)pyra_len);
00080     for (j = 0; j < pyra_len; j++)
00081         pyramid_weights[j] /= PYRAL_pot;
00082     }
00083     return true;
00084 }
00085 
00086 void CSimpleLocalityImprovedStringKernel::cleanup()
00087 {
00088     delete[] pyramid_weights;
00089     pyramid_weights = NULL;
00090 
00091     CKernel::cleanup();
00092 }
00093 
00094 bool CSimpleLocalityImprovedStringKernel::load_init(FILE* src)
00095 {
00096     return false;
00097 }
00098 
00099 bool CSimpleLocalityImprovedStringKernel::save_init(FILE* dest)
00100 {
00101     return false;
00102 }
00103 
00104 DREAL CSimpleLocalityImprovedStringKernel::dot_pyr (const CHAR* const x1,
00105          const CHAR* const x2, const INT NOF_NTS, const INT NTWIDTH,
00106          const INT DEGREE1, const INT DEGREE2, DREAL *pyra)
00107 {
00108     const INT PYRAL = 2*NTWIDTH-1; // total window length
00109     INT pyra_len, pyra_len2;
00110     DREAL pot, PYRAL_pot;
00111     DREAL sum;
00112     INT DEGREE1_1 = (DEGREE1 & 0x1)==0;
00113     INT DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00114     INT DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00115     INT DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00116     INT DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00117     {
00118     DREAL PYRAL_ = PYRAL;
00119     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00120     if (DEGREE1_1n)
00121     {
00122         PYRAL_ *= PYRAL_;
00123         if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00124         if (DEGREE1_3)
00125         {
00126             PYRAL_ *= PYRAL_;
00127             if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00128         }
00129     }
00130     }
00131 
00132     ASSERT((DEGREE1 & ~0x7) == 0);
00133     ASSERT((DEGREE2 & ~0x7) == 0);
00134 
00135     pyra_len = NOF_NTS-PYRAL+1;
00136     pyra_len2 = (int) pyra_len/2;
00137     {
00138     INT j;
00139     for (j = 0; j < pyra_len; j++)
00140         pyra[j] = 4*((DREAL)((j < pyra_len2) ? j+1 : pyra_len-j))/((DREAL)pyra_len);
00141     for (j = 0; j < pyra_len; j++)
00142         pyra[j] /= PYRAL_pot;
00143     }
00144 
00145     register INT conv;
00146     register INT i;
00147     register INT j;
00148 
00149     sum = 0.0;
00150     conv = 0;
00151     for (j = 0; j < PYRAL; j++)
00152         conv += (x1[j] == x2[j]) ? 1 : 0;
00153 
00154     for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00155     {
00156         register DREAL pot2;
00157         if (i>0)
00158             conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) - 
00159                 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00160         { /* potencing of conv -- double is faster*/
00161         register DREAL conv2 = conv;
00162         pot2 = (DEGREE1_1) ? 1.0 : conv2;
00163             if (DEGREE1_1n)
00164             {
00165                 conv2 *= conv2;
00166                 if (DEGREE1_2)
00167                     pot2 *= conv2;
00168                 if (DEGREE1_3 && DEGREE1_4)
00169                     pot2 *= conv2*conv2;
00170             }
00171         }
00172         sum += pot2*pyra[i];
00173     }
00174 
00175     pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00176     if ((DEGREE2 & ~0x1) != 0)
00177     {
00178         sum *= sum;
00179         if ((DEGREE2 & 0x2) != 0)
00180             pot *= sum;
00181         if ((DEGREE2 & ~0x3) != 0)
00182         {
00183             sum *= sum;
00184             if ((DEGREE2 & 0x4) != 0)
00185                 pot *= sum;
00186         }
00187     }
00188     return pot;
00189 }
00190 
00191 DREAL CSimpleLocalityImprovedStringKernel::compute(INT idx_a, INT idx_b)
00192 {
00193     INT alen, blen;
00194 
00195     CHAR* avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00196     CHAR* bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00197 
00198     // can only deal with strings of same length
00199     ASSERT(alen==blen);
00200 
00201     DREAL dpt;
00202 
00203     dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00204     dpt = dpt / pow((double)alen, (double)outer_degree);
00205     return (DREAL) dpt;
00206 }

SHOGUN Machine Learning Toolbox - Documentation