SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "kernel/SimpleLocalityImprovedStringKernel.h"
00014 #include "features/Features.h"
00015 #include "features/StringFeatures.h"
00016
00017 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00018 INT size, INT l, INT id, INT od)
00019 : CStringKernel<CHAR>(size), length(l), inner_degree(id), outer_degree(od),
00020 pyramid_weights(NULL)
00021 {
00022 }
00023
00024 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00025 CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r,
00026 INT len, INT id, INT od)
00027 : CStringKernel<CHAR>(10), length(len), inner_degree(id), outer_degree(od),
00028 pyramid_weights(NULL)
00029 {
00030 init(l, r);
00031 }
00032
00033 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00034 {
00035 cleanup();
00036 }
00037
00038 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00039 {
00040 bool result = CStringKernel<CHAR>::init(l,r);
00041
00042 if (!result)
00043 return false;
00044 INT num_features = ((CStringFeatures<CHAR>*) l)->get_max_vector_length();
00045 pyramid_weights = new DREAL[num_features];
00046 ASSERT(pyramid_weights);
00047 SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00048 num_features, length);
00049
00050 const INT PYRAL = 2 * length - 1;
00051 DREAL PYRAL_pot;
00052 INT DEGREE1_1 = (inner_degree & 0x1)==0;
00053 INT DEGREE1_1n = (inner_degree & ~0x1)!=0;
00054 INT DEGREE1_2 = (inner_degree & 0x2)!=0;
00055 INT DEGREE1_3 = (inner_degree & ~0x3)!=0;
00056 INT DEGREE1_4 = (inner_degree & 0x4)!=0;
00057 {
00058 DREAL PYRAL_ = PYRAL;
00059 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00060 if (DEGREE1_1n)
00061 {
00062 PYRAL_ *= PYRAL_;
00063 if (DEGREE1_2)
00064 PYRAL_pot *= PYRAL_;
00065 if (DEGREE1_3)
00066 {
00067 PYRAL_ *= PYRAL_;
00068 if (DEGREE1_4)
00069 PYRAL_pot *= PYRAL_;
00070 }
00071 }
00072 }
00073
00074 INT pyra_len = num_features-PYRAL+1;
00075 INT pyra_len2 = (int) pyra_len/2;
00076 {
00077 INT j;
00078 for (j = 0; j < pyra_len; j++)
00079 pyramid_weights[j] = 4*((DREAL)((j < pyra_len2)? j+1 : pyra_len-j))/((DREAL)pyra_len);
00080 for (j = 0; j < pyra_len; j++)
00081 pyramid_weights[j] /= PYRAL_pot;
00082 }
00083 return true;
00084 }
00085
00086 void CSimpleLocalityImprovedStringKernel::cleanup()
00087 {
00088 delete[] pyramid_weights;
00089 pyramid_weights = NULL;
00090
00091 CKernel::cleanup();
00092 }
00093
00094 bool CSimpleLocalityImprovedStringKernel::load_init(FILE* src)
00095 {
00096 return false;
00097 }
00098
00099 bool CSimpleLocalityImprovedStringKernel::save_init(FILE* dest)
00100 {
00101 return false;
00102 }
00103
00104 DREAL CSimpleLocalityImprovedStringKernel::dot_pyr (const CHAR* const x1,
00105 const CHAR* const x2, const INT NOF_NTS, const INT NTWIDTH,
00106 const INT DEGREE1, const INT DEGREE2, DREAL *pyra)
00107 {
00108 const INT PYRAL = 2*NTWIDTH-1;
00109 INT pyra_len, pyra_len2;
00110 DREAL pot, PYRAL_pot;
00111 DREAL sum;
00112 INT DEGREE1_1 = (DEGREE1 & 0x1)==0;
00113 INT DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00114 INT DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00115 INT DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00116 INT DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00117 {
00118 DREAL PYRAL_ = PYRAL;
00119 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00120 if (DEGREE1_1n)
00121 {
00122 PYRAL_ *= PYRAL_;
00123 if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00124 if (DEGREE1_3)
00125 {
00126 PYRAL_ *= PYRAL_;
00127 if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00128 }
00129 }
00130 }
00131
00132 ASSERT((DEGREE1 & ~0x7) == 0);
00133 ASSERT((DEGREE2 & ~0x7) == 0);
00134
00135 pyra_len = NOF_NTS-PYRAL+1;
00136 pyra_len2 = (int) pyra_len/2;
00137 {
00138 INT j;
00139 for (j = 0; j < pyra_len; j++)
00140 pyra[j] = 4*((DREAL)((j < pyra_len2) ? j+1 : pyra_len-j))/((DREAL)pyra_len);
00141 for (j = 0; j < pyra_len; j++)
00142 pyra[j] /= PYRAL_pot;
00143 }
00144
00145 register INT conv;
00146 register INT i;
00147 register INT j;
00148
00149 sum = 0.0;
00150 conv = 0;
00151 for (j = 0; j < PYRAL; j++)
00152 conv += (x1[j] == x2[j]) ? 1 : 0;
00153
00154 for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00155 {
00156 register DREAL pot2;
00157 if (i>0)
00158 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00159 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00160 {
00161 register DREAL conv2 = conv;
00162 pot2 = (DEGREE1_1) ? 1.0 : conv2;
00163 if (DEGREE1_1n)
00164 {
00165 conv2 *= conv2;
00166 if (DEGREE1_2)
00167 pot2 *= conv2;
00168 if (DEGREE1_3 && DEGREE1_4)
00169 pot2 *= conv2*conv2;
00170 }
00171 }
00172 sum += pot2*pyra[i];
00173 }
00174
00175 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00176 if ((DEGREE2 & ~0x1) != 0)
00177 {
00178 sum *= sum;
00179 if ((DEGREE2 & 0x2) != 0)
00180 pot *= sum;
00181 if ((DEGREE2 & ~0x3) != 0)
00182 {
00183 sum *= sum;
00184 if ((DEGREE2 & 0x4) != 0)
00185 pot *= sum;
00186 }
00187 }
00188 return pot;
00189 }
00190
00191 DREAL CSimpleLocalityImprovedStringKernel::compute(INT idx_a, INT idx_b)
00192 {
00193 INT alen, blen;
00194
00195 CHAR* avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00196 CHAR* bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00197
00198
00199 ASSERT(alen==blen);
00200
00201 DREAL dpt;
00202
00203 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00204 dpt = dpt / pow((double)alen, (double)outer_degree);
00205 return (DREAL) dpt;
00206 }