FixedDegreeStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "kernel/FixedDegreeStringKernel.h"
00013 #include "features/Features.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016 
00017 CFixedDegreeStringKernel::CFixedDegreeStringKernel(INT size, INT d)
00018 : CStringKernel<CHAR>(size), degree(d), sqrtdiag_lhs(NULL),
00019     sqrtdiag_rhs(NULL), initialized(false)
00020 {
00021 }
00022 
00023 CFixedDegreeStringKernel::CFixedDegreeStringKernel(
00024     CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, INT d)
00025 : CStringKernel<CHAR>(10), degree(d), sqrtdiag_lhs(NULL),
00026     sqrtdiag_rhs(NULL), initialized(false)
00027 {
00028     init(l, r);
00029 }
00030 
00031 CFixedDegreeStringKernel::~CFixedDegreeStringKernel()
00032 {
00033     cleanup();
00034 }
00035 
00036 bool CFixedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
00037 {
00038     bool result = CStringKernel<CHAR>::init(l, r);
00039     initialized = false;
00040 
00041     if (sqrtdiag_lhs!=sqrtdiag_rhs)
00042         delete[] sqrtdiag_rhs;
00043     sqrtdiag_rhs=NULL;
00044     delete[] sqrtdiag_lhs;
00045     sqrtdiag_lhs=new DREAL[lhs->get_num_vectors()];
00046 
00047     if (l==r)
00048         sqrtdiag_rhs=sqrtdiag_lhs;
00049     else
00050         sqrtdiag_rhs=new DREAL[rhs->get_num_vectors()];
00051 
00052     this->lhs=(CStringFeatures<CHAR>*) l;
00053     this->rhs=(CStringFeatures<CHAR>*) l;
00054 
00055     CKernel::init_sqrt_diag(sqrtdiag_lhs, lhs->get_num_vectors());
00056     // if lhs is different from rhs (train/test data)
00057     // compute also the normalization for rhs
00058     if (sqrtdiag_lhs!=sqrtdiag_rhs)
00059     {
00060         this->lhs = (CStringFeatures<CHAR>*) r;
00061         this->rhs = (CStringFeatures<CHAR>*) r;
00062         CKernel::init_sqrt_diag(sqrtdiag_rhs, rhs->get_num_vectors());
00063     }
00064 
00065     this->lhs = (CStringFeatures<CHAR>*) l;
00066     this->rhs = (CStringFeatures<CHAR>*) r;
00067 
00068     initialized = true;
00069     return result;
00070 }
00071 
00072 void CFixedDegreeStringKernel::cleanup()
00073 {
00074     if (sqrtdiag_lhs != sqrtdiag_rhs)
00075         delete[] sqrtdiag_rhs;
00076     sqrtdiag_rhs = NULL;
00077 
00078     delete[] sqrtdiag_lhs;
00079     sqrtdiag_lhs = NULL;
00080 
00081     initialized = false;
00082 
00083     CKernel::cleanup();
00084 }
00085 
00086 bool CFixedDegreeStringKernel::load_init(FILE* src)
00087 {
00088     return false;
00089 }
00090 
00091 bool CFixedDegreeStringKernel::save_init(FILE* dest)
00092 {
00093     return false;
00094 }
00095 
00096 DREAL CFixedDegreeStringKernel::compute(INT idx_a, INT idx_b)
00097 {
00098     INT alen, blen;
00099 
00100     CHAR* avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00101     CHAR* bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00102     // can only deal with strings of same length
00103     ASSERT(alen==blen);
00104 
00105     DREAL sqrt = initialized ? (sqrtdiag_lhs[idx_a]*sqrtdiag_rhs[idx_b]) : 1.0;
00106     LONG sum = 0;
00107     for (INT i = 0; i<alen-degree+1; i++)
00108     {
00109         bool match = true;
00110 
00111         for (INT j = i; j<i+degree && match; j++)
00112             match = avec[j]==bvec[j];
00113         if (match)
00114             sum++;
00115     }
00116     return (DREAL) sum/sqrt;
00117 }

SHOGUN Machine Learning Toolbox - Documentation