LinearStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "lib/Mathematics.h"
00014 #include "kernel/LinearStringKernel.h"
00015 #include "features/StringFeatures.h"
00016 
00017 CLinearStringKernel::CLinearStringKernel(INT size, bool dr, DREAL s)
00018 : CStringKernel<CHAR>(size), scale(s), do_rescale(dr),
00019     initialized(false), normal(NULL)
00020 {
00021 }
00022 
00023 CLinearStringKernel::CLinearStringKernel(
00024     CStringFeatures<CHAR>* l, CStringFeatures<CHAR>* r, bool dr, DREAL s)
00025 : CStringKernel<CHAR>(10), scale(s), do_rescale(dr),
00026     initialized(false), normal(NULL)
00027 {
00028     init(l, r);
00029 }
00030 
00031 CLinearStringKernel::~CLinearStringKernel()
00032 {
00033     cleanup();
00034 }
00035 
00036 bool CLinearStringKernel::init(CFeatures *l, CFeatures *r)
00037 {
00038     CStringKernel<CHAR>::init(l, r);
00039 
00040     if (!initialized)
00041         init_rescale();
00042     SG_INFO("rescaling kernel by %g (num:%d)\n", scale,
00043         CMath::min(l->get_num_vectors(), r->get_num_vectors()));
00044     return true;
00045 }
00046 
00047 void CLinearStringKernel::init_rescale()
00048 {
00049     if (!do_rescale)
00050         return ;
00051     LONGREAL sum = 0;
00052     scale = 1.0;
00053     for (LONG i = 0; i<lhs->get_num_vectors() && i<rhs->get_num_vectors(); i++)
00054         sum += compute(i, i);
00055 
00056     if (sum > pow(2, 8*sizeof(LONG)))
00057         SG_ERROR("the sum %lf does not fit into integer of %d bits "
00058             "expect bogus results.\n", sum, 8*sizeof(LONG));
00059     scale = sum/CMath::min(lhs->get_num_vectors(), rhs->get_num_vectors());
00060     initialized = true;
00061 }
00062 
00063 void CLinearStringKernel::cleanup()
00064 {
00065     delete_optimization();
00066 
00067     CKernel::cleanup();
00068 }
00069 
00070 bool CLinearStringKernel::load_init(FILE *src)
00071 {
00072     return false;
00073 }
00074 
00075 bool CLinearStringKernel::save_init(FILE *dest)
00076 {
00077     return false;
00078 }
00079 
00080 void CLinearStringKernel::clear_normal()
00081 {
00082     memset(normal, 0, lhs->get_num_vectors()*sizeof(DREAL));
00083 }
00084 
00085 void CLinearStringKernel::add_to_normal(INT idx, DREAL weight)
00086 {
00087     INT vlen;
00088     CHAR *vec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx, vlen);
00089 
00090     for (INT i = 0; i<vlen; i++)
00091         normal[i] += weight*vec[i];
00092 }
00093 
00094 DREAL CLinearStringKernel::compute(INT idx_a, INT idx_b)
00095 {
00096     INT alen, blen;
00097 
00098     CHAR *avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(idx_a, alen);
00099     CHAR *bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00100 
00101     ASSERT(alen==blen);
00102     double sum = 0;
00103     for (INT i = 0; i<alen; i++) /* FIXME: use dot from Mathematics.h */
00104         sum += ((LONG) avec[i])*((LONG) bvec[i]);
00105     return sum/scale;
00106 }
00107 
00108 bool CLinearStringKernel::init_optimization(INT num_suppvec, INT *sv_idx,
00109         DREAL *alphas)
00110 {
00111     SG_DEBUG("drin gelandet yeah\n");
00112     INT i, alen;
00113 
00114     int num_feat = ((CStringFeatures<CHAR>*) lhs)->get_max_vector_length();
00115     ASSERT(num_feat);
00116 
00117     normal = new DREAL[num_feat];
00118     ASSERT(normal);
00119     clear_normal();
00120 
00121     for (i = 0; i<num_suppvec; i++)
00122     {
00123         CHAR *avec = ((CStringFeatures<CHAR>*) lhs)->get_feature_vector(sv_idx[i], alen);
00124         ASSERT(avec);
00125 
00126         for (INT j = 0; j<num_feat; j++)
00127             normal[j] += alphas[i]*((double) avec[j]);
00128     }
00129     set_is_initialized(true);
00130     return true;
00131 }
00132 
00133 bool CLinearStringKernel::delete_optimization()
00134 {
00135     delete[] normal;
00136     normal = NULL;
00137     set_is_initialized(false);
00138     return true;
00139 }
00140 
00141 DREAL CLinearStringKernel::compute_optimized(INT idx_b)
00142 {
00143     INT blen;
00144 
00145     CHAR *bvec = ((CStringFeatures<CHAR>*) rhs)->get_feature_vector(idx_b, blen);
00146 
00147     double result = 0;
00148     for (INT i = 0; i<blen; i++) /* FIXME: Use dot() from Mathematics.h */
00149         result += normal[i]*((double) bvec[i]);
00150     return result/scale;
00151 }

SHOGUN Machine Learning Toolbox - Documentation