LinearWordKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "lib/Mathematics.h"
00014 #include "kernel/LinearWordKernel.h"
00015 #include "features/WordFeatures.h"
00016 
00017 CLinearWordKernel::CLinearWordKernel(INT size, bool dr, DREAL s)
00018 : CSimpleKernel<WORD>(size), scale(s), do_rescale(dr), initialized(false),
00019     normal(NULL)
00020 {
00021 }
00022 
00023 CLinearWordKernel::CLinearWordKernel(
00024     CWordFeatures* l, CWordFeatures* r, bool dr, DREAL s)
00025 : CSimpleKernel<WORD>(10), scale(s), do_rescale(dr), initialized(false),
00026     normal(NULL)
00027 {
00028     init(l, r);
00029 }
00030 
00031 CLinearWordKernel::~CLinearWordKernel()
00032 {
00033     cleanup();
00034 }
00035 
00036 bool CLinearWordKernel::init(CFeatures* l, CFeatures* r)
00037 {
00038     CSimpleKernel<WORD>::init(l, r);
00039 
00040     if (!initialized)
00041         init_rescale() ;
00042 
00043     SG_INFO( "rescaling kernel by %g (num:%d)\n",scale, CMath::min(l->get_num_vectors(), r->get_num_vectors()));
00044 
00045     return true;
00046 }
00047 
00048 void CLinearWordKernel::init_rescale()
00049 {
00050     if (!do_rescale)
00051         return ;
00052     LONGREAL sum=0;
00053     scale=1.0;
00054     for (INT i=0; (i<lhs->get_num_vectors() && i<rhs->get_num_vectors()); i++)
00055         sum+=compute(i, i);
00056 
00057     if ( sum > (pow((double) 2, (double) 8*sizeof(LONG))) ) {
00058       SG_ERROR( "the sum %lf does not fit into integer of %d bits expect bogus results.\n", sum, 8*sizeof(LONG));
00059    }
00060     scale=sum/CMath::min(lhs->get_num_vectors(), rhs->get_num_vectors());
00061     initialized=true;
00062 }
00063 
00064 void CLinearWordKernel::cleanup()
00065 {
00066     delete_optimization();
00067 
00068     CKernel::cleanup();
00069 }
00070 
00071 bool CLinearWordKernel::load_init(FILE* src)
00072 {
00073     return false;
00074 }
00075 
00076 bool CLinearWordKernel::save_init(FILE* dest)
00077 {
00078     return false;
00079 }
00080 
00081 void CLinearWordKernel::clear_normal()
00082 {
00083     int num = lhs->get_num_vectors();
00084 
00085     for (int i=0; i<num; i++)
00086         normal[i]=0;
00087 }
00088 
00089 void CLinearWordKernel::add_to_normal(INT idx, DREAL weight) 
00090 {
00091     INT vlen;
00092     bool vfree;
00093     WORD* vec=((CWordFeatures*) lhs)->get_feature_vector(idx, vlen, vfree);
00094 
00095     for (int i=0; i<vlen; i++)
00096         normal[i]+= weight*vec[i];
00097 
00098     ((CWordFeatures*) lhs)->free_feature_vector(vec, idx, vfree);
00099 }
00100   
00101 DREAL CLinearWordKernel::compute(INT idx_a, INT idx_b)
00102 {
00103     INT alen, blen;
00104     bool afree, bfree;
00105 
00106     WORD* avec=((CWordFeatures*) lhs)->get_feature_vector(idx_a, alen, afree);
00107     WORD* bvec=((CWordFeatures*) rhs)->get_feature_vector(idx_b, blen, bfree);
00108     ASSERT(alen==blen);
00109 
00110     double sum=0;
00111     for (LONG i=0; i<alen; i++)
00112         sum+=((LONG) avec[i])*((LONG) bvec[i]);
00113 
00114     DREAL result=sum/scale;
00115     ((CWordFeatures*) lhs)->free_feature_vector(avec, idx_a, afree);
00116     ((CWordFeatures*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00117 
00118     return result;
00119 }
00120 
00121 bool CLinearWordKernel::init_optimization(INT num_suppvec, INT* sv_idx, DREAL* alphas) 
00122 {
00123     SG_DEBUG("drin gelandet yeah\n");
00124     INT alen;
00125     bool afree;
00126 
00127     int num_feat=((CWordFeatures*) lhs)->get_num_features();
00128     ASSERT(num_feat);
00129 
00130     normal=new DREAL[num_feat];
00131     for (INT i=0; i<num_feat; i++)
00132         normal[i]=0;
00133 
00134     for (int i=0; i<num_suppvec; i++)
00135     {
00136         WORD* avec=((CWordFeatures*) lhs)->get_feature_vector(sv_idx[i], alen, afree);
00137         ASSERT(avec);
00138 
00139         for (int j=0; j<num_feat; j++)
00140             normal[j]+=alphas[i] * ((double) avec[j]);
00141 
00142         ((CWordFeatures*) lhs)->free_feature_vector(avec, 0, afree);
00143     }
00144 
00145     set_is_initialized(true);
00146     return true;
00147 }
00148 
00149 bool CLinearWordKernel::delete_optimization()
00150 {
00151     delete[] normal;
00152     normal=NULL;
00153     set_is_initialized(false);
00154 
00155     return true;
00156 }
00157 
00158 DREAL CLinearWordKernel::compute_optimized(INT idx_b) 
00159 {
00160     INT blen;
00161     bool bfree;
00162 
00163     WORD* bvec=((CWordFeatures*) rhs)->get_feature_vector(idx_b, blen, bfree);
00164 
00165     double result=0;
00166     {
00167         for (INT i=0; i<blen; i++)
00168             result+= normal[i] * ((double) bvec[i]);
00169     }
00170     result/=scale;
00171 
00172     ((CWordFeatures*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00173 
00174     return result;
00175 }

SHOGUN Machine Learning Toolbox - Documentation