MindyGramKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Konrad Rieck
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  *
00010  * Indentation: bcpp -f 1 -s -ylcnc -bcl -i 4
00011  */
00012 
00013 #include "lib/config.h"
00014 
00015 #ifdef HAVE_MINDY
00016 
00017 #include <mindy.h>
00018 
00019 #include "lib/common.h"
00020 #include "features/MindyGramFeatures.h"
00021 #include "lib/io.h"
00022 #include "kernel/MindyGramKernel.h"
00023 
00024 /*
00025  * Similarity parameters
00026  */
00027 param_spec_t p_map[] = {
00028     { "expo",  SP_EXPO,  2.0,  "Exponent (polynomial, minkowski)" },
00029     { "shift", SP_SHIFT, 0.0,  "Shift value (polynomial)" },
00030     { "dist",  SP_DIST,  ST_MINKOWSKI, "Distance name (rbf)" },
00031     { "width", SP_WIDTH, 1.0,  "Kernel width (rbf)" },
00032     { NULL },
00033 };
00034 
00041 CMindyGramKernel::CMindyGramKernel(INT ch, CHAR *meas, DREAL w)
00042 : CKernel(ch)
00043 {
00044     /* Init attributes */
00045     sdiag_lhs=NULL;
00046     sdiag_rhs=NULL;
00047     initialized=false;
00048     measure=meas;
00049     norm=NO_NORMALIZATION;
00050     width=w;
00051     cache=0;
00052     
00053     /* Check for similarity coefficients */
00054     simcof=sico_get_type(measure);
00055 
00056     /* Create similarity measure */
00057     SG_INFO("Initializing Mindy kernel.\n");
00058     if (simcof==SC_NONE)
00059         kernel=sm_create(sm_get_type(measure));
00060     else
00061         kernel=sm_create(ST_MINKERN);
00062    
00063     SG_INFO("Mindy similarity measure: %s (using %s).\n",
00064         measure, sm_get_descr(kernel->type));
00065 
00066     /* Initialize optimization */
00067     if (kernel->type == ST_LINEAR)
00068     {
00069         SG_INFO("Optimization supported.\n");
00070         properties |= KP_LINADD;
00071     }
00072 
00073     normal=NULL;
00074     clear_normal();
00075 }
00076 
00077 CMindyGramKernel::CMindyGramKernel(
00078     CFeatures* l, CFeatures* r, CHAR *m, DREAL w)
00079 : CKernel(10), sdiag_lhs(NULL), sdiag_rhs(NULL), initialized(false),
00080     measure(m), norm(NO_NORMALIZATION), width(w)
00081 {
00082     /* Check for similarity coefficients */
00083     simcof=sico_get_type(measure);
00084 
00085     /* Create similarity measure */
00086     SG_INFO("Initializing Mindy kernel.\n");
00087     if (simcof==SC_NONE)
00088         kernel=sm_create(sm_get_type(measure));
00089     else
00090         kernel=sm_create(ST_MINKERN);
00091    
00092     SG_INFO("Mindy similarity measure: %s (using %s).\n",
00093          measure, sm_get_descr(kernel->type));
00094 
00095     /* Initialize optimization */
00096     if (kernel->type == ST_LINEAR)
00097     {
00098         SG_INFO("Optimization supported.\n");
00099         properties |= KP_LINADD;
00100     }
00101 
00102     normal=NULL;
00103     clear_normal();
00104 
00105     init(l, r);
00106 }
00107 
00108 /*
00109  * Set MD5 cache
00110  */
00111 void CMindyGramKernel::set_md5cache(INT c)
00112 {
00113     cache = c;
00114     if (cache <= 0) 
00115         return;
00116         
00117     SG_INFO("Creating MD5 cache of %d kb", cache);
00118     md5_cache_create(cache);
00119 } 
00120 
00121 /*
00122  * Set parameters 
00123  */
00124 void CMindyGramKernel::set_param(CHAR *param) 
00125 {
00126     /* Parse and set parameters */
00127     parse_params(param);
00128 
00129     /* Display paramater list */
00130     for (INT i = 0; p_map[i].name; i++) {
00131         if (p_map[i].idx != SP_DIST)
00132             SG_INFO( "Param %8s=%8.6f\t %s\n", 
00133             p_map[i].name, p_map[i].val, p_map[i].descr);
00134         else
00135             SG_INFO( "Param %8s=%s\t %s\n", p_map[i].name, 
00136                         sm_get_name((sm_type_t) p_map[i].val), 
00137                         p_map[i].descr);
00138     }
00139 } 
00140 
00141 /*
00142  * Set normalization
00143  */
00144 void CMindyGramKernel::set_norm(ENormalizationType n)
00145 {
00146     norm = n;
00147 }
00148 
00152 CMindyGramKernel::~CMindyGramKernel()
00153 {
00154     cleanup();
00155     
00156     if (cache > 0)
00157         md5_cache_destroy();
00158     
00159     sm_destroy(kernel);
00160 }
00161 
00165 void CMindyGramKernel::parse_params(CHAR *pa)
00166 {
00167     INT i;
00168     CHAR *t, *p;
00169 
00170     if (strlen(pa) == 0)
00171         return;
00172 
00173     /* Loop over delimited parameter definitions */
00174     while ((t = strsep(&pa, ",;"))) {
00175         for (i = 0; p_map[i].name; i++) {
00176             /* Check for parameter name */
00177             size_t l = strlen(p_map[i].name);
00178             if (!strncasecmp(t, p_map[i].name, l)) {
00179                 p = t + l + 1;
00180                 if (p_map[i].idx == SP_DIST)
00181                     p_map[i].val = sm_get_type(p);
00182                 else
00183                     p_map[i].val = atof(p);
00184                 break;
00185             }
00186         }
00187         if (!p_map[i].name)
00188             SG_WARNING( "Unknown parameter '%s'. Skipping", t);
00189      }   
00190 
00191      /* Set parameters */   
00192      for (i = 0; p_map[i].name; i++)
00193     sm_set_param(kernel, p_map[i].idx, p_map[i].val);    
00194 } 
00195 
00199 void CMindyGramKernel::cleanup()
00200 {
00201     delete_optimization();
00202     clear_normal();
00203 
00204     CKernel::cleanup();
00205 }
00206 
00212 void CMindyGramKernel::remove_lhs()
00213 {
00214     delete_optimization();
00215 
00216 #ifdef SVMLIGHT
00217     if (lhs)
00218         cache_reset();
00219 #endif
00220 
00221     if (sdiag_lhs != sdiag_rhs)
00222         delete[] sdiag_rhs;
00223     delete[] sdiag_lhs;
00224 
00225     lhs = NULL ;
00226     rhs = NULL ;
00227     initialized = false;
00228     sdiag_lhs = NULL;
00229     sdiag_rhs = NULL;
00230 }
00231 
00235 void CMindyGramKernel::remove_rhs()
00236 {
00237 #ifdef SVMLIGHT
00238     if (rhs)
00239         cache_reset();
00240 #endif
00241 
00242     if (sdiag_lhs != sdiag_rhs)
00243         delete[] sdiag_rhs;
00244 
00245     sdiag_rhs = sdiag_lhs;
00246     rhs = lhs;
00247 }
00248 
00256 bool CMindyGramKernel::init(CFeatures* l, CFeatures* r)
00257 {
00258 
00259     SG_DEBUG( "Initializing MindyGramKernel %p %p\n", l, r);
00260     /* Call constructor of super class */
00261     bool result = CKernel::init(l,r);
00262 
00263     initialized = false;
00264     INT i;
00265 
00266     /* Assert correct types of features */
00267     ASSERT(l->get_feature_class()== C_MINDYGRAM);
00268     ASSERT(r->get_feature_class()==C_MINDYGRAM);
00269     ASSERT(l->get_feature_type()==F_ULONG);
00270     ASSERT(r->get_feature_type()==F_ULONG);
00271 
00272     /* Clean diagonals */
00273     if (sdiag_lhs != sdiag_rhs)
00274         delete[] sdiag_rhs;
00275     sdiag_rhs=NULL;
00276     delete[] sdiag_lhs;
00277     sdiag_lhs=NULL;
00278 
00279     /* Initialize left normalization diagonal */
00280     sdiag_lhs= new DREAL[lhs->get_num_vectors()];
00281     for (i = 0; i < lhs->get_num_vectors(); i++)
00282         sdiag_lhs[i]=1;
00283 
00284     /* Initialize (or copy) right normalization diagonal */
00285     if (l == r) {
00286         sdiag_rhs=sdiag_lhs;
00287     } else {
00288         sdiag_rhs= new DREAL[rhs->get_num_vectors()];
00289         for (i = 0; i<rhs->get_num_vectors(); i++)
00290             sdiag_rhs[i]=1;
00291     }
00292 
00293     this->lhs=(CMindyGramFeatures *) l;
00294     this->rhs=(CMindyGramFeatures *) l;
00295 
00296     /* Compute left normalization diagonal */
00297     for (i = 0; i<lhs->get_num_vectors(); i++) {
00298         sdiag_lhs[i] = sqrt(compute(i,i));
00299 
00300         /* trap divide by zero exception */
00301         if (sdiag_lhs[i] == 0)
00302             sdiag_lhs[i] = 1e-16;
00303     }
00304 
00305     /*  Skip if rhs computation if necessary */
00306     if (sdiag_lhs != sdiag_rhs) {
00307         this->lhs=(CMindyGramFeatures *) r;
00308         this->rhs=(CMindyGramFeatures *) r;
00309 
00310         /* Compute right normalization diagonal */
00311         for (i=0; i<rhs->get_num_vectors(); i++) {
00312             sdiag_rhs[i] = sqrt(compute(i,i));
00313 
00314             /* trap divide by zero exception */
00315             if (sdiag_rhs[i]==0)
00316                 sdiag_rhs[i]=1e-16;
00317         }
00318     }
00319     
00320     /* Reset feature pointers */
00321     this->lhs=(CStringFeatures<WORD>*) l;
00322     this->rhs=(CStringFeatures<WORD>*) r;
00323 
00324     initialized = true;
00325     return result;
00326 }
00327 
00334 DREAL CMindyGramKernel::compute(INT i, INT j)
00335 {
00336     /* Cast things to mindy gram features */
00337     CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00338     CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00339 
00340     /* Call (internal) mindy comparison function */
00341     DREAL result = gram_cmp(kernel, lm->get_feature_vector(i),
00342         rm->get_feature_vector(j));
00343     
00344     /* Compute similartiy coefficients and convert to distance */
00345     if (simcof != SC_NONE)
00346         result = 1 - sico(simcof, result, sdiag_lhs[i], sdiag_rhs[j]);
00347 
00348     if (sm_get_class(kernel->type) == SC_DIST || simcof != SC_NONE) {
00349         if (width > 1e-10) {
00350               /* Distance to kernel using RBF */
00351               result = exp(-result / width);
00352         } else {
00353             if (i != j) {
00354                 /* Distance to kernel, the Hilbertian way */
00355                 result = 0.5 * (sdiag_lhs[i] + sdiag_rhs[j] - result);    
00356             } else {
00357                 /* Distance based norm  */
00358                 gram_t *zero = gram_empty();
00359                 result = gram_cmp(kernel, lm->get_feature_vector(i), zero);
00360                 gram_destroy(zero);
00361             }  
00362         }   
00363     }    
00364 
00365     if (!initialized)
00366         return result;
00367 
00368     /* Normalize result */
00369     switch (norm) {
00370         case NO_NORMALIZATION:
00371             return result;
00372         case SQRT_NORMALIZATION:
00373             return result/sqrt(sdiag_lhs[i]*sdiag_rhs[i]);
00374         case FULL_NORMALIZATION:
00375             return result/(sdiag_lhs[i]*sdiag_rhs[j]);
00376         default:
00377             SG_ERROR( "Unknown Normalization in use!\n");
00378             return -CMath::INFTY;
00379     }
00380 }
00381 
00387 void CMindyGramKernel::add_to_normal(INT i, DREAL w)
00388 {
00389     /* Add indexed vector to normal */
00390     CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00391     
00392     /* Initialize empty normal vector if necessary */
00393     if (!normal) 
00394         normal = gram_empty();
00395 
00396     gram_add(normal, lm->get_feature_vector(i),
00397                      normalize_weight(w, i, norm));
00398 
00399     set_is_initialized(true);
00400 }
00401 
00405 void CMindyGramKernel::clear_normal()
00406 {
00407     if (normal)
00408         gram_destroy(normal);
00409     normal = NULL;
00410     set_is_initialized(false);
00411 }
00412 
00419 bool CMindyGramKernel::init_optimization(INT n, INT *is, DREAL * ws)
00420 {
00421     /* Delete old optimization */
00422     delete_optimization();
00423 
00424     /* Return empty optimization if no vectors are given */
00425     if (n <= 0) {
00426         set_is_initialized(true);
00427         SG_DEBUG( "empty set of SVs\n");
00428         return true;
00429     }
00430 
00431     SG_DEBUG( "initializing MindyGramKernel optimization\n");
00432     for (int i = 0; i < n; i++) {
00433         if ( (i % (n / 10 + 1)) == 0)
00434             SG_PROGRESS(i, 0, n);
00435 
00436         /* Call add to normal */
00437         add_to_normal(is[i], ws[i]);
00438     }
00439     SG_PRINT( "Done.         \n");
00440 
00441     set_is_initialized(true);
00442     return true;
00443 }
00444 
00448 bool CMindyGramKernel::delete_optimization()
00449 {
00450     SG_DEBUG( "deleting MindyGramKernel optimization\n");
00451     clear_normal();
00452     return true;
00453 }
00454 
00460 DREAL CMindyGramKernel::compute_optimized(INT i)
00461 {
00462     if (!get_is_initialized()) {
00463         SG_ERROR( "MindyGramKernel optimization not initialized\n");
00464         return -CMath::INFTY;
00465     }
00466 
00467     CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00468     DREAL result = gram_cmp(kernel, rm->get_feature_vector(i), normal);
00469 
00470     switch (norm) {
00471         case NO_NORMALIZATION:
00472             return result;
00473         case SQRT_NORMALIZATION:
00474             return result/sqrt(sdiag_rhs[i]);
00475         case FULL_NORMALIZATION:
00476             return result/sdiag_rhs[i];
00477         default:
00478             SG_ERROR( "Unknown Normalization in use!\n");
00479             return -CMath::INFTY;
00480     }
00481 }
00482 
00483 bool CMindyGramKernel::load_init(FILE* src)
00484 {
00485     return false;
00486 }
00487 
00488 bool CMindyGramKernel::save_init(FILE* dest)
00489 {
00490     return false;
00491 }
00492 #endif

SHOGUN Machine Learning Toolbox - Documentation