MindyGramKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Konrad Rieck
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  *
00010  * Indentation: bcpp -f 1 -s -ylcnc -bcl -i 4
00011  */
00012 
00013 #include "lib/config.h"
00014 
00015 #ifdef HAVE_MINDY
00016 
00017 #include <mindy.h>
00018 
00019 #include "lib/common.h"
00020 #include "features/MindyGramFeatures.h"
00021 #include "lib/io.h"
00022 #include "kernel/MindyGramKernel.h"
00023 #include "kernel/SqrtDiagKernelNormalizer.h"
00024 
00025 /*
00026  * Similarity parameters
00027  */
00028 param_spec_t p_map[] = {
00029     { "expo",  SP_EXPO,  2.0,  "Exponent (polynomial, minkowski)" },
00030     { "shift", SP_SHIFT, 0.0,  "Shift value (polynomial)" },
00031     { "dist",  SP_DIST,  ST_MINKOWSKI, "Distance name (rbf)" },
00032     { "width", SP_WIDTH, 1.0,  "Kernel width (rbf)" },
00033     { NULL },
00034 };
00035 
00042 CMindyGramKernel::CMindyGramKernel(int32_t ch, char *meas, float64_t w)
00043 : CKernel(ch)
00044 {
00045     /* Init attributes */
00046     measure=meas;
00047     norm=NO_NORMALIZATION;
00048     width=w;
00049     cache=0;
00050     
00051     /* Check for similarity coefficients */
00052     simcof=sico_get_type(measure);
00053 
00054     /* Create similarity measure */
00055     SG_INFO("Initializing Mindy kernel.\n");
00056     if (simcof==SC_NONE)
00057         kernel=sm_create(sm_get_type(measure));
00058     else
00059         kernel=sm_create(ST_MINKERN);
00060    
00061     SG_INFO("Mindy similarity measure: %s (using %s).\n",
00062         measure, sm_get_descr(kernel->type));
00063 
00064     /* Initialize optimization */
00065     if (kernel->type == ST_LINEAR)
00066     {
00067         SG_INFO("Optimization supported.\n");
00068         properties |= KP_LINADD;
00069     }
00070 
00071     normal=NULL;
00072     clear_normal();
00073 
00074     set_normalizer(new CSqrtDiagKernelNormalizer());
00075 }
00076 
00077 CMindyGramKernel::CMindyGramKernel(
00078     CFeatures* l, CFeatures* r, char *m, float64_t w)
00079 : CKernel(10), measure(m), width(w)
00080 {
00081     /* Check for similarity coefficients */
00082     simcof=sico_get_type(measure);
00083 
00084     /* Create similarity measure */
00085     SG_INFO("Initializing Mindy kernel.\n");
00086     if (simcof==SC_NONE)
00087         kernel=sm_create(sm_get_type(measure));
00088     else
00089         kernel=sm_create(ST_MINKERN);
00090    
00091     SG_INFO("Mindy similarity measure: %s (using %s).\n",
00092          measure, sm_get_descr(kernel->type));
00093 
00094     /* Initialize optimization */
00095     if (kernel->type == ST_LINEAR)
00096     {
00097         SG_INFO("Optimization supported.\n");
00098         properties |= KP_LINADD;
00099     }
00100 
00101     normal=NULL;
00102     clear_normal();
00103     init_normalizer(new CSqrtDiagKernelNormalizer());
00104 
00105     init(l, r);
00106 }
00107 
00108 /*
00109  * Set MD5 cache
00110  */
00111 void CMindyGramKernel::set_md5cache(int32_t c)
00112 {
00113     cache = c;
00114     if (cache <= 0) 
00115         return;
00116         
00117     SG_INFO("Creating MD5 cache of %d kb", cache);
00118     md5_cache_create(cache);
00119 } 
00120 
00121 /*
00122  * Set parameters 
00123  */
00124 void CMindyGramKernel::set_param(char *param) 
00125 {
00126     /* Parse and set parameters */
00127     parse_params(param);
00128 
00129     /* Display paramater list */
00130     for (int32_t i = 0; p_map[i].name; i++) {
00131         if (p_map[i].idx != SP_DIST)
00132             SG_INFO( "Param %8s=%8.6f\t %s\n", 
00133             p_map[i].name, p_map[i].val, p_map[i].descr);
00134         else
00135             SG_INFO( "Param %8s=%s\t %s\n", p_map[i].name, 
00136                         sm_get_name((sm_type_t) p_map[i].val), 
00137                         p_map[i].descr);
00138     }
00139 } 
00140 
00144 CMindyGramKernel::~CMindyGramKernel()
00145 {
00146     cleanup();
00147     
00148     if (cache > 0)
00149         md5_cache_destroy();
00150     
00151     sm_destroy(kernel);
00152 }
00153 
00157 void CMindyGramKernel::parse_params(char *pa)
00158 {
00159     int32_t i;
00160     char *t, *p;
00161 
00162     if (strlen(pa) == 0)
00163         return;
00164 
00165     /* Loop over delimited parameter definitions */
00166     while ((t = strsep(&pa, ",;"))) {
00167         for (i = 0; p_map[i].name; i++) {
00168             /* Check for parameter name */
00169             size_t l = strlen(p_map[i].name);
00170             if (!strncasecmp(t, p_map[i].name, l)) {
00171                 p = t + l + 1;
00172                 if (p_map[i].idx == SP_DIST)
00173                     p_map[i].val = sm_get_type(p);
00174                 else
00175                     p_map[i].val = atof(p);
00176                 break;
00177             }
00178         }
00179         if (!p_map[i].name)
00180             SG_WARNING( "Unknown parameter '%s'. Skipping", t);
00181      }   
00182 
00183      /* Set parameters */   
00184      for (i = 0; p_map[i].name; i++)
00185     sm_set_param(kernel, p_map[i].idx, p_map[i].val);    
00186 } 
00187 
00191 void CMindyGramKernel::cleanup()
00192 {
00193     delete_optimization();
00194     clear_normal();
00195 
00196     CKernel::cleanup();
00197 }
00198 
00204 void CMindyGramKernel::remove_lhs()
00205 {
00206     delete_optimization();
00207 
00208 #ifdef SVMLIGHT
00209     if (lhs)
00210         cache_reset();
00211 #endif
00212 
00213     lhs = NULL ;
00214     rhs = NULL ;
00215 }
00216 
00220 void CMindyGramKernel::remove_rhs()
00221 {
00222 #ifdef SVMLIGHT
00223     if (rhs)
00224         cache_reset();
00225 #endif
00226 
00227     if (sdiag_lhs != sdiag_rhs)
00228         delete[] sdiag_rhs;
00229 
00230     sdiag_rhs = sdiag_lhs;
00231     rhs = lhs;
00232 }
00233 
00240 bool CMindyGramKernel::init(CFeatures* l, CFeatures* r)
00241 {
00242     SG_DEBUG( "Initializing MindyGramKernel %p %p\n", l, r);
00243     /* Call constructor of super class */
00244     bool result = CKernel::init(l,r);
00245 
00246     /* Assert correct types of features */
00247     ASSERT(l->get_feature_class()== C_MINDYGRAM);
00248     ASSERT(r->get_feature_class()==C_MINDYGRAM);
00249     ASSERT(l->get_feature_type()==F_ULONG);
00250     ASSERT(r->get_feature_type()==F_ULONG);
00251 
00252     return init_normalizer();
00253 }
00254 
00261 float64_t CMindyGramKernel::compute(int32_t i, int32_t j)
00262 {
00263     /* Cast things to mindy gram features */
00264     CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00265     CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00266 
00267     /* Call (internal) mindy comparison function */
00268     float64_t result = gram_cmp(kernel, lm->get_feature_vector(i),
00269         rm->get_feature_vector(j));
00270     
00271     /* Compute similartiy coefficients and convert to distance */
00272     if (simcof != SC_NONE)
00273         result = 1 - sico(simcof, result, sdiag_lhs[i], sdiag_rhs[j]);
00274 
00275     if (sm_get_class(kernel->type) == SC_DIST || simcof != SC_NONE) {
00276         if (width > 1e-10) {
00277               /* Distance to kernel using RBF */
00278               result = exp(-result / width);
00279         } else {
00280             if (i != j) {
00281                 /* Distance to kernel, the Hilbertian way */
00282                 result = 0.5 * (sdiag_lhs[i] + sdiag_rhs[j] - result);    
00283             } else {
00284                 /* Distance based norm  */
00285                 gram_t *zero = gram_empty();
00286                 result = gram_cmp(kernel, lm->get_feature_vector(i), zero);
00287                 gram_destroy(zero);
00288             }  
00289         }   
00290     }    
00291 }
00292 
00298 void CMindyGramKernel::add_to_normal(int32_t i, float64_t w)
00299 {
00300     /* Add indexed vector to normal */
00301     CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00302     
00303     /* Initialize empty normal vector if necessary */
00304     if (!normal) 
00305         normal = gram_empty();
00306 
00307     gram_add(normal, lm->get_feature_vector(i),
00308                      normalizer->normalize_lhs(w, i));
00309 
00310     set_is_initialized(true);
00311 }
00312 
00316 void CMindyGramKernel::clear_normal()
00317 {
00318     if (normal)
00319         gram_destroy(normal);
00320     normal = NULL;
00321     set_is_initialized(false);
00322 }
00323 
00330 bool CMindyGramKernel::init_optimization(int32_t n, int32_t *is, float64_t * ws)
00331 {
00332     /* Delete old optimization */
00333     delete_optimization();
00334 
00335     /* Return empty optimization if no vectors are given */
00336     if (n <= 0) {
00337         set_is_initialized(true);
00338         SG_DEBUG( "empty set of SVs\n");
00339         return true;
00340     }
00341 
00342     SG_DEBUG( "initializing MindyGramKernel optimization\n");
00343     for (int32_t i = 0; i < n; i++) {
00344         if ( (i % (n / 10 + 1)) == 0)
00345             SG_PROGRESS(i, 0, n);
00346 
00347         /* Call add to normal */
00348         add_to_normal(is[i], ws[i]);
00349     }
00350     SG_PRINT( "Done.         \n");
00351 
00352     set_is_initialized(true);
00353     return true;
00354 }
00355 
00359 bool CMindyGramKernel::delete_optimization()
00360 {
00361     SG_DEBUG( "deleting MindyGramKernel optimization\n");
00362     clear_normal();
00363     return true;
00364 }
00365 
00371 float64_t CMindyGramKernel::compute_optimized(int32_t i)
00372 {
00373     if (!get_is_initialized()) {
00374         SG_ERROR( "MindyGramKernel optimization not initialized\n");
00375         return -CMath::INFTY;
00376     }
00377 
00378     CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00379     float64_t result = gram_cmp(kernel, rm->get_feature_vector(i), normal);
00380 
00381     return normalizer->normalize_rhs(result, i);
00382 }
00383 
00384 bool CMindyGramKernel::load_init(FILE* src)
00385 {
00386     return false;
00387 }
00388 
00389 bool CMindyGramKernel::save_init(FILE* dest)
00390 {
00391     return false;
00392 }
00393 #endif

SHOGUN Machine Learning Toolbox - Documentation