00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "lib/config.h"
00014
00015 #ifdef HAVE_MINDY
00016
00017 #include <mindy.h>
00018
00019 #include "lib/common.h"
00020 #include "features/MindyGramFeatures.h"
00021 #include "lib/io.h"
00022 #include "kernel/MindyGramKernel.h"
00023
00024
00025
00026
00027 param_spec_t p_map[] = {
00028 { "expo", SP_EXPO, 2.0, "Exponent (polynomial, minkowski)" },
00029 { "shift", SP_SHIFT, 0.0, "Shift value (polynomial)" },
00030 { "dist", SP_DIST, ST_MINKOWSKI, "Distance name (rbf)" },
00031 { "width", SP_WIDTH, 1.0, "Kernel width (rbf)" },
00032 { NULL },
00033 };
00034
00041 CMindyGramKernel::CMindyGramKernel(INT ch, CHAR *meas, DREAL w)
00042 : CKernel(ch)
00043 {
00044
00045 sdiag_lhs=NULL;
00046 sdiag_rhs=NULL;
00047 initialized=false;
00048 measure=meas;
00049 norm=NO_NORMALIZATION;
00050 width=w;
00051 cache=0;
00052
00053
00054 simcof=sico_get_type(measure);
00055
00056
00057 SG_INFO("Initializing Mindy kernel.\n");
00058 if (simcof==SC_NONE)
00059 kernel=sm_create(sm_get_type(measure));
00060 else
00061 kernel=sm_create(ST_MINKERN);
00062
00063 SG_INFO("Mindy similarity measure: %s (using %s).\n",
00064 measure, sm_get_descr(kernel->type));
00065
00066
00067 if (kernel->type == ST_LINEAR)
00068 {
00069 SG_INFO("Optimization supported.\n");
00070 properties |= KP_LINADD;
00071 }
00072
00073 normal=NULL;
00074 clear_normal();
00075 }
00076
00077 CMindyGramKernel::CMindyGramKernel(
00078 CFeatures* l, CFeatures* r, CHAR *m, DREAL w)
00079 : CKernel(10), sdiag_lhs(NULL), sdiag_rhs(NULL), initialized(false),
00080 measure(m), norm(NO_NORMALIZATION), width(w)
00081 {
00082
00083 simcof=sico_get_type(measure);
00084
00085
00086 SG_INFO("Initializing Mindy kernel.\n");
00087 if (simcof==SC_NONE)
00088 kernel=sm_create(sm_get_type(measure));
00089 else
00090 kernel=sm_create(ST_MINKERN);
00091
00092 SG_INFO("Mindy similarity measure: %s (using %s).\n",
00093 measure, sm_get_descr(kernel->type));
00094
00095
00096 if (kernel->type == ST_LINEAR)
00097 {
00098 SG_INFO("Optimization supported.\n");
00099 properties |= KP_LINADD;
00100 }
00101
00102 normal=NULL;
00103 clear_normal();
00104
00105 init(l, r);
00106 }
00107
00108
00109
00110
00111 void CMindyGramKernel::set_md5cache(INT c)
00112 {
00113 cache = c;
00114 if (cache <= 0)
00115 return;
00116
00117 SG_INFO("Creating MD5 cache of %d kb", cache);
00118 md5_cache_create(cache);
00119 }
00120
00121
00122
00123
00124 void CMindyGramKernel::set_param(CHAR *param)
00125 {
00126
00127 parse_params(param);
00128
00129
00130 for (INT i = 0; p_map[i].name; i++) {
00131 if (p_map[i].idx != SP_DIST)
00132 SG_INFO( "Param %8s=%8.6f\t %s\n",
00133 p_map[i].name, p_map[i].val, p_map[i].descr);
00134 else
00135 SG_INFO( "Param %8s=%s\t %s\n", p_map[i].name,
00136 sm_get_name((sm_type_t) p_map[i].val),
00137 p_map[i].descr);
00138 }
00139 }
00140
00141
00142
00143
00144 void CMindyGramKernel::set_norm(ENormalizationType n)
00145 {
00146 norm = n;
00147 }
00148
00152 CMindyGramKernel::~CMindyGramKernel()
00153 {
00154 cleanup();
00155
00156 if (cache > 0)
00157 md5_cache_destroy();
00158
00159 sm_destroy(kernel);
00160 }
00161
00165 void CMindyGramKernel::parse_params(CHAR *pa)
00166 {
00167 INT i;
00168 CHAR *t, *p;
00169
00170 if (strlen(pa) == 0)
00171 return;
00172
00173
00174 while ((t = strsep(&pa, ",;"))) {
00175 for (i = 0; p_map[i].name; i++) {
00176
00177 size_t l = strlen(p_map[i].name);
00178 if (!strncasecmp(t, p_map[i].name, l)) {
00179 p = t + l + 1;
00180 if (p_map[i].idx == SP_DIST)
00181 p_map[i].val = sm_get_type(p);
00182 else
00183 p_map[i].val = atof(p);
00184 break;
00185 }
00186 }
00187 if (!p_map[i].name)
00188 SG_WARNING( "Unknown parameter '%s'. Skipping", t);
00189 }
00190
00191
00192 for (i = 0; p_map[i].name; i++)
00193 sm_set_param(kernel, p_map[i].idx, p_map[i].val);
00194 }
00195
00199 void CMindyGramKernel::cleanup()
00200 {
00201 delete_optimization();
00202 clear_normal();
00203
00204 CKernel::cleanup();
00205 }
00206
00212 void CMindyGramKernel::remove_lhs()
00213 {
00214 delete_optimization();
00215
00216 #ifdef SVMLIGHT
00217 if (lhs)
00218 cache_reset();
00219 #endif
00220
00221 if (sdiag_lhs != sdiag_rhs)
00222 delete[] sdiag_rhs;
00223 delete[] sdiag_lhs;
00224
00225 lhs = NULL ;
00226 rhs = NULL ;
00227 initialized = false;
00228 sdiag_lhs = NULL;
00229 sdiag_rhs = NULL;
00230 }
00231
00235 void CMindyGramKernel::remove_rhs()
00236 {
00237 #ifdef SVMLIGHT
00238 if (rhs)
00239 cache_reset();
00240 #endif
00241
00242 if (sdiag_lhs != sdiag_rhs)
00243 delete[] sdiag_rhs;
00244
00245 sdiag_rhs = sdiag_lhs;
00246 rhs = lhs;
00247 }
00248
00256 bool CMindyGramKernel::init(CFeatures* l, CFeatures* r)
00257 {
00258
00259 SG_DEBUG( "Initializing MindyGramKernel %p %p\n", l, r);
00260
00261 bool result = CKernel::init(l,r);
00262
00263 initialized = false;
00264 INT i;
00265
00266
00267 ASSERT(l->get_feature_class()== C_MINDYGRAM);
00268 ASSERT(r->get_feature_class()==C_MINDYGRAM);
00269 ASSERT(l->get_feature_type()==F_ULONG);
00270 ASSERT(r->get_feature_type()==F_ULONG);
00271
00272
00273 if (sdiag_lhs != sdiag_rhs)
00274 delete[] sdiag_rhs;
00275 sdiag_rhs=NULL;
00276 delete[] sdiag_lhs;
00277 sdiag_lhs=NULL;
00278
00279
00280 sdiag_lhs= new DREAL[lhs->get_num_vectors()];
00281 for (i = 0; i < lhs->get_num_vectors(); i++)
00282 sdiag_lhs[i]=1;
00283
00284
00285 if (l == r) {
00286 sdiag_rhs=sdiag_lhs;
00287 } else {
00288 sdiag_rhs= new DREAL[rhs->get_num_vectors()];
00289 for (i = 0; i<rhs->get_num_vectors(); i++)
00290 sdiag_rhs[i]=1;
00291 }
00292
00293 this->lhs=(CMindyGramFeatures *) l;
00294 this->rhs=(CMindyGramFeatures *) l;
00295
00296
00297 for (i = 0; i<lhs->get_num_vectors(); i++) {
00298 sdiag_lhs[i] = sqrt(compute(i,i));
00299
00300
00301 if (sdiag_lhs[i] == 0)
00302 sdiag_lhs[i] = 1e-16;
00303 }
00304
00305
00306 if (sdiag_lhs != sdiag_rhs) {
00307 this->lhs=(CMindyGramFeatures *) r;
00308 this->rhs=(CMindyGramFeatures *) r;
00309
00310
00311 for (i=0; i<rhs->get_num_vectors(); i++) {
00312 sdiag_rhs[i] = sqrt(compute(i,i));
00313
00314
00315 if (sdiag_rhs[i]==0)
00316 sdiag_rhs[i]=1e-16;
00317 }
00318 }
00319
00320
00321 this->lhs=(CStringFeatures<WORD>*) l;
00322 this->rhs=(CStringFeatures<WORD>*) r;
00323
00324 initialized = true;
00325 return result;
00326 }
00327
00334 DREAL CMindyGramKernel::compute(INT i, INT j)
00335 {
00336
00337 CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00338 CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00339
00340
00341 DREAL result = gram_cmp(kernel, lm->get_feature_vector(i),
00342 rm->get_feature_vector(j));
00343
00344
00345 if (simcof != SC_NONE)
00346 result = 1 - sico(simcof, result, sdiag_lhs[i], sdiag_rhs[j]);
00347
00348 if (sm_get_class(kernel->type) == SC_DIST || simcof != SC_NONE) {
00349 if (width > 1e-10) {
00350
00351 result = exp(-result / width);
00352 } else {
00353 if (i != j) {
00354
00355 result = 0.5 * (sdiag_lhs[i] + sdiag_rhs[j] - result);
00356 } else {
00357
00358 gram_t *zero = gram_empty();
00359 result = gram_cmp(kernel, lm->get_feature_vector(i), zero);
00360 gram_destroy(zero);
00361 }
00362 }
00363 }
00364
00365 if (!initialized)
00366 return result;
00367
00368
00369 switch (norm) {
00370 case NO_NORMALIZATION:
00371 return result;
00372 case SQRT_NORMALIZATION:
00373 return result/sqrt(sdiag_lhs[i]*sdiag_rhs[i]);
00374 case FULL_NORMALIZATION:
00375 return result/(sdiag_lhs[i]*sdiag_rhs[j]);
00376 default:
00377 SG_ERROR( "Unknown Normalization in use!\n");
00378 return -CMath::INFTY;
00379 }
00380 }
00381
00387 void CMindyGramKernel::add_to_normal(INT i, DREAL w)
00388 {
00389
00390 CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
00391
00392
00393 if (!normal)
00394 normal = gram_empty();
00395
00396 gram_add(normal, lm->get_feature_vector(i),
00397 normalize_weight(w, i, norm));
00398
00399 set_is_initialized(true);
00400 }
00401
00405 void CMindyGramKernel::clear_normal()
00406 {
00407 if (normal)
00408 gram_destroy(normal);
00409 normal = NULL;
00410 set_is_initialized(false);
00411 }
00412
00419 bool CMindyGramKernel::init_optimization(INT n, INT *is, DREAL * ws)
00420 {
00421
00422 delete_optimization();
00423
00424
00425 if (n <= 0) {
00426 set_is_initialized(true);
00427 SG_DEBUG( "empty set of SVs\n");
00428 return true;
00429 }
00430
00431 SG_DEBUG( "initializing MindyGramKernel optimization\n");
00432 for (int i = 0; i < n; i++) {
00433 if ( (i % (n / 10 + 1)) == 0)
00434 SG_PROGRESS(i, 0, n);
00435
00436
00437 add_to_normal(is[i], ws[i]);
00438 }
00439 SG_PRINT( "Done. \n");
00440
00441 set_is_initialized(true);
00442 return true;
00443 }
00444
00448 bool CMindyGramKernel::delete_optimization()
00449 {
00450 SG_DEBUG( "deleting MindyGramKernel optimization\n");
00451 clear_normal();
00452 return true;
00453 }
00454
00460 DREAL CMindyGramKernel::compute_optimized(INT i)
00461 {
00462 if (!get_is_initialized()) {
00463 SG_ERROR( "MindyGramKernel optimization not initialized\n");
00464 return -CMath::INFTY;
00465 }
00466
00467 CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
00468 DREAL result = gram_cmp(kernel, rm->get_feature_vector(i), normal);
00469
00470 switch (norm) {
00471 case NO_NORMALIZATION:
00472 return result;
00473 case SQRT_NORMALIZATION:
00474 return result/sqrt(sdiag_rhs[i]);
00475 case FULL_NORMALIZATION:
00476 return result/sdiag_rhs[i];
00477 default:
00478 SG_ERROR( "Unknown Normalization in use!\n");
00479 return -CMath::INFTY;
00480 }
00481 }
00482
00483 bool CMindyGramKernel::load_init(FILE* src)
00484 {
00485 return false;
00486 }
00487
00488 bool CMindyGramKernel::save_init(FILE* dest)
00489 {
00490 return false;
00491 }
00492 #endif