MindyGramFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Konrad Rieck
00008  * Copyright (C) 2006-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  *
00010  * Indentation: bcpp -f 1 -s -ylcnc -bcl -i 4
00011  */
00012 
00013 #include "lib/config.h"
00014 
00015 #ifdef HAVE_MINDY
00016 
00017 #include "features/Features.h"
00018 #include "features/CharFeatures.h"
00019 #include "features/StringFeatures.h"
00020 #include "features/MindyGramFeatures.h"
00021 #include "lib/common.h"
00022 #include "lib/io.h"
00023 #include "lib/File.h"
00024 
00025 #include <math.h>
00026 #include <mindy.h>
00027 
00031 CMindyGramFeatures::~CMindyGramFeatures()
00032 {
00033     SG_DEBUG( "Destroying Mindy gram features\n");
00034     /* Destroy gram vectors */
00035     for (INT i = 0; i < num_vectors; i++)
00036         gram_destroy(vectors[i]);
00037     free(vectors);
00038 
00039     /* Destroy configuration */
00040     micfg_destroy(cfg);
00041 }
00042 
00046 CFeatures *CMindyGramFeatures::duplicate() const
00047 {
00048     return new CMindyGramFeatures(*this);
00049 }
00050 
00056 gram_t *CMindyGramFeatures::get_feature_vector(INT i)
00057 {
00058     ASSERT(vectors);
00059     ASSERT(i>=0 && i<num_vectors);
00060 
00061     return vectors[i];
00062 }
00063 
00068 void CMindyGramFeatures::set_feature_vector(INT i, gram_t * g)
00069 {
00070     ASSERT(vectors);
00071     ASSERT(i>=0 && i<num_vectors);
00072 
00073     /* Destroy previous gram */
00074     if (vectors[i])
00075         gram_destroy(vectors[i]);
00076 
00077     vectors[i] = g;
00078 }
00079 
00087 ULONG CMindyGramFeatures::get_feature(INT i, INT j)
00088 {
00089     ASSERT(vectors && i<num_vectors);
00090     ASSERT(j<(signed) vectors[i]->num);
00091 
00092     return vectors[i]->gram[j];
00093 }
00094 
00100 INT CMindyGramFeatures::get_vector_length(INT i)
00101 {
00102     ASSERT(vectors && i<num_vectors);
00103     return vectors[i]->num;
00104 }
00105 
00110 void CMindyGramFeatures::trim_max(double max)
00111 {
00112     for (INT i = 0; i < num_vectors; i++)
00113         gram_trim_max(vectors[i], max);
00114 } 
00115 
00121 bool CMindyGramFeatures::load(CHAR * fname)
00122 {
00123     SG_INFO( "Loading strings from %s\n", fname);
00124     LONG len = 0;
00125     CHAR *s, *t;
00126 
00127     CFile f(fname, 'r', F_CHAR);
00128     CHAR *data = f.load_char_data(NULL, len);
00129 
00130     if (!f.is_ok()) {
00131         SG_ERROR( "Reading file failed\n");
00132         return false;
00133     }
00134 
00135     /* Count strings terminated by \n */
00136     num_vectors = 0;
00137     for (LONG i = 0; i < len; i++)
00138         if (data[i] == '\n')
00139             SG_INFO( "File contains %ld string vectors\n",
00140                     num_vectors);
00141 
00142     vectors = (gram_t **) calloc(num_vectors, sizeof(gram_t *));
00143     if (!vectors) {
00144         SG_ERROR( "Could not allocate memory\n");
00145         return false;
00146     }
00147 
00148     /* Extract grams from strings */
00149     t = s = data;
00150     for (LONG i = 0; i < num_vectors; i++, t++) {
00151         if (*t != '\n')
00152             continue;
00153 
00154         vectors[i] = gram_extract(cfg, (byte_t *) s, t - s);
00155         s = t + 1;
00156     }
00157 
00158     return true;
00159 }
00160 #endif

SHOGUN Machine Learning Toolbox - Documentation