MindyGramFeatures.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "lib/config.h"
00014
00015 #ifdef HAVE_MINDY
00016
00017 #include "features/Features.h"
00018 #include "features/CharFeatures.h"
00019 #include "features/StringFeatures.h"
00020 #include "features/MindyGramFeatures.h"
00021 #include "lib/common.h"
00022 #include "lib/io.h"
00023 #include "lib/File.h"
00024
00025 #include <math.h>
00026 #include <mindy.h>
00027
00031 CMindyGramFeatures::~CMindyGramFeatures()
00032 {
00033 SG_DEBUG( "Destroying Mindy gram features\n");
00034
00035 for (int32_t i = 0; i < num_vectors; i++)
00036 gram_destroy(vectors[i]);
00037 free(vectors);
00038
00039
00040 micfg_destroy(cfg);
00041 }
00042
00046 CFeatures *CMindyGramFeatures::duplicate() const
00047 {
00048 return new CMindyGramFeatures(*this);
00049 }
00050
00056 gram_t *CMindyGramFeatures::get_feature_vector(int32_t i)
00057 {
00058 ASSERT(vectors);
00059 ASSERT(i>=0 && i<num_vectors);
00060
00061 return vectors[i];
00062 }
00063
00068 void CMindyGramFeatures::set_feature_vector(int32_t i, gram_t * g)
00069 {
00070 ASSERT(vectors);
00071 ASSERT(i>=0 && i<num_vectors);
00072
00073
00074 if (vectors[i])
00075 gram_destroy(vectors[i]);
00076
00077 vectors[i] = g;
00078 }
00079
00087 uint64_t CMindyGramFeatures::get_feature(int32_t i, int32_t j)
00088 {
00089 ASSERT(vectors && i<num_vectors);
00090 ASSERT(j<(signed) vectors[i]->num);
00091
00092 return vectors[i]->gram[j];
00093 }
00094
00100 int32_t CMindyGramFeatures::get_vector_length(int32_t i)
00101 {
00102 ASSERT(vectors && i<num_vectors);
00103 return vectors[i]->num;
00104 }
00105
00110 void CMindyGramFeatures::trim_max(float64_t max)
00111 {
00112 for (int32_t i = 0; i < num_vectors; i++)
00113 gram_trim_max(vectors[i], max);
00114 }
00115
00121 bool CMindyGramFeatures::load(char * fname)
00122 {
00123 SG_INFO( "Loading strings from %s\n", fname);
00124 int64_t len = 0;
00125 char *s, *t;
00126
00127 CFile f(fname, 'r', F_CHAR);
00128 char *data = f.load_char_data(NULL, len);
00129
00130 if (!f.is_ok()) {
00131 SG_ERROR( "Reading file failed\n");
00132 return false;
00133 }
00134
00135
00136 num_vectors = 0;
00137 for (int64_t i = 0; i < len; i++)
00138 if (data[i] == '\n')
00139 SG_INFO( "File contains %ld string vectors\n",
00140 num_vectors);
00141
00142 vectors = (gram_t **) calloc(num_vectors, sizeof(gram_t *));
00143 if (!vectors) {
00144 SG_ERROR( "Could not allocate memory\n");
00145 return false;
00146 }
00147
00148
00149 t = s = data;
00150 for (int64_t i = 0; i < num_vectors; i++, t++) {
00151 if (*t != '\n')
00152 continue;
00153
00154 vectors[i] = gram_extract(cfg, (byte_t *) s, t - s);
00155 s = t + 1;
00156 }
00157
00158 return true;
00159 }
00160 #endif