RealFileFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
#include "features/RealFileFeatures.h"
#include "features/Features.h"
#include "lib/io.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00017 
00018 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
00019 : CRealFeatures(size)
00020 {
00021     working_file=fopen(fname, "r");
00022     working_filename=strdup(fname);
00023     ASSERT(working_file);
00024     intlen=0;
00025     doublelen=0;
00026     endian=0;
00027     fourcc=0;
00028     preprocd=0;
00029     labels=NULL;
00030     status=load_base_data();
00031 }
00032 
00033 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
00034 : CRealFeatures(size), working_file(file), working_filename(NULL)
00035 {
00036     ASSERT(working_file);
00037     intlen=0;
00038     doublelen=0;
00039     endian=0;
00040     fourcc=0;
00041     preprocd=0;
00042     labels=NULL;
00043     status=load_base_data();
00044 }
00045 
00046 CRealFileFeatures::~CRealFileFeatures()
00047 {
00048     delete[] feature_matrix;
00049     delete[] working_filename;
00050     delete[] labels;
00051 }
00052 
00053 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig)
00054 : CRealFeatures(orig), working_file(orig.working_file), status(orig.status)
00055 {
00056     if (orig.working_filename)
00057         working_filename=strdup(orig.working_filename);
00058     if (orig.labels && get_num_vectors())
00059     {
00060         labels=new int32_t[get_num_vectors()];
00061         memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
00062     }
00063 }
00064 
00065 float64_t* CRealFileFeatures::compute_feature_vector(
00066     int32_t num, int32_t &len, float64_t* target)
00067 {
00068     ASSERT(num<num_vectors);
00069     len=num_features;
00070     float64_t* featurevector=target;
00071     if (!featurevector)
00072         featurevector=new float64_t[num_features];
00073     ASSERT(working_file);
00074     fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00075     ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
00076     return featurevector;
00077 }
00078 
00079 float64_t* CRealFileFeatures::load_feature_matrix()
00080 {
00081     ASSERT(working_file);
00082     fseek(working_file, filepos, SEEK_SET);
00083     delete[] feature_matrix;
00084 
00085     SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00086     free_feature_matrix();
00087     feature_matrix=new float64_t[num_features*num_vectors];
00088 
00089     SG_INFO( "loading... be patient.\n");
00090 
00091     for (int32_t i=0; i<(int32_t) num_vectors; i++)
00092     {
00093         if (!(i % (num_vectors/10+1)))
00094             SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
00095         else if (!(i % (num_vectors/200+1)))
00096             SG_PRINT( ".");
00097 
00098         ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
00099     }
00100     SG_DONE();
00101 
00102     return feature_matrix;
00103 }
00104 
00105 int32_t CRealFileFeatures::get_label(int32_t idx)
00106 {
00107     ASSERT(idx<num_vectors);
00108     if (labels)
00109         return labels[idx];
00110     return 0;
00111 }
00112 
00113 bool CRealFileFeatures::load_base_data()
00114 {
00115     ASSERT(working_file);
00116     uint32_t num_vec=0;
00117     uint32_t num_feat=0;
00118 
00119     ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
00120     ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
00121     ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
00122     ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
00123     ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
00124     ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
00125     ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
00126     SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
00127     filepos=ftell(working_file);
00128     set_num_vectors(num_vec);
00129     set_num_features(num_feat);
00130     fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00131     delete[] labels;
00132     labels=new int[num_vec];
00133     ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
00134     return true;
00135 }

SHOGUN Machine Learning Toolbox - Documentation