RealFileFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
#include "features/RealFileFeatures.h"
#include "features/Features.h"
#include "lib/io.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00017 
00018 CRealFileFeatures::CRealFileFeatures(INT size, CHAR* fname) : CRealFeatures(size)
00019 {
00020     working_file=fopen(fname, "r");
00021     working_filename=strdup(fname);
00022     ASSERT(working_file);
00023     intlen=0;
00024     doublelen=0;
00025     endian=0;
00026     fourcc=0;
00027     preprocd=0;
00028     labels=NULL;
00029     status=load_base_data();
00030 }
00031 
00032 CRealFileFeatures::CRealFileFeatures(INT size, FILE* file) : CRealFeatures(size), working_file(file), working_filename(NULL)
00033 {
00034     ASSERT(working_file);
00035     intlen=0;
00036     doublelen=0;
00037     endian=0;
00038     fourcc=0;
00039     preprocd=0;
00040     labels=NULL;
00041     status=load_base_data();
00042 }
00043 
00044 CRealFileFeatures::~CRealFileFeatures()
00045 {
00046     delete[] feature_matrix;
00047     delete[] working_filename;
00048     delete[] labels;
00049 }
00050 
00051 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig): CRealFeatures(orig), 
00052     working_file(orig.working_file), status(orig.status)
00053 {
00054     if (orig.working_filename)
00055         working_filename=strdup(orig.working_filename);
00056     if (orig.labels && get_num_vectors())
00057     {
00058         labels=new int[get_num_vectors()];
00059         memcpy(labels, orig.labels, sizeof(int)*get_num_vectors()); 
00060     }
00061 }
00062 
00063 DREAL* CRealFileFeatures::compute_feature_vector(INT num, INT &len, DREAL* target)
00064 {
00065     ASSERT(num<num_vectors);
00066     len=num_features;
00067     DREAL* featurevector=target;
00068     if (!featurevector)
00069         featurevector=new DREAL[num_features];
00070     ASSERT(working_file);
00071     fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00072     ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
00073     return featurevector;
00074 }
00075 
00076 DREAL* CRealFileFeatures::load_feature_matrix()
00077 {
00078     ASSERT(working_file);
00079     fseek(working_file, filepos, SEEK_SET);
00080     delete[] feature_matrix;
00081 
00082     SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00083     free_feature_matrix();
00084     feature_matrix=new DREAL[num_features*num_vectors];
00085 
00086     SG_INFO( "loading... be patient.\n");
00087 
00088     for (INT i=0; i<(INT) num_vectors; i++)
00089     {
00090         if (!(i % (num_vectors/10+1)))
00091             SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
00092         else if (!(i % (num_vectors/200+1)))
00093             SG_PRINT( ".");
00094 
00095         ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
00096     }
00097     SG_DONE();
00098 
00099     return feature_matrix;
00100 }
00101 
00102 INT CRealFileFeatures::get_label(INT idx)
00103 {
00104     ASSERT(idx<num_vectors);
00105     if (labels)
00106         return labels[idx];
00107     return 0;
00108 }
00109 
00110 bool CRealFileFeatures::load_base_data()
00111 {
00112     ASSERT(working_file);
00113     UINT num_vec=0;
00114     UINT num_feat=0;
00115 
00116     ASSERT(fread(&intlen, sizeof(BYTE), 1, working_file)==1);
00117     ASSERT(fread(&doublelen, sizeof(BYTE), 1, working_file)==1);
00118     ASSERT(fread(&endian, (UINT) intlen, 1, working_file)== 1);
00119     ASSERT(fread(&fourcc, (UINT) intlen, 1, working_file)==1);
00120     ASSERT(fread(&num_vec, (UINT) intlen, 1, working_file)==1);
00121     ASSERT(fread(&num_feat, (UINT) intlen, 1, working_file)==1);
00122     ASSERT(fread(&preprocd, (UINT) intlen, 1, working_file)==1);
00123     SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
00124     filepos=ftell(working_file);
00125     set_num_vectors(num_vec);
00126     set_num_features(num_feat);
00127     fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00128     delete[] labels;
00129     labels=new int[num_vec];
00130     ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
00131     return true;
00132 }

SHOGUN Machine Learning Toolbox - Documentation