Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Christian Gehl
00008  * Written (W) 2006-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 CDistance::CDistance()
00031 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00032     lhs(NULL), rhs(NULL)
00033 {
00034 }
00035 
00036         
00037 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00038 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00039     lhs(NULL), rhs(NULL)
00040 {
00041     init(p_lhs, p_rhs);
00042 }
00043 
00044 CDistance::~CDistance()
00045 {
00046     delete[] precomputed_matrix;
00047     precomputed_matrix=NULL;
00048 }
00049 
00050 bool CDistance::init(CFeatures* l, CFeatures* r)
00051 {
00052     //make sure features were indeed supplied
00053     ASSERT(l);
00054     ASSERT(r);
00055 
00056     //make sure features are compatible
00057     ASSERT(l->get_feature_class()==r->get_feature_class());
00058     ASSERT(l->get_feature_type()==r->get_feature_type());
00059 
00060     lhs=l;
00061     rhs=r;
00062 
00063     delete[] precomputed_matrix ;
00064     precomputed_matrix=NULL ;
00065 
00066     return true;
00067 }
00068 
00069 bool CDistance::load(char* fname)
00070 {
00071     return false;
00072 }
00073 
00074 bool CDistance::save(char* fname)
00075 {
00076     int32_t i=0;
00077     int32_t num_left=lhs->get_num_vectors();
00078     int32_t num_right=rhs->get_num_vectors();
00079     KERNELCACHE_IDX num_total=num_left*num_right;
00080 
00081     CFile f(fname, 'w', F_DREAL);
00082 
00083     for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00084     {
00085         for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00086         {
00087             if (!(i % (num_total/10+1)))
00088                 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00089             else if (!(i % (num_total/200+1)))
00090                 SG_PRINT( ".");
00091 
00092             float64_t k=distance(l,r);
00093             f.save_real_data(&k, 1);
00094 
00095             i++;
00096         }
00097     }
00098 
00099     if (f.is_ok())
00100         SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00101 
00102     return (f.is_ok());
00103 }
00104 
00105 void CDistance::remove_lhs()
00106 { 
00107     SG_UNREF(lhs);
00108     lhs = NULL;
00109 }
00110 
00112 void CDistance::remove_rhs()
00113 {
00114     SG_UNREF(rhs);
00115     rhs = NULL;
00116 }
00117 
00118 
00119 void CDistance::do_precompute_matrix()
00120 {
00121     int32_t num_left=lhs->get_num_vectors();
00122     int32_t num_right=rhs->get_num_vectors();
00123     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00124 
00125     ASSERT(num_left==num_right);
00126     ASSERT(lhs==rhs);
00127     int32_t num=num_left;
00128     
00129     delete[] precomputed_matrix;
00130     precomputed_matrix=new float32_t[num*(num+1)/2];
00131 
00132     for (int32_t i=0; i<num; i++)
00133     {
00134         SG_PROGRESS(i*i,0,num*num);
00135         for (int32_t j=0; j<=i; j++)
00136             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00137     }
00138 
00139     SG_PROGRESS(num*num,0,num*num);
00140     SG_DONE();
00141 }
00142 
00143 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00144 {
00145     ASSERT(dst && m && n);
00146 
00147     float64_t* result = NULL;
00148     CFeatures* f1 = lhs;
00149     CFeatures* f2 = rhs;
00150 
00151     if (f1 && f2)
00152     {
00153         int32_t num_vec1=f1->get_num_vectors();
00154         int32_t num_vec2=f2->get_num_vectors();
00155         *m=num_vec1;
00156         *n=num_vec2;
00157 
00158         int64_t total_num=num_vec1*num_vec2;
00159         int32_t num_done=0;
00160         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00161 
00162         result=(float64_t*) malloc(total_num*sizeof(float64_t));
00163         ASSERT(result);
00164 
00165         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00166         {
00167             for (int32_t i=0; i<num_vec1; i++)
00168             {
00169                 for (int32_t j=i; j<num_vec1; j++)
00170                 {
00171                     float64_t v=distance(i,j);
00172 
00173                     result[i+j*num_vec1]=v;
00174                     result[j+i*num_vec1]=v;
00175 
00176                     if (num_done%100000)
00177                         SG_PROGRESS(num_done, 0, total_num-1);
00178 
00179                     if (i!=j)
00180                         num_done+=2;
00181                     else
00182                         num_done+=1;
00183                 }
00184             }
00185         }
00186         else
00187         {
00188             for (int32_t i=0; i<num_vec1; i++)
00189             {
00190                 for (int32_t j=0; j<num_vec2; j++)
00191                 {
00192                     result[i+j*num_vec1]=distance(i,j) ;
00193 
00194                     if (num_done%100000)
00195                         SG_PROGRESS(num_done, 0, total_num-1);
00196 
00197                     num_done++;
00198                 }
00199             }
00200         }
00201 
00202         SG_DONE();
00203     }
00204     else
00205       SG_ERROR( "no features assigned to distance\n");
00206 
00207     *dst=result;
00208 }
00209 
00210 float32_t* CDistance::get_distance_matrix_shortreal(
00211     int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00212 {
00213     float32_t* result = NULL;
00214     CFeatures* f1 = lhs;
00215     CFeatures* f2 = rhs;
00216 
00217     if (f1 && f2)
00218     {
00219         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00220             SG_ERROR("distance matrix does not fit into target\n");
00221 
00222         num_vec1=f1->get_num_vectors();
00223         num_vec2=f2->get_num_vectors();
00224         int64_t total_num=num_vec1*num_vec2;
00225         int32_t num_done=0;
00226 
00227         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00228 
00229         if (target)
00230             result=target;
00231         else
00232             result=new float32_t[total_num];
00233 
00234         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00235         {
00236             for (int32_t i=0; i<num_vec1; i++)
00237             {
00238                 for (int32_t j=i; j<num_vec1; j++)
00239                 {
00240                     float64_t v=distance(i,j);
00241 
00242                     result[i+j*num_vec1]=v;
00243                     result[j+i*num_vec1]=v;
00244 
00245                     if (num_done%100000)
00246                         SG_PROGRESS(num_done, 0, total_num-1);
00247 
00248                     if (i!=j)
00249                         num_done+=2;
00250                     else
00251                         num_done+=1;
00252                 }
00253             }
00254         }
00255         else
00256         {
00257             for (int32_t i=0; i<num_vec1; i++)
00258             {
00259                 for (int32_t j=0; j<num_vec2; j++)
00260                 {
00261                     result[i+j*num_vec1]=distance(i,j) ;
00262 
00263                     if (num_done%100000)
00264                         SG_PROGRESS(num_done, 0, total_num-1);
00265 
00266                     num_done++;
00267                 }
00268             }
00269         }
00270 
00271         SG_DONE();
00272     }
00273     else
00274       SG_ERROR( "no features assigned to distance\n");
00275 
00276     return result;
00277 }
00278 
00279 float64_t* CDistance::get_distance_matrix_real(
00280     int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00281 {
00282     float64_t* result = NULL;
00283     CFeatures* f1 = lhs;
00284     CFeatures* f2 = rhs;
00285 
00286     if (f1 && f2)
00287     {
00288         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00289             SG_ERROR("distance matrix does not fit into target\n");
00290 
00291         num_vec1=f1->get_num_vectors();
00292         num_vec2=f2->get_num_vectors();
00293         int64_t total_num=num_vec1*num_vec2;
00294         int32_t num_done=0;
00295 
00296         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00297 
00298         if (target)
00299             result=target;
00300         else
00301             result=new float64_t[total_num];
00302 
00303         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00304         {
00305             for (int32_t i=0; i<num_vec1; i++)
00306             {
00307                 for (int32_t j=i; j<num_vec1; j++)
00308                 {
00309                     float64_t v=distance(i,j);
00310 
00311                     result[i+j*num_vec1]=v;
00312                     result[j+i*num_vec1]=v;
00313 
00314                     if (num_done%100000)
00315                         SG_PROGRESS(num_done, 0, total_num-1);
00316 
00317                     if (i!=j)
00318                         num_done+=2;
00319                     else
00320                         num_done+=1;
00321                 }
00322             }
00323         }
00324         else
00325         {
00326             for (int32_t i=0; i<num_vec1; i++)
00327             {
00328                 for (int32_t j=0; j<num_vec2; j++)
00329                 {
00330                     result[i+j*num_vec1]=distance(i,j) ;
00331 
00332                     if (num_done%100000)
00333                         SG_PROGRESS(num_done, 0, total_num-1);
00334 
00335                     num_done++;
00336                 }
00337             }
00338         }
00339 
00340         SG_DONE();
00341     }
00342     else
00343       SG_ERROR( "no features assigned to distance\n");
00344 
00345     return result;
00346 }

SHOGUN Machine Learning Toolbox - Documentation