Distance.cpp

Go to the documentation of this file.
00001 /*
00002  * this program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Christian Gehl
00008  * Written (W) 2006-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/config.h"
00013 
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019 
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022 
00023 #include <string.h>
00024 #include <unistd.h>
00025 
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029 
00030 CDistance::CDistance()
00031 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00032     lhs(NULL), rhs(NULL)
00033 {
00034 }
00035 
00036         
00037 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00038 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00039     lhs(NULL), rhs(NULL)
00040 {
00041     init(p_lhs, p_rhs);
00042 }
00043 
00044 CDistance::~CDistance()
00045 {
00046     delete[] precomputed_matrix;
00047     precomputed_matrix=NULL;
00048 }
00049 
00050 bool CDistance::init(CFeatures* l, CFeatures* r)
00051 {
00052     //make sure features were indeed supplied
00053     ASSERT(l);
00054     ASSERT(r);
00055 
00056     //make sure features are compatible
00057     ASSERT(l->get_feature_class()==r->get_feature_class());
00058     ASSERT(l->get_feature_type()==r->get_feature_type());
00059 
00060     lhs=l;
00061     rhs=r;
00062 
00063     delete[] precomputed_matrix ;
00064     precomputed_matrix=NULL ;
00065 
00066     return true;
00067 }
00068 
00069 bool CDistance::load(CHAR* fname)
00070 {
00071     return false;
00072 }
00073 
00074 bool CDistance::save(CHAR* fname)
00075 {
00076     INT i=0;
00077     INT num_left=lhs->get_num_vectors();
00078     INT num_right=rhs->get_num_vectors();
00079     KERNELCACHE_IDX num_total=num_left*num_right;
00080 
00081     CFile f(fname, 'w', F_DREAL);
00082 
00083     for (INT l=0; l< (INT) num_left && f.is_ok(); l++)
00084     {
00085         for (INT r=0; r< (INT) num_right && f.is_ok(); r++)
00086         {
00087             if (!(i % (num_total/10+1)))
00088                 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00089             else if (!(i % (num_total/200+1)))
00090                 SG_PRINT( ".");
00091 
00092             double k=distance(l,r);
00093             f.save_real_data(&k, 1);
00094 
00095             i++;
00096         }
00097     }
00098 
00099     if (f.is_ok())
00100         SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00101 
00102     return (f.is_ok());
00103 }
00104 
00105 void CDistance::remove_lhs()
00106 { 
00107     SG_UNREF(lhs);
00108     lhs = NULL;
00109 }
00110 
00112 void CDistance::remove_rhs()
00113 {
00114     SG_UNREF(rhs);
00115     rhs = NULL;
00116 }
00117 
00118 
00119 void CDistance::do_precompute_matrix()
00120 {
00121     INT num_left=lhs->get_num_vectors();
00122     INT num_right=rhs->get_num_vectors();
00123     SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00124 
00125     ASSERT(num_left==num_right);
00126     ASSERT(lhs==rhs);
00127     INT num=num_left;
00128     
00129     delete[] precomputed_matrix;
00130     precomputed_matrix=new SHORTREAL[num*(num+1)/2];
00131 
00132     for (INT i=0; i<num; i++)
00133     {
00134         SG_PROGRESS(i*i,0,num*num);
00135         for (INT j=0; j<=i; j++)
00136             precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00137     }
00138 
00139     SG_PROGRESS(num*num,0,num*num);
00140     SG_DONE();
00141 }
00142 
00143 void CDistance::get_distance_matrix(DREAL** dst, INT* m, INT* n)
00144 {
00145     ASSERT(dst && m && n);
00146 
00147     DREAL* result = NULL;
00148     CFeatures* f1 = lhs;
00149     CFeatures* f2 = rhs;
00150 
00151     if (f1 && f2)
00152     {
00153         INT num_vec1=f1->get_num_vectors();
00154         INT num_vec2=f2->get_num_vectors();
00155         *m=num_vec1;
00156         *n=num_vec2;
00157 
00158         LONG total_num=num_vec1*num_vec2;
00159         INT num_done=0;
00160         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00161 
00162         result=(DREAL*) malloc(total_num*sizeof(DREAL));
00163         ASSERT(result);
00164 
00165         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00166         {
00167             for (INT i=0; i<num_vec1; i++)
00168             {
00169                 for (INT j=i; j<num_vec1; j++)
00170                 {
00171                     double v=distance(i,j);
00172 
00173                     result[i+j*num_vec1]=v;
00174                     result[j+i*num_vec1]=v;
00175 
00176                     if (num_done%100000)
00177                         SG_PROGRESS(num_done, 0, total_num-1);
00178 
00179                     if (i!=j)
00180                         num_done+=2;
00181                     else
00182                         num_done+=1;
00183                 }
00184             }
00185         }
00186         else
00187         {
00188             for (INT i=0; i<num_vec1; i++)
00189             {
00190                 for (INT j=0; j<num_vec2; j++)
00191                 {
00192                     result[i+j*num_vec1]=distance(i,j) ;
00193 
00194                     if (num_done%100000)
00195                         SG_PROGRESS(num_done, 0, total_num-1);
00196 
00197                     num_done++;
00198                 }
00199             }
00200         }
00201 
00202         SG_DONE();
00203     }
00204     else
00205       SG_ERROR( "no features assigned to distance\n");
00206 
00207     *dst=result;
00208 }
00209 
00210 SHORTREAL* CDistance::get_distance_matrix_shortreal(int &num_vec1, int &num_vec2, SHORTREAL* target)
00211 {
00212     SHORTREAL* result = NULL;
00213     CFeatures* f1 = lhs;
00214     CFeatures* f2 = rhs;
00215 
00216     if (f1 && f2)
00217     {
00218         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00219             SG_ERROR("distance matrix does not fit into target\n");
00220 
00221         num_vec1=f1->get_num_vectors();
00222         num_vec2=f2->get_num_vectors();
00223         LONG total_num=num_vec1*num_vec2;
00224         int num_done=0;
00225 
00226         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00227 
00228         if (target)
00229             result=target;
00230         else
00231             result=new SHORTREAL[total_num];
00232 
00233         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00234         {
00235             for (int i=0; i<num_vec1; i++)
00236             {
00237                 for (int j=i; j<num_vec1; j++)
00238                 {
00239                     double v=distance(i,j);
00240 
00241                     result[i+j*num_vec1]=v;
00242                     result[j+i*num_vec1]=v;
00243 
00244                     if (num_done%100000)
00245                         SG_PROGRESS(num_done, 0, total_num-1);
00246 
00247                     if (i!=j)
00248                         num_done+=2;
00249                     else
00250                         num_done+=1;
00251                 }
00252             }
00253         }
00254         else
00255         {
00256             for (int i=0; i<num_vec1; i++)
00257             {
00258                 for (int j=0; j<num_vec2; j++)
00259                 {
00260                     result[i+j*num_vec1]=distance(i,j) ;
00261 
00262                     if (num_done%100000)
00263                         SG_PROGRESS(num_done, 0, total_num-1);
00264 
00265                     num_done++;
00266                 }
00267             }
00268         }
00269 
00270         SG_DONE();
00271     }
00272     else
00273       SG_ERROR( "no features assigned to distance\n");
00274 
00275     return result;
00276 }
00277 
00278 DREAL* CDistance::get_distance_matrix_real(int &num_vec1, int &num_vec2, DREAL* target)
00279 {
00280     DREAL* result = NULL;
00281     CFeatures* f1 = lhs;
00282     CFeatures* f2 = rhs;
00283 
00284     if (f1 && f2)
00285     {
00286         if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00287             SG_ERROR("distance matrix does not fit into target\n");
00288 
00289         num_vec1=f1->get_num_vectors();
00290         num_vec2=f2->get_num_vectors();
00291         LONG total_num=num_vec1*num_vec2;
00292         int num_done=0;
00293 
00294         SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00295 
00296         if (target)
00297             result=target;
00298         else
00299             result=new DREAL[total_num];
00300 
00301         if ( (f1 == f2) && (num_vec1 == num_vec2) )
00302         {
00303             for (int i=0; i<num_vec1; i++)
00304             {
00305                 for (int j=i; j<num_vec1; j++)
00306                 {
00307                     double v=distance(i,j);
00308 
00309                     result[i+j*num_vec1]=v;
00310                     result[j+i*num_vec1]=v;
00311 
00312                     if (num_done%100000)
00313                         SG_PROGRESS(num_done, 0, total_num-1);
00314 
00315                     if (i!=j)
00316                         num_done+=2;
00317                     else
00318                         num_done+=1;
00319                 }
00320             }
00321         }
00322         else
00323         {
00324             for (int i=0; i<num_vec1; i++)
00325             {
00326                 for (int j=0; j<num_vec2; j++)
00327                 {
00328                     result[i+j*num_vec1]=distance(i,j) ;
00329 
00330                     if (num_done%100000)
00331                         SG_PROGRESS(num_done, 0, total_num-1);
00332 
00333                     num_done++;
00334                 }
00335             }
00336         }
00337 
00338         SG_DONE();
00339     }
00340     else
00341       SG_ERROR( "no features assigned to distance\n");
00342 
00343     return result;
00344 }

SHOGUN Machine Learning Toolbox - Documentation