00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "lib/config.h"
00013
00014 #include "lib/common.h"
00015 #include "lib/io.h"
00016 #include "lib/File.h"
00017 #include "lib/Time.h"
00018 #include "base/Parallel.h"
00019
00020 #include "distance/Distance.h"
00021 #include "features/Features.h"
00022
00023 #include <string.h>
00024 #include <unistd.h>
00025
00026 #ifndef WIN32
00027 #include <pthread.h>
00028 #endif
00029
00030 CDistance::CDistance()
00031 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00032 lhs(NULL), rhs(NULL)
00033 {
00034 }
00035
00036
00037 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs)
00038 : CSGObject(), precomputed_matrix(NULL), precompute_matrix(false),
00039 lhs(NULL), rhs(NULL)
00040 {
00041 init(p_lhs, p_rhs);
00042 }
00043
00044 CDistance::~CDistance()
00045 {
00046 delete[] precomputed_matrix;
00047 precomputed_matrix=NULL;
00048 }
00049
00050 bool CDistance::init(CFeatures* l, CFeatures* r)
00051 {
00052
00053 ASSERT(l);
00054 ASSERT(r);
00055
00056
00057 ASSERT(l->get_feature_class()==r->get_feature_class());
00058 ASSERT(l->get_feature_type()==r->get_feature_type());
00059
00060 lhs=l;
00061 rhs=r;
00062
00063 delete[] precomputed_matrix ;
00064 precomputed_matrix=NULL ;
00065
00066 return true;
00067 }
00068
00069 bool CDistance::load(char* fname)
00070 {
00071 return false;
00072 }
00073
00074 bool CDistance::save(char* fname)
00075 {
00076 int32_t i=0;
00077 int32_t num_left=lhs->get_num_vectors();
00078 int32_t num_right=rhs->get_num_vectors();
00079 KERNELCACHE_IDX num_total=num_left*num_right;
00080
00081 CFile f(fname, 'w', F_DREAL);
00082
00083 for (int32_t l=0; l< (int32_t) num_left && f.is_ok(); l++)
00084 {
00085 for (int32_t r=0; r< (int32_t) num_right && f.is_ok(); r++)
00086 {
00087 if (!(i % (num_total/10+1)))
00088 SG_PRINT( "%02d%%.", (int) (100.0*i/num_total));
00089 else if (!(i % (num_total/200+1)))
00090 SG_PRINT( ".");
00091
00092 float64_t k=distance(l,r);
00093 f.save_real_data(&k, 1);
00094
00095 i++;
00096 }
00097 }
00098
00099 if (f.is_ok())
00100 SG_INFO( "distance matrix of size %ld x %ld written \n", num_left, num_right);
00101
00102 return (f.is_ok());
00103 }
00104
00105 void CDistance::remove_lhs()
00106 {
00107 SG_UNREF(lhs);
00108 lhs = NULL;
00109 }
00110
00112 void CDistance::remove_rhs()
00113 {
00114 SG_UNREF(rhs);
00115 rhs = NULL;
00116 }
00117
00118
00119 void CDistance::do_precompute_matrix()
00120 {
00121 int32_t num_left=lhs->get_num_vectors();
00122 int32_t num_right=rhs->get_num_vectors();
00123 SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
00124
00125 ASSERT(num_left==num_right);
00126 ASSERT(lhs==rhs);
00127 int32_t num=num_left;
00128
00129 delete[] precomputed_matrix;
00130 precomputed_matrix=new float32_t[num*(num+1)/2];
00131
00132 for (int32_t i=0; i<num; i++)
00133 {
00134 SG_PROGRESS(i*i,0,num*num);
00135 for (int32_t j=0; j<=i; j++)
00136 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
00137 }
00138
00139 SG_PROGRESS(num*num,0,num*num);
00140 SG_DONE();
00141 }
00142
00143 void CDistance::get_distance_matrix(float64_t** dst, int32_t* m, int32_t* n)
00144 {
00145 ASSERT(dst && m && n);
00146
00147 float64_t* result = NULL;
00148 CFeatures* f1 = lhs;
00149 CFeatures* f2 = rhs;
00150
00151 if (f1 && f2)
00152 {
00153 int32_t num_vec1=f1->get_num_vectors();
00154 int32_t num_vec2=f2->get_num_vectors();
00155 *m=num_vec1;
00156 *n=num_vec2;
00157
00158 int64_t total_num=num_vec1*num_vec2;
00159 int32_t num_done=0;
00160 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00161
00162 result=(float64_t*) malloc(total_num*sizeof(float64_t));
00163 ASSERT(result);
00164
00165 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00166 {
00167 for (int32_t i=0; i<num_vec1; i++)
00168 {
00169 for (int32_t j=i; j<num_vec1; j++)
00170 {
00171 float64_t v=distance(i,j);
00172
00173 result[i+j*num_vec1]=v;
00174 result[j+i*num_vec1]=v;
00175
00176 if (num_done%100000)
00177 SG_PROGRESS(num_done, 0, total_num-1);
00178
00179 if (i!=j)
00180 num_done+=2;
00181 else
00182 num_done+=1;
00183 }
00184 }
00185 }
00186 else
00187 {
00188 for (int32_t i=0; i<num_vec1; i++)
00189 {
00190 for (int32_t j=0; j<num_vec2; j++)
00191 {
00192 result[i+j*num_vec1]=distance(i,j) ;
00193
00194 if (num_done%100000)
00195 SG_PROGRESS(num_done, 0, total_num-1);
00196
00197 num_done++;
00198 }
00199 }
00200 }
00201
00202 SG_DONE();
00203 }
00204 else
00205 SG_ERROR( "no features assigned to distance\n");
00206
00207 *dst=result;
00208 }
00209
00210 float32_t* CDistance::get_distance_matrix_shortreal(
00211 int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
00212 {
00213 float32_t* result = NULL;
00214 CFeatures* f1 = lhs;
00215 CFeatures* f2 = rhs;
00216
00217 if (f1 && f2)
00218 {
00219 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00220 SG_ERROR("distance matrix does not fit into target\n");
00221
00222 num_vec1=f1->get_num_vectors();
00223 num_vec2=f2->get_num_vectors();
00224 int64_t total_num=num_vec1*num_vec2;
00225 int32_t num_done=0;
00226
00227 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00228
00229 if (target)
00230 result=target;
00231 else
00232 result=new float32_t[total_num];
00233
00234 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00235 {
00236 for (int32_t i=0; i<num_vec1; i++)
00237 {
00238 for (int32_t j=i; j<num_vec1; j++)
00239 {
00240 float64_t v=distance(i,j);
00241
00242 result[i+j*num_vec1]=v;
00243 result[j+i*num_vec1]=v;
00244
00245 if (num_done%100000)
00246 SG_PROGRESS(num_done, 0, total_num-1);
00247
00248 if (i!=j)
00249 num_done+=2;
00250 else
00251 num_done+=1;
00252 }
00253 }
00254 }
00255 else
00256 {
00257 for (int32_t i=0; i<num_vec1; i++)
00258 {
00259 for (int32_t j=0; j<num_vec2; j++)
00260 {
00261 result[i+j*num_vec1]=distance(i,j) ;
00262
00263 if (num_done%100000)
00264 SG_PROGRESS(num_done, 0, total_num-1);
00265
00266 num_done++;
00267 }
00268 }
00269 }
00270
00271 SG_DONE();
00272 }
00273 else
00274 SG_ERROR( "no features assigned to distance\n");
00275
00276 return result;
00277 }
00278
00279 float64_t* CDistance::get_distance_matrix_real(
00280 int32_t &num_vec1, int32_t &num_vec2, float64_t* target)
00281 {
00282 float64_t* result = NULL;
00283 CFeatures* f1 = lhs;
00284 CFeatures* f2 = rhs;
00285
00286 if (f1 && f2)
00287 {
00288 if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
00289 SG_ERROR("distance matrix does not fit into target\n");
00290
00291 num_vec1=f1->get_num_vectors();
00292 num_vec2=f2->get_num_vectors();
00293 int64_t total_num=num_vec1*num_vec2;
00294 int32_t num_done=0;
00295
00296 SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
00297
00298 if (target)
00299 result=target;
00300 else
00301 result=new float64_t[total_num];
00302
00303 if ( (f1 == f2) && (num_vec1 == num_vec2) )
00304 {
00305 for (int32_t i=0; i<num_vec1; i++)
00306 {
00307 for (int32_t j=i; j<num_vec1; j++)
00308 {
00309 float64_t v=distance(i,j);
00310
00311 result[i+j*num_vec1]=v;
00312 result[j+i*num_vec1]=v;
00313
00314 if (num_done%100000)
00315 SG_PROGRESS(num_done, 0, total_num-1);
00316
00317 if (i!=j)
00318 num_done+=2;
00319 else
00320 num_done+=1;
00321 }
00322 }
00323 }
00324 else
00325 {
00326 for (int32_t i=0; i<num_vec1; i++)
00327 {
00328 for (int32_t j=0; j<num_vec2; j++)
00329 {
00330 result[i+j*num_vec1]=distance(i,j) ;
00331
00332 if (num_done%100000)
00333 SG_PROGRESS(num_done, 0, total_num-1);
00334
00335 num_done++;
00336 }
00337 }
00338 }
00339
00340 SG_DONE();
00341 }
00342 else
00343 SG_ERROR( "no features assigned to distance\n");
00344
00345 return result;
00346 }