Hierarchical.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2008 Soeren Sonnenburg
00008  * Copyright (C) 2007-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "clustering/Hierarchical.h"
00012 #include "distance/Distance.h"
00013 #include "features/Labels.h"
00014 #include "features/RealFeatures.h"
00015 #include "lib/Mathematics.h"
00016 #include "base/Parallel.h"
00017 
00018 #ifndef WIN32
00019 #include <pthread.h>
00020 #endif
00021 
00022 CHierarchical::CHierarchical()
00023 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00024     table_size(0), pairs(NULL), merge_distance(NULL)
00025 {
00026 }
00027 
00028 CHierarchical::CHierarchical(INT merges_, CDistance* d)
00029 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00030     table_size(0), pairs(NULL), merge_distance(NULL)
00031 {
00032     set_distance(d);
00033 }
00034 
00035 CHierarchical::~CHierarchical()
00036 {
00037     delete[] merge_distance;
00038     delete[] assignment;
00039     delete[] pairs;
00040 }
00041 
00042 bool CHierarchical::train()
00043 {
00044     ASSERT(distance);
00045     CFeatures* lhs=distance->get_lhs();
00046     ASSERT(lhs);
00047 
00048     INT num=lhs->get_num_vectors();
00049     ASSERT(num>0);
00050 
00051     const INT num_pairs=num*(num-1)/2;
00052 
00053     delete[] merge_distance;
00054     merge_distance=new DREAL[num];
00055     CMath::fill_vector(merge_distance, num, -1.0);
00056 
00057     delete[] assignment;
00058     assignment=new INT[num];
00059     CMath::range_fill_vector(assignment, num);
00060 
00061     delete[] pairs;
00062     pairs=new INT[2*num];
00063     CMath::fill_vector(pairs, 2*num, -1);
00064 
00065     pair* index=new pair[num_pairs];
00066     DREAL* distances=new DREAL[num_pairs];
00067 
00068     INT offs=0;
00069     for (INT i=0; i<num; i++)
00070     {
00071         for (INT j=i+1; j<num; j++)
00072         {
00073             distances[offs]=distance->distance(i,j);
00074             index[offs].idx1=i;
00075             index[offs].idx2=j;
00076             offs++;                 //offs=i*(i+1)/2+j
00077         }
00078         SG_PROGRESS(i, 0, num-1);
00079     }
00080 
00081     CMath::qsort_index<DREAL,pair>(distances, index, (num-1)*num/2);
00082     //CMath::display_vector(distances, (num-1)*num/2, "dists");
00083 
00084     INT k=-1;
00085     INT l=0;
00086     for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00087     {
00088         while (k<num_pairs-1)
00089         {
00090             k++;
00091 
00092             INT i=index[k].idx1;
00093             INT j=index[k].idx2;
00094             INT c1=assignment[i];
00095             INT c2=assignment[j];
00096 
00097             if (c1==c2)
00098                 continue;
00099             
00100             SG_PROGRESS(k, 0, num_pairs-1);
00101 
00102             if (c1<c2)
00103             {
00104                 pairs[2*l]=c1;
00105                 pairs[2*l+1]=c2;
00106             }
00107             else
00108             {
00109                 pairs[2*l]=c2;
00110                 pairs[2*l+1]=c1;
00111             }
00112             merge_distance[l]=distances[k];
00113 
00114             INT c=num+l;
00115             for (INT m=0; m<num; m++)
00116             {
00117                 if (assignment[m] == c1 || assignment[m] == c2)
00118                     assignment[m] = c;
00119             }
00120 #ifdef DEBUG_HIERARCHICAL
00121             SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00122 #endif
00123             break;
00124         }
00125     }
00126 
00127     assignment_size=num;
00128     table_size=l-1;
00129     ASSERT(table_size>0);
00130     delete[] distances;
00131     delete[] index;
00132     SG_UNREF(lhs)
00133 
00134     return true;
00135 }
00136 
00137 bool CHierarchical::load(FILE* srcfile)
00138 {
00139     return false;
00140 }
00141 
00142 bool CHierarchical::save(FILE* dstfile)
00143 {
00144     return false;
00145 }

SHOGUN Machine Learning Toolbox - Documentation