Hierarchical.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "clustering/Hierarchical.h"
00012 #include "distance/Distance.h"
00013 #include "features/Labels.h"
00014 #include "features/RealFeatures.h"
00015 #include "lib/Mathematics.h"
00016 #include "base/Parallel.h"
00017
00018 #ifndef WIN32
00019 #include <pthread.h>
00020 #endif
00021
00022 CHierarchical::CHierarchical()
00023 : CDistanceMachine(), merges(3), dimensions(0), assignment(NULL),
00024 table_size(0), pairs(NULL), merge_distance(NULL)
00025 {
00026 }
00027
00028 CHierarchical::CHierarchical(INT merges_, CDistance* d)
00029 : CDistanceMachine(), merges(merges_), dimensions(0), assignment(NULL),
00030 table_size(0), pairs(NULL), merge_distance(NULL)
00031 {
00032 set_distance(d);
00033 }
00034
00035 CHierarchical::~CHierarchical()
00036 {
00037 delete[] merge_distance;
00038 delete[] assignment;
00039 delete[] pairs;
00040 }
00041
00042 bool CHierarchical::train()
00043 {
00044 ASSERT(distance);
00045 CFeatures* lhs=distance->get_lhs();
00046 ASSERT(lhs);
00047
00048 INT num=lhs->get_num_vectors();
00049 ASSERT(num>0);
00050
00051 const INT num_pairs=num*(num-1)/2;
00052
00053 delete[] merge_distance;
00054 merge_distance=new DREAL[num];
00055 CMath::fill_vector(merge_distance, num, -1.0);
00056
00057 delete[] assignment;
00058 assignment=new INT[num];
00059 CMath::range_fill_vector(assignment, num);
00060
00061 delete[] pairs;
00062 pairs=new INT[2*num];
00063 CMath::fill_vector(pairs, 2*num, -1);
00064
00065 pair* index=new pair[num_pairs];
00066 DREAL* distances=new DREAL[num_pairs];
00067
00068 INT offs=0;
00069 for (INT i=0; i<num; i++)
00070 {
00071 for (INT j=i+1; j<num; j++)
00072 {
00073 distances[offs]=distance->distance(i,j);
00074 index[offs].idx1=i;
00075 index[offs].idx2=j;
00076 offs++;
00077 }
00078 SG_PROGRESS(i, 0, num-1);
00079 }
00080
00081 CMath::qsort_index<DREAL,pair>(distances, index, (num-1)*num/2);
00082
00083
00084 INT k=-1;
00085 INT l=0;
00086 for (; l<num && (num-l)>=merges && k<num_pairs-1; l++)
00087 {
00088 while (k<num_pairs-1)
00089 {
00090 k++;
00091
00092 INT i=index[k].idx1;
00093 INT j=index[k].idx2;
00094 INT c1=assignment[i];
00095 INT c2=assignment[j];
00096
00097 if (c1==c2)
00098 continue;
00099
00100 SG_PROGRESS(k, 0, num_pairs-1);
00101
00102 if (c1<c2)
00103 {
00104 pairs[2*l]=c1;
00105 pairs[2*l+1]=c2;
00106 }
00107 else
00108 {
00109 pairs[2*l]=c2;
00110 pairs[2*l+1]=c1;
00111 }
00112 merge_distance[l]=distances[k];
00113
00114 INT c=num+l;
00115 for (INT m=0; m<num; m++)
00116 {
00117 if (assignment[m] == c1 || assignment[m] == c2)
00118 assignment[m] = c;
00119 }
00120 #ifdef DEBUG_HIERARCHICAL
00121 SG_PRINT("l=%04i i=%04i j=%04i c1=%+04d c2=%+04d c=%+04d dist=%6.6f\n", l,i,j, c1,c2,c, merge_distance[l]);
00122 #endif
00123 break;
00124 }
00125 }
00126
00127 assignment_size=num;
00128 table_size=l-1;
00129 ASSERT(table_size>0);
00130 delete[] distances;
00131 delete[] index;
00132 SG_UNREF(lhs)
00133
00134 return true;
00135 }
00136
00137 bool CHierarchical::load(FILE* srcfile)
00138 {
00139 return false;
00140 }
00141
00142 bool CHierarchical::save(FILE* dstfile)
00143 {
00144 return false;
00145 }