00001 00031 #include <itpp/stat/mog_diag_kmeans.h> 00032 #include <iostream> 00033 00034 namespace itpp 00035 { 00036 00037 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const 00038 { 00039 double acc = 0.0; 00040 for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; } 00041 return(acc); 00042 } 00043 00044 void MOG_diag_kmeans_sup::assign_to_means() 00045 { 00046 00047 for (int k = 0;k < K;k++) c_count[k] = 0; 00048 00049 for (int n = 0;n < N;n++) { 00050 00051 int k = 0; 00052 double min_dist = dist(c_means[k], c_X[n]); 00053 int k_winner = k; 00054 00055 for (int k = 1;k < K;k++) { 00056 double tmp_dist = dist(c_means[k], c_X[n]); 00057 if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; } 00058 } 00059 00060 c_partitions[ k_winner ][ count[k_winner] ] = n; 00061 c_count[k_winner]++; 00062 } 00063 } 00064 00065 00066 void MOG_diag_kmeans_sup::recalculate_means() 00067 { 00068 00069 for (int k = 0;k < K;k++) { 00070 00071 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00072 00073 int Nk = c_count[k]; 00074 00075 for (int n = 0;n < Nk;n++) { 00076 double * x = c_X[ c_partitions[k][n] ]; 00077 for (int d = 0;d < D;d++) c_tmpvec[d] += x[d]; 00078 } 00079 00080 if (Nk > 0) { 00081 double * c_mean = c_means[k]; 00082 for (int d = 0;d < D;d++) c_mean[d] = c_tmpvec[d] / Nk; 00083 } 00084 } 00085 00086 } 00087 00088 00089 bool MOG_diag_kmeans_sup::dezombify_means() 00090 { 00091 00092 static int counter = 0; 00093 00094 bool zombie_mean = false; 00095 00096 int k = 0; 00097 int max_count = count[k]; 00098 int k_hog = k; 00099 00100 for (int k = 1;k < K;k++) if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; } 00101 00102 for (int k = 0;k < K;k++) { 00103 if (c_count[k] == 0) { 00104 00105 zombie_mean = true; 00106 if (verbose) { 00107 it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean"); 00108 } 00109 if (k_hog == k) { 00110 it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k"); 00111 return(false); 00112 } 00113 00114 if (counter >= c_count[k_hog]) counter = 0; 00115 00116 double * c_mean = c_means[k]; 00117 double * c_x = c_X[ c_partitions[k_hog][counter] ]; 00118 00119 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]); 00120 counter++; 00121 } 00122 00123 } 00124 00125 if (zombie_mean) assign_to_means(); 00126 00127 return(true); 00128 } 00129 00130 00131 double MOG_diag_kmeans_sup::measure_change() const 00132 { 00133 00134 double tmp_dist = 0.0; 00135 for (int k = 0;k < K;k++) tmp_dist += dist(c_means[k], c_means_old[k]); 00136 return(tmp_dist); 00137 } 00138 00139 00140 void MOG_diag_kmeans_sup::initial_means() 00141 { 00142 00143 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00144 00145 for (int n = 0;n < N;n++) { 00146 double * c_x = c_X[n]; 00147 for (int d = 0;d < D;d++) c_tmpvec[d] += c_x[d]; 00148 } 00149 00150 for (int d = 0;d < D;d++) c_tmpvec[d] /= N; 00151 00152 int step = int(floor(double(N) / double(K))); 00153 for (int k = 0;k < K;k++) { 00154 double * c_mean = c_means[k]; 00155 double * c_x = c_X[k*step]; 00156 00157 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]); 00158 } 00159 } 00160 00161 00162 void MOG_diag_kmeans_sup::iterate() 00163 { 00164 00165 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d]; 00166 00167 for (int i = 0;i < max_iter;i++) { 00168 00169 assign_to_means(); 00170 if (!dezombify_means()) return; 00171 recalculate_means(); 00172 00173 double change = measure_change(); 00174 00175 if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << " change = " << change << std::endl; 00176 if (change == 0) break; 00177 00178 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d]; 00179 } 00180 00181 } 00182 00183 00184 void MOG_diag_kmeans_sup::calc_means() 00185 { 00186 initial_means(); 00187 iterate(); 00188 } 00189 00190 00191 void MOG_diag_kmeans_sup::calc_covs() 00192 { 00193 00194 for (int k = 0;k < K;k++) { 00195 int Nk = c_count[k]; 00196 00197 if (Nk >= 2) { 00198 double * c_mean = c_means[k]; 00199 00200 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00201 00202 for (int n = 0;n < Nk;n++) { 00203 double * c_x = c_X[ c_partitions[k][n] ]; 00204 for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d]; c_tmpvec[d] += tmp * tmp; } 00205 } 00206 00207 for (int d = 0;d < D;d++) c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0); 00208 } 00209 else { 00210 for (int d = 0;d < D;d++) c_diag_covs[k][d] = 1.0; 00211 } 00212 } 00213 00214 } 00215 00216 00217 void MOG_diag_kmeans_sup::calc_weights() 00218 { 00219 for (int k = 0;k < K;k++) c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K); 00220 } 00221 00222 00223 void MOG_diag_kmeans_sup::normalise_vectors() 00224 { 00225 00226 for (int d = 0;d < D;d++) { 00227 double acc = 0.0; 00228 for (int n = 0;n < N;n++) acc += c_X[n][d]; 00229 c_norm_mu[d] = acc / N; 00230 } 00231 00232 for (int d = 0;d < D;d++) { 00233 double acc = 0.0; 00234 for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d]; acc += tmp * tmp; } 00235 c_norm_sd[d] = std::sqrt(acc / (N - 1)); 00236 } 00237 00238 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) { 00239 c_X[n][d] -= c_norm_mu[d]; 00240 if (c_norm_sd[d] > 0.0) c_X[n][d] /= c_norm_sd[d]; 00241 } 00242 } 00243 00244 00245 void MOG_diag_kmeans_sup::unnormalise_vectors() 00246 { 00247 00248 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) { 00249 if (c_norm_sd[d] > 0.0) c_X[n][d] *= c_norm_sd[d]; 00250 c_X[n][d] += c_norm_mu[d]; 00251 } 00252 } 00253 00254 00255 void MOG_diag_kmeans_sup::unnormalise_means() 00256 { 00257 00258 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) { 00259 if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d]; 00260 c_means[k][d] += norm_mu[d]; 00261 } 00262 } 00263 00264 00265 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) 00266 { 00267 00268 it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid"); 00269 it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero"); 00270 it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)"); 00271 00272 verbose = verbose_in; 00273 00274 Array<vec> means_in = model_in.get_means(); 00275 Array<vec> diag_covs_in = model_in.get_diag_covs(); 00276 vec weights_in = model_in.get_weights(); 00277 00278 init(means_in, diag_covs_in, weights_in); 00279 00280 means_in.set_size(0); 00281 diag_covs_in.set_size(0); 00282 weights_in.set_size(0); 00283 00284 it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality"); 00285 00286 N = X_in.size(); 00287 00288 if (K > N) { 00289 it_warning("MOG_diag_kmeans_sup::run(): K > N"); 00290 } 00291 else { 00292 if (K > N / 10) { 00293 it_warning("MOG_diag_kmeans_sup::run(): K > N/10"); 00294 } 00295 } 00296 00297 max_iter = max_iter_in; 00298 trust = trust_in; 00299 00300 means_old.set_size(K); 00301 for (int k = 0;k < K;k++) means_old(k).set_size(D); 00302 partitions.set_size(K); 00303 for (int k = 0;k < K;k++) partitions(k).set_size(N); 00304 count.set_size(K); 00305 tmpvec.set_size(D); 00306 norm_mu.set_size(D); 00307 norm_sd.set_size(D); 00308 00309 c_X = enable_c_access(X_in); 00310 c_means_old = enable_c_access(means_old); 00311 c_partitions = enable_c_access(partitions); 00312 c_count = enable_c_access(count); 00313 c_tmpvec = enable_c_access(tmpvec); 00314 c_norm_mu = enable_c_access(norm_mu); 00315 c_norm_sd = enable_c_access(norm_sd); 00316 00317 if (normalise_in) normalise_vectors(); 00318 00319 calc_means(); 00320 if (normalise_in) { unnormalise_vectors(); unnormalise_means(); } 00321 calc_covs(); 00322 calc_weights(); 00323 00324 model_in.init(means, diag_covs, weights); 00325 00326 disable_c_access(c_X); 00327 disable_c_access(c_means_old); 00328 disable_c_access(c_partitions); 00329 disable_c_access(c_count); 00330 disable_c_access(c_tmpvec); 00331 disable_c_access(c_norm_mu); 00332 disable_c_access(c_norm_sd); 00333 00334 means_old.set_size(0); 00335 partitions.set_size(0); 00336 count.set_size(0); 00337 tmpvec.set_size(0); 00338 norm_mu.set_size(0); 00339 norm_sd.set_size(0); 00340 00341 cleanup(); 00342 00343 } 00344 00345 // 00346 // convenience functions 00347 00348 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) 00349 { 00350 MOG_diag_kmeans_sup km; 00351 km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in); 00352 } 00353 00354 00355 } 00356
Generated on Wed Jan 20 23:03:06 2010 for IT++ by Doxygen 1.6.2