00001 /* +---------------------------------------------------------------------------+ 00002 | The Mobile Robot Programming Toolkit (MRPT) C++ library | 00003 | | 00004 | http://mrpt.sourceforge.net/ | 00005 | | 00006 | Copyright (C) 2005-2011 University of Malaga | 00007 | | 00008 | This software was written by the Machine Perception and Intelligent | 00009 | Robotics Lab, University of Malaga (Spain). | 00010 | Contact: Jose-Luis Blanco <jlblanco@ctima.uma.es> | 00011 | | 00012 | This file is part of the MRPT project. | 00013 | | 00014 | MRPT is free software: you can redistribute it and/or modify | 00015 | it under the terms of the GNU General Public License as published by | 00016 | the Free Software Foundation, either version 3 of the License, or | 00017 | (at your option) any later version. | 00018 | | 00019 | MRPT is distributed in the hope that it will be useful, | 00020 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 00021 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 00022 | GNU General Public License for more details. | 00023 | | 00024 | You should have received a copy of the GNU General Public License | 00025 | along with MRPT. If not, see <http://www.gnu.org/licenses/>. | 00026 | | 00027 +---------------------------------------------------------------------------+ */ 00028 #ifndef mrpt_math_kmeans_H 00029 #define mrpt_math_kmeans_H 00030 00031 #include <mrpt/math/CMatrixTemplateNumeric.h> 00032 #include <mrpt/math/CMatrixFixedNumeric.h> 00033 00034 namespace mrpt 00035 { 00036 namespace math 00037 { 00038 namespace detail { 00039 // Auxiliary method: templatized for working with float/double's. 00040 template <typename SCALAR> 00041 double BASE_IMPEXP internal_kmeans( 00042 const bool use_kmeansplusplus_method, 00043 const size_t nPoints, 00044 const size_t k, 00045 const size_t dims, 00046 const SCALAR *points, 00047 const size_t attempts, 00048 SCALAR* out_center, 00049 int *out_assignments 00050 ); 00051 00052 // Auxiliary method, the actual code of the two front-end functions offered to the user below. 00053 template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2> 00054 double stub_kmeans( 00055 const bool use_kmeansplusplus_method, 00056 const size_t k, 00057 const LIST_OF_VECTORS1 & points, 00058 std::vector<int> &assignments, 00059 LIST_OF_VECTORS2 *out_centers, 00060 const size_t attempts 00061 ) 00062 { 00063 MRPT_START 00064 ASSERT_(k>=1) 00065 const size_t N = points.size(); 00066 assignments.resize(N); 00067 if (out_centers) out_centers->clear(); 00068 if (!N) 00069 return 0; // No points, we're done. 00070 // Parse to required format: 00071 size_t dims=0; 00072 const typename LIST_OF_VECTORS1::const_iterator it_first=points.begin(); 00073 const typename LIST_OF_VECTORS1::const_iterator it_end =points.end(); 00074 typedef typename LIST_OF_VECTORS1::value_type TInnerVector; 00075 typedef typename LIST_OF_VECTORS2::value_type TInnerVectorCenters; 00076 std::vector<typename TInnerVector::value_type> raw_vals; 00077 typename TInnerVector::value_type *trg_ptr=NULL; 00078 for (typename LIST_OF_VECTORS1::const_iterator it=it_first;it!=it_end;++it) 00079 { 00080 if (it==it_first) 00081 { 00082 dims = it->size(); 00083 ASSERTMSG_(dims>0,"Dimensionality of points can't be zero.") 00084 raw_vals.resize(N*dims); 00085 trg_ptr = &raw_vals[0]; 00086 } 00087 else 00088 { 00089 ASSERTMSG_(size_t(dims)==size_t(it->size()),"All points must have the same dimensionality.") 00090 } 00091 00092 ::memcpy(trg_ptr, &(*it)[0], dims*sizeof(typename TInnerVector::value_type)); 00093 trg_ptr+=dims; 00094 } 00095 // Call the internal implementation: 00096 std::vector<typename TInnerVectorCenters::value_type> centers(dims*k); 00097 const double ret = detail::internal_kmeans(false,N,k,points.begin()->size(),&raw_vals[0],attempts,¢ers[0],&assignments[0]); 00098 // Centers: 00099 if (out_centers) 00100 { 00101 const typename TInnerVectorCenters::value_type *center_ptr = ¢ers[0]; 00102 for (size_t i=0;i<k;i++) 00103 { 00104 TInnerVectorCenters c; 00105 c.resize(dims); 00106 for (size_t j=0;j<dims;j++) c[j]= *center_ptr++; 00107 out_centers->push_back(c); 00108 } 00109 } 00110 return ret; 00111 MRPT_END 00112 } 00113 } // end detail namespace 00114 00115 00116 /** @name k-means algorithms 00117 @{ */ 00118 00119 /** k-means algorithm to cluster a list of N points of arbitrary dimensionality into exactly K clusters. 00120 * The list of input points can be any template CONTAINER<POINT> with: 00121 * - CONTAINER can be: Any STL container: std::vector,std::list,std::deque,... 00122 * - POINT can be: 00123 * - std::vector<double/float> 00124 * - CArrayDouble<N> / CArrayFloat<N> 00125 * 00126 * \param k [IN] Number of cluster to look for. 00127 * \param points [IN] The list of N input points. It can be any STL-like containers of std::vector<float/double>, for example a std::vector<vector_double>, a std::list<vector_float>, etc... 00128 * \param assignments [OUT] At output it will have a number [0,k-1] for each of the N input points. 00129 * \param out_centers [OUT] If not NULL, at output will have the centers of each group. Can be of any of the supported types of "points", but the basic coordinates should be float or double exactly as in "points". 00130 * \param attempts [IN] Number of attempts. 00131 * 00132 * \sa A more advanced algorithm, see: kmeanspp 00133 * \note Uses the kmeans++ implementation by David Arthur (2009, http://www.stanford.edu/~darthur/kmpp.zip). 00134 */ 00135 template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2> 00136 inline double kmeans( 00137 const size_t k, 00138 const LIST_OF_VECTORS1 & points, 00139 std::vector<int> &assignments, 00140 LIST_OF_VECTORS2 *out_centers = NULL, 00141 const size_t attempts = 3 00142 ) 00143 { 00144 return detail::stub_kmeans(false /* standard method */, k,points,assignments,out_centers,attempts); 00145 } 00146 00147 /** k-means++ algorithm to cluster a list of N points of arbitrary dimensionality into exactly K clusters. 00148 * The list of input points can be any template CONTAINER<POINT> with: 00149 * - CONTAINER can be: Any STL container: std::vector,std::list,std::deque,... 00150 * - POINT can be: 00151 * - std::vector<double/float> 00152 * - CArrayDouble<N> / CArrayFloat<N> 00153 * 00154 * \param k [IN] Number of cluster to look for. 00155 * \param points [IN] The list of N input points. It can be any STL-like containers of std::vector<float/double>, for example a std::vector<vector_double>, a std::list<vector_float>, etc... 00156 * \param assignments [OUT] At output it will have a number [0,k-1] for each of the N input points. 00157 * \param out_centers [OUT] If not NULL, at output will have the centers of each group. Can be of any of the supported types of "points", but the basic coordinates should be float or double exactly as in "points". 00158 * \param attempts [IN] Number of attempts. 00159 * 00160 * \sa The standard kmeans algorithm, see: kmeans 00161 * \note Uses the kmeans++ implementation by David Arthur (2009, http://www.stanford.edu/~darthur/kmpp.zip). 00162 */ 00163 template <class LIST_OF_VECTORS1,class LIST_OF_VECTORS2> 00164 inline double kmeanspp( 00165 const size_t k, 00166 const LIST_OF_VECTORS1 & points, 00167 std::vector<int> &assignments, 00168 LIST_OF_VECTORS2 *out_centers = NULL, 00169 const size_t attempts = 3 00170 ) 00171 { 00172 return detail::stub_kmeans(true /* kmeans++ algorithm*/, k,points,assignments,out_centers,attempts); 00173 } 00174 00175 /** @} */ 00176 00177 } // End of MATH namespace 00178 } // End of namespace 00179 #endif
Page generated by Doxygen 1.7.3 for MRPT 0.9.4 SVN:exported at Tue Jan 25 21:56:31 UTC 2011 |