BALL  1.4.79
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
poseClustering.h
Go to the documentation of this file.
1 // -*- Mode: C++; tab-width: 2; -*-
2 // vi: set ts=2:
3 //
4 
5 #ifndef BALL_DOCKING_POSECLUSTERING_H
6 #define BALL_DOCKING_POSECLUSTERING_H
7 
8 #ifndef BALL_DATATYPE_OPTIONS_H
9 # include <BALL/DATATYPE/options.h>
10 #endif
11 
12 #ifndef BALL_DOCKING_COMMON_CONFORMATIONSET_H
14 #endif
15 
16 #ifndef BALL_MOLMEC_COMMON_SNAPSHOT_H
18 #endif
19 
20 #ifndef BALL_STRUCTURE_ATOMBIJECTION_H
22 #endif
23 
24 #ifndef BALL_KERNEL_SYSTEM_H
25 # include <BALL/KERNEL/system.h>
26 #endif
27 
28 #ifndef BALL_DATATYPE_STRING_H
29 # include <BALL/DATATYPE/string.h>
30 #endif
31 
32 #ifndef BALL_MATHS_VECTOR3_H
33 # include <BALL/MATHS/vector3.h>
34 #endif
35 
36 #ifndef BALL_MATHS_MATRIX44_H
37 # include <BALL/MATHS/matrix44.h>
38 #endif
39 
40 #include <Eigen/Core>
41 
42 #include <boost/shared_ptr.hpp>
43 #include <boost/variant.hpp>
44 #include <boost/graph/adjacency_list.hpp>
45 
46 #include <set>
47 #include <iostream>
48 
49 //#define POSECLUSTERING_DEBUG 1
50 #undef POSECLUSTERING_DEBUG
51 
52 namespace BALL
53 {
109  {
110  public:
114  struct BALL_EXPORT Option
116  {
119  static const String CLUSTER_METHOD;
120 
124 
128 
131  static const String RMSD_TYPE;
132 
135  //static const String FULL_CLUSTER_DENDOGRAM;
136  };
137 
140  {
141  static const Index CLUSTER_METHOD;
142  static const float DISTANCE_THRESHOLD;
144  static const Index RMSD_TYPE;
145  static const bool USE_CENTER_OF_MASS_PRECLINK;
146  //static const bool FULL_CLUSTER_DENDOGRAM;
147  };
148 
149  enum BALL_EXPORT RMSDType
150  {
152  RIGID_RMSD,
153  CENTER_OF_MASS_DISTANCE
154  };
155 
156  enum BALL_EXPORT RMSDLevelOfDetail
157  {
158  C_ALPHA, //=0
159  HEAVY_ATOMS,
160  BACKBONE,
161  ALL_ATOMS,
162  PROPERTY_BASED_ATOM_BIJECTION
163  };
164 
165  enum BALL_EXPORT ClusterMethod
166  {
168  SLINK_SIBSON,
169  CLINK_DEFAYS,
170  NEAREST_NEIGHBOR_CHAIN_WARD
171  };
172 
174  {
175  public:
177 
178  RigidTransformation(Eigen::Vector3f const& new_trans, Eigen::Matrix3f const& new_rot)
179  : translation(new_trans),
180  rotation(new_rot)
181  {}
182 
183  Eigen::Vector3f translation;
184  Eigen::Matrix3f rotation;
185  };
186 
192  {
193  public:
194  PosePointer(RigidTransformation const* new_trafo, SnapShot const* new_snap = 0)
195  : trafo(new_trafo),
196  snap(new_snap)
197  { }
198 
199  PosePointer(SnapShot const* new_snap)
200  : trafo(0),
201  snap(new_snap)
202  { }
203 
205  SnapShot const* snap;
206  };
207 
209  {
210  public:
213  std::set<Index> poses;
214 
218 
227  boost::variant<Eigen::VectorXf, RigidTransformation> center;
228 
231  float merged_at;
232 #ifdef POSECLUSTERING_DEBUG
233 
235  float current_cluster_id;
236 #endif
237  };
238 
239  typedef boost::adjacency_list<boost::vecS,
240  boost::vecS,
241  boost::directedS,
243 
244  typedef ClusterTree::vertex_descriptor ClusterTreeNode;
245 
247 
250 
252  PoseClustering();
253 
256  PoseClustering(ConformationSet* poses, float rmsd);
257 
259  PoseClustering(System const& base_system, String transformation_file_name);
260 
262  virtual ~PoseClustering();
264 
265 
268 
271  bool compute();
272 
274 
278 
280  void setConformationSet(ConformationSet* new_set);
281 
286  void setBaseSystemAndPoses(System const& base_system, std::vector<PosePointer> const& poses);
287 
290  // will be applied upon the current conformation
291  void setBaseSystemAndTransformations(System const& base_system, String transformation_file_name);
292 
294  const ConformationSet* getConformationSet() const {return current_set_;}
295 
297  ConformationSet* getConformationSet() {return current_set_;}
298 
300  const System& getSystem() const;
301 
303  System& getSystem();
304 
306  Size getNumberOfPoses() const {return poses_.size();}
307 
309  Size getNumberOfClusters() const {return clusters_.size();}
310 
313  const std::set<Index>& getCluster(Index i) const;
314 
317  std::set<Index>& getCluster(Index i);
318 
320  Size getClusterSize(Index i) const;
321 
323  float getClusterScore(Index i) const;
324 
326  float getScore(const System sys_a, const System sys_b, Options options) const;
327 
329  boost::shared_ptr<System> getPose(Index i) const;
330 
332  boost::shared_ptr<System> getClusterRepresentative(Index i) const;
333 
335  boost::shared_ptr<ConformationSet> getClusterConformationSet(Index i);
336 
338  boost::shared_ptr<ConformationSet> getReducedConformationSet();
339 
353  bool refineClustering(Options const& refined_options);
354 
356 
360  Options options;
362 
365  void setDefaultOptions();
366 
368 
371 
377  static float getRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
378 
384  static float getSquaredRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
385 
388  static Eigen::Matrix3f computeCovarianceMatrix(System const& system, Index rmsd_level_of_detail = C_ALPHA);
389 
391 
394 
400  std::vector<std::set<Index> > extractClustersForThreshold(float threshold);
401 
405  std::vector<std::set<Index> > extractNBestClusters(Size n);
406 
409  void exportWardClusterTree(std::ostream& out);
410 
412 
413 
416  void printClusters(std::ostream& out = std::cout) const;
417 
420  void printClusterRMSDs(std::ostream& out = std::cout);
421 
422 
423  protected:
424 
428  {
429  public:
430  ClusterTreeWriter_(ClusterTree const* cluster_tree)
431  : cluster_tree_(cluster_tree)
432  { }
433 
434  void operator() (std::ostream& out, const ClusterTreeNode& v) const;
435 
436  protected:
438  };
439 
443  {
444  public:
446  : cluster_tree_(&cluster_tree)
447  {}
448 
449  bool operator() (ClusterTreeNode const first, ClusterTreeNode const second) const
450  {
451  float first_value = (*cluster_tree_)[ first].merged_at;
452  float second_value = (*cluster_tree_)[second].merged_at;
453 
454  return first_value < second_value;
455  }
456 
457  protected:
459  };
460 
461  // trivial complete linkage implementation
462  // with O(n^2) space request
463  bool trivialCompute_();
464 
465  // space efficient (SLINK or CLINK) clustering
466  bool linearSpaceCompute_();
467 
468  // implementation of a single linkage clustering as described in
469  // R. Sibson: SLINK: an optimally efficient algorithm for the single-link cluster method.
470  // The Computer Journal. 16, 1, British Computer Society, 1973, p. 30-34
471  void slinkInner_(int current_level);
472 
473  // implementation of a complete linkage clustering as described in
474  // D. Defays: An efficient algorithm for a complete link method.
475  // The Computer Journal. 20, 4, British Computer Society, 1977, p. 364-366.
476  void clinkInner_(int current_level);
477 
478  // implememtation of the nearest neighbor chain clustering algorithm
479  // as described in:
480  // Murtagh, Fionn (1983): "A survey of recent advances in hierarchical clustering algorithms",
481  // The Computer Journal 26 (4): 354–359
482  bool nearestNeighborChainCompute_();
483 
484  void initWardDistance_(Index rmsd_type);
485 
486  void updateWardDistance_(ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
487 
488  float computeWardDistance_(ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
489 
490  std::set<Index> collectClusterBelow_(ClusterTreeNode const& v);
491 
492  // compute the center of masses
493  void computeCenterOfMasses_();
494 
495  // precompute an atom bijection for faster access
496  void precomputeAtomBijection_();
497 
498  // check the given atom wrt choice of option RMSD_LEVEL_OF_DETAIL
499  bool static isExcludedByLevelOfDetail_(Atom const* atom, Index rmsd_level_of_detail);
500 
501  // distance between cluster i and cluster j
502  float getClusterRMSD_(Index i, Index j, Index rmsd_type);
503 
504  // reads the poses given as transformations from a file
505  // Note: the previously given system will be taken
506  // as untransformed reference, e.g. all transformations
507  // will be applied upon the current conformation
508  bool readTransformationsFromFile_(String filename);
509 
510  // compute the RMSD between two "poses"
511  float getRMSD_(Index i, Index j, Index rmsd_type);
512 
513  // store pointers to the snapshots in the poses vector
514  void storeSnapShotReferences_();
515 
516  //
517  void convertTransformations2Snaphots_();
518 
519  //
520  void convertSnaphots2Transformations_();
521 
522  //
523  void printCluster_(Index i, std::ostream& out = std::cout) const;
524 
525  //
526  void printVariables_(int a, int b, double c, int d, double e, int current_level);
527 
528  //
529  void clear_();
530 
531  // only used by trivial clustering
532  Eigen::MatrixXd pairwise_scores_;
533 
536 
538  std::vector< std::set<Index> > clusters_;
539 
541  std::vector< float > cluster_scores_;
542 
545 
546 
547  // ----- unified access to the poses, independent of their type
548  // (the poses are either stored in transformations_, or in current_set_)
549  std::vector<PosePointer> poses_;
550 
551  // ----- data structure for transformation input (instead of snapshots)
552  std::vector<RigidTransformation> transformations_;
553 
554  Eigen::Matrix3f covariance_matrix_;
555 
556  // TODO: maybe use a const - ptr instead?
558 
559  // the reference state
561 
562  // flag indicating the use of transformation as input
564 
565  // do we need to delete the conformation set, that was
566  // created by converting transformations to snapshots
568 
569  // ------ data structures for slink and clink
570 
571  // stores the distance at which this indexed element has longer
572  // the largest index of its cluster
573  std::vector<double> lambda_;
574 
575  // the index of the cluster representative at merge-time
576  // (element with largest index)
577  std::vector<int> pi_;
578 
579  std::vector<double> mu_;
580 
581 
582  // ----- data structures for nearest neighbor chain ward
584 
585  // ----- data structure for CENTER_OF_GRAVITY_CLINK
586 
587  // the geometric center of mass
588  std::vector<Vector3> com_;
589 
590  // ----- general data structures
591 
592  // We cache the atom bijection for faster
593  // RMSD computation; this is possible, since the system topology does
594  // not change
596 
597  // helper dummies to speed up snapshot application
600 
603  // and its root
605  }; //class PoseClustering
606 } //namesspace BALL
607 
608 #endif // BALL_DOCKING_POSECLUSTERING_H
#define BALL_CREATE(name)
Definition: create.h:62
std::vector< double > mu_
std::vector< double > lambda_
static const String RMSD_TYPE
static const Index RMSD_TYPE
RIGID_RMSD
CLINK_DEFAYS
std::vector< std::set< Index > > clusters_
the clusters: sets of pose indices
std::vector< Vector3 > com_
static const String CLUSTER_METHOD
RigidTransformation(Eigen::Vector3f const &new_trans, Eigen::Matrix3f const &new_rot)
TRIVIAL_COMPLETE_LINKAGE
Eigen::Matrix3f covariance_matrix_
static const float DISTANCE_THRESHOLD
std::vector< int > pi_
SLINK_SIBSON
const ConformationSet * getConformationSet() const
returns the poses to be clustered as ConformationSet
ClusterTreeWriter_(ClusterTree const *cluster_tree)
ClusterTreeNodeComparator(ClusterTree &cluster_tree)
Index rmsd_level_of_detail_
the RMSD definition used for clustering
RigidTransformation const * trafo
HEAVY_ATOMS
PosePointer(RigidTransformation const *new_trafo, SnapShot const *new_snap=0)
ClusterTreeNode cluster_tree_root_
BALL_EXTERN_VARIABLE const double c
Definition: constants.h:149
static const Index CLUSTER_METHOD
BACKBONE
boost::variant< Eigen::VectorXf, RigidTransformation > center
Eigen::MatrixXd pairwise_scores_
Computation of clusters of docking poses.
ConformationSet * current_set_
the ConformationSet we wish to cluster
static const String DISTANCE_THRESHOLD
Size getNumberOfClusters() const
returns the number of clusters found
Size getNumberOfPoses() const
returns the number of poses
static const String RMSD_LEVEL_OF_DETAIL
SNAPSHOT_RMSD
PosePointer(SnapShot const *new_snap)
ConformationSet * getConformationSet()
returns the poses to be clustered as ConformationSet
boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, ClusterProperties > ClusterTree
std::vector< float > cluster_scores_
the scores of the clusters
std::vector< PosePointer > poses_
static const Index RMSD_LEVEL_OF_DETAIL
static const bool USE_CENTER_OF_MASS_PRECLINK
C_ALPHA
ClusterTree cluster_tree_
The tree built during hierarchical clustering.
ALL_ATOMS
std::vector< RigidTransformation > transformations_
Default values for options.
ClusterTree::vertex_descriptor ClusterTreeNode
#define BALL_EXPORT
Definition: COMMON/global.h:50
AtomBijection atom_bijection_