IMP logo
IMP Reference Guide  develop.ba5ef9f91e,2024/02/26
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import itertools
14 
15 try:
16  import sklearn.mixture # noqa: F401
17  nosklearn = False
18 except ImportError:
19  nosklearn = True
20 from math import exp, sqrt
21 
22 
23 def decorate_gmm_from_text(in_fn, ps, mdl, transform=None, radius_scale=1.0,
24  mass_scale=1.0):
25  """read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
26  ncomp = 0
27  added_ps = itertools.count(1)
28  with open(in_fn, 'r') as inf:
29  for line in inf:
30  if line[0] != '#':
31  if ncomp > len(ps) - 1:
32  ps.append(IMP.Particle(mdl, "GMM%d" % next(added_ps)))
33  p = ps[ncomp]
34  fields = line.split('|')
35  weight = float(fields[2])
36  center = list(map(float, fields[3].split()))
37  covar = np.array(
38  list(map(float, fields[4].split()))).reshape((3, 3))
40  covar.tolist(), IMP.algebra.Vector3D(center))
42  g = IMP.core.Gaussian.setup_particle(ps[ncomp], shape)
43  else:
44  g = IMP.core.Gaussian(ps[ncomp])
45  g.set_gaussian(shape)
47  IMP.atom.Mass.setup_particle(p, weight * mass_scale)
48  else:
49  IMP.atom.Mass(p).set_mass(weight * mass_scale)
50  rmax = sqrt(max(g.get_variances())) * radius_scale
51  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
52  IMP.core.XYZR.setup_particle(ps[ncomp], rmax)
53  else:
54  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
55  if transform is not None:
57  transform)
58  ncomp += 1
59 
60 
61 def write_gmm_to_text(ps, out_fn, comments=[]):
62  """write a list of gaussians to text. must be decorated as Gaussian
63  and Mass"""
64  print('will write GMM text to', out_fn)
65  with open(out_fn, 'w') as outf:
66  for comment in comments:
67  outf.write('# ' + comment + '\n')
68  outf.write('#|num|weight|mean|covariance matrix|\n')
69  for ng, g in enumerate(ps):
70  shape = IMP.core.Gaussian(g).get_gaussian()
71  weight = IMP.atom.Mass(g).get_mass()
72  covar = [c for row in IMP.algebra.get_covariance(shape)
73  for c in row]
74  mean = list(shape.get_center())
75  fm = [ng, weight] + mean + covar
76  outf.write('|{}|{}|{} {} {}|'
77  '{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
78 
79 
80 def gmm2map(to_draw, voxel_size, bounding_box=None, origin=None, fast=False,
81  factor=2.5):
82  if type(to_draw[0]) in (IMP.Particle, IMP.atom.Hierarchy,
84  ps = to_draw
85  elif type(to_draw[0]) is IMP.core.Gaussian:
86  ps = [g.get_particle() for g in to_draw]
87  else:
88  print('ps must be Particles or Gaussians')
89  return
90  if bounding_box is None:
91  if len(ps) > 1:
93  [IMP.core.XYZ(p).get_coordinates() for p in ps])
94  s2 = IMP.algebra.Sphere3D(s.get_center(), s.get_radius() * 3)
95  else:
96  g = IMP.core.Gaussian(ps[0]).get_gaussian()
97  s2 = IMP.algebra.Sphere3D(g.get_center(),
98  max(g.get_variances()) * 3)
99  bounding_box = IMP.algebra.get_bounding_box(s2)
100  shapes = []
101  weights = []
102  for p in ps:
103  shapes.append(IMP.core.Gaussian(p).get_gaussian())
104  weights.append(IMP.atom.Mass(p).get_mass())
105  print('rasterizing')
106  if fast:
107  grid = IMP.algebra.get_rasterized_fast(shapes, weights, voxel_size,
108  bounding_box, factor)
109  else:
110  grid = IMP.algebra.get_rasterized(shapes, weights, voxel_size,
111  bounding_box)
112  print('creating map')
113  d1 = IMP.em.create_density_map(grid)
114  if origin is not None:
115  d1.set_origin(origin)
116  return d1
117 
118 
119 def write_gmm_to_map(to_draw, out_fn, voxel_size, bounding_box=None,
120  origin=None, fast=False, factor=2.5):
121  """write density map from GMM. input can be either particles
122  or gaussians"""
123  d1 = gmm2map(to_draw, voxel_size, bounding_box, origin, fast)
124  print('will write GMM map to', out_fn)
125  IMP.em.write_map(d1, out_fn, IMP.em.MRCReaderWriter())
126  del d1
127 
128 
129 def write_sklearn_gmm_to_map(gmm, out_fn, apix=0, bbox=None, dmap_model=None):
130  """write density map directly from sklearn GMM (kinda slow) """
131  # create density
132  if dmap_model is not None:
133  d1 = IMP.em.create_density_map(dmap_model)
134  else:
135  d1 = IMP.em.create_density_map(bbox, apix)
136 
137  # fill it with values from the GMM
138  print('getting coords')
139  nvox = d1.get_number_of_voxels()
140  apos = [list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
141 
142  print('scoring')
143  scores = gmm.score(apos)
144 
145  print('assigning')
146  for nv, score in enumerate(scores):
147  d1.set_value(nv, exp(score))
148  print('will write GMM map to', out_fn)
149  IMP.em.write_map(d1, out_fn, IMP.em.MRCReaderWriter())
150 
151 
152 def draw_points(pts, out_fn,
154  use_colors=False):
155  """ given some points (and optional transform), write them to chimera
156  'bild' format; colors flag only applies to ellipses, otherwise it'll
157  be weird"""
158  with open(out_fn, 'w') as outf:
159  # print 'will draw',len(pts),'points'
160  # write first point in red
161  pt = trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
162  start = 0
163  if use_colors:
164  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
165  % (pt[0], pt[1], pt[2]))
166  start = 1
167 
168  # write remaining points in green
169  if use_colors:
170  outf.write('.color 0 1 0\n')
171  colors = ['0 1 0', '0 0 1', '0 1 1']
172  for nt, t in enumerate(pts[start:]):
173  if use_colors and nt % 2 == 0:
174  outf.write('.color %s\n' % colors[nt / 2])
175  pt = trans.get_transformed(IMP.algebra.Vector3D(t))
176  outf.write('.dotat %.2f %.2f %.2f\n' % (pt[0], pt[1], pt[2]))
177 
178 
179 def create_gmm_for_bead(mdl,
180  particle,
181  n_components,
182  sampled_points=100000,
183  num_iter=100):
184  print('fitting bead with', n_components, 'gaussians')
185  dmap = IMP.em.SampledDensityMap([particle], 1.0, 1.0,
187  IMP.em.SPHERE)
188  IMP.em.write_map(dmap, 'test_intermed.mrc')
189  pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
190  draw_points(pts, 'pts.bild')
191  density_particles = []
192  fit_gmm_to_points(pts, n_components, mdl, density_particles, num_iter,
193  'full',
194  mass_multiplier=IMP.atom.Mass(particle).get_mass())
195  return density_particles, dmap
196 
197 
198 def sample_and_fit_to_particles(model,
199  fragment_particles,
200  num_components,
201  sampled_points=1000000,
202  simulation_res=0.5,
203  voxel_size=1.0,
204  num_iter=100,
205  covariance_type='full',
206  multiply_by_total_mass=True,
207  output_map=None,
208  output_txt=None):
209  density_particles = []
210  if multiply_by_total_mass:
211  mass_multiplier = sum((IMP.atom.Mass(p).get_mass()
212  for p in set(fragment_particles)))
213  print('add_component_density: will multiply by mass', mass_multiplier)
214 
215  # simulate density from ps, then calculate points to fit
216  print('add_component_density: sampling points')
218  fragment_particles, simulation_res, voxel_size,
220  dmap.calcRMS()
221  pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
222 
223  # fit GMM
224  print('add_component_density: fitting GMM to', len(pts), 'points')
225  fit_gmm_to_points(points=pts,
226  n_components=num_components,
227  mdl=model,
228  ps=density_particles,
229  num_iter=num_iter,
230  covariance_type=covariance_type,
231  mass_multiplier=mass_multiplier)
232 
233  if output_txt is not None:
234  write_gmm_to_text(density_particles, output_txt)
235  if output_map is not None:
236  write_gmm_to_map(to_draw=density_particles,
237  out_fn=output_map,
238  voxel_size=voxel_size,
239  bounding_box=IMP.em.get_bounding_box(dmap))
240 
241  return density_particles
242 
243 
244 def fit_gmm_to_points(points,
245  n_components,
246  mdl,
247  ps=[],
248  num_iter=100,
249  covariance_type='full',
250  min_covar=0.001,
251  init_centers=[],
252  force_radii=-1.0,
253  force_weight=-1.0,
254  mass_multiplier=1.0):
255  """fit a GMM to some points. Will return the score and the Akaike score.
256  Akaike information criterion for the current model fit. It is a measure
257  of the relative quality of the GMM that takes into account the
258  parsimony and the goodness of the fit.
259  if no particles are provided, they will be created
260 
261  points: list of coordinates (python)
262  n_components: number of gaussians to create
263  mdl: IMP Model
264  ps: list of particles to be decorated. if empty, will add
265  num_iter: number of EM iterations
266  covariance_type: covar type for the gaussians. options: 'full',
267  'diagonal', 'spherical'
268  min_covar: assign a minimum value to covariance term. That is used
269  to have more spherical shaped gaussians
270  init_centers: initial coordinates of the GMM
271  force_radii: fix the radii (spheres only)
272  force_weight: fix the weights
273  mass_multiplier: multiply the weights of all the gaussians by this value
274  dirichlet: use the DGMM fitting (can reduce number of components,
275  takes longer)
276  """
277 
278  new_sklearn = False
279  try:
280  from sklearn.mixture import GMM
281  except ImportError:
282  from sklearn.mixture import GaussianMixture
283  new_sklearn = True
284 
285  print('creating GMM with n_components', n_components, 'n_iter', num_iter,
286  'covar type', covariance_type)
287  if new_sklearn:
288  # aic() calls size() on points, so it needs to be
289  # a numpy array, not a list
290  points = np.array(points)
291  weights_init = precisions_init = None
292  if force_radii != -1.0:
293  print('warning: radii can no longer be forced, but setting '
294  'initial values to ', force_radii)
295  precisions_init = np.array([[1. / force_radii] * 3
296  for i in range(n_components)])
297  if force_weight != -1.0:
298  print('warning: weights can no longer be forced, but setting '
299  'initial values to ', force_weight)
300  weights_init = np.array([force_weight] * n_components)
301 
302  gmm = GaussianMixture(n_components=n_components,
303  max_iter=num_iter,
304  covariance_type=covariance_type,
305  weights_init=weights_init,
306  precisions_init=precisions_init,
307  means_init=None if init_centers == []
308  else init_centers)
309  else:
310  params = 'm'
311  init_params = 'm'
312  if force_radii == -1.0:
313  params += 'c'
314  init_params += 'c'
315  else:
316  covariance_type = 'spherical'
317  print('forcing spherical with radii', force_radii)
318 
319  if force_weight == -1.0:
320  params += 'w'
321  init_params += 'w'
322  else:
323  print('forcing weights to be', force_weight)
324 
325  gmm = GMM(n_components=n_components, n_iter=num_iter,
326  covariance_type=covariance_type, min_covar=min_covar,
327  params=params, init_params=init_params)
328  if force_weight != -1.0:
329  gmm.weights_ = np.array([force_weight] * n_components)
330  if force_radii != -1.0:
331  gmm.covars_ = np.array([[force_radii] * 3
332  for i in range(n_components)])
333  if init_centers != []:
334  gmm.means_ = init_centers
335  print('fitting')
336  model = gmm.fit(points)
337  score = gmm.score(points)
338  akaikescore = model.aic(points)
339 
340  # convert format to core::Gaussian
341  if new_sklearn:
342  covars = gmm.covariances_
343  else:
344  covars = gmm.covars_
345  for ng in range(n_components):
346  covar = covars[ng]
347  if covar.size == 3:
348  covar = np.diag(covar).tolist()
349  else:
350  covar = covar.tolist()
351  center = list(gmm.means_[ng])
352  weight = mass_multiplier * gmm.weights_[ng]
353  if ng >= len(ps):
354  ps.append(IMP.Particle(mdl))
356  covar, IMP.algebra.Vector3D(center))
357  g = IMP.core.Gaussian.setup_particle(ps[ng], shape)
358  IMP.atom.Mass.setup_particle(ps[ng], weight)
359  IMP.core.XYZR.setup_particle(ps[ng], sqrt(max(g.get_variances())))
360 
361  return (score, akaikescore)
362 
363 
365  points, n_components, mdl, ps=[], num_iter=100,
366  covariance_type='full', mass_multiplier=1.0):
367  """fit a GMM to some points. Will return core::Gaussians.
368  if no particles are provided, they will be created
369 
370  points: list of coordinates (python)
371  n_components: number of gaussians to create
372  mdl: IMP Model
373  ps: list of particles to be decorated. if empty, will add
374  num_iter: number of EM iterations
375  covariance_type: covar type for the gaussians. options: 'full',
376  'diagonal', 'spherical'
377  init_centers: initial coordinates of the GMM
378  force_radii: fix the radii (spheres only)
379  force_weight: fix the weights
380  mass_multiplier: multiply the weights of all the gaussians by this value
381  """
382 
383  new_sklearn = True
384  try:
385  from sklearn.mixture import BayesianGaussianMixture
386  except ImportError:
387  from sklearn.mixture import DPGMM
388  new_sklearn = False
389 
390  # create and fit GMM
391  print('using dirichlet prior')
392  if new_sklearn:
393  gmm = BayesianGaussianMixture(
394  weight_concentration_prior_type='dirichlet_process',
395  n_components=n_components, max_iter=num_iter,
396  covariance_type=covariance_type)
397  else:
398  gmm = DPGMM(n_components=n_components, n_iter=num_iter,
399  covariance_type=covariance_type)
400 
401  gmm.fit(points)
402 
403  # convert format to core::Gaussian
404  if not new_sklearn:
405  gmm.precisions_ = gmm.precs_
406  for ng in range(n_components):
407  invcovar = gmm.precisions_[ng]
408  covar = np.linalg.inv(invcovar)
409  if covar.size == 3:
410  covar = np.diag(covar).tolist()
411  else:
412  covar = covar.tolist()
413  center = list(gmm.means_[ng])
414  weight = mass_multiplier * gmm.weights_[ng]
415  if ng >= len(ps):
416  ps.append(IMP.Particle(mdl))
418  covar, IMP.algebra.Vector3D(center))
419  g = IMP.core.Gaussian.setup_particle(ps[ng], shape)
420  IMP.atom.Mass.setup_particle(ps[ng], weight)
421  IMP.core.XYZR.setup_particle(ps[ng], sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format; colors flag only app...
Definition: gmm_tools.py:152
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:65
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:364
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:34
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:244
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:674
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:119
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:505
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:129
A decorator for a rigid body.
Definition: rigid_bodies.h:82
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:64
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:61
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:23
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27