IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import itertools
14 
15 try:
16  import sklearn.mixture # noqa: F401
17  nosklearn = False
18 except ImportError:
19  nosklearn = True
20 from math import exp, sqrt
21 
22 
23 def decorate_gmm_from_text(in_fn, ps, mdl, transform=None, radius_scale=1.0,
24  mass_scale=1.0):
25  """read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
26  ncomp = 0
27  added_ps = itertools.count(1)
28  with open(in_fn, 'r') as inf:
29  for line in inf:
30  if line[0] != '#':
31  if ncomp > len(ps)-1:
32  ps.append(IMP.Particle(mdl, "GMM%d" % next(added_ps)))
33  p = ps[ncomp]
34  fields = line.split('|')
35  weight = float(fields[2])
36  center = list(map(float, fields[3].split()))
37  covar = np.array(
38  list(map(float, fields[4].split()))).reshape((3, 3))
40  covar.tolist(), IMP.algebra.Vector3D(center))
42  g = IMP.core.Gaussian.setup_particle(ps[ncomp], shape)
43  else:
44  g = IMP.core.Gaussian(ps[ncomp])
45  g.set_gaussian(shape)
47  IMP.atom.Mass.setup_particle(p, weight*mass_scale)
48  else:
49  IMP.atom.Mass(p).set_mass(weight*mass_scale)
50  rmax = sqrt(max(g.get_variances()))*radius_scale
51  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
52  IMP.core.XYZR.setup_particle(ps[ncomp], rmax)
53  else:
54  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
55  if transform is not None:
57  transform)
58  ncomp += 1
59 
60 
61 def write_gmm_to_text(ps, out_fn, comments=[]):
62  """write a list of gaussians to text. must be decorated as Gaussian
63  and Mass"""
64  print('will write GMM text to', out_fn)
65  with open(out_fn, 'w') as outf:
66  for comment in comments:
67  outf.write('# ' + comment + '\n')
68  outf.write('#|num|weight|mean|covariance matrix|\n')
69  for ng, g in enumerate(ps):
70  shape = IMP.core.Gaussian(g).get_gaussian()
71  weight = IMP.atom.Mass(g).get_mass()
72  covar = [c for row in IMP.algebra.get_covariance(shape)
73  for c in row]
74  mean = list(shape.get_center())
75  fm = [ng, weight] + mean + covar
76  outf.write('|{}|{}|{} {} {}|'
77  '{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
78 
79 
80 def gmm2map(to_draw, voxel_size, bounding_box=None, origin=None, fast=False,
81  factor=2.5):
82  if type(to_draw[0]) in (IMP.Particle, IMP.atom.Hierarchy,
84  ps = to_draw
85  elif type(to_draw[0]) == IMP.core.Gaussian:
86  ps = [g.get_particle() for g in to_draw]
87  else:
88  print('ps must be Particles or Gaussians')
89  return
90  if bounding_box is None:
91  if len(ps) > 1:
93  [IMP.core.XYZ(p).get_coordinates() for p in ps])
94  s2 = IMP.algebra.Sphere3D(s.get_center(), s.get_radius() * 3)
95  else:
96  g = IMP.core.Gaussian(ps[0]).get_gaussian()
97  s2 = IMP.algebra.Sphere3D(g.get_center(), max(g.get_variances())*3)
98  bounding_box = IMP.algebra.get_bounding_box(s2)
99  shapes = []
100  weights = []
101  for p in ps:
102  shapes.append(IMP.core.Gaussian(p).get_gaussian())
103  weights.append(IMP.atom.Mass(p).get_mass())
104  print('rasterizing')
105  if fast:
106  grid = IMP.algebra.get_rasterized_fast(shapes, weights, voxel_size,
107  bounding_box, factor)
108  else:
109  grid = IMP.algebra.get_rasterized(shapes, weights, voxel_size,
110  bounding_box)
111  print('creating map')
112  d1 = IMP.em.create_density_map(grid)
113  if origin is not None:
114  d1.set_origin(origin)
115  return d1
116 
117 
118 def write_gmm_to_map(to_draw, out_fn, voxel_size, bounding_box=None,
119  origin=None, fast=False, factor=2.5):
120  """write density map from GMM. input can be either particles
121  or gaussians"""
122  d1 = gmm2map(to_draw, voxel_size, bounding_box, origin, fast)
123  print('will write GMM map to', out_fn)
124  IMP.em.write_map(d1, out_fn, IMP.em.MRCReaderWriter())
125  del d1
126 
127 
128 def write_sklearn_gmm_to_map(gmm, out_fn, apix=0, bbox=None, dmap_model=None):
129  """write density map directly from sklearn GMM (kinda slow) """
130  # create density
131  if dmap_model is not None:
132  d1 = IMP.em.create_density_map(dmap_model)
133  else:
134  d1 = IMP.em.create_density_map(bbox, apix)
135 
136  # fill it with values from the GMM
137  print('getting coords')
138  nvox = d1.get_number_of_voxels()
139  apos = [list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
140 
141  print('scoring')
142  scores = gmm.score(apos)
143 
144  print('assigning')
145  for nv, score in enumerate(scores):
146  d1.set_value(nv, exp(score))
147  print('will write GMM map to', out_fn)
148  IMP.em.write_map(d1, out_fn, IMP.em.MRCReaderWriter())
149 
150 
151 def draw_points(pts, out_fn,
153  use_colors=False):
154  """ given some points (and optional transform), write them to chimera
155  'bild' format; colors flag only applies to ellipses, otherwise it'll
156  be weird"""
157  with open(out_fn, 'w') as outf:
158  # print 'will draw',len(pts),'points'
159  # write first point in red
160  pt = trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
161  start = 0
162  if use_colors:
163  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
164  % (pt[0], pt[1], pt[2]))
165  start = 1
166 
167  # write remaining points in green
168  if use_colors:
169  outf.write('.color 0 1 0\n')
170  colors = ['0 1 0', '0 0 1', '0 1 1']
171  for nt, t in enumerate(pts[start:]):
172  if use_colors and nt % 2 == 0:
173  outf.write('.color %s\n' % colors[nt/2])
174  pt = trans.get_transformed(IMP.algebra.Vector3D(t))
175  outf.write('.dotat %.2f %.2f %.2f\n' % (pt[0], pt[1], pt[2]))
176 
177 
178 def create_gmm_for_bead(mdl,
179  particle,
180  n_components,
181  sampled_points=100000,
182  num_iter=100):
183  print('fitting bead with', n_components, 'gaussians')
184  dmap = IMP.em.SampledDensityMap([particle], 1.0, 1.0,
186  IMP.em.SPHERE)
187  IMP.em.write_map(dmap, 'test_intermed.mrc')
188  pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
189  draw_points(pts, 'pts.bild')
190  density_particles = []
191  fit_gmm_to_points(pts, n_components, mdl, density_particles, num_iter,
192  'full',
193  mass_multiplier=IMP.atom.Mass(particle).get_mass())
194  return density_particles, dmap
195 
196 
197 def sample_and_fit_to_particles(model,
198  fragment_particles,
199  num_components,
200  sampled_points=1000000,
201  simulation_res=0.5,
202  voxel_size=1.0,
203  num_iter=100,
204  covariance_type='full',
205  multiply_by_total_mass=True,
206  output_map=None,
207  output_txt=None):
208  density_particles = []
209  if multiply_by_total_mass:
210  mass_multiplier = sum((IMP.atom.Mass(p).get_mass()
211  for p in set(fragment_particles)))
212  print('add_component_density: will multiply by mass', mass_multiplier)
213 
214  # simulate density from ps, then calculate points to fit
215  print('add_component_density: sampling points')
217  fragment_particles, simulation_res, voxel_size,
219  dmap.calcRMS()
220  pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
221 
222  # fit GMM
223  print('add_component_density: fitting GMM to', len(pts), 'points')
224  fit_gmm_to_points(points=pts,
225  n_components=num_components,
226  mdl=model,
227  ps=density_particles,
228  num_iter=num_iter,
229  covariance_type=covariance_type,
230  mass_multiplier=mass_multiplier)
231 
232  if output_txt is not None:
233  write_gmm_to_text(density_particles, output_txt)
234  if output_map is not None:
235  write_gmm_to_map(to_draw=density_particles,
236  out_fn=output_map,
237  voxel_size=voxel_size,
238  bounding_box=IMP.em.get_bounding_box(dmap))
239 
240  return density_particles
241 
242 
243 def fit_gmm_to_points(points,
244  n_components,
245  mdl,
246  ps=[],
247  num_iter=100,
248  covariance_type='full',
249  min_covar=0.001,
250  init_centers=[],
251  force_radii=-1.0,
252  force_weight=-1.0,
253  mass_multiplier=1.0):
254  """fit a GMM to some points. Will return the score and the Akaike score.
255  Akaike information criterion for the current model fit. It is a measure
256  of the relative quality of the GMM that takes into account the
257  parsimony and the goodness of the fit.
258  if no particles are provided, they will be created
259 
260  points: list of coordinates (python)
261  n_components: number of gaussians to create
262  mdl: IMP Model
263  ps: list of particles to be decorated. if empty, will add
264  num_iter: number of EM iterations
265  covariance_type: covar type for the gaussians. options: 'full',
266  'diagonal', 'spherical'
267  min_covar: assign a minimum value to covariance term. That is used
268  to have more spherical shaped gaussians
269  init_centers: initial coordinates of the GMM
270  force_radii: fix the radii (spheres only)
271  force_weight: fix the weights
272  mass_multiplier: multiply the weights of all the gaussians by this value
273  dirichlet: use the DGMM fitting (can reduce number of components,
274  takes longer)
275  """
276 
277  new_sklearn = False
278  try:
279  from sklearn.mixture import GMM
280  except ImportError:
281  from sklearn.mixture import GaussianMixture
282  new_sklearn = True
283 
284  print('creating GMM with n_components', n_components, 'n_iter', num_iter,
285  'covar type', covariance_type)
286  if new_sklearn:
287  # aic() calls size() on points, so it needs to be
288  # a numpy array, not a list
289  points = np.array(points)
290  weights_init = precisions_init = None
291  if force_radii != -1.0:
292  print('warning: radii can no longer be forced, but setting '
293  'initial values to ', force_radii)
294  precisions_init = np.array([[1./force_radii]*3
295  for i in range(n_components)])
296  if force_weight != -1.0:
297  print('warning: weights can no longer be forced, but setting '
298  'initial values to ', force_weight)
299  weights_init = np.array([force_weight]*n_components)
300 
301  gmm = GaussianMixture(n_components=n_components,
302  max_iter=num_iter,
303  covariance_type=covariance_type,
304  weights_init=weights_init,
305  precisions_init=precisions_init,
306  means_init=None if init_centers == []
307  else init_centers)
308  else:
309  params = 'm'
310  init_params = 'm'
311  if force_radii == -1.0:
312  params += 'c'
313  init_params += 'c'
314  else:
315  covariance_type = 'spherical'
316  print('forcing spherical with radii', force_radii)
317 
318  if force_weight == -1.0:
319  params += 'w'
320  init_params += 'w'
321  else:
322  print('forcing weights to be', force_weight)
323 
324  gmm = GMM(n_components=n_components, n_iter=num_iter,
325  covariance_type=covariance_type, min_covar=min_covar,
326  params=params, init_params=init_params)
327  if force_weight != -1.0:
328  gmm.weights_ = np.array([force_weight] * n_components)
329  if force_radii != -1.0:
330  gmm.covars_ = np.array([[force_radii] * 3
331  for i in range(n_components)])
332  if init_centers != []:
333  gmm.means_ = init_centers
334  print('fitting')
335  model = gmm.fit(points)
336  score = gmm.score(points)
337  akaikescore = model.aic(points)
338 
339  # convert format to core::Gaussian
340  if new_sklearn:
341  covars = gmm.covariances_
342  else:
343  covars = gmm.covars_
344  for ng in range(n_components):
345  covar = covars[ng]
346  if covar.size == 3:
347  covar = np.diag(covar).tolist()
348  else:
349  covar = covar.tolist()
350  center = list(gmm.means_[ng])
351  weight = mass_multiplier*gmm.weights_[ng]
352  if ng >= len(ps):
353  ps.append(IMP.Particle(mdl))
355  covar, IMP.algebra.Vector3D(center))
356  g = IMP.core.Gaussian.setup_particle(ps[ng], shape)
357  IMP.atom.Mass.setup_particle(ps[ng], weight)
358  IMP.core.XYZR.setup_particle(ps[ng], sqrt(max(g.get_variances())))
359 
360  return (score, akaikescore)
361 
362 
364  points, n_components, mdl, ps=[], num_iter=100,
365  covariance_type='full', mass_multiplier=1.0):
366  """fit a GMM to some points. Will return core::Gaussians.
367  if no particles are provided, they will be created
368 
369  points: list of coordinates (python)
370  n_components: number of gaussians to create
371  mdl: IMP Model
372  ps: list of particles to be decorated. if empty, will add
373  num_iter: number of EM iterations
374  covariance_type: covar type for the gaussians. options: 'full',
375  'diagonal', 'spherical'
376  init_centers: initial coordinates of the GMM
377  force_radii: fix the radii (spheres only)
378  force_weight: fix the weights
379  mass_multiplier: multiply the weights of all the gaussians by this value
380  """
381 
382  new_sklearn = True
383  try:
384  from sklearn.mixture import BayesianGaussianMixture
385  except ImportError:
386  from sklearn.mixture import DPGMM
387  new_sklearn = False
388 
389  # create and fit GMM
390  print('using dirichlet prior')
391  if new_sklearn:
392  gmm = BayesianGaussianMixture(
393  weight_concentration_prior_type='dirichlet_process',
394  n_components=n_components, max_iter=num_iter,
395  covariance_type=covariance_type)
396  else:
397  gmm = DPGMM(n_components=n_components, n_iter=num_iter,
398  covariance_type=covariance_type)
399 
400  gmm.fit(points)
401 
402  # convert format to core::Gaussian
403  if not new_sklearn:
404  gmm.precisions_ = gmm.precs_
405  for ng in range(n_components):
406  invcovar = gmm.precisions_[ng]
407  covar = np.linalg.inv(invcovar)
408  if covar.size == 3:
409  covar = np.diag(covar).tolist()
410  else:
411  covar = covar.tolist()
412  center = list(gmm.means_[ng])
413  weight = mass_multiplier*gmm.weights_[ng]
414  if ng >= len(ps):
415  ps.append(IMP.Particle(mdl))
417  covar, IMP.algebra.Vector3D(center))
418  g = IMP.core.Gaussian.setup_particle(ps[ng], shape)
419  IMP.atom.Mass.setup_particle(ps[ng], weight)
420  IMP.core.XYZR.setup_particle(ps[ng], sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format; colors flag only app...
Definition: gmm_tools.py:151
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:65
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:363
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:34
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:243
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:674
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:118
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:505
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
VectorD< 3 > Vector3D
Definition: VectorD.h:425
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:128
A decorator for a rigid body.
Definition: rigid_bodies.h:82
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:64
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:61
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:23
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27