IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 import itertools
15 
16 try:
17  import sklearn.mixture
18  nosklearn=False
19 except:
20  nosklearn=True
21 from math import exp,sqrt,copysign
22 
24  ps,
25  mdl,
26  transform=None,
27  radius_scale=1.0,
28  mass_scale=1.0):
29  """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
30  ncomp=0
31  added_ps = itertools.count(1)
32  with open(in_fn,'r') as inf:
33  for l in inf:
34  if l[0]!='#':
35  if ncomp>len(ps)-1:
36  ps.append(IMP.Particle(mdl, "GMM%d" % next(added_ps)))
37  p = ps[ncomp]
38  fields=l.split('|')
39  weight=float(fields[2])
40  center=list(map(float,fields[3].split()))
41  covar=np.array(list(map(float,
42  fields[4].split()))).reshape((3,3))
43  #print('on particle',ncomp)
44  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
45  IMP.algebra.Vector3D(center))
47  g = IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
48  else:
49  g = IMP.core.Gaussian(ps[ncomp])
50  g.set_gaussian(shape)
52  IMP.atom.Mass.setup_particle(p,weight*mass_scale)
53  else:
54  IMP.atom.Mass(p).set_mass(weight*mass_scale)
55  rmax = sqrt(max(g.get_variances()))*radius_scale
56  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
57  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
58  else:
59  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
60  if not transform is None:
61  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
62  ncomp+=1
63 
64 def write_gmm_to_text(ps,out_fn, comments=[]):
65  """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
66  print('will write GMM text to',out_fn)
67  with open(out_fn,'w') as outf:
68  for comment in comments:
69  outf.write('# ' + comment + '\n')
70  outf.write('#|num|weight|mean|covariance matrix|\n')
71  for ng,g in enumerate(ps):
72  shape=IMP.core.Gaussian(g).get_gaussian()
73  weight=IMP.atom.Mass(g).get_mass()
74  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
75  mean=list(shape.get_center())
76  fm=[ng,weight]+mean+covar
77  try:
78  #python 2.7 format
79  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
80  except ValueError:
81  #python 2.6 and below
82  outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
83 
84 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
85  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
86  ps=to_draw
87  elif type(to_draw[0])==IMP.core.Gaussian:
88  ps=[g.get_particle() for g in to_draw]
89  else:
90  print('ps must be Particles or Gaussians')
91  return
92  if bounding_box is None:
93  if len(ps)>1:
94  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
95  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
96  else:
97  g=IMP.core.Gaussian(ps[0]).get_gaussian()
98  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
99  bounding_box=IMP.algebra.get_bounding_box(s2)
100  shapes=[]
101  weights=[]
102  for p in ps:
103  shapes.append(IMP.core.Gaussian(p).get_gaussian())
104  weights.append(IMP.atom.Mass(p).get_mass())
105  print('rasterizing')
106  if fast:
107  grid=IMP.algebra.get_rasterized_fast(shapes,weights,voxel_size,bounding_box,factor)
108  else:
109  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
110  print('creating map')
112  if origin is not None:
113  d1.set_origin(origin)
114  return d1
115 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
116  """write density map from GMM. input can be either particles or gaussians"""
117  d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
118  print('will write GMM map to',out_fn)
119  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
120  del d1
121 
122 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
123  """write density map directly from sklearn GMM (kinda slow) """
124  ### create density
125  if not dmap_model is None:
126  d1=IMP.em.create_density_map(dmap_model)
127  else:
128  d1=IMP.em.create_density_map(bbox,apix)
129 
130  ### fill it with values from the GMM
131  print('getting coords')
132  nvox=d1.get_number_of_voxels()
133  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
134 
135  print('scoring')
136  scores=gmm.score(apos)
137 
138  print('assigning')
139  for nv, score in enumerate(scores):
140  d1.set_value(nv,exp(score))
141  print('will write GMM map to',out_fn)
142  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
143 
144 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
145  use_colors=False):
146  """ given some points (and optional transform), write them to chimera 'bild' format
147  colors flag only applies to ellipses, otherwise it'll be weird"""
148  with open(out_fn,'w') as outf:
149  #print 'will draw',len(pts),'points'
150  # write first point in red
151  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
152  start=0
153  if use_colors:
154  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
155  %(pt[0],pt[1],pt[2]))
156  start=1
157 
158  # write remaining points in green
159  if use_colors:
160  outf.write('.color 0 1 0\n')
161  colors=['0 1 0','0 0 1','0 1 1']
162  for nt,t in enumerate(pts[start:]):
163  if use_colors and nt%2==0:
164  outf.write('.color %s\n' % colors[nt/2])
165  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
166  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
167 
168 
169 
170 def create_gmm_for_bead(mdl,
171  particle,
172  n_components,
173  sampled_points=100000,
174  num_iter=100):
175  print('fitting bead with',n_components,'gaussians')
176  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
177  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
178  IMP.em.write_map(dmap,'test_intermed.mrc')
179  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
180  draw_points(pts,'pts.bild')
181  density_particles=[]
182  fit_gmm_to_points(pts,n_components,mdl,
183  density_particles,
184  num_iter,'full',
185  mass_multiplier=IMP.atom.Mass(particle).get_mass())
186  return density_particles,dmap
187 
188 def sample_and_fit_to_particles(model,
189  fragment_particles,
190  num_components,
191  sampled_points=1000000,
192  simulation_res=0.5,
193  voxel_size=1.0,
194  num_iter=100,
195  covariance_type='full',
196  multiply_by_total_mass=True,
197  output_map=None,
198  output_txt=None):
199  density_particles=[]
200  if multiply_by_total_mass:
201  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
202  print('add_component_density: will multiply by mass',mass_multiplier)
203 
204  # simulate density from ps, then calculate points to fit
205  print('add_component_density: sampling points')
206  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
208  dmap.calcRMS()
209  #if not intermediate_map_fn is None:
210  # IMP.em.write_map(dmap,intermediate_map_fn)
211  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
212 
213  # fit GMM
214  print('add_component_density: fitting GMM to',len(pts),'points')
215  fit_gmm_to_points(points=pts,
216  n_components=num_components,
217  mdl=model,
218  ps=density_particles,
219  num_iter=num_iter,
220  covariance_type=covariance_type,
221  mass_multiplier=mass_multiplier)
222 
223  if not output_txt is None:
224  write_gmm_to_text(density_particles,output_txt)
225  if not output_map is None:
226  write_gmm_to_map(to_draw=density_particles,
227  out_fn=output_map,
228  voxel_size=voxel_size,
229  bounding_box=IMP.em.get_bounding_box(dmap))
230 
231  return density_particles
232 
233 def fit_gmm_to_points(points,
234  n_components,
235  mdl,
236  ps=[],
237  num_iter=100,
238  covariance_type='full',
239  min_covar=0.001,
240  init_centers=[],
241  force_radii=-1.0,
242  force_weight=-1.0,
243  mass_multiplier=1.0):
244  """fit a GMM to some points. Will return the score and the Akaike score.
245  Akaike information criterion for the current model fit. It is a measure
246  of the relative quality of the GMM that takes into account the
247  parsimony and the goodness of the fit.
248  if no particles are provided, they will be created
249 
250  points: list of coordinates (python)
251  n_components: number of gaussians to create
252  mdl: IMP Model
253  ps: list of particles to be decorated. if empty, will add
254  num_iter: number of EM iterations
255  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
256  min_covar: assign a minimum value to covariance term. That is used to have more spherical
257  shaped gaussians
258  init_centers: initial coordinates of the GMM
259  force_radii: fix the radii (spheres only)
260  force_weight: fix the weights
261  mass_multiplier: multiply the weights of all the gaussians by this value
262  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
263  """
264 
265 
266  new_sklearn = False
267  try:
268  from sklearn.mixture import GMM
269  except ImportError:
270  from sklearn.mixture import GaussianMixture
271  new_sklearn = True
272 
273  print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
274  if new_sklearn:
275  # aic() calls size() on points, so it needs to be
276  # a numpy array, not a list
277  points = np.array(points)
278  weights_init = precisions_init = None
279  if force_radii != -1.0:
280  print('warning: radii can no longer be forced, but setting '
281  'initial values to ', force_radii)
282  precisions_init = np.array([[1./force_radii]*3
283  for i in range(n_components)])
284  if force_weight != -1.0:
285  print('warning: weights can no longer be forced, but setting '
286  'initial values to ', force_weight)
287  weights_init = np.array([force_weight]*n_components)
288 
289  gmm = GaussianMixture(n_components=n_components,
290  max_iter=num_iter,
291  covariance_type=covariance_type,
292  weights_init=weights_init,
293  precisions_init=precisions_init,
294  means_init=None if init_centers==[]
295  else init_centers)
296  else:
297  params='m'
298  init_params='m'
299  if force_radii==-1.0:
300  params+='c'
301  init_params+='c'
302  else:
303  covariance_type='spherical'
304  print('forcing spherical with radii',force_radii)
305 
306  if force_weight==-1.0:
307  params+='w'
308  init_params+='w'
309  else:
310  print('forcing weights to be',force_weight)
311 
312  gmm = GMM(n_components=n_components, n_iter=num_iter,
313  covariance_type=covariance_type, min_covar=min_covar,
314  params=params, init_params=init_params)
315  if force_weight!=-1.0:
316  gmm.weights_=np.array([force_weight]*n_components)
317  if force_radii!=-1.0:
318  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
319  if init_centers!=[]:
320  gmm.means_=init_centers
321  print('fitting')
322  model=gmm.fit(points)
323  score=gmm.score(points)
324  akaikescore=model.aic(points)
325  #print('>>> GMM score',gmm.score(points))
326 
327  ### convert format to core::Gaussian
328  if new_sklearn:
329  covars = gmm.covariances_
330  else:
331  covars = gmm.covars_
332  for ng in range(n_components):
333  covar=covars[ng]
334  if covar.size==3:
335  covar=np.diag(covar).tolist()
336  else:
337  covar=covar.tolist()
338  center=list(gmm.means_[ng])
339  weight=mass_multiplier*gmm.weights_[ng]
340  if ng>=len(ps):
341  ps.append(IMP.Particle(mdl))
343  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
344  IMP.atom.Mass.setup_particle(ps[ng],weight)
345  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
346 
347  return (score,akaikescore)
348 
350  n_components,
351  mdl,
352  ps=[],
353  num_iter=100,
354  covariance_type='full',
355  mass_multiplier=1.0):
356  """fit a GMM to some points. Will return core::Gaussians.
357  if no particles are provided, they will be created
358 
359  points: list of coordinates (python)
360  n_components: number of gaussians to create
361  mdl: IMP Model
362  ps: list of particles to be decorated. if empty, will add
363  num_iter: number of EM iterations
364  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
365  init_centers: initial coordinates of the GMM
366  force_radii: fix the radii (spheres only)
367  force_weight: fix the weights
368  mass_multiplier: multiply the weights of all the gaussians by this value
369  """
370 
371 
372  new_sklearn = True
373  try:
374  from sklearn.mixture import BayesianGaussianMixture
375  except ImportError:
376  from sklearn.mixture import DPGMM
377  new_sklearn = False
378 
379  ### create and fit GMM
380  print('using dirichlet prior')
381  if new_sklearn:
382  gmm = BayesianGaussianMixture(
383  weight_concentration_prior_type='dirichlet_process',
384  n_components=n_components, max_iter=num_iter,
385  covariance_type=covariance_type)
386  else:
387  gmm = DPGMM(n_components=n_components, n_iter=num_iter,
388  covariance_type=covariance_type)
389 
390  gmm.fit(points)
391 
392  #print('>>> GMM score',gmm.score(points))
393 
394  #print gmm.covars_
395  #print gmm.weights_
396  #print gmm.means_
397  ### convert format to core::Gaussian
398  if not new_sklearn:
399  gmm.precisions_ = gmm.precs_
400  for ng in range(n_components):
401  invcovar=gmm.precisions_[ng]
402  covar=np.linalg.inv(invcovar)
403  if covar.size==3:
404  covar=np.diag(covar).tolist()
405  else:
406  covar=covar.tolist()
407  center=list(gmm.means_[ng])
408  weight=mass_multiplier*gmm.weights_[ng]
409  if ng>=len(ps):
410  ps.append(IMP.Particle(mdl))
412  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
413  IMP.atom.Mass.setup_particle(ps[ng],weight)
414  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:144
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:48
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:349
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:34
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:233
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:633
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:115
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:122
A decorator for a rigid body.
Definition: rigid_bodies.h:82
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:47
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:64
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:23
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27