IMP logo
IMP Reference Guide  2.19.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 import itertools
15 
16 try:
17  import sklearn.mixture
18  nosklearn=False
19 except:
20  nosklearn=True
21 from math import exp,sqrt,copysign
22 
24  ps,
25  mdl,
26  transform=None,
27  radius_scale=1.0,
28  mass_scale=1.0):
29  """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
30  ncomp=0
31  added_ps = itertools.count(1)
32  with open(in_fn,'r') as inf:
33  for l in inf:
34  if l[0]!='#':
35  if ncomp>len(ps)-1:
36  ps.append(IMP.Particle(mdl, "GMM%d" % next(added_ps)))
37  p = ps[ncomp]
38  fields=l.split('|')
39  weight=float(fields[2])
40  center=list(map(float,fields[3].split()))
41  covar=np.array(list(map(float,
42  fields[4].split()))).reshape((3,3))
43  #print('on particle',ncomp)
44  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
45  IMP.algebra.Vector3D(center))
47  g = IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
48  else:
49  g = IMP.core.Gaussian(ps[ncomp])
50  g.set_gaussian(shape)
52  IMP.atom.Mass.setup_particle(p,weight*mass_scale)
53  else:
54  IMP.atom.Mass(p).set_mass(weight*mass_scale)
55  rmax = sqrt(max(g.get_variances()))*radius_scale
56  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
57  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
58  else:
59  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
60  if not transform is None:
61  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
62  ncomp+=1
63 
64 def write_gmm_to_text(ps,out_fn, comments=[]):
65  """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
66  print('will write GMM text to',out_fn)
67  with open(out_fn,'w') as outf:
68  for comment in comments:
69  outf.write('# ' + comment + '\n')
70  outf.write('#|num|weight|mean|covariance matrix|\n')
71  for ng,g in enumerate(ps):
72  shape=IMP.core.Gaussian(g).get_gaussian()
73  weight=IMP.atom.Mass(g).get_mass()
74  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
75  mean=list(shape.get_center())
76  fm=[ng,weight]+mean+covar
77  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
78 
79 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
80  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
81  ps=to_draw
82  elif type(to_draw[0])==IMP.core.Gaussian:
83  ps=[g.get_particle() for g in to_draw]
84  else:
85  print('ps must be Particles or Gaussians')
86  return
87  if bounding_box is None:
88  if len(ps)>1:
89  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
90  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
91  else:
92  g=IMP.core.Gaussian(ps[0]).get_gaussian()
93  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
94  bounding_box=IMP.algebra.get_bounding_box(s2)
95  shapes=[]
96  weights=[]
97  for p in ps:
98  shapes.append(IMP.core.Gaussian(p).get_gaussian())
99  weights.append(IMP.atom.Mass(p).get_mass())
100  print('rasterizing')
101  if fast:
102  grid=IMP.algebra.get_rasterized_fast(shapes,weights,voxel_size,bounding_box,factor)
103  else:
104  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
105  print('creating map')
107  if origin is not None:
108  d1.set_origin(origin)
109  return d1
110 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
111  """write density map from GMM. input can be either particles or gaussians"""
112  d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
113  print('will write GMM map to',out_fn)
114  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
115  del d1
116 
117 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
118  """write density map directly from sklearn GMM (kinda slow) """
119  ### create density
120  if not dmap_model is None:
121  d1=IMP.em.create_density_map(dmap_model)
122  else:
123  d1=IMP.em.create_density_map(bbox,apix)
124 
125  ### fill it with values from the GMM
126  print('getting coords')
127  nvox=d1.get_number_of_voxels()
128  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
129 
130  print('scoring')
131  scores=gmm.score(apos)
132 
133  print('assigning')
134  for nv, score in enumerate(scores):
135  d1.set_value(nv,exp(score))
136  print('will write GMM map to',out_fn)
137  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
138 
139 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
140  use_colors=False):
141  """ given some points (and optional transform), write them to chimera 'bild' format
142  colors flag only applies to ellipses, otherwise it'll be weird"""
143  with open(out_fn,'w') as outf:
144  #print 'will draw',len(pts),'points'
145  # write first point in red
146  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
147  start=0
148  if use_colors:
149  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
150  %(pt[0],pt[1],pt[2]))
151  start=1
152 
153  # write remaining points in green
154  if use_colors:
155  outf.write('.color 0 1 0\n')
156  colors=['0 1 0','0 0 1','0 1 1']
157  for nt,t in enumerate(pts[start:]):
158  if use_colors and nt%2==0:
159  outf.write('.color %s\n' % colors[nt/2])
160  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
161  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
162 
163 
164 
165 def create_gmm_for_bead(mdl,
166  particle,
167  n_components,
168  sampled_points=100000,
169  num_iter=100):
170  print('fitting bead with',n_components,'gaussians')
171  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
172  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
173  IMP.em.write_map(dmap,'test_intermed.mrc')
174  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
175  draw_points(pts,'pts.bild')
176  density_particles=[]
177  fit_gmm_to_points(pts,n_components,mdl,
178  density_particles,
179  num_iter,'full',
180  mass_multiplier=IMP.atom.Mass(particle).get_mass())
181  return density_particles,dmap
182 
183 def sample_and_fit_to_particles(model,
184  fragment_particles,
185  num_components,
186  sampled_points=1000000,
187  simulation_res=0.5,
188  voxel_size=1.0,
189  num_iter=100,
190  covariance_type='full',
191  multiply_by_total_mass=True,
192  output_map=None,
193  output_txt=None):
194  density_particles=[]
195  if multiply_by_total_mass:
196  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
197  print('add_component_density: will multiply by mass',mass_multiplier)
198 
199  # simulate density from ps, then calculate points to fit
200  print('add_component_density: sampling points')
201  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
203  dmap.calcRMS()
204  #if not intermediate_map_fn is None:
205  # IMP.em.write_map(dmap,intermediate_map_fn)
206  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
207 
208  # fit GMM
209  print('add_component_density: fitting GMM to',len(pts),'points')
210  fit_gmm_to_points(points=pts,
211  n_components=num_components,
212  mdl=model,
213  ps=density_particles,
214  num_iter=num_iter,
215  covariance_type=covariance_type,
216  mass_multiplier=mass_multiplier)
217 
218  if not output_txt is None:
219  write_gmm_to_text(density_particles,output_txt)
220  if not output_map is None:
221  write_gmm_to_map(to_draw=density_particles,
222  out_fn=output_map,
223  voxel_size=voxel_size,
224  bounding_box=IMP.em.get_bounding_box(dmap))
225 
226  return density_particles
227 
228 def fit_gmm_to_points(points,
229  n_components,
230  mdl,
231  ps=[],
232  num_iter=100,
233  covariance_type='full',
234  min_covar=0.001,
235  init_centers=[],
236  force_radii=-1.0,
237  force_weight=-1.0,
238  mass_multiplier=1.0):
239  """fit a GMM to some points. Will return the score and the Akaike score.
240  Akaike information criterion for the current model fit. It is a measure
241  of the relative quality of the GMM that takes into account the
242  parsimony and the goodness of the fit.
243  if no particles are provided, they will be created
244 
245  points: list of coordinates (python)
246  n_components: number of gaussians to create
247  mdl: IMP Model
248  ps: list of particles to be decorated. if empty, will add
249  num_iter: number of EM iterations
250  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
251  min_covar: assign a minimum value to covariance term. That is used to have more spherical
252  shaped gaussians
253  init_centers: initial coordinates of the GMM
254  force_radii: fix the radii (spheres only)
255  force_weight: fix the weights
256  mass_multiplier: multiply the weights of all the gaussians by this value
257  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
258  """
259 
260 
261  new_sklearn = False
262  try:
263  from sklearn.mixture import GMM
264  except ImportError:
265  from sklearn.mixture import GaussianMixture
266  new_sklearn = True
267 
268  print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
269  if new_sklearn:
270  # aic() calls size() on points, so it needs to be
271  # a numpy array, not a list
272  points = np.array(points)
273  weights_init = precisions_init = None
274  if force_radii != -1.0:
275  print('warning: radii can no longer be forced, but setting '
276  'initial values to ', force_radii)
277  precisions_init = np.array([[1./force_radii]*3
278  for i in range(n_components)])
279  if force_weight != -1.0:
280  print('warning: weights can no longer be forced, but setting '
281  'initial values to ', force_weight)
282  weights_init = np.array([force_weight]*n_components)
283 
284  gmm = GaussianMixture(n_components=n_components,
285  max_iter=num_iter,
286  covariance_type=covariance_type,
287  weights_init=weights_init,
288  precisions_init=precisions_init,
289  means_init=None if init_centers==[]
290  else init_centers)
291  else:
292  params='m'
293  init_params='m'
294  if force_radii==-1.0:
295  params+='c'
296  init_params+='c'
297  else:
298  covariance_type='spherical'
299  print('forcing spherical with radii',force_radii)
300 
301  if force_weight==-1.0:
302  params+='w'
303  init_params+='w'
304  else:
305  print('forcing weights to be',force_weight)
306 
307  gmm = GMM(n_components=n_components, n_iter=num_iter,
308  covariance_type=covariance_type, min_covar=min_covar,
309  params=params, init_params=init_params)
310  if force_weight!=-1.0:
311  gmm.weights_=np.array([force_weight]*n_components)
312  if force_radii!=-1.0:
313  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
314  if init_centers!=[]:
315  gmm.means_=init_centers
316  print('fitting')
317  model=gmm.fit(points)
318  score=gmm.score(points)
319  akaikescore=model.aic(points)
320  #print('>>> GMM score',gmm.score(points))
321 
322  ### convert format to core::Gaussian
323  if new_sklearn:
324  covars = gmm.covariances_
325  else:
326  covars = gmm.covars_
327  for ng in range(n_components):
328  covar=covars[ng]
329  if covar.size==3:
330  covar=np.diag(covar).tolist()
331  else:
332  covar=covar.tolist()
333  center=list(gmm.means_[ng])
334  weight=mass_multiplier*gmm.weights_[ng]
335  if ng>=len(ps):
336  ps.append(IMP.Particle(mdl))
338  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
339  IMP.atom.Mass.setup_particle(ps[ng],weight)
340  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
341 
342  return (score,akaikescore)
343 
345  n_components,
346  mdl,
347  ps=[],
348  num_iter=100,
349  covariance_type='full',
350  mass_multiplier=1.0):
351  """fit a GMM to some points. Will return core::Gaussians.
352  if no particles are provided, they will be created
353 
354  points: list of coordinates (python)
355  n_components: number of gaussians to create
356  mdl: IMP Model
357  ps: list of particles to be decorated. if empty, will add
358  num_iter: number of EM iterations
359  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
360  init_centers: initial coordinates of the GMM
361  force_radii: fix the radii (spheres only)
362  force_weight: fix the weights
363  mass_multiplier: multiply the weights of all the gaussians by this value
364  """
365 
366 
367  new_sklearn = True
368  try:
369  from sklearn.mixture import BayesianGaussianMixture
370  except ImportError:
371  from sklearn.mixture import DPGMM
372  new_sklearn = False
373 
374  ### create and fit GMM
375  print('using dirichlet prior')
376  if new_sklearn:
377  gmm = BayesianGaussianMixture(
378  weight_concentration_prior_type='dirichlet_process',
379  n_components=n_components, max_iter=num_iter,
380  covariance_type=covariance_type)
381  else:
382  gmm = DPGMM(n_components=n_components, n_iter=num_iter,
383  covariance_type=covariance_type)
384 
385  gmm.fit(points)
386 
387  #print('>>> GMM score',gmm.score(points))
388 
389  #print gmm.covars_
390  #print gmm.weights_
391  #print gmm.means_
392  ### convert format to core::Gaussian
393  if not new_sklearn:
394  gmm.precisions_ = gmm.precs_
395  for ng in range(n_components):
396  invcovar=gmm.precisions_[ng]
397  covar=np.linalg.inv(invcovar)
398  if covar.size==3:
399  covar=np.diag(covar).tolist()
400  else:
401  covar=covar.tolist()
402  center=list(gmm.means_[ng])
403  weight=mass_multiplier*gmm.weights_[ng]
404  if ng>=len(ps):
405  ps.append(IMP.Particle(mdl))
407  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
408  IMP.atom.Mass.setup_particle(ps[ng],weight)
409  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:139
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:65
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:344
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:34
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:228
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:674
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:110
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:505
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
VectorD< 3 > Vector3D
Definition: VectorD.h:425
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:117
A decorator for a rigid body.
Definition: rigid_bodies.h:82
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:64
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:64
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:23
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27