IMP logo
IMP Reference Guide  2.12.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 
15 try:
16  import sklearn.mixture
17  nosklearn=False
18 except:
19  nosklearn=True
20 from math import exp,sqrt,copysign
21 
23  ps,
24  mdl,
25  transform=None,
26  radius_scale=1.0,
27  mass_scale=1.0):
28  """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
29  ncomp=0
30  with open(in_fn,'r') as inf:
31  for l in inf:
32  if l[0]!='#':
33  if ncomp>len(ps)-1:
34  ps.append(IMP.Particle(mdl))
35  p = ps[ncomp]
36  fields=l.split('|')
37  weight=float(fields[2])
38  center=list(map(float,fields[3].split()))
39  covar=np.array(list(map(float,
40  fields[4].split()))).reshape((3,3))
41  #print('on particle',ncomp)
42  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
43  IMP.algebra.Vector3D(center))
45  g = IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
46  else:
47  g = IMP.core.Gaussian(ps[ncomp])
48  g.set_gaussian(shape)
50  IMP.atom.Mass.setup_particle(p,weight*mass_scale)
51  else:
52  IMP.atom.Mass(p).set_mass(weight*mass_scale)
53  rmax = sqrt(max(g.get_variances()))*radius_scale
54  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
55  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
56  else:
57  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
58  if not transform is None:
59  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
60  ncomp+=1
61 
62 def write_gmm_to_text(ps,out_fn, comments=[]):
63  """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
64  print('will write GMM text to',out_fn)
65  with open(out_fn,'w') as outf:
66  for comment in comments:
67  outf.write('# ' + comment + '\n')
68  outf.write('#|num|weight|mean|covariance matrix|\n')
69  for ng,g in enumerate(ps):
70  shape=IMP.core.Gaussian(g).get_gaussian()
71  weight=IMP.atom.Mass(g).get_mass()
72  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
73  mean=list(shape.get_center())
74  fm=[ng,weight]+mean+covar
75  try:
76  #python 2.7 format
77  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
78  except ValueError:
79  #python 2.6 and below
80  outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
81 
82 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
83  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
84  ps=to_draw
85  elif type(to_draw[0])==IMP.core.Gaussian:
86  ps=[g.get_particle() for g in to_draw]
87  else:
88  print('ps must be Particles or Gaussians')
89  return
90  if bounding_box is None:
91  if len(ps)>1:
92  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
93  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
94  else:
95  g=IMP.core.Gaussian(ps[0]).get_gaussian()
96  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
97  bounding_box=IMP.algebra.get_bounding_box(s2)
98  shapes=[]
99  weights=[]
100  for p in ps:
101  shapes.append(IMP.core.Gaussian(p).get_gaussian())
102  weights.append(IMP.atom.Mass(p).get_mass())
103  print('rasterizing')
104  if fast:
105  grid=IMP.algebra.get_rasterized_fast(shapes,weights,voxel_size,bounding_box,factor)
106  else:
107  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
108  print('creating map')
110  if origin is not None:
111  d1.set_origin(origin)
112  return d1
113 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
114  """write density map from GMM. input can be either particles or gaussians"""
115  d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
116  print('will write GMM map to',out_fn)
117  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
118  del d1
119 
120 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
121  """write density map directly from sklearn GMM (kinda slow) """
122  ### create density
123  if not dmap_model is None:
124  d1=IMP.em.create_density_map(dmap_model)
125  else:
126  d1=IMP.em.create_density_map(bbox,apix)
127 
128  ### fill it with values from the GMM
129  print('getting coords')
130  nvox=d1.get_number_of_voxels()
131  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
132 
133  print('scoring')
134  scores=gmm.score(apos)
135 
136  print('assigning')
137  for nv, score in enumerate(scores):
138  d1.set_value(nv,exp(score))
139  print('will write GMM map to',out_fn)
140  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
141 
142 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
143  use_colors=False):
144  """ given some points (and optional transform), write them to chimera 'bild' format
145  colors flag only applies to ellipses, otherwise it'll be weird"""
146  with open(out_fn,'w') as outf:
147  #print 'will draw',len(pts),'points'
148  # write first point in red
149  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
150  start=0
151  if use_colors:
152  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
153  %(pt[0],pt[1],pt[2]))
154  start=1
155 
156  # write remaining points in green
157  if use_colors:
158  outf.write('.color 0 1 0\n')
159  colors=['0 1 0','0 0 1','0 1 1']
160  for nt,t in enumerate(pts[start:]):
161  if use_colors and nt%2==0:
162  outf.write('.color %s\n' % colors[nt/2])
163  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
164  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
165 
166 
167 
168 def create_gmm_for_bead(mdl,
169  particle,
170  n_components,
171  sampled_points=100000,
172  num_iter=100):
173  print('fitting bead with',n_components,'gaussians')
174  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
175  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
176  IMP.em.write_map(dmap,'test_intermed.mrc')
177  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
178  draw_points(pts,'pts.bild')
179  density_particles=[]
180  fit_gmm_to_points(pts,n_components,mdl,
181  density_particles,
182  num_iter,'full',
183  mass_multiplier=IMP.atom.Mass(particle).get_mass())
184  return density_particles,dmap
185 
186 def sample_and_fit_to_particles(model,
187  fragment_particles,
188  num_components,
189  sampled_points=1000000,
190  simulation_res=0.5,
191  voxel_size=1.0,
192  num_iter=100,
193  covariance_type='full',
194  multiply_by_total_mass=True,
195  output_map=None,
196  output_txt=None):
197  density_particles=[]
198  if multiply_by_total_mass:
199  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
200  print('add_component_density: will multiply by mass',mass_multiplier)
201 
202  # simulate density from ps, then calculate points to fit
203  print('add_component_density: sampling points')
204  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
206  dmap.calcRMS()
207  #if not intermediate_map_fn is None:
208  # IMP.em.write_map(dmap,intermediate_map_fn)
209  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
210 
211  # fit GMM
212  print('add_component_density: fitting GMM to',len(pts),'points')
213  fit_gmm_to_points(points=pts,
214  n_components=num_components,
215  mdl=model,
216  ps=density_particles,
217  num_iter=num_iter,
218  covariance_type=covariance_type,
219  mass_multiplier=mass_multiplier)
220 
221  if not output_txt is None:
222  write_gmm_to_text(density_particles,output_txt)
223  if not output_map is None:
224  write_gmm_to_map(to_draw=density_particles,
225  out_fn=output_map,
226  voxel_size=voxel_size,
227  bounding_box=IMP.em.get_bounding_box(dmap))
228 
229  return density_particles
230 
231 def fit_gmm_to_points(points,
232  n_components,
233  mdl,
234  ps=[],
235  num_iter=100,
236  covariance_type='full',
237  min_covar=0.001,
238  init_centers=[],
239  force_radii=-1.0,
240  force_weight=-1.0,
241  mass_multiplier=1.0):
242  """fit a GMM to some points. Will return the score and the Akaike score.
243  Akaike information criterion for the current model fit. It is a measure
244  of the relative quality of the GMM that takes into account the
245  parsimony and the goodness of the fit.
246  if no particles are provided, they will be created
247 
248  points: list of coordinates (python)
249  n_components: number of gaussians to create
250  mdl: IMP Model
251  ps: list of particles to be decorated. if empty, will add
252  num_iter: number of EM iterations
253  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
254  min_covar: assign a minimum value to covariance term. That is used to have more spherical
255  shaped gaussians
256  init_centers: initial coordinates of the GMM
257  force_radii: fix the radii (spheres only)
258  force_weight: fix the weights
259  mass_multiplier: multiply the weights of all the gaussians by this value
260  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
261  """
262 
263 
264  new_sklearn = False
265  try:
266  from sklearn.mixture import GMM
267  except ImportError:
268  from sklearn.mixture import GaussianMixture
269  new_sklearn = True
270 
271  print('creating GMM with n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
272  if new_sklearn:
273  # aic() calls size() on points, so it needs to be
274  # a numpy array, not a list
275  points = np.array(points)
276  weights_init = precisions_init = None
277  if force_radii != -1.0:
278  print('warning: radii can no longer be forced, but setting '
279  'initial values to ', force_radii)
280  precisions_init = np.array([[1./force_radii]*3
281  for i in range(n_components)])
282  if force_weight != -1.0:
283  print('warning: weights can no longer be forced, but setting '
284  'initial values to ', force_weight)
285  weights_init = np.array([force_weight]*n_components)
286 
287  gmm = GaussianMixture(n_components=n_components,
288  max_iter=num_iter,
289  covariance_type=covariance_type,
290  weights_init=weights_init,
291  precisions_init=precisions_init,
292  means_init=None if init_centers==[]
293  else init_centers)
294  else:
295  params='m'
296  init_params='m'
297  if force_radii==-1.0:
298  params+='c'
299  init_params+='c'
300  else:
301  covariance_type='spherical'
302  print('forcing spherical with radii',force_radii)
303 
304  if force_weight==-1.0:
305  params+='w'
306  init_params+='w'
307  else:
308  print('forcing weights to be',force_weight)
309 
310  gmm = GMM(n_components=n_components, n_iter=num_iter,
311  covariance_type=covariance_type, min_covar=min_covar,
312  params=params, init_params=init_params)
313  if force_weight!=-1.0:
314  gmm.weights_=np.array([force_weight]*n_components)
315  if force_radii!=-1.0:
316  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
317  if init_centers!=[]:
318  gmm.means_=init_centers
319  print('fitting')
320  model=gmm.fit(points)
321  score=gmm.score(points)
322  akaikescore=model.aic(points)
323  #print('>>> GMM score',gmm.score(points))
324 
325  ### convert format to core::Gaussian
326  if new_sklearn:
327  covars = gmm.covariances_
328  else:
329  covars = gmm.covars_
330  for ng in range(n_components):
331  covar=covars[ng]
332  if covar.size==3:
333  covar=np.diag(covar).tolist()
334  else:
335  covar=covar.tolist()
336  center=list(gmm.means_[ng])
337  weight=mass_multiplier*gmm.weights_[ng]
338  if ng>=len(ps):
339  ps.append(IMP.Particle(mdl))
341  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
342  IMP.atom.Mass.setup_particle(ps[ng],weight)
343  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
344 
345  return (score,akaikescore)
346 
348  n_components,
349  mdl,
350  ps=[],
351  num_iter=100,
352  covariance_type='full',
353  mass_multiplier=1.0):
354  """fit a GMM to some points. Will return core::Gaussians.
355  if no particles are provided, they will be created
356 
357  points: list of coordinates (python)
358  n_components: number of gaussians to create
359  mdl: IMP Model
360  ps: list of particles to be decorated. if empty, will add
361  num_iter: number of EM iterations
362  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
363  init_centers: initial coordinates of the GMM
364  force_radii: fix the radii (spheres only)
365  force_weight: fix the weights
366  mass_multiplier: multiply the weights of all the gaussians by this value
367  """
368 
369 
370  new_sklearn = True
371  try:
372  from sklearn.mixture import BayesianGaussianMixture
373  except ImportError:
374  from sklearn.mixture import DPGMM
375  new_sklearn = False
376 
377  ### create and fit GMM
378  print('using dirichlet prior')
379  if new_sklearn:
380  gmm = BayesianGaussianMixture(
381  weight_concentration_prior_type='dirichlet_process',
382  n_components=n_components, max_iter=num_iter,
383  covariance_type=covariance_type)
384  else:
385  gmm = DPGMM(n_components=n_components, n_iter=num_iter,
386  covariance_type=covariance_type)
387 
388  gmm.fit(points)
389 
390  #print('>>> GMM score',gmm.score(points))
391 
392  #print gmm.covars_
393  #print gmm.weights_
394  #print gmm.means_
395  ### convert format to core::Gaussian
396  if not new_sklearn:
397  gmm.precisions_ = gmm.precs_
398  for ng in range(n_components):
399  invcovar=gmm.precisions_[ng]
400  covar=np.linalg.inv(invcovar)
401  if covar.size==3:
402  covar=np.diag(covar).tolist()
403  else:
404  covar=covar.tolist()
405  center=list(gmm.means_[ng])
406  weight=mass_multiplier*gmm.weights_[ng]
407  if ng>=len(ps):
408  ps.append(IMP.Particle(mdl))
410  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
411  IMP.atom.Mass.setup_particle(ps[ng],weight)
412  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:142
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:48
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:347
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:34
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:231
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:633
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:113
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:120
A decorator for a rigid body.
Definition: rigid_bodies.h:82
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:47
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:62
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27