IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 
15 try:
16  import sklearn.mixture
17  nosklearn=False
18 except:
19  nosklearn=True
20 from math import exp,sqrt,copysign
21 
23  ps,
24  mdl,
25  transform=None,
26  radius_scale=1.0,
27  mass_scale=1.0):
28  """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
29  ncomp=0
30  with open(in_fn,'r') as inf:
31  for l in inf:
32  if l[0]!='#':
33  if ncomp>len(ps)-1:
34  ps.append(IMP.Particle(mdl))
35  p = ps[ncomp]
36  fields=l.split('|')
37  weight=float(fields[2])
38  center=list(map(float,fields[3].split()))
39  covar=np.array(list(map(float,
40  fields[4].split()))).reshape((3,3))
41  #print('on particle',ncomp)
42  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
43  IMP.algebra.Vector3D(center))
45  g = IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
46  else:
47  g = IMP.core.Gaussian(ps[ncomp])
48  g.set_gaussian(shape)
50  IMP.atom.Mass.setup_particle(p,weight*mass_scale)
51  else:
52  IMP.atom.Mass(p).set_mass(weight*mass_scale)
53  rmax = sqrt(max(g.get_variances()))*radius_scale
54  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
55  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
56  else:
57  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
58  if not transform is None:
59  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
60  ncomp+=1
61 
62 def write_gmm_to_text(ps,out_fn):
63  """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
64  print('will write GMM text to',out_fn)
65  with open(out_fn,'w') as outf:
66  outf.write('#|num|weight|mean|covariance matrix|\n')
67  for ng,g in enumerate(ps):
68  shape=IMP.core.Gaussian(g).get_gaussian()
69  weight=IMP.atom.Mass(g).get_mass()
70  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
71  mean=list(shape.get_center())
72  fm=[ng,weight]+mean+covar
73  try:
74  #python 2.7 format
75  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
76  except ValueError:
77  #python 2.6 and below
78  outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
79 
80 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
81  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
82  ps=to_draw
83  elif type(to_draw[0])==IMP.core.Gaussian:
84  ps=[g.get_particle() for g in to_draw]
85  else:
86  print('ps must be Particles or Gaussians')
87  return
88  if bounding_box is None:
89  if len(ps)>1:
90  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
91  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
92  else:
93  g=IMP.core.Gaussian(ps[0]).get_gaussian()
94  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
95  bounding_box=IMP.algebra.get_bounding_box(s2)
96  shapes=[]
97  weights=[]
98  for p in ps:
99  shapes.append(IMP.core.Gaussian(p).get_gaussian())
100  weights.append(IMP.atom.Mass(p).get_mass())
101  print('rasterizing')
102  if fast:
103  grid=IMP.algebra.get_rasterized_fast(shapes,weights,voxel_size,bounding_box,factor)
104  else:
105  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
106  print('creating map')
108  if origin is not None:
109  d1.set_origin(origin)
110  return d1
111 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
112  """write density map from GMM. input can be either particles or gaussians"""
113  d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
114  print('will write GMM map to',out_fn)
115  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
116  del d1
117 
118 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
119  """write density map directly from sklearn GMM (kinda slow) """
120  ### create density
121  if not dmap_model is None:
122  d1=IMP.em.create_density_map(dmap_model)
123  else:
124  d1=IMP.em.create_density_map(bbox,apix)
125 
126  ### fill it with values from the GMM
127  print('getting coords')
128  nvox=d1.get_number_of_voxels()
129  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
130 
131  print('scoring')
132  scores=gmm.score(apos)
133 
134  print('assigning')
135  for nv, score in enumerate(scores):
136  d1.set_value(nv,exp(score))
137  print('will write GMM map to',out_fn)
138  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
139 
140 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
141  use_colors=False):
142  """ given some points (and optional transform), write them to chimera 'bild' format
143  colors flag only applies to ellipses, otherwise it'll be weird"""
144  with open(out_fn,'w') as outf:
145  #print 'will draw',len(pts),'points'
146  # write first point in red
147  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
148  start=0
149  if use_colors:
150  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
151  %(pt[0],pt[1],pt[2]))
152  start=1
153 
154  # write remaining points in green
155  if use_colors:
156  outf.write('.color 0 1 0\n')
157  colors=['0 1 0','0 0 1','0 1 1']
158  for nt,t in enumerate(pts[start:]):
159  if use_colors and nt%2==0:
160  outf.write('.color %s\n' % colors[nt/2])
161  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
162  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
163 
164 
165 
166 def create_gmm_for_bead(mdl,
167  particle,
168  n_components,
169  sampled_points=100000,
170  num_iter=100):
171  print('fitting bead with',n_components,'gaussians')
172  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
173  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
174  IMP.em.write_map(dmap,'test_intermed.mrc')
175  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
176  draw_points(pts,'pts.bild')
177  density_particles=[]
178  fit_gmm_to_points(pts,n_components,mdl,
179  density_particles,
180  num_iter,'full',
181  mass_multiplier=IMP.atom.Mass(particle).get_mass())
182  return density_particles,dmap
183 
184 def sample_and_fit_to_particles(model,
185  fragment_particles,
186  num_components,
187  sampled_points=1000000,
188  simulation_res=0.5,
189  voxel_size=1.0,
190  num_iter=100,
191  covariance_type='full',
192  multiply_by_total_mass=True,
193  output_map=None,
194  output_txt=None):
195  density_particles=[]
196  if multiply_by_total_mass:
197  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
198  print('add_component_density: will multiply by mass',mass_multiplier)
199 
200  # simulate density from ps, then calculate points to fit
201  print('add_component_density: sampling points')
202  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
204  dmap.calcRMS()
205  #if not intermediate_map_fn is None:
206  # IMP.em.write_map(dmap,intermediate_map_fn)
207  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
208 
209  # fit GMM
210  print('add_component_density: fitting GMM to',len(pts),'points')
211  fit_gmm_to_points(points=pts,
212  n_components=num_components,
213  mdl=model,
214  ps=density_particles,
215  num_iter=num_iter,
216  covariance_type=covariance_type,
217  mass_multiplier=mass_multiplier)
218 
219  if not output_txt is None:
220  write_gmm_to_text(density_particles,output_txt)
221  if not output_map is None:
222  write_gmm_to_map(to_draw=density_particles,
223  out_fn=output_map,
224  voxel_size=voxel_size,
225  bounding_box=IMP.em.get_bounding_box(dmap))
226 
227  return density_particles
228 
229 def fit_gmm_to_points(points,
230  n_components,
231  mdl,
232  ps=[],
233  num_iter=100,
234  covariance_type='full',
235  min_covar=0.001,
236  init_centers=[],
237  force_radii=-1.0,
238  force_weight=-1.0,
239  mass_multiplier=1.0):
240  """fit a GMM to some points. Will return the score and the Akaike score.
241  Akaike information criterion for the current model fit. It is a measure
242  of the relative quality of the GMM that takes into account the
243  parsimony and the goodness of the fit.
244  if no particles are provided, they will be created
245 
246  points: list of coordinates (python)
247  n_components: number of gaussians to create
248  mdl: IMP Model
249  ps: list of particles to be decorated. if empty, will add
250  num_iter: number of EM iterations
251  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
252  min_covar: assign a minimum value to covariance term. That is used to have more spherical
253  shaped gaussians
254  init_centers: initial coordinates of the GMM
255  force_radii: fix the radii (spheres only)
256  force_weight: fix the weights
257  mass_multiplier: multiply the weights of all the gaussians by this value
258  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
259  """
260 
261 
262  import sklearn.mixture
263 
264  params='m'
265  init_params='m'
266  if force_radii==-1.0:
267  params+='c'
268  init_params+='c'
269  else:
270  covariance_type='spherical'
271  print('forcing spherical with radii',force_radii)
272 
273  if force_weight==-1.0:
274  params+='w'
275  init_params+='w'
276  else:
277  print('forcing weights to be',force_weight)
278 
279  print('creating GMM with params',params,'init params',init_params,'n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
280  gmm=sklearn.mixture.GMM(n_components=n_components,
281  n_iter=num_iter,
282  covariance_type=covariance_type,
283  min_covar=min_covar,
284  params=params,
285  init_params=init_params)
286 
287  if force_weight!=-1.0:
288  gmm.weights_=np.array([force_weight]*n_components)
289  if force_radii!=-1.0:
290  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
291  if init_centers!=[]:
292  gmm.means_=init_centers
293  print('fitting')
294  model=gmm.fit(points)
295  score=gmm.score(points)
296  akaikescore=model.aic(points)
297  #print('>>> GMM score',gmm.score(points))
298 
299  ### convert format to core::Gaussian
300  for ng in range(n_components):
301  covar=gmm.covars_[ng]
302  if covar.size==3:
303  covar=np.diag(covar).tolist()
304  else:
305  covar=covar.tolist()
306  center=list(gmm.means_[ng])
307  weight=mass_multiplier*gmm.weights_[ng]
308  if ng>=len(ps):
309  ps.append(IMP.Particle(mdl))
311  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
312  IMP.atom.Mass.setup_particle(ps[ng],weight)
313  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
314 
315  return (score,akaikescore)
316 
318  n_components,
319  mdl,
320  ps=[],
321  num_iter=100,
322  covariance_type='full',
323  mass_multiplier=1.0):
324  """fit a GMM to some points. Will return core::Gaussians.
325  if no particles are provided, they will be created
326 
327  points: list of coordinates (python)
328  n_components: number of gaussians to create
329  mdl: IMP Model
330  ps: list of particles to be decorated. if empty, will add
331  num_iter: number of EM iterations
332  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
333  init_centers: initial coordinates of the GMM
334  force_radii: fix the radii (spheres only)
335  force_weight: fix the weights
336  mass_multiplier: multiply the weights of all the gaussians by this value
337  """
338 
339 
340  import sklearn.mixture
341 
342  ### create and fit GMM
343  print('using dirichlet prior')
344  gmm=sklearn.mixture.DPGMM(n_components=n_components,
345  n_iter=num_iter,
346  covariance_type=covariance_type)
347 
348  gmm.fit(points)
349 
350  #print('>>> GMM score',gmm.score(points))
351 
352  #print gmm.covars_
353  #print gmm.weights_
354  #print gmm.means_
355  ### convert format to core::Gaussian
356  for ng in range(n_components):
357  invcovar=gmm.precs_[ng]
358  covar=np.linalg.inv(invcovar)
359  if covar.size==3:
360  covar=np.diag(covar).tolist()
361  else:
362  covar=covar.tolist()
363  center=list(gmm.means_[ng])
364  weight=mass_multiplier*gmm.weights_[ng]
365  if ng>=len(ps):
366  ps.append(IMP.Particle(mdl))
368  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
369  IMP.atom.Mass.setup_particle(ps[ng],weight)
370  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:140
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:48
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:317
IMP_Eigen::Matrix3d get_covariance(const Gaussian3D &g)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:30
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:229
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:633
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:44
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:111
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Gaussian3D get_gaussian_from_covariance(const IMP_Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:118
A decorator for a rigid body.
Definition: rigid_bodies.h:75
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: core/Gaussian.h:47
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:62
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27