IMP logo
IMP Reference Guide  2.6.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 
15 try:
16  import sklearn.mixture
17  nosklearn=False
18 except:
19  nosklearn=True
20 from math import exp,sqrt,copysign
21 
23  ps,
24  mdl,
25  transform=None,
26  radius_scale=1.0,
27  mass_scale=1.0):
28  """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
29  ncomp=0
30  with open(in_fn,'r') as inf:
31  for l in inf:
32  if l[0]!='#':
33  if ncomp>len(ps)-1:
34  ps.append(IMP.Particle(mdl))
35  p = ps[ncomp]
36  fields=l.split('|')
37  weight=float(fields[2])
38  center=list(map(float,fields[3].split()))
39  covar=np.array(list(map(float,
40  fields[4].split()))).reshape((3,3))
41  #print('on particle',ncomp)
42  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
43  IMP.algebra.Vector3D(center))
45  g = IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
46  else:
47  g = IMP.core.Gaussian(ps[ncomp])
48  g.set_gaussian(shape)
50  IMP.atom.Mass.setup_particle(p,weight*mass_scale)
51  else:
52  IMP.atom.Mass(p).set_mass(weight*mass_scale)
53  rmax = sqrt(max(g.get_variances()))*radius_scale
54  if not IMP.core.XYZR.get_is_setup(ps[ncomp]):
55  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
56  else:
57  IMP.core.XYZR(ps[ncomp]).set_radius(rmax)
58  if not transform is None:
59  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
60  ncomp+=1
61 
62 def write_gmm_to_text(ps,out_fn):
63  """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
64  print('will write GMM text to',out_fn)
65  with open(out_fn,'w') as outf:
66  outf.write('#|num|weight|mean|covariance matrix|\n')
67  for ng,g in enumerate(ps):
68  shape=IMP.core.Gaussian(g).get_gaussian()
69  weight=IMP.atom.Mass(g).get_mass()
70  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
71  mean=list(shape.get_center())
72  fm=[ng,weight]+mean+covar
73  try:
74  #python 2.7 format
75  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
76  except ValueError:
77  #python 2.6 and below
78  outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
79 
80 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None):
81  """write density map from GMM. input can be either particles or gaussians"""
82  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
83  ps=to_draw
84  elif type(to_draw[0])==IMP.core.Gaussian:
85  ps=[g.get_particle() for g in to_draw]
86  else:
87  print('ps must be Particles or Gaussians')
88  return
89  print('will write GMM map to',out_fn)
90  if bounding_box is None:
91  if len(ps)>1:
92  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
93  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
94  else:
95  g=IMP.core.Gaussian(ps[0]).get_gaussian()
96  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
97  bounding_box=IMP.algebra.get_bounding_box(s2)
98  shapes=[]
99  weights=[]
100  for p in ps:
101  shapes.append(IMP.core.Gaussian(p).get_gaussian())
102  weights.append(IMP.atom.Mass(p).get_mass())
103  print('rasterizing')
104  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
105  print('creating map')
107  print('writing')
108  if origin is not None:
109  d1.set_origin(origin)
110  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
111  del d1
112 
113 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
114  """write density map directly from sklearn GMM (kinda slow) """
115  ### create density
116  if not dmap_model is None:
117  d1=IMP.em.create_density_map(dmap_model)
118  else:
119  d1=IMP.em.create_density_map(bbox,apix)
120 
121  ### fill it with values from the GMM
122  print('getting coords')
123  nvox=d1.get_number_of_voxels()
124  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
125 
126  print('scoring')
127  scores=gmm.score(apos)
128 
129  print('assigning')
130  for nv, score in enumerate(scores):
131  d1.set_value(nv,exp(score))
132  print('will write GMM map to',out_fn)
133  IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
134 
135 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
136  use_colors=False):
137  """ given some points (and optional transform), write them to chimera 'bild' format
138  colors flag only applies to ellipses, otherwise it'll be weird"""
139  with open(out_fn,'w') as outf:
140  #print 'will draw',len(pts),'points'
141  # write first point in red
142  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
143  start=0
144  if use_colors:
145  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
146  %(pt[0],pt[1],pt[2]))
147  start=1
148 
149  # write remaining points in green
150  if use_colors:
151  outf.write('.color 0 1 0\n')
152  colors=['0 1 0','0 0 1','0 1 1']
153  for nt,t in enumerate(pts[start:]):
154  if use_colors and nt%2==0:
155  outf.write('.color %s\n' % colors[nt/2])
156  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
157  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
158 
159 
160 
161 def create_gmm_for_bead(mdl,
162  particle,
163  n_components,
164  sampled_points=100000,
165  num_iter=100):
166  print('fitting bead with',n_components,'gaussians')
167  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
168  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
169  IMP.em.write_map(dmap,'test_intermed.mrc')
170  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
171  draw_points(pts,'pts.bild')
172  density_particles=[]
173  fit_gmm_to_points(pts,n_components,mdl,
174  density_particles,
175  num_iter,'full',
176  mass_multiplier=IMP.atom.Mass(particle).get_mass())
177  return density_particles,dmap
178 
179 def sample_and_fit_to_particles(model,
180  fragment_particles,
181  num_components,
182  sampled_points=1000000,
183  simulation_res=0.5,
184  voxel_size=1.0,
185  num_iter=100,
186  covariance_type='full',
187  multiply_by_total_mass=True,
188  output_map=None,
189  output_txt=None):
190  density_particles=[]
191  if multiply_by_total_mass:
192  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
193  print('add_component_density: will multiply by mass',mass_multiplier)
194 
195  # simulate density from ps, then calculate points to fit
196  print('add_component_density: sampling points')
197  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
199  dmap.calcRMS()
200  #if not intermediate_map_fn is None:
201  # IMP.em.write_map(dmap,intermediate_map_fn)
202  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
203 
204  # fit GMM
205  print('add_component_density: fitting GMM to',len(pts),'points')
206  fit_gmm_to_points(points=pts,
207  n_components=num_components,
208  mdl=model,
209  ps=density_particles,
210  num_iter=num_iter,
211  covariance_type=covariance_type,
212  mass_multiplier=mass_multiplier)
213 
214  if not output_txt is None:
215  write_gmm_to_text(density_particles,output_txt)
216  if not output_map is None:
217  write_gmm_to_map(to_draw=density_particles,
218  out_fn=output_map,
219  voxel_size=voxel_size,
220  bounding_box=IMP.em.get_bounding_box(dmap))
221 
222  return density_particles
223 
224 def fit_gmm_to_points(points,
225  n_components,
226  mdl,
227  ps=[],
228  num_iter=100,
229  covariance_type='full',
230  min_covar=0.001,
231  init_centers=[],
232  force_radii=-1.0,
233  force_weight=-1.0,
234  mass_multiplier=1.0):
235  """fit a GMM to some points. Will return the score and the Akaike score.
236  Akaike information criterion for the current model fit. It is a measure
237  of the relative quality of the GMM that takes into account the
238  parsimony and the goodness of the fit.
239  if no particles are provided, they will be created
240 
241  points: list of coordinates (python)
242  n_components: number of gaussians to create
243  mdl: IMP Model
244  ps: list of particles to be decorated. if empty, will add
245  num_iter: number of EM iterations
246  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
247  min_covar: assign a minimum value to covariance term. That is used to have more spherical
248  shaped gaussians
249  init_centers: initial coordinates of the GMM
250  force_radii: fix the radii (spheres only)
251  force_weight: fix the weights
252  mass_multiplier: multiply the weights of all the gaussians by this value
253  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
254  """
255 
256 
257  import sklearn.mixture
258 
259  params='m'
260  init_params='m'
261  if force_radii==-1.0:
262  params+='c'
263  init_params+='c'
264  else:
265  covariance_type='spherical'
266  print('forcing spherical with radii',force_radii)
267 
268  if force_weight==-1.0:
269  params+='w'
270  init_params+='w'
271  else:
272  print('forcing weights to be',force_weight)
273 
274  print('creating GMM with params',params,'init params',init_params,'n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
275  gmm=sklearn.mixture.GMM(n_components=n_components,
276  n_iter=num_iter,
277  covariance_type=covariance_type,
278  min_covar=min_covar,
279  params=params,
280  init_params=init_params)
281 
282  if force_weight!=-1.0:
283  gmm.weights_=np.array([force_weight]*n_components)
284  if force_radii!=-1.0:
285  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
286  if init_centers!=[]:
287  gmm.means_=init_centers
288  print('fitting')
289  model=gmm.fit(points)
290  score=gmm.score(points)
291  akaikescore=model.aic(points)
292  #print('>>> GMM score',gmm.score(points))
293 
294  ### convert format to core::Gaussian
295  for ng in range(n_components):
296  covar=gmm.covars_[ng]
297  if covar.size==3:
298  covar=np.diag(covar).tolist()
299  else:
300  covar=covar.tolist()
301  center=list(gmm.means_[ng])
302  weight=mass_multiplier*gmm.weights_[ng]
303  if ng>=len(ps):
304  ps.append(IMP.Particle(mdl))
306  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
307  IMP.atom.Mass.setup_particle(ps[ng],weight)
308  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
309 
310  return (score,akaikescore)
311 
313  n_components,
314  mdl,
315  ps=[],
316  num_iter=100,
317  covariance_type='full',
318  mass_multiplier=1.0):
319  """fit a GMM to some points. Will return core::Gaussians.
320  if no particles are provided, they will be created
321 
322  points: list of coordinates (python)
323  n_components: number of gaussians to create
324  mdl: IMP Model
325  ps: list of particles to be decorated. if empty, will add
326  num_iter: number of EM iterations
327  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
328  init_centers: initial coordinates of the GMM
329  force_radii: fix the radii (spheres only)
330  force_weight: fix the weights
331  mass_multiplier: multiply the weights of all the gaussians by this value
332  """
333 
334 
335  import sklearn.mixture
336 
337  ### create and fit GMM
338  print('using dirichlet prior')
339  gmm=sklearn.mixture.DPGMM(n_components=n_components,
340  n_iter=num_iter,
341  covariance_type=covariance_type)
342 
343  gmm.fit(points)
344 
345  #print('>>> GMM score',gmm.score(points))
346 
347  #print gmm.covars_
348  #print gmm.weights_
349  #print gmm.means_
350  ### convert format to core::Gaussian
351  for ng in range(n_components):
352  invcovar=gmm.precs_[ng]
353  covar=np.linalg.inv(invcovar)
354  if covar.size==3:
355  covar=np.diag(covar).tolist()
356  else:
357  covar=covar.tolist()
358  center=list(gmm.means_[ng])
359  weight=mass_multiplier*gmm.weights_[ng]
360  if ng>=len(ps):
361  ps.append(IMP.Particle(mdl))
363  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
364  IMP.atom.Mass.setup_particle(ps[ng],weight)
365  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:135
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: Gaussian.h:48
static FloatKey get_mass_key()
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:312
DenseGrid3D< float > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
IMP_Eigen::Matrix3d get_covariance(const Gaussian3D &g)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:30
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:224
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:626
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:44
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:80
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:457
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Gaussian3D get_gaussian_from_covariance(const IMP_Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Class to handle individual model particles.
Definition: Particle.h:37
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
A decorator for helping deal with a generalized hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:113
A decorator for a rigid body.
Definition: rigid_bodies.h:75
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Gaussian.h:47
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:62
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27