IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 from __future__ import print_function
6 import IMP
7 import IMP.core
8 import IMP.algebra
9 import IMP.atom
10 import IMP.em
11 import numpy as np
12 import numpy.linalg
13 import sys,os
14 
15 try:
16  import sklearn.mixture
17  nosklearn=False
18 except:
19  nosklearn=True
20 from math import exp,sqrt,copysign
21 
22 def decorate_gmm_from_text(in_fn,ps,mdl,transform=None,radius_scale=1.0,mass_scale=1.0):
23  ''' read the output from write_gmm_to_text, decorate as Gaussian and Mass'''
24  inf=open(in_fn,'r')
25  ncomp=0
26  for l in inf:
27  if l[0]!='#':
28  fields=l.split('|')
29  weight=float(fields[2])
30  center=list(map(float,fields[3].split()))
31  covar=np.array(list(map(float,fields[4].split()))).reshape((3,3))
32  if ncomp>=len(ps):
33  ps.append(IMP.Particle(mdl))
34  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
35  IMP.algebra.Vector3D(center))
36  g=IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
37  m=IMP.atom.Mass.setup_particle(ps[ncomp],weight*mass_scale)
38  rmax=sqrt(max(g.get_variances()))*radius_scale
39  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
40  if not transform is None:
41  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
42  ncomp+=1
43 def write_gmm_to_text(ps,out_fn):
44  '''write a list of gaussians to text. must be decorated as Gaussian and Mass'''
45  print('will write GMM text to',out_fn)
46  outf=open(out_fn,'w')
47  outf.write('#|num|weight|mean|covariance matrix|\n')
48  for ng,g in enumerate(ps):
49  shape=IMP.core.Gaussian(g).get_gaussian()
50  weight=IMP.atom.Mass(g).get_mass()
51  covar=[c for row in IMP.algebra.get_covariance(shape) for c in row]
52  mean=list(shape.get_center())
53  fm=[ng,weight]+mean+covar
54  try:
55  #python 2.7 format
56  outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
57  except ValueError:
58  #python 2.6 and below
59  outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
60  outf.close()
61 
62 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None):
63  '''write density map from GMM. input can be either particles or gaussians'''
64  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
65  ps=to_draw
66  elif type(to_draw[0])==IMP.core.Gaussian:
67  ps=[g.get_particle() for g in to_draw]
68  else:
69  print('ps must be Particles or Gaussians')
70  return
71  print('will write GMM map to',out_fn)
72  if bounding_box is None:
73  if len(ps)>1:
74  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
75  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
76  else:
77  g=IMP.core.Gaussian(ps[0]).get_gaussian()
78  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
79  bounding_box=IMP.algebra.get_bounding_box(s2)
80  shapes=[]
81  weights=[]
82  for p in ps:
83  shapes.append(IMP.core.Gaussian(p).get_gaussian())
84  weights.append(IMP.atom.Mass(p).get_mass())
85  print('rasterizing')
86  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
87  print('creating map')
89  print('writing')
91  del d1
92 
93 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
94  '''write density map directly from sklearn GMM (kinda slow) '''
95  ### create density
96  if not dmap_model is None:
97  d1=IMP.em.create_density_map(dmap_model)
98  else:
99  d1=IMP.em.create_density_map(bbox,apix)
100 
101  ### fill it with values from the GMM
102  print('getting coords')
103  nvox=d1.get_number_of_voxels()
104  apos=[list(d1.get_location_by_voxel(nv)) for nv in range(nvox)]
105 
106  print('scoring')
107  scores=gmm.score(apos)
108 
109  print('assigning')
110  for nv, score in enumerate(scores):
111  d1.set_value(nv,exp(score))
112  print('will write GMM map to',out_fn)
114 
115 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
116  use_colors=False):
117  ''' given some points (and optional transform), write them to chimera 'bild' format
118  colors flag only applies to ellipses, otherwise it'll be weird'''
119  outf=open(out_fn,'w')
120  #print 'will draw',len(pts),'points'
121  # write first point in red
122  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
123  start=0
124  if use_colors:
125  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
126  start=1
127 
128  # write remaining points in green
129  if use_colors:
130  outf.write('.color 0 1 0\n')
131  colors=['0 1 0','0 0 1','0 1 1']
132  for nt,t in enumerate(pts[start:]):
133  if use_colors and nt%2==0:
134  outf.write('.color %s\n' % colors[nt/2])
135  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
136  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
137  outf.close()
138 
139 
140 
141 def create_gmm_for_bead(mdl,
142  particle,
143  n_components,
144  sampled_points=100000,
145  num_iter=100):
146  print('fitting bead with',n_components,'gaussians')
147  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
148  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
149  IMP.em.write_map(dmap,'test_intermed.mrc')
150  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
151  draw_points(pts,'pts.bild')
152  density_particles=[]
153  fit_gmm_to_points(pts,n_components,mdl,
154  density_particles,
155  num_iter,'full',
156  mass_multiplier=IMP.atom.Mass(particle).get_mass())
157  return density_particles,dmap
158 
159 def sample_and_fit_to_particles(model,
160  fragment_particles,
161  num_components,
162  sampled_points=1000000,
163  simulation_res=0.5,
164  voxel_size=1.0,
165  num_iter=100,
166  covariance_type='full',
167  multiply_by_total_mass=True,
168  output_map=None,
169  output_txt=None):
170  density_particles=[]
171  if multiply_by_total_mass:
172  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
173  print('add_component_density: will multiply by mass',mass_multiplier)
174 
175  # simulate density from ps, then calculate points to fit
176  print('add_component_density: sampling points')
177  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
179  dmap.calcRMS()
180  #if not intermediate_map_fn is None:
181  # IMP.em.write_map(dmap,intermediate_map_fn)
182  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
183 
184  # fit GMM
185  print('add_component_density: fitting GMM to',len(pts),'points')
186  fit_gmm_to_points(points=pts,
187  n_components=num_components,
188  mdl=model,
189  ps=density_particles,
190  num_iter=num_iter,
191  covariance_type=covariance_type,
192  mass_multiplier=mass_multiplier)
193 
194  if not output_txt is None:
195  write_gmm_to_text(density_particles,output_txt)
196  if not output_map is None:
197  write_gmm_to_map(to_draw=density_particles,
198  out_fn=output_map,
199  voxel_size=voxel_size,
200  bounding_box=IMP.em.get_bounding_box(dmap))
201 
202  return density_particles
203 
204 def fit_gmm_to_points(points,
205  n_components,
206  mdl,
207  ps=[],
208  num_iter=100,
209  covariance_type='full',
210  init_centers=[],
211  force_radii=-1.0,
212  force_weight=-1.0,
213  mass_multiplier=1.0):
214  '''fit a GMM to some points. Will return core::Gaussians.
215  if no particles are provided, they will be created
216 
217  points: list of coordinates (python)
218  n_components: number of gaussians to create
219  mdl: IMP Model
220  ps: list of particles to be decorated. if empty, will add
221  num_iter: number of EM iterations
222  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'sphereical'
223  init_centers: initial coordinates of the GMM
224  force_radii: fix the radii (spheres only)
225  force_weight: fix the weights
226  mass_multiplier: multiply the weights of all the gaussians by this value
227  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
228  '''
229 
230 
231  import sklearn.mixture
232 
233  params='m'
234  init_params='m'
235  if force_radii==-1.0:
236  params+='c'
237  init_params+='c'
238  else:
239  covariance_type='spherical'
240  print('forcing spherical with radii',force_radii)
241 
242  if force_weight==-1.0:
243  params+='w'
244  init_params+='w'
245  else:
246  print('forcing weights to be',force_weight)
247 
248  print('creating GMM with params',params,'init params',init_params,'n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
249  gmm=sklearn.mixture.GMM(n_components=n_components,
250  n_iter=num_iter,
251  covariance_type=covariance_type,
252  params=params,
253  init_params=init_params)
254 
255  if force_weight!=-1.0:
256  gmm.weights_=np.array([force_weight]*n_components)
257  if force_radii!=-1.0:
258  gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
259  if init_centers!=[]:
260  gmm.means_=init_centers
261  print('fitting')
262  gmm.fit(points)
263 
264  print('>>> GMM score',gmm.score(points))
265 
266  ### convert format to core::Gaussian
267  for ng in range(n_components):
268  covar=gmm.covars_[ng]
269  if covar.size==3:
270  covar=np.diag(covar).tolist()
271  else:
272  covar=covar.tolist()
273  center=list(gmm.means_[ng])
274  weight=mass_multiplier*gmm.weights_[ng]
275  if ng>=len(ps):
276  ps.append(IMP.Particle(mdl))
278  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
279  IMP.atom.Mass.setup_particle(ps[ng],weight)
280  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
281 
283  n_components,
284  mdl,
285  ps=[],
286  num_iter=100,
287  covariance_type='full',
288  mass_multiplier=1.0):
289  '''fit a GMM to some points. Will return core::Gaussians.
290  if no particles are provided, they will be created
291 
292  points: list of coordinates (python)
293  n_components: number of gaussians to create
294  mdl: IMP Model
295  ps: list of particles to be decorated. if empty, will add
296  num_iter: number of EM iterations
297  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'sphereical'
298  init_centers: initial coordinates of the GMM
299  force_radii: fix the radii (spheres only)
300  force_weight: fix the weights
301  mass_multiplier: multiply the weights of all the gaussians by this value
302 '''
303 
304 
305  import sklearn.mixture
306 
307  ### create and fit GMM
308  print('using dirichlet prior')
309  gmm=sklearn.mixture.DPGMM(n_components=n_components,
310  n_iter=num_iter,
311  covariance_type=covariance_type)
312 
313  gmm.fit(points)
314 
315  print('>>> GMM score',gmm.score(points))
316 
317  #print gmm.covars_
318  #print gmm.weights_
319  #print gmm.means_
320  ### convert format to core::Gaussian
321  for ng in range(n_components):
322  invcovar=gmm.precs_[ng]
323  covar=np.linalg.inv(invcovar)
324  if covar.size==3:
325  covar=np.diag(covar).tolist()
326  else:
327  covar=covar.tolist()
328  center=list(gmm.means_[ng])
329  weight=mass_multiplier*gmm.weights_[ng]
330  if ng>=len(ps):
331  ps.append(IMP.Particle(mdl))
333  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
334  IMP.atom.Mass.setup_particle(ps[ng],weight)
335  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:115
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: Gaussian.h:48
static FloatKey get_mass_key()
DensityMap * create_density_map(const algebra::GridD< 3, algebra::DenseGridStorageD< 3, float >, float > &grid)
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:282
DenseGrid3D< float > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
IMP_Eigen::Matrix3d get_covariance(const Gaussian3D &g)
void write_map(DensityMap *m, std::string filename)
Write a density map to a file.
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:204
The standard decorator for manipulating molecular structures.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:44
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:62
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Gaussian3D get_gaussian_from_covariance(const IMP_Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Class to handle individual model particles.
Definition: Particle.h:37
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
A decorator for helping deal with a hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:93
A decorator for a rigid body.
Definition: rigid_bodies.h:75
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:43
Functionality for loading, creating, manipulating and scoring atomic structures.
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22