IMP  2.3.1
The Integrative Modeling Platform
gmm_tools.py
1 """@namespace IMP.isd.gmm_tools
2  Tools for handling Gaussian Mixture Models.
3 """
4 
5 import IMP
6 import IMP.core
7 import IMP.algebra
8 import IMP.atom
9 import IMP.em
10 import numpy as np
11 import numpy.linalg
12 import sys,os
13 
14 try:
15  import sklearn.mixture
16  nosklearn=False
17 except:
18  nosklearn=True
19 from math import exp,sqrt,copysign
20 
21 def decorate_gmm_from_text(in_fn,ps,mdl,transform=None,radius_scale=1.0,mass_scale=1.0):
22  ''' read the output from write_gmm_to_text, decorate as Gaussian and Mass'''
23  inf=open(in_fn,'r')
24  ncomp=0
25  for l in inf:
26  if l[0]!='#':
27  fields=l.split('|')
28  weight=float(fields[2])
29  center=map(float,fields[3].split())
30  covar=np.array(map(float,fields[4].split())).reshape((3,3))
31  if ncomp>=len(ps):
32  ps.append(IMP.Particle(mdl))
33  shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
34  IMP.algebra.Vector3D(center))
35  g=IMP.core.Gaussian.setup_particle(ps[ncomp],shape)
36  m=IMP.atom.Mass.setup_particle(ps[ncomp],weight*mass_scale)
37  rmax=sqrt(max(g.get_variances()))*radius_scale
38  IMP.core.XYZR.setup_particle(ps[ncomp],rmax)
39  if not transform is None:
40  IMP.core.transform(IMP.core.RigidBody(ps[ncomp]),transform)
41  ncomp+=1
42 
43 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None):
44  '''write density map from GMM. input can be either particles or gaussians'''
45  if type(to_draw[0]) in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
46  ps=to_draw
47  elif type(to_draw[0])==IMP.core.Gaussian:
48  ps=[g.get_particle() for g in to_draw]
49  else:
50  print 'ps must be Particles or Gaussians'
51  return
52  print 'will write GMM map to',out_fn
53  if bounding_box is None:
54  if len(ps)>1:
55  s=IMP.algebra.get_enclosing_sphere([IMP.core.XYZ(p).get_coordinates() for p in ps])
56  s2=IMP.algebra.Sphere3D(s.get_center(),s.get_radius()*3)
57  else:
58  g=IMP.core.Gaussian(ps[0]).get_gaussian()
59  s2=IMP.algebra.Sphere3D(g.get_center(),max(g.get_variances())*3)
60  bounding_box=IMP.algebra.get_bounding_box(s2)
61  shapes=[]
62  weights=[]
63  for p in ps:
64  shapes.append(IMP.core.Gaussian(p).get_gaussian())
65  weights.append(IMP.atom.Mass(p).get_mass())
66  print 'rasterizing'
67  grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
68  print 'creating map'
70  print 'writing'
72  del d1
73 
74 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
75  '''write density map directly from sklearn GMM (kinda slow) '''
76  ### create density
77  if not dmap_model is None:
78  d1=IMP.em.create_density_map(dmap_model)
79  else:
80  d1=IMP.em.create_density_map(bbox,apix)
81 
82  ### fill it with values from the GMM
83  print 'getting coords'
84  nvox=d1.get_number_of_voxels()
85  apos=[list(d1.get_location_by_voxel(nv)) for nv in xrange(nvox)]
86 
87  print 'scoring'
88  scores=gmm.score(apos)
89 
90  print 'assigning'
91  map(lambda nv: d1.set_value(nv,exp(scores[nv])),xrange(nvox))
92  print 'will write GMM map to',out_fn
94 
95 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
96  use_colors=False):
97  ''' given some points (and optional transform), write them to chimera 'bild' format
98  colors flag only applies to ellipses, otherwise it'll be weird'''
99  outf=open(out_fn,'w')
100  #print 'will draw',len(pts),'points'
101  # write first point in red
102  pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
103  start=0
104  if use_colors:
105  outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
106  start=1
107 
108  # write remaining points in green
109  if use_colors:
110  outf.write('.color 0 1 0\n')
111  colors=['0 1 0','0 0 1','0 1 1']
112  for nt,t in enumerate(pts[start:]):
113  if use_colors and nt%2==0:
114  outf.write('.color %s\n' % colors[nt/2])
115  pt=trans.get_transformed(IMP.algebra.Vector3D(t))
116  outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
117  outf.close()
118 
119 
120 
121 def create_gmm_for_bead(mdl,
122  particle,
123  n_components,
124  sampled_points=100000,
125  num_iter=100):
126  print 'fitting bead with',n_components,'gaussians'
127  dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,
128  IMP.atom.Mass.get_mass_key(),3,IMP.em.SPHERE)
129  IMP.em.write_map(dmap,'test_intermed.mrc')
130  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
131  draw_points(pts,'pts.bild')
132  density_particles=[]
133  fit_gmm_to_points(pts,n_components,mdl,
134  density_particles,
135  num_iter,'full',
136  mass_multiplier=IMP.atom.Mass(particle).get_mass())
137  return density_particles,dmap
138 
139 def sample_and_fit_to_particles(model,
140  fragment_particles,
141  num_components,
142  sampled_points=1000000,
143  simulation_res=0.5,
144  voxel_size=1.0,
145  num_iter=100,
146  covariance_type='full',
147  multiply_by_total_mass=True,
148  output_map=None,
149  output_txt=None):
150  density_particles=[]
151  if multiply_by_total_mass:
152  mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
153  print 'add_component_density: will multiply by mass',mass_multiplier
154 
155  # simulate density from ps, then calculate points to fit
156  print 'add_component_density: sampling points'
157  dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
159  dmap.calcRMS()
160  #if not intermediate_map_fn is None:
161  # IMP.em.write_map(dmap,intermediate_map_fn)
162  pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
163 
164  # fit GMM
165  print 'add_component_density: fitting GMM to',len(pts),'points'
166  fit_gmm_to_points(points=pts,
167  n_components=num_components,
168  mdl=model,
169  ps=density_particles,
170  num_iter=num_iter,
171  covariance_type=covariance_type,
172  mass_multiplier=mass_multiplier)
173 
174  if not output_txt is None:
175  write_gmm_to_text(density_particles,output_txt)
176  if not output_map is None:
177  write_gmm_to_map(to_draw=density_particles,
178  out_fn=output_map,
179  voxel_size=voxel_size,
180  bounding_box=IMP.em.get_bounding_box(dmap))
181 
182  return density_particles
183 
184 def fit_gmm_to_points(points,
185  n_components,
186  mdl,
187  ps=[],
188  num_iter=100,
189  covariance_type='full',
190  init_centers=[],
191  force_radii=-1.0,
192  force_weight=-1.0,
193  mass_multiplier=1.0):
194  '''fit a GMM to some points. Will return core::Gaussians.
195  if no particles are provided, they will be created
196 
197  points: list of coordinates (python)
198  n_components: number of gaussians to create
199  mdl: IMP Model
200  ps: list of particles to be decorated. if empty, will add
201  num_iter: number of EM iterations
202  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'sphereical'
203  init_centers: initial coordinates of the GMM
204  force_radii: fix the radii (spheres only)
205  force_weight: fix the weights
206  mass_multiplier: multiply the weights of all the gaussians by this value
207  dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
208  '''
209 
210 
211  import sklearn.mixture
212 
213  params='m'
214  init_params='m'
215  if force_radii==-1.0:
216  params+='c'
217  init_params+='c'
218  else:
219  covariance_type='spherical'
220  print 'forcing spherical with radii',force_radii
221 
222  if force_weight==-1.0:
223  params+='w'
224  init_params+='w'
225  else:
226  print 'forcing weights to be',force_weight
227 
228  print 'creating GMM with params',params,'init params',init_params,'n_components',n_components,'n_iter',num_iter,'covar type',covariance_type
229  gmm=sklearn.mixture.GMM(n_components=n_components,
230  n_iter=num_iter,
231  covariance_type=covariance_type,
232  params=params,
233  init_params=init_params)
234 
235  if force_weight!=-1.0:
236  gmm.weights_=np.array([force_weight]*n_components)
237  if force_radii!=-1.0:
238  gmm.covars_=np.array([[force_radii]*3 for i in xrange(n_components)])
239  if init_centers!=[]:
240  gmm.means_=init_centers
241  print 'fitting'
242  gmm.fit(points)
243 
244  print '>>> GMM score',gmm.score(points)
245 
246  ### convert format to core::Gaussian
247  for ng in xrange(n_components):
248  covar=gmm.covars_[ng]
249  if covar.size==3:
250  covar=np.diag(covar).tolist()
251  else:
252  covar=covar.tolist()
253  center=list(gmm.means_[ng])
254  weight=mass_multiplier*gmm.weights_[ng]
255  if ng>=len(ps):
256  ps.append(IMP.Particle(mdl))
258  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
259  IMP.atom.Mass.setup_particle(ps[ng],weight)
260  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
261 
263  n_components,
264  mdl,
265  ps=[],
266  num_iter=100,
267  covariance_type='full',
268  mass_multiplier=1.0):
269  '''fit a GMM to some points. Will return core::Gaussians.
270  if no particles are provided, they will be created
271 
272  points: list of coordinates (python)
273  n_components: number of gaussians to create
274  mdl: IMP Model
275  ps: list of particles to be decorated. if empty, will add
276  num_iter: number of EM iterations
277  covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'sphereical'
278  init_centers: initial coordinates of the GMM
279  force_radii: fix the radii (spheres only)
280  force_weight: fix the weights
281  mass_multiplier: multiply the weights of all the gaussians by this value
282 '''
283 
284 
285  import sklearn.mixture
286 
287  ### create and fit GMM
288  print 'using dirichlet prior'
289  gmm=sklearn.mixture.DPGMM(n_components=n_components,
290  n_iter=num_iter,
291  covariance_type=covariance_type)
292 
293  gmm.fit(points)
294 
295  print '>>> GMM score',gmm.score(points)
296 
297  #print gmm.covars_
298  #print gmm.weights_
299  #print gmm.means_
300  ### convert format to core::Gaussian
301  for ng in xrange(n_components):
302  invcovar=gmm.precs_[ng]
303  covar=np.linalg.inv(invcovar)
304  if covar.size==3:
305  covar=np.diag(covar).tolist()
306  else:
307  covar=covar.tolist()
308  center=list(gmm.means_[ng])
309  weight=mass_multiplier*gmm.weights_[ng]
310  if ng>=len(ps):
311  ps.append(IMP.Particle(mdl))
313  g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
314  IMP.atom.Mass.setup_particle(ps[ng],weight)
315  IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))
Add mass to a particle.
Definition: Mass.h:23
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:95
static FloatKey get_mass_key()
DensityMap * create_density_map(const algebra::GridD< 3, algebra::DenseGridStorageD< 3, float >, float > &grid)
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:262
DenseGrid3D< float > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
void write_map(DensityMap *m, std::string filename)
Write a density map to a file.
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:184
static XYZR setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: XYZR.h:48
The standard decorator for manipulating molecular structures.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class to handle individual model particles.
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:43
static Mass setup_particle(kernel::Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:44
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:461
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Gaussian3D get_gaussian_from_covariance(const IMP_Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed spheres.
A decorator for helping deal with a hierarchy.
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:74
A decorator for a rigid body.
Definition: rigid_bodies.h:75
Functionality for loading, creating, manipulating and scoring atomic structures.
static Gaussian setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: Gaussian.h:48
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:21