1 """@namespace IMP.isd.gmm_tools
2 Tools for handling Gaussian Mixture Models.
5 from __future__
import print_function
17 import sklearn.mixture
21 from math
import exp,sqrt,copysign
29 """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
31 added_ps = itertools.count(1)
32 with open(in_fn,
'r') as inf:
39 weight=float(fields[2])
40 center=list(map(float,fields[3].split()))
41 covar=np.array(list(map(float,
42 fields[4].split()))).reshape((3,3))
55 rmax = sqrt(max(g.get_variances()))*radius_scale
60 if not transform
is None:
65 """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
66 print(
'will write GMM text to',out_fn)
67 with open(out_fn,
'w')
as outf:
68 for comment
in comments:
69 outf.write(
'# ' + comment +
'\n')
70 outf.write(
'#|num|weight|mean|covariance matrix|\n')
71 for ng,g
in enumerate(ps):
75 mean=list(shape.get_center())
76 fm=[ng,weight]+mean+covar
77 outf.write(
'|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
79 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
83 ps=[g.get_particle()
for g
in to_draw]
85 print(
'ps must be Particles or Gaussians')
87 if bounding_box
is None:
105 print(
'creating map')
107 if origin
is not None:
108 d1.set_origin(origin)
110 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
111 """write density map from GMM. input can be either particles or gaussians"""
112 d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
113 print(
'will write GMM map to',out_fn)
118 """write density map directly from sklearn GMM (kinda slow) """
120 if not dmap_model
is None:
126 print(
'getting coords')
127 nvox=d1.get_number_of_voxels()
128 apos=[list(d1.get_location_by_voxel(nv))
for nv
in range(nvox)]
131 scores=gmm.score(apos)
134 for nv, score
in enumerate(scores):
135 d1.set_value(nv,exp(score))
136 print(
'will write GMM map to',out_fn)
139 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
141 """ given some points (and optional transform), write them to chimera 'bild' format
142 colors flag only applies to ellipses, otherwise it'll be weird"""
143 with open(out_fn,
'w')
as outf:
149 outf.write(
'.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
150 %(pt[0],pt[1],pt[2]))
155 outf.write(
'.color 0 1 0\n')
156 colors=[
'0 1 0',
'0 0 1',
'0 1 1']
157 for nt,t
in enumerate(pts[start:]):
158 if use_colors
and nt%2==0:
159 outf.write(
'.color %s\n' % colors[nt/2])
161 outf.write(
'.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
165 def create_gmm_for_bead(mdl,
168 sampled_points=100000,
170 print(
'fitting bead with',n_components,
'gaussians')
173 IMP.em.write_map(dmap,
'test_intermed.mrc')
174 pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
181 return density_particles,dmap
183 def sample_and_fit_to_particles(model,
186 sampled_points=1000000,
190 covariance_type=
'full',
191 multiply_by_total_mass=
True,
195 if multiply_by_total_mass:
197 print(
'add_component_density: will multiply by mass',mass_multiplier)
200 print(
'add_component_density: sampling points')
206 pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
209 print(
'add_component_density: fitting GMM to',len(pts),
'points')
211 n_components=num_components,
213 ps=density_particles,
215 covariance_type=covariance_type,
216 mass_multiplier=mass_multiplier)
218 if not output_txt
is None:
220 if not output_map
is None:
223 voxel_size=voxel_size,
226 return density_particles
233 covariance_type=
'full',
238 mass_multiplier=1.0):
239 """fit a GMM to some points. Will return the score and the Akaike score.
240 Akaike information criterion for the current model fit. It is a measure
241 of the relative quality of the GMM that takes into account the
242 parsimony and the goodness of the fit.
243 if no particles are provided, they will be created
245 points: list of coordinates (python)
246 n_components: number of gaussians to create
248 ps: list of particles to be decorated. if empty, will add
249 num_iter: number of EM iterations
250 covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
251 min_covar: assign a minimum value to covariance term. That is used to have more spherical
253 init_centers: initial coordinates of the GMM
254 force_radii: fix the radii (spheres only)
255 force_weight: fix the weights
256 mass_multiplier: multiply the weights of all the gaussians by this value
257 dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
263 from sklearn.mixture
import GMM
265 from sklearn.mixture
import GaussianMixture
268 print(
'creating GMM with n_components',n_components,
'n_iter',num_iter,
'covar type',covariance_type)
272 points = np.array(points)
273 weights_init = precisions_init =
None
274 if force_radii != -1.0:
275 print(
'warning: radii can no longer be forced, but setting '
276 'initial values to ', force_radii)
277 precisions_init = np.array([[1./force_radii]*3
278 for i
in range(n_components)])
279 if force_weight != -1.0:
280 print(
'warning: weights can no longer be forced, but setting '
281 'initial values to ', force_weight)
282 weights_init = np.array([force_weight]*n_components)
284 gmm = GaussianMixture(n_components=n_components,
286 covariance_type=covariance_type,
287 weights_init=weights_init,
288 precisions_init=precisions_init,
289 means_init=
None if init_centers==[]
294 if force_radii==-1.0:
298 covariance_type=
'spherical'
299 print(
'forcing spherical with radii',force_radii)
301 if force_weight==-1.0:
305 print(
'forcing weights to be',force_weight)
307 gmm = GMM(n_components=n_components, n_iter=num_iter,
308 covariance_type=covariance_type, min_covar=min_covar,
309 params=params, init_params=init_params)
310 if force_weight!=-1.0:
311 gmm.weights_=np.array([force_weight]*n_components)
312 if force_radii!=-1.0:
313 gmm.covars_=np.array([[force_radii]*3
for i
in range(n_components)])
315 gmm.means_=init_centers
317 model=gmm.fit(points)
318 score=gmm.score(points)
319 akaikescore=model.aic(points)
324 covars = gmm.covariances_
327 for ng
in range(n_components):
330 covar=np.diag(covar).tolist()
333 center=list(gmm.means_[ng])
334 weight=mass_multiplier*gmm.weights_[ng]
342 return (score,akaikescore)
349 covariance_type=
'full',
350 mass_multiplier=1.0):
351 """fit a GMM to some points. Will return core::Gaussians.
352 if no particles are provided, they will be created
354 points: list of coordinates (python)
355 n_components: number of gaussians to create
357 ps: list of particles to be decorated. if empty, will add
358 num_iter: number of EM iterations
359 covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
360 init_centers: initial coordinates of the GMM
361 force_radii: fix the radii (spheres only)
362 force_weight: fix the weights
363 mass_multiplier: multiply the weights of all the gaussians by this value
369 from sklearn.mixture
import BayesianGaussianMixture
371 from sklearn.mixture
import DPGMM
375 print(
'using dirichlet prior')
377 gmm = BayesianGaussianMixture(
378 weight_concentration_prior_type=
'dirichlet_process',
379 n_components=n_components, max_iter=num_iter,
380 covariance_type=covariance_type)
382 gmm = DPGMM(n_components=n_components, n_iter=num_iter,
383 covariance_type=covariance_type)
394 gmm.precisions_ = gmm.precs_
395 for ng
in range(n_components):
396 invcovar=gmm.precisions_[ng]
397 covar=np.linalg.inv(invcovar)
399 covar=np.diag(covar).tolist()
402 center=list(gmm.means_[ng])
403 weight=mass_multiplier*gmm.weights_[ng]
static Gaussian setup_particle(Model *m, ParticleIndex pi)
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
double get_mass(ResidueType c)
Get the mass from the residue type.
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D ¢er)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Class for sampling a density map from particles.
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
Class to handle individual particles of a Model object.
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
A decorator for a rigid body.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Functionality for loading, creating, manipulating and scoring atomic structures.
A decorator for a particle with x,y,z coordinates and a radius.