1 """@namespace IMP.isd.gmm_tools
2 Tools for handling Gaussian Mixture Models.
5 from __future__
import print_function
16 import sklearn.mixture
20 from math
import exp, sqrt
25 """read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
27 added_ps = itertools.count(1)
28 with open(in_fn,
'r') as inf:
31 if ncomp > len(ps) - 1:
34 fields = line.split(
'|')
35 weight = float(fields[2])
36 center = list(map(float, fields[3].split()))
38 list(map(float, fields[4].split()))).reshape((3, 3))
50 rmax = sqrt(max(g.get_variances())) * radius_scale
55 if transform
is not None:
62 """write a list of gaussians to text. must be decorated as Gaussian
64 print(
'will write GMM text to', out_fn)
65 with open(out_fn,
'w')
as outf:
66 for comment
in comments:
67 outf.write(
'# ' + comment +
'\n')
68 outf.write(
'#|num|weight|mean|covariance matrix|\n')
69 for ng, g
in enumerate(ps):
74 mean = list(shape.get_center())
75 fm = [ng, weight] + mean + covar
76 outf.write(
'|{}|{}|{} {} {}|'
77 '{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
80 def gmm2map(to_draw, voxel_size, bounding_box=None, origin=None, fast=False,
86 ps = [g.get_particle()
for g
in to_draw]
88 print(
'ps must be Particles or Gaussians')
90 if bounding_box
is None:
98 max(g.get_variances()) * 3)
108 bounding_box, factor)
112 print(
'creating map')
114 if origin
is not None:
115 d1.set_origin(origin)
120 origin=
None, fast=
False, factor=2.5):
121 """write density map from GMM. input can be either particles
123 d1 = gmm2map(to_draw, voxel_size, bounding_box, origin, fast)
124 print(
'will write GMM map to', out_fn)
130 """write density map directly from sklearn GMM (kinda slow) """
132 if dmap_model
is not None:
138 print(
'getting coords')
139 nvox = d1.get_number_of_voxels()
140 apos = [list(d1.get_location_by_voxel(nv))
for nv
in range(nvox)]
143 scores = gmm.score(apos)
146 for nv, score
in enumerate(scores):
147 d1.set_value(nv, exp(score))
148 print(
'will write GMM map to', out_fn)
155 """ given some points (and optional transform), write them to chimera
156 'bild' format; colors flag only applies to ellipses, otherwise it'll
158 with open(out_fn,
'w')
as outf:
164 outf.write(
'.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
165 % (pt[0], pt[1], pt[2]))
170 outf.write(
'.color 0 1 0\n')
171 colors = [
'0 1 0',
'0 0 1',
'0 1 1']
172 for nt, t
in enumerate(pts[start:]):
173 if use_colors
and nt % 2 == 0:
174 outf.write(
'.color %s\n' % colors[nt / 2])
176 outf.write(
'.dotat %.2f %.2f %.2f\n' % (pt[0], pt[1], pt[2]))
179 def create_gmm_for_bead(mdl,
182 sampled_points=100000,
184 print(
'fitting bead with', n_components,
'gaussians')
188 IMP.em.write_map(dmap,
'test_intermed.mrc')
189 pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
191 density_particles = []
195 return density_particles, dmap
198 def sample_and_fit_to_particles(model,
201 sampled_points=1000000,
205 covariance_type=
'full',
206 multiply_by_total_mass=
True,
209 density_particles = []
210 if multiply_by_total_mass:
212 for p
in set(fragment_particles)))
213 print(
'add_component_density: will multiply by mass', mass_multiplier)
216 print(
'add_component_density: sampling points')
218 fragment_particles, simulation_res, voxel_size,
221 pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
224 print(
'add_component_density: fitting GMM to', len(pts),
'points')
226 n_components=num_components,
228 ps=density_particles,
230 covariance_type=covariance_type,
231 mass_multiplier=mass_multiplier)
233 if output_txt
is not None:
235 if output_map
is not None:
238 voxel_size=voxel_size,
241 return density_particles
249 covariance_type=
'full',
254 mass_multiplier=1.0):
255 """fit a GMM to some points. Will return the score and the Akaike score.
256 Akaike information criterion for the current model fit. It is a measure
257 of the relative quality of the GMM that takes into account the
258 parsimony and the goodness of the fit.
259 if no particles are provided, they will be created
261 points: list of coordinates (python)
262 n_components: number of gaussians to create
264 ps: list of particles to be decorated. if empty, will add
265 num_iter: number of EM iterations
266 covariance_type: covar type for the gaussians. options: 'full',
267 'diagonal', 'spherical'
268 min_covar: assign a minimum value to covariance term. That is used
269 to have more spherical shaped gaussians
270 init_centers: initial coordinates of the GMM
271 force_radii: fix the radii (spheres only)
272 force_weight: fix the weights
273 mass_multiplier: multiply the weights of all the gaussians by this value
274 dirichlet: use the DGMM fitting (can reduce number of components,
280 from sklearn.mixture
import GMM
282 from sklearn.mixture
import GaussianMixture
285 print(
'creating GMM with n_components', n_components,
'n_iter', num_iter,
286 'covar type', covariance_type)
290 points = np.array(points)
291 weights_init = precisions_init =
None
292 if force_radii != -1.0:
293 print(
'warning: radii can no longer be forced, but setting '
294 'initial values to ', force_radii)
295 precisions_init = np.array([[1. / force_radii] * 3
296 for i
in range(n_components)])
297 if force_weight != -1.0:
298 print(
'warning: weights can no longer be forced, but setting '
299 'initial values to ', force_weight)
300 weights_init = np.array([force_weight] * n_components)
302 gmm = GaussianMixture(n_components=n_components,
304 covariance_type=covariance_type,
305 weights_init=weights_init,
306 precisions_init=precisions_init,
307 means_init=
None if init_centers == []
312 if force_radii == -1.0:
316 covariance_type =
'spherical'
317 print(
'forcing spherical with radii', force_radii)
319 if force_weight == -1.0:
323 print(
'forcing weights to be', force_weight)
325 gmm = GMM(n_components=n_components, n_iter=num_iter,
326 covariance_type=covariance_type, min_covar=min_covar,
327 params=params, init_params=init_params)
328 if force_weight != -1.0:
329 gmm.weights_ = np.array([force_weight] * n_components)
330 if force_radii != -1.0:
331 gmm.covars_ = np.array([[force_radii] * 3
332 for i
in range(n_components)])
333 if init_centers != []:
334 gmm.means_ = init_centers
336 model = gmm.fit(points)
337 score = gmm.score(points)
338 akaikescore = model.aic(points)
342 covars = gmm.covariances_
345 for ng
in range(n_components):
348 covar = np.diag(covar).tolist()
350 covar = covar.tolist()
351 center = list(gmm.means_[ng])
352 weight = mass_multiplier * gmm.weights_[ng]
361 return (score, akaikescore)
365 points, n_components, mdl, ps=[], num_iter=100,
366 covariance_type=
'full', mass_multiplier=1.0):
367 """fit a GMM to some points. Will return core::Gaussians.
368 if no particles are provided, they will be created
370 points: list of coordinates (python)
371 n_components: number of gaussians to create
373 ps: list of particles to be decorated. if empty, will add
374 num_iter: number of EM iterations
375 covariance_type: covar type for the gaussians. options: 'full',
376 'diagonal', 'spherical'
377 init_centers: initial coordinates of the GMM
378 force_radii: fix the radii (spheres only)
379 force_weight: fix the weights
380 mass_multiplier: multiply the weights of all the gaussians by this value
385 from sklearn.mixture
import BayesianGaussianMixture
387 from sklearn.mixture
import DPGMM
391 print(
'using dirichlet prior')
393 gmm = BayesianGaussianMixture(
394 weight_concentration_prior_type=
'dirichlet_process',
395 n_components=n_components, max_iter=num_iter,
396 covariance_type=covariance_type)
398 gmm = DPGMM(n_components=n_components, n_iter=num_iter,
399 covariance_type=covariance_type)
405 gmm.precisions_ = gmm.precs_
406 for ng
in range(n_components):
407 invcovar = gmm.precisions_[ng]
408 covar = np.linalg.inv(invcovar)
410 covar = np.diag(covar).tolist()
412 covar = covar.tolist()
413 center = list(gmm.means_[ng])
414 weight = mass_multiplier * gmm.weights_[ng]
static Gaussian setup_particle(Model *m, ParticleIndex pi)
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
double get_mass(ResidueType c)
Get the mass from the residue type.
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D ¢er)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Class for sampling a density map from particles.
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
Class to handle individual particles of a Model object.
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
A decorator for a rigid body.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Functionality for loading, creating, manipulating and scoring atomic structures.
A decorator for a particle with x,y,z coordinates and a radius.