1 """@namespace IMP.isd.gmm_tools
2 Tools for handling Gaussian Mixture Models.
5 from __future__
import print_function
16 import sklearn.mixture
20 from math
import exp, sqrt
25 """read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
27 added_ps = itertools.count(1)
28 with open(in_fn,
'r') as inf:
34 fields = line.split(
'|')
35 weight = float(fields[2])
36 center = list(map(float, fields[3].split()))
38 list(map(float, fields[4].split()))).reshape((3, 3))
50 rmax = sqrt(max(g.get_variances()))*radius_scale
55 if transform
is not None:
62 """write a list of gaussians to text. must be decorated as Gaussian
64 print(
'will write GMM text to', out_fn)
65 with open(out_fn,
'w')
as outf:
66 for comment
in comments:
67 outf.write(
'# ' + comment +
'\n')
68 outf.write(
'#|num|weight|mean|covariance matrix|\n')
69 for ng, g
in enumerate(ps):
74 mean = list(shape.get_center())
75 fm = [ng, weight] + mean + covar
76 outf.write(
'|{}|{}|{} {} {}|'
77 '{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
80 def gmm2map(to_draw, voxel_size, bounding_box=None, origin=None, fast=False,
86 ps = [g.get_particle()
for g
in to_draw]
88 print(
'ps must be Particles or Gaussians')
90 if bounding_box
is None:
107 bounding_box, factor)
111 print(
'creating map')
113 if origin
is not None:
114 d1.set_origin(origin)
119 origin=
None, fast=
False, factor=2.5):
120 """write density map from GMM. input can be either particles
122 d1 = gmm2map(to_draw, voxel_size, bounding_box, origin, fast)
123 print(
'will write GMM map to', out_fn)
129 """write density map directly from sklearn GMM (kinda slow) """
131 if dmap_model
is not None:
137 print(
'getting coords')
138 nvox = d1.get_number_of_voxels()
139 apos = [list(d1.get_location_by_voxel(nv))
for nv
in range(nvox)]
142 scores = gmm.score(apos)
145 for nv, score
in enumerate(scores):
146 d1.set_value(nv, exp(score))
147 print(
'will write GMM map to', out_fn)
154 """ given some points (and optional transform), write them to chimera
155 'bild' format; colors flag only applies to ellipses, otherwise it'll
157 with open(out_fn,
'w')
as outf:
163 outf.write(
'.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
164 % (pt[0], pt[1], pt[2]))
169 outf.write(
'.color 0 1 0\n')
170 colors = [
'0 1 0',
'0 0 1',
'0 1 1']
171 for nt, t
in enumerate(pts[start:]):
172 if use_colors
and nt % 2 == 0:
173 outf.write(
'.color %s\n' % colors[nt/2])
175 outf.write(
'.dotat %.2f %.2f %.2f\n' % (pt[0], pt[1], pt[2]))
178 def create_gmm_for_bead(mdl,
181 sampled_points=100000,
183 print(
'fitting bead with', n_components,
'gaussians')
187 IMP.em.write_map(dmap,
'test_intermed.mrc')
188 pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
190 density_particles = []
194 return density_particles, dmap
197 def sample_and_fit_to_particles(model,
200 sampled_points=1000000,
204 covariance_type=
'full',
205 multiply_by_total_mass=
True,
208 density_particles = []
209 if multiply_by_total_mass:
211 for p
in set(fragment_particles)))
212 print(
'add_component_density: will multiply by mass', mass_multiplier)
215 print(
'add_component_density: sampling points')
217 fragment_particles, simulation_res, voxel_size,
220 pts = IMP.isd.sample_points_from_density(dmap, sampled_points)
223 print(
'add_component_density: fitting GMM to', len(pts),
'points')
225 n_components=num_components,
227 ps=density_particles,
229 covariance_type=covariance_type,
230 mass_multiplier=mass_multiplier)
232 if output_txt
is not None:
234 if output_map
is not None:
237 voxel_size=voxel_size,
240 return density_particles
248 covariance_type=
'full',
253 mass_multiplier=1.0):
254 """fit a GMM to some points. Will return the score and the Akaike score.
255 Akaike information criterion for the current model fit. It is a measure
256 of the relative quality of the GMM that takes into account the
257 parsimony and the goodness of the fit.
258 if no particles are provided, they will be created
260 points: list of coordinates (python)
261 n_components: number of gaussians to create
263 ps: list of particles to be decorated. if empty, will add
264 num_iter: number of EM iterations
265 covariance_type: covar type for the gaussians. options: 'full',
266 'diagonal', 'spherical'
267 min_covar: assign a minimum value to covariance term. That is used
268 to have more spherical shaped gaussians
269 init_centers: initial coordinates of the GMM
270 force_radii: fix the radii (spheres only)
271 force_weight: fix the weights
272 mass_multiplier: multiply the weights of all the gaussians by this value
273 dirichlet: use the DGMM fitting (can reduce number of components,
279 from sklearn.mixture
import GMM
281 from sklearn.mixture
import GaussianMixture
284 print(
'creating GMM with n_components', n_components,
'n_iter', num_iter,
285 'covar type', covariance_type)
289 points = np.array(points)
290 weights_init = precisions_init =
None
291 if force_radii != -1.0:
292 print(
'warning: radii can no longer be forced, but setting '
293 'initial values to ', force_radii)
294 precisions_init = np.array([[1./force_radii]*3
295 for i
in range(n_components)])
296 if force_weight != -1.0:
297 print(
'warning: weights can no longer be forced, but setting '
298 'initial values to ', force_weight)
299 weights_init = np.array([force_weight]*n_components)
301 gmm = GaussianMixture(n_components=n_components,
303 covariance_type=covariance_type,
304 weights_init=weights_init,
305 precisions_init=precisions_init,
306 means_init=
None if init_centers == []
311 if force_radii == -1.0:
315 covariance_type =
'spherical'
316 print(
'forcing spherical with radii', force_radii)
318 if force_weight == -1.0:
322 print(
'forcing weights to be', force_weight)
324 gmm = GMM(n_components=n_components, n_iter=num_iter,
325 covariance_type=covariance_type, min_covar=min_covar,
326 params=params, init_params=init_params)
327 if force_weight != -1.0:
328 gmm.weights_ = np.array([force_weight] * n_components)
329 if force_radii != -1.0:
330 gmm.covars_ = np.array([[force_radii] * 3
331 for i
in range(n_components)])
332 if init_centers != []:
333 gmm.means_ = init_centers
335 model = gmm.fit(points)
336 score = gmm.score(points)
337 akaikescore = model.aic(points)
341 covars = gmm.covariances_
344 for ng
in range(n_components):
347 covar = np.diag(covar).tolist()
349 covar = covar.tolist()
350 center = list(gmm.means_[ng])
351 weight = mass_multiplier*gmm.weights_[ng]
360 return (score, akaikescore)
364 points, n_components, mdl, ps=[], num_iter=100,
365 covariance_type=
'full', mass_multiplier=1.0):
366 """fit a GMM to some points. Will return core::Gaussians.
367 if no particles are provided, they will be created
369 points: list of coordinates (python)
370 n_components: number of gaussians to create
372 ps: list of particles to be decorated. if empty, will add
373 num_iter: number of EM iterations
374 covariance_type: covar type for the gaussians. options: 'full',
375 'diagonal', 'spherical'
376 init_centers: initial coordinates of the GMM
377 force_radii: fix the radii (spheres only)
378 force_weight: fix the weights
379 mass_multiplier: multiply the weights of all the gaussians by this value
384 from sklearn.mixture
import BayesianGaussianMixture
386 from sklearn.mixture
import DPGMM
390 print(
'using dirichlet prior')
392 gmm = BayesianGaussianMixture(
393 weight_concentration_prior_type=
'dirichlet_process',
394 n_components=n_components, max_iter=num_iter,
395 covariance_type=covariance_type)
397 gmm = DPGMM(n_components=n_components, n_iter=num_iter,
398 covariance_type=covariance_type)
404 gmm.precisions_ = gmm.precs_
405 for ng
in range(n_components):
406 invcovar = gmm.precisions_[ng]
407 covar = np.linalg.inv(invcovar)
409 covar = np.diag(covar).tolist()
411 covar = covar.tolist()
412 center = list(gmm.means_[ng])
413 weight = mass_multiplier*gmm.weights_[ng]
static Gaussian setup_particle(Model *m, ParticleIndex pi)
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
double get_mass(ResidueType c)
Get the mass from the residue type.
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D ¢er)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Class for sampling a density map from particles.
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
Class to handle individual particles of a Model object.
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
A decorator for a rigid body.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Functionality for loading, creating, manipulating and scoring atomic structures.
A decorator for a particle with x,y,z coordinates and a radius.