1 """@namespace IMP.isd.gmm_tools
2 Tools for handling Gaussian Mixture Models.
5 from __future__
import print_function
17 import sklearn.mixture
21 from math
import exp,sqrt,copysign
29 """ read the output from write_gmm_to_text, decorate as Gaussian and Mass"""
31 added_ps = itertools.count(1)
32 with open(in_fn,
'r') as inf:
39 weight=float(fields[2])
40 center=list(map(float,fields[3].split()))
41 covar=np.array(list(map(float,
42 fields[4].split()))).reshape((3,3))
55 rmax = sqrt(max(g.get_variances()))*radius_scale
60 if not transform
is None:
65 """write a list of gaussians to text. must be decorated as Gaussian and Mass"""
66 print(
'will write GMM text to',out_fn)
67 with open(out_fn,
'w')
as outf:
68 for comment
in comments:
69 outf.write(
'# ' + comment +
'\n')
70 outf.write(
'#|num|weight|mean|covariance matrix|\n')
71 for ng,g
in enumerate(ps):
75 mean=list(shape.get_center())
76 fm=[ng,weight]+mean+covar
79 outf.write(
'|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*fm))
82 outf.write(
'|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*fm))
84 def gmm2map(to_draw,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
88 ps=[g.get_particle()
for g
in to_draw]
90 print(
'ps must be Particles or Gaussians')
92 if bounding_box
is None:
110 print(
'creating map')
112 if origin
is not None:
113 d1.set_origin(origin)
115 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None, fast=False, factor=2.5):
116 """write density map from GMM. input can be either particles or gaussians"""
117 d1 = gmm2map(to_draw,voxel_size,bounding_box,origin, fast)
118 print(
'will write GMM map to',out_fn)
123 """write density map directly from sklearn GMM (kinda slow) """
125 if not dmap_model
is None:
131 print(
'getting coords')
132 nvox=d1.get_number_of_voxels()
133 apos=[list(d1.get_location_by_voxel(nv))
for nv
in range(nvox)]
136 scores=gmm.score(apos)
139 for nv, score
in enumerate(scores):
140 d1.set_value(nv,exp(score))
141 print(
'will write GMM map to',out_fn)
144 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
146 """ given some points (and optional transform), write them to chimera 'bild' format
147 colors flag only applies to ellipses, otherwise it'll be weird"""
148 with open(out_fn,
'w')
as outf:
154 outf.write(
'.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
155 %(pt[0],pt[1],pt[2]))
160 outf.write(
'.color 0 1 0\n')
161 colors=[
'0 1 0',
'0 0 1',
'0 1 1']
162 for nt,t
in enumerate(pts[start:]):
163 if use_colors
and nt%2==0:
164 outf.write(
'.color %s\n' % colors[nt/2])
166 outf.write(
'.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))
170 def create_gmm_for_bead(mdl,
173 sampled_points=100000,
175 print(
'fitting bead with',n_components,
'gaussians')
178 IMP.em.write_map(dmap,
'test_intermed.mrc')
179 pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
186 return density_particles,dmap
188 def sample_and_fit_to_particles(model,
191 sampled_points=1000000,
195 covariance_type=
'full',
196 multiply_by_total_mass=
True,
200 if multiply_by_total_mass:
202 print(
'add_component_density: will multiply by mass',mass_multiplier)
205 print(
'add_component_density: sampling points')
211 pts=IMP.isd.sample_points_from_density(dmap,sampled_points)
214 print(
'add_component_density: fitting GMM to',len(pts),
'points')
216 n_components=num_components,
218 ps=density_particles,
220 covariance_type=covariance_type,
221 mass_multiplier=mass_multiplier)
223 if not output_txt
is None:
225 if not output_map
is None:
228 voxel_size=voxel_size,
231 return density_particles
238 covariance_type=
'full',
243 mass_multiplier=1.0):
244 """fit a GMM to some points. Will return the score and the Akaike score.
245 Akaike information criterion for the current model fit. It is a measure
246 of the relative quality of the GMM that takes into account the
247 parsimony and the goodness of the fit.
248 if no particles are provided, they will be created
250 points: list of coordinates (python)
251 n_components: number of gaussians to create
253 ps: list of particles to be decorated. if empty, will add
254 num_iter: number of EM iterations
255 covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
256 min_covar: assign a minimum value to covariance term. That is used to have more spherical
258 init_centers: initial coordinates of the GMM
259 force_radii: fix the radii (spheres only)
260 force_weight: fix the weights
261 mass_multiplier: multiply the weights of all the gaussians by this value
262 dirichlet: use the DGMM fitting (can reduce number of components, takes longer)
268 from sklearn.mixture
import GMM
270 from sklearn.mixture
import GaussianMixture
273 print(
'creating GMM with n_components',n_components,
'n_iter',num_iter,
'covar type',covariance_type)
277 points = np.array(points)
278 weights_init = precisions_init =
None
279 if force_radii != -1.0:
280 print(
'warning: radii can no longer be forced, but setting '
281 'initial values to ', force_radii)
282 precisions_init = np.array([[1./force_radii]*3
283 for i
in range(n_components)])
284 if force_weight != -1.0:
285 print(
'warning: weights can no longer be forced, but setting '
286 'initial values to ', force_weight)
287 weights_init = np.array([force_weight]*n_components)
289 gmm = GaussianMixture(n_components=n_components,
291 covariance_type=covariance_type,
292 weights_init=weights_init,
293 precisions_init=precisions_init,
294 means_init=
None if init_centers==[]
299 if force_radii==-1.0:
303 covariance_type=
'spherical'
304 print(
'forcing spherical with radii',force_radii)
306 if force_weight==-1.0:
310 print(
'forcing weights to be',force_weight)
312 gmm = GMM(n_components=n_components, n_iter=num_iter,
313 covariance_type=covariance_type, min_covar=min_covar,
314 params=params, init_params=init_params)
315 if force_weight!=-1.0:
316 gmm.weights_=np.array([force_weight]*n_components)
317 if force_radii!=-1.0:
318 gmm.covars_=np.array([[force_radii]*3
for i
in range(n_components)])
320 gmm.means_=init_centers
322 model=gmm.fit(points)
323 score=gmm.score(points)
324 akaikescore=model.aic(points)
329 covars = gmm.covariances_
332 for ng
in range(n_components):
335 covar=np.diag(covar).tolist()
338 center=list(gmm.means_[ng])
339 weight=mass_multiplier*gmm.weights_[ng]
347 return (score,akaikescore)
354 covariance_type=
'full',
355 mass_multiplier=1.0):
356 """fit a GMM to some points. Will return core::Gaussians.
357 if no particles are provided, they will be created
359 points: list of coordinates (python)
360 n_components: number of gaussians to create
362 ps: list of particles to be decorated. if empty, will add
363 num_iter: number of EM iterations
364 covariance_type: covar type for the gaussians. options: 'full', 'diagonal', 'spherical'
365 init_centers: initial coordinates of the GMM
366 force_radii: fix the radii (spheres only)
367 force_weight: fix the weights
368 mass_multiplier: multiply the weights of all the gaussians by this value
374 from sklearn.mixture
import BayesianGaussianMixture
376 from sklearn.mixture
import DPGMM
380 print(
'using dirichlet prior')
382 gmm = BayesianGaussianMixture(
383 weight_concentration_prior_type=
'dirichlet_process',
384 n_components=n_components, max_iter=num_iter,
385 covariance_type=covariance_type)
387 gmm = DPGMM(n_components=n_components, n_iter=num_iter,
388 covariance_type=covariance_type)
399 gmm.precisions_ = gmm.precs_
400 for ng
in range(n_components):
401 invcovar=gmm.precisions_[ng]
402 covar=np.linalg.inv(invcovar)
404 covar=np.diag(covar).tolist()
407 center=list(gmm.means_[ng])
408 weight=mass_multiplier*gmm.weights_[ng]
static Gaussian setup_particle(Model *m, ParticleIndex pi)
static FloatKey get_mass_key()
DenseGrid3D< double > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.
static XYZR setup_particle(Model *m, ParticleIndex pi)
double get_mass(ResidueType c)
Get the mass from the residue type.
Gaussian3D get_gaussian_from_covariance(const Eigen::Matrix3d &covariance, const Vector3D ¢er)
Return a Gaussian centered at the origin from a covariance matrix.
static bool get_is_setup(Model *m, ParticleIndex pi)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Class for sampling a density map from particles.
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Eigen::Matrix3d get_covariance(const Gaussian3D &g)
Class to handle individual particles of a Model object.
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.
DenseGrid3D< double > get_rasterized_fast(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb, double factor=2.5)
Rasterize the Gaussians to a grid.
A decorator for helping deal with a generalized hierarchy.
A decorator for a rigid body.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Functionality for loading, creating, manipulating and scoring atomic structures.
A decorator for a particle with x,y,z coordinates and a radius.