doc/ref/gmm__tools_8py_source.html

 """@namespace IMP.isd.gmm_tools

    Tools for handling Gaussian Mixture Models.

 """


 from __future__ import print_function

 import IMP

 import IMP.core

 import IMP.algebra

 import IMP.atom

 import IMP.em

 import numpy as np

 import numpy.linalg

 import sys,os


 try:

     import sklearn.mixture

     nosklearn=False

 except:

     nosklearn=True

 from math import exp,sqrt,copysign


 def decorate_gmm_from_text(in_fn,
                            ps,
                            mdl,
                            transform=None,
                            radius_scale=1.0,
                            mass_scale=1.0):
     """Read a GMM text file and decorate particles as Gaussian, Mass and XYZR.

     Each data line is split on '|': field 2 is the weight, field 3 the
     space-separated mean (3 values) and field 4 the space-separated
     covariance matrix (9 values, reshaped to 3x3). Lines whose first
     character is '#' and blank lines are skipped.

     in_fn:         path of the text file to read
     ps:            list of particles to decorate, one per component. It is
                    extended in place with new particles created in mdl when
                    the file holds more components than len(ps). Particles
                    that are already decorated are updated instead of set up.
     mdl:           IMP Model used to create any missing particles
     transform:     if not None, applied to every component through
                    IMP.core.transform on its RigidBody decorator
     radius_scale:  multiplies sqrt(largest variance) to give the XYZR radius
     mass_scale:    multiplies each weight to give the particle mass
     """
     ncomp=0
     with open(in_fn,'r') as inf:
         for l in inf:
             # Blank lines (e.g. a trailing newline at the end of the file)
             # carry no fields; parsing them used to raise IndexError.
             if not l.strip() or l[0]=='#':
                 continue
             if ncomp>len(ps)-1:
                 ps.append(IMP.Particle(mdl))
             p = ps[ncomp]
             fields=l.split('|')
             weight=float(fields[2])
             center=list(map(float,fields[3].split()))
             covar=np.array(list(map(float,
                                     fields[4].split()))).reshape((3,3))
             shape=IMP.algebra.get_gaussian_from_covariance(covar.tolist(),
                                              IMP.algebra.Vector3D(center))
             # set up each decorator only once; otherwise update it in place
             if not IMP.core.Gaussian.get_is_setup(p):
                 g = IMP.core.Gaussian.setup_particle(p,shape)
             else:
                 g = IMP.core.Gaussian(p)
                 g.set_gaussian(shape)
             if not IMP.atom.Mass.get_is_setup(p):
                 IMP.atom.Mass.setup_particle(p,weight*mass_scale)
             else:
                 IMP.atom.Mass(p).set_mass(weight*mass_scale)
             # the radius is derived from the largest variance of the Gaussian
             rmax = sqrt(max(g.get_variances()))*radius_scale
             if not IMP.core.XYZR.get_is_setup(p):
                 IMP.core.XYZR.setup_particle(p,rmax)
             else:
                 IMP.core.XYZR(p).set_radius(rmax)
             if transform is not None:
                 IMP.core.transform(IMP.core.RigidBody(p),transform)
             ncomp+=1


 def write_gmm_to_text(ps,out_fn):
     """Dump Gaussian-decorated particles to a '|'-separated text file.

     Every particle in ps must be decorated as both Gaussian and Mass.
     A header line is written first, then one record per particle holding
     its index, mass, centre (3 values) and covariance matrix (9 values,
     row by row).

     ps:      iterable of particles to write
     out_fn:  path of the output text file (overwritten)
     """
     print('will write GMM text to',out_fn)
     with open(out_fn,'w') as outf:
         outf.write('#|num|weight|mean|covariance matrix|\n')
         for index,particle in enumerate(ps):
             gaussian=IMP.core.Gaussian(particle).get_gaussian()
             mass=IMP.atom.Mass(particle).get_mass()
             flat_covar=[]
             for row in IMP.algebra.get_covariance(gaussian):
                 flat_covar.extend(row)
             values=[index,mass]
             values.extend(gaussian.get_center())
             values.extend(flat_covar)
             try:
                 # auto-numbered fields (python 2.7 and later)
                 outf.write('|{}|{}|{} {} {}|{} {} {} {} {} {} {} {} {}|\n'.format(*values))
             except ValueError:
                 # explicitly numbered fields for python 2.6 and below
                 outf.write('|{0}|{1}|{2} {3} {4}|{5} {6} {7} {8} {9} {10} {11} {12} {13}|\n'.format(*values))


 def write_gmm_to_map(to_draw,out_fn,voxel_size,bounding_box=None,origin=None):
     """Rasterize a GMM onto a grid and write it as an MRC density map.

     to_draw:       list of particles (IMP.Particle, IMP.atom.Hierarchy or
                    IMP.core.Hierarchy) or of IMP.core.Gaussian decorators;
                    the type of the first element decides how the list is
                    interpreted. Any other type prints a message and returns.
     out_fn:        path of the map file to write
     voxel_size:    cell width handed to IMP.algebra.get_rasterized
     bounding_box:  region to rasterize; when None it is derived from the
                    particles (see below)
     origin:        if not None, set as the origin of the map before writing
     """
     first_type=type(to_draw[0])
     if first_type in (IMP.Particle,IMP.atom.Hierarchy,IMP.core.Hierarchy):
         ps=to_draw
     elif first_type==IMP.core.Gaussian:
         ps=[gauss.get_particle() for gauss in to_draw]
     else:
         print('ps must be Particles or Gaussians')
         return
     print('will write GMM map to',out_fn)

     if bounding_box is None:
         if len(ps)>1:
             # sphere around all centres, with the radius enlarged 3x
             coords=[IMP.core.XYZ(p).get_coordinates() for p in ps]
             enclosing=IMP.algebra.get_enclosing_sphere(coords)
             padded=IMP.algebra.Sphere3D(enclosing.get_center(),
                                         enclosing.get_radius()*3)
         else:
             # NOTE(review): the radius here is 3x the largest variance, not
             # 3x its square root -- confirm this is intended.
             only=IMP.core.Gaussian(ps[0]).get_gaussian()
             padded=IMP.algebra.Sphere3D(only.get_center(),
                                         max(only.get_variances())*3)
         bounding_box=IMP.algebra.get_bounding_box(padded)

     # collect (gaussian, mass) per particle, then split into parallel lists
     pairs=[(IMP.core.Gaussian(p).get_gaussian(),
             IMP.atom.Mass(p).get_mass()) for p in ps]
     shapes=[pair[0] for pair in pairs]
     weights=[pair[1] for pair in pairs]

     print('rasterizing')
     grid=IMP.algebra.get_rasterized(shapes,weights,voxel_size,bounding_box)
     print('creating map')
     d1=IMP.em.create_density_map(grid)
     print('writing')
     if origin is not None:
         d1.set_origin(origin)
     IMP.em.write_map(d1,out_fn,IMP.em.MRCReaderWriter())
     del d1


 def write_sklearn_gmm_to_map(gmm,out_fn,apix=0,bbox=None,dmap_model=None):
     """Evaluate an sklearn GMM on every voxel of a map and write it as MRC.

     gmm:         fitted sklearn mixture object; its score() is called on
                  the list of voxel locations
     out_fn:      path of the map file to write
     apix, bbox:  used to create the map when dmap_model is None
     dmap_model:  if not None, passed on its own to
                  IMP.em.create_density_map instead of bbox and apix

     Each voxel value is exp(score) of the voxel location. Described as
     'kinda slow' by the original author.
     """
     # build the target map, preferring dmap_model when one is supplied
     if dmap_model is None:
         density=IMP.em.create_density_map(bbox,apix)
     else:
         density=IMP.em.create_density_map(dmap_model)

     # gather the location of every voxel
     print('getting coords')
     voxel_count=density.get_number_of_voxels()
     locations=[]
     for voxel in range(voxel_count):
         locations.append(list(density.get_location_by_voxel(voxel)))

     print('scoring')
     voxel_scores=gmm.score(locations)

     # the map stores exp(score) for each voxel
     print('assigning')
     for voxel,value in enumerate(voxel_scores):
         density.set_value(voxel,exp(value))
     print('will write GMM map to',out_fn)
     IMP.em.write_map(density,out_fn,IMP.em.MRCReaderWriter())


 def draw_points(pts,out_fn,trans=IMP.algebra.get_identity_transformation_3d(),
                                 use_colors=False):
     """Write points to a Chimera 'bild' file as '.dotat' records.

     pts:         sequence of 3D coordinates; may be empty, in which case an
                  empty file is written
     out_fn:      path of the output file (overwritten)
     trans:       transformation applied to every point before writing
                  (identity by default)
     use_colors:  if True, the first point is written in red and the
                  remaining points are coloured in consecutive pairs,
                  cycling through green, blue and cyan. The original author
                  notes this is only meaningful for ellipses.
     """
     colors=['0 1 0','0 0 1','0 1 1']
     with open(out_fn,'w') as outf:
         start=0
         if use_colors and len(pts)>0:
             # first point in red, then switch to green for the rest
             pt=trans.get_transformed(IMP.algebra.Vector3D(pts[0]))
             outf.write('.color 1 0 0\n.dotat %.2f %.2f %.2f\n'
                        %(pt[0],pt[1],pt[2]))
             start=1
             outf.write('.color 0 1 0\n')

         for nt,t in enumerate(pts[start:]):
             if use_colors and nt%2==0:
                 # Floor division keeps the index an int on Python 3; the
                 # modulo wraps around so more than three pairs of points
                 # no longer run off the end of the colour list.
                 outf.write('.color %s\n' % colors[(nt//2)%len(colors)])
             pt=trans.get_transformed(IMP.algebra.Vector3D(t))
             outf.write('.dotat %.2f %.2f %.2f\n' %(pt[0],pt[1],pt[2]))


 def create_gmm_for_bead(mdl,
                         particle,
                         n_components,
                         sampled_points=100000,
                         num_iter=100):
     """Fit a GMM with full covariances to the sampled density of one bead.

     mdl:             IMP Model in which the Gaussian particles are created
     particle:        the bead; its mass is used as the mass multiplier
     n_components:    number of gaussians to fit
     sampled_points:  number of points drawn from the sampled density
     num_iter:        number of EM iterations

     Side effects: writes the sampled density to 'test_intermed.mrc' and
     the sampled points to 'pts.bild' in the current directory.

     Returns (density_particles, dmap): the list of new Gaussian particles
     and the sampled density map.
     """
     print('fitting bead with',n_components,'gaussians')
     mass_key=IMP.atom.Mass.get_mass_key()
     dmap=IMP.em.SampledDensityMap([particle],1.0,1.0,mass_key,3,
                                   IMP.em.SPHERE)
     IMP.em.write_map(dmap,'test_intermed.mrc')

     sampled=IMP.isd.sample_points_from_density(dmap,sampled_points)
     draw_points(sampled,'pts.bild')

     # fit_gmm_to_points fills this list with the new particles
     gaussians=[]
     fit_gmm_to_points(points=sampled,
                       n_components=n_components,
                       mdl=mdl,
                       ps=gaussians,
                       num_iter=num_iter,
                       covariance_type='full',
                       mass_multiplier=IMP.atom.Mass(particle).get_mass())
     return gaussians,dmap


 def sample_and_fit_to_particles(model,
                                 fragment_particles,
                                 num_components,
                                 sampled_points=1000000,
                                 simulation_res=0.5,
                                 voxel_size=1.0,
                                 num_iter=100,
                                 covariance_type='full',
                                 multiply_by_total_mass=True,
                                 output_map=None,
                                 output_txt=None):
     """Simulate a density from particles, sample it and fit a GMM to it.

     model:                   IMP Model in which Gaussian particles are made
     fragment_particles:      particles (decorated with Mass) to simulate
     num_components:          number of gaussians to fit
     sampled_points:          number of points drawn from the density
     simulation_res:          second argument of IMP.em.SampledDensityMap
     voxel_size:              third argument of IMP.em.SampledDensityMap;
                              also the voxel size of output_map
     num_iter:                number of EM iterations
     covariance_type:         covariance type passed to fit_gmm_to_points
     multiply_by_total_mass:  if True, weights are multiplied by the summed
                              mass of the unique fragment particles;
                              otherwise they are left unscaled (factor 1.0)
     output_map:              if not None, path to write the GMM as a map
                              over the bounding box of the sampled density
     output_txt:              if not None, path to write the GMM as text

     Returns the list of newly created Gaussian particles.
     """
     density_particles=[]
     # Default to 1.0 so the name is always bound; it used to be undefined
     # (UnboundLocalError below) when multiply_by_total_mass was False.
     mass_multiplier=1.0
     if multiply_by_total_mass:
         # set() so that a particle listed twice is only counted once
         mass_multiplier=sum((IMP.atom.Mass(p).get_mass() for p in set(fragment_particles)))
         print('add_component_density: will multiply by mass',mass_multiplier)

     # simulate density from ps, then calculate points to fit
     print('add_component_density: sampling points')
     dmap=IMP.em.SampledDensityMap(fragment_particles,simulation_res,voxel_size,
                                  IMP.atom.Mass.get_mass_key(),3)
     dmap.calcRMS()
     pts=IMP.isd.sample_points_from_density(dmap,sampled_points)

     # fit GMM
     print('add_component_density: fitting GMM to',len(pts),'points')
     fit_gmm_to_points(points=pts,
                       n_components=num_components,
                       mdl=model,
                       ps=density_particles,
                       num_iter=num_iter,
                       covariance_type=covariance_type,
                       mass_multiplier=mass_multiplier)

     if output_txt is not None:
         write_gmm_to_text(density_particles,output_txt)
     if output_map is not None:
         write_gmm_to_map(to_draw=density_particles,
                          out_fn=output_map,
                          voxel_size=voxel_size,
                          bounding_box=IMP.em.get_bounding_box(dmap))

     return density_particles


 def fit_gmm_to_points(points,
                       n_components,
                       mdl,
                       ps=None,
                       num_iter=100,
                       covariance_type='full',
                       min_covar=0.001,
                       init_centers=None,
                       force_radii=-1.0,
                       force_weight=-1.0,
                       mass_multiplier=1.0):
     """Fit a GMM to some points and decorate particles with the result.

     Returns (score, akaikescore): the value of gmm.score(points) and the
     Akaike information criterion of the fit. The AIC is a measure of the
     relative quality of the GMM that takes into account the parsimony and
     the goodness of the fit.

     points:            list of coordinates (python)
     n_components:      number of gaussians to create
     mdl:               IMP Model
     ps:                list of particles to be decorated as Gaussian, Mass
                        and XYZR. It is extended in place with new particles
                        when it holds fewer than n_components; None (the
                        default) starts from a fresh empty list.
     num_iter:          number of EM iterations
     covariance_type:   covar type for the gaussians, passed to sklearn
                        (the original notes list 'full', 'diagonal',
                        'spherical'); forced to 'spherical' when
                        force_radii is set
     min_covar:         assign a minimum value to covariance term. That is
                        used to have more spherical shaped gaussians
     init_centers:      initial coordinates of the GMM; None or empty means
                        none are assigned
     force_radii:       fix the radii (spheres only); -1.0 disables
     force_weight:      fix the weights; -1.0 disables
     mass_multiplier:   multiply the weights of all the gaussians by this
                        value

     NOTE: relies on the legacy sklearn.mixture.GMM class (n_iter, params,
     init_params, covars_), which newer scikit-learn releases do not provide.
     """

     import sklearn.mixture

     # A list default would be shared between calls and keep the particles
     # appended by earlier calls, so a fresh list is made here instead.
     if ps is None:
         ps=[]

     # 'm', 'c', 'w' select which of means, covariances and weights are
     # updated (params) and initialised (init_params) by sklearn
     params='m'
     init_params='m'
     if force_radii==-1.0:
         params+='c'
         init_params+='c'
     else:
         covariance_type='spherical'
         print('forcing spherical with radii',force_radii)

     if force_weight==-1.0:
         params+='w'
         init_params+='w'
     else:
         print('forcing weights to be',force_weight)

     print('creating GMM with params',params,'init params',init_params,'n_components',n_components,'n_iter',num_iter,'covar type',covariance_type)
     gmm=sklearn.mixture.GMM(n_components=n_components,
                           n_iter=num_iter,
                           covariance_type=covariance_type,
                           min_covar=min_covar,
                           params=params,
                           init_params=init_params)

     if force_weight!=-1.0:
         gmm.weights_=np.array([force_weight]*n_components)
     if force_radii!=-1.0:
         gmm.covars_=np.array([[force_radii]*3 for i in range(n_components)])
     # len() works for lists and numpy arrays alike, whereas comparing an
     # array against [] does not yield a plain boolean
     if init_centers is not None and len(init_centers)>0:
         # NOTE(review): init_params always contains 'm'; confirm sklearn
         # does not re-initialise means_ in fit() and discard these centers.
         gmm.means_=init_centers
     print('fitting')
     model=gmm.fit(points)
     score=gmm.score(points)
     akaikescore=model.aic(points)

     ### convert format to core::Gaussian
     for ng in range(n_components):
         covar=gmm.covars_[ng]
         if covar.size==3:
             # three values only: expand them to a diagonal 3x3 matrix
             covar=np.diag(covar).tolist()
         else:
             covar=covar.tolist()
         center=list(gmm.means_[ng])
         weight=mass_multiplier*gmm.weights_[ng]
         if ng>=len(ps):
             ps.append(IMP.Particle(mdl))
         shape=IMP.algebra.get_gaussian_from_covariance(covar,IMP.algebra.Vector3D(center))
         g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
         IMP.atom.Mass.setup_particle(ps[ng],weight)
         # radius is the square root of the largest variance
         IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))

     return (score,akaikescore)


 def fit_dirichlet_gmm_to_points(points,
                       n_components,
                       mdl,
                       ps=None,
                       num_iter=100,
                       covariance_type='full',
                       mass_multiplier=1.0):
     """Fit a Dirichlet-process GMM to some points and decorate particles.

     Nothing is returned; the fitted components are stored on the particles
     in ps as Gaussian, Mass and XYZR decorators.

     points:            list of coordinates (python)
     n_components:      number of gaussians to create
     mdl:               IMP Model
     ps:                list of particles to be decorated. It is extended in
                        place with new particles when it holds fewer than
                        n_components; None (the default) starts from a
                        fresh empty list.
     num_iter:          number of EM iterations
     covariance_type:   covar type for the gaussians, passed to sklearn
                        (the original notes list 'full', 'diagonal',
                        'spherical')
     mass_multiplier:   multiply the weights of all the gaussians by this
                        value

     NOTE: relies on the legacy sklearn.mixture.DPGMM class (n_iter,
     precs_), which newer scikit-learn releases do not provide.
     """

     import sklearn.mixture

     # A list default would be shared between calls and keep the particles
     # appended by earlier calls, so a fresh list is made here instead.
     if ps is None:
         ps=[]

     ### create and fit GMM
     print('using dirichlet prior')
     gmm=sklearn.mixture.DPGMM(n_components=n_components,
                               n_iter=num_iter,
                               covariance_type=covariance_type)

     gmm.fit(points)

     ### convert format to core::Gaussian
     for ng in range(n_components):
         # the fitted model exposes precisions; invert to get the covariance
         invcovar=gmm.precs_[ng]
         covar=np.linalg.inv(invcovar)
         if covar.size==3:
             # three values only: expand them to a diagonal 3x3 matrix
             covar=np.diag(covar).tolist()
         else:
             covar=covar.tolist()
         center=list(gmm.means_[ng])
         weight=mass_multiplier*gmm.weights_[ng]
         if ng>=len(ps):
             ps.append(IMP.Particle(mdl))
         shape=IMP.algebra.get_gaussian_from_covariance(covar,IMP.algebra.Vector3D(center))
         g=IMP.core.Gaussian.setup_particle(ps[ng],shape)
         IMP.atom.Mass.setup_particle(ps[ng],weight)
         # radius is the square root of the largest variance
         IMP.core.XYZR.setup_particle(ps[ng],sqrt(max(g.get_variances())))

IMP::atom::Mass
Add mass to a particle.
Definition: Mass.h:23

IMP::isd.gmm_tools.draw_points
def draw_points
given some points (and optional transform), write them to chimera 'bild' format colors flag only appl...
Definition: gmm_tools.py:135

IMP::core::Gaussian::setup_particle
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: Gaussian.h:48

IMP::atom::Mass::get_mass_key
static FloatKey get_mass_key()

IMP::algebra::get_bounding_box
BoundingBoxD< 3 > get_bounding_box(const Geometry &)
Compute the bounding box of any geometric object.

IMP::core::XYZR::setup_particle
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48

IMP::atom::get_mass
double get_mass(ResidueType c)
Get the mass from the residue type.

IMP::isd.gmm_tools.fit_dirichlet_gmm_to_points
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:312

IMP::algebra::get_rasterized
DenseGrid3D< float > get_rasterized(const Gaussian3Ds &gmm, const Floats &weights, double cell_width, const BoundingBox3D &bb)
Rasterize the Gaussians to a grid.

IMP::algebra::get_covariance
IMP_Eigen::Matrix3d get_covariance(const Gaussian3D &g)

IMP::em::MRCReaderWriter
Definition: MRCReaderWriter.h:20

IMP::core::Gaussian
Definition: Gaussian.h:40

IMP::atom::Mass::get_is_setup
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Mass.h:30

IMP::core::XYZR::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47

IMP::em::SampledDensityMap
Class for sampling a density map from particles.
Definition: SampledDensityMap.h:31

IMP::isd.gmm_tools.fit_gmm_to_points
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:224

IMP::atom::Hierarchy
The standard decorator for manipulating molecular structures.
Definition: atom/Hierarchy.h:207

IMP::em::create_density_map
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:626

IMP::core::transform
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.

IMP::atom::Mass::setup_particle
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:44

IMP::em
Basic utilities for handling cryo-electron microscopy 3D density maps.

IMP::core::XYZ
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30

IMP::isd.gmm_tools.write_gmm_to_map
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:80

IMP::em::get_bounding_box
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:457

IMP::core
Basic functionality that is expected to be used by a wide variety of IMP users.

IMP::algebra
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...

IMP::algebra::get_gaussian_from_covariance
Gaussian3D get_gaussian_from_covariance(const IMP_Eigen::Matrix3d &covariance, const Vector3D &center)
Return a Gaussian centered at the origin from a covariance matrix.

IMP::algebra::Vector3D
VectorD< 3 > Vector3D
Definition: VectorD.h:395

IMP::Particle
Class to handle individual model particles.
Definition: Particle.h:37

IMP::algebra::get_enclosing_sphere
Sphere3D get_enclosing_sphere(const Vector3Ds &ss)
Return a sphere containing the listed vectors.

IMP::core::Hierarchy
A decorator for helping deal with a generalized hierarchy.
Definition: core/Hierarchy.h:78

IMP::isd.gmm_tools.write_sklearn_gmm_to_map
def write_sklearn_gmm_to_map
write density map directly from sklearn GMM (kinda slow)
Definition: gmm_tools.py:113

IMP::core::RigidBody
A decorator for a rigid body.
Definition: rigid_bodies.h:75

IMP::core::Gaussian::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Gaussian.h:47

IMP::isd.gmm_tools.write_gmm_to_text
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:62

IMP::atom
Functionality for loading, creating, manipulating and scoring atomic structures.

IMP::algebra::SphereD< 3 >

IMP::isd.gmm_tools.decorate_gmm_from_text
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22

IMP::core::XYZR
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27