IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
create_gmm.py
1 from __future__ import print_function
2 import IMP
3 import IMP.em
4 import IMP.isd
6 import numpy as np
7 from optparse import OptionParser
8 import sys,os
9 def parse_args():
10  usage = """%prog [options] <data_fn> <n_components> <out_txt_fn>
11  Create a GMM from either density file (.mrc), a pdb file (.pdb)
12  Will detect input format from extension.
13  Outputs as text and optionally as a density map
14  see help(-h)
15 """
16  parser = OptionParser(usage)
17  parser.add_option("-t","--covar_type",dest="covar_type",default='full',
18  choices=['spherical', 'tied', 'diag', 'full'],
19  help="covariance type for the GMM")
20  parser.add_option("-m","--out_map",dest="out_map",default='',
21  help="write out the gmm to an mrc file")
22  parser.add_option("-a","--apix",dest="apix",default=1.0,type='float',
23  help="if you don't provide a map, set the voxel_size here (for sampling)")
24  parser.add_option("-r","--resolution",dest="resolution",default=1,type='float',
25  help="if you don't provide a map, set the resolution here (for sampling)")
26  parser.add_option("-n","--num_samples",dest="num_samples",default=1000000,type='int',
27  help="num samples to draw from the density map")
28  parser.add_option("-i","--num_iter",dest="num_iter",default=100,type='int',
29  help="num iterations of GMM")
30  parser.add_option("-s","--threshold",dest="threshold",default=0.0,type='float',
31  help="threshold for the map before sampling")
32 
33  parser.add_option("-c","--input_anchors_fn",dest="input_anchors_fn",default='',
34  help="get initial centers from anchors file. ")
35  parser.add_option("-f","--force_radii",dest="force_radii",default=-1.0,
36  type='float',
37  help="force radii to be this value (spherical) -1 means deactivated ")
38  parser.add_option("-w","--force_weight",dest="force_weight",default=-1.0,
39  type='float',
40  help="force weight to be this value (spherical) -1 means deactivated ")
41  parser.add_option("-e","--force_weight_frac",dest="force_weight_frac",action="store_true",default=False,
42  help="force weight to be 1.0/(num anchors). takes precedence over -w ")
43  parser.add_option("-o","--out_anchors_txt",dest="out_anchors_txt",default='',
44  help="write final GMM as anchor points (txt)")
45  parser.add_option("-q","--out_anchors_cmm",dest="out_anchors_cmm",default='',
46  help="write final GMM as anchor points (cmm)")
47  parser.add_option("-d","--use_dirichlet",dest="use_dirichlet",default=False,
48  action="store_true",
49  help="use dirichlet process for fit")
50 
51  parser.add_option("-k","--multiply_by_mass",dest="multiply_by_mass",default=False,
52  action="store_true",
53  help="if set, will multiply all weights by the total mass of the particles (PDB ONLY)")
54 
55  parser.add_option("-z","--use_cpp",dest="use_cpp",default=False,
56  action="store_true",
57  help="EXPERIMENTAL. Uses the IMP GMM code. Requires isd_emxl")
58 
59 
60  (options, args) = parser.parse_args()
61  if len(args) != 3:
62  parser.error("incorrect number of arguments")
63  return options,args
64 
65 def run():
66  options,args=parse_args()
67  data_fn,ncenters,out_txt_fn=args
68  ncenters=int(ncenters)
69  mdl=IMP.Model()
70 
71  if not os.path.isfile(data_fn):
72  raise Exception("The data file you entered: "+data_fn+" does not exist!")
73 
74  ### get points for fitting the GMM
75  ext=data_fn.split('.')[-1]
76  mass_multiplier=1.0
77  if ext=='pdb':
79  mps=IMP.core.get_leaves(mh)
80  dmap=IMP.em.SampledDensityMap(mps,
81  options.resolution,
82  options.apix)
83  dmap.calcRMS()
84  if options.multiply_by_mass:
85  mass_multiplier=sum(IMP.atom.Mass(p).get_mass() for p in mps)
86  elif ext=='mrc':
88  else:
89  print('ERROR: data_fn extension must be pdb, mrc, or npy')
90  sys.exit()
91 
92  if not options.use_cpp:
93  print('sampling points')
94  pts=IMP.isd.sample_points_from_density(dmap,options.num_samples,options.threshold)
95  density_ps=[]
96  print('fitting gmm')
97  #IMP.isd_emxl.gmm_tools.draw_points(pts,'test_points.bild')
98 
99  if options.force_weight_frac:
100  force_weight=1.0/ncenters
101  else:
102  force_weight=options.force_weight
103  if force_weight!=-1:
104  print('weight forced to',force_weight)
105  if not options.use_dirichlet:
106  gmm=IMP.isd.gmm_tools.fit_gmm_to_points(pts,ncenters,mdl,density_ps,
107  options.num_iter,options.covar_type,
108  force_radii=options.force_radii,
109  force_weight=options.force_weight,
110  mass_multiplier=mass_multiplier)
111  else:
112  gmm=IMP.isd.gmm_tools.fit_dirichlet_gmm_to_points(pts,ncenters,mdl,density_ps,
113  options.num_iter,options.covar_type,
114  mass_multiplier=mass_multiplier)
115 
116  else:
117  try:
118  import isd_emxl
119  except ImportError:
120  print("This option is experimental, only works if you have isd_emxl")
121  gmm_threshold=0.01
122  density_ps=IMP.isd_emxl.fit_gaussians_to_density(mdl,dmap,options.num_samples,
123  ncenters,options.num_iter,
124  options.threshold,
125  gmm_threshold)
126 
127  IMP.isd.gmm_tools.write_gmm_to_text(density_ps,out_txt_fn)
128  if options.out_map!='':
129  IMP.isd.gmm_tools.write_gmm_to_map(density_ps,options.out_map,
130  options.apix,
132 
133  if options.out_anchors_txt!='':
134  IMP.isd.gmm_tools.write_gmm_to_anchors(density_ps,options.out_anchors_txt,
135  options.out_anchors_cmm)
136 
137 
138 
139 if __name__=="__main__":
140  run()
Select non water and non hydrogen atoms.
Definition: pdb.h:197
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:282
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Class for sampling a density map from particles.
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:204
Basic utilities for handling cryo-electron microscopy 3D density maps.
DensityMap * read_map(std::string filename)
Read a density map from a file and return it.
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:62
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:43
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...