IMP logo
IMP Reference Guide  2.19.0
The Integrative Modeling Platform
create_gmm.py
1 from __future__ import print_function
2 import IMP
3 import IMP.em
4 import IMP.isd
6 from IMP import ArgumentParser
7 
8 import sys,os
9 
10 def parse_args():
11  desc = """
12  Create a GMM from either density file (.mrc), a pdb file (.pdb)
13  Will detect input format from extension.
14  Outputs as text and optionally as a density map
15  see help(-h)
16 """
17  p = ArgumentParser(description=desc)
18 
19  p.add_argument("-t","--covar_type",dest="covar_type",default='full',
20  choices=['spherical', 'tied', 'diag', 'full'],
21  help="covariance type for the GMM")
22  p.add_argument("-m","--out_map",dest="out_map",default='',
23  help="write out the gmm to an mrc file")
24  p.add_argument("-a","--apix",dest="apix",default=1.0,type=float,
25  help="if you don't provide a map, set the voxel_size here (for sampling)")
26  p.add_argument("-n","--num_samples",dest="num_samples",default=1000000,type=int,
27  help="num samples to draw from the density map")
28  p.add_argument("-i","--num_iter",dest="num_iter",default=100,type=int,
29  help="num iterations of GMM")
30  p.add_argument("-s","--threshold",dest="threshold",default=0.0,type=float,
31  help="threshold for the map before sampling")
32 
33  p.add_argument("-f","--force_radii",dest="force_radii",default=-1.0,
34  type=float,
35  help="force radii to be this value (spherical) -1 means deactivated ")
36  p.add_argument("-w","--force_weight",dest="force_weight",default=-1.0,
37  type=float,
38  help="force weight to be this value (spherical) -1 means deactivated ")
39  p.add_argument("-e", "--force_weight_frac", dest="force_weight_frac",
40  action="store_true", default=False,
41  help="force weight to be 1.0/(num centers). "
42  "Takes precedence over -w")
43  p.add_argument("-d","--use_dirichlet",dest="use_dirichlet",default=False,
44  action="store_true",
45  help="use dirichlet process for fit")
46 
47  p.add_argument("-k","--multiply_by_mass",dest="multiply_by_mass",default=False,
48  action="store_true",
49  help="if set, will multiply all weights by the total mass of the particles (PDB ONLY)")
50  p.add_argument("-x","--chain",dest="chain",default=None,
51  help="If you passed a PDB file, read this chain")
52 
53  p.add_argument("-z","--use_cpp",dest="use_cpp",default=False,
54  action="store_true",
55  help="EXPERIMENTAL. Uses the IMP GMM code. Requires isd_emxl")
56  p.add_argument("data_file", help="data file name")
57  p.add_argument("n_centers", type=int, help="number of centers")
58  p.add_argument("out_file", help="output file name")
59  return p.parse_args()
60 
61 def run(args):
62  data_fn = args.data_file
63  ncenters = args.n_centers
64  out_txt_fn = args.out_file
65  mdl = IMP.Model()
66 
67  if not os.path.isfile(data_fn):
68  raise Exception("The data file you entered: "+data_fn+" does not exist!")
69 
70  ### get points for fitting the GMM
71  ext = data_fn.split('.')[-1]
72  mass_multiplier = 1.0
73  if ext=='pdb':
75  if args.chain:
76  mps = IMP.atom.Selection(mh,chain=args.chain).get_selected_particles()
77  else:
78  mps = IMP.core.get_leaves(mh)
79 
80  if args.multiply_by_mass:
81  mass_multiplier=sum(IMP.atom.Mass(p).get_mass() for p in mps)
82 
83  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
84  bbox = None
85  elif ext=='mrc':
86  dmap = IMP.em.read_map(data_fn,IMP.em.MRCReaderWriter())
87  bbox = IMP.em.get_bounding_box(dmap)
88  dmap.set_was_used(True)
89  print('sampling points')
90  pts = IMP.isd.sample_points_from_density(dmap,args.num_samples,args.threshold)
91  else:
92  raise ValueError("data_fn extension must be pdb or mrc")
93 
94  ### Do fitting to points
95  if not args.use_cpp:
96  density_ps = []
97  print('fitting gmm')
98  #IMP.isd_emxl.gmm_tools.draw_points(pts,'test_points.bild')
99 
100  if args.force_weight_frac:
101  force_weight = 1.0/ncenters
102  else:
103  force_weight=args.force_weight
104  if force_weight != -1:
105  print('weight forced to',force_weight)
106  if not args.use_dirichlet:
107  gmm = IMP.isd.gmm_tools.fit_gmm_to_points(pts,ncenters,mdl,density_ps,
108  args.num_iter,args.covar_type,
109  force_radii=args.force_radii,
110  force_weight=args.force_weight,
111  mass_multiplier=mass_multiplier)
112  else:
113  gmm = IMP.isd.gmm_tools.fit_dirichlet_gmm_to_points(pts,ncenters,mdl,density_ps,
114  args.num_iter,args.covar_type,
115  mass_multiplier=mass_multiplier)
116 
117  else:
118  try:
119  import isd_emxl
120  except ImportError:
121  print("This option is experimental, only works if you have isd_emxl")
122  gmm_threshold = 0.01
123  density_ps = IMP.isd_emxl.fit_gaussians_to_density(mdl,dmap,args.num_samples,
124  ncenters,args.num_iter,
125  args.threshold,
126  gmm_threshold)
127 
128  ### Write to files
129  comments = ['Created by create_gmm.py, IMP.isd version %s'
131  comments.append('data_fn: ' + IMP.get_relative_path(out_txt_fn, data_fn))
132  comments.append('ncenters: %d' % ncenters)
133  for key in ('covar_type', 'apix', 'num_samples', 'num_iter',
134  'threshold', 'force_radii', 'force_weight',
135  'force_weight_frac', 'use_dirichlet', 'multiply_by_mass',
136  'chain'):
137  comments.append('%s: %s' % (key, repr(getattr(args, key))))
138  IMP.isd.gmm_tools.write_gmm_to_text(density_ps, out_txt_fn, comments)
139  if args.out_map != '':
140  IMP.isd.gmm_tools.write_gmm_to_map(density_ps, args.out_map,
141  args.apix, bbox)
142 
143 def main():
144  args = parse_args()
145  run(args)
146 
147 if __name__=="__main__":
148  main()
Select non water and non hydrogen atoms.
Definition: pdb.h:248
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:344
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:228
std::string get_module_version()
Return the version of this module, as a string.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:110
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:505
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:64
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...