IMP logo
IMP Reference Guide  develop.b3a5ae88fa,2024/05/04
The Integrative Modeling Platform
create_gmm.py
1 from __future__ import print_function
2 import IMP
3 import IMP.em
4 import IMP.isd
6 from IMP import ArgumentParser
7 
8 import os
9 
10 
11 def parse_args():
12  desc = """
13  Create a GMM from either density file (.mrc), a pdb file (.pdb)
14  Will detect input format from extension.
15  Outputs as text and optionally as a density map
16  see help(-h)
17 """
18  p = ArgumentParser(description=desc)
19 
20  p.add_argument("-t", "--covar_type", dest="covar_type", default='full',
21  choices=['spherical', 'tied', 'diag', 'full'],
22  help="covariance type for the GMM")
23  p.add_argument("-m", "--out_map", dest="out_map", default='',
24  help="write out the gmm to an mrc file")
25  p.add_argument("-a", "--apix", dest="apix", default=1.0, type=float,
26  help="if you don't provide a map, set the voxel_size "
27  "here (for sampling)")
28  p.add_argument("-n", "--num_samples", dest="num_samples", default=1000000,
29  type=int, help="num samples to draw from the density map")
30  p.add_argument("-i", "--num_iter", dest="num_iter", default=100, type=int,
31  help="num iterations of GMM")
32  p.add_argument("-s", "--threshold", dest="threshold", default=0.0,
33  type=float, help="threshold for the map before sampling")
34 
35  p.add_argument("-f", "--force_radii", dest="force_radii", default=-1.0,
36  type=float,
37  help="force radii to be this value (spherical) "
38  "-1 means deactivated ")
39  p.add_argument("-w", "--force_weight", dest="force_weight", default=-1.0,
40  type=float,
41  help="force weight to be this value (spherical) "
42  "-1 means deactivated ")
43  p.add_argument("-e", "--force_weight_frac", dest="force_weight_frac",
44  action="store_true", default=False,
45  help="force weight to be 1.0/(num centers). "
46  "Takes precedence over -w")
47  p.add_argument("-d", "--use_dirichlet", dest="use_dirichlet",
48  default=False, action="store_true",
49  help="use dirichlet process for fit")
50 
51  p.add_argument("-k", "--multiply_by_mass", dest="multiply_by_mass",
52  default=False, action="store_true",
53  help="if set, will multiply all weights by the total mass "
54  "of the particles (PDB ONLY)")
55  p.add_argument("-x", "--chain", dest="chain", default=None,
56  help="If you passed a PDB file, read this chain")
57 
58  p.add_argument("-z", "--use_cpp", dest="use_cpp", default=False,
59  action="store_true",
60  help="EXPERIMENTAL. Uses the IMP GMM code. "
61  "Requires isd_emxl")
62  p.add_argument("data_file", help="data file name")
63  p.add_argument("n_centers", type=int, help="number of centers")
64  p.add_argument("out_file", help="output file name")
65  return p.parse_args()
66 
67 
68 def run(args):
69  data_fn = args.data_file
70  ncenters = args.n_centers
71  out_txt_fn = args.out_file
72  mdl = IMP.Model()
73 
74  if not os.path.isfile(data_fn):
75  raise Exception("The data file you entered: " + data_fn
76  + " does not exist!")
77 
78  # get points for fitting the GMM
79  ext = data_fn.split('.')[-1]
80  mass_multiplier = 1.0
81  if ext == 'pdb':
82  mh = IMP.atom.read_pdb(
84  if args.chain:
85  mps = IMP.atom.Selection(
86  mh, chain=args.chain).get_selected_particles()
87  else:
88  mps = IMP.core.get_leaves(mh)
89 
90  if args.multiply_by_mass:
91  mass_multiplier = sum(IMP.atom.Mass(p).get_mass() for p in mps)
92 
93  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
94  bbox = None
95  elif ext == 'mrc':
96  dmap = IMP.em.read_map(data_fn, IMP.em.MRCReaderWriter())
97  bbox = IMP.em.get_bounding_box(dmap)
98  dmap.set_was_used(True)
99  print('sampling points')
100  pts = IMP.isd.sample_points_from_density(
101  dmap, args.num_samples, args.threshold)
102  else:
103  raise ValueError("data_fn extension must be pdb or mrc")
104 
105  # Do fitting to points
106  if not args.use_cpp:
107  density_ps = []
108  print('fitting gmm')
109 
110  if args.force_weight_frac:
111  force_weight = 1.0 / ncenters
112  else:
113  force_weight = args.force_weight
114  if force_weight != -1:
115  print('weight forced to', force_weight)
116  if not args.use_dirichlet:
118  pts, ncenters, mdl, density_ps, args.num_iter, args.covar_type,
119  force_radii=args.force_radii, force_weight=args.force_weight,
120  mass_multiplier=mass_multiplier)
121  else:
123  pts, ncenters, mdl, density_ps, args.num_iter, args.covar_type,
124  mass_multiplier=mass_multiplier)
125 
126  else:
127  try:
128  import isd_emxl # noqa: F401
129  except ImportError:
130  print("This option is experimental, only works if you "
131  "have isd_emxl")
132  gmm_threshold = 0.01
133  density_ps = IMP.isd_emxl.fit_gaussians_to_density(
134  mdl, dmap, args.num_samples, ncenters, args.num_iter,
135  args.threshold, gmm_threshold)
136 
137  # Write to files
138  comments = ['Created by create_gmm.py, IMP.isd version %s'
140  comments.append('data_fn: ' + IMP.get_relative_path(out_txt_fn, data_fn))
141  comments.append('ncenters: %d' % ncenters)
142  for key in ('covar_type', 'apix', 'num_samples', 'num_iter',
143  'threshold', 'force_radii', 'force_weight',
144  'force_weight_frac', 'use_dirichlet', 'multiply_by_mass',
145  'chain'):
146  comments.append('%s: %s' % (key, repr(getattr(args, key))))
147  IMP.isd.gmm_tools.write_gmm_to_text(density_ps, out_txt_fn, comments)
148  if args.out_map != '':
149  IMP.isd.gmm_tools.write_gmm_to_map(density_ps, args.out_map,
150  args.apix, bbox)
151 
152 
153 def main():
154  args = parse_args()
155  run(args)
156 
157 
158 if __name__ == "__main__":
159  main()
Select non water and non hydrogen atoms.
Definition: pdb.h:314
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:364
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:244
std::string get_module_version()
Return the version of this module, as a string.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:119
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:505
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:61
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...