IMP logo
IMP Reference Guide  develop.330bebda01,2025/01/21
The Integrative Modeling Platform
create_gmm.py
1 import IMP
2 import IMP.em
3 import IMP.isd
5 from IMP import ArgumentParser
6 
7 import os
8 
9 
def parse_args(argv=None):
    """Build the create_gmm command-line parser and parse the arguments.

    Args:
        argv: optional list of argument strings to parse.  When None (the
            default) the parser reads the process command line, which is
            what the original zero-argument call did.

    Returns:
        The parsed namespace.  It carries the positionals ``data_file``,
        ``n_centers`` (int) and ``out_file``, plus one attribute per
        option declared below (named by each option's ``dest``).
    """
    desc = """
    Create a GMM from either density file (.mrc), a pdb file (.pdb)
    Will detect input format from extension.
    Outputs as text and optionally as a density map
    see help(-h)
"""
    p = ArgumentParser(description=desc)

    # --- GMM fitting / sampling options ---
    p.add_argument("-t", "--covar_type", dest="covar_type", default='full',
                   choices=['spherical', 'tied', 'diag', 'full'],
                   help="covariance type for the GMM")
    p.add_argument("-m", "--out_map", dest="out_map", default='',
                   help="write out the gmm to an mrc file")
    p.add_argument("-a", "--apix", dest="apix", default=1.0, type=float,
                   help="if you don't provide a map, set the voxel_size "
                        "here (for sampling)")
    p.add_argument("-n", "--num_samples", dest="num_samples",
                   default=1000000, type=int,
                   help="num samples to draw from the density map")
    p.add_argument("-i", "--num_iter", dest="num_iter", default=100,
                   type=int, help="num iterations of GMM")
    p.add_argument("-s", "--threshold", dest="threshold", default=0.0,
                   type=float, help="threshold for the map before sampling")

    # --- overrides; -1 is the "not forced" sentinel for radii/weight ---
    p.add_argument("-f", "--force_radii", dest="force_radii", default=-1.0,
                   type=float,
                   help="force radii to be this value (spherical) "
                        "-1 means deactivated ")
    p.add_argument("-w", "--force_weight", dest="force_weight", default=-1.0,
                   type=float,
                   help="force weight to be this value (spherical) "
                        "-1 means deactivated ")
    p.add_argument("-e", "--force_weight_frac", dest="force_weight_frac",
                   action="store_true", default=False,
                   help="force weight to be 1.0/(num centers). "
                        "Takes precedence over -w")
    p.add_argument("-d", "--use_dirichlet", dest="use_dirichlet",
                   default=False, action="store_true",
                   help="use dirichlet process for fit")

    # --- PDB-specific options ---
    p.add_argument("-k", "--multiply_by_mass", dest="multiply_by_mass",
                   default=False, action="store_true",
                   help="if set, will multiply all weights by the total mass "
                        "of the particles (PDB ONLY)")
    p.add_argument("-x", "--chain", dest="chain", default=None,
                   help="If you passed a PDB file, read this chain")

    p.add_argument("-z", "--use_cpp", dest="use_cpp", default=False,
                   action="store_true",
                   help="EXPERIMENTAL. Uses the IMP GMM code. "
                        "Requires isd_emxl")

    # --- positionals ---
    p.add_argument("data_file", help="data file name")
    p.add_argument("n_centers", type=int, help="number of centers")
    p.add_argument("out_file", help="output file name")
    return p.parse_args(argv)
65 
66 
def run(args):
    """Fit a Gaussian mixture model to the input data and write it out.

    Points are taken from the atoms of a PDB file or sampled from an MRC
    density map (chosen by the file extension), a GMM with
    ``args.n_centers`` components is fitted to them, and the result is
    written as text to ``args.out_file`` and, if ``args.out_map`` is
    non-empty, as a density map.

    Args:
        args: the namespace returned by parse_args().

    Raises:
        Exception: if ``args.data_file`` is not an existing file.
        ValueError: if the extension is neither ``pdb`` nor ``mrc``, or if
            ``use_cpp`` is requested for an input that is not a density
            map.
    """
    data_fn = args.data_file
    ncenters = args.n_centers
    out_txt_fn = args.out_file

    # Validate the request before building any IMP objects so that a bad
    # invocation fails immediately and with a clear message.
    if not os.path.isfile(data_fn):
        raise Exception("The data file you entered: " + data_fn
                        + " does not exist!")
    ext = data_fn.split('.')[-1]
    if ext not in ('pdb', 'mrc'):
        raise ValueError("data_fn extension must be pdb or mrc")
    if args.use_cpp and ext != 'mrc':
        # The use_cpp branch below fits directly to the density map, which
        # only exists for .mrc input; previously this surfaced as a
        # NameError on the undefined map variable.
        raise ValueError("use_cpp requires a density map (.mrc) as input")

    mdl = IMP.Model()

    # get points for fitting the GMM
    mass_multiplier = 1.0
    if ext == 'pdb':
        # NOTE(review): the argument line of this call was missing from the
        # listing; restored as the non-water, non-hydrogen selector named
        # in the page's cross-references -- confirm against upstream.
        mh = IMP.atom.read_pdb(
            data_fn, mdl, IMP.atom.NonWaterNonHydrogenPDBSelector())
        if args.chain:
            mps = IMP.atom.Selection(
                mh, chain=args.chain).get_selected_particles()
        else:
            mps = IMP.core.get_leaves(mh)

        if args.multiply_by_mass:
            mass_multiplier = sum(IMP.atom.Mass(p).get_mass() for p in mps)

        pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
        # No map, hence no bounding box to pass to the map writer.
        bbox = None
    else:
        dmap = IMP.em.read_map(data_fn, IMP.em.MRCReaderWriter())
        bbox = IMP.em.get_bounding_box(dmap)
        dmap.set_was_used(True)
        print('sampling points')
        pts = IMP.isd.sample_points_from_density(
            dmap, args.num_samples, args.threshold)

    # Do fitting to points
    if not args.use_cpp:
        # Passed to the fitter and written out afterwards; presumably the
        # fitter appends the fitted Gaussians to it in place.
        density_ps = []
        print('fitting gmm')

        # -e (weight = 1/ncenters) takes precedence over an explicit -w.
        if args.force_weight_frac:
            force_weight = 1.0 / ncenters
        else:
            force_weight = args.force_weight
        if force_weight != -1:
            print('weight forced to', force_weight)
        # NOTE(review): the lines naming the two fitting functions were
        # missing from the listing; restored from the page's
        # cross-references (fit_gmm_to_points /
        # fit_dirichlet_gmm_to_points).
        if not args.use_dirichlet:
            # Pass the effective weight computed above.  The original
            # passed args.force_weight, which silently ignored -e.
            IMP.isd.gmm_tools.fit_gmm_to_points(
                pts, ncenters, mdl, density_ps, args.num_iter,
                args.covar_type,
                force_radii=args.force_radii, force_weight=force_weight,
                mass_multiplier=mass_multiplier)
        else:
            IMP.isd.gmm_tools.fit_dirichlet_gmm_to_points(
                pts, ncenters, mdl, density_ps, args.num_iter,
                args.covar_type,
                mass_multiplier=mass_multiplier)

    else:
        try:
            import isd_emxl  # noqa: F401
        except ImportError:
            # NOTE(review): execution deliberately continues after the
            # warning, as before; the call below will fail if the module
            # really is unavailable.
            print("This option is experimental, only works if you "
                  "have isd_emxl")
        gmm_threshold = 0.01
        density_ps = IMP.isd_emxl.fit_gaussians_to_density(
            mdl, dmap, args.num_samples, ncenters, args.num_iter,
            args.threshold, gmm_threshold)

    # Write to files
    # Record the provenance of the fit as header comments in the text file.
    comments = ['Created by create_gmm.py, IMP.isd version %s'
                % IMP.isd.get_module_version()]
    comments.append('data_fn: ' + IMP.get_relative_path(out_txt_fn, data_fn))
    comments.append('ncenters: %d' % ncenters)
    for key in ('covar_type', 'apix', 'num_samples', 'num_iter',
                'threshold', 'force_radii', 'force_weight',
                'force_weight_frac', 'use_dirichlet', 'multiply_by_mass',
                'chain'):
        comments.append('%s: %s' % (key, repr(getattr(args, key))))
    IMP.isd.gmm_tools.write_gmm_to_text(density_ps, out_txt_fn, comments)
    if args.out_map != '':
        IMP.isd.gmm_tools.write_gmm_to_map(density_ps, args.out_map,
                                           args.apix, bbox)
150 
151 
def main():
    """Command-line entry point: parse the arguments, then run the fit."""
    run(parse_args())


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Select non water and non hydrogen atoms.
Definition: pdb.h:314
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:363
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of this part of the hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:243
std::string get_module_version()
Return the version of this module, as a string.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:118
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:509
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:60
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...