IMP logo
IMP Reference Guide  2.9.0
The Integrative Modeling Platform
create_gmm.py
1 from __future__ import print_function
2 import IMP
3 import IMP.em
4 import IMP.isd
6 import numpy as np
7 from optparse import OptionParser
8 import sys,os
9 def parse_args():
10  usage = """%prog [options] <data_fn> <n_components> <out_txt_fn>
11  Create a GMM from either density file (.mrc), a pdb file (.pdb)
12  Will detect input format from extension.
13  Outputs as text and optionally as a density map
14  see help(-h)
15 """
16  parser = OptionParser(usage)
17 
18  parser.add_option("-t","--covar_type",dest="covar_type",default='full',
19  choices=['spherical', 'tied', 'diag', 'full'],
20  help="covariance type for the GMM")
21  parser.add_option("-m","--out_map",dest="out_map",default='',
22  help="write out the gmm to an mrc file")
23  parser.add_option("-a","--apix",dest="apix",default=1.0,type='float',
24  help="if you don't provide a map, set the voxel_size here (for sampling)")
25  parser.add_option("-n","--num_samples",dest="num_samples",default=1000000,type='int',
26  help="num samples to draw from the density map")
27  parser.add_option("-i","--num_iter",dest="num_iter",default=100,type='int',
28  help="num iterations of GMM")
29  parser.add_option("-s","--threshold",dest="threshold",default=0.0,type='float',
30  help="threshold for the map before sampling")
31 
32  parser.add_option("-f","--force_radii",dest="force_radii",default=-1.0,
33  type='float',
34  help="force radii to be this value (spherical) -1 means deactivated ")
35  parser.add_option("-w","--force_weight",dest="force_weight",default=-1.0,
36  type='float',
37  help="force weight to be this value (spherical) -1 means deactivated ")
38  parser.add_option("-e","--force_weight_frac",dest="force_weight_frac",action="store_true",default=False,
39  help="force weight to be 1.0/(num anchors). takes precedence over -w ")
40  parser.add_option("-o","--out_anchors_txt",dest="out_anchors_txt",default='',
41  help="write final GMM as anchor points (txt)")
42  parser.add_option("-q","--out_anchors_cmm",dest="out_anchors_cmm",default='',
43  help="write final GMM as anchor points (cmm)")
44  parser.add_option("-d","--use_dirichlet",dest="use_dirichlet",default=False,
45  action="store_true",
46  help="use dirichlet process for fit")
47 
48  parser.add_option("-k","--multiply_by_mass",dest="multiply_by_mass",default=False,
49  action="store_true",
50  help="if set, will multiply all weights by the total mass of the particles (PDB ONLY)")
51  parser.add_option("-x","--chain",dest="chain",default=None,
52  help="If you passed a PDB file, read this chain")
53 
54  parser.add_option("-z","--use_cpp",dest="use_cpp",default=False,
55  action="store_true",
56  help="EXPERIMENTAL. Uses the IMP GMM code. Requires isd_emxl")
57 
58 
59  (options, args) = parser.parse_args()
60  if len(args) != 3:
61  parser.error("incorrect number of arguments")
62  return options,args
63 
64 def run():
65  options,args = parse_args()
66  data_fn,ncenters,out_txt_fn = args
67  ncenters = int(ncenters)
68  mdl = IMP.Model()
69 
70  if not os.path.isfile(data_fn):
71  raise Exception("The data file you entered: "+data_fn+" does not exist!")
72 
73  ### get points for fitting the GMM
74  ext = data_fn.split('.')[-1]
75  mass_multiplier = 1.0
76  if ext=='pdb':
78  if options.chain:
79  mps = IMP.atom.Selection(mh,chain=options.chain).get_selected_particles()
80  else:
81  mps = IMP.core.get_leaves(mh)
82 
83  if options.multiply_by_mass:
84  mass_multiplier=sum(IMP.atom.Mass(p).get_mass() for p in mps)
85 
86  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
87  bbox = None
88  elif ext=='mrc':
89  dmap = IMP.em.read_map(data_fn,IMP.em.MRCReaderWriter())
90  bbox = IMP.em.get_bounding_box(dmap)
91  print('sampling points')
92  pts = IMP.isd.sample_points_from_density(dmap,options.num_samples,options.threshold)
93  else:
94  print('ERROR: data_fn extension must be pdb, mrc, or npy')
95  sys.exit()
96 
97  ### Do fitting to points
98  if not options.use_cpp:
99  density_ps = []
100  print('fitting gmm')
101  #IMP.isd_emxl.gmm_tools.draw_points(pts,'test_points.bild')
102 
103  if options.force_weight_frac:
104  force_weight = 1.0/ncenters
105  else:
106  force_weight=options.force_weight
107  if force_weight != -1:
108  print('weight forced to',force_weight)
109  if not options.use_dirichlet:
110  gmm = IMP.isd.gmm_tools.fit_gmm_to_points(pts,ncenters,mdl,density_ps,
111  options.num_iter,options.covar_type,
112  force_radii=options.force_radii,
113  force_weight=options.force_weight,
114  mass_multiplier=mass_multiplier)
115  else:
116  gmm = IMP.isd.gmm_tools.fit_dirichlet_gmm_to_points(pts,ncenters,mdl,density_ps,
117  options.num_iter,options.covar_type,
118  mass_multiplier=mass_multiplier)
119 
120  else:
121  try:
122  import isd_emxl
123  except ImportError:
124  print("This option is experimental, only works if you have isd_emxl")
125  gmm_threshold = 0.01
126  density_ps = IMP.isd_emxl.fit_gaussians_to_density(mdl,dmap,options.num_samples,
127  ncenters,options.num_iter,
128  options.threshold,
129  gmm_threshold)
130 
131  ### Write to files
132  comments = ['Created by create_gmm.py, IMP.isd version %s'
134  comments.append('data_fn: ' + IMP.get_relative_path(out_txt_fn, data_fn))
135  comments.append('ncenters: %d' % ncenters)
136  for key in ('covar_type', 'apix', 'num_samples', 'num_iter',
137  'threshold', 'force_radii', 'force_weight',
138  'force_weight_frac', 'use_dirichlet', 'multiply_by_mass',
139  'chain'):
140  comments.append('%s: %s' % (key, repr(getattr(options, key))))
141  IMP.isd.gmm_tools.write_gmm_to_text(density_ps, out_txt_fn, comments)
142  if options.out_map!='':
143  IMP.isd.gmm_tools.write_gmm_to_map(density_ps,options.out_map,
144  options.apix,
145  bbox)
146 
147  if options.out_anchors_txt!='':
148  IMP.isd.gmm_tools.write_gmm_to_anchors(density_ps,options.out_anchors_txt,
149  options.out_anchors_cmm)
150 
151 
152 
153 if __name__=="__main__":
154  run()
Select non water and non hydrogen atoms.
Definition: pdb.h:243
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_mass(ResidueType c)
Get the mass from the residue type.
def fit_dirichlet_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:319
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:231
std::string get_module_version()
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:113
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:464
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:62
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...