IMP logo
IMP Reference Guide  2.13.0
The Integrative Modeling Platform
system_tools.py
1 from __future__ import print_function, division
2 import IMP
3 import IMP.atom
5 import IMP.pmi
6 import IMP.pmi.tools
7 from collections import defaultdict
8 from math import pi
9 import os
10 import warnings
11 
12 def resnums2str(res):
13  """Take iterable of TempResidues and return compatified string"""
14  if len(res)==0:
15  return ''
16  idxs = [r.get_index() for r in res]
17  idxs.sort()
18  all_ranges=[]
19  cur_range=[idxs[0],idxs[0]]
20  for idx in idxs[1:]:
21  if idx!=cur_range[1]+1:
22  all_ranges.append(cur_range)
23  cur_range=[idx,idx]
24  cur_range[1]=idx
25  all_ranges.append(cur_range)
26  ret = ''
27  for nr,r in enumerate(all_ranges):
28  ret+='%i-%i'%(r[0],r[1])
29  if nr<len(all_ranges)-1:
30  ret+=', '
31  return ret
32 
33 
34 def _select_ca_or_p(hiers, **kwargs):
35  """Select all CA (amino acids) or P (nucleic acids) as appropriate"""
36  sel_p = IMP.atom.Selection(hiers, atom_type=IMP.atom.AT_P, **kwargs)
37  ps = sel_p.get_selected_particles()
38  if ps:
39  # detected nucleotides. Selecting phosphorous instead of CA
40  return ps
41  else:
42  sel = IMP.atom.Selection(hiers, atom_type=IMP.atom.AT_CA, **kwargs)
43  return sel.get_selected_particles()
44 
45 
46 def get_structure(model,pdb_fn,chain_id,res_range=None,offset=0,model_num=None,ca_only=False):
47  """read a structure from a PDB file and return a list of residues
48  @param model The IMP model
49  @param pdb_fn The file to read (in traditional PDB or mmCIF format)
50  @param chain_id Chain ID to read
51  @param res_range Add only a specific set of residues.
52  res_range[0] is the starting and res_range[1] is the ending residue index
53  The ending residue can be "END", that will take everything to the end of the sequence.
54  None gets you all.
55  @param offset Apply an offset to the residue indexes of the PDB file
56  @param model_num Read multi-model PDB and return that model
57  @param ca_only Read only CA atoms (by default, all non-waters are read)
58  """
60  # Read file in mmCIF format if requested
61  read_file = IMP.atom.read_pdb
62  read_multi_file = IMP.atom.read_multimodel_pdb
63  if pdb_fn.endswith('.cif'):
64  read_file = IMP.atom.read_mmcif
65  read_multi_file = IMP.atom.read_multimodel_mmcif
66  if ca_only:
68  if model_num is None:
69  mh = read_file(pdb_fn,model,
71 
72  else:
73  mhs = read_multi_file(pdb_fn,model,sel)
74  if model_num>=len(mhs):
75  raise Exception("you requested model num "+str(model_num)+\
76  " but the PDB file only contains "+str(len(mhs))+" models")
77  mh = IMP.atom.Selection(mhs[model_num],chain=chain_id,with_representation=True)
78 
79  if res_range==[] or res_range is None:
80  ps = _select_ca_or_p(mh, chain=chain_id)
81  else:
82  start = res_range[0]
83  end = res_range[1]
84  if end=="END":
85  end = IMP.atom.Residue(mh.get_children()[0].get_children()[-1]).get_index()
86  ps = _select_ca_or_p(mh, chain=chain_id,
87  residue_indexes=range(start,end+1))
88  ret = []
89 
90  for p in ps:
91  res = IMP.atom.Residue(IMP.atom.Atom(p).get_parent())
92  res.set_index(res.get_index() + offset)
93  ret.append(res)
94  if len(ret) == 0:
95  warnings.warn(
96  "no residues selected from %s in range %s" % (pdb_fn, res_range),
98  return ret
99 
100 def build_bead(model,residues,input_coord=None):
101  """Generates a single bead"""
102 
103  ds_frag = (residues[0].get_index(), residues[-1].get_index())
104  prt = IMP.Particle(model)
106  ptem = IMP.core.XYZR(prt)
107  mass = IMP.atom.get_mass_from_number_of_residues(len(residues))
108 
109  if ds_frag[0] == ds_frag[-1]:
110  rt = residues[0].get_residue_type()
111  h = IMP.atom.Residue.setup_particle(prt, rt, ds_frag[0])
112  h.set_name('%i_bead' % (ds_frag[0]))
113  prt.set_name('%i_bead' % (ds_frag[0]))
114  try:
116  except IMP.ValueException:
118  IMP.atom.ResidueType("ALA"))
120  ptem.set_radius(radius)
121  else:
123  h.set_name('%i-%i_bead' % (ds_frag[0], ds_frag[-1]))
124  prt.set_name('%i-%i_bead' % (ds_frag[0], ds_frag[-1]))
125  h.set_residue_indexes(range(ds_frag[0], ds_frag[-1] + 1))
126  volume = IMP.atom.get_volume_from_mass(mass)
127  radius = 0.8 * (3.0 / 4.0 / pi * volume) ** (1.0 / 3.0)
128  ptem.set_radius(radius)
129 
131  try:
132  if tuple(input_coord) is not None:
133  ptem.set_coordinates(input_coord)
134  except TypeError:
135  pass
136  return h
137 
138 def build_necklace(model,residues, resolution, input_coord=None):
139  """Generates a string of beads with given length"""
140  out_hiers = []
141  for chunk in list(IMP.pmi.tools.list_chunks_iterator(residues, resolution)):
142  out_hiers.append(build_bead(model,chunk, input_coord=input_coord))
143  return out_hiers
144 
145 def build_ca_centers(model,residues):
146  """Create a bead on the CA position with coarsened size and mass"""
147  out_hiers = []
148  for tempres in residues:
149  residue = tempres.get_hierarchy()
150  rp1 = IMP.Particle(model)
151  rp1.set_name("Residue_%i"%residue.get_index())
152  rt = residue.get_residue_type()
153  this_res = IMP.atom.Residue.setup_particle(rp1,residue)
154  try:
156  except IMP.ValueException:
158  IMP.atom.ResidueType("ALA"))
159  try:
160  mass = IMP.atom.get_mass(rt)
161  except:
163  calpha = IMP.atom.Selection(residue,atom_type=IMP.atom.AT_CA). \
164  get_selected_particles()
165  cp=IMP.atom.Selection(residue,atom_type=IMP.atom.AT_P). \
166  get_selected_particles()
167 
168  if len(calpha)==1:
169  central_atom=calpha[0]
170  elif len(cp)==1:
171  central_atom=cp[0]
172  else:
173  raise("build_ca_centers: weird selection (no Ca, no nucleotide P or ambiguous selection found)")
175  shape = IMP.algebra.Sphere3D(IMP.core.XYZ(central_atom).get_coordinates(),radius)
178  out_hiers.append(this_res)
179  return out_hiers
180 
181 def setup_bead_as_gaussian(mh):
182  """Setup bead as spherical gaussian, using radius as variance"""
183  p = mh.get_particle()
184  center = IMP.core.XYZ(p).get_coordinates()
185  rad = IMP.core.XYZR(p).get_radius()
186  mass = IMP.atom.Mass(p).get_mass()
190 
191 
192 def show_representation(node):
193  print(node)
195  repr = IMP.atom.Representation(node)
196  resolutions = repr.get_resolutions()
197  for r in resolutions:
198  print('---- resolution %i ----' %r)
199  IMP.atom.show_molecular_hierarchy(repr.get_representation(r))
200  return True
201  else:
202  return False
203 
204 def _get_color_for_representation(rep):
205  """Return an IMP.display.Color object (or None) for the given
206  Representation."""
207  if rep.color is not None:
208  if isinstance(rep.color, float):
209  return IMP.display.get_rgb_color(rep.color)
210  elif isinstance(rep.color, str):
211  return IMP.display.Color(*IMP.pmi.tools.color2rgb(rep.color))
212  elif hasattr(rep.color,'__iter__') and len(rep.color)==3:
213  return IMP.display.Color(*rep.color)
214  elif isinstance(rep.color, IMP.display.Color):
215  return rep.color
216  else:
217  raise TypeError("Color must be Chimera color name, a hex "
218  "string, a float or (r,g,b) tuple")
219 
220 
221 def _add_fragment_provenance(fragment, first_residue, rephandler):
222  """Track the original source of a fragment's structure.
223  If the residues in the given fragment were extracted from a PDB
224  file, add suitable provenance information to the Model (the name
225  of that file, chain ID, and residue index offset)."""
226  pdb_element = rephandler.pdb_for_residue.get(first_residue.get_index())
227  if pdb_element:
228  m = fragment.get_model()
229  p = IMP.Particle(m, "input structure")
231  p, pdb_element.filename, pdb_element.chain_id, pdb_element.offset)
232  IMP.core.add_provenance(m, fragment, sp)
233 
234 
235 def build_representation(parent, rep, coord_finder, rephandler):
236  """Create requested representation.
237  For beads, identifies continuous segments and sets up as Representation.
238  If any volume-based representations (e.g.,densities) are requested,
239  will instead create a single Representation node.
240  All reps are added as children of the passed parent.
241  @param parent The Molecule to which we'll add add representations
242  @param rep What to build. An instance of pmi::topology::_Representation
243  @param coord_finder A _FindCloseStructure object to help localize beads
244  """
245  built_reps = []
246  atomic_res = 0
247  ca_res = 1
248  model = parent.hier.get_model()
249  color = _get_color_for_representation(rep)
250 
251  # first get the primary representation (currently, the smallest bead size)
252  # eventually we won't require beads to be present at all
253  primary_resolution = min(rep.bead_resolutions)
254 
255  # if collective densities, will return single node with everything
256  # below we sample or read the GMMs and add them as representation
257  single_node = False # flag indicating grouping nonlinear segments with one GMM
258  if rep.density_residues_per_component:
259  single_node = True
260  num_components = len(rep.residues)//rep.density_residues_per_component+1
261  rep_dict = defaultdict(list)
262  segp = IMP.Particle(model)
263  root_representation = IMP.atom.Representation.setup_particle(segp,
264  primary_resolution)
265  built_reps.append(root_representation)
266  res_nums = [r.get_index() for r in rep.residues]
267  IMP.atom.Fragment.setup_particle(segp,res_nums)
268  density_frag = IMP.atom.Fragment.setup_particle(IMP.Particle(model),res_nums)
269  density_frag.get_particle().set_name("Densities %i"%rep.density_residues_per_component)
270  density_ps = []
271 
272  if os.path.exists(rep.density_prefix+'.txt') and not rep.density_force_compute:
273  IMP.isd.gmm_tools.decorate_gmm_from_text(rep.density_prefix+'.txt',
274  density_ps,
275  model)
276  if len(density_ps)!=num_components or not os.path.exists(rep.density_prefix+'.txt') or rep.density_force_compute:
277  fit_coords = []
278  total_mass = 0.0
279  for r in rep.residues:
280  for p in IMP.core.get_leaves(r.hier):
281  fit_coords.append(IMP.core.XYZ(p).get_coordinates())
282  total_mass += IMP.atom.Mass(p).get_mass()
283 
284  # fit GMM
285  density_ps = []
287  num_components,
288  model,
289  density_ps,
290  min_covar=4.0,
291  mass_multiplier=total_mass)
292 
293  IMP.isd.gmm_tools.write_gmm_to_text(density_ps,rep.density_prefix+'.txt')
294  if rep.density_voxel_size>0.0:
295  IMP.isd.gmm_tools.write_gmm_to_map(density_ps,rep.density_prefix+'.mrc',
296  rep.density_voxel_size,fast=True)
297 
298  for n, d in enumerate(density_ps):
299  d.set_name('Density #%d' % n)
300  density_frag.add_child(d)
301  root_representation.add_representation(density_frag,
302  IMP.atom.DENSITIES,
303  rep.density_residues_per_component)
304 
305  # get continuous segments from residues
306  segments = []
307  rsort = sorted(list(rep.residues),key=lambda r:r.get_index())
308  prev_idx = rsort[0].get_index()-1
309  prev_structure = rsort[0].get_has_structure()
310  cur_seg = []
311  force_break = False
312  for nr,r in enumerate(rsort):
313  if r.get_index()!=prev_idx+1 or r.get_has_structure()!=prev_structure or force_break:
314  segments.append(cur_seg)
315  cur_seg = []
316  force_break = False
317  cur_seg.append(r)
318  prev_idx = r.get_index()
319  prev_structure = r.get_has_structure()
320  if r.get_index()-1 in rep.bead_extra_breaks:
321  force_break = True
322  if cur_seg!=[]:
323  segments.append(cur_seg)
324 
325  # for each segment, merge into beads
326  name_all = 'frags:'
327  name_count = 0
328  for frag_res in segments:
329  res_nums = [r.get_index() for r in frag_res]
330  rrange = "%i-%i"%(res_nums[0],res_nums[-1])
331  name = "Frag_"+rrange
332  if name_count<3:
333  name_all +=rrange+','
334  elif name_count==3:
335  name_all +='...'
336  name_count+=1
337  segp = IMP.Particle(model,name)
338  this_segment = IMP.atom.Fragment.setup_particle(segp,res_nums)
339  if not single_node:
340  this_representation = IMP.atom.Representation.setup_particle(segp,primary_resolution)
341  built_reps.append(this_representation)
342  for resolution in rep.bead_resolutions:
343  fp = IMP.Particle(model)
344  this_resolution = IMP.atom.Fragment.setup_particle(fp,res_nums)
345  this_resolution.set_name("%s: Res %i"%(name,resolution))
346  if frag_res[0].get_has_structure():
347  _add_fragment_provenance(this_resolution, frag_res[0],
348  rephandler)
349  # if structured, merge particles as needed
350  if resolution==atomic_res:
351  for residue in frag_res:
352  this_resolution.add_child(residue.get_hierarchy())
353  elif resolution==ca_res and rep.bead_ca_centers:
354  beads = build_ca_centers(model,frag_res)
355  for bead in beads:
356  this_resolution.add_child(bead)
357  else:
358  tempc = IMP.atom.Chain.setup_particle(IMP.Particle(model),"X")
359  for residue in frag_res:
360  tempc.add_child(IMP.atom.create_clone(residue.hier))
361  beads = IMP.atom.create_simplified_along_backbone(tempc,resolution)
362  for bead in beads.get_children():
363  this_resolution.add_child(bead)
364  del tempc
365  del beads
366  else:
367  # if unstructured, create necklace
368  input_coord = coord_finder.find_nearest_coord(min(r.get_index() for r in frag_res))
369  if input_coord is None:
370  input_coord = rep.bead_default_coord
371  beads = build_necklace(model,
372  frag_res,
373  resolution,
374  input_coord)
375  for bead in beads:
376  this_resolution.add_child(bead)
377 
378  # if requested, color all resolutions the same
379  if color:
380  for lv in IMP.core.get_leaves(this_resolution):
382 
383  # finally decide where to put this resolution
384  # if volumetric, collect resolutions from different segments together
385  if single_node:
386  rep_dict[resolution]+=this_resolution.get_children()
387  else:
388  if resolution==primary_resolution:
389  this_representation.add_child(this_resolution)
390  else:
391  this_representation.add_representation(this_resolution,
392  IMP.atom.BALLS,
393  resolution)
394  # if individual beads to be setup as Gaussians:
395  if rep.setup_particles_as_densities:
396  for p in IMP.core.get_leaves(this_resolution):
397  setup_bead_as_gaussian(p)
398  this_resolution.set_name(this_resolution.get_name()+' Densities %i'%resolution)
399  this_representation.add_representation(this_resolution,
400  IMP.atom.DENSITIES,
401  resolution)
402 
403  if single_node:
404  root_representation.set_name(name_all.strip(',')+": Base")
405  d = root_representation.get_representations(IMP.atom.DENSITIES)
406  d[0].set_name('%s: '%name_all + d[0].get_name())
407  for resolution in rep.bead_resolutions:
408  this_resolution = IMP.atom.Fragment.setup_particle(
409  IMP.Particle(model),
410  [r.get_index() for r in rep.residues])
411  this_resolution.set_name("%s: Res %i"%(name_all,resolution))
412  for hier in rep_dict[resolution]:
413  this_resolution.add_child(hier)
414  if resolution==primary_resolution:
415  root_representation.add_child(this_resolution)
416  else:
417  root_representation.add_representation(this_resolution,
418  IMP.atom.BALLS,
419  resolution)
420  return built_reps
def list_chunks_iterator
Yield successive length-sized chunks from a list.
Definition: tools.py:645
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_volume_from_residue_type(ResidueType rt)
Return an estimate for the volume of a given residue.
Simple 3D transformation class.
Represent an RGB color.
Definition: Color.h:24
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:48
void show_molecular_hierarchy(Hierarchy h)
Print out the molecular hierarchy.
static Fragment setup_particle(Model *m, ParticleIndex pi)
Definition: Fragment.h:63
double get_mass(const Selection &s)
Get the total mass of a hierarchy, in Daltons.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Select atoms which are selected by both selectors.
Definition: pdb.h:348
double get_mass(ResidueType c)
Get the mass from the residue type.
static StructureProvenance setup_particle(Model *m, ParticleIndex pi, std::string filename, std::string chain_id, int residue_offset)
Definition: provenance.h:157
Color get_rgb_color(double f)
Return the color for f from the RGB color map.
double get_mass_from_number_of_residues(unsigned int num_aa)
Estimate the mass of a protein from the number of amino acids.
Miscellaneous utilities.
Definition: tools.py:1
double get_ball_radius_from_volume_3d(double volume)
Return the radius of a sphere with a given volume.
Definition: Sphere3D.h:35
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:158
static Representation setup_particle(Model *m, ParticleIndex pi)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
A reference frame in 3D.
def color2rgb
Given a Chimera color name or hex color value, return RGB.
Definition: tools.py:1570
Warning related to handling of structures.
A Gaussian distribution in 3D.
Definition: Gaussian3D.h:24
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:233
A decorator for a representation.
double get_volume_from_mass(double m, ProteinDensityReference ref=ALBER)
Estimate the volume of a protein from its mass.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
The type for a residue.
PDBSelector * get_default_pdb_selector()
Definition: pdb.h:473
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Colored setup_particle(Model *m, ParticleIndex pi, Color color)
Definition: Colored.h:62
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:115
A decorator for a residue.
Definition: Residue.h:135
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Hierarchy create_simplified_along_backbone(Chain input, const IntRanges &residue_segments, bool keep_detailed=false)
Rotation3D get_identity_rotation_3d()
Return a rotation that does not do anything.
Definition: Rotation3D.h:356
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Select all CA ATOM records.
Definition: pdb.h:77
Python classes to represent, score, sample and analyze models.
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:64
Functionality for loading, creating, manipulating and scoring atomic structures.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
An exception for an invalid value being passed to IMP.
Definition: exception.h:137
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:23
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27