IMP logo
IMP Reference Guide  develop.298718d020,2025/02/22
The Integrative Modeling Platform
system_tools.py
1 import IMP
2 import IMP.atom
4 import IMP.pmi
5 import IMP.pmi.tools
6 from collections import defaultdict
7 from math import pi
8 import os
9 import warnings
10 
11 # IMP doesn't statically define an atom type for CA atoms in modified residues
12 # (HETATM records) so add it here
13 _AT_HET_CA = IMP.atom.AtomType("HET: CA ")
14 
15 
16 def resnums2str(res):
17  """Take iterable of TempResidues and return compatified string"""
18  if len(res) == 0:
19  return ''
20  idxs = [r.get_index() for r in res]
21  idxs.sort()
22  all_ranges = []
23  cur_range = [idxs[0], idxs[0]]
24  for idx in idxs[1:]:
25  if idx != cur_range[1]+1:
26  all_ranges.append(cur_range)
27  cur_range = [idx, idx]
28  cur_range[1] = idx
29  all_ranges.append(cur_range)
30  ret = ''
31  for nr, r in enumerate(all_ranges):
32  ret += '%i-%i' % (r[0], r[1])
33  if nr < len(all_ranges)-1:
34  ret += ', '
35  return ret
36 
37 
38 def _select_ca_or_p(hiers, **kwargs):
39  """Select all CA (amino acids) or P (nucleic acids) as appropriate"""
40  sel_p = IMP.atom.Selection(hiers, atom_type=IMP.atom.AT_P, **kwargs)
41  ps = sel_p.get_selected_particles()
42  if ps:
43  # detected nucleotides. Selecting phosphorous instead of CA
44  return ps
45  else:
46  # Also select CA atoms in modified residues (such as MSE)
47  sel = IMP.atom.Selection(hiers, atom_type=IMP.atom.AT_CA, **kwargs) \
48  | IMP.atom.Selection(hiers, residue_type=IMP.atom.MSE,
49  atom_type=_AT_HET_CA, **kwargs)
50  return sel.get_selected_particles()
51 
52 
53 def get_structure(model, pdb_fn, chain_id, res_range=None, offset=0,
54  model_num=None, ca_only=False):
55  """read a structure from a PDB file and return a list of residues
56  @param model The IMP model
57  @param pdb_fn The file to read (in mmCIF, BinaryCIF, or legacy PDB format)
58  @param chain_id Chain ID to read
59  @param res_range Add only a specific set of residues.
60  res_range[0] is the starting and res_range[1] is the ending
61  residue index
62  The ending residue can be "END", that will take everything
63  to the end of the sequence.
64  None gets you all.
65  @param offset Apply an offset to the residue indexes of the PDB file
66  @param model_num Read multi-model PDB and return that model (0-based index)
67  @param ca_only Read only CA atoms (by default, all non-waters are read)
68  """
69  # Read file in mmCIF or BinaryCIF format if requested
70  if pdb_fn.endswith('.cif'):
71  read_file = IMP.atom.read_mmcif
72  read_multi_file = IMP.atom.read_multimodel_mmcif
73  elif pdb_fn.endswith('.bcif'):
74  read_file = IMP.atom.read_bcif
75  read_multi_file = IMP.atom.read_multimodel_bcif
76  else:
77  read_file = IMP.atom.read_pdb
78  read_multi_file = IMP.atom.read_multimodel_pdb
79  if ca_only:
81  else:
83 
84  reader = read_file if model_num is None else read_multi_file
85  mh = reader(pdb_fn, model, IMP.atom.ChainPDBSelector([chain_id]) & sel)
86  if model_num is not None:
87  mh = mh[model_num]
88 
89  if res_range == [] or res_range is None:
90  ps = _select_ca_or_p(mh, chain=chain_id)
91  else:
92  start = res_range[0]
93  end = res_range[1]
94  if end == "END":
95  end = IMP.atom.Residue(
96  mh.get_children()[0].get_children()[-1]).get_index()
97  ps = _select_ca_or_p(mh, chain=chain_id,
98  residue_indexes=range(start, end+1))
99  ret = []
100 
101  for p in ps:
102  res = IMP.atom.Residue(IMP.atom.Atom(p).get_parent())
103  res.set_index(res.get_index() + offset)
104  ret.append(res)
105  if len(ret) == 0:
106  warnings.warn(
107  "no residues selected from %s in range %s" % (pdb_fn, res_range),
109  return ret
110 
111 
112 def build_bead(model, residues, input_coord=None):
113  """Generates a single bead"""
114 
115  ds_frag = (residues[0].get_index(), residues[-1].get_index())
116  prt = IMP.Particle(model)
118  ptem = IMP.core.XYZR(prt)
119  mass = IMP.atom.get_mass_from_number_of_residues(len(residues))
120 
121  if ds_frag[0] == ds_frag[-1]:
122  rt = residues[0].get_residue_type()
123  h = IMP.atom.Residue.setup_particle(prt, rt, ds_frag[0])
124  h.set_name('%i_bead' % (ds_frag[0]))
125  prt.set_name('%i_bead' % (ds_frag[0]))
126  try:
128  except IMP.ValueException:
130  IMP.atom.ResidueType("ALA"))
132  ptem.set_radius(radius)
133  else:
135  h.set_name('%i-%i_bead' % (ds_frag[0], ds_frag[-1]))
136  prt.set_name('%i-%i_bead' % (ds_frag[0], ds_frag[-1]))
137  h.set_residue_indexes(range(ds_frag[0], ds_frag[-1] + 1))
138  volume = IMP.atom.get_volume_from_mass(mass)
139  radius = 0.8 * (3.0 / 4.0 / pi * volume) ** (1.0 / 3.0)
140  ptem.set_radius(radius)
141 
143  try:
144  if tuple(input_coord) is not None:
145  ptem.set_coordinates(input_coord)
146  except TypeError:
147  pass
148  return h
149 
150 
151 def build_necklace(model, residues, resolution, input_coord=None):
152  """Generates a string of beads with given length"""
153  out_hiers = []
154  for chunk in list(IMP.pmi.tools.list_chunks_iterator(residues,
155  resolution)):
156  out_hiers.append(build_bead(model, chunk, input_coord=input_coord))
157  return out_hiers
158 
159 
160 def build_ca_centers(model, residues):
161  """Create a bead on the CA position with coarsened size and mass"""
162  out_hiers = []
163  for tempres in residues:
164  residue = tempres.get_hierarchy()
165  rp1 = IMP.Particle(model)
166  rp1.set_name("Residue_%i" % residue.get_index())
167  rt = residue.get_residue_type()
168  this_res = IMP.atom.Residue.setup_particle(rp1, residue)
169  try:
171  except IMP.ValueException:
173  IMP.atom.ResidueType("ALA"))
174  try:
175  mass = IMP.atom.get_mass(rt)
176  except Exception:
178  calpha = IMP.atom.Selection(
179  residue,
180  atom_types=[IMP.atom.AT_CA, _AT_HET_CA]).get_selected_particles()
181  cp = IMP.atom.Selection(
182  residue, atom_type=IMP.atom.AT_P).get_selected_particles()
183 
184  if len(calpha) == 1:
185  central_atom = calpha[0]
186  elif len(cp) == 1:
187  central_atom = cp[0]
188  else:
189  raise ValueError(
190  "build_ca_centers: weird selection (no CA, no "
191  "nucleotide P or ambiguous selection found)")
193  shape = IMP.algebra.Sphere3D(
194  IMP.core.XYZ(central_atom).get_coordinates(), radius)
195  IMP.core.XYZR.setup_particle(rp1, shape)
197  out_hiers.append(this_res)
198  return out_hiers
199 
200 
201 def setup_bead_as_gaussian(mh):
202  """Setup bead as spherical gaussian, using radius as variance"""
203  p = mh.get_particle()
204  center = IMP.core.XYZ(p).get_coordinates()
205  rad = IMP.core.XYZR(p).get_radius()
209  [rad]*3)
211 
212 
213 def show_representation(node):
214  print(node)
216  repr = IMP.atom.Representation(node)
217  resolutions = repr.get_resolutions()
218  for r in resolutions:
219  print('---- resolution %i ----' % r)
220  IMP.atom.show_molecular_hierarchy(repr.get_representation(r))
221  return True
222  else:
223  return False
224 
225 
226 def _get_color_for_representation(rep):
227  """Return an IMP.display.Color object (or None) for the given
228  Representation."""
229  if rep.color is not None:
230  if isinstance(rep.color, float):
231  return IMP.display.get_rgb_color(rep.color)
232  elif isinstance(rep.color, str):
233  return IMP.display.Color(*IMP.pmi.tools.color2rgb(rep.color))
234  elif hasattr(rep.color, '__iter__') and len(rep.color) == 3:
235  return IMP.display.Color(*rep.color)
236  elif isinstance(rep.color, IMP.display.Color):
237  return rep.color
238  else:
239  raise TypeError("Color must be Chimera color name, a hex "
240  "string, a float or (r,g,b) tuple")
241 
242 
243 def _add_fragment_provenance(fragment, first_residue, rephandler):
244  """Track the original source of a fragment's structure.
245  If the residues in the given fragment were extracted from a PDB
246  file, add suitable provenance information to the Model (the name
247  of that file, chain ID, and residue index offset)."""
248  pdb_element = rephandler.pdb_for_residue.get(first_residue.get_index())
249  if pdb_element:
250  m = fragment.get_model()
251  p = IMP.Particle(m, "input structure")
253  p, pdb_element.filename, pdb_element.chain_id, pdb_element.offset)
254  IMP.core.add_provenance(m, fragment, sp)
255  return pdb_element
256 
257 
258 def build_representation(parent, rep, coord_finder, rephandler):
259  """Create requested representation.
260  For beads, identifies continuous segments and sets up as Representation.
261  If any volume-based representations (e.g.,densities) are requested,
262  will instead create a single Representation node.
263  All reps are added as children of the passed parent.
264  @param parent The Molecule to which we'll add representations
265  @param rep What to build. An instance of pmi::topology::_Representation
266  @param coord_finder A _FindCloseStructure object to help localize beads
267  """
268  built_reps = []
269  atomic_res = 0
270  ca_res = 1
271  model = parent.hier.get_model()
272  color = _get_color_for_representation(rep)
273 
274  # first get the primary representation (currently, the smallest bead size)
275  # eventually we won't require beads to be present at all
276  primary_resolution = min(rep.bead_resolutions)
277 
278  # if collective densities, will return single node with everything
279  # below we sample or read the GMMs and add them as representation
280  # flag indicating grouping nonlinear segments with one GMM
281  single_node = False
282  prov_dict = {}
283  if rep.density_residues_per_component:
284  single_node = True
285  num_components = (len(rep.residues)
286  // rep.density_residues_per_component+1)
287  rep_dict = defaultdict(list)
288  segp = IMP.Particle(model)
289  root_representation = IMP.atom.Representation.setup_particle(
290  segp, primary_resolution)
291  built_reps.append(root_representation)
292  res_nums = [r.get_index() for r in rep.residues]
293  IMP.atom.Fragment.setup_particle(segp, res_nums)
294  density_frag = IMP.atom.Fragment.setup_particle(
295  IMP.Particle(model), res_nums)
296  density_frag.get_particle().set_name(
297  "Densities %i" % rep.density_residues_per_component)
298  density_ps = []
299 
300  if os.path.exists(rep.density_prefix + '.txt') \
301  and not rep.density_force_compute:
303  rep.density_prefix + '.txt', density_ps, model)
304  if (len(density_ps) != num_components
305  or not os.path.exists(rep.density_prefix + '.txt')
306  or rep.density_force_compute):
307  fit_coords = []
308  total_mass = 0.0
309  for r in rep.residues:
310  for p in IMP.core.get_leaves(r.hier):
311  fit_coords.append(IMP.core.XYZ(p).get_coordinates())
312  total_mass += IMP.atom.Mass(p).get_mass()
313 
314  # fit GMM
315  density_ps = []
317  num_components,
318  model,
319  density_ps,
320  min_covar=4.0,
321  mass_multiplier=total_mass)
322 
324  rep.density_prefix + '.txt')
325  if rep.density_voxel_size > 0.0:
327  density_ps, rep.density_prefix + '.mrc',
328  rep.density_voxel_size, fast=True)
329 
330  for n, d in enumerate(density_ps):
331  d.set_name('Density #%d' % n)
332  density_frag.add_child(d)
333  root_representation.add_representation(
334  density_frag, IMP.atom.DENSITIES,
335  rep.density_residues_per_component)
336 
337  # get continuous segments from residues
338  segments = []
339  rsort = sorted(list(rep.residues), key=lambda r: r.get_index())
340  prev_idx = rsort[0].get_index()-1
341  prev_structure = rsort[0].get_has_structure()
342  cur_seg = []
343  force_break = False
344  for nr, r in enumerate(rsort):
345  if (r.get_index() != prev_idx+1
346  or r.get_has_structure() != prev_structure or force_break):
347  segments.append(cur_seg)
348  cur_seg = []
349  force_break = False
350  cur_seg.append(r)
351  prev_idx = r.get_index()
352  prev_structure = r.get_has_structure()
353  if r.get_index()-1 in rep.bead_extra_breaks:
354  force_break = True
355  if cur_seg != []:
356  segments.append(cur_seg)
357 
358  # for each segment, merge into beads
359  name_all = 'frags:'
360  name_count = 0
361  for frag_res in segments:
362  res_nums = [r.get_index() for r in frag_res]
363  rrange = "%i-%i" % (res_nums[0], res_nums[-1])
364  name = "Frag_" + rrange
365  if name_count < 3:
366  name_all += rrange + ','
367  elif name_count == 3:
368  name_all += '...'
369  name_count += 1
370  segp = IMP.Particle(model, name)
371  IMP.atom.Fragment.setup_particle(segp, res_nums)
372  if not single_node:
373  this_representation = IMP.atom.Representation.setup_particle(
374  segp, primary_resolution)
375  built_reps.append(this_representation)
376  for resolution in rep.bead_resolutions:
377  fp = IMP.Particle(model)
378  this_resolution = IMP.atom.Fragment.setup_particle(fp, res_nums)
379  this_resolution.set_name("%s: Res %i" % (name, resolution))
380  if frag_res[0].get_has_structure():
381  pdb_element = _add_fragment_provenance(
382  this_resolution, frag_res[0], rephandler)
383  if pdb_element is not None:
384  prov_dict[resolution] = pdb_element
385  # if structured, merge particles as needed
386  if resolution == atomic_res:
387  for residue in frag_res:
388  this_resolution.add_child(residue.get_hierarchy())
389  elif resolution == ca_res and rep.bead_ca_centers:
390  beads = build_ca_centers(model, frag_res)
391  for bead in beads:
392  this_resolution.add_child(bead)
393  else:
395  "X")
396  for residue in frag_res:
397  tempc.add_child(IMP.atom.create_clone(residue.hier))
399  tempc, resolution)
400  for bead in beads.get_children():
401  this_resolution.add_child(bead)
402  del tempc
403  del beads
404  else:
405  # if unstructured, create necklace
406  input_coord = coord_finder.find_nearest_coord(
407  min(r.get_index() for r in frag_res))
408  if input_coord is None:
409  input_coord = rep.bead_default_coord
410  beads = build_necklace(model,
411  frag_res,
412  resolution,
413  input_coord)
414  for bead in beads:
415  this_resolution.add_child(bead)
416 
417  # if requested, color all resolutions the same
418  if color:
419  for lv in IMP.core.get_leaves(this_resolution):
421 
422  # finally decide where to put this resolution
423  # if volumetric, collect resolutions from different
424  # segments together
425  if single_node:
426  rep_dict[resolution] += this_resolution.get_children()
427  else:
428  if resolution == primary_resolution:
429  this_representation.add_child(this_resolution)
430  else:
431  this_representation.add_representation(this_resolution,
432  IMP.atom.BALLS,
433  resolution)
434  # if individual beads to be setup as Gaussians:
435  if rep.setup_particles_as_densities:
436  for p in IMP.core.get_leaves(this_resolution):
437  setup_bead_as_gaussian(p)
438  this_resolution.set_name(
439  this_resolution.get_name() + ' Densities %i' % resolution)
440  this_representation.add_representation(this_resolution,
441  IMP.atom.DENSITIES,
442  resolution)
443 
444  if single_node:
445  root_representation.set_name(name_all.strip(',') + ": Base")
446  d = root_representation.get_representations(IMP.atom.DENSITIES)
447  d[0].set_name('%s: ' % name_all + d[0].get_name())
448  for resolution in rep.bead_resolutions:
449  this_resolution = IMP.atom.Fragment.setup_particle(
450  IMP.Particle(model),
451  [r.get_index() for r in rep.residues])
452  this_resolution.set_name("%s: Res %i" % (name_all, resolution))
453  # Use provenance information from the last original node (hopefully
454  # all nodes have the same provenance, i.e. came from the same
455  # PDB file)
456  if prov_dict.get(resolution):
457  pdb_element = prov_dict[resolution]
459  IMP.Particle(model, "input structure"),
460  pdb_element.filename,
461  pdb_element.chain_id, pdb_element.offset)
462  IMP.core.add_provenance(model, this_resolution, sp)
463  for hier in rep_dict[resolution]:
464  this_resolution.add_child(hier)
465  if resolution == primary_resolution:
466  root_representation.add_child(this_resolution)
467  else:
468  root_representation.add_representation(this_resolution,
469  IMP.atom.BALLS,
470  resolution)
471  return built_reps
def list_chunks_iterator
Yield successive length-sized chunks from a list.
Definition: tools.py:590
Tools for handling Gaussian Mixture Models.
Definition: gmm_tools.py:1
Add mass to a particle.
Definition: Mass.h:23
double get_volume_from_residue_type(ResidueType rt)
Return an estimate for the volume of a given residue.
Simple 3D transformation class.
Represent an RGB color.
Definition: Color.h:25
static Gaussian setup_particle(Model *m, ParticleIndex pi)
Definition: core/Gaussian.h:65
void show_molecular_hierarchy(Hierarchy h)
Print out the molecular hierarchy.
static Fragment setup_particle(Model *m, ParticleIndex pi)
Definition: Fragment.h:67
double get_mass(const Selection &s)
Get the total mass of a hierarchy, in Daltons.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
double get_mass(ResidueType c)
Get the mass from the residue type.
static StructureProvenance setup_particle(Model *m, ParticleIndex pi, std::string filename, std::string chain_id, int residue_offset)
Definition: provenance.h:157
Color get_rgb_color(double f)
Return the color for f from the RGB color map.
double get_mass_from_number_of_residues(unsigned int num_aa)
Estimate the mass of a protein from the number of amino acids.
Miscellaneous utilities.
Definition: tools.py:1
double get_ball_radius_from_volume_3d(double volume)
Return the radius of a sphere with a given volume.
Definition: Sphere3D.h:35
The type of an atom.
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
static Representation setup_particle(Model *m, ParticleIndex pi)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
A reference frame in 3D.
def color2rgb
Given a Chimera color name or hex color value, return RGB.
Definition: tools.py:1558
Warning related to handling of structures.
A Gaussian distribution in 3D.
Definition: Gaussian3D.h:25
def fit_gmm_to_points
fit a GMM to some points.
Definition: gmm_tools.py:243
A decorator for a representation.
double get_volume_from_mass(double m, ProteinDensityReference ref=ALBER)
Estimate the volume of a protein from its mass.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
static Mass setup_particle(Model *m, ParticleIndex pi, Float mass)
Definition: Mass.h:48
The type for a residue.
PDBSelector * get_default_pdb_selector()
Definition: pdb.h:542
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Colored setup_particle(Model *m, ParticleIndex pi, Color color)
Definition: Colored.h:62
def write_gmm_to_map
write density map from GMM.
Definition: gmm_tools.py:118
A decorator for a residue.
Definition: Residue.h:137
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Hierarchy create_simplified_along_backbone(Chain input, const IntRanges &residue_segments, bool keep_detailed=false)
Rotation3D get_identity_rotation_3d()
Return a rotation that does not do anything.
Definition: Rotation3D.h:352
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Select all CA ATOM records.
Definition: pdb.h:142
Python classes to represent, score, sample and analyze models.
def write_gmm_to_text
write a list of gaussians to text.
Definition: gmm_tools.py:60
Functionality for loading, creating, manipulating and scoring atomic structures.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:84
An exception for an invalid value being passed to IMP.
Definition: exception.h:136
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256
def decorate_gmm_from_text
read the output from write_gmm_to_text, decorate as Gaussian and Mass
Definition: gmm_tools.py:22
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27