IMP logo
IMP Reference Guide  develop.5d113ecbfd,2023/06/06
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from a PDB file.
17  * Molecule.add_representation() to create a representation unit - here you
18  can choose bead resolutions as well as alternate representations like
19  densities or ideal helices.
20  * Molecule.create_clone() lets you set up a molecule with identical
21  representations, just a different chain ID. Use Molecule.create_copy()
22  if you want a molecule with the same sequence but that allows custom
23  representations.
24 * Once data has been added and representations chosen, call System.build()
25  to create a canonical IMP hierarchy.
26 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
27  in IMP::pmi::dof.
28 * Check your representation with a nice printout:
29  IMP::atom::show_with_representation()
30 
31 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
32 
33 Alternatively one can construct the entire topology and degrees of freedom
34 via formatted text file with TopologyReader and
35 IMP::pmi::macros::BuildSystem(). This is used in the
36 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
37 set of the full options available to PMI users
38 (rigid bodies only, fixed resolutions).
39 """ # noqa: E501
40 
41 from __future__ import print_function
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase(object):
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class System(_SystemBase):
137  """Represent the root node of the global IMP.atom.Hierarchy."""
138 
139  _all_systems = set()
140 
141  def __init__(self, model=None, name="System"):
142  """Constructor.
143 
144  @param model The IMP::Model in which to construct this system.
145  @param name The name of the top-level hierarchy node.
146  """
147  _SystemBase.__init__(self, model)
148  self._number_of_states = 0
149  self._protocol_output = []
150  self.states = []
151  self.built = False
152 
153  System._all_systems.add(weakref.ref(self))
154 
155  # the root hierarchy node
156  self.hier = self._create_hierarchy()
157  self.hier.set_name(name)
158  self.hier._pmi2_system = weakref.ref(self)
159 
160  def __del__(self):
161  System._all_systems = set(x for x in System._all_systems
162  if x() not in (None, self))
163 
164  def get_states(self):
165  """Get a list of all State objects in this system"""
166  return self.states
167 
168  def create_state(self):
169  """Makes and returns a new IMP.pmi.topology.State in this system"""
170  self._number_of_states += 1
171  state = State(self, self._number_of_states-1)
172  self.states.append(state)
173  return state
174 
175  def __repr__(self):
176  return self.hier.get_name()
177 
179  """Returns the total number of states generated"""
180  return self._number_of_states
181 
182  def get_hierarchy(self):
183  """Return the top-level IMP.atom.Hierarchy node for this system"""
184  return self.hier
185 
186  def build(self, **kwargs):
187  """Build all states"""
188  if not self.built:
189  for state in self.states:
190  state.build(**kwargs)
191  self.built = True
192  return self.hier
193 
194  def add_protocol_output(self, p):
195  """Capture details of the modeling protocol.
196  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
197  """
198  self._protocol_output.append(p)
199 # p._each_metadata.append(self._metadata)
200 # p._file_datasets.append(self._file_dataset)
201  for state in self.states:
202  state._add_protocol_output(p, self)
203 
204 
205 class State(_SystemBase):
206  """Stores a list of Molecules all with the same State index.
207  Also stores number of copies of each Molecule for easy selection.
208  """
209  def __init__(self, system, state_index):
210  """Define a new state
211  @param system the PMI System
212  @param state_index the index of the new state
213  @note It's expected that you will not use this constructor directly,
214  but rather create it with System.create_state()
215  """
216  self.model = system.get_hierarchy().get_model()
217  self.system = system
218  self.hier = self._create_child(system.get_hierarchy())
219  self.short_name = self.long_name = "State_" + str(state_index)
220  self.hier.set_name(self.short_name)
221  # key is molecule name. value are the molecule copies!
222  self.molecules = IMP.pmi.tools.OrderedDict()
223  IMP.atom.State.setup_particle(self.hier, state_index)
224  self.built = False
225  self._protocol_output = []
226  for p in system._protocol_output:
227  self._add_protocol_output(p, system)
228 
229  def __repr__(self):
230  return self.system.__repr__()+'.'+self.hier.get_name()
231 
232  def _add_protocol_output(self, p, system):
233  state = p._add_state(self)
234  self._protocol_output.append((p, state))
235  state.model = system.model
236  state.prot = self.hier
237 
238  def get_molecules(self):
239  """Return a dictionary where key is molecule name and value
240  is a list of all copies of that molecule in setup order"""
241  return self.molecules
242 
243  def get_molecule(self, name, copy_num=0):
244  """Access a molecule by name and copy number
245  @param name The molecule name used during setup
246  @param copy_num The copy number based on input order.
247  Default: 0. Set to 'all' to get all copies
248  """
249  if name not in self.molecules:
250  raise KeyError("Could not find molname %s" % name)
251  if copy_num == 'all':
252  return self.molecules[name]
253  else:
254  return self.molecules[name][copy_num]
255 
256  def create_molecule(self, name, sequence='', chain_id='',
257  alphabet=IMP.pmi.alphabets.amino_acid):
258  """Create a new Molecule within this State
259  @param name the name of the molecule (string);
260  it must not be already used
261  @param sequence sequence (string)
262  @param chain_id Chain ID to assign to this molecule
263  @param alphabet Mapping from FASTA codes to residue types
264  """
265  # check whether the molecule name is already assigned
266  if name in self.molecules:
267  raise ValueError('Cannot use a molecule name already used')
268 
269  # check for something that looks like a copy number
270  if re.search(r'\.\d+$', name):
271  warnings.warn(
272  "It is recommended not to end the molecule name with "
273  ".(number) as it may be confused with the copy number "
274  "(the copy number for new molecules is always 0, so to "
275  "select this molecule, use '%s.0'). Use create_clone() or "
276  "create_copy() instead if a copy of an existing molecule "
277  "is desired." % name, IMP.pmi.ParameterWarning)
278 
279  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
280  alphabet=alphabet)
281  self.molecules[name] = [mol]
282  return mol
283 
284  def get_hierarchy(self):
285  """Get the IMP.atom.Hierarchy node for this state"""
286  return self.hier
287 
288  def get_number_of_copies(self, molname):
289  """Get the number of copies of the given molecule (by name)
290 
291  @param molname The name of the molecule
292  """
293  return len(self.molecules[molname])
294 
295  def _register_copy(self, molecule):
296  molname = molecule.get_hierarchy().get_name()
297  self.molecules[molname].append(molecule)
298 
299  def build(self, **kwargs):
300  """Build all molecules (automatically makes clones)"""
301  if not self.built:
302  for molname in self.molecules:
303  for mol in reversed(self.molecules[molname]):
304  mol.build(**kwargs)
305  self.built = True
306  return self.hier
307 
308 
309 # Track residues read from PDB files
310 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
311 
312 
313 class _RepresentationHandler(object):
314  """Handle PMI representation and use it to populate that of any attached
315  ProtocolOutput objects"""
316  def __init__(self, name, pos, pdb_elements):
317  self.name = name
318  self.pos = pos
319  self.last_index = None
320  self.last_pdb_index = None
321  self.pdb_for_residue = {}
322  for residues, pdb in pdb_elements:
323  for r in residues:
324  self.pdb_for_residue[r.get_index()] = pdb
325 
326  def _get_pdb(self, h):
327  """Return a PDBElement if the given hierarchy was read from a
328  PDB file"""
330  rind = IMP.atom.Residue(h).get_index()
331  return self.pdb_for_residue.get(rind, None)
332 
333  def __call__(self, res):
334  """Handle a single residue"""
335  if len(self.pos) == 0:
336  return
337  h = res.hier
338  pi = h.get_particle_index()
339  # Do nothing if we already saw this hierarchy
340  if self.last_index is None or pi != self.last_index:
341  pdb = self._get_pdb(h)
342  self.last_index = pi
343  if pdb:
344  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
345  frag = IMP.atom.Fragment(h.get_parent())
346  fragi = frag.get_particle_index()
347  # Do nothing if we already saw this PDB fragment
348  if self.last_pdb_index is not None \
349  and self.last_pdb_index == fragi:
350  return
351  self.last_pdb_index = fragi
352  indices = frag.get_residue_indexes()
353  for p, state in self.pos:
354  p.add_pdb_element(state, self.name,
355  indices[0], indices[-1], pdb.offset,
356  pdb.filename, pdb.chain_id, frag)
358  frag = IMP.atom.Fragment(h)
359  indices = frag.get_residue_indexes()
360  for p, state in self.pos:
361  p.add_bead_element(state, self.name,
362  indices[0], indices[-1], 1, h)
364  resind = IMP.atom.Residue(h).get_index()
365  for p, state in self.pos:
366  p.add_bead_element(state, self.name, resind, resind, 1, h)
367  else:
368  raise TypeError("Unhandled hierarchy %s" % str(h))
369 
370 
371 class Molecule(_SystemBase):
372  """Stores a named protein chain.
373  This class is constructed from within the State class.
374  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
375  Structure is read using this class.
376  Resolutions and copies can be registered, but are only created
377  when build() is called.
378 
379  A Molecule acts like a simple Python list of residues, and can be indexed
380  by integer (starting at zero) or by string (starting at 1).
381  """
382 
383  def __init__(self, state, name, sequence, chain_id, copy_num,
384  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid):
385  """The user should not call this directly; instead call
386  State.create_molecule()
387 
388  @param state The parent PMI State
389  @param name The name of the molecule (string)
390  @param sequence Sequence (string)
391  @param chain_id The chain of this molecule
392  @param copy_num Store the copy number
393  @param mol_to_clone The original molecule (for cloning ONLY)
394  @note It's expected that you will not use this constructor directly,
395  but rather create a Molecule with State.create_molecule()
396  """
397  # internal data storage
398  self.model = state.get_hierarchy().get_model()
399  self.state = state
400  self.sequence = sequence
401  self.built = False
402  self.mol_to_clone = mol_to_clone
403  self.alphabet = alphabet
404  self.representations = [] # list of stuff to build
405  self._pdb_elements = []
406  # residues with representation
407  self._represented = IMP.pmi.tools.OrderedSet()
408  # helps you place beads by storing structure
409  self.coord_finder = _FindCloseStructure()
410  # list of OrderedSets of tempresidues set to ideal helix
411  self._ideal_helices = []
412 
413  # create root node and set it as child to passed parent hierarchy
414  self.hier = self._create_child(self.state.get_hierarchy())
415  self.hier.set_name(name)
416  IMP.atom.Copy.setup_particle(self.hier, copy_num)
417  self._name_with_copy = "%s.%d" % (name, copy_num)
418  # store the sequence
419  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
420  self.chain.set_sequence(self.sequence)
421  self.chain.set_chain_type(alphabet.get_chain_type())
422  # create TempResidues from the sequence (if passed)
423  self.residues = []
424  for ns, s in enumerate(sequence):
425  r = TempResidue(self, s, ns+1, ns, alphabet)
426  self.residues.append(r)
427 
428  def __repr__(self):
429  return self.state.__repr__() + '.' + self.get_name() + '.' + \
430  str(IMP.atom.Copy(self.hier).get_copy_index())
431 
432  def __getitem__(self, val):
433  if isinstance(val, int):
434  return self.residues[val]
435  elif isinstance(val, str):
436  return self.residues[int(val)-1]
437  elif isinstance(val, slice):
438  return IMP.pmi.tools.OrderedSet(self.residues[val])
439  else:
440  raise TypeError("Indexes must be int or str")
441 
442  def get_hierarchy(self):
443  """Return the IMP Hierarchy corresponding to this Molecule"""
444  return self.hier
445 
446  def get_name(self):
447  """Return this Molecule name"""
448  return self.hier.get_name()
449 
450  def get_state(self):
451  """Return the State containing this Molecule"""
452  return self.state
453 
454  def get_ideal_helices(self):
455  """Returns list of OrderedSets with requested ideal helices"""
456  return self._ideal_helices
457 
458  def residue_range(self, a, b, stride=1):
459  """Get residue range from a to b, inclusive.
460  Use integers to get 0-indexing, or strings to get PDB-indexing"""
461  if isinstance(a, int) and isinstance(b, int) \
462  and isinstance(stride, int):
463  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
464  elif isinstance(a, str) and isinstance(b, str) \
465  and isinstance(stride, int):
466  return IMP.pmi.tools.OrderedSet(
467  self.residues[int(a)-1:int(b):stride])
468  else:
469  raise TypeError("Range ends must be int or str. "
470  "Stride must be int.")
471 
472  def get_residues(self):
473  """Return all modeled TempResidues as a set"""
474  all_res = IMP.pmi.tools.OrderedSet(self.residues)
475  return all_res
476 
477  def get_represented(self):
478  """Return set of TempResidues that have representation"""
479  return self._represented
480 
482  """Return a set of TempResidues that have associated structure
483  coordinates"""
484  atomic_res = IMP.pmi.tools.OrderedSet()
485  for res in self.residues:
486  if res.get_has_structure():
487  atomic_res.add(res)
488  return atomic_res
489 
491  """Return a set of TempResidues that don't have associated
492  structure coordinates"""
493  non_atomic_res = IMP.pmi.tools.OrderedSet()
494  for res in self.residues:
495  if not res.get_has_structure():
496  non_atomic_res.add(res)
497  return non_atomic_res
498 
499  def create_copy(self, chain_id):
500  """Create a new Molecule with the same name and sequence but a
501  higher copy number. Returns the Molecule. No structure or
502  representation will be copied!
503 
504  @param chain_id Chain ID of the new molecule
505  """
506  mol = Molecule(
507  self.state, self.get_name(), self.sequence, chain_id,
508  copy_num=self.state.get_number_of_copies(self.get_name()))
509  self.state._register_copy(mol)
510  return mol
511 
512  def create_clone(self, chain_id):
513  """Create a Molecule clone (automatically builds same structure
514  and representation)
515 
516  @param chain_id If you want to set the chain ID of the copy
517  to something
518  @note You cannot add structure or representations to a clone!
519  """
520  mol = Molecule(
521  self.state, self.get_name(), self.sequence, chain_id,
522  copy_num=self.state.get_number_of_copies(self.get_name()),
523  mol_to_clone=self)
524  self.state._register_copy(mol)
525  return mol
526 
527  def add_structure(self, pdb_fn, chain_id, res_range=[],
528  offset=0, model_num=None, ca_only=False,
529  soft_check=False):
530  """Read a structure and store the coordinates.
531  @return the atomic residues (as a set)
532  @param pdb_fn The file to read
533  @param chain_id Chain ID to read
534  @param res_range Add only a specific set of residues from the PDB
535  file. res_range[0] is the starting and res_range[1]
536  is the ending residue index.
537  @param offset Apply an offset to the residue indexes of the PDB
538  file. This number is added to the PDB sequence.
539  @param model_num Read multi-model PDB and return that model
540  @param ca_only Only read the CA positions from the PDB file
541  @param soft_check If True, it only warns if there are sequence
542  mismatches between the PDB and the Molecule (FASTA)
543  sequence, and uses the sequence from the PDB.
544  If False (Default), it raises an error when there
545  are sequence mismatches.
546  @note If you are adding structure without a FASTA file, set soft_check
547  to True.
548  """
549  if self.mol_to_clone is not None:
550  raise ValueError('You cannot call add_structure() for a clone')
551 
552  self.pdb_fn = pdb_fn
553 
554  # get IMP.atom.Residues from the pdb file
555  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
556  res_range, offset,
557  ca_only=ca_only)
558  self.coord_finder.add_residues(rhs)
559 
560  if len(self.residues) == 0:
561  warnings.warn(
562  "Substituting PDB residue type with FASTA residue type. "
563  "Potentially dangerous.", IMP.pmi.StructureWarning)
564 
565  # Store info for ProtocolOutput usage later
566  self._pdb_elements.append(
567  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
568  chain_id=chain_id)))
569 
570  # load those into TempResidue object
571  # collect integer indexes of atomic residues to return
572  atomic_res = IMP.pmi.tools.OrderedSet()
573  for nrh, rh in enumerate(rhs):
574  pdb_idx = rh.get_index()
575  raw_idx = pdb_idx - 1
576 
577  # add ALA to fill in gaps
578  while len(self.residues) < pdb_idx:
579  r = TempResidue(self, 'A', len(self.residues)+1,
580  len(self.residues),
581  IMP.pmi.alphabets.amino_acid)
582  self.residues.append(r)
583  self.sequence += 'A'
584 
585  internal_res = self.residues[raw_idx]
586  if len(self.sequence) < raw_idx:
587  self.sequence += IMP.atom.get_one_letter_code(
588  rh.get_residue_type())
589  internal_res.set_structure(rh, soft_check)
590  atomic_res.add(internal_res)
591 
592  self.chain.set_sequence(self.sequence)
593  return atomic_res
594 
596  residues=None,
597  resolutions=[],
598  bead_extra_breaks=[],
599  bead_ca_centers=True,
600  bead_default_coord=[0, 0, 0],
601  density_residues_per_component=None,
602  density_prefix=None,
603  density_force_compute=False,
604  density_voxel_size=1.0,
605  setup_particles_as_densities=False,
606  ideal_helix=False,
607  color=None):
608  """Set the representation for some residues. Some options
609  (beads, ideal helix) operate along the backbone. Others (density
610  options) are volumetric.
611  Some of these you can combine e.g., beads+densities or helix+densities
612  See @ref pmi_resolution
613  @param residues Set of PMI TempResidues for adding the representation.
614  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
615  If None, will select all residues for this Molecule.
616  @param resolutions Resolutions for beads representations.
617  If structured, will average along backbone, breaking at
618  sequence breaks. If unstructured, will just create beads.
619  Pass an integer or list of integers
620  @param bead_extra_breaks Additional breakpoints for splitting beads.
621  The value can be the 0-ordered position, after which it'll
622  insert the break.
623  Alternatively pass PDB-style (1-ordered) indices as a string.
624  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
625  @param bead_ca_centers Set to True if you want the resolution=1 beads
626  to be at CA centers (otherwise will average atoms to get
627  center). Defaults to True.
628  @param bead_default_coord Advanced feature. Normally beads are placed
629  at the nearest structure. If no structure provided (like an
630  all bead molecule), the beads go here.
631  @param density_residues_per_component Create density (Gaussian
632  Mixture Model) for these residues. Must also supply
633  density_prefix
634  @param density_prefix Prefix (assuming '.txt') to read components
635  from or write to.
636  If exists, will read unless you set density_force_compute=True.
637  Will also write map (prefix+'.mrc').
638  Must also supply density_residues_per_component.
639  @param density_force_compute Set true to force overwrite density file.
640  @param density_voxel_size Advanced feature. Set larger if densities
641  taking too long to rasterize.
642  Set to 0 if you don't want to create the MRC file
643  @param setup_particles_as_densities Set to True if you want each
644  particle to be its own density.
645  Useful for all-atom models or flexible beads.
646  Mutually exclusive with density_ options
647  @param ideal_helix Create idealized helix structures for these
648  residues at resolution 1.
649  Any other resolutions passed will be coarsened from there.
650  Resolution 0 will not work; you may have to use MODELLER
651  to do that (for now).
652  @param color the color applied to the hierarchies generated.
653  Format options: tuple (r,g,b) with values 0 to 1;
654  float (from 0 to 1, a map from Blue to Green to Red);
655  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
656  a hex RGB string (e.g. "#ff0000");
657  an IMP.display.Color object
658  @note You cannot call add_representation multiple times for the
659  same residues.
660  """ # noqa: E501
661 
662  # can't customize clones
663  if self.mol_to_clone is not None:
664  raise ValueError(
665  'You cannot call add_representation() for a clone.'
666  ' Maybe use a copy instead.')
667 
668  # format input
669  if residues is None:
670  res = IMP.pmi.tools.OrderedSet(self.residues)
671  elif residues == self:
672  res = IMP.pmi.tools.OrderedSet(self.residues)
673  elif type(residues) is IMP.pmi.topology.TempResidue:
674  res = IMP.pmi.tools.OrderedSet([residues])
675  elif hasattr(residues, '__iter__'):
676  if len(residues) == 0:
677  raise Exception(
678  'You passed an empty set to add_representation')
679  if type(residues) is IMP.pmi.tools.OrderedSet \
680  and type(next(iter(residues))) is TempResidue:
681  res = residues
682  elif (type(residues) is set
683  and type(next(iter(residues))) is TempResidue):
684  res = IMP.pmi.tools.OrderedSet(residues)
685  elif type(residues) is list and type(residues[0]) is TempResidue:
686  res = IMP.pmi.tools.OrderedSet(residues)
687  else:
688  raise Exception("You passed an iterable of something other "
689  "than TempResidue", res)
690  else:
691  raise Exception("add_representation: you must pass a set of "
692  "residues or nothing(=all residues)")
693 
694  # check that each residue has not been represented yet
695  ov = res & self._represented
696  if ov:
697  raise Exception('You have already added representation for ' +
698  self.get_hierarchy().get_name() + ': ' +
699  ov.__repr__())
700  self._represented |= res
701 
702  # check you aren't creating multiple resolutions without structure
703  if not hasattr(resolutions, '__iter__'):
704  if type(resolutions) is int:
705  resolutions = [resolutions]
706  else:
707  raise Exception("you tried to pass resolutions that are not "
708  "int or list-of-int")
709  if len(resolutions) > 1 and not ideal_helix:
710  for r in res:
711  if not r.get_has_structure():
712  raise Exception(
713  'You are creating multiple resolutions for '
714  'unstructured regions. This will have unexpected '
715  'results.')
716 
717  # check density info is consistent
718  if density_residues_per_component or density_prefix:
719  if not density_residues_per_component and density_prefix:
720  raise Exception(
721  'If requesting density, must provide '
722  'density_residues_per_component AND density_prefix')
723  if density_residues_per_component and setup_particles_as_densities:
724  raise Exception(
725  'Cannot create both volumetric density '
726  '(density_residues_per_component) AND '
727  'individual densities (setup_particles_as_densities) '
728  'in the same representation')
729  if len(resolutions) > 1 and setup_particles_as_densities:
730  raise Exception(
731  'You have multiple bead resolutions but are attempting to '
732  'set them all up as individual Densities. '
733  'This could have unexpected results.')
734 
735  # check helix not accompanied by other resolutions
736  # (densities OK though!)
737  if ideal_helix:
738  if 0 in resolutions:
739  raise Exception(
740  "For ideal helices, cannot build resolution 0: "
741  "you have to do that in MODELLER")
742  if 1 not in resolutions:
743  resolutions = [1] + list(resolutions)
744  self._ideal_helices.append(res)
745 
746  # check residues are all part of this molecule:
747  for r in res:
748  if r.get_molecule() != self:
749  raise Exception(
750  'You are adding residues from a different molecule to',
751  self.__repr__())
752 
753  # unify formatting for extra breaks
754  breaks = []
755  for b in bead_extra_breaks:
756  if type(b) == str:
757  breaks.append(int(b)-1)
758  else:
759  breaks.append(b)
760  # store the representation group
761  self.representations.append(_Representation(
762  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
763  density_residues_per_component, density_prefix,
764  density_force_compute, density_voxel_size,
765  setup_particles_as_densities, ideal_helix, color))
766 
767  def _all_protocol_output(self):
768  return self.state._protocol_output
769 
770  def build(self):
771  """Create all parts of the IMP hierarchy
772  including Atoms, Residues, and Fragments/Representations and,
773  finally, Copies.
774  Will only build requested representations.
775  @note Any residues assigned a resolution must have an IMP.atom.Residue
776  hierarchy containing at least a CAlpha. For missing residues,
777  these can be constructed from the PDB file.
778  """
779  if not self.built:
780  # Add molecule name and sequence to any ProtocolOutput objects
781  name = self.hier.get_name()
782  for po, state in self._all_protocol_output():
783  po.create_component(state, name, True,
784  asym_name=self._name_with_copy)
785  po.add_component_sequence(state, name, self.sequence,
786  asym_name=self._name_with_copy,
787  alphabet=self.alphabet)
788  # if requested, clone structure and representations
789  # BEFORE building original
790  if self.mol_to_clone is not None:
791  for nr, r in enumerate(self.mol_to_clone.residues):
792  if r.get_has_structure():
793  clone = IMP.atom.create_clone(r.get_hierarchy())
794  self.residues[nr].set_structure(
795  IMP.atom.Residue(clone), soft_check=True)
796  for old_rep in self.mol_to_clone.representations:
797  new_res = IMP.pmi.tools.OrderedSet()
798  for r in old_rep.residues:
799  new_res.add(self.residues[r.get_internal_index()])
800  self._represented.add(
801  self.residues[r.get_internal_index()])
802  new_rep = _Representation(
803  new_res, old_rep.bead_resolutions,
804  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
805  old_rep.bead_default_coord,
806  old_rep.density_residues_per_component,
807  old_rep.density_prefix, False,
808  old_rep.density_voxel_size,
809  old_rep.setup_particles_as_densities,
810  old_rep.ideal_helix, old_rep.color)
811  self.representations.append(new_rep)
812  self.coord_finder = self.mol_to_clone.coord_finder
813 
814  # give a warning for all residues that don't have representation
815  no_rep = [r for r in self.residues if r not in self._represented]
816  if len(no_rep) > 0:
817  warnings.warn(
818  'Residues without representation in molecule %s: %s'
819  % (self.get_name(), system_tools.resnums2str(no_rep)),
821 
822  # first build any ideal helices (fills in structure for
823  # the TempResidues)
824  for rep in self.representations:
825  if rep.ideal_helix:
826  _build_ideal_helix(self.model, rep.residues,
827  self.coord_finder)
828 
829  # build all the representations
830  built_reps = []
831 
832  rephandler = _RepresentationHandler(
833  self._name_with_copy, list(self._all_protocol_output()),
834  self._pdb_elements)
835 
836  for rep in self.representations:
837  built_reps += system_tools.build_representation(
838  self, rep, self.coord_finder, rephandler)
839 
840  # sort them before adding as children
841  built_reps.sort(
842  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
843  for br in built_reps:
844  self.hier.add_child(br)
845  br.update_parents()
846  self.built = True
847 
848  for res in self.residues:
849  # first off, store the highest resolution available
850  # in residue.hier
851  new_ps = IMP.atom.Selection(
852  self.hier,
853  residue_index=res.get_index(),
854  resolution=1).get_selected_particles()
855  if len(new_ps) > 0:
856  new_p = new_ps[0]
857  if IMP.atom.Atom.get_is_setup(new_p):
858  # if only found atomic, store the residue
859  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
860  else:
861  # otherwise just store what you found
862  new_hier = IMP.atom.Hierarchy(new_p)
863  res.hier = new_hier
864  rephandler(res)
865  else:
866  res.hier = None
867  self._represented = IMP.pmi.tools.OrderedSet(
868  [a for a in self._represented])
869  print('done building', self.get_hierarchy())
870  return self.hier
871 
872  def get_particles_at_all_resolutions(self, residue_indexes=None):
873  """Helpful utility for getting particles at all resolutions from
874  this molecule. Can optionally pass a set of residue indexes"""
875  if not self.built:
876  raise Exception(
877  "Cannot get all resolutions until you build the Molecule")
878  if residue_indexes is None:
879  residue_indexes = [r.get_index() for r in self.get_residues()]
881  self.get_hierarchy(), residue_indexes=residue_indexes)
882  return ps
883 
884 
885 class _Representation(object):
886  """Private class just to store a representation request"""
887  def __init__(self,
888  residues,
889  bead_resolutions,
890  bead_extra_breaks,
891  bead_ca_centers,
892  bead_default_coord,
893  density_residues_per_component,
894  density_prefix,
895  density_force_compute,
896  density_voxel_size,
897  setup_particles_as_densities,
898  ideal_helix,
899  color):
900  self.residues = residues
901  self.bead_resolutions = bead_resolutions
902  self.bead_extra_breaks = bead_extra_breaks
903  self.bead_ca_centers = bead_ca_centers
904  self.bead_default_coord = bead_default_coord
905  self.density_residues_per_component = density_residues_per_component
906  self.density_prefix = density_prefix
907  self.density_force_compute = density_force_compute
908  self.density_voxel_size = density_voxel_size
909  self.setup_particles_as_densities = setup_particles_as_densities
910  self.ideal_helix = ideal_helix
911  self.color = color
912 
913 
914 class _FindCloseStructure(object):
915  """Utility to get the nearest observed coordinate"""
916  def __init__(self):
917  self.coords = []
918 
919  def add_residues(self, residues):
920  for r in residues:
921  idx = IMP.atom.Residue(r).get_index()
922  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
923  ca = IMP.atom.Selection(
924  r, atom_types=catypes).get_selected_particles()
925  p = IMP.atom.Selection(
926  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
927  if len(ca) == 1:
928  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
929  self.coords.append([idx, xyz])
930  elif len(p) == 1:
931  xyz = IMP.core.XYZ(p[0]).get_coordinates()
932  self.coords.append([idx, xyz])
933  else:
934  raise ValueError("_FindCloseStructure: wrong selection")
935 
936  self.coords.sort(key=itemgetter(0))
937 
938  def find_nearest_coord(self, query):
939  if self.coords == []:
940  return None
941  keys = [r[0] for r in self.coords]
942  pos = bisect_left(keys, query)
943  if pos == 0:
944  ret = self.coords[0]
945  elif pos == len(self.coords):
946  ret = self.coords[-1]
947  else:
948  before = self.coords[pos - 1]
949  after = self.coords[pos]
950  if after[0] - query < query - before[0]:
951  ret = after
952  else:
953  ret = before
954  return ret[1]
955 
956 
957 class Sequences(object):
958  """A dictionary-like wrapper for reading and storing sequence data.
959  Keys are FASTA sequence names, and each value a string of one-letter
960  codes."""
961  def __init__(self, fasta_fn, name_map=None):
962  """Read a FASTA file and extract all the requested sequences
963  @param fasta_fn sequence file
964  @param name_map dictionary mapping the FASTA name to final stored name
965  """
966  self.sequences = IMP.pmi.tools.OrderedDict()
967  self.read_sequences(fasta_fn, name_map)
968 
969  def __len__(self):
970  return len(self.sequences)
971 
972  def __contains__(self, x):
973  return x in self.sequences
974 
975  def __getitem__(self, key):
976  if type(key) is int:
977  allseqs = list(self.sequences.keys())
978  try:
979  return self.sequences[allseqs[key]]
980  except IndexError:
981  raise IndexError("You tried to access sequence number %d "
982  "but there's only %d" % (key, len(allseqs)))
983  else:
984  return self.sequences[key]
985 
986  def __iter__(self):
987  return self.sequences.__iter__()
988 
989  def __repr__(self):
990  ret = ''
991  for s in self.sequences:
992  ret += '%s\t%s\n' % (s, self.sequences[s])
993  return ret
994 
995  def read_sequences(self, fasta_fn, name_map=None):
996  code = None
997  seq = None
998  with open(fasta_fn, 'r') as fh:
999  for (num, line) in enumerate(fh):
1000  if line.startswith('>'):
1001  if seq is not None:
1002  self.sequences[code] = seq.strip('*')
1003  code = line.rstrip()[1:]
1004  if name_map is not None:
1005  try:
1006  code = name_map[code]
1007  except KeyError:
1008  pass
1009  seq = ''
1010  else:
1011  line = line.rstrip()
1012  if line: # Skip blank lines
1013  if seq is None:
1014  raise Exception(
1015  "Found FASTA sequence before first header "
1016  "at line %d: %s" % (num + 1, line))
1017  seq += line
1018  if seq is not None:
1019  self.sequences[code] = seq.strip('*')
1020 
1021 
1022 class PDBSequences(object):
1023  """Data structure for reading and storing sequence data from PDBs.
1024 
1025  @see fasta_pdb_alignments."""
1026  def __init__(self, model, pdb_fn, name_map=None):
1027  """Read a PDB file and return all sequences for each contiguous
1028  fragment
1029  @param pdb_fn file
1030  @param name_map dictionary mapping the pdb chain id to final
1031  stored name
1032  """
1033  self.model = model
1034  # self.sequences data-structure: (two-key dictionary)
1035  # it contains all contiguous fragments:
1036  # chain_id, tuples indicating first and last residue, sequence
1037  # example:
1038  # key1, key2, value
1039  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1040  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1041  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1042  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1043  self.sequences = IMP.pmi.tools.OrderedDict()
1044  self.read_sequences(pdb_fn, name_map)
1045 
1046  def read_sequences(self, pdb_fn, name_map):
1047  read_file = IMP.atom.read_pdb
1048  if pdb_fn.endswith('.cif'):
1049  read_file = IMP.atom.read_mmcif
1050  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1051  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1052  for c in cs:
1053  id = IMP.atom.Chain(c).get_id()
1054  print(id)
1055  if name_map:
1056  try:
1057  id = name_map[id]
1058  except KeyError:
1059  print("Chain ID %s not in name_map, skipping" % id)
1060  continue
1061  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1062  rids = []
1063  rids_olc_dict = {}
1064  for r in rs:
1065  dr = IMP.atom.Residue(r)
1066  rid = dr.get_index()
1067 
1068  isprotein = dr.get_is_protein()
1069  isrna = dr.get_is_rna()
1070  isdna = dr.get_is_dna()
1071  if isprotein:
1072  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1073  rids.append(rid)
1074  rids_olc_dict[rid] = olc
1075  elif isdna:
1076  if dr.get_residue_type() == IMP.atom.DADE:
1077  olc = "A"
1078  if dr.get_residue_type() == IMP.atom.DURA:
1079  olc = "U"
1080  if dr.get_residue_type() == IMP.atom.DCYT:
1081  olc = "C"
1082  if dr.get_residue_type() == IMP.atom.DGUA:
1083  olc = "G"
1084  if dr.get_residue_type() == IMP.atom.DTHY:
1085  olc = "T"
1086  rids.append(rid)
1087  rids_olc_dict[rid] = olc
1088  elif isrna:
1089  if dr.get_residue_type() == IMP.atom.ADE:
1090  olc = "A"
1091  if dr.get_residue_type() == IMP.atom.URA:
1092  olc = "U"
1093  if dr.get_residue_type() == IMP.atom.CYT:
1094  olc = "C"
1095  if dr.get_residue_type() == IMP.atom.GUA:
1096  olc = "G"
1097  if dr.get_residue_type() == IMP.atom.THY:
1098  olc = "T"
1099  rids.append(rid)
1100  rids_olc_dict[rid] = olc
1101  group_rids = self.group_indexes(rids)
1102  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1103  for group in group_rids:
1104  sequence_fragment = ""
1105  for i in range(group[0], group[1]+1):
1106  sequence_fragment += rids_olc_dict[i]
1107  contiguous_sequences[group] = sequence_fragment
1108  self.sequences[id] = contiguous_sequences
1109 
1110  def group_indexes(self, indexes):
1111  from itertools import groupby
1112  ranges = []
1113  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1114  group = [x[1] for x in g]
1115  ranges.append((group[0], group[-1]))
1116  return ranges
1117 
1118 
1119 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1120  '''This function computes and prints the alignment between the
1121  fasta file and the pdb sequence, computes the offsets for each contiguous
1122  fragment in the PDB.
1123  @param fasta_sequences IMP.pmi.topology.Sequences object
1124  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1125  @param show boolean default False, if True prints the alignments.
1126  The input objects should be generated using map_name dictionaries
1127  such that fasta_id
1128  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1129  Returns a dictionary of offsets, organized by peptide range (group):
1130  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1131  from Bio import pairwise2
1132  from Bio.pairwise2 import format_alignment
1133  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1134  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1135  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1136  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1137  offsets = IMP.pmi.tools.OrderedDict()
1138  for name in fasta_sequences.sequences:
1139  print(name)
1140  seq_fasta = fasta_sequences.sequences[name]
1141  if name not in pdb_sequences.sequences:
1142  print("Fasta id %s not in pdb names, aligning against every "
1143  "pdb chain" % name)
1144  pdbnames = pdb_sequences.sequences.keys()
1145  else:
1146  pdbnames = [name]
1147  for pdbname in pdbnames:
1148  for group in pdb_sequences.sequences[pdbname]:
1149  if group[1] - group[0] + 1 < 7:
1150  continue
1151  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1152  if show:
1153  print("########################")
1154  print(" ")
1155  print("protein name", pdbname)
1156  print("fasta id", name)
1157  print("pdb fragment", group)
1158  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1159  2, -1, -.5, -.1)[0]
1160  for a in [align]:
1161  offset = a[3] + 1 - group[0]
1162  if show:
1163  print("alignment sequence start-end",
1164  (a[3] + 1, a[4] + 1))
1165  print("offset from pdb to fasta index", offset)
1166  print(format_alignment(*a))
1167  if name not in offsets:
1168  offsets[pdbname] = {}
1169  if group not in offsets[pdbname]:
1170  offsets[pdbname][group] = offset
1171  else:
1172  if group not in offsets[pdbname]:
1173  offsets[pdbname][group] = offset
1174  return offsets
1175 
1176 
1177 class TempResidue(object):
1178  "Temporarily stores residue information, even without structure available."
1179  # Consider implementing __hash__ so you can select.
1180  def __init__(self, molecule, code, index, internal_index, alphabet):
1181  """setup a TempResidue
1182  @param molecule PMI Molecule to which this residue belongs
1183  @param code one-letter residue type code
1184  @param index PDB index
1185  @param internal_index The number in the sequence
1186  """
1187  # these attributes should be immutable
1188  self.molecule = molecule
1189  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1190  self.pdb_index = index
1191  self.internal_index = internal_index
1192  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1193  self.state_index = \
1194  IMP.atom.State(self.molecule.state.hier).get_state_index()
1195  # these are expected to change
1196  self._structured = False
1197  self.hier = IMP.atom.Residue.setup_particle(
1198  IMP.Particle(molecule.model), self.rtype, index)
1199 
1200  def __str__(self):
1201  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1202  + str(self.copy_index) + "_" + self.get_code() \
1203  + str(self.get_index())
1204 
1205  def __repr__(self):
1206  return self.__str__()
1207 
1208  def __key(self):
1209  # this returns the immutable attributes only
1210  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1211  self.pdb_index, self.internal_index)
1212 
1213  def __eq__(self, other):
1214  return type(other) == type(self) and self.__key() == other.__key()
1215 
1216  def __hash__(self):
1217  return hash(self.__key())
1218 
1219  def get_index(self):
1220  return self.pdb_index
1221 
1222  def get_internal_index(self):
1223  return self.internal_index
1224 
1225  def get_code(self):
1226  return IMP.atom.get_one_letter_code(self.get_residue_type())
1227 
1228  def get_residue_type(self):
1229  return self.rtype
1230 
1231  def get_hierarchy(self):
1232  return self.hier
1233 
1234  def get_molecule(self):
1235  return self.molecule
1236 
1237  def get_has_structure(self):
1238  return self._structured
1239 
1240  def set_structure(self, res, soft_check=False):
1241  if res.get_residue_type() != self.get_residue_type():
1242  if (res.get_residue_type() == IMP.atom.MSE
1243  and self.get_residue_type() == IMP.atom.MET):
1244  # MSE in the PDB file is OK to match with MET in the FASTA
1245  # sequence
1246  pass
1247  elif soft_check:
1248  # note from commit a2c13eaa1 we give priority to the
1249  # FASTA and not the PDB
1250  warnings.warn(
1251  'Inconsistency between FASTA sequence and PDB sequence. '
1252  'FASTA type %s %s and PDB type %s'
1253  % (self.get_index(), self.hier.get_residue_type(),
1254  res.get_residue_type()),
1256  self.hier.set_residue_type((self.get_residue_type()))
1257  self.rtype = self.get_residue_type()
1258  else:
1259  raise Exception(
1260  'ERROR: PDB residue index', self.get_index(), 'is',
1261  IMP.atom.get_one_letter_code(res.get_residue_type()),
1262  'and sequence residue is', self.get_code())
1263 
1264  for a in res.get_children():
1265  self.hier.add_child(a)
1266  atype = IMP.atom.Atom(a).get_atom_type()
1267  a.get_particle().set_name(
1268  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1269  self.hier.get_index()))
1270  self._structured = True
1271 
1272 
1273 class TopologyReader(object):
1274  """Automatically setup Sytem and Degrees of Freedom with a formatted
1275  text file.
1276  The file is read in and each part of the topology is stored as a
1277  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1278  The topology file should be in a simple pipe-delimited format:
1279  @code{.txt}
1280 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1281 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1282 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1283 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1284 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1285 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1286 
1287  @endcode
1288 
1289  These are the fields you can enter:
1290  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1291  hierarchy for this structure. Multiple copies of the same molecule
1292  can be created by appending a copy number after a period; if none is
1293  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1294  of Rpb2 or Rpb2.0).
1295  - `color`: The color used in the output RMF file. Uses
1296  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1297  (e.g. "red"), or R,G,B values as three comma-separated floating point
1298  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1299  starting with '#' (e.g. #ff0000).
1300  - `fasta_fn`: Name of FASTA file containing this component.
1301  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1302  from the file is assumed to be a protein sequence. If it should instead
1303  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1304  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1305  FASTA file and treat it as RNA.
1306  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1307  If left empty, will set up as BEADS (you can also specify "BEADS")
1308  Can also write "IDEAL_HELIX".
1309  - `chain`: Chain ID of this domain in the PDB file.
1310  - `residue_range`: Comma delimited pair defining range.
1311  Can leave empty or use 'all' for entire sequence from PDB file.
1312  The second item in the pair can be END to select the last residue in the
1313  PDB chain.
1314  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1315  For example, an offset of -10 would match the first residue in the
1316  FASTA file (which is always numbered sequentially starting from 1) with
1317  residue 11 in the PDB file.
1318  - `bead_size`: The size (in residues) of beads used to model areas not
1319  covered by PDB coordinates. These will be built automatically.
1320  - `em_residues`: The number of Gaussians used to model the electron
1321  density of this domain. Set to zero if no EM fitting will be done.
1322  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1323  (and .mrc)
1324  - `rigid_body`: Leave empty if this object is not in a rigid body.
1325  Otherwise, this is a number corresponding to the rigid body containing
1326  this object. The number itself is just used for grouping things.
1327  - `super_rigid_body`: Add a mover that periodically moves several related
1328  domains as if they were a single large rigid body. In between such moves,
1329  the domains move independently. This can improve sampling.
1330  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1331  for all adjacent pairs of domains in the chain.
1332  - `flags` additional flags for advanced options
1333  @note All filenames are relative to the paths specified in the constructor.
1334 
1335  """ # noqa: E501
1336  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1337  gmm_dir='./'):
1338  """Constructor.
1339  @param topology_file Pipe-delimited file specifying the topology
1340  @param pdb_dir Relative path to the pdb directory
1341  @param fasta_dir Relative path to the fasta directory
1342  @param gmm_dir Relative path to the GMM directory
1343  """
1344  self.topology_file = topology_file
1345  # key=molname, value=TempMolecule
1346  self.molecules = IMP.pmi.tools.OrderedDict()
1347  self.pdb_dir = pdb_dir
1348  self.fasta_dir = fasta_dir
1349  self.gmm_dir = gmm_dir
1350  self._components = self.read(topology_file)
1351 
1352  def write_topology_file(self, outfile):
1353  with open(outfile, "w") as f:
1354  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1355  "residue_range|pdb_offset|bead_size|"
1356  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1357  "chain_of_super_rigid_bodies|\n")
1358  for c in self._components:
1359  output = c.get_str()+'\n'
1360  f.write(output)
1361  return outfile
1362 
1363  def get_components(self, topology_list="all"):
1364  """ Return list of ComponentTopologies for selected components
1365  @param topology_list List of indices to return"""
1366  if topology_list == "all":
1367  topologies = self._components
1368  else:
1369  topologies = []
1370  for i in topology_list:
1371  topologies.append(self._components[i])
1372  return topologies
1373 
1374  def get_molecules(self):
1375  return self.molecules
1376 
1377  def read(self, topology_file, append=False):
1378  """Read system components from topology file. append=False will erase
1379  current topology and overwrite with new
1380  """
1381  is_topology = False
1382  is_directories = False
1383  linenum = 1
1384  if not append:
1385  self._components = []
1386 
1387  with open(topology_file) as infile:
1388  for line in infile:
1389  if line.lstrip() == "" or line[0] == "#":
1390  continue
1391  elif line.split('|')[1].strip() in ("molecule_name"):
1392  is_topology = True
1393  is_directories = False
1394  old_format = False
1395  continue
1396  elif line.split('|')[1] == "component_name":
1397  is_topology = True
1399  "Old-style topology format (using "
1400  "|component_name|) is deprecated. Please switch to "
1401  "the new-style format (using |molecule_name|)\n")
1402  old_format = True
1403  is_directories = False
1404  continue
1405  elif line.split('|')[1] == "directories":
1407  "Setting directories in the topology file "
1408  "is deprecated. Please do so through the "
1409  "TopologyReader constructor. Note that new-style "
1410  "paths are relative to the current working "
1411  "directory, not the topology file.\n")
1412  is_directories = True
1413  elif is_directories:
1414  fields = line.split('|')
1415  setattr(self, fields[1],
1416  IMP.get_relative_path(topology_file, fields[2]))
1417  if is_topology:
1418  new_component = self._parse_line(line, linenum, old_format)
1419  self._components.append(new_component)
1420  linenum += 1
1421  return self._components
1422 
1423  def _parse_line(self, component_line, linenum, old_format):
1424  """Parse a line of topology values and matches them to their key.
1425  Checks each value for correct syntax
1426  Returns a list of Component objects
1427  fields:
1428  """
1429  c = _Component()
1430  values = [s.strip() for s in component_line.split('|')]
1431  errors = []
1432 
1433  # Required fields
1434  if old_format:
1435  c.molname = values[1]
1436  c.copyname = ''
1437  c._domain_name = values[2]
1438  c.color = 'blue'
1439  else:
1440  names = values[1].split('.')
1441  if len(names) == 1:
1442  c.molname = names[0]
1443  c.copyname = ''
1444  elif len(names) == 2:
1445  c.molname = names[0]
1446  c.copyname = names[1]
1447  else:
1448  c.molname = names[0]
1449  c.copyname = names[1]
1450  errors.append("Molecule name should be <molecule.copyID>")
1451  errors.append("For component %s line %d "
1452  % (c.molname, linenum))
1453  c._domain_name = c.molname + '.' + c.copyname
1454  colorfields = values[2].split(',')
1455  if len(colorfields) == 3:
1456  c.color = [float(x) for x in colorfields]
1457  if any([x > 1 for x in c.color]):
1458  c.color = [x/255 for x in c.color]
1459  else:
1460  c.color = values[2]
1461  c._orig_fasta_file = values[3]
1462  c.fasta_file = values[3]
1463  fasta_field = values[4].split(",")
1464  c.fasta_id = fasta_field[0]
1465  c.fasta_flag = None
1466  if len(fasta_field) > 1:
1467  c.fasta_flag = fasta_field[1]
1468  c._orig_pdb_input = values[5]
1469  pdb_input = values[5]
1470  tmp_chain = values[6]
1471  rr = values[7]
1472  offset = values[8]
1473  bead_size = values[9]
1474  emg = values[10]
1475  if old_format:
1476  rbs = srbs = csrbs = ''
1477  else:
1478  rbs = values[11]
1479  srbs = values[12]
1480  csrbs = values[13]
1481 
1482  if c.molname not in self.molecules:
1483  self.molecules[c.molname] = _TempMolecule(c)
1484  else:
1485  # COPY OR DOMAIN
1486  c._orig_fasta_file = \
1487  self.molecules[c.molname].orig_component._orig_fasta_file
1488  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1489  self.molecules[c.molname].add_component(c, c.copyname)
1490 
1491  # now cleanup input
1492  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1493  if pdb_input == "":
1494  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1495  errors.append("For component %s line %d is not correct"
1496  "|%s| was given." % (c.molname, linenum, pdb_input))
1497  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1498  c.pdb_file = pdb_input
1499  else:
1500  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1501 
1502  # PDB chain must be one or two characters
1503  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1504  c.chain = tmp_chain
1505  else:
1506  errors.append(
1507  "PDB Chain identifier must be one or two characters.")
1508  errors.append("For component %s line %d is not correct"
1509  "|%s| was given."
1510  % (c.molname, linenum, tmp_chain))
1511 
1512  # Optional fields
1513  # Residue Range
1514  if rr.strip() == 'all' or str(rr) == "":
1515  c.residue_range = None
1516  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1517  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1518  # Make sure that is residue range is given, there are only
1519  # two values and they are integers
1520  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1521  if c.residue_range[1] != 'END':
1522  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1523  # Old format used -1 for the last residue
1524  if old_format and c.residue_range[1] == -1:
1525  c.residue_range = (c.residue_range[0], 'END')
1526  else:
1527  errors.append("Residue Range format for component %s line %d is "
1528  "not correct" % (c.molname, linenum))
1529  errors.append(
1530  "Correct syntax is two comma separated integers: "
1531  "|start_res, end_res|. end_res can also be END to select the "
1532  "last residue in the chain. |%s| was given." % rr)
1533  errors.append("To select all residues, indicate |\"all\"|")
1534 
1535  # PDB Offset
1536  if self._is_int(offset):
1537  c.pdb_offset = int(offset)
1538  elif len(offset) == 0:
1539  c.pdb_offset = 0
1540  else:
1541  errors.append("PDB Offset format for component %s line %d is "
1542  "not correct" % (c.molname, linenum))
1543  errors.append("The value must be a single integer. |%s| was given."
1544  % offset)
1545 
1546  # Bead Size
1547  if self._is_int(bead_size):
1548  c.bead_size = int(bead_size)
1549  elif len(bead_size) == 0:
1550  c.bead_size = 0
1551  else:
1552  errors.append("Bead Size format for component %s line %d is "
1553  "not correct" % (c.molname, linenum))
1554  errors.append("The value must be a single integer. |%s| was given."
1555  % bead_size)
1556 
1557  # EM Residues Per Gaussian
1558  if self._is_int(emg):
1559  if int(emg) > 0:
1560  c.density_prefix = os.path.join(self.gmm_dir,
1561  c.get_unique_name())
1562  c.gmm_file = c.density_prefix + '.txt'
1563  c.mrc_file = c.density_prefix + '.gmm'
1564 
1565  c.em_residues_per_gaussian = int(emg)
1566  else:
1567  c.em_residues_per_gaussian = 0
1568  elif len(emg) == 0:
1569  c.em_residues_per_gaussian = 0
1570  else:
1571  errors.append("em_residues_per_gaussian format for component "
1572  "%s line %d is not correct" % (c.molname, linenum))
1573  errors.append("The value must be a single integer. |%s| was given."
1574  % emg)
1575 
1576  # rigid bodies
1577  if len(rbs) > 0:
1578  if not self._is_int(rbs):
1579  errors.append(
1580  "rigid bodies format for component "
1581  "%s line %d is not correct" % (c.molname, linenum))
1582  errors.append("Each RB must be a single integer, or empty. "
1583  "|%s| was given." % rbs)
1584  c.rigid_body = int(rbs)
1585 
1586  # super rigid bodies
1587  if len(srbs) > 0:
1588  srbs = srbs.split(',')
1589  for i in srbs:
1590  if not self._is_int(i):
1591  errors.append(
1592  "super rigid bodies format for component "
1593  "%s line %d is not correct" % (c.molname, linenum))
1594  errors.append(
1595  "Each SRB must be a single integer. |%s| was given."
1596  % srbs)
1597  c.super_rigid_bodies = srbs
1598 
1599  # chain of super rigid bodies
1600  if len(csrbs) > 0:
1601  if not self._is_int(csrbs):
1602  errors.append(
1603  "em_residues_per_gaussian format for component "
1604  "%s line %d is not correct" % (c.molname, linenum))
1605  errors.append(
1606  "Each CSRB must be a single integer. |%s| was given."
1607  % csrbs)
1608  c.chain_of_super_rigid_bodies = csrbs
1609 
1610  # done
1611  if errors:
1612  raise ValueError("Fix Topology File syntax errors and rerun: "
1613  + "\n".join(errors))
1614  else:
1615  return c
1616 
1617  def set_gmm_dir(self, gmm_dir):
1618  """Change the GMM dir"""
1619  self.gmm_dir = gmm_dir
1620  for c in self._components:
1621  c.gmm_file = os.path.join(self.gmm_dir,
1622  c.get_unique_name() + ".txt")
1623  c.mrc_file = os.path.join(self.gmm_dir,
1624  c.get_unique_name() + ".mrc")
1625  print('new gmm', c.gmm_file)
1626 
1627  def set_pdb_dir(self, pdb_dir):
1628  """Change the PDB dir"""
1629  self.pdb_dir = pdb_dir
1630  for c in self._components:
1631  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1632  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1633 
1634  def set_fasta_dir(self, fasta_dir):
1635  """Change the FASTA dir"""
1636  self.fasta_dir = fasta_dir
1637  for c in self._components:
1638  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1639 
1640  def _is_int(self, s):
1641  # is this string an integer?
1642  try:
1643  float(s)
1644  return float(s).is_integer()
1645  except ValueError:
1646  return False
1647 
1648  def get_rigid_bodies(self):
1649  """Return list of lists of rigid bodies (as domain name)"""
1650  rbl = defaultdict(list)
1651  for c in self._components:
1652  if c.rigid_body:
1653  rbl[c.rigid_body].append(c.get_unique_name())
1654  return rbl.values()
1655 
1657  """Return list of lists of super rigid bodies (as domain name)"""
1658  rbl = defaultdict(list)
1659  for c in self._components:
1660  for rbnum in c.super_rigid_bodies:
1661  rbl[rbnum].append(c.get_unique_name())
1662  return rbl.values()
1663 
1665  "Return list of lists of chains of super rigid bodies (as domain name)"
1666  rbl = defaultdict(list)
1667  for c in self._components:
1668  for rbnum in c.chain_of_super_rigid_bodies:
1669  rbl[rbnum].append(c.get_unique_name())
1670  return rbl.values()
1671 
1672 
1673 class _TempMolecule(object):
1674  """Store the Components and any requests for copies"""
1675  def __init__(self, init_c):
1676  self.molname = init_c.molname
1677  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1678  self.add_component(init_c, init_c.copyname)
1679  self.orig_copyname = init_c.copyname
1680  self.orig_component = self.domains[init_c.copyname][0]
1681 
1682  def add_component(self, component, copy_id):
1683  self.domains[copy_id].append(component)
1684  component.domainnum = len(self.domains[copy_id])-1
1685 
1686  def __repr__(self):
1687  return ','.join('%s:%i'
1688  % (k, len(self.domains[k])) for k in self.domains)
1689 
1690 
1691 class _Component(object):
1692  """Stores the components required to build a standard IMP hierarchy
1693  using IMP.pmi.BuildModel()
1694  """
1695  def __init__(self):
1696  self.molname = None
1697  self.copyname = None
1698  self.domainnum = 0
1699  self.fasta_file = None
1700  self._orig_fasta_file = None
1701  self.fasta_id = None
1702  self.fasta_flag = None
1703  self.pdb_file = None
1704  self._orig_pdb_input = None
1705  self.chain = None
1706  self.residue_range = None
1707  self.pdb_offset = 0
1708  self.bead_size = 10
1709  self.em_residues_per_gaussian = 0
1710  self.gmm_file = ''
1711  self.mrc_file = ''
1712  self.density_prefix = ''
1713  self.color = 0.1
1714  self.rigid_body = None
1715  self.super_rigid_bodies = []
1716  self.chain_of_super_rigid_bodies = []
1717 
1718  def _l2s(self, rng):
1719  return ",".join("%s" % x for x in rng)
1720 
1721  def __repr__(self):
1722  return self.get_str()
1723 
1724  def get_unique_name(self):
1725  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1726 
1727  def get_str(self):
1728  res_range = self.residue_range
1729  if self.residue_range is None:
1730  res_range = []
1731  name = self.molname
1732  if self.copyname != '':
1733  name += '.' + self.copyname
1734  if self.chain is None:
1735  chain = ' '
1736  else:
1737  chain = self.chain
1738  color = self.color
1739  if isinstance(color, list):
1740  color = ','.join([str(x) for x in color])
1741  fastaid = self.fasta_id
1742  if self.fasta_flag:
1743  fastaid += "," + self.fasta_flag
1744  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1745  self._orig_pdb_input, chain,
1746  self._l2s(list(res_range)),
1747  str(self.pdb_offset),
1748  str(self.bead_size),
1749  str(self.em_residues_per_gaussian),
1750  str(self.rigid_body) if self.rigid_body else '',
1751  self._l2s(self.super_rigid_bodies),
1752  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1753  return a
1754 
1755 
1757  '''Extends the functionality of IMP.atom.Molecule'''
1758 
1759  def __init__(self, hierarchy):
1760  IMP.atom.Molecule.__init__(self, hierarchy)
1761 
1762  def get_state_index(self):
1763  state = self.get_parent()
1764  return IMP.atom.State(state).get_state_index()
1765 
1766  def get_copy_index(self):
1767  return IMP.atom.Copy(self).get_copy_index()
1768 
1769  def get_extended_name(self):
1770  return self.get_name() + "." + \
1771  str(self.get_copy_index()) + \
1772  "." + str(self.get_state_index())
1773 
1774  def get_sequence(self):
1775  return IMP.atom.Chain(self).get_sequence()
1776 
1777  def get_residue_indexes(self):
1779 
1780  def get_residue_segments(self):
1781  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1782 
1783  def get_chain_id(self):
1784  return IMP.atom.Chain(self).get_id()
1785 
1786  def __repr__(self):
1787  s = 'PMIMoleculeHierarchy '
1788  s += self.get_name()
1789  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1790  s += " " + "State " + str(self.get_state_index())
1791  s += " " + "N residues " + str(len(self.get_sequence()))
1792  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1077
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:622
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:67
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:425
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:511
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:831