IMP logo
IMP Reference Guide  2.19.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from a PDB file.
17  * Molecule.add_representation() to create a representation unit - here you
18  can choose bead resolutions as well as alternate representations like
19  densities or ideal helices.
20  * Molecule.create_clone() lets you set up a molecule with identical
21  representations, just a different chain ID. Use Molecule.create_copy()
22  if you want a molecule with the same sequence but that allows custom
23  representations.
24 * Once data has been added and representations chosen, call System.build()
25  to create a canonical IMP hierarchy.
26 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
27  in IMP::pmi::dof.
28 * Check your representation with a nice printout:
29  IMP::atom::show_with_representation()
30 
31 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
32 
33 Alternatively one can construct the entire topology and degrees of freedom
34 via formatted text file with TopologyReader and
35 IMP::pmi::macros::BuildSystem(). This is used in the
36 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
37 set of the full options available to PMI users
38 (rigid bodies only, fixed resolutions).
39 """ # noqa: E501
40 
41 from __future__ import print_function
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase(object):
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class _OurWeakRef(object):
137  """A simple wrapper around weakref.ref which can be pickled.
138  Note that we throw the reference away at pickle time. It should
139  be able to be reconstructed from System._all_systems anyway."""
140 
141  def __init__(self, system):
142  self._ref = weakref.ref(system)
143 
144  def __call__(self):
145  if hasattr(self, '_ref'):
146  return self._ref()
147 
148  def __getstate__(self):
149  return None
150 
151 
152 class System(_SystemBase):
153  """Represent the root node of the global IMP.atom.Hierarchy."""
154 
155  _all_systems = weakref.WeakSet()
156 
157  def __init__(self, model=None, name="System"):
158  """Constructor.
159 
160  @param model The IMP::Model in which to construct this system.
161  @param name The name of the top-level hierarchy node.
162  """
163  _SystemBase.__init__(self, model)
164  self._number_of_states = 0
165  self._protocol_output = []
166  self.states = []
167  self.built = False
168 
169  System._all_systems.add(self)
170 
171  # the root hierarchy node
172  self.hier = self._create_hierarchy()
173  self.hier.set_name(name)
174  self.hier._pmi2_system = _OurWeakRef(self)
175 
176  def get_states(self):
177  """Get a list of all State objects in this system"""
178  return self.states
179 
180  def create_state(self):
181  """Makes and returns a new IMP.pmi.topology.State in this system"""
182  self._number_of_states += 1
183  state = State(self, self._number_of_states-1)
184  self.states.append(state)
185  return state
186 
187  def __repr__(self):
188  return self.hier.get_name()
189 
191  """Returns the total number of states generated"""
192  return self._number_of_states
193 
194  def get_hierarchy(self):
195  """Return the top-level IMP.atom.Hierarchy node for this system"""
196  return self.hier
197 
198  def build(self, **kwargs):
199  """Build all states"""
200  if not self.built:
201  for state in self.states:
202  state.build(**kwargs)
203  self.built = True
204  return self.hier
205 
206  def add_protocol_output(self, p):
207  """Capture details of the modeling protocol.
208  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
209  """
210  self._protocol_output.append(p)
211 # p._each_metadata.append(self._metadata)
212 # p._file_datasets.append(self._file_dataset)
213  for state in self.states:
214  state._add_protocol_output(p, self)
215 
216 
217 class State(_SystemBase):
218  """Stores a list of Molecules all with the same State index.
219  Also stores number of copies of each Molecule for easy selection.
220  """
221  def __init__(self, system, state_index):
222  """Define a new state
223  @param system the PMI System
224  @param state_index the index of the new state
225  @note It's expected that you will not use this constructor directly,
226  but rather create it with System.create_state()
227  """
228  self.model = system.get_hierarchy().get_model()
229  self.system = system
230  self.hier = self._create_child(system.get_hierarchy())
231  self.short_name = self.long_name = "State_" + str(state_index)
232  self.hier.set_name(self.short_name)
233  # key is molecule name. value are the molecule copies!
234  self.molecules = IMP.pmi.tools.OrderedDict()
235  IMP.atom.State.setup_particle(self.hier, state_index)
236  self.built = False
237  self._protocol_output = []
238  for p in system._protocol_output:
239  self._add_protocol_output(p, system)
240 
241  def __repr__(self):
242  return self.system.__repr__()+'.'+self.hier.get_name()
243 
244  def _add_protocol_output(self, p, system):
245  state = p._add_state(self)
246  self._protocol_output.append((p, state))
247  state.model = system.model
248  state.prot = self.hier
249 
250  def get_molecules(self):
251  """Return a dictionary where key is molecule name and value
252  is a list of all copies of that molecule in setup order"""
253  return self.molecules
254 
255  def get_molecule(self, name, copy_num=0):
256  """Access a molecule by name and copy number
257  @param name The molecule name used during setup
258  @param copy_num The copy number based on input order.
259  Default: 0. Set to 'all' to get all copies
260  """
261  if name not in self.molecules:
262  raise KeyError("Could not find molname %s" % name)
263  if copy_num == 'all':
264  return self.molecules[name]
265  else:
266  return self.molecules[name][copy_num]
267 
268  def create_molecule(self, name, sequence='', chain_id='',
269  alphabet=IMP.pmi.alphabets.amino_acid):
270  """Create a new Molecule within this State
271  @param name the name of the molecule (string);
272  it must not be already used
273  @param sequence sequence (string)
274  @param chain_id Chain ID to assign to this molecule
275  @param alphabet Mapping from FASTA codes to residue types
276  """
277  # check whether the molecule name is already assigned
278  if name in self.molecules:
279  raise ValueError('Cannot use a molecule name already used')
280 
281  # check for something that looks like a copy number
282  if re.search(r'\.\d+$', name):
283  warnings.warn(
284  "It is recommended not to end the molecule name with "
285  ".(number) as it may be confused with the copy number "
286  "(the copy number for new molecules is always 0, so to "
287  "select this molecule, use '%s.0'). Use create_clone() or "
288  "create_copy() instead if a copy of an existing molecule "
289  "is desired." % name, IMP.pmi.ParameterWarning)
290 
291  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
292  alphabet=alphabet)
293  self.molecules[name] = [mol]
294  return mol
295 
296  def get_hierarchy(self):
297  """Get the IMP.atom.Hierarchy node for this state"""
298  return self.hier
299 
300  def get_number_of_copies(self, molname):
301  """Get the number of copies of the given molecule (by name)
302 
303  @param molname The name of the molecule
304  """
305  return len(self.molecules[molname])
306 
307  def _register_copy(self, molecule):
308  molname = molecule.get_hierarchy().get_name()
309  self.molecules[molname].append(molecule)
310 
311  def build(self, **kwargs):
312  """Build all molecules (automatically makes clones)"""
313  if not self.built:
314  for molname in self.molecules:
315  for mol in reversed(self.molecules[molname]):
316  mol.build(**kwargs)
317  self.built = True
318  return self.hier
319 
320 
321 # Track residues read from PDB files
322 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
323 
324 
325 class _RepresentationHandler(object):
326  """Handle PMI representation and use it to populate that of any attached
327  ProtocolOutput objects"""
328  def __init__(self, name, pos, pdb_elements):
329  self.name = name
330  self.pos = pos
331  self.last_index = None
332  self.last_pdb_index = None
333  self.pdb_for_residue = {}
334  for residues, pdb in pdb_elements:
335  for r in residues:
336  self.pdb_for_residue[r.get_index()] = pdb
337 
338  def _get_pdb(self, h):
339  """Return a PDBElement if the given hierarchy was read from a
340  PDB file"""
342  rind = IMP.atom.Residue(h).get_index()
343  return self.pdb_for_residue.get(rind, None)
344 
345  def __call__(self, res):
346  """Handle a single residue"""
347  if len(self.pos) == 0:
348  return
349  h = res.hier
350  pi = h.get_particle_index()
351  # Do nothing if we already saw this hierarchy
352  if self.last_index is None or pi != self.last_index:
353  pdb = self._get_pdb(h)
354  self.last_index = pi
355  if pdb:
356  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
357  frag = IMP.atom.Fragment(h.get_parent())
358  fragi = frag.get_particle_index()
359  # Do nothing if we already saw this PDB fragment
360  if self.last_pdb_index is not None \
361  and self.last_pdb_index == fragi:
362  return
363  self.last_pdb_index = fragi
364  indices = frag.get_residue_indexes()
365  for p, state in self.pos:
366  p.add_pdb_element(state, self.name,
367  indices[0], indices[-1], pdb.offset,
368  pdb.filename, pdb.chain_id, frag)
370  frag = IMP.atom.Fragment(h)
371  indices = frag.get_residue_indexes()
372  for p, state in self.pos:
373  p.add_bead_element(state, self.name,
374  indices[0], indices[-1], 1, h)
376  resind = IMP.atom.Residue(h).get_index()
377  for p, state in self.pos:
378  p.add_bead_element(state, self.name, resind, resind, 1, h)
379  else:
380  raise TypeError("Unhandled hierarchy %s" % str(h))
381 
382 
383 class Molecule(_SystemBase):
384  """Stores a named protein chain.
385  This class is constructed from within the State class.
386  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
387  Structure is read using this class.
388  Resolutions and copies can be registered, but are only created
389  when build() is called.
390 
391  A Molecule acts like a simple Python list of residues, and can be indexed
392  by integer (starting at zero) or by string (starting at 1).
393  """
394 
395  def __init__(self, state, name, sequence, chain_id, copy_num,
396  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid):
397  """The user should not call this directly; instead call
398  State.create_molecule()
399 
400  @param state The parent PMI State
401  @param name The name of the molecule (string)
402  @param sequence Sequence (string)
403  @param chain_id The chain of this molecule
404  @param copy_num Store the copy number
405  @param mol_to_clone The original molecule (for cloning ONLY)
406  @note It's expected that you will not use this constructor directly,
407  but rather create a Molecule with State.create_molecule()
408  """
409  # internal data storage
410  self.model = state.get_hierarchy().get_model()
411  self.state = state
412  self.sequence = sequence
413  self.built = False
414  self.mol_to_clone = mol_to_clone
415  self.alphabet = alphabet
416  self.representations = [] # list of stuff to build
417  self._pdb_elements = []
418  # residues with representation
419  self._represented = IMP.pmi.tools.OrderedSet()
420  # helps you place beads by storing structure
421  self.coord_finder = _FindCloseStructure()
422  # list of OrderedSets of tempresidues set to ideal helix
423  self._ideal_helices = []
424 
425  # create root node and set it as child to passed parent hierarchy
426  self.hier = self._create_child(self.state.get_hierarchy())
427  self.hier.set_name(name)
428  IMP.atom.Copy.setup_particle(self.hier, copy_num)
429  self._name_with_copy = "%s.%d" % (name, copy_num)
430  # store the sequence
431  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
432  self.chain.set_sequence(self.sequence)
433  self.chain.set_chain_type(alphabet.get_chain_type())
434  # create TempResidues from the sequence (if passed)
435  self.residues = []
436  for ns, s in enumerate(sequence):
437  r = TempResidue(self, s, ns+1, ns, alphabet)
438  self.residues.append(r)
439 
440  def __repr__(self):
441  return self.state.__repr__() + '.' + self.get_name() + '.' + \
442  str(IMP.atom.Copy(self.hier).get_copy_index())
443 
444  def __getitem__(self, val):
445  if isinstance(val, int):
446  return self.residues[val]
447  elif isinstance(val, str):
448  return self.residues[int(val)-1]
449  elif isinstance(val, slice):
450  return IMP.pmi.tools.OrderedSet(self.residues[val])
451  else:
452  raise TypeError("Indexes must be int or str")
453 
454  def get_hierarchy(self):
455  """Return the IMP Hierarchy corresponding to this Molecule"""
456  return self.hier
457 
458  def get_name(self):
459  """Return this Molecule name"""
460  return self.hier.get_name()
461 
462  def get_state(self):
463  """Return the State containing this Molecule"""
464  return self.state
465 
466  def get_ideal_helices(self):
467  """Returns list of OrderedSets with requested ideal helices"""
468  return self._ideal_helices
469 
470  def residue_range(self, a, b, stride=1):
471  """Get residue range from a to b, inclusive.
472  Use integers to get 0-indexing, or strings to get PDB-indexing"""
473  if isinstance(a, int) and isinstance(b, int) \
474  and isinstance(stride, int):
475  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
476  elif isinstance(a, str) and isinstance(b, str) \
477  and isinstance(stride, int):
478  return IMP.pmi.tools.OrderedSet(
479  self.residues[int(a)-1:int(b):stride])
480  else:
481  raise TypeError("Range ends must be int or str. "
482  "Stride must be int.")
483 
484  def get_residues(self):
485  """Return all modeled TempResidues as a set"""
486  all_res = IMP.pmi.tools.OrderedSet(self.residues)
487  return all_res
488 
489  def get_represented(self):
490  """Return set of TempResidues that have representation"""
491  return self._represented
492 
494  """Return a set of TempResidues that have associated structure
495  coordinates"""
496  atomic_res = IMP.pmi.tools.OrderedSet()
497  for res in self.residues:
498  if res.get_has_structure():
499  atomic_res.add(res)
500  return atomic_res
501 
503  """Return a set of TempResidues that don't have associated
504  structure coordinates"""
505  non_atomic_res = IMP.pmi.tools.OrderedSet()
506  for res in self.residues:
507  if not res.get_has_structure():
508  non_atomic_res.add(res)
509  return non_atomic_res
510 
511  def create_copy(self, chain_id):
512  """Create a new Molecule with the same name and sequence but a
513  higher copy number. Returns the Molecule. No structure or
514  representation will be copied!
515 
516  @param chain_id Chain ID of the new molecule
517  """
518  mol = Molecule(
519  self.state, self.get_name(), self.sequence, chain_id,
520  copy_num=self.state.get_number_of_copies(self.get_name()))
521  self.state._register_copy(mol)
522  return mol
523 
524  def create_clone(self, chain_id):
525  """Create a Molecule clone (automatically builds same structure
526  and representation)
527 
528  @param chain_id If you want to set the chain ID of the copy
529  to something
530  @note You cannot add structure or representations to a clone!
531  """
532  mol = Molecule(
533  self.state, self.get_name(), self.sequence, chain_id,
534  copy_num=self.state.get_number_of_copies(self.get_name()),
535  mol_to_clone=self)
536  self.state._register_copy(mol)
537  return mol
538 
539  def add_structure(self, pdb_fn, chain_id, res_range=[],
540  offset=0, model_num=None, ca_only=False,
541  soft_check=False):
542  """Read a structure and store the coordinates.
543  @return the atomic residues (as a set)
544  @param pdb_fn The file to read
545  @param chain_id Chain ID to read
546  @param res_range Add only a specific set of residues from the PDB
547  file. res_range[0] is the starting and res_range[1]
548  is the ending residue index.
549  @param offset Apply an offset to the residue indexes of the PDB
550  file. This number is added to the PDB sequence.
551  @param model_num Read multi-model PDB and return that model
552  @param ca_only Only read the CA positions from the PDB file
553  @param soft_check If True, it only warns if there are sequence
554  mismatches between the PDB and the Molecule (FASTA)
555  sequence, and uses the sequence from the PDB.
556  If False (Default), it raises an error when there
557  are sequence mismatches.
558  @note If you are adding structure without a FASTA file, set soft_check
559  to True.
560  """
561  if self.mol_to_clone is not None:
562  raise ValueError('You cannot call add_structure() for a clone')
563 
564  self.pdb_fn = pdb_fn
565 
566  # get IMP.atom.Residues from the pdb file
567  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
568  res_range, offset,
569  ca_only=ca_only)
570  self.coord_finder.add_residues(rhs)
571 
572  if len(self.residues) == 0:
573  warnings.warn(
574  "Substituting PDB residue type with FASTA residue type. "
575  "Potentially dangerous.", IMP.pmi.StructureWarning)
576 
577  # Store info for ProtocolOutput usage later
578  self._pdb_elements.append(
579  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
580  chain_id=chain_id)))
581 
582  # load those into TempResidue object
583  # collect integer indexes of atomic residues to return
584  atomic_res = IMP.pmi.tools.OrderedSet()
585  for nrh, rh in enumerate(rhs):
586  pdb_idx = rh.get_index()
587  raw_idx = pdb_idx - 1
588 
589  # add ALA to fill in gaps
590  while len(self.residues) < pdb_idx:
591  r = TempResidue(self, 'A', len(self.residues)+1,
592  len(self.residues),
593  IMP.pmi.alphabets.amino_acid)
594  self.residues.append(r)
595  self.sequence += 'A'
596 
597  internal_res = self.residues[raw_idx]
598  if len(self.sequence) < raw_idx:
599  self.sequence += IMP.atom.get_one_letter_code(
600  rh.get_residue_type())
601  internal_res.set_structure(rh, soft_check)
602  atomic_res.add(internal_res)
603 
604  self.chain.set_sequence(self.sequence)
605  return atomic_res
606 
608  residues=None,
609  resolutions=[],
610  bead_extra_breaks=[],
611  bead_ca_centers=True,
612  bead_default_coord=[0, 0, 0],
613  density_residues_per_component=None,
614  density_prefix=None,
615  density_force_compute=False,
616  density_voxel_size=1.0,
617  setup_particles_as_densities=False,
618  ideal_helix=False,
619  color=None):
620  """Set the representation for some residues. Some options
621  (beads, ideal helix) operate along the backbone. Others (density
622  options) are volumetric.
623  Some of these you can combine e.g., beads+densities or helix+densities
624  See @ref pmi_resolution
625  @param residues Set of PMI TempResidues for adding the representation.
626  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
627  If None, will select all residues for this Molecule.
628  @param resolutions Resolutions for beads representations.
629  If structured, will average along backbone, breaking at
630  sequence breaks. If unstructured, will just create beads.
631  Pass an integer or list of integers
632  @param bead_extra_breaks Additional breakpoints for splitting beads.
633  The value can be the 0-ordered position, after which it'll
634  insert the break.
635  Alternatively pass PDB-style (1-ordered) indices as a string.
636  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
637  @param bead_ca_centers Set to True if you want the resolution=1 beads
638  to be at CA centers (otherwise will average atoms to get
639  center). Defaults to True.
640  @param bead_default_coord Advanced feature. Normally beads are placed
641  at the nearest structure. If no structure provided (like an
642  all bead molecule), the beads go here.
643  @param density_residues_per_component Create density (Gaussian
644  Mixture Model) for these residues. Must also supply
645  density_prefix
646  @param density_prefix Prefix (assuming '.txt') to read components
647  from or write to.
648  If exists, will read unless you set density_force_compute=True.
649  Will also write map (prefix+'.mrc').
650  Must also supply density_residues_per_component.
651  @param density_force_compute Set true to force overwrite density file.
652  @param density_voxel_size Advanced feature. Set larger if densities
653  taking too long to rasterize.
654  Set to 0 if you don't want to create the MRC file
655  @param setup_particles_as_densities Set to True if you want each
656  particle to be its own density.
657  Useful for all-atom models or flexible beads.
658  Mutually exclusive with density_ options
659  @param ideal_helix Create idealized helix structures for these
660  residues at resolution 1.
661  Any other resolutions passed will be coarsened from there.
662  Resolution 0 will not work; you may have to use MODELLER
663  to do that (for now).
664  @param color the color applied to the hierarchies generated.
665  Format options: tuple (r,g,b) with values 0 to 1;
666  float (from 0 to 1, a map from Blue to Green to Red);
667  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
668  a hex RGB string (e.g. "#ff0000");
669  an IMP.display.Color object
670  @note You cannot call add_representation multiple times for the
671  same residues.
672  """ # noqa: E501
673 
674  # can't customize clones
675  if self.mol_to_clone is not None:
676  raise ValueError(
677  'You cannot call add_representation() for a clone.'
678  ' Maybe use a copy instead.')
679 
680  # format input
681  if residues is None:
682  res = IMP.pmi.tools.OrderedSet(self.residues)
683  elif residues == self:
684  res = IMP.pmi.tools.OrderedSet(self.residues)
685  elif type(residues) is IMP.pmi.topology.TempResidue:
686  res = IMP.pmi.tools.OrderedSet([residues])
687  elif hasattr(residues, '__iter__'):
688  if len(residues) == 0:
689  raise Exception(
690  'You passed an empty set to add_representation')
691  if type(residues) is IMP.pmi.tools.OrderedSet \
692  and type(next(iter(residues))) is TempResidue:
693  res = residues
694  elif (type(residues) is set
695  and type(next(iter(residues))) is TempResidue):
696  res = IMP.pmi.tools.OrderedSet(residues)
697  elif type(residues) is list and type(residues[0]) is TempResidue:
698  res = IMP.pmi.tools.OrderedSet(residues)
699  else:
700  raise Exception("You passed an iterable of something other "
701  "than TempResidue", res)
702  else:
703  raise Exception("add_representation: you must pass a set of "
704  "residues or nothing(=all residues)")
705 
706  # check that each residue has not been represented yet
707  ov = res & self._represented
708  if ov:
709  raise Exception('You have already added representation for ' +
710  self.get_hierarchy().get_name() + ': ' +
711  ov.__repr__())
712  self._represented |= res
713 
714  # check you aren't creating multiple resolutions without structure
715  if not hasattr(resolutions, '__iter__'):
716  if type(resolutions) is int:
717  resolutions = [resolutions]
718  else:
719  raise Exception("you tried to pass resolutions that are not "
720  "int or list-of-int")
721  if len(resolutions) > 1 and not ideal_helix:
722  for r in res:
723  if not r.get_has_structure():
724  raise Exception(
725  'You are creating multiple resolutions for '
726  'unstructured regions. This will have unexpected '
727  'results.')
728 
729  # check density info is consistent
730  if density_residues_per_component or density_prefix:
731  if not density_residues_per_component and density_prefix:
732  raise Exception(
733  'If requesting density, must provide '
734  'density_residues_per_component AND density_prefix')
735  if density_residues_per_component and setup_particles_as_densities:
736  raise Exception(
737  'Cannot create both volumetric density '
738  '(density_residues_per_component) AND '
739  'individual densities (setup_particles_as_densities) '
740  'in the same representation')
741  if len(resolutions) > 1 and setup_particles_as_densities:
742  raise Exception(
743  'You have multiple bead resolutions but are attempting to '
744  'set them all up as individual Densities. '
745  'This could have unexpected results.')
746 
747  # check helix not accompanied by other resolutions
748  # (densities OK though!)
749  if ideal_helix:
750  if 0 in resolutions:
751  raise Exception(
752  "For ideal helices, cannot build resolution 0: "
753  "you have to do that in MODELLER")
754  if 1 not in resolutions:
755  resolutions = [1] + list(resolutions)
756  self._ideal_helices.append(res)
757 
758  # check residues are all part of this molecule:
759  for r in res:
760  if r.get_molecule() != self:
761  raise Exception(
762  'You are adding residues from a different molecule to',
763  self.__repr__())
764 
765  # unify formatting for extra breaks
766  breaks = []
767  for b in bead_extra_breaks:
768  if type(b) == str:
769  breaks.append(int(b)-1)
770  else:
771  breaks.append(b)
772  # store the representation group
773  self.representations.append(_Representation(
774  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
775  density_residues_per_component, density_prefix,
776  density_force_compute, density_voxel_size,
777  setup_particles_as_densities, ideal_helix, color))
778 
779  def _all_protocol_output(self):
780  return self.state._protocol_output
781 
782  def build(self):
783  """Create all parts of the IMP hierarchy
784  including Atoms, Residues, and Fragments/Representations and,
785  finally, Copies.
786  Will only build requested representations.
787  @note Any residues assigned a resolution must have an IMP.atom.Residue
788  hierarchy containing at least a CAlpha. For missing residues,
789  these can be constructed from the PDB file.
790  """
791  if not self.built:
792  # Add molecule name and sequence to any ProtocolOutput objects
793  name = self.hier.get_name()
794  for po, state in self._all_protocol_output():
795  po.create_component(state, name, True,
796  asym_name=self._name_with_copy)
797  po.add_component_sequence(state, name, self.sequence,
798  asym_name=self._name_with_copy,
799  alphabet=self.alphabet)
800  # if requested, clone structure and representations
801  # BEFORE building original
802  if self.mol_to_clone is not None:
803  for nr, r in enumerate(self.mol_to_clone.residues):
804  if r.get_has_structure():
805  clone = IMP.atom.create_clone(r.get_hierarchy())
806  self.residues[nr].set_structure(
807  IMP.atom.Residue(clone), soft_check=True)
808  for old_rep in self.mol_to_clone.representations:
809  new_res = IMP.pmi.tools.OrderedSet()
810  for r in old_rep.residues:
811  new_res.add(self.residues[r.get_internal_index()])
812  self._represented.add(
813  self.residues[r.get_internal_index()])
814  new_rep = _Representation(
815  new_res, old_rep.bead_resolutions,
816  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
817  old_rep.bead_default_coord,
818  old_rep.density_residues_per_component,
819  old_rep.density_prefix, False,
820  old_rep.density_voxel_size,
821  old_rep.setup_particles_as_densities,
822  old_rep.ideal_helix, old_rep.color)
823  self.representations.append(new_rep)
824  self.coord_finder = self.mol_to_clone.coord_finder
825 
826  # give a warning for all residues that don't have representation
827  no_rep = [r for r in self.residues if r not in self._represented]
828  if len(no_rep) > 0:
829  warnings.warn(
830  'Residues without representation in molecule %s: %s'
831  % (self.get_name(), system_tools.resnums2str(no_rep)),
833 
834  # first build any ideal helices (fills in structure for
835  # the TempResidues)
836  for rep in self.representations:
837  if rep.ideal_helix:
838  _build_ideal_helix(self.model, rep.residues,
839  self.coord_finder)
840 
841  # build all the representations
842  built_reps = []
843 
844  rephandler = _RepresentationHandler(
845  self._name_with_copy, list(self._all_protocol_output()),
846  self._pdb_elements)
847 
848  for rep in self.representations:
849  built_reps += system_tools.build_representation(
850  self, rep, self.coord_finder, rephandler)
851 
852  # sort them before adding as children
853  built_reps.sort(
854  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
855  for br in built_reps:
856  self.hier.add_child(br)
857  br.update_parents()
858  self.built = True
859 
860  for res in self.residues:
861  # first off, store the highest resolution available
862  # in residue.hier
863  new_ps = IMP.atom.Selection(
864  self.hier,
865  residue_index=res.get_index(),
866  resolution=1).get_selected_particles()
867  if len(new_ps) > 0:
868  new_p = new_ps[0]
869  if IMP.atom.Atom.get_is_setup(new_p):
870  # if only found atomic, store the residue
871  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
872  else:
873  # otherwise just store what you found
874  new_hier = IMP.atom.Hierarchy(new_p)
875  res.hier = new_hier
876  rephandler(res)
877  else:
878  res.hier = None
879  self._represented = IMP.pmi.tools.OrderedSet(
880  [a for a in self._represented])
881  print('done building', self.get_hierarchy())
882  return self.hier
883 
884  def get_particles_at_all_resolutions(self, residue_indexes=None):
885  """Helpful utility for getting particles at all resolutions from
886  this molecule. Can optionally pass a set of residue indexes"""
887  if not self.built:
888  raise Exception(
889  "Cannot get all resolutions until you build the Molecule")
890  if residue_indexes is None:
891  residue_indexes = [r.get_index() for r in self.get_residues()]
893  self.get_hierarchy(), residue_indexes=residue_indexes)
894  return ps
895 
896 
897 class _Representation(object):
898  """Private class just to store a representation request"""
899  def __init__(self,
900  residues,
901  bead_resolutions,
902  bead_extra_breaks,
903  bead_ca_centers,
904  bead_default_coord,
905  density_residues_per_component,
906  density_prefix,
907  density_force_compute,
908  density_voxel_size,
909  setup_particles_as_densities,
910  ideal_helix,
911  color):
912  self.residues = residues
913  self.bead_resolutions = bead_resolutions
914  self.bead_extra_breaks = bead_extra_breaks
915  self.bead_ca_centers = bead_ca_centers
916  self.bead_default_coord = bead_default_coord
917  self.density_residues_per_component = density_residues_per_component
918  self.density_prefix = density_prefix
919  self.density_force_compute = density_force_compute
920  self.density_voxel_size = density_voxel_size
921  self.setup_particles_as_densities = setup_particles_as_densities
922  self.ideal_helix = ideal_helix
923  self.color = color
924 
925 
926 class _FindCloseStructure(object):
927  """Utility to get the nearest observed coordinate"""
928  def __init__(self):
929  self.coords = []
930 
931  def add_residues(self, residues):
932  for r in residues:
933  idx = IMP.atom.Residue(r).get_index()
934  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
935  ca = IMP.atom.Selection(
936  r, atom_types=catypes).get_selected_particles()
937  p = IMP.atom.Selection(
938  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
939  if len(ca) == 1:
940  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
941  self.coords.append([idx, xyz])
942  elif len(p) == 1:
943  xyz = IMP.core.XYZ(p[0]).get_coordinates()
944  self.coords.append([idx, xyz])
945  else:
946  raise ValueError("_FindCloseStructure: wrong selection")
947 
948  self.coords.sort(key=itemgetter(0))
949 
950  def find_nearest_coord(self, query):
951  if self.coords == []:
952  return None
953  keys = [r[0] for r in self.coords]
954  pos = bisect_left(keys, query)
955  if pos == 0:
956  ret = self.coords[0]
957  elif pos == len(self.coords):
958  ret = self.coords[-1]
959  else:
960  before = self.coords[pos - 1]
961  after = self.coords[pos]
962  if after[0] - query < query - before[0]:
963  ret = after
964  else:
965  ret = before
966  return ret[1]
967 
968 
969 class Sequences(object):
970  """A dictionary-like wrapper for reading and storing sequence data.
971  Keys are FASTA sequence names, and each value a string of one-letter
972  codes."""
973  def __init__(self, fasta_fn, name_map=None):
974  """Read a FASTA file and extract all the requested sequences
975  @param fasta_fn sequence file
976  @param name_map dictionary mapping the FASTA name to final stored name
977  """
978  self.sequences = IMP.pmi.tools.OrderedDict()
979  self.read_sequences(fasta_fn, name_map)
980 
981  def __len__(self):
982  return len(self.sequences)
983 
984  def __contains__(self, x):
985  return x in self.sequences
986 
987  def __getitem__(self, key):
988  if type(key) is int:
989  allseqs = list(self.sequences.keys())
990  try:
991  return self.sequences[allseqs[key]]
992  except IndexError:
993  raise IndexError("You tried to access sequence number %d "
994  "but there's only %d" % (key, len(allseqs)))
995  else:
996  return self.sequences[key]
997 
998  def __iter__(self):
999  return self.sequences.__iter__()
1000 
1001  def __repr__(self):
1002  ret = ''
1003  for s in self.sequences:
1004  ret += '%s\t%s\n' % (s, self.sequences[s])
1005  return ret
1006 
1007  def read_sequences(self, fasta_fn, name_map=None):
1008  code = None
1009  seq = None
1010  with open(fasta_fn, 'r') as fh:
1011  for (num, line) in enumerate(fh):
1012  if line.startswith('>'):
1013  if seq is not None:
1014  self.sequences[code] = seq.strip('*')
1015  code = line.rstrip()[1:]
1016  if name_map is not None:
1017  try:
1018  code = name_map[code]
1019  except KeyError:
1020  pass
1021  seq = ''
1022  else:
1023  line = line.rstrip()
1024  if line: # Skip blank lines
1025  if seq is None:
1026  raise Exception(
1027  "Found FASTA sequence before first header "
1028  "at line %d: %s" % (num + 1, line))
1029  seq += line
1030  if seq is not None:
1031  self.sequences[code] = seq.strip('*')
1032 
1033 
1034 class PDBSequences(object):
1035  """Data structure for reading and storing sequence data from PDBs.
1036 
1037  @see fasta_pdb_alignments."""
1038  def __init__(self, model, pdb_fn, name_map=None):
1039  """Read a PDB file and return all sequences for each contiguous
1040  fragment
1041  @param pdb_fn file
1042  @param name_map dictionary mapping the pdb chain id to final
1043  stored name
1044  """
1045  self.model = model
1046  # self.sequences data-structure: (two-key dictionary)
1047  # it contains all contiguous fragments:
1048  # chain_id, tuples indicating first and last residue, sequence
1049  # example:
1050  # key1, key2, value
1051  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1052  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1053  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1054  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1055  self.sequences = IMP.pmi.tools.OrderedDict()
1056  self.read_sequences(pdb_fn, name_map)
1057 
1058  def read_sequences(self, pdb_fn, name_map):
1059  read_file = IMP.atom.read_pdb
1060  if pdb_fn.endswith('.cif'):
1061  read_file = IMP.atom.read_mmcif
1062  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1063  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1064  for c in cs:
1065  id = IMP.atom.Chain(c).get_id()
1066  print(id)
1067  if name_map:
1068  try:
1069  id = name_map[id]
1070  except KeyError:
1071  print("Chain ID %s not in name_map, skipping" % id)
1072  continue
1073  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1074  rids = []
1075  rids_olc_dict = {}
1076  for r in rs:
1077  dr = IMP.atom.Residue(r)
1078  rid = dr.get_index()
1079 
1080  isprotein = dr.get_is_protein()
1081  isrna = dr.get_is_rna()
1082  isdna = dr.get_is_dna()
1083  if isprotein:
1084  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1085  rids.append(rid)
1086  rids_olc_dict[rid] = olc
1087  elif isdna:
1088  if dr.get_residue_type() == IMP.atom.DADE:
1089  olc = "A"
1090  if dr.get_residue_type() == IMP.atom.DURA:
1091  olc = "U"
1092  if dr.get_residue_type() == IMP.atom.DCYT:
1093  olc = "C"
1094  if dr.get_residue_type() == IMP.atom.DGUA:
1095  olc = "G"
1096  if dr.get_residue_type() == IMP.atom.DTHY:
1097  olc = "T"
1098  rids.append(rid)
1099  rids_olc_dict[rid] = olc
1100  elif isrna:
1101  if dr.get_residue_type() == IMP.atom.ADE:
1102  olc = "A"
1103  if dr.get_residue_type() == IMP.atom.URA:
1104  olc = "U"
1105  if dr.get_residue_type() == IMP.atom.CYT:
1106  olc = "C"
1107  if dr.get_residue_type() == IMP.atom.GUA:
1108  olc = "G"
1109  if dr.get_residue_type() == IMP.atom.THY:
1110  olc = "T"
1111  rids.append(rid)
1112  rids_olc_dict[rid] = olc
1113  group_rids = self.group_indexes(rids)
1114  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1115  for group in group_rids:
1116  sequence_fragment = ""
1117  for i in range(group[0], group[1]+1):
1118  sequence_fragment += rids_olc_dict[i]
1119  contiguous_sequences[group] = sequence_fragment
1120  self.sequences[id] = contiguous_sequences
1121 
1122  def group_indexes(self, indexes):
1123  from itertools import groupby
1124  ranges = []
1125  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1126  group = [x[1] for x in g]
1127  ranges.append((group[0], group[-1]))
1128  return ranges
1129 
1130 
1131 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1132  '''This function computes and prints the alignment between the
1133  fasta file and the pdb sequence, computes the offsets for each contiguous
1134  fragment in the PDB.
1135  @param fasta_sequences IMP.pmi.topology.Sequences object
1136  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1137  @param show boolean default False, if True prints the alignments.
1138  The input objects should be generated using map_name dictionaries
1139  such that fasta_id
1140  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1141  Returns a dictionary of offsets, organized by peptide range (group):
1142  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1143  from Bio import pairwise2
1144  from Bio.pairwise2 import format_alignment
1145  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1146  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1147  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1148  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1149  offsets = IMP.pmi.tools.OrderedDict()
1150  for name in fasta_sequences.sequences:
1151  print(name)
1152  seq_fasta = fasta_sequences.sequences[name]
1153  if name not in pdb_sequences.sequences:
1154  print("Fasta id %s not in pdb names, aligning against every "
1155  "pdb chain" % name)
1156  pdbnames = pdb_sequences.sequences.keys()
1157  else:
1158  pdbnames = [name]
1159  for pdbname in pdbnames:
1160  for group in pdb_sequences.sequences[pdbname]:
1161  if group[1] - group[0] + 1 < 7:
1162  continue
1163  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1164  if show:
1165  print("########################")
1166  print(" ")
1167  print("protein name", pdbname)
1168  print("fasta id", name)
1169  print("pdb fragment", group)
1170  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1171  2, -1, -.5, -.1)[0]
1172  for a in [align]:
1173  offset = a[3] + 1 - group[0]
1174  if show:
1175  print("alignment sequence start-end",
1176  (a[3] + 1, a[4] + 1))
1177  print("offset from pdb to fasta index", offset)
1178  print(format_alignment(*a))
1179  if name not in offsets:
1180  offsets[pdbname] = {}
1181  if group not in offsets[pdbname]:
1182  offsets[pdbname][group] = offset
1183  else:
1184  if group not in offsets[pdbname]:
1185  offsets[pdbname][group] = offset
1186  return offsets
1187 
1188 
1189 class TempResidue(object):
1190  "Temporarily stores residue information, even without structure available."
1191  # Consider implementing __hash__ so you can select.
1192  def __init__(self, molecule, code, index, internal_index, alphabet):
1193  """setup a TempResidue
1194  @param molecule PMI Molecule to which this residue belongs
1195  @param code one-letter residue type code
1196  @param index PDB index
1197  @param internal_index The number in the sequence
1198  """
1199  # these attributes should be immutable
1200  self.molecule = molecule
1201  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1202  self.pdb_index = index
1203  self.internal_index = internal_index
1204  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1205  self.state_index = \
1206  IMP.atom.State(self.molecule.state.hier).get_state_index()
1207  # these are expected to change
1208  self._structured = False
1209  self.hier = IMP.atom.Residue.setup_particle(
1210  IMP.Particle(molecule.model), self.rtype, index)
1211 
1212  def __str__(self):
1213  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1214  + str(self.copy_index) + "_" + self.get_code() \
1215  + str(self.get_index())
1216 
1217  def __repr__(self):
1218  return self.__str__()
1219 
1220  def __key(self):
1221  # this returns the immutable attributes only
1222  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1223  self.pdb_index, self.internal_index)
1224 
1225  def __eq__(self, other):
1226  return type(other) == type(self) and self.__key() == other.__key()
1227 
1228  def __hash__(self):
1229  return hash(self.__key())
1230 
1231  def get_index(self):
1232  return self.pdb_index
1233 
1234  def get_internal_index(self):
1235  return self.internal_index
1236 
1237  def get_code(self):
1238  return IMP.atom.get_one_letter_code(self.get_residue_type())
1239 
1240  def get_residue_type(self):
1241  return self.rtype
1242 
1243  def get_hierarchy(self):
1244  return self.hier
1245 
1246  def get_molecule(self):
1247  return self.molecule
1248 
1249  def get_has_structure(self):
1250  return self._structured
1251 
1252  def set_structure(self, res, soft_check=False):
1253  if res.get_residue_type() != self.get_residue_type():
1254  if (res.get_residue_type() == IMP.atom.MSE
1255  and self.get_residue_type() == IMP.atom.MET):
1256  # MSE in the PDB file is OK to match with MET in the FASTA
1257  # sequence
1258  pass
1259  elif soft_check:
1260  # note from commit a2c13eaa1 we give priority to the
1261  # FASTA and not the PDB
1262  warnings.warn(
1263  'Inconsistency between FASTA sequence and PDB sequence. '
1264  'FASTA type %s %s and PDB type %s'
1265  % (self.get_index(), self.hier.get_residue_type(),
1266  res.get_residue_type()),
1268  self.hier.set_residue_type((self.get_residue_type()))
1269  self.rtype = self.get_residue_type()
1270  else:
1271  raise Exception(
1272  'ERROR: PDB residue index', self.get_index(), 'is',
1273  IMP.atom.get_one_letter_code(res.get_residue_type()),
1274  'and sequence residue is', self.get_code())
1275 
1276  for a in res.get_children():
1277  self.hier.add_child(a)
1278  atype = IMP.atom.Atom(a).get_atom_type()
1279  a.get_particle().set_name(
1280  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1281  self.hier.get_index()))
1282  self._structured = True
1283 
1284 
1285 class TopologyReader(object):
1286  """Automatically setup Sytem and Degrees of Freedom with a formatted
1287  text file.
1288  The file is read in and each part of the topology is stored as a
1289  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1290  The topology file should be in a simple pipe-delimited format:
1291  @code{.txt}
1292 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1293 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1294 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1295 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1296 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1297 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1298 
1299  @endcode
1300 
1301  These are the fields you can enter:
1302  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1303  hierarchy for this structure. Multiple copies of the same molecule
1304  can be created by appending a copy number after a period; if none is
1305  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1306  of Rpb2 or Rpb2.0).
1307  - `color`: The color used in the output RMF file. Uses
1308  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1309  (e.g. "red"), or R,G,B values as three comma-separated floating point
1310  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1311  starting with '#' (e.g. #ff0000).
1312  - `fasta_fn`: Name of FASTA file containing this component.
1313  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1314  from the file is assumed to be a protein sequence. If it should instead
1315  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1316  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1317  FASTA file and treat it as RNA.
1318  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1319  If left empty, will set up as BEADS (you can also specify "BEADS")
1320  Can also write "IDEAL_HELIX".
1321  - `chain`: Chain ID of this domain in the PDB file.
1322  - `residue_range`: Comma delimited pair defining range.
1323  Can leave empty or use 'all' for entire sequence from PDB file.
1324  The second item in the pair can be END to select the last residue in the
1325  PDB chain.
1326  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1327  For example, an offset of -10 would match the first residue in the
1328  FASTA file (which is always numbered sequentially starting from 1) with
1329  residue 11 in the PDB file.
1330  - `bead_size`: The size (in residues) of beads used to model areas not
1331  covered by PDB coordinates. These will be built automatically.
1332  - `em_residues`: The number of Gaussians used to model the electron
1333  density of this domain. Set to zero if no EM fitting will be done.
1334  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1335  (and .mrc)
1336  - `rigid_body`: Leave empty if this object is not in a rigid body.
1337  Otherwise, this is a number corresponding to the rigid body containing
1338  this object. The number itself is just used for grouping things.
1339  - `super_rigid_body`: Add a mover that periodically moves several related
1340  domains as if they were a single large rigid body. In between such moves,
1341  the domains move independently. This can improve sampling.
1342  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1343  for all adjacent pairs of domains in the chain.
1344  - `flags` additional flags for advanced options
1345  @note All filenames are relative to the paths specified in the constructor.
1346 
1347  """ # noqa: E501
1348  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1349  gmm_dir='./'):
1350  """Constructor.
1351  @param topology_file Pipe-delimited file specifying the topology
1352  @param pdb_dir Relative path to the pdb directory
1353  @param fasta_dir Relative path to the fasta directory
1354  @param gmm_dir Relative path to the GMM directory
1355  """
1356  self.topology_file = topology_file
1357  # key=molname, value=TempMolecule
1358  self.molecules = IMP.pmi.tools.OrderedDict()
1359  self.pdb_dir = pdb_dir
1360  self.fasta_dir = fasta_dir
1361  self.gmm_dir = gmm_dir
1362  self._components = self.read(topology_file)
1363 
1364  def write_topology_file(self, outfile):
1365  with open(outfile, "w") as f:
1366  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1367  "residue_range|pdb_offset|bead_size|"
1368  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1369  "chain_of_super_rigid_bodies|\n")
1370  for c in self._components:
1371  output = c.get_str()+'\n'
1372  f.write(output)
1373  return outfile
1374 
1375  def get_components(self, topology_list="all"):
1376  """ Return list of ComponentTopologies for selected components
1377  @param topology_list List of indices to return"""
1378  if topology_list == "all":
1379  topologies = self._components
1380  else:
1381  topologies = []
1382  for i in topology_list:
1383  topologies.append(self._components[i])
1384  return topologies
1385 
1386  def get_molecules(self):
1387  return self.molecules
1388 
1389  def read(self, topology_file, append=False):
1390  """Read system components from topology file. append=False will erase
1391  current topology and overwrite with new
1392  """
1393  is_topology = False
1394  is_directories = False
1395  linenum = 1
1396  if not append:
1397  self._components = []
1398 
1399  with open(topology_file) as infile:
1400  for line in infile:
1401  if line.lstrip() == "" or line[0] == "#":
1402  continue
1403  elif line.split('|')[1].strip() in ("molecule_name"):
1404  is_topology = True
1405  is_directories = False
1406  old_format = False
1407  continue
1408  elif line.split('|')[1] == "component_name":
1409  is_topology = True
1411  "Old-style topology format (using "
1412  "|component_name|) is deprecated. Please switch to "
1413  "the new-style format (using |molecule_name|)\n")
1414  old_format = True
1415  is_directories = False
1416  continue
1417  elif line.split('|')[1] == "directories":
1419  "Setting directories in the topology file "
1420  "is deprecated. Please do so through the "
1421  "TopologyReader constructor. Note that new-style "
1422  "paths are relative to the current working "
1423  "directory, not the topology file.\n")
1424  is_directories = True
1425  elif is_directories:
1426  fields = line.split('|')
1427  setattr(self, fields[1],
1428  IMP.get_relative_path(topology_file, fields[2]))
1429  if is_topology:
1430  new_component = self._parse_line(line, linenum, old_format)
1431  self._components.append(new_component)
1432  linenum += 1
1433  return self._components
1434 
1435  def _parse_line(self, component_line, linenum, old_format):
1436  """Parse a line of topology values and matches them to their key.
1437  Checks each value for correct syntax
1438  Returns a list of Component objects
1439  fields:
1440  """
1441  c = _Component()
1442  values = [s.strip() for s in component_line.split('|')]
1443  errors = []
1444 
1445  # Required fields
1446  if old_format:
1447  c.molname = values[1]
1448  c.copyname = ''
1449  c._domain_name = values[2]
1450  c.color = 'blue'
1451  else:
1452  names = values[1].split('.')
1453  if len(names) == 1:
1454  c.molname = names[0]
1455  c.copyname = ''
1456  elif len(names) == 2:
1457  c.molname = names[0]
1458  c.copyname = names[1]
1459  else:
1460  c.molname = names[0]
1461  c.copyname = names[1]
1462  errors.append("Molecule name should be <molecule.copyID>")
1463  errors.append("For component %s line %d "
1464  % (c.molname, linenum))
1465  c._domain_name = c.molname + '.' + c.copyname
1466  colorfields = values[2].split(',')
1467  if len(colorfields) == 3:
1468  c.color = [float(x) for x in colorfields]
1469  if any([x > 1 for x in c.color]):
1470  c.color = [x/255 for x in c.color]
1471  else:
1472  c.color = values[2]
1473  c._orig_fasta_file = values[3]
1474  c.fasta_file = values[3]
1475  fasta_field = values[4].split(",")
1476  c.fasta_id = fasta_field[0]
1477  c.fasta_flag = None
1478  if len(fasta_field) > 1:
1479  c.fasta_flag = fasta_field[1]
1480  c._orig_pdb_input = values[5]
1481  pdb_input = values[5]
1482  tmp_chain = values[6]
1483  rr = values[7]
1484  offset = values[8]
1485  bead_size = values[9]
1486  emg = values[10]
1487  if old_format:
1488  rbs = srbs = csrbs = ''
1489  else:
1490  rbs = values[11]
1491  srbs = values[12]
1492  csrbs = values[13]
1493 
1494  if c.molname not in self.molecules:
1495  self.molecules[c.molname] = _TempMolecule(c)
1496  else:
1497  # COPY OR DOMAIN
1498  c._orig_fasta_file = \
1499  self.molecules[c.molname].orig_component._orig_fasta_file
1500  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1501  self.molecules[c.molname].add_component(c, c.copyname)
1502 
1503  # now cleanup input
1504  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1505  if pdb_input == "":
1506  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1507  errors.append("For component %s line %d is not correct"
1508  "|%s| was given." % (c.molname, linenum, pdb_input))
1509  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1510  c.pdb_file = pdb_input
1511  else:
1512  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1513 
1514  # PDB chain must be one or two characters
1515  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1516  c.chain = tmp_chain
1517  else:
1518  errors.append(
1519  "PDB Chain identifier must be one or two characters.")
1520  errors.append("For component %s line %d is not correct"
1521  "|%s| was given."
1522  % (c.molname, linenum, tmp_chain))
1523 
1524  # Optional fields
1525  # Residue Range
1526  if rr.strip() == 'all' or str(rr) == "":
1527  c.residue_range = None
1528  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1529  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1530  # Make sure that is residue range is given, there are only
1531  # two values and they are integers
1532  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1533  if c.residue_range[1] != 'END':
1534  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1535  # Old format used -1 for the last residue
1536  if old_format and c.residue_range[1] == -1:
1537  c.residue_range = (c.residue_range[0], 'END')
1538  else:
1539  errors.append("Residue Range format for component %s line %d is "
1540  "not correct" % (c.molname, linenum))
1541  errors.append(
1542  "Correct syntax is two comma separated integers: "
1543  "|start_res, end_res|. end_res can also be END to select the "
1544  "last residue in the chain. |%s| was given." % rr)
1545  errors.append("To select all residues, indicate |\"all\"|")
1546 
1547  # PDB Offset
1548  if self._is_int(offset):
1549  c.pdb_offset = int(offset)
1550  elif len(offset) == 0:
1551  c.pdb_offset = 0
1552  else:
1553  errors.append("PDB Offset format for component %s line %d is "
1554  "not correct" % (c.molname, linenum))
1555  errors.append("The value must be a single integer. |%s| was given."
1556  % offset)
1557 
1558  # Bead Size
1559  if self._is_int(bead_size):
1560  c.bead_size = int(bead_size)
1561  elif len(bead_size) == 0:
1562  c.bead_size = 0
1563  else:
1564  errors.append("Bead Size format for component %s line %d is "
1565  "not correct" % (c.molname, linenum))
1566  errors.append("The value must be a single integer. |%s| was given."
1567  % bead_size)
1568 
1569  # EM Residues Per Gaussian
1570  if self._is_int(emg):
1571  if int(emg) > 0:
1572  c.density_prefix = os.path.join(self.gmm_dir,
1573  c.get_unique_name())
1574  c.gmm_file = c.density_prefix + '.txt'
1575  c.mrc_file = c.density_prefix + '.gmm'
1576 
1577  c.em_residues_per_gaussian = int(emg)
1578  else:
1579  c.em_residues_per_gaussian = 0
1580  elif len(emg) == 0:
1581  c.em_residues_per_gaussian = 0
1582  else:
1583  errors.append("em_residues_per_gaussian format for component "
1584  "%s line %d is not correct" % (c.molname, linenum))
1585  errors.append("The value must be a single integer. |%s| was given."
1586  % emg)
1587 
1588  # rigid bodies
1589  if len(rbs) > 0:
1590  if not self._is_int(rbs):
1591  errors.append(
1592  "rigid bodies format for component "
1593  "%s line %d is not correct" % (c.molname, linenum))
1594  errors.append("Each RB must be a single integer, or empty. "
1595  "|%s| was given." % rbs)
1596  c.rigid_body = int(rbs)
1597 
1598  # super rigid bodies
1599  if len(srbs) > 0:
1600  srbs = srbs.split(',')
1601  for i in srbs:
1602  if not self._is_int(i):
1603  errors.append(
1604  "super rigid bodies format for component "
1605  "%s line %d is not correct" % (c.molname, linenum))
1606  errors.append(
1607  "Each SRB must be a single integer. |%s| was given."
1608  % srbs)
1609  c.super_rigid_bodies = srbs
1610 
1611  # chain of super rigid bodies
1612  if len(csrbs) > 0:
1613  if not self._is_int(csrbs):
1614  errors.append(
1615  "em_residues_per_gaussian format for component "
1616  "%s line %d is not correct" % (c.molname, linenum))
1617  errors.append(
1618  "Each CSRB must be a single integer. |%s| was given."
1619  % csrbs)
1620  c.chain_of_super_rigid_bodies = csrbs
1621 
1622  # done
1623  if errors:
1624  raise ValueError("Fix Topology File syntax errors and rerun: "
1625  + "\n".join(errors))
1626  else:
1627  return c
1628 
1629  def set_gmm_dir(self, gmm_dir):
1630  """Change the GMM dir"""
1631  self.gmm_dir = gmm_dir
1632  for c in self._components:
1633  c.gmm_file = os.path.join(self.gmm_dir,
1634  c.get_unique_name() + ".txt")
1635  c.mrc_file = os.path.join(self.gmm_dir,
1636  c.get_unique_name() + ".mrc")
1637  print('new gmm', c.gmm_file)
1638 
1639  def set_pdb_dir(self, pdb_dir):
1640  """Change the PDB dir"""
1641  self.pdb_dir = pdb_dir
1642  for c in self._components:
1643  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1644  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1645 
1646  def set_fasta_dir(self, fasta_dir):
1647  """Change the FASTA dir"""
1648  self.fasta_dir = fasta_dir
1649  for c in self._components:
1650  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1651 
1652  def _is_int(self, s):
1653  # is this string an integer?
1654  try:
1655  float(s)
1656  return float(s).is_integer()
1657  except ValueError:
1658  return False
1659 
1660  def get_rigid_bodies(self):
1661  """Return list of lists of rigid bodies (as domain name)"""
1662  rbl = defaultdict(list)
1663  for c in self._components:
1664  if c.rigid_body:
1665  rbl[c.rigid_body].append(c.get_unique_name())
1666  return rbl.values()
1667 
1669  """Return list of lists of super rigid bodies (as domain name)"""
1670  rbl = defaultdict(list)
1671  for c in self._components:
1672  for rbnum in c.super_rigid_bodies:
1673  rbl[rbnum].append(c.get_unique_name())
1674  return rbl.values()
1675 
1677  "Return list of lists of chains of super rigid bodies (as domain name)"
1678  rbl = defaultdict(list)
1679  for c in self._components:
1680  for rbnum in c.chain_of_super_rigid_bodies:
1681  rbl[rbnum].append(c.get_unique_name())
1682  return rbl.values()
1683 
1684 
1685 class _TempMolecule(object):
1686  """Store the Components and any requests for copies"""
1687  def __init__(self, init_c):
1688  self.molname = init_c.molname
1689  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1690  self.add_component(init_c, init_c.copyname)
1691  self.orig_copyname = init_c.copyname
1692  self.orig_component = self.domains[init_c.copyname][0]
1693 
1694  def add_component(self, component, copy_id):
1695  self.domains[copy_id].append(component)
1696  component.domainnum = len(self.domains[copy_id])-1
1697 
1698  def __repr__(self):
1699  return ','.join('%s:%i'
1700  % (k, len(self.domains[k])) for k in self.domains)
1701 
1702 
1703 class _Component(object):
1704  """Stores the components required to build a standard IMP hierarchy
1705  using IMP.pmi.BuildModel()
1706  """
1707  def __init__(self):
1708  self.molname = None
1709  self.copyname = None
1710  self.domainnum = 0
1711  self.fasta_file = None
1712  self._orig_fasta_file = None
1713  self.fasta_id = None
1714  self.fasta_flag = None
1715  self.pdb_file = None
1716  self._orig_pdb_input = None
1717  self.chain = None
1718  self.residue_range = None
1719  self.pdb_offset = 0
1720  self.bead_size = 10
1721  self.em_residues_per_gaussian = 0
1722  self.gmm_file = ''
1723  self.mrc_file = ''
1724  self.density_prefix = ''
1725  self.color = 0.1
1726  self.rigid_body = None
1727  self.super_rigid_bodies = []
1728  self.chain_of_super_rigid_bodies = []
1729 
1730  def _l2s(self, rng):
1731  return ",".join("%s" % x for x in rng)
1732 
1733  def __repr__(self):
1734  return self.get_str()
1735 
1736  def get_unique_name(self):
1737  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1738 
1739  def get_str(self):
1740  res_range = self.residue_range
1741  if self.residue_range is None:
1742  res_range = []
1743  name = self.molname
1744  if self.copyname != '':
1745  name += '.' + self.copyname
1746  if self.chain is None:
1747  chain = ' '
1748  else:
1749  chain = self.chain
1750  color = self.color
1751  if isinstance(color, list):
1752  color = ','.join([str(x) for x in color])
1753  fastaid = self.fasta_id
1754  if self.fasta_flag:
1755  fastaid += "," + self.fasta_flag
1756  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1757  self._orig_pdb_input, chain,
1758  self._l2s(list(res_range)),
1759  str(self.pdb_offset),
1760  str(self.bead_size),
1761  str(self.em_residues_per_gaussian),
1762  str(self.rigid_body) if self.rigid_body else '',
1763  self._l2s(self.super_rigid_bodies),
1764  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1765  return a
1766 
1767 
1769  '''Extends the functionality of IMP.atom.Molecule'''
1770 
1771  def __init__(self, hierarchy):
1772  IMP.atom.Molecule.__init__(self, hierarchy)
1773 
1774  def get_state_index(self):
1775  state = self.get_parent()
1776  return IMP.atom.State(state).get_state_index()
1777 
1778  def get_copy_index(self):
1779  return IMP.atom.Copy(self).get_copy_index()
1780 
1781  def get_extended_name(self):
1782  return self.get_name() + "." + \
1783  str(self.get_copy_index()) + \
1784  "." + str(self.get_state_index())
1785 
1786  def get_sequence(self):
1787  return IMP.atom.Chain(self).get_sequence()
1788 
1789  def get_residue_indexes(self):
1791 
1792  def get_residue_segments(self):
1793  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1794 
1795  def get_chain_id(self):
1796  return IMP.atom.Chain(self).get_id()
1797 
1798  def __repr__(self):
1799  s = 'PMIMoleculeHierarchy '
1800  s += self.get_name()
1801  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1802  s += " " + "State " + str(self.get_state_index())
1803  s += " " + "N residues " + str(len(self.get_sequence()))
1804  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1078
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:623
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
Represent the root node of the global IMP.atom.Hierarchy.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:67
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:425
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:512
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:832