IMP logo
IMP Reference Guide  develop.374319d7bb,2024/06/17
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from an mmCIF
17  or PDB file.
18  * Molecule.add_representation() to create a representation unit - here you
19  can choose bead resolutions as well as alternate representations like
20  densities or ideal helices.
21  * Molecule.create_clone() lets you set up a molecule with identical
22  representations, just a different chain ID. Use Molecule.create_copy()
23  if you want a molecule with the same sequence but that allows custom
24  representations.
25 * Once data has been added and representations chosen, call System.build()
26  to create a canonical IMP hierarchy.
27 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
28  in IMP::pmi::dof.
29 * Check your representation with a nice printout:
30  IMP::atom::show_with_representation()
31 
32 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
33 
34 Alternatively one can construct the entire topology and degrees of freedom
35 via formatted text file with TopologyReader and
36 IMP::pmi::macros::BuildSystem(). This is used in the
37 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
38 set of the full options available to PMI users
39 (rigid bodies only, fixed resolutions).
40 """ # noqa: E501
41 
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase:
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class _OurWeakRef:
137  """A simple wrapper around weakref.ref which can be pickled.
138  Note that we throw the reference away at pickle time. It should
139  be able to be reconstructed from System._all_systems anyway."""
140 
141  def __init__(self, system):
142  self._ref = weakref.ref(system)
143 
144  def __call__(self):
145  if hasattr(self, '_ref'):
146  return self._ref()
147 
148  def __getstate__(self):
149  return None
150 
151 
152 class System(_SystemBase):
153  """Represent the root node of the global IMP.atom.Hierarchy."""
154 
155  _all_systems = weakref.WeakSet()
156 
157  def __init__(self, model=None, name="System"):
158  """Constructor.
159 
160  @param model The IMP::Model in which to construct this system.
161  @param name The name of the top-level hierarchy node.
162  """
163  _SystemBase.__init__(self, model)
164  self._number_of_states = 0
165  self._protocol_output = []
166  self.states = []
167  self.built = False
168 
169  System._all_systems.add(self)
170 
171  # the root hierarchy node
172  self.hier = self._create_hierarchy()
173  self.hier.set_name(name)
174  self.hier._pmi2_system = _OurWeakRef(self)
175 
176  def get_states(self):
177  """Get a list of all State objects in this system"""
178  return self.states
179 
180  def create_state(self):
181  """Makes and returns a new IMP.pmi.topology.State in this system"""
182  self._number_of_states += 1
183  state = State(self, self._number_of_states-1)
184  self.states.append(state)
185  return state
186 
187  def __repr__(self):
188  return self.hier.get_name()
189 
191  """Returns the total number of states generated"""
192  return self._number_of_states
193 
194  def get_hierarchy(self):
195  """Return the top-level IMP.atom.Hierarchy node for this system"""
196  return self.hier
197 
198  def build(self, **kwargs):
199  """Build all states"""
200  if not self.built:
201  for state in self.states:
202  state.build(**kwargs)
203  self.built = True
204  return self.hier
205 
206  def add_protocol_output(self, p):
207  """Capture details of the modeling protocol.
208  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
209  """
210  self._protocol_output.append(p)
211 # p._each_metadata.append(self._metadata)
212 # p._file_datasets.append(self._file_dataset)
213  for state in self.states:
214  state._add_protocol_output(p, self)
215 
216 
217 class State(_SystemBase):
218  """Stores a list of Molecules all with the same State index.
219  Also stores number of copies of each Molecule for easy selection.
220  """
221  def __init__(self, system, state_index):
222  """Define a new state
223  @param system the PMI System
224  @param state_index the index of the new state
225  @note It's expected that you will not use this constructor directly,
226  but rather create it with System.create_state()
227  """
228  self.model = system.get_hierarchy().get_model()
229  self.system = system
230  self.hier = self._create_child(system.get_hierarchy())
231  self.short_name = self.long_name = "State_" + str(state_index)
232  self.hier.set_name(self.short_name)
233  # key is molecule name. value are the molecule copies!
234  self.molecules = IMP.pmi.tools.OrderedDict()
235  IMP.atom.State.setup_particle(self.hier, state_index)
236  self.built = False
237  self._protocol_output = []
238  for p in system._protocol_output:
239  self._add_protocol_output(p, system)
240 
241  def __repr__(self):
242  return self.system.__repr__()+'.'+self.hier.get_name()
243 
244  def _add_protocol_output(self, p, system):
245  state = p._add_state(self)
246  self._protocol_output.append((p, state))
247  state.model = system.model
248  state.prot = self.hier
249 
250  def get_molecules(self):
251  """Return a dictionary where key is molecule name and value
252  is a list of all copies of that molecule in setup order"""
253  return self.molecules
254 
255  def get_molecule(self, name, copy_num=0):
256  """Access a molecule by name and copy number
257  @param name The molecule name used during setup
258  @param copy_num The copy number based on input order.
259  Default: 0. Set to 'all' to get all copies
260  """
261  if name not in self.molecules:
262  raise KeyError("Could not find molname %s" % name)
263  if copy_num == 'all':
264  return self.molecules[name]
265  else:
266  return self.molecules[name][copy_num]
267 
268  def create_molecule(self, name, sequence='', chain_id='',
269  alphabet=IMP.pmi.alphabets.amino_acid,
270  uniprot=None):
271  """Create a new Molecule within this State
272  @param name the name of the molecule (string);
273  it must not be already used
274  @param sequence sequence (string)
275  @param chain_id Chain ID to assign to this molecule
276  @param alphabet Mapping from FASTA codes to residue types
277  @param uniprot UniProt accession, if available
278  """
279  # check whether the molecule name is already assigned
280  if name in self.molecules:
281  raise ValueError('Cannot use a molecule name already used')
282 
283  # check for something that looks like a copy number
284  if re.search(r'\.\d+$', name):
285  warnings.warn(
286  "It is recommended not to end the molecule name with "
287  ".(number) as it may be confused with the copy number "
288  "(the copy number for new molecules is always 0, so to "
289  "select this molecule, use '%s.0'). Use create_clone() or "
290  "create_copy() instead if a copy of an existing molecule "
291  "is desired." % name, IMP.pmi.ParameterWarning)
292 
293  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
294  alphabet=alphabet, uniprot=uniprot)
295  self.molecules[name] = [mol]
296  return mol
297 
298  def get_hierarchy(self):
299  """Get the IMP.atom.Hierarchy node for this state"""
300  return self.hier
301 
302  def get_number_of_copies(self, molname):
303  """Get the number of copies of the given molecule (by name)
304 
305  @param molname The name of the molecule
306  """
307  return len(self.molecules[molname])
308 
309  def _register_copy(self, molecule):
310  molname = molecule.get_hierarchy().get_name()
311  self.molecules[molname].append(molecule)
312 
313  def build(self, **kwargs):
314  """Build all molecules (automatically makes clones)"""
315  if not self.built:
316  for molname in self.molecules:
317  # We want to update ProtocolOutput in forward order so
318  # that, e.g. we get nice chain IDs in the mmCIF output,
319  # but we want to build the sequence in reverse order
320  for mol in self.molecules[molname]:
321  mol._build_protocol_output()
322  for mol in reversed(self.molecules[molname]):
323  mol.build(protocol_output=False, **kwargs)
324  for mol in self.molecules[molname]:
325  mol._finalize_build()
326  self.built = True
327  return self.hier
328 
329 
330 # Track residues read from PDB files
331 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
332 
333 
334 class _RepresentationHandler:
335  """Handle PMI representation and use it to populate that of any attached
336  ProtocolOutput objects"""
337  def __init__(self, name, pos, pdb_elements):
338  self.name = name
339  self.pos = pos
340  self.last_index = None
341  self.last_pdb_index = None
342  self.pdb_for_residue = {}
343  for residues, pdb in pdb_elements:
344  for r in residues:
345  self.pdb_for_residue[r.get_index()] = pdb
346 
347  def _get_pdb(self, h):
348  """Return a PDBElement if the given hierarchy was read from a
349  PDB file"""
351  rind = IMP.atom.Residue(h).get_index()
352  return self.pdb_for_residue.get(rind, None)
353 
354  def __call__(self, res):
355  """Handle a single residue"""
356  if len(self.pos) == 0:
357  return
358  h = res.hier
359  pi = h.get_particle_index()
360  # Do nothing if we already saw this hierarchy
361  if self.last_index is None or pi != self.last_index:
362  pdb = self._get_pdb(h)
363  self.last_index = pi
364  if pdb:
365  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
366  frag = IMP.atom.Fragment(h.get_parent())
367  fragi = frag.get_particle_index()
368  # Do nothing if we already saw this PDB fragment
369  if self.last_pdb_index is not None \
370  and self.last_pdb_index == fragi:
371  return
372  self.last_pdb_index = fragi
373  indices = frag.get_residue_indexes()
374  for p, state in self.pos:
375  p.add_pdb_element(state, self.name,
376  indices[0], indices[-1], pdb.offset,
377  pdb.filename, pdb.chain_id, frag)
379  frag = IMP.atom.Fragment(h)
380  indices = frag.get_residue_indexes()
381  for p, state in self.pos:
382  p.add_bead_element(state, self.name,
383  indices[0], indices[-1], 1, h)
385  resind = IMP.atom.Residue(h).get_index()
386  for p, state in self.pos:
387  p.add_bead_element(state, self.name, resind, resind, 1, h)
388  else:
389  raise TypeError("Unhandled hierarchy %s" % str(h))
390 
391 
392 class Molecule(_SystemBase):
393  """Stores a named protein chain.
394  This class is constructed from within the State class.
395  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
396  Structure is read using this class.
397  Resolutions and copies can be registered, but are only created
398  when build() is called.
399 
400  A Molecule acts like a simple Python list of residues, and can be indexed
401  by integer (starting at zero) or by string (starting at 1).
402  """
403 
404  def __init__(self, state, name, sequence, chain_id, copy_num,
405  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid,
406  uniprot=None):
407  """The user should not call this directly; instead call
408  State.create_molecule()
409 
410  @param state The parent PMI State
411  @param name The name of the molecule (string)
412  @param sequence Sequence (string)
413  @param chain_id The chain of this molecule
414  @param copy_num Store the copy number
415  @param mol_to_clone The original molecule (for cloning ONLY)
416  @note It's expected that you will not use this constructor directly,
417  but rather create a Molecule with State.create_molecule()
418  """
419  # internal data storage
420  self.model = state.get_hierarchy().get_model()
421  self.state = state
422  self.sequence = sequence
423  self.built = False
424  self.mol_to_clone = mol_to_clone
425  self.alphabet = alphabet
426  self.representations = [] # list of stuff to build
427  self._pdb_elements = []
428  self.uniprot = uniprot
429  # residues with representation
430  self._represented = IMP.pmi.tools.OrderedSet()
431  # helps you place beads by storing structure
432  self.coord_finder = _FindCloseStructure()
433  # list of OrderedSets of tempresidues set to ideal helix
434  self._ideal_helices = []
435 
436  # create root node and set it as child to passed parent hierarchy
437  self.hier = self._create_child(self.state.get_hierarchy())
438  self.hier.set_name(name)
439  IMP.atom.Copy.setup_particle(self.hier, copy_num)
440  self._name_with_copy = "%s.%d" % (name, copy_num)
441  # store the sequence
442  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
443  self.chain.set_sequence(self.sequence)
444  self.chain.set_chain_type(alphabet.get_chain_type())
445  if self.uniprot:
446  self.chain.set_uniprot_accession(self.uniprot)
447  # create TempResidues from the sequence (if passed)
448  self.residues = []
449  for ns, s in enumerate(sequence):
450  r = TempResidue(self, s, ns+1, ns, alphabet)
451  self.residues.append(r)
452 
453  def __repr__(self):
454  return self.state.__repr__() + '.' + self.get_name() + '.' + \
455  str(IMP.atom.Copy(self.hier).get_copy_index())
456 
457  def __getitem__(self, val):
458  if isinstance(val, int):
459  return self.residues[val]
460  elif isinstance(val, str):
461  return self.residues[int(val)-1]
462  elif isinstance(val, slice):
463  return IMP.pmi.tools.OrderedSet(self.residues[val])
464  else:
465  raise TypeError("Indexes must be int or str")
466 
467  def get_hierarchy(self):
468  """Return the IMP Hierarchy corresponding to this Molecule"""
469  return self.hier
470 
471  def get_name(self):
472  """Return this Molecule name"""
473  return self.hier.get_name()
474 
475  def get_state(self):
476  """Return the State containing this Molecule"""
477  return self.state
478 
479  def get_ideal_helices(self):
480  """Returns list of OrderedSets with requested ideal helices"""
481  return self._ideal_helices
482 
483  def residue_range(self, a, b, stride=1):
484  """Get residue range from a to b, inclusive.
485  Use integers to get 0-indexing, or strings to get PDB-indexing"""
486  if isinstance(a, int) and isinstance(b, int) \
487  and isinstance(stride, int):
488  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
489  elif isinstance(a, str) and isinstance(b, str) \
490  and isinstance(stride, int):
491  return IMP.pmi.tools.OrderedSet(
492  self.residues[int(a)-1:int(b):stride])
493  else:
494  raise TypeError("Range ends must be int or str. "
495  "Stride must be int.")
496 
497  def get_residues(self):
498  """Return all modeled TempResidues as a set"""
499  all_res = IMP.pmi.tools.OrderedSet(self.residues)
500  return all_res
501 
502  def get_represented(self):
503  """Return set of TempResidues that have representation"""
504  return self._represented
505 
507  """Return a set of TempResidues that have associated structure
508  coordinates"""
509  atomic_res = IMP.pmi.tools.OrderedSet()
510  for res in self.residues:
511  if res.get_has_structure():
512  atomic_res.add(res)
513  return atomic_res
514 
516  """Return a set of TempResidues that don't have associated
517  structure coordinates"""
518  non_atomic_res = IMP.pmi.tools.OrderedSet()
519  for res in self.residues:
520  if not res.get_has_structure():
521  non_atomic_res.add(res)
522  return non_atomic_res
523 
524  def create_copy(self, chain_id):
525  """Create a new Molecule with the same name and sequence but a
526  higher copy number. Returns the Molecule. No structure or
527  representation will be copied!
528 
529  @param chain_id Chain ID of the new molecule
530  """
531  mol = Molecule(
532  self.state, self.get_name(), self.sequence, chain_id,
533  copy_num=self.state.get_number_of_copies(self.get_name()))
534  self.state._register_copy(mol)
535  return mol
536 
537  def create_clone(self, chain_id):
538  """Create a Molecule clone (automatically builds same structure
539  and representation)
540 
541  @param chain_id If you want to set the chain ID of the copy
542  to something
543  @note You cannot add structure or representations to a clone!
544  """
545  mol = Molecule(
546  self.state, self.get_name(), self.sequence, chain_id,
547  copy_num=self.state.get_number_of_copies(self.get_name()),
548  mol_to_clone=self)
549  self.state._register_copy(mol)
550  return mol
551 
552  def add_structure(self, pdb_fn, chain_id, res_range=[],
553  offset=0, model_num=None, ca_only=False,
554  soft_check=False):
555  """Read a structure and store the coordinates.
556  @return the atomic residues (as a set)
557  @param pdb_fn The file to read (in PDB or mmCIF format)
558  @param chain_id Chain ID to read
559  @param res_range Add only a specific set of residues from the PDB
560  file. res_range[0] is the starting and res_range[1]
561  is the ending residue index.
562  @param offset Apply an offset to the residue indexes of the PDB
563  file. This number is added to the PDB sequence.
564  PMI uses 1-based FASTA numbering internally (the
565  first residue in the sequence is numbered 1, and
566  so on). If the PDB chain is not also numbered
567  starting from 1, apply an offset to make it match
568  the FASTA. For example, if the PDB is numbered
569  starting from -5, use an offset of 6 (-5 + 6 = 1).
570  @param model_num Read multi-model PDB and return that model
571  @param ca_only Only read the CA positions from the PDB file
572  @param soft_check If True, it only warns if there are sequence
573  mismatches between the PDB and the Molecule (FASTA)
574  sequence, and uses the sequence from the PDB.
575  If False (Default), it raises an error when there
576  are sequence mismatches.
577  @note If you are adding structure without a FASTA file, set soft_check
578  to True.
579  """
580  if self.mol_to_clone is not None:
581  raise ValueError('You cannot call add_structure() for a clone')
582 
583  self.pdb_fn = pdb_fn
584 
585  # get IMP.atom.Residues from the pdb file
586  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
587  res_range, offset,
588  ca_only=ca_only)
589  self.coord_finder.add_residues(rhs)
590 
591  if len(self.residues) == 0:
592  warnings.warn(
593  "Substituting PDB residue type with FASTA residue type. "
594  "Potentially dangerous.", IMP.pmi.StructureWarning)
595 
596  # Store info for ProtocolOutput usage later
597  self._pdb_elements.append(
598  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
599  chain_id=chain_id)))
600 
601  # load those into TempResidue object
602  # collect integer indexes of atomic residues to return
603  atomic_res = IMP.pmi.tools.OrderedSet()
604  for nrh, rh in enumerate(rhs):
605  pdb_idx = rh.get_index()
606  raw_idx = pdb_idx - 1
607 
608  # add ALA to fill in gaps
609  while len(self.residues) < pdb_idx:
610  r = TempResidue(self, 'A', len(self.residues)+1,
611  len(self.residues),
612  IMP.pmi.alphabets.amino_acid)
613  self.residues.append(r)
614  self.sequence += 'A'
615 
616  internal_res = self.residues[raw_idx]
617  if len(self.sequence) < raw_idx:
618  self.sequence += IMP.atom.get_one_letter_code(
619  rh.get_residue_type())
620  internal_res.set_structure(rh, soft_check)
621  atomic_res.add(internal_res)
622 
623  self.chain.set_sequence(self.sequence)
624  return atomic_res
625 
627  residues=None,
628  resolutions=[],
629  bead_extra_breaks=[],
630  bead_ca_centers=True,
631  bead_default_coord=[0, 0, 0],
632  density_residues_per_component=None,
633  density_prefix=None,
634  density_force_compute=False,
635  density_voxel_size=1.0,
636  setup_particles_as_densities=False,
637  ideal_helix=False,
638  color=None):
639  """Set the representation for some residues. Some options
640  (beads, ideal helix) operate along the backbone. Others (density
641  options) are volumetric.
642  Some of these you can combine e.g., beads+densities or helix+densities
643  See @ref pmi_resolution
644  @param residues Set of PMI TempResidues for adding the representation.
645  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
646  If None, will select all residues for this Molecule.
647  @param resolutions Resolutions for beads representations.
648  If structured, will average along backbone, breaking at
649  sequence breaks. If unstructured, will just create beads.
650  Pass an integer or list of integers
651  @param bead_extra_breaks Additional breakpoints for splitting beads.
652  The value can be the 0-ordered position, after which it'll
653  insert the break.
654  Alternatively pass PDB-style (1-ordered) indices as a string.
655  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
656  @param bead_ca_centers Set to True if you want the resolution=1 beads
657  to be at CA centers (otherwise will average atoms to get
658  center). Defaults to True.
659  @param bead_default_coord Advanced feature. Normally beads are placed
660  at the nearest structure. If no structure provided (like an
661  all bead molecule), the beads go here.
662  @param density_residues_per_component Create density (Gaussian
663  Mixture Model) for these residues. Must also supply
664  density_prefix
665  @param density_prefix Prefix (assuming '.txt') to read components
666  from or write to.
667  If exists, will read unless you set density_force_compute=True.
668  Will also write map (prefix+'.mrc').
669  Must also supply density_residues_per_component.
670  @param density_force_compute Set true to force overwrite density file.
671  @param density_voxel_size Advanced feature. Set larger if densities
672  taking too long to rasterize.
673  Set to 0 if you don't want to create the MRC file
674  @param setup_particles_as_densities Set to True if you want each
675  particle to be its own density.
676  Useful for all-atom models or flexible beads.
677  Mutually exclusive with density_ options
678  @param ideal_helix Create idealized helix structures for these
679  residues at resolution 1.
680  Any other resolutions passed will be coarsened from there.
681  Resolution 0 will not work; you may have to use MODELLER
682  to do that (for now).
683  @param color the color applied to the hierarchies generated.
684  Format options: tuple (r,g,b) with values 0 to 1;
685  float (from 0 to 1, a map from Blue to Green to Red);
686  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
687  a hex RGB string (e.g. "#ff0000");
688  an IMP.display.Color object
689  @note You cannot call add_representation multiple times for the
690  same residues.
691  """ # noqa: E501
692 
693  # can't customize clones
694  if self.mol_to_clone is not None:
695  raise ValueError(
696  'You cannot call add_representation() for a clone.'
697  ' Maybe use a copy instead.')
698 
699  # format input
700  if residues is None:
701  res = IMP.pmi.tools.OrderedSet(self.residues)
702  elif residues == self:
703  res = IMP.pmi.tools.OrderedSet(self.residues)
704  elif type(residues) is IMP.pmi.topology.TempResidue:
705  res = IMP.pmi.tools.OrderedSet([residues])
706  elif hasattr(residues, '__iter__'):
707  if len(residues) == 0:
708  raise Exception(
709  'You passed an empty set to add_representation')
710  if type(residues) is IMP.pmi.tools.OrderedSet \
711  and type(next(iter(residues))) is TempResidue:
712  res = residues
713  elif (type(residues) is set
714  and type(next(iter(residues))) is TempResidue):
715  res = IMP.pmi.tools.OrderedSet(residues)
716  elif type(residues) is list and type(residues[0]) is TempResidue:
717  res = IMP.pmi.tools.OrderedSet(residues)
718  else:
719  raise Exception("You passed an iterable of something other "
720  "than TempResidue", res)
721  else:
722  raise Exception("add_representation: you must pass a set of "
723  "residues or nothing(=all residues)")
724 
725  # check that each residue has not been represented yet
726  ov = res & self._represented
727  if ov:
728  raise Exception('You have already added representation for ' +
729  self.get_hierarchy().get_name() + ': ' +
730  ov.__repr__())
731  self._represented |= res
732 
733  # check you aren't creating multiple resolutions without structure
734  if not hasattr(resolutions, '__iter__'):
735  if type(resolutions) is int:
736  resolutions = [resolutions]
737  else:
738  raise Exception("you tried to pass resolutions that are not "
739  "int or list-of-int")
740  if len(resolutions) > 1 and not ideal_helix:
741  for r in res:
742  if not r.get_has_structure():
743  raise Exception(
744  'You are creating multiple resolutions for '
745  'unstructured regions. This will have unexpected '
746  'results.')
747 
748  # check density info is consistent
749  if density_residues_per_component or density_prefix:
750  if not density_residues_per_component and density_prefix:
751  raise Exception(
752  'If requesting density, must provide '
753  'density_residues_per_component AND density_prefix')
754  if density_residues_per_component and setup_particles_as_densities:
755  raise Exception(
756  'Cannot create both volumetric density '
757  '(density_residues_per_component) AND '
758  'individual densities (setup_particles_as_densities) '
759  'in the same representation')
760  if len(resolutions) > 1 and setup_particles_as_densities:
761  raise Exception(
762  'You have multiple bead resolutions but are attempting to '
763  'set them all up as individual Densities. '
764  'This could have unexpected results.')
765 
766  # check helix not accompanied by other resolutions
767  # (densities OK though!)
768  if ideal_helix:
769  if 0 in resolutions:
770  raise Exception(
771  "For ideal helices, cannot build resolution 0: "
772  "you have to do that in MODELLER")
773  if 1 not in resolutions:
774  resolutions = [1] + list(resolutions)
775  self._ideal_helices.append(res)
776 
777  # check residues are all part of this molecule:
778  for r in res:
779  if r.get_molecule() != self:
780  raise Exception(
781  'You are adding residues from a different molecule to',
782  self.__repr__())
783 
784  # unify formatting for extra breaks
785  breaks = []
786  for b in bead_extra_breaks:
787  if isinstance(b, str):
788  breaks.append(int(b)-1)
789  else:
790  breaks.append(b)
791  # store the representation group
792  self.representations.append(_Representation(
793  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
794  density_residues_per_component, density_prefix,
795  density_force_compute, density_voxel_size,
796  setup_particles_as_densities, ideal_helix, color))
797 
798  def _all_protocol_output(self):
799  return self.state._protocol_output
800 
801  def _build_protocol_output(self):
802  """Add molecule name and sequence to any ProtocolOutput objects"""
803  if not self.built:
804  name = self.hier.get_name()
805  for po, state in self._all_protocol_output():
806  po.create_component(state, name, True,
807  asym_name=self._name_with_copy)
808  po.add_component_sequence(state, name, self.sequence,
809  asym_name=self._name_with_copy,
810  alphabet=self.alphabet)
811 
812  def _finalize_build(self):
813  # For clones, pass the representation of the original molecule
814  # to ProtocolOutput
815  if self.mol_to_clone:
816  rephandler = _RepresentationHandler(
817  self._name_with_copy, list(self._all_protocol_output()),
818  self.mol_to_clone._pdb_elements)
819  for res in self.mol_to_clone.residues:
820  if res.hier:
821  rephandler(res)
822 
823  def build(self, protocol_output=True):
824  """Create all parts of the IMP hierarchy
825  including Atoms, Residues, and Fragments/Representations and,
826  finally, Copies.
827  Will only build requested representations.
828  @note Any residues assigned a resolution must have an IMP.atom.Residue
829  hierarchy containing at least a CAlpha. For missing residues,
830  these can be constructed from the PDB file.
831  """
832  if not self.built:
833  if protocol_output:
834  self._build_protocol_output()
835  # if requested, clone structure and representations
836  # BEFORE building original
837  if self.mol_to_clone is not None:
838  for nr, r in enumerate(self.mol_to_clone.residues):
839  if r.get_has_structure():
840  clone = IMP.atom.create_clone(r.get_hierarchy())
841  self.residues[nr].set_structure(
842  IMP.atom.Residue(clone), soft_check=True)
843  for old_rep in self.mol_to_clone.representations:
844  new_res = IMP.pmi.tools.OrderedSet()
845  for r in old_rep.residues:
846  new_res.add(self.residues[r.get_internal_index()])
847  self._represented.add(
848  self.residues[r.get_internal_index()])
849  new_rep = _Representation(
850  new_res, old_rep.bead_resolutions,
851  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
852  old_rep.bead_default_coord,
853  old_rep.density_residues_per_component,
854  old_rep.density_prefix, False,
855  old_rep.density_voxel_size,
856  old_rep.setup_particles_as_densities,
857  old_rep.ideal_helix, old_rep.color)
858  self.representations.append(new_rep)
859  self.coord_finder = self.mol_to_clone.coord_finder
860 
861  # give a warning for all residues that don't have representation
862  no_rep = [r for r in self.residues if r not in self._represented]
863  if len(no_rep) > 0:
864  warnings.warn(
865  'Residues without representation in molecule %s: %s'
866  % (self.get_name(), system_tools.resnums2str(no_rep)),
868 
869  # first build any ideal helices (fills in structure for
870  # the TempResidues)
871  for rep in self.representations:
872  if rep.ideal_helix:
873  _build_ideal_helix(self.model, rep.residues,
874  self.coord_finder)
875 
876  # build all the representations
877  built_reps = []
878 
879  rephandler = _RepresentationHandler(
880  self._name_with_copy, list(self._all_protocol_output()),
881  self._pdb_elements)
882 
883  for rep in self.representations:
884  built_reps += system_tools.build_representation(
885  self, rep, self.coord_finder, rephandler)
886 
887  # sort them before adding as children
888  built_reps.sort(
889  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
890  for br in built_reps:
891  self.hier.add_child(br)
892  br.update_parents()
893  self.built = True
894 
895  for res in self.residues:
896  # first off, store the highest resolution available
897  # in residue.hier
898  new_ps = IMP.atom.Selection(
899  self.hier,
900  residue_index=res.get_index(),
901  resolution=1).get_selected_particles()
902  if len(new_ps) > 0:
903  new_p = new_ps[0]
904  if IMP.atom.Atom.get_is_setup(new_p):
905  # if only found atomic, store the residue
906  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
907  else:
908  # otherwise just store what you found
909  new_hier = IMP.atom.Hierarchy(new_p)
910  res.hier = new_hier
911  # Clones will be handled in _finalize_build() instead
912  # (can't handle them here as the parent of the clone
913  # isn't built yet)
914  if self.mol_to_clone is None:
915  rephandler(res)
916  else:
917  res.hier = None
918  self._represented = IMP.pmi.tools.OrderedSet(
919  [a for a in self._represented])
920  print('done building', self.get_hierarchy())
921  return self.hier
922 
923  def get_particles_at_all_resolutions(self, residue_indexes=None):
924  """Helpful utility for getting particles at all resolutions from
925  this molecule. Can optionally pass a set of residue indexes"""
926  if not self.built:
927  raise Exception(
928  "Cannot get all resolutions until you build the Molecule")
929  if residue_indexes is None:
930  residue_indexes = [r.get_index() for r in self.get_residues()]
932  self.get_hierarchy(), residue_indexes=residue_indexes)
933  return ps
934 
935 
936 class _Representation:
937  """Private class just to store a representation request"""
938  def __init__(self,
939  residues,
940  bead_resolutions,
941  bead_extra_breaks,
942  bead_ca_centers,
943  bead_default_coord,
944  density_residues_per_component,
945  density_prefix,
946  density_force_compute,
947  density_voxel_size,
948  setup_particles_as_densities,
949  ideal_helix,
950  color):
951  self.residues = residues
952  self.bead_resolutions = bead_resolutions
953  self.bead_extra_breaks = bead_extra_breaks
954  self.bead_ca_centers = bead_ca_centers
955  self.bead_default_coord = bead_default_coord
956  self.density_residues_per_component = density_residues_per_component
957  self.density_prefix = density_prefix
958  self.density_force_compute = density_force_compute
959  self.density_voxel_size = density_voxel_size
960  self.setup_particles_as_densities = setup_particles_as_densities
961  self.ideal_helix = ideal_helix
962  self.color = color
963 
964 
965 class _FindCloseStructure:
966  """Utility to get the nearest observed coordinate"""
967  def __init__(self):
968  self.coords = []
969 
970  def add_residues(self, residues):
971  for r in residues:
972  idx = IMP.atom.Residue(r).get_index()
973  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
974  ca = IMP.atom.Selection(
975  r, atom_types=catypes).get_selected_particles()
976  p = IMP.atom.Selection(
977  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
978  if len(ca) == 1:
979  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
980  self.coords.append([idx, xyz])
981  elif len(p) == 1:
982  xyz = IMP.core.XYZ(p[0]).get_coordinates()
983  self.coords.append([idx, xyz])
984  else:
985  raise ValueError("_FindCloseStructure: wrong selection")
986 
987  self.coords.sort(key=itemgetter(0))
988 
989  def find_nearest_coord(self, query):
990  if self.coords == []:
991  return None
992  keys = [r[0] for r in self.coords]
993  pos = bisect_left(keys, query)
994  if pos == 0:
995  ret = self.coords[0]
996  elif pos == len(self.coords):
997  ret = self.coords[-1]
998  else:
999  before = self.coords[pos - 1]
1000  after = self.coords[pos]
1001  if after[0] - query < query - before[0]:
1002  ret = after
1003  else:
1004  ret = before
1005  return ret[1]
1006 
1007 
1009  """A dictionary-like wrapper for reading and storing sequence data.
1010  Keys are FASTA sequence names, and each value a string of one-letter
1011  codes.
1012 
1013  The FASTA header may contain multiple fields split by pipe (|)
1014  characters. If so, the FASTA sequence name is the first field and
1015  the second field (if present) is the UniProt accession.
1016  For example, ">cop9|Q13098" yields a FASTA sequence name of "cop9"
1017  and UniProt accession of "Q13098".
1018  """
1019  def __init__(self, fasta_fn, name_map=None):
1020  """Read a FASTA file and extract all the requested sequences
1021  @param fasta_fn sequence file
1022  @param name_map dictionary mapping the FASTA name to final stored name
1023  """
1024  # Mapping from sequence name to primary sequence
1025  self.sequences = IMP.pmi.tools.OrderedDict()
1026  # Mapping from sequence name to UniProt accession, if available
1027  self.uniprot = {}
1028  self.read_sequences(fasta_fn, name_map)
1029 
1030  def __len__(self):
1031  return len(self.sequences)
1032 
1033  def __contains__(self, x):
1034  return x in self.sequences
1035 
1036  def __getitem__(self, key):
1037  if type(key) is int:
1038  allseqs = list(self.sequences.keys())
1039  try:
1040  return self.sequences[allseqs[key]]
1041  except IndexError:
1042  raise IndexError("You tried to access sequence number %d "
1043  "but there's only %d" % (key, len(allseqs)))
1044  else:
1045  return self.sequences[key]
1046 
1047  def __iter__(self):
1048  return self.sequences.__iter__()
1049 
1050  def __repr__(self):
1051  ret = ''
1052  for s in self.sequences:
1053  ret += '%s\t%s\n' % (s, self.sequences[s])
1054  return ret
1055 
1056  def read_sequences(self, fasta_fn, name_map=None):
1057  code = None
1058  seq = None
1059  with open(fasta_fn, 'r') as fh:
1060  for (num, line) in enumerate(fh):
1061  if line.startswith('>'):
1062  if seq is not None:
1063  self.sequences[code] = seq.strip('*')
1064  spl = line[1:].split('|')
1065  code = spl[0].strip()
1066  if name_map is not None:
1067  try:
1068  code = name_map[code]
1069  except KeyError:
1070  pass
1071  seq = ''
1072  if len(spl) >= 2:
1073  up_accession = spl[1].strip()
1074  self.uniprot[code] = up_accession
1075  else:
1076  line = line.rstrip()
1077  if line: # Skip blank lines
1078  if seq is None:
1079  raise Exception(
1080  "Found FASTA sequence before first header "
1081  "at line %d: %s" % (num + 1, line))
1082  seq += line
1083  if seq is not None:
1084  self.sequences[code] = seq.strip('*')
1085 
1086 
1088  """Data structure for reading and storing sequence data from PDBs.
1089 
1090  @see fasta_pdb_alignments."""
1091  def __init__(self, model, pdb_fn, name_map=None):
1092  """Read a PDB file and return all sequences for each contiguous
1093  fragment
1094  @param pdb_fn file
1095  @param name_map dictionary mapping the pdb chain id to final
1096  stored name
1097  """
1098  self.model = model
1099  # self.sequences data-structure: (two-key dictionary)
1100  # it contains all contiguous fragments:
1101  # chain_id, tuples indicating first and last residue, sequence
1102  # example:
1103  # key1, key2, value
1104  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1105  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1106  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1107  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1108  self.sequences = IMP.pmi.tools.OrderedDict()
1109  self.read_sequences(pdb_fn, name_map)
1110 
1111  def read_sequences(self, pdb_fn, name_map):
1112  read_file = IMP.atom.read_pdb
1113  if pdb_fn.endswith('.cif'):
1114  read_file = IMP.atom.read_mmcif
1115  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1116  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1117  for c in cs:
1118  id = IMP.atom.Chain(c).get_id()
1119  print(id)
1120  if name_map:
1121  try:
1122  id = name_map[id]
1123  except KeyError:
1124  print("Chain ID %s not in name_map, skipping" % id)
1125  continue
1126  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1127  rids = []
1128  rids_olc_dict = {}
1129  for r in rs:
1130  dr = IMP.atom.Residue(r)
1131  rid = dr.get_index()
1132 
1133  isprotein = dr.get_is_protein()
1134  isrna = dr.get_is_rna()
1135  isdna = dr.get_is_dna()
1136  if isprotein:
1137  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1138  rids.append(rid)
1139  rids_olc_dict[rid] = olc
1140  elif isdna:
1141  if dr.get_residue_type() == IMP.atom.DADE:
1142  olc = "A"
1143  if dr.get_residue_type() == IMP.atom.DURA:
1144  olc = "U"
1145  if dr.get_residue_type() == IMP.atom.DCYT:
1146  olc = "C"
1147  if dr.get_residue_type() == IMP.atom.DGUA:
1148  olc = "G"
1149  if dr.get_residue_type() == IMP.atom.DTHY:
1150  olc = "T"
1151  rids.append(rid)
1152  rids_olc_dict[rid] = olc
1153  elif isrna:
1154  if dr.get_residue_type() == IMP.atom.ADE:
1155  olc = "A"
1156  if dr.get_residue_type() == IMP.atom.URA:
1157  olc = "U"
1158  if dr.get_residue_type() == IMP.atom.CYT:
1159  olc = "C"
1160  if dr.get_residue_type() == IMP.atom.GUA:
1161  olc = "G"
1162  if dr.get_residue_type() == IMP.atom.THY:
1163  olc = "T"
1164  rids.append(rid)
1165  rids_olc_dict[rid] = olc
1166  group_rids = self.group_indexes(rids)
1167  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1168  for group in group_rids:
1169  sequence_fragment = ""
1170  for i in range(group[0], group[1]+1):
1171  sequence_fragment += rids_olc_dict[i]
1172  contiguous_sequences[group] = sequence_fragment
1173  self.sequences[id] = contiguous_sequences
1174 
1175  def group_indexes(self, indexes):
1176  from itertools import groupby
1177  ranges = []
1178  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1179  group = [x[1] for x in g]
1180  ranges.append((group[0], group[-1]))
1181  return ranges
1182 
1183 
1184 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1185  '''This function computes and prints the alignment between the
1186  fasta file and the pdb sequence, computes the offsets for each contiguous
1187  fragment in the PDB.
1188  @param fasta_sequences IMP.pmi.topology.Sequences object
1189  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1190  @param show boolean default False, if True prints the alignments.
1191  The input objects should be generated using map_name dictionaries
1192  such that fasta_id
1193  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1194  Returns a dictionary of offsets, organized by peptide range (group):
1195  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1196  from Bio import pairwise2
1197  from Bio.pairwise2 import format_alignment
1198  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1199  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1200  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1201  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1202  offsets = IMP.pmi.tools.OrderedDict()
1203  for name in fasta_sequences.sequences:
1204  print(name)
1205  seq_fasta = fasta_sequences.sequences[name]
1206  if name not in pdb_sequences.sequences:
1207  print("Fasta id %s not in pdb names, aligning against every "
1208  "pdb chain" % name)
1209  pdbnames = pdb_sequences.sequences.keys()
1210  else:
1211  pdbnames = [name]
1212  for pdbname in pdbnames:
1213  for group in pdb_sequences.sequences[pdbname]:
1214  if group[1] - group[0] + 1 < 7:
1215  continue
1216  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1217  if show:
1218  print("########################")
1219  print(" ")
1220  print("protein name", pdbname)
1221  print("fasta id", name)
1222  print("pdb fragment", group)
1223  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1224  2, -1, -.5, -.1)[0]
1225  for a in [align]:
1226  offset = a[3] + 1 - group[0]
1227  if show:
1228  print("alignment sequence start-end",
1229  (a[3] + 1, a[4] + 1))
1230  print("offset from pdb to fasta index", offset)
1231  print(format_alignment(*a))
1232  if name not in offsets:
1233  offsets[pdbname] = {}
1234  if group not in offsets[pdbname]:
1235  offsets[pdbname][group] = offset
1236  else:
1237  if group not in offsets[pdbname]:
1238  offsets[pdbname][group] = offset
1239  return offsets
1240 
1241 
1243  "Temporarily stores residue information, even without structure available."
1244  # Consider implementing __hash__ so you can select.
1245  def __init__(self, molecule, code, index, internal_index, alphabet):
1246  """setup a TempResidue
1247  @param molecule PMI Molecule to which this residue belongs
1248  @param code one-letter residue type code
1249  @param index PDB index
1250  @param internal_index The number in the sequence
1251  """
1252  # these attributes should be immutable
1253  self.molecule = molecule
1254  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1255  self.pdb_index = index
1256  self.internal_index = internal_index
1257  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1258  self.state_index = \
1259  IMP.atom.State(self.molecule.state.hier).get_state_index()
1260  # these are expected to change
1261  self._structured = False
1262  self.hier = IMP.atom.Residue.setup_particle(
1263  IMP.Particle(molecule.model), self.rtype, index)
1264 
1265  def __str__(self):
1266  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1267  + str(self.copy_index) + "_" + self.get_code() \
1268  + str(self.get_index())
1269 
1270  def __repr__(self):
1271  return self.__str__()
1272 
1273  def __key(self):
1274  # this returns the immutable attributes only
1275  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1276  self.pdb_index, self.internal_index)
1277 
1278  def __eq__(self, other):
1279  return (type(other) == type(self) # noqa: E721
1280  and self.__key() == other.__key())
1281 
1282  def __hash__(self):
1283  return hash(self.__key())
1284 
1285  def get_index(self):
1286  return self.pdb_index
1287 
1288  def get_internal_index(self):
1289  return self.internal_index
1290 
1291  def get_code(self):
1292  return IMP.atom.get_one_letter_code(self.get_residue_type())
1293 
1294  def get_residue_type(self):
1295  return self.rtype
1296 
1297  def get_hierarchy(self):
1298  return self.hier
1299 
1300  def get_molecule(self):
1301  return self.molecule
1302 
1303  def get_has_structure(self):
1304  return self._structured
1305 
1306  def set_structure(self, res, soft_check=False):
1307  if res.get_residue_type() != self.get_residue_type():
1308  if (res.get_residue_type() == IMP.atom.MSE
1309  and self.get_residue_type() == IMP.atom.MET):
1310  # MSE in the PDB file is OK to match with MET in the FASTA
1311  # sequence
1312  pass
1313  elif soft_check:
1314  # note from commit a2c13eaa1 we give priority to the
1315  # FASTA and not the PDB
1316  warnings.warn(
1317  'Inconsistency between FASTA sequence and PDB sequence. '
1318  'FASTA type %s %s and PDB type %s'
1319  % (self.get_index(), self.hier.get_residue_type(),
1320  res.get_residue_type()),
1322  self.hier.set_residue_type((self.get_residue_type()))
1323  self.rtype = self.get_residue_type()
1324  else:
1325  raise Exception(
1326  'ERROR: PDB residue index', self.get_index(), 'is',
1327  IMP.atom.get_one_letter_code(res.get_residue_type()),
1328  'and sequence residue is', self.get_code())
1329 
1330  for a in res.get_children():
1331  self.hier.add_child(a)
1332  atype = IMP.atom.Atom(a).get_atom_type()
1333  a.get_particle().set_name(
1334  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1335  self.hier.get_index()))
1336  self._structured = True
1337 
1338 
1340  """Automatically setup System and Degrees of Freedom with a formatted
1341  text file.
1342  The file is read in and each part of the topology is stored as a
1343  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1344  The topology file should be in a simple pipe-delimited format:
1345  @code{.txt}
1346 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1347 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1348 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1349 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1350 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1351 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1352 
1353  @endcode
1354 
1355  These are the fields you can enter:
1356  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1357  hierarchy for this structure. Multiple copies of the same molecule
1358  can be created by appending a copy number after a period; if none is
1359  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1360  of Rpb2 or Rpb2.0).
1361  - `color`: The color used in the output RMF file. Uses
1362  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1363  (e.g. "red"), or R,G,B values as three comma-separated floating point
1364  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1365  starting with '#' (e.g. #ff0000).
1366  - `fasta_fn`: Name of FASTA file containing this component.
1367  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1368  from the file is assumed to be a protein sequence. If it should instead
1369  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1370  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1371  FASTA file and treat it as RNA.
1372  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1373  If left empty, will set up as BEADS (you can also specify "BEADS")
1374  Can also write "IDEAL_HELIX".
1375  - `chain`: Chain ID of this domain in the PDB file.
1376  - `residue_range`: Comma delimited pair defining range.
1377  Can leave empty or use 'all' for entire sequence from PDB file.
1378  The second item in the pair can be END to select the last residue in the
1379  PDB chain.
1380  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1381  For example, an offset of -10 would match the first residue in the
1382  FASTA file (which is always numbered sequentially starting from 1) with
1383  residue 11 in the PDB file.
1384  - `bead_size`: The size (in residues) of beads used to model areas not
1385  covered by PDB coordinates. These will be built automatically.
1386  - `em_residues`: The number of Gaussians used to model the electron
1387  density of this domain. Set to zero if no EM fitting will be done.
1388  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1389  (and .mrc)
1390  - `rigid_body`: Leave empty if this object is not in a rigid body.
1391  Otherwise, this is a number corresponding to the rigid body containing
1392  this object. The number itself is just used for grouping things.
1393  - `super_rigid_body`: Add a mover that periodically moves several related
1394  domains as if they were a single large rigid body. In between such moves,
1395  the domains move independently. This can improve sampling.
1396  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1397  for all adjacent pairs of domains in the chain.
1398  - `flags` additional flags for advanced options
1399  @note All filenames are relative to the paths specified in the constructor.
1400 
1401  """ # noqa: E501
1402  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1403  gmm_dir='./'):
1404  """Constructor.
1405  @param topology_file Pipe-delimited file specifying the topology
1406  @param pdb_dir Relative path to the pdb directory
1407  @param fasta_dir Relative path to the fasta directory
1408  @param gmm_dir Relative path to the GMM directory
1409  """
1410  self.topology_file = topology_file
1411  # key=molname, value=TempMolecule
1412  self.molecules = IMP.pmi.tools.OrderedDict()
1413  self.pdb_dir = pdb_dir
1414  self.fasta_dir = fasta_dir
1415  self.gmm_dir = gmm_dir
1416  self._components = self.read(topology_file)
1417 
1418  def write_topology_file(self, outfile):
1419  with open(outfile, "w") as f:
1420  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1421  "residue_range|pdb_offset|bead_size|"
1422  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1423  "chain_of_super_rigid_bodies|\n")
1424  for c in self._components:
1425  output = c.get_str()+'\n'
1426  f.write(output)
1427  return outfile
1428 
1429  def get_components(self, topology_list="all"):
1430  """ Return list of ComponentTopologies for selected components
1431  @param topology_list List of indices to return"""
1432  if topology_list == "all":
1433  topologies = self._components
1434  else:
1435  topologies = []
1436  for i in topology_list:
1437  topologies.append(self._components[i])
1438  return topologies
1439 
1440  def get_molecules(self):
1441  return self.molecules
1442 
1443  def read(self, topology_file, append=False):
1444  """Read system components from topology file. append=False will erase
1445  current topology and overwrite with new
1446  """
1447  is_topology = False
1448  is_directories = False
1449  linenum = 1
1450  if not append:
1451  self._components = []
1452 
1453  with open(topology_file) as infile:
1454  for line in infile:
1455  if line.lstrip() == "" or line[0] == "#":
1456  continue
1457  elif line.split('|')[1].strip() in ("molecule_name"):
1458  is_topology = True
1459  is_directories = False
1460  old_format = False
1461  continue
1462  elif line.split('|')[1] == "component_name":
1463  is_topology = True
1465  "Old-style topology format (using "
1466  "|component_name|) is deprecated. Please switch to "
1467  "the new-style format (using |molecule_name|)\n")
1468  old_format = True
1469  is_directories = False
1470  continue
1471  elif line.split('|')[1] == "directories":
1473  "Setting directories in the topology file "
1474  "is deprecated. Please do so through the "
1475  "TopologyReader constructor. Note that new-style "
1476  "paths are relative to the current working "
1477  "directory, not the topology file.\n")
1478  is_directories = True
1479  elif is_directories:
1480  fields = line.split('|')
1481  setattr(self, fields[1],
1482  IMP.get_relative_path(topology_file, fields[2]))
1483  if is_topology:
1484  new_component = self._parse_line(line, linenum, old_format)
1485  self._components.append(new_component)
1486  linenum += 1
1487  return self._components
1488 
1489  def _parse_line(self, component_line, linenum, old_format):
1490  """Parse a line of topology values and matches them to their key.
1491  Checks each value for correct syntax
1492  Returns a list of Component objects
1493  fields:
1494  """
1495  c = _Component()
1496  values = [s.strip() for s in component_line.split('|')]
1497  errors = []
1498 
1499  # Required fields
1500  if old_format:
1501  c.molname = values[1]
1502  c.copyname = ''
1503  c._domain_name = values[2]
1504  c.color = 'blue'
1505  else:
1506  names = values[1].split('.')
1507  if len(names) == 1:
1508  c.molname = names[0]
1509  c.copyname = ''
1510  elif len(names) == 2:
1511  c.molname = names[0]
1512  c.copyname = names[1]
1513  else:
1514  c.molname = names[0]
1515  c.copyname = names[1]
1516  errors.append("Molecule name should be <molecule.copyID>")
1517  errors.append("For component %s line %d "
1518  % (c.molname, linenum))
1519  c._domain_name = c.molname + '.' + c.copyname
1520  colorfields = values[2].split(',')
1521  if len(colorfields) == 3:
1522  c.color = [float(x) for x in colorfields]
1523  if any([x > 1 for x in c.color]):
1524  c.color = [x/255 for x in c.color]
1525  else:
1526  c.color = values[2]
1527  c._orig_fasta_file = values[3]
1528  c.fasta_file = values[3]
1529  fasta_field = values[4].split(",")
1530  c.fasta_id = fasta_field[0]
1531  c.fasta_flag = None
1532  if len(fasta_field) > 1:
1533  c.fasta_flag = fasta_field[1]
1534  c._orig_pdb_input = values[5]
1535  pdb_input = values[5]
1536  tmp_chain = values[6]
1537  rr = values[7]
1538  offset = values[8]
1539  bead_size = values[9]
1540  emg = values[10]
1541  if old_format:
1542  rbs = srbs = csrbs = ''
1543  else:
1544  rbs = values[11]
1545  srbs = values[12]
1546  csrbs = values[13]
1547 
1548  if c.molname not in self.molecules:
1549  self.molecules[c.molname] = _TempMolecule(c)
1550  else:
1551  # COPY OR DOMAIN
1552  c._orig_fasta_file = \
1553  self.molecules[c.molname].orig_component._orig_fasta_file
1554  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1555  self.molecules[c.molname].add_component(c, c.copyname)
1556 
1557  # now cleanup input
1558  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1559  if pdb_input == "":
1560  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1561  errors.append("For component %s line %d is not correct"
1562  "|%s| was given." % (c.molname, linenum, pdb_input))
1563  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1564  c.pdb_file = pdb_input
1565  else:
1566  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1567 
1568  # PDB chain must be one or two characters
1569  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1570  c.chain = tmp_chain
1571  else:
1572  errors.append(
1573  "PDB Chain identifier must be one or two characters.")
1574  errors.append("For component %s line %d is not correct"
1575  "|%s| was given."
1576  % (c.molname, linenum, tmp_chain))
1577 
1578  # Optional fields
1579  # Residue Range
1580  if rr.strip() == 'all' or str(rr) == "":
1581  c.residue_range = None
1582  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1583  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1584  # Make sure that is residue range is given, there are only
1585  # two values and they are integers
1586  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1587  if c.residue_range[1] != 'END':
1588  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1589  # Old format used -1 for the last residue
1590  if old_format and c.residue_range[1] == -1:
1591  c.residue_range = (c.residue_range[0], 'END')
1592  else:
1593  errors.append("Residue Range format for component %s line %d is "
1594  "not correct" % (c.molname, linenum))
1595  errors.append(
1596  "Correct syntax is two comma separated integers: "
1597  "|start_res, end_res|. end_res can also be END to select the "
1598  "last residue in the chain. |%s| was given." % rr)
1599  errors.append("To select all residues, indicate |\"all\"|")
1600 
1601  # PDB Offset
1602  if self._is_int(offset):
1603  c.pdb_offset = int(offset)
1604  elif len(offset) == 0:
1605  c.pdb_offset = 0
1606  else:
1607  errors.append("PDB Offset format for component %s line %d is "
1608  "not correct" % (c.molname, linenum))
1609  errors.append("The value must be a single integer. |%s| was given."
1610  % offset)
1611 
1612  # Bead Size
1613  if self._is_int(bead_size):
1614  c.bead_size = int(bead_size)
1615  elif len(bead_size) == 0:
1616  c.bead_size = 0
1617  else:
1618  errors.append("Bead Size format for component %s line %d is "
1619  "not correct" % (c.molname, linenum))
1620  errors.append("The value must be a single integer. |%s| was given."
1621  % bead_size)
1622 
1623  # EM Residues Per Gaussian
1624  if self._is_int(emg):
1625  if int(emg) > 0:
1626  c.density_prefix = os.path.join(self.gmm_dir,
1627  c.get_unique_name())
1628  c.gmm_file = c.density_prefix + '.txt'
1629  c.mrc_file = c.density_prefix + '.gmm'
1630 
1631  c.em_residues_per_gaussian = int(emg)
1632  else:
1633  c.em_residues_per_gaussian = 0
1634  elif len(emg) == 0:
1635  c.em_residues_per_gaussian = 0
1636  else:
1637  errors.append("em_residues_per_gaussian format for component "
1638  "%s line %d is not correct" % (c.molname, linenum))
1639  errors.append("The value must be a single integer. |%s| was given."
1640  % emg)
1641 
1642  # rigid bodies
1643  if len(rbs) > 0:
1644  if not self._is_int(rbs):
1645  errors.append(
1646  "rigid bodies format for component "
1647  "%s line %d is not correct" % (c.molname, linenum))
1648  errors.append("Each RB must be a single integer, or empty. "
1649  "|%s| was given." % rbs)
1650  c.rigid_body = int(rbs)
1651 
1652  # super rigid bodies
1653  if len(srbs) > 0:
1654  srbs = srbs.split(',')
1655  for i in srbs:
1656  if not self._is_int(i):
1657  errors.append(
1658  "super rigid bodies format for component "
1659  "%s line %d is not correct" % (c.molname, linenum))
1660  errors.append(
1661  "Each SRB must be a single integer. |%s| was given."
1662  % srbs)
1663  c.super_rigid_bodies = srbs
1664 
1665  # chain of super rigid bodies
1666  if len(csrbs) > 0:
1667  if not self._is_int(csrbs):
1668  errors.append(
1669  "em_residues_per_gaussian format for component "
1670  "%s line %d is not correct" % (c.molname, linenum))
1671  errors.append(
1672  "Each CSRB must be a single integer. |%s| was given."
1673  % csrbs)
1674  c.chain_of_super_rigid_bodies = csrbs
1675 
1676  # done
1677  if errors:
1678  raise ValueError("Fix Topology File syntax errors and rerun: "
1679  + "\n".join(errors))
1680  else:
1681  return c
1682 
1683  def set_gmm_dir(self, gmm_dir):
1684  """Change the GMM dir"""
1685  self.gmm_dir = gmm_dir
1686  for c in self._components:
1687  c.gmm_file = os.path.join(self.gmm_dir,
1688  c.get_unique_name() + ".txt")
1689  c.mrc_file = os.path.join(self.gmm_dir,
1690  c.get_unique_name() + ".mrc")
1691  print('new gmm', c.gmm_file)
1692 
1693  def set_pdb_dir(self, pdb_dir):
1694  """Change the PDB dir"""
1695  self.pdb_dir = pdb_dir
1696  for c in self._components:
1697  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1698  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1699 
1700  def set_fasta_dir(self, fasta_dir):
1701  """Change the FASTA dir"""
1702  self.fasta_dir = fasta_dir
1703  for c in self._components:
1704  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1705 
1706  def _is_int(self, s):
1707  # is this string an integer?
1708  try:
1709  float(s)
1710  return float(s).is_integer()
1711  except ValueError:
1712  return False
1713 
1714  def get_rigid_bodies(self):
1715  """Return list of lists of rigid bodies (as domain name)"""
1716  rbl = defaultdict(list)
1717  for c in self._components:
1718  if c.rigid_body:
1719  rbl[c.rigid_body].append(c.get_unique_name())
1720  return rbl.values()
1721 
1723  """Return list of lists of super rigid bodies (as domain name)"""
1724  rbl = defaultdict(list)
1725  for c in self._components:
1726  for rbnum in c.super_rigid_bodies:
1727  rbl[rbnum].append(c.get_unique_name())
1728  return rbl.values()
1729 
1731  "Return list of lists of chains of super rigid bodies (as domain name)"
1732  rbl = defaultdict(list)
1733  for c in self._components:
1734  for rbnum in c.chain_of_super_rigid_bodies:
1735  rbl[rbnum].append(c.get_unique_name())
1736  return rbl.values()
1737 
1738 
1739 class _TempMolecule:
1740  """Store the Components and any requests for copies"""
1741  def __init__(self, init_c):
1742  self.molname = init_c.molname
1743  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1744  self.add_component(init_c, init_c.copyname)
1745  self.orig_copyname = init_c.copyname
1746  self.orig_component = self.domains[init_c.copyname][0]
1747 
1748  def add_component(self, component, copy_id):
1749  self.domains[copy_id].append(component)
1750  component.domainnum = len(self.domains[copy_id])-1
1751 
1752  def __repr__(self):
1753  return ','.join('%s:%i'
1754  % (k, len(self.domains[k])) for k in self.domains)
1755 
1756 
1757 class _Component:
1758  """Stores the components required to build a standard IMP hierarchy
1759  using IMP.pmi.BuildModel()
1760  """
1761  def __init__(self):
1762  self.molname = None
1763  self.copyname = None
1764  self.domainnum = 0
1765  self.fasta_file = None
1766  self._orig_fasta_file = None
1767  self.fasta_id = None
1768  self.fasta_flag = None
1769  self.pdb_file = None
1770  self._orig_pdb_input = None
1771  self.chain = None
1772  self.residue_range = None
1773  self.pdb_offset = 0
1774  self.bead_size = 10
1775  self.em_residues_per_gaussian = 0
1776  self.gmm_file = ''
1777  self.mrc_file = ''
1778  self.density_prefix = ''
1779  self.color = 0.1
1780  self.rigid_body = None
1781  self.super_rigid_bodies = []
1782  self.chain_of_super_rigid_bodies = []
1783 
1784  def _l2s(self, rng):
1785  return ",".join("%s" % x for x in rng)
1786 
1787  def __repr__(self):
1788  return self.get_str()
1789 
1790  def get_unique_name(self):
1791  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1792 
1793  def get_str(self):
1794  res_range = self.residue_range
1795  if self.residue_range is None:
1796  res_range = []
1797  name = self.molname
1798  if self.copyname != '':
1799  name += '.' + self.copyname
1800  if self.chain is None:
1801  chain = ' '
1802  else:
1803  chain = self.chain
1804  color = self.color
1805  if isinstance(color, list):
1806  color = ','.join([str(x) for x in color])
1807  fastaid = self.fasta_id
1808  if self.fasta_flag:
1809  fastaid += "," + self.fasta_flag
1810  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1811  self._orig_pdb_input, chain,
1812  self._l2s(list(res_range)),
1813  str(self.pdb_offset),
1814  str(self.bead_size),
1815  str(self.em_residues_per_gaussian),
1816  str(self.rigid_body) if self.rigid_body else '',
1817  self._l2s(self.super_rigid_bodies),
1818  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1819  return a
1820 
1821 
1823  '''Extends the functionality of IMP.atom.Molecule'''
1824 
1825  def __init__(self, hierarchy):
1826  IMP.atom.Molecule.__init__(self, hierarchy)
1827 
1828  def get_state_index(self):
1829  state = self.get_parent()
1830  return IMP.atom.State(state).get_state_index()
1831 
1832  def get_copy_index(self):
1833  return IMP.atom.Copy(self).get_copy_index()
1834 
1835  def get_extended_name(self):
1836  return self.get_name() + "." + \
1837  str(self.get_copy_index()) + \
1838  "." + str(self.get_state_index())
1839 
1840  def get_sequence(self):
1841  return IMP.atom.Chain(self).get_sequence()
1842 
1843  def get_residue_indexes(self):
1845 
1846  def get_residue_segments(self):
1847  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1848 
1849  def get_chain_id(self):
1850  return IMP.atom.Chain(self).get_id()
1851 
1852  def __repr__(self):
1853  s = 'PMIMoleculeHierarchy '
1854  s += self.get_name()
1855  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1856  s += " " + "State " + str(self.get_state_index())
1857  s += " " + "N residues " + str(len(self.get_sequence()))
1858  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:610
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
Represent the root node of the global IMP.atom.Hierarchy.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:128
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup System and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:83
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:819