IMP logo
IMP Reference Guide  develop.1a86c4215a,2024/04/24
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from an mmCIF
17  or PDB file.
18  * Molecule.add_representation() to create a representation unit - here you
19  can choose bead resolutions as well as alternate representations like
20  densities or ideal helices.
21  * Molecule.create_clone() lets you set up a molecule with identical
22  representations, just a different chain ID. Use Molecule.create_copy()
23  if you want a molecule with the same sequence but that allows custom
24  representations.
25 * Once data has been added and representations chosen, call System.build()
26  to create a canonical IMP hierarchy.
27 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
28  in IMP::pmi::dof.
29 * Check your representation with a nice printout:
30  IMP::atom::show_with_representation()
31 
32 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
33 
34 Alternatively one can construct the entire topology and degrees of freedom
35 via formatted text file with TopologyReader and
36 IMP::pmi::macros::BuildSystem(). This is used in the
37 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
38 set of the full options available to PMI users
39 (rigid bodies only, fixed resolutions).
40 """ # noqa: E501
41 
42 from __future__ import print_function
43 import IMP
44 import IMP.atom
45 import IMP.algebra
46 import IMP.pmi
47 import IMP.pmi.tools
48 import IMP.pmi.alphabets
49 import os
50 import re
51 from collections import defaultdict, namedtuple
52 from . import system_tools
53 from bisect import bisect_left
54 from math import pi, cos, sin
55 from operator import itemgetter
56 import weakref
57 import warnings
58 
59 
60 def _build_ideal_helix(model, residues, coord_finder):
61  """Creates an ideal helix from the specified residue range
62  Residues MUST be contiguous.
63  This function actually adds them to the TempResidue hierarchy
64  """
65  created_hiers = []
66 
67  # this function creates a CAlpha helix structure (which can be used
68  # for coarsening)
69  prev_idx = -9999
70  for n, tempres in enumerate(residues):
71  if tempres.get_has_structure():
72  raise ValueError("You tried to build ideal_helix for a residue "
73  "that already has structure: %s" % tempres)
74  if n > 0 and tempres.get_index() != prev_idx + 1:
75  raise ValueError(
76  "Passed non-contiguous segment to "
77  "build_ideal_helix for %s" % tempres.get_molecule())
78 
79  # New residue particle will replace the TempResidue's existing
80  # (empty) hierarchy
81  rp = IMP.Particle(model)
82  rp.set_name("Residue_%i" % tempres.get_index())
83 
84  # Copy the original residue type and index
85  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
86 
87  # Create the CAlpha
88  ap = IMP.Particle(model)
90  x = 2.3 * cos(n * 2 * pi / 3.6)
91  y = 2.3 * sin(n * 2 * pi / 3.6)
92  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
93  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
94  d.set_radius(1.0)
95  # Decorating as Atom also decorates as Mass
96  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
97  IMP.atom.Mass(ap).set_mass(110.0)
98  this_res.add_child(a)
99 
100  # Add this structure to the TempResidue
101  tempres.set_structure(this_res)
102  created_hiers.append(this_res)
103  prev_idx = tempres.get_index()
104  # the coord finder is for placing beads (later)
105  coord_finder.add_residues(created_hiers)
106 
107 
108 class _SystemBase(object):
109  """The base class for System, State and Molecule
110  classes. It contains shared functions in common to these classes
111  """
112 
113  def __init__(self, model=None):
114  if model is None:
115  self.model = IMP.Model()
116  else:
117  self.model = model
118 
119  def _create_hierarchy(self):
120  """create a new hierarchy"""
121  tmp_part = IMP.Particle(self.model)
122  return IMP.atom.Hierarchy.setup_particle(tmp_part)
123 
124  def _create_child(self, parent_hierarchy):
125  """create a new hierarchy, set it as child of the input
126  one, and return it"""
127  child_hierarchy = self._create_hierarchy()
128  parent_hierarchy.add_child(child_hierarchy)
129  return child_hierarchy
130 
131  def build(self):
132  """Build the coordinates of the system.
133  Loop through stored(?) hierarchies and set up coordinates!"""
134  pass
135 
136 
137 class _OurWeakRef(object):
138  """A simple wrapper around weakref.ref which can be pickled.
139  Note that we throw the reference away at pickle time. It should
140  be able to be reconstructed from System._all_systems anyway."""
141 
142  def __init__(self, system):
143  self._ref = weakref.ref(system)
144 
145  def __call__(self):
146  if hasattr(self, '_ref'):
147  return self._ref()
148 
149  def __getstate__(self):
150  return None
151 
152 
153 class System(_SystemBase):
154  """Represent the root node of the global IMP.atom.Hierarchy."""
155 
156  _all_systems = weakref.WeakSet()
157 
158  def __init__(self, model=None, name="System"):
159  """Constructor.
160 
161  @param model The IMP::Model in which to construct this system.
162  @param name The name of the top-level hierarchy node.
163  """
164  _SystemBase.__init__(self, model)
165  self._number_of_states = 0
166  self._protocol_output = []
167  self.states = []
168  self.built = False
169 
170  System._all_systems.add(self)
171 
172  # the root hierarchy node
173  self.hier = self._create_hierarchy()
174  self.hier.set_name(name)
175  self.hier._pmi2_system = _OurWeakRef(self)
176 
177  def get_states(self):
178  """Get a list of all State objects in this system"""
179  return self.states
180 
181  def create_state(self):
182  """Makes and returns a new IMP.pmi.topology.State in this system"""
183  self._number_of_states += 1
184  state = State(self, self._number_of_states-1)
185  self.states.append(state)
186  return state
187 
188  def __repr__(self):
189  return self.hier.get_name()
190 
192  """Returns the total number of states generated"""
193  return self._number_of_states
194 
195  def get_hierarchy(self):
196  """Return the top-level IMP.atom.Hierarchy node for this system"""
197  return self.hier
198 
199  def build(self, **kwargs):
200  """Build all states"""
201  if not self.built:
202  for state in self.states:
203  state.build(**kwargs)
204  self.built = True
205  return self.hier
206 
207  def add_protocol_output(self, p):
208  """Capture details of the modeling protocol.
209  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
210  """
211  self._protocol_output.append(p)
212 # p._each_metadata.append(self._metadata)
213 # p._file_datasets.append(self._file_dataset)
214  for state in self.states:
215  state._add_protocol_output(p, self)
216 
217 
218 class State(_SystemBase):
219  """Stores a list of Molecules all with the same State index.
220  Also stores number of copies of each Molecule for easy selection.
221  """
222  def __init__(self, system, state_index):
223  """Define a new state
224  @param system the PMI System
225  @param state_index the index of the new state
226  @note It's expected that you will not use this constructor directly,
227  but rather create it with System.create_state()
228  """
229  self.model = system.get_hierarchy().get_model()
230  self.system = system
231  self.hier = self._create_child(system.get_hierarchy())
232  self.short_name = self.long_name = "State_" + str(state_index)
233  self.hier.set_name(self.short_name)
234  # key is molecule name. value are the molecule copies!
235  self.molecules = IMP.pmi.tools.OrderedDict()
236  IMP.atom.State.setup_particle(self.hier, state_index)
237  self.built = False
238  self._protocol_output = []
239  for p in system._protocol_output:
240  self._add_protocol_output(p, system)
241 
242  def __repr__(self):
243  return self.system.__repr__()+'.'+self.hier.get_name()
244 
245  def _add_protocol_output(self, p, system):
246  state = p._add_state(self)
247  self._protocol_output.append((p, state))
248  state.model = system.model
249  state.prot = self.hier
250 
251  def get_molecules(self):
252  """Return a dictionary where key is molecule name and value
253  is a list of all copies of that molecule in setup order"""
254  return self.molecules
255 
256  def get_molecule(self, name, copy_num=0):
257  """Access a molecule by name and copy number
258  @param name The molecule name used during setup
259  @param copy_num The copy number based on input order.
260  Default: 0. Set to 'all' to get all copies
261  """
262  if name not in self.molecules:
263  raise KeyError("Could not find molname %s" % name)
264  if copy_num == 'all':
265  return self.molecules[name]
266  else:
267  return self.molecules[name][copy_num]
268 
269  def create_molecule(self, name, sequence='', chain_id='',
270  alphabet=IMP.pmi.alphabets.amino_acid,
271  uniprot=None):
272  """Create a new Molecule within this State
273  @param name the name of the molecule (string);
274  it must not be already used
275  @param sequence sequence (string)
276  @param chain_id Chain ID to assign to this molecule
277  @param alphabet Mapping from FASTA codes to residue types
278  @param uniprot UniProt accession, if available
279  """
280  # check whether the molecule name is already assigned
281  if name in self.molecules:
282  raise ValueError('Cannot use a molecule name already used')
283 
284  # check for something that looks like a copy number
285  if re.search(r'\.\d+$', name):
286  warnings.warn(
287  "It is recommended not to end the molecule name with "
288  ".(number) as it may be confused with the copy number "
289  "(the copy number for new molecules is always 0, so to "
290  "select this molecule, use '%s.0'). Use create_clone() or "
291  "create_copy() instead if a copy of an existing molecule "
292  "is desired." % name, IMP.pmi.ParameterWarning)
293 
294  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
295  alphabet=alphabet, uniprot=uniprot)
296  self.molecules[name] = [mol]
297  return mol
298 
299  def get_hierarchy(self):
300  """Get the IMP.atom.Hierarchy node for this state"""
301  return self.hier
302 
303  def get_number_of_copies(self, molname):
304  """Get the number of copies of the given molecule (by name)
305 
306  @param molname The name of the molecule
307  """
308  return len(self.molecules[molname])
309 
310  def _register_copy(self, molecule):
311  molname = molecule.get_hierarchy().get_name()
312  self.molecules[molname].append(molecule)
313 
314  def build(self, **kwargs):
315  """Build all molecules (automatically makes clones)"""
316  if not self.built:
317  for molname in self.molecules:
318  # We want to update ProtocolOutput in forward order so
319  # that, e.g. we get nice chain IDs in the mmCIF output,
320  # but we want to build the sequence in reverse order
321  for mol in self.molecules[molname]:
322  mol._build_protocol_output()
323  for mol in reversed(self.molecules[molname]):
324  mol.build(protocol_output=False, **kwargs)
325  for mol in self.molecules[molname]:
326  mol._finalize_build()
327  self.built = True
328  return self.hier
329 
330 
331 # Track residues read from PDB files
332 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
333 
334 
335 class _RepresentationHandler(object):
336  """Handle PMI representation and use it to populate that of any attached
337  ProtocolOutput objects"""
338  def __init__(self, name, pos, pdb_elements):
339  self.name = name
340  self.pos = pos
341  self.last_index = None
342  self.last_pdb_index = None
343  self.pdb_for_residue = {}
344  for residues, pdb in pdb_elements:
345  for r in residues:
346  self.pdb_for_residue[r.get_index()] = pdb
347 
348  def _get_pdb(self, h):
349  """Return a PDBElement if the given hierarchy was read from a
350  PDB file"""
352  rind = IMP.atom.Residue(h).get_index()
353  return self.pdb_for_residue.get(rind, None)
354 
355  def __call__(self, res):
356  """Handle a single residue"""
357  if len(self.pos) == 0:
358  return
359  h = res.hier
360  pi = h.get_particle_index()
361  # Do nothing if we already saw this hierarchy
362  if self.last_index is None or pi != self.last_index:
363  pdb = self._get_pdb(h)
364  self.last_index = pi
365  if pdb:
366  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
367  frag = IMP.atom.Fragment(h.get_parent())
368  fragi = frag.get_particle_index()
369  # Do nothing if we already saw this PDB fragment
370  if self.last_pdb_index is not None \
371  and self.last_pdb_index == fragi:
372  return
373  self.last_pdb_index = fragi
374  indices = frag.get_residue_indexes()
375  for p, state in self.pos:
376  p.add_pdb_element(state, self.name,
377  indices[0], indices[-1], pdb.offset,
378  pdb.filename, pdb.chain_id, frag)
380  frag = IMP.atom.Fragment(h)
381  indices = frag.get_residue_indexes()
382  for p, state in self.pos:
383  p.add_bead_element(state, self.name,
384  indices[0], indices[-1], 1, h)
386  resind = IMP.atom.Residue(h).get_index()
387  for p, state in self.pos:
388  p.add_bead_element(state, self.name, resind, resind, 1, h)
389  else:
390  raise TypeError("Unhandled hierarchy %s" % str(h))
391 
392 
393 class Molecule(_SystemBase):
394  """Stores a named protein chain.
395  This class is constructed from within the State class.
396  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
397  Structure is read using this class.
398  Resolutions and copies can be registered, but are only created
399  when build() is called.
400 
401  A Molecule acts like a simple Python list of residues, and can be indexed
402  by integer (starting at zero) or by string (starting at 1).
403  """
404 
405  def __init__(self, state, name, sequence, chain_id, copy_num,
406  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid,
407  uniprot=None):
408  """The user should not call this directly; instead call
409  State.create_molecule()
410 
411  @param state The parent PMI State
412  @param name The name of the molecule (string)
413  @param sequence Sequence (string)
414  @param chain_id The chain of this molecule
415  @param copy_num Store the copy number
416  @param mol_to_clone The original molecule (for cloning ONLY)
417  @note It's expected that you will not use this constructor directly,
418  but rather create a Molecule with State.create_molecule()
419  """
420  # internal data storage
421  self.model = state.get_hierarchy().get_model()
422  self.state = state
423  self.sequence = sequence
424  self.built = False
425  self.mol_to_clone = mol_to_clone
426  self.alphabet = alphabet
427  self.representations = [] # list of stuff to build
428  self._pdb_elements = []
429  self.uniprot = uniprot
430  # residues with representation
431  self._represented = IMP.pmi.tools.OrderedSet()
432  # helps you place beads by storing structure
433  self.coord_finder = _FindCloseStructure()
434  # list of OrderedSets of tempresidues set to ideal helix
435  self._ideal_helices = []
436 
437  # create root node and set it as child to passed parent hierarchy
438  self.hier = self._create_child(self.state.get_hierarchy())
439  self.hier.set_name(name)
440  IMP.atom.Copy.setup_particle(self.hier, copy_num)
441  self._name_with_copy = "%s.%d" % (name, copy_num)
442  # store the sequence
443  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
444  self.chain.set_sequence(self.sequence)
445  self.chain.set_chain_type(alphabet.get_chain_type())
446  if self.uniprot:
447  self.chain.set_uniprot_accession(self.uniprot)
448  # create TempResidues from the sequence (if passed)
449  self.residues = []
450  for ns, s in enumerate(sequence):
451  r = TempResidue(self, s, ns+1, ns, alphabet)
452  self.residues.append(r)
453 
454  def __repr__(self):
455  return self.state.__repr__() + '.' + self.get_name() + '.' + \
456  str(IMP.atom.Copy(self.hier).get_copy_index())
457 
458  def __getitem__(self, val):
459  if isinstance(val, int):
460  return self.residues[val]
461  elif isinstance(val, str):
462  return self.residues[int(val)-1]
463  elif isinstance(val, slice):
464  return IMP.pmi.tools.OrderedSet(self.residues[val])
465  else:
466  raise TypeError("Indexes must be int or str")
467 
468  def get_hierarchy(self):
469  """Return the IMP Hierarchy corresponding to this Molecule"""
470  return self.hier
471 
472  def get_name(self):
473  """Return this Molecule name"""
474  return self.hier.get_name()
475 
476  def get_state(self):
477  """Return the State containing this Molecule"""
478  return self.state
479 
480  def get_ideal_helices(self):
481  """Returns list of OrderedSets with requested ideal helices"""
482  return self._ideal_helices
483 
484  def residue_range(self, a, b, stride=1):
485  """Get residue range from a to b, inclusive.
486  Use integers to get 0-indexing, or strings to get PDB-indexing"""
487  if isinstance(a, int) and isinstance(b, int) \
488  and isinstance(stride, int):
489  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
490  elif isinstance(a, str) and isinstance(b, str) \
491  and isinstance(stride, int):
492  return IMP.pmi.tools.OrderedSet(
493  self.residues[int(a)-1:int(b):stride])
494  else:
495  raise TypeError("Range ends must be int or str. "
496  "Stride must be int.")
497 
498  def get_residues(self):
499  """Return all modeled TempResidues as a set"""
500  all_res = IMP.pmi.tools.OrderedSet(self.residues)
501  return all_res
502 
503  def get_represented(self):
504  """Return set of TempResidues that have representation"""
505  return self._represented
506 
508  """Return a set of TempResidues that have associated structure
509  coordinates"""
510  atomic_res = IMP.pmi.tools.OrderedSet()
511  for res in self.residues:
512  if res.get_has_structure():
513  atomic_res.add(res)
514  return atomic_res
515 
517  """Return a set of TempResidues that don't have associated
518  structure coordinates"""
519  non_atomic_res = IMP.pmi.tools.OrderedSet()
520  for res in self.residues:
521  if not res.get_has_structure():
522  non_atomic_res.add(res)
523  return non_atomic_res
524 
525  def create_copy(self, chain_id):
526  """Create a new Molecule with the same name and sequence but a
527  higher copy number. Returns the Molecule. No structure or
528  representation will be copied!
529 
530  @param chain_id Chain ID of the new molecule
531  """
532  mol = Molecule(
533  self.state, self.get_name(), self.sequence, chain_id,
534  copy_num=self.state.get_number_of_copies(self.get_name()))
535  self.state._register_copy(mol)
536  return mol
537 
538  def create_clone(self, chain_id):
539  """Create a Molecule clone (automatically builds same structure
540  and representation)
541 
542  @param chain_id If you want to set the chain ID of the copy
543  to something
544  @note You cannot add structure or representations to a clone!
545  """
546  mol = Molecule(
547  self.state, self.get_name(), self.sequence, chain_id,
548  copy_num=self.state.get_number_of_copies(self.get_name()),
549  mol_to_clone=self)
550  self.state._register_copy(mol)
551  return mol
552 
553  def add_structure(self, pdb_fn, chain_id, res_range=[],
554  offset=0, model_num=None, ca_only=False,
555  soft_check=False):
556  """Read a structure and store the coordinates.
557  @return the atomic residues (as a set)
558  @param pdb_fn The file to read (in PDB or mmCIF format)
559  @param chain_id Chain ID to read
560  @param res_range Add only a specific set of residues from the PDB
561  file. res_range[0] is the starting and res_range[1]
562  is the ending residue index.
563  @param offset Apply an offset to the residue indexes of the PDB
564  file. This number is added to the PDB sequence.
565  PMI uses 1-based FASTA numbering internally (the
566  first residue in the sequence is numbered 1, and
567  so on). If the PDB chain is not also numbered
568  starting from 1, apply an offset to make it match
569  the FASTA. For example, if the PDB is numbered
570  starting from -5, use an offset of 6 (-5 + 6 = 1).
571  @param model_num Read multi-model PDB and return that model
572  @param ca_only Only read the CA positions from the PDB file
573  @param soft_check If True, it only warns if there are sequence
574  mismatches between the PDB and the Molecule (FASTA)
575  sequence, and uses the sequence from the PDB.
576  If False (Default), it raises an error when there
577  are sequence mismatches.
578  @note If you are adding structure without a FASTA file, set soft_check
579  to True.
580  """
581  if self.mol_to_clone is not None:
582  raise ValueError('You cannot call add_structure() for a clone')
583 
584  self.pdb_fn = pdb_fn
585 
586  # get IMP.atom.Residues from the pdb file
587  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
588  res_range, offset,
589  ca_only=ca_only)
590  self.coord_finder.add_residues(rhs)
591 
592  if len(self.residues) == 0:
593  warnings.warn(
594  "Substituting PDB residue type with FASTA residue type. "
595  "Potentially dangerous.", IMP.pmi.StructureWarning)
596 
597  # Store info for ProtocolOutput usage later
598  self._pdb_elements.append(
599  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
600  chain_id=chain_id)))
601 
602  # load those into TempResidue object
603  # collect integer indexes of atomic residues to return
604  atomic_res = IMP.pmi.tools.OrderedSet()
605  for nrh, rh in enumerate(rhs):
606  pdb_idx = rh.get_index()
607  raw_idx = pdb_idx - 1
608 
609  # add ALA to fill in gaps
610  while len(self.residues) < pdb_idx:
611  r = TempResidue(self, 'A', len(self.residues)+1,
612  len(self.residues),
613  IMP.pmi.alphabets.amino_acid)
614  self.residues.append(r)
615  self.sequence += 'A'
616 
617  internal_res = self.residues[raw_idx]
618  if len(self.sequence) < raw_idx:
619  self.sequence += IMP.atom.get_one_letter_code(
620  rh.get_residue_type())
621  internal_res.set_structure(rh, soft_check)
622  atomic_res.add(internal_res)
623 
624  self.chain.set_sequence(self.sequence)
625  return atomic_res
626 
628  residues=None,
629  resolutions=[],
630  bead_extra_breaks=[],
631  bead_ca_centers=True,
632  bead_default_coord=[0, 0, 0],
633  density_residues_per_component=None,
634  density_prefix=None,
635  density_force_compute=False,
636  density_voxel_size=1.0,
637  setup_particles_as_densities=False,
638  ideal_helix=False,
639  color=None):
640  """Set the representation for some residues. Some options
641  (beads, ideal helix) operate along the backbone. Others (density
642  options) are volumetric.
643  Some of these you can combine e.g., beads+densities or helix+densities
644  See @ref pmi_resolution
645  @param residues Set of PMI TempResidues for adding the representation.
646  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
647  If None, will select all residues for this Molecule.
648  @param resolutions Resolutions for beads representations.
649  If structured, will average along backbone, breaking at
650  sequence breaks. If unstructured, will just create beads.
651  Pass an integer or list of integers
652  @param bead_extra_breaks Additional breakpoints for splitting beads.
653  The value can be the 0-ordered position, after which it'll
654  insert the break.
655  Alternatively pass PDB-style (1-ordered) indices as a string.
656  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
657  @param bead_ca_centers Set to True if you want the resolution=1 beads
658  to be at CA centers (otherwise will average atoms to get
659  center). Defaults to True.
660  @param bead_default_coord Advanced feature. Normally beads are placed
661  at the nearest structure. If no structure provided (like an
662  all bead molecule), the beads go here.
663  @param density_residues_per_component Create density (Gaussian
664  Mixture Model) for these residues. Must also supply
665  density_prefix
666  @param density_prefix Prefix (assuming '.txt') to read components
667  from or write to.
668  If exists, will read unless you set density_force_compute=True.
669  Will also write map (prefix+'.mrc').
670  Must also supply density_residues_per_component.
671  @param density_force_compute Set true to force overwrite density file.
672  @param density_voxel_size Advanced feature. Set larger if densities
673  taking too long to rasterize.
674  Set to 0 if you don't want to create the MRC file
675  @param setup_particles_as_densities Set to True if you want each
676  particle to be its own density.
677  Useful for all-atom models or flexible beads.
678  Mutually exclusive with density_ options
679  @param ideal_helix Create idealized helix structures for these
680  residues at resolution 1.
681  Any other resolutions passed will be coarsened from there.
682  Resolution 0 will not work; you may have to use MODELLER
683  to do that (for now).
684  @param color the color applied to the hierarchies generated.
685  Format options: tuple (r,g,b) with values 0 to 1;
686  float (from 0 to 1, a map from Blue to Green to Red);
687  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
688  a hex RGB string (e.g. "#ff0000");
689  an IMP.display.Color object
690  @note You cannot call add_representation multiple times for the
691  same residues.
692  """ # noqa: E501
693 
694  # can't customize clones
695  if self.mol_to_clone is not None:
696  raise ValueError(
697  'You cannot call add_representation() for a clone.'
698  ' Maybe use a copy instead.')
699 
700  # format input
701  if residues is None:
702  res = IMP.pmi.tools.OrderedSet(self.residues)
703  elif residues == self:
704  res = IMP.pmi.tools.OrderedSet(self.residues)
705  elif type(residues) is IMP.pmi.topology.TempResidue:
706  res = IMP.pmi.tools.OrderedSet([residues])
707  elif hasattr(residues, '__iter__'):
708  if len(residues) == 0:
709  raise Exception(
710  'You passed an empty set to add_representation')
711  if type(residues) is IMP.pmi.tools.OrderedSet \
712  and type(next(iter(residues))) is TempResidue:
713  res = residues
714  elif (type(residues) is set
715  and type(next(iter(residues))) is TempResidue):
716  res = IMP.pmi.tools.OrderedSet(residues)
717  elif type(residues) is list and type(residues[0]) is TempResidue:
718  res = IMP.pmi.tools.OrderedSet(residues)
719  else:
720  raise Exception("You passed an iterable of something other "
721  "than TempResidue", res)
722  else:
723  raise Exception("add_representation: you must pass a set of "
724  "residues or nothing(=all residues)")
725 
726  # check that each residue has not been represented yet
727  ov = res & self._represented
728  if ov:
729  raise Exception('You have already added representation for ' +
730  self.get_hierarchy().get_name() + ': ' +
731  ov.__repr__())
732  self._represented |= res
733 
734  # check you aren't creating multiple resolutions without structure
735  if not hasattr(resolutions, '__iter__'):
736  if type(resolutions) is int:
737  resolutions = [resolutions]
738  else:
739  raise Exception("you tried to pass resolutions that are not "
740  "int or list-of-int")
741  if len(resolutions) > 1 and not ideal_helix:
742  for r in res:
743  if not r.get_has_structure():
744  raise Exception(
745  'You are creating multiple resolutions for '
746  'unstructured regions. This will have unexpected '
747  'results.')
748 
749  # check density info is consistent
750  if density_residues_per_component or density_prefix:
751  if not density_residues_per_component and density_prefix:
752  raise Exception(
753  'If requesting density, must provide '
754  'density_residues_per_component AND density_prefix')
755  if density_residues_per_component and setup_particles_as_densities:
756  raise Exception(
757  'Cannot create both volumetric density '
758  '(density_residues_per_component) AND '
759  'individual densities (setup_particles_as_densities) '
760  'in the same representation')
761  if len(resolutions) > 1 and setup_particles_as_densities:
762  raise Exception(
763  'You have multiple bead resolutions but are attempting to '
764  'set them all up as individual Densities. '
765  'This could have unexpected results.')
766 
767  # check helix not accompanied by other resolutions
768  # (densities OK though!)
769  if ideal_helix:
770  if 0 in resolutions:
771  raise Exception(
772  "For ideal helices, cannot build resolution 0: "
773  "you have to do that in MODELLER")
774  if 1 not in resolutions:
775  resolutions = [1] + list(resolutions)
776  self._ideal_helices.append(res)
777 
778  # check residues are all part of this molecule:
779  for r in res:
780  if r.get_molecule() != self:
781  raise Exception(
782  'You are adding residues from a different molecule to',
783  self.__repr__())
784 
785  # unify formatting for extra breaks
786  breaks = []
787  for b in bead_extra_breaks:
788  if isinstance(b, str):
789  breaks.append(int(b)-1)
790  else:
791  breaks.append(b)
792  # store the representation group
793  self.representations.append(_Representation(
794  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
795  density_residues_per_component, density_prefix,
796  density_force_compute, density_voxel_size,
797  setup_particles_as_densities, ideal_helix, color))
798 
799  def _all_protocol_output(self):
800  return self.state._protocol_output
801 
802  def _build_protocol_output(self):
803  """Add molecule name and sequence to any ProtocolOutput objects"""
804  if not self.built:
805  name = self.hier.get_name()
806  for po, state in self._all_protocol_output():
807  po.create_component(state, name, True,
808  asym_name=self._name_with_copy)
809  po.add_component_sequence(state, name, self.sequence,
810  asym_name=self._name_with_copy,
811  alphabet=self.alphabet)
812 
813  def _finalize_build(self):
814  # For clones, pass the representation of the original molecule
815  # to ProtocolOutput
816  if self.mol_to_clone:
817  rephandler = _RepresentationHandler(
818  self._name_with_copy, list(self._all_protocol_output()),
819  self.mol_to_clone._pdb_elements)
820  for res in self.mol_to_clone.residues:
821  if res.hier:
822  rephandler(res)
823 
824  def build(self, protocol_output=True):
825  """Create all parts of the IMP hierarchy
826  including Atoms, Residues, and Fragments/Representations and,
827  finally, Copies.
828  Will only build requested representations.
829  @note Any residues assigned a resolution must have an IMP.atom.Residue
830  hierarchy containing at least a CAlpha. For missing residues,
831  these can be constructed from the PDB file.
832  """
833  if not self.built:
834  if protocol_output:
835  self._build_protocol_output()
836  # if requested, clone structure and representations
837  # BEFORE building original
838  if self.mol_to_clone is not None:
839  for nr, r in enumerate(self.mol_to_clone.residues):
840  if r.get_has_structure():
841  clone = IMP.atom.create_clone(r.get_hierarchy())
842  self.residues[nr].set_structure(
843  IMP.atom.Residue(clone), soft_check=True)
844  for old_rep in self.mol_to_clone.representations:
845  new_res = IMP.pmi.tools.OrderedSet()
846  for r in old_rep.residues:
847  new_res.add(self.residues[r.get_internal_index()])
848  self._represented.add(
849  self.residues[r.get_internal_index()])
850  new_rep = _Representation(
851  new_res, old_rep.bead_resolutions,
852  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
853  old_rep.bead_default_coord,
854  old_rep.density_residues_per_component,
855  old_rep.density_prefix, False,
856  old_rep.density_voxel_size,
857  old_rep.setup_particles_as_densities,
858  old_rep.ideal_helix, old_rep.color)
859  self.representations.append(new_rep)
860  self.coord_finder = self.mol_to_clone.coord_finder
861 
862  # give a warning for all residues that don't have representation
863  no_rep = [r for r in self.residues if r not in self._represented]
864  if len(no_rep) > 0:
865  warnings.warn(
866  'Residues without representation in molecule %s: %s'
867  % (self.get_name(), system_tools.resnums2str(no_rep)),
869 
870  # first build any ideal helices (fills in structure for
871  # the TempResidues)
872  for rep in self.representations:
873  if rep.ideal_helix:
874  _build_ideal_helix(self.model, rep.residues,
875  self.coord_finder)
876 
877  # build all the representations
878  built_reps = []
879 
880  rephandler = _RepresentationHandler(
881  self._name_with_copy, list(self._all_protocol_output()),
882  self._pdb_elements)
883 
884  for rep in self.representations:
885  built_reps += system_tools.build_representation(
886  self, rep, self.coord_finder, rephandler)
887 
888  # sort them before adding as children
889  built_reps.sort(
890  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
891  for br in built_reps:
892  self.hier.add_child(br)
893  br.update_parents()
894  self.built = True
895 
896  for res in self.residues:
897  # first off, store the highest resolution available
898  # in residue.hier
899  new_ps = IMP.atom.Selection(
900  self.hier,
901  residue_index=res.get_index(),
902  resolution=1).get_selected_particles()
903  if len(new_ps) > 0:
904  new_p = new_ps[0]
905  if IMP.atom.Atom.get_is_setup(new_p):
906  # if only found atomic, store the residue
907  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
908  else:
909  # otherwise just store what you found
910  new_hier = IMP.atom.Hierarchy(new_p)
911  res.hier = new_hier
912  # Clones will be handled in _finalize_build() instead
913  # (can't handle them here as the parent of the clone
914  # isn't built yet)
915  if self.mol_to_clone is None:
916  rephandler(res)
917  else:
918  res.hier = None
919  self._represented = IMP.pmi.tools.OrderedSet(
920  [a for a in self._represented])
921  print('done building', self.get_hierarchy())
922  return self.hier
923 
924  def get_particles_at_all_resolutions(self, residue_indexes=None):
925  """Helpful utility for getting particles at all resolutions from
926  this molecule. Can optionally pass a set of residue indexes"""
927  if not self.built:
928  raise Exception(
929  "Cannot get all resolutions until you build the Molecule")
930  if residue_indexes is None:
931  residue_indexes = [r.get_index() for r in self.get_residues()]
933  self.get_hierarchy(), residue_indexes=residue_indexes)
934  return ps
935 
936 
937 class _Representation(object):
938  """Private class just to store a representation request"""
939  def __init__(self,
940  residues,
941  bead_resolutions,
942  bead_extra_breaks,
943  bead_ca_centers,
944  bead_default_coord,
945  density_residues_per_component,
946  density_prefix,
947  density_force_compute,
948  density_voxel_size,
949  setup_particles_as_densities,
950  ideal_helix,
951  color):
952  self.residues = residues
953  self.bead_resolutions = bead_resolutions
954  self.bead_extra_breaks = bead_extra_breaks
955  self.bead_ca_centers = bead_ca_centers
956  self.bead_default_coord = bead_default_coord
957  self.density_residues_per_component = density_residues_per_component
958  self.density_prefix = density_prefix
959  self.density_force_compute = density_force_compute
960  self.density_voxel_size = density_voxel_size
961  self.setup_particles_as_densities = setup_particles_as_densities
962  self.ideal_helix = ideal_helix
963  self.color = color
964 
965 
966 class _FindCloseStructure(object):
967  """Utility to get the nearest observed coordinate"""
968  def __init__(self):
969  self.coords = []
970 
971  def add_residues(self, residues):
972  for r in residues:
973  idx = IMP.atom.Residue(r).get_index()
974  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
975  ca = IMP.atom.Selection(
976  r, atom_types=catypes).get_selected_particles()
977  p = IMP.atom.Selection(
978  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
979  if len(ca) == 1:
980  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
981  self.coords.append([idx, xyz])
982  elif len(p) == 1:
983  xyz = IMP.core.XYZ(p[0]).get_coordinates()
984  self.coords.append([idx, xyz])
985  else:
986  raise ValueError("_FindCloseStructure: wrong selection")
987 
988  self.coords.sort(key=itemgetter(0))
989 
990  def find_nearest_coord(self, query):
991  if self.coords == []:
992  return None
993  keys = [r[0] for r in self.coords]
994  pos = bisect_left(keys, query)
995  if pos == 0:
996  ret = self.coords[0]
997  elif pos == len(self.coords):
998  ret = self.coords[-1]
999  else:
1000  before = self.coords[pos - 1]
1001  after = self.coords[pos]
1002  if after[0] - query < query - before[0]:
1003  ret = after
1004  else:
1005  ret = before
1006  return ret[1]
1007 
1008 
1009 class Sequences(object):
1010  """A dictionary-like wrapper for reading and storing sequence data.
1011  Keys are FASTA sequence names, and each value a string of one-letter
1012  codes.
1013 
1014  The FASTA header may contain multiple fields split by pipe (|)
1015  characters. If so, the FASTA sequence name is the first field and
1016  the second field (if present) is the UniProt accession.
1017  For example, ">cop9|Q13098" yields a FASTA sequence name of "cop9"
1018  and UniProt accession of "Q13098".
1019  """
1020  def __init__(self, fasta_fn, name_map=None):
1021  """Read a FASTA file and extract all the requested sequences
1022  @param fasta_fn sequence file
1023  @param name_map dictionary mapping the FASTA name to final stored name
1024  """
1025  # Mapping from sequence name to primary sequence
1026  self.sequences = IMP.pmi.tools.OrderedDict()
1027  # Mapping from sequence name to UniProt accession, if available
1028  self.uniprot = {}
1029  self.read_sequences(fasta_fn, name_map)
1030 
1031  def __len__(self):
1032  return len(self.sequences)
1033 
1034  def __contains__(self, x):
1035  return x in self.sequences
1036 
1037  def __getitem__(self, key):
1038  if type(key) is int:
1039  allseqs = list(self.sequences.keys())
1040  try:
1041  return self.sequences[allseqs[key]]
1042  except IndexError:
1043  raise IndexError("You tried to access sequence number %d "
1044  "but there's only %d" % (key, len(allseqs)))
1045  else:
1046  return self.sequences[key]
1047 
1048  def __iter__(self):
1049  return self.sequences.__iter__()
1050 
1051  def __repr__(self):
1052  ret = ''
1053  for s in self.sequences:
1054  ret += '%s\t%s\n' % (s, self.sequences[s])
1055  return ret
1056 
1057  def read_sequences(self, fasta_fn, name_map=None):
1058  code = None
1059  seq = None
1060  with open(fasta_fn, 'r') as fh:
1061  for (num, line) in enumerate(fh):
1062  if line.startswith('>'):
1063  if seq is not None:
1064  self.sequences[code] = seq.strip('*')
1065  spl = line[1:].split('|')
1066  code = spl[0].strip()
1067  if name_map is not None:
1068  try:
1069  code = name_map[code]
1070  except KeyError:
1071  pass
1072  seq = ''
1073  if len(spl) >= 2:
1074  up_accession = spl[1].strip()
1075  self.uniprot[code] = up_accession
1076  else:
1077  line = line.rstrip()
1078  if line: # Skip blank lines
1079  if seq is None:
1080  raise Exception(
1081  "Found FASTA sequence before first header "
1082  "at line %d: %s" % (num + 1, line))
1083  seq += line
1084  if seq is not None:
1085  self.sequences[code] = seq.strip('*')
1086 
1087 
1088 class PDBSequences(object):
1089  """Data structure for reading and storing sequence data from PDBs.
1090 
1091  @see fasta_pdb_alignments."""
1092  def __init__(self, model, pdb_fn, name_map=None):
1093  """Read a PDB file and return all sequences for each contiguous
1094  fragment
1095  @param pdb_fn file
1096  @param name_map dictionary mapping the pdb chain id to final
1097  stored name
1098  """
1099  self.model = model
1100  # self.sequences data-structure: (two-key dictionary)
1101  # it contains all contiguous fragments:
1102  # chain_id, tuples indicating first and last residue, sequence
1103  # example:
1104  # key1, key2, value
1105  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1106  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1107  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1108  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1109  self.sequences = IMP.pmi.tools.OrderedDict()
1110  self.read_sequences(pdb_fn, name_map)
1111 
1112  def read_sequences(self, pdb_fn, name_map):
1113  read_file = IMP.atom.read_pdb
1114  if pdb_fn.endswith('.cif'):
1115  read_file = IMP.atom.read_mmcif
1116  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1117  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1118  for c in cs:
1119  id = IMP.atom.Chain(c).get_id()
1120  print(id)
1121  if name_map:
1122  try:
1123  id = name_map[id]
1124  except KeyError:
1125  print("Chain ID %s not in name_map, skipping" % id)
1126  continue
1127  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1128  rids = []
1129  rids_olc_dict = {}
1130  for r in rs:
1131  dr = IMP.atom.Residue(r)
1132  rid = dr.get_index()
1133 
1134  isprotein = dr.get_is_protein()
1135  isrna = dr.get_is_rna()
1136  isdna = dr.get_is_dna()
1137  if isprotein:
1138  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1139  rids.append(rid)
1140  rids_olc_dict[rid] = olc
1141  elif isdna:
1142  if dr.get_residue_type() == IMP.atom.DADE:
1143  olc = "A"
1144  if dr.get_residue_type() == IMP.atom.DURA:
1145  olc = "U"
1146  if dr.get_residue_type() == IMP.atom.DCYT:
1147  olc = "C"
1148  if dr.get_residue_type() == IMP.atom.DGUA:
1149  olc = "G"
1150  if dr.get_residue_type() == IMP.atom.DTHY:
1151  olc = "T"
1152  rids.append(rid)
1153  rids_olc_dict[rid] = olc
1154  elif isrna:
1155  if dr.get_residue_type() == IMP.atom.ADE:
1156  olc = "A"
1157  if dr.get_residue_type() == IMP.atom.URA:
1158  olc = "U"
1159  if dr.get_residue_type() == IMP.atom.CYT:
1160  olc = "C"
1161  if dr.get_residue_type() == IMP.atom.GUA:
1162  olc = "G"
1163  if dr.get_residue_type() == IMP.atom.THY:
1164  olc = "T"
1165  rids.append(rid)
1166  rids_olc_dict[rid] = olc
1167  group_rids = self.group_indexes(rids)
1168  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1169  for group in group_rids:
1170  sequence_fragment = ""
1171  for i in range(group[0], group[1]+1):
1172  sequence_fragment += rids_olc_dict[i]
1173  contiguous_sequences[group] = sequence_fragment
1174  self.sequences[id] = contiguous_sequences
1175 
1176  def group_indexes(self, indexes):
1177  from itertools import groupby
1178  ranges = []
1179  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1180  group = [x[1] for x in g]
1181  ranges.append((group[0], group[-1]))
1182  return ranges
1183 
1184 
1185 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1186  '''This function computes and prints the alignment between the
1187  fasta file and the pdb sequence, computes the offsets for each contiguous
1188  fragment in the PDB.
1189  @param fasta_sequences IMP.pmi.topology.Sequences object
1190  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1191  @param show boolean default False, if True prints the alignments.
1192  The input objects should be generated using map_name dictionaries
1193  such that fasta_id
1194  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1195  Returns a dictionary of offsets, organized by peptide range (group):
1196  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1197  from Bio import pairwise2
1198  from Bio.pairwise2 import format_alignment
1199  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1200  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1201  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1202  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1203  offsets = IMP.pmi.tools.OrderedDict()
1204  for name in fasta_sequences.sequences:
1205  print(name)
1206  seq_fasta = fasta_sequences.sequences[name]
1207  if name not in pdb_sequences.sequences:
1208  print("Fasta id %s not in pdb names, aligning against every "
1209  "pdb chain" % name)
1210  pdbnames = pdb_sequences.sequences.keys()
1211  else:
1212  pdbnames = [name]
1213  for pdbname in pdbnames:
1214  for group in pdb_sequences.sequences[pdbname]:
1215  if group[1] - group[0] + 1 < 7:
1216  continue
1217  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1218  if show:
1219  print("########################")
1220  print(" ")
1221  print("protein name", pdbname)
1222  print("fasta id", name)
1223  print("pdb fragment", group)
1224  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1225  2, -1, -.5, -.1)[0]
1226  for a in [align]:
1227  offset = a[3] + 1 - group[0]
1228  if show:
1229  print("alignment sequence start-end",
1230  (a[3] + 1, a[4] + 1))
1231  print("offset from pdb to fasta index", offset)
1232  print(format_alignment(*a))
1233  if name not in offsets:
1234  offsets[pdbname] = {}
1235  if group not in offsets[pdbname]:
1236  offsets[pdbname][group] = offset
1237  else:
1238  if group not in offsets[pdbname]:
1239  offsets[pdbname][group] = offset
1240  return offsets
1241 
1242 
1243 class TempResidue(object):
1244  "Temporarily stores residue information, even without structure available."
1245  # Consider implementing __hash__ so you can select.
1246  def __init__(self, molecule, code, index, internal_index, alphabet):
1247  """setup a TempResidue
1248  @param molecule PMI Molecule to which this residue belongs
1249  @param code one-letter residue type code
1250  @param index PDB index
1251  @param internal_index The number in the sequence
1252  """
1253  # these attributes should be immutable
1254  self.molecule = molecule
1255  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1256  self.pdb_index = index
1257  self.internal_index = internal_index
1258  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1259  self.state_index = \
1260  IMP.atom.State(self.molecule.state.hier).get_state_index()
1261  # these are expected to change
1262  self._structured = False
1263  self.hier = IMP.atom.Residue.setup_particle(
1264  IMP.Particle(molecule.model), self.rtype, index)
1265 
1266  def __str__(self):
1267  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1268  + str(self.copy_index) + "_" + self.get_code() \
1269  + str(self.get_index())
1270 
1271  def __repr__(self):
1272  return self.__str__()
1273 
1274  def __key(self):
1275  # this returns the immutable attributes only
1276  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1277  self.pdb_index, self.internal_index)
1278 
1279  def __eq__(self, other):
1280  return (type(other) == type(self) # noqa: E721
1281  and self.__key() == other.__key())
1282 
1283  def __hash__(self):
1284  return hash(self.__key())
1285 
1286  def get_index(self):
1287  return self.pdb_index
1288 
1289  def get_internal_index(self):
1290  return self.internal_index
1291 
1292  def get_code(self):
1293  return IMP.atom.get_one_letter_code(self.get_residue_type())
1294 
1295  def get_residue_type(self):
1296  return self.rtype
1297 
1298  def get_hierarchy(self):
1299  return self.hier
1300 
1301  def get_molecule(self):
1302  return self.molecule
1303 
1304  def get_has_structure(self):
1305  return self._structured
1306 
1307  def set_structure(self, res, soft_check=False):
1308  if res.get_residue_type() != self.get_residue_type():
1309  if (res.get_residue_type() == IMP.atom.MSE
1310  and self.get_residue_type() == IMP.atom.MET):
1311  # MSE in the PDB file is OK to match with MET in the FASTA
1312  # sequence
1313  pass
1314  elif soft_check:
1315  # note from commit a2c13eaa1 we give priority to the
1316  # FASTA and not the PDB
1317  warnings.warn(
1318  'Inconsistency between FASTA sequence and PDB sequence. '
1319  'FASTA type %s %s and PDB type %s'
1320  % (self.get_index(), self.hier.get_residue_type(),
1321  res.get_residue_type()),
1323  self.hier.set_residue_type((self.get_residue_type()))
1324  self.rtype = self.get_residue_type()
1325  else:
1326  raise Exception(
1327  'ERROR: PDB residue index', self.get_index(), 'is',
1328  IMP.atom.get_one_letter_code(res.get_residue_type()),
1329  'and sequence residue is', self.get_code())
1330 
1331  for a in res.get_children():
1332  self.hier.add_child(a)
1333  atype = IMP.atom.Atom(a).get_atom_type()
1334  a.get_particle().set_name(
1335  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1336  self.hier.get_index()))
1337  self._structured = True
1338 
1339 
1340 class TopologyReader(object):
1341  """Automatically setup System and Degrees of Freedom with a formatted
1342  text file.
1343  The file is read in and each part of the topology is stored as a
1344  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1345  The topology file should be in a simple pipe-delimited format:
1346  @code{.txt}
1347 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1348 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1349 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1350 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1351 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1352 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1353 
1354  @endcode
1355 
1356  These are the fields you can enter:
1357  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1358  hierarchy for this structure. Multiple copies of the same molecule
1359  can be created by appending a copy number after a period; if none is
1360  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1361  of Rpb2 or Rpb2.0).
1362  - `color`: The color used in the output RMF file. Uses
1363  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1364  (e.g. "red"), or R,G,B values as three comma-separated floating point
1365  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1366  starting with '#' (e.g. #ff0000).
1367  - `fasta_fn`: Name of FASTA file containing this component.
1368  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1369  from the file is assumed to be a protein sequence. If it should instead
1370  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1371  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1372  FASTA file and treat it as RNA.
1373  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1374  If left empty, will set up as BEADS (you can also specify "BEADS")
1375  Can also write "IDEAL_HELIX".
1376  - `chain`: Chain ID of this domain in the PDB file.
1377  - `residue_range`: Comma delimited pair defining range.
1378  Can leave empty or use 'all' for entire sequence from PDB file.
1379  The second item in the pair can be END to select the last residue in the
1380  PDB chain.
1381  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1382  For example, an offset of -10 would match the first residue in the
1383  FASTA file (which is always numbered sequentially starting from 1) with
1384  residue 11 in the PDB file.
1385  - `bead_size`: The size (in residues) of beads used to model areas not
1386  covered by PDB coordinates. These will be built automatically.
1387  - `em_residues`: The number of Gaussians used to model the electron
1388  density of this domain. Set to zero if no EM fitting will be done.
1389  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1390  (and .mrc)
1391  - `rigid_body`: Leave empty if this object is not in a rigid body.
1392  Otherwise, this is a number corresponding to the rigid body containing
1393  this object. The number itself is just used for grouping things.
1394  - `super_rigid_body`: Add a mover that periodically moves several related
1395  domains as if they were a single large rigid body. In between such moves,
1396  the domains move independently. This can improve sampling.
1397  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1398  for all adjacent pairs of domains in the chain.
1399  - `flags` additional flags for advanced options
1400  @note All filenames are relative to the paths specified in the constructor.
1401 
1402  """ # noqa: E501
1403  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1404  gmm_dir='./'):
1405  """Constructor.
1406  @param topology_file Pipe-delimited file specifying the topology
1407  @param pdb_dir Relative path to the pdb directory
1408  @param fasta_dir Relative path to the fasta directory
1409  @param gmm_dir Relative path to the GMM directory
1410  """
1411  self.topology_file = topology_file
1412  # key=molname, value=TempMolecule
1413  self.molecules = IMP.pmi.tools.OrderedDict()
1414  self.pdb_dir = pdb_dir
1415  self.fasta_dir = fasta_dir
1416  self.gmm_dir = gmm_dir
1417  self._components = self.read(topology_file)
1418 
1419  def write_topology_file(self, outfile):
1420  with open(outfile, "w") as f:
1421  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1422  "residue_range|pdb_offset|bead_size|"
1423  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1424  "chain_of_super_rigid_bodies|\n")
1425  for c in self._components:
1426  output = c.get_str()+'\n'
1427  f.write(output)
1428  return outfile
1429 
1430  def get_components(self, topology_list="all"):
1431  """ Return list of ComponentTopologies for selected components
1432  @param topology_list List of indices to return"""
1433  if topology_list == "all":
1434  topologies = self._components
1435  else:
1436  topologies = []
1437  for i in topology_list:
1438  topologies.append(self._components[i])
1439  return topologies
1440 
1441  def get_molecules(self):
1442  return self.molecules
1443 
1444  def read(self, topology_file, append=False):
1445  """Read system components from topology file. append=False will erase
1446  current topology and overwrite with new
1447  """
1448  is_topology = False
1449  is_directories = False
1450  linenum = 1
1451  if not append:
1452  self._components = []
1453 
1454  with open(topology_file) as infile:
1455  for line in infile:
1456  if line.lstrip() == "" or line[0] == "#":
1457  continue
1458  elif line.split('|')[1].strip() in ("molecule_name"):
1459  is_topology = True
1460  is_directories = False
1461  old_format = False
1462  continue
1463  elif line.split('|')[1] == "component_name":
1464  is_topology = True
1466  "Old-style topology format (using "
1467  "|component_name|) is deprecated. Please switch to "
1468  "the new-style format (using |molecule_name|)\n")
1469  old_format = True
1470  is_directories = False
1471  continue
1472  elif line.split('|')[1] == "directories":
1474  "Setting directories in the topology file "
1475  "is deprecated. Please do so through the "
1476  "TopologyReader constructor. Note that new-style "
1477  "paths are relative to the current working "
1478  "directory, not the topology file.\n")
1479  is_directories = True
1480  elif is_directories:
1481  fields = line.split('|')
1482  setattr(self, fields[1],
1483  IMP.get_relative_path(topology_file, fields[2]))
1484  if is_topology:
1485  new_component = self._parse_line(line, linenum, old_format)
1486  self._components.append(new_component)
1487  linenum += 1
1488  return self._components
1489 
1490  def _parse_line(self, component_line, linenum, old_format):
1491  """Parse a line of topology values and matches them to their key.
1492  Checks each value for correct syntax
1493  Returns a list of Component objects
1494  fields:
1495  """
1496  c = _Component()
1497  values = [s.strip() for s in component_line.split('|')]
1498  errors = []
1499 
1500  # Required fields
1501  if old_format:
1502  c.molname = values[1]
1503  c.copyname = ''
1504  c._domain_name = values[2]
1505  c.color = 'blue'
1506  else:
1507  names = values[1].split('.')
1508  if len(names) == 1:
1509  c.molname = names[0]
1510  c.copyname = ''
1511  elif len(names) == 2:
1512  c.molname = names[0]
1513  c.copyname = names[1]
1514  else:
1515  c.molname = names[0]
1516  c.copyname = names[1]
1517  errors.append("Molecule name should be <molecule.copyID>")
1518  errors.append("For component %s line %d "
1519  % (c.molname, linenum))
1520  c._domain_name = c.molname + '.' + c.copyname
1521  colorfields = values[2].split(',')
1522  if len(colorfields) == 3:
1523  c.color = [float(x) for x in colorfields]
1524  if any([x > 1 for x in c.color]):
1525  c.color = [x/255 for x in c.color]
1526  else:
1527  c.color = values[2]
1528  c._orig_fasta_file = values[3]
1529  c.fasta_file = values[3]
1530  fasta_field = values[4].split(",")
1531  c.fasta_id = fasta_field[0]
1532  c.fasta_flag = None
1533  if len(fasta_field) > 1:
1534  c.fasta_flag = fasta_field[1]
1535  c._orig_pdb_input = values[5]
1536  pdb_input = values[5]
1537  tmp_chain = values[6]
1538  rr = values[7]
1539  offset = values[8]
1540  bead_size = values[9]
1541  emg = values[10]
1542  if old_format:
1543  rbs = srbs = csrbs = ''
1544  else:
1545  rbs = values[11]
1546  srbs = values[12]
1547  csrbs = values[13]
1548 
1549  if c.molname not in self.molecules:
1550  self.molecules[c.molname] = _TempMolecule(c)
1551  else:
1552  # COPY OR DOMAIN
1553  c._orig_fasta_file = \
1554  self.molecules[c.molname].orig_component._orig_fasta_file
1555  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1556  self.molecules[c.molname].add_component(c, c.copyname)
1557 
1558  # now cleanup input
1559  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1560  if pdb_input == "":
1561  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1562  errors.append("For component %s line %d is not correct"
1563  "|%s| was given." % (c.molname, linenum, pdb_input))
1564  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1565  c.pdb_file = pdb_input
1566  else:
1567  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1568 
1569  # PDB chain must be one or two characters
1570  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1571  c.chain = tmp_chain
1572  else:
1573  errors.append(
1574  "PDB Chain identifier must be one or two characters.")
1575  errors.append("For component %s line %d is not correct"
1576  "|%s| was given."
1577  % (c.molname, linenum, tmp_chain))
1578 
1579  # Optional fields
1580  # Residue Range
1581  if rr.strip() == 'all' or str(rr) == "":
1582  c.residue_range = None
1583  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1584  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1585  # Make sure that is residue range is given, there are only
1586  # two values and they are integers
1587  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1588  if c.residue_range[1] != 'END':
1589  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1590  # Old format used -1 for the last residue
1591  if old_format and c.residue_range[1] == -1:
1592  c.residue_range = (c.residue_range[0], 'END')
1593  else:
1594  errors.append("Residue Range format for component %s line %d is "
1595  "not correct" % (c.molname, linenum))
1596  errors.append(
1597  "Correct syntax is two comma separated integers: "
1598  "|start_res, end_res|. end_res can also be END to select the "
1599  "last residue in the chain. |%s| was given." % rr)
1600  errors.append("To select all residues, indicate |\"all\"|")
1601 
1602  # PDB Offset
1603  if self._is_int(offset):
1604  c.pdb_offset = int(offset)
1605  elif len(offset) == 0:
1606  c.pdb_offset = 0
1607  else:
1608  errors.append("PDB Offset format for component %s line %d is "
1609  "not correct" % (c.molname, linenum))
1610  errors.append("The value must be a single integer. |%s| was given."
1611  % offset)
1612 
1613  # Bead Size
1614  if self._is_int(bead_size):
1615  c.bead_size = int(bead_size)
1616  elif len(bead_size) == 0:
1617  c.bead_size = 0
1618  else:
1619  errors.append("Bead Size format for component %s line %d is "
1620  "not correct" % (c.molname, linenum))
1621  errors.append("The value must be a single integer. |%s| was given."
1622  % bead_size)
1623 
1624  # EM Residues Per Gaussian
1625  if self._is_int(emg):
1626  if int(emg) > 0:
1627  c.density_prefix = os.path.join(self.gmm_dir,
1628  c.get_unique_name())
1629  c.gmm_file = c.density_prefix + '.txt'
1630  c.mrc_file = c.density_prefix + '.gmm'
1631 
1632  c.em_residues_per_gaussian = int(emg)
1633  else:
1634  c.em_residues_per_gaussian = 0
1635  elif len(emg) == 0:
1636  c.em_residues_per_gaussian = 0
1637  else:
1638  errors.append("em_residues_per_gaussian format for component "
1639  "%s line %d is not correct" % (c.molname, linenum))
1640  errors.append("The value must be a single integer. |%s| was given."
1641  % emg)
1642 
1643  # rigid bodies
1644  if len(rbs) > 0:
1645  if not self._is_int(rbs):
1646  errors.append(
1647  "rigid bodies format for component "
1648  "%s line %d is not correct" % (c.molname, linenum))
1649  errors.append("Each RB must be a single integer, or empty. "
1650  "|%s| was given." % rbs)
1651  c.rigid_body = int(rbs)
1652 
1653  # super rigid bodies
1654  if len(srbs) > 0:
1655  srbs = srbs.split(',')
1656  for i in srbs:
1657  if not self._is_int(i):
1658  errors.append(
1659  "super rigid bodies format for component "
1660  "%s line %d is not correct" % (c.molname, linenum))
1661  errors.append(
1662  "Each SRB must be a single integer. |%s| was given."
1663  % srbs)
1664  c.super_rigid_bodies = srbs
1665 
1666  # chain of super rigid bodies
1667  if len(csrbs) > 0:
1668  if not self._is_int(csrbs):
1669  errors.append(
1670  "em_residues_per_gaussian format for component "
1671  "%s line %d is not correct" % (c.molname, linenum))
1672  errors.append(
1673  "Each CSRB must be a single integer. |%s| was given."
1674  % csrbs)
1675  c.chain_of_super_rigid_bodies = csrbs
1676 
1677  # done
1678  if errors:
1679  raise ValueError("Fix Topology File syntax errors and rerun: "
1680  + "\n".join(errors))
1681  else:
1682  return c
1683 
1684  def set_gmm_dir(self, gmm_dir):
1685  """Change the GMM dir"""
1686  self.gmm_dir = gmm_dir
1687  for c in self._components:
1688  c.gmm_file = os.path.join(self.gmm_dir,
1689  c.get_unique_name() + ".txt")
1690  c.mrc_file = os.path.join(self.gmm_dir,
1691  c.get_unique_name() + ".mrc")
1692  print('new gmm', c.gmm_file)
1693 
1694  def set_pdb_dir(self, pdb_dir):
1695  """Change the PDB dir"""
1696  self.pdb_dir = pdb_dir
1697  for c in self._components:
1698  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1699  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1700 
1701  def set_fasta_dir(self, fasta_dir):
1702  """Change the FASTA dir"""
1703  self.fasta_dir = fasta_dir
1704  for c in self._components:
1705  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1706 
1707  def _is_int(self, s):
1708  # is this string an integer?
1709  try:
1710  float(s)
1711  return float(s).is_integer()
1712  except ValueError:
1713  return False
1714 
1715  def get_rigid_bodies(self):
1716  """Return list of lists of rigid bodies (as domain name)"""
1717  rbl = defaultdict(list)
1718  for c in self._components:
1719  if c.rigid_body:
1720  rbl[c.rigid_body].append(c.get_unique_name())
1721  return rbl.values()
1722 
1724  """Return list of lists of super rigid bodies (as domain name)"""
1725  rbl = defaultdict(list)
1726  for c in self._components:
1727  for rbnum in c.super_rigid_bodies:
1728  rbl[rbnum].append(c.get_unique_name())
1729  return rbl.values()
1730 
1732  "Return list of lists of chains of super rigid bodies (as domain name)"
1733  rbl = defaultdict(list)
1734  for c in self._components:
1735  for rbnum in c.chain_of_super_rigid_bodies:
1736  rbl[rbnum].append(c.get_unique_name())
1737  return rbl.values()
1738 
1739 
1740 class _TempMolecule(object):
1741  """Store the Components and any requests for copies"""
1742  def __init__(self, init_c):
1743  self.molname = init_c.molname
1744  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1745  self.add_component(init_c, init_c.copyname)
1746  self.orig_copyname = init_c.copyname
1747  self.orig_component = self.domains[init_c.copyname][0]
1748 
1749  def add_component(self, component, copy_id):
1750  self.domains[copy_id].append(component)
1751  component.domainnum = len(self.domains[copy_id])-1
1752 
1753  def __repr__(self):
1754  return ','.join('%s:%i'
1755  % (k, len(self.domains[k])) for k in self.domains)
1756 
1757 
1758 class _Component(object):
1759  """Stores the components required to build a standard IMP hierarchy
1760  using IMP.pmi.BuildModel()
1761  """
1762  def __init__(self):
1763  self.molname = None
1764  self.copyname = None
1765  self.domainnum = 0
1766  self.fasta_file = None
1767  self._orig_fasta_file = None
1768  self.fasta_id = None
1769  self.fasta_flag = None
1770  self.pdb_file = None
1771  self._orig_pdb_input = None
1772  self.chain = None
1773  self.residue_range = None
1774  self.pdb_offset = 0
1775  self.bead_size = 10
1776  self.em_residues_per_gaussian = 0
1777  self.gmm_file = ''
1778  self.mrc_file = ''
1779  self.density_prefix = ''
1780  self.color = 0.1
1781  self.rigid_body = None
1782  self.super_rigid_bodies = []
1783  self.chain_of_super_rigid_bodies = []
1784 
1785  def _l2s(self, rng):
1786  return ",".join("%s" % x for x in rng)
1787 
1788  def __repr__(self):
1789  return self.get_str()
1790 
1791  def get_unique_name(self):
1792  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1793 
1794  def get_str(self):
1795  res_range = self.residue_range
1796  if self.residue_range is None:
1797  res_range = []
1798  name = self.molname
1799  if self.copyname != '':
1800  name += '.' + self.copyname
1801  if self.chain is None:
1802  chain = ' '
1803  else:
1804  chain = self.chain
1805  color = self.color
1806  if isinstance(color, list):
1807  color = ','.join([str(x) for x in color])
1808  fastaid = self.fasta_id
1809  if self.fasta_flag:
1810  fastaid += "," + self.fasta_flag
1811  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1812  self._orig_pdb_input, chain,
1813  self._l2s(list(res_range)),
1814  str(self.pdb_offset),
1815  str(self.bead_size),
1816  str(self.em_residues_per_gaussian),
1817  str(self.rigid_body) if self.rigid_body else '',
1818  self._l2s(self.super_rigid_bodies),
1819  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1820  return a
1821 
1822 
1824  '''Extends the functionality of IMP.atom.Molecule'''
1825 
1826  def __init__(self, hierarchy):
1827  IMP.atom.Molecule.__init__(self, hierarchy)
1828 
1829  def get_state_index(self):
1830  state = self.get_parent()
1831  return IMP.atom.State(state).get_state_index()
1832 
1833  def get_copy_index(self):
1834  return IMP.atom.Copy(self).get_copy_index()
1835 
1836  def get_extended_name(self):
1837  return self.get_name() + "." + \
1838  str(self.get_copy_index()) + \
1839  "." + str(self.get_state_index())
1840 
1841  def get_sequence(self):
1842  return IMP.atom.Chain(self).get_sequence()
1843 
1844  def get_residue_indexes(self):
1846 
1847  def get_residue_segments(self):
1848  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1849 
1850  def get_chain_id(self):
1851  return IMP.atom.Chain(self).get_id()
1852 
1853  def __repr__(self):
1854  s = 'PMIMoleculeHierarchy '
1855  s += self.get_name()
1856  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1857  s += " " + "State " + str(self.get_state_index())
1858  s += " " + "N residues " + str(len(self.get_sequence()))
1859  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1078
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:623
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
Represent the root node of the global IMP.atom.Hierarchy.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:128
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup System and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:83
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:512
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:832