IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/26
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from an mmCIF
17  or PDB file.
18  * Molecule.add_representation() to create a representation unit - here you
19  can choose bead resolutions as well as alternate representations like
20  densities or ideal helices.
21  * Molecule.create_clone() lets you set up a molecule with identical
22  representations, just a different chain ID. Use Molecule.create_copy()
23  if you want a molecule with the same sequence but that allows custom
24  representations.
25 * Once data has been added and representations chosen, call System.build()
26  to create a canonical IMP hierarchy.
27 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
28  in IMP::pmi::dof.
29 * Check your representation with a nice printout:
30  IMP::atom::show_with_representation()
31 
32 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
33 
34 Alternatively one can construct the entire topology and degrees of freedom
35 via formatted text file with TopologyReader and
36 IMP::pmi::macros::BuildSystem(). This is used in the
37 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
38 set of the full options available to PMI users
39 (rigid bodies only, fixed resolutions).
40 """ # noqa: E501
41 
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase:
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class _OurWeakRef:
137  """A simple wrapper around weakref.ref which can be pickled.
138  Note that we throw the reference away at pickle time. It should
139  be able to be reconstructed from System._all_systems anyway."""
140 
141  def __init__(self, system):
142  self._ref = weakref.ref(system)
143 
144  def __call__(self):
145  if hasattr(self, '_ref'):
146  return self._ref()
147 
148  def __getstate__(self):
149  return None
150 
151 
152 class System(_SystemBase):
153  """Represent the root node of the global IMP.atom.Hierarchy."""
154 
155  _all_systems = weakref.WeakSet()
156 
157  def __init__(self, model=None, name="System"):
158  """Constructor.
159 
160  @param model The IMP::Model in which to construct this system.
161  @param name The name of the top-level hierarchy node.
162  """
163  _SystemBase.__init__(self, model)
164  self._number_of_states = 0
165  self._protocol_output = []
166  self.states = []
167  self.built = False
168 
169  System._all_systems.add(self)
170 
171  # the root hierarchy node
172  self.hier = self._create_hierarchy()
173  self.hier.set_name(name)
174  self.hier._pmi2_system = _OurWeakRef(self)
175 
176  def get_states(self):
177  """Get a list of all State objects in this system"""
178  return self.states
179 
180  def create_state(self):
181  """Makes and returns a new IMP.pmi.topology.State in this system"""
182  self._number_of_states += 1
183  state = State(self, self._number_of_states-1)
184  self.states.append(state)
185  return state
186 
187  def __repr__(self):
188  return self.hier.get_name()
189 
191  """Returns the total number of states generated"""
192  return self._number_of_states
193 
194  def get_hierarchy(self):
195  """Return the top-level IMP.atom.Hierarchy node for this system"""
196  return self.hier
197 
198  def build(self, **kwargs):
199  """Build all states"""
200  if not self.built:
201  for state in self.states:
202  state.build(**kwargs)
203  self.built = True
204  for po in self._protocol_output:
205  po.finalize_build()
206  return self.hier
207 
208  def add_protocol_output(self, p):
209  """Capture details of the modeling protocol.
210  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
211  """
212  self._protocol_output.append(p)
213 # p._each_metadata.append(self._metadata)
214 # p._file_datasets.append(self._file_dataset)
215  for state in self.states:
216  state._add_protocol_output(p, self)
217 
218 
219 class State(_SystemBase):
220  """Stores a list of Molecules all with the same State index.
221  Also stores number of copies of each Molecule for easy selection.
222  """
223  def __init__(self, system, state_index):
224  """Define a new state
225  @param system the PMI System
226  @param state_index the index of the new state
227  @note It's expected that you will not use this constructor directly,
228  but rather create it with System.create_state()
229  """
230  self.model = system.get_hierarchy().get_model()
231  self.system = system
232  self.hier = self._create_child(system.get_hierarchy())
233  self.short_name = self.long_name = "State_" + str(state_index)
234  self.hier.set_name(self.short_name)
235  # key is molecule name. value are the molecule copies!
236  self.molecules = IMP.pmi.tools.OrderedDict()
237  IMP.atom.State.setup_particle(self.hier, state_index)
238  self.built = False
239  self._protocol_output = []
240  for p in system._protocol_output:
241  self._add_protocol_output(p, system)
242 
243  def __repr__(self):
244  return self.system.__repr__()+'.'+self.hier.get_name()
245 
246  def _add_protocol_output(self, p, system):
247  state = p._add_state(self)
248  self._protocol_output.append((p, state))
249  state.model = system.model
250  state.prot = self.hier
251 
252  def get_molecules(self):
253  """Return a dictionary where key is molecule name and value
254  is a list of all copies of that molecule in setup order"""
255  return self.molecules
256 
257  def get_molecule(self, name, copy_num=0):
258  """Access a molecule by name and copy number
259  @param name The molecule name used during setup
260  @param copy_num The copy number based on input order.
261  Default: 0. Set to 'all' to get all copies
262  """
263  if name not in self.molecules:
264  raise KeyError("Could not find molname %s" % name)
265  if copy_num == 'all':
266  return self.molecules[name]
267  else:
268  return self.molecules[name][copy_num]
269 
270  def create_molecule(self, name, sequence='', chain_id='',
271  alphabet=IMP.pmi.alphabets.amino_acid,
272  uniprot=None):
273  """Create a new Molecule within this State
274  @param name the name of the molecule (string);
275  it must not be already used
276  @param sequence sequence (string)
277  @param chain_id Chain ID to assign to this molecule
278  @param alphabet Mapping from FASTA codes to residue types
279  @param uniprot UniProt accession, if available
280  """
281  # check whether the molecule name is already assigned
282  if name in self.molecules:
283  raise ValueError('Cannot use a molecule name already used')
284 
285  # check for something that looks like a copy number
286  if re.search(r'\.\d+$', name):
287  warnings.warn(
288  "It is recommended not to end the molecule name with "
289  ".(number) as it may be confused with the copy number "
290  "(the copy number for new molecules is always 0, so to "
291  "select this molecule, use '%s.0'). Use create_clone() or "
292  "create_copy() instead if a copy of an existing molecule "
293  "is desired." % name, IMP.pmi.ParameterWarning)
294 
295  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
296  alphabet=alphabet, uniprot=uniprot)
297  self.molecules[name] = [mol]
298  return mol
299 
300  def get_hierarchy(self):
301  """Get the IMP.atom.Hierarchy node for this state"""
302  return self.hier
303 
304  def get_number_of_copies(self, molname):
305  """Get the number of copies of the given molecule (by name)
306 
307  @param molname The name of the molecule
308  """
309  return len(self.molecules[molname])
310 
311  def _register_copy(self, molecule):
312  molname = molecule.get_hierarchy().get_name()
313  self.molecules[molname].append(molecule)
314 
315  def build(self, **kwargs):
316  """Build all molecules (automatically makes clones)"""
317  if not self.built:
318  for molname in self.molecules:
319  # We want to update ProtocolOutput in forward order so
320  # that, e.g. we get nice chain IDs in the mmCIF output,
321  # but we want to build the sequence in reverse order
322  for mol in self.molecules[molname]:
323  mol._build_protocol_output()
324  for mol in reversed(self.molecules[molname]):
325  mol.build(protocol_output=False, **kwargs)
326  for mol in self.molecules[molname]:
327  mol._finalize_build()
328  self.built = True
329  return self.hier
330 
331 
332 # Track residues read from PDB files
333 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
334 
335 
336 class _RepresentationHandler:
337  """Handle PMI representation and use it to populate that of any attached
338  ProtocolOutput objects"""
339  def __init__(self, name, pos, pdb_elements):
340  self.name = name
341  self.pos = pos
342  self.last_index = None
343  self.last_pdb_index = None
344  self.pdb_for_residue = {}
345  for residues, pdb in pdb_elements:
346  for r in residues:
347  self.pdb_for_residue[r.get_index()] = pdb
348 
349  def _get_pdb(self, h):
350  """Return a PDBElement if the given hierarchy was read from a
351  PDB file"""
353  rind = IMP.atom.Residue(h).get_index()
354  return self.pdb_for_residue.get(rind, None)
355 
356  def __call__(self, res):
357  """Handle a single residue"""
358  if len(self.pos) == 0:
359  return
360  h = res.hier
361  pi = h.get_particle_index()
362  # Do nothing if we already saw this hierarchy
363  if self.last_index is None or pi != self.last_index:
364  pdb = self._get_pdb(h)
365  self.last_index = pi
366  if pdb:
367  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
368  frag = IMP.atom.Fragment(h.get_parent())
369  fragi = frag.get_particle_index()
370  # Do nothing if we already saw this PDB fragment
371  if self.last_pdb_index is not None \
372  and self.last_pdb_index == fragi:
373  return
374  self.last_pdb_index = fragi
375  indices = frag.get_residue_indexes()
376  for p, state in self.pos:
377  p.add_pdb_element(state, self.name,
378  indices[0], indices[-1], pdb.offset,
379  pdb.filename, pdb.chain_id, frag)
381  frag = IMP.atom.Fragment(h)
382  indices = frag.get_residue_indexes()
383  for p, state in self.pos:
384  p.add_bead_element(state, self.name,
385  indices[0], indices[-1], 1, h)
387  resind = IMP.atom.Residue(h).get_index()
388  for p, state in self.pos:
389  p.add_bead_element(state, self.name, resind, resind, 1, h)
390  else:
391  raise TypeError("Unhandled hierarchy %s" % str(h))
392 
393 
394 class Molecule(_SystemBase):
395  """Stores a named protein chain.
396  This class is constructed from within the State class.
397  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
398  Structure is read using this class.
399  Resolutions and copies can be registered, but are only created
400  when build() is called.
401 
402  A Molecule acts like a simple Python list of residues, and can be indexed
403  by integer (starting at zero) or by string (starting at 1).
404  """
405 
406  def __init__(self, state, name, sequence, chain_id, copy_num,
407  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid,
408  uniprot=None):
409  """The user should not call this directly; instead call
410  State.create_molecule()
411 
412  @param state The parent PMI State
413  @param name The name of the molecule (string)
414  @param sequence Sequence (string)
415  @param chain_id The chain of this molecule
416  @param copy_num Store the copy number
417  @param mol_to_clone The original molecule (for cloning ONLY)
418  @note It's expected that you will not use this constructor directly,
419  but rather create a Molecule with State.create_molecule()
420  """
421  # internal data storage
422  self.model = state.get_hierarchy().get_model()
423  self.state = state
424  self.sequence = sequence
425  self.built = False
426  self.mol_to_clone = mol_to_clone
427  self.alphabet = alphabet
428  self.representations = [] # list of stuff to build
429  self._pdb_elements = []
430  self.uniprot = uniprot
431  # residues with representation
432  self._represented = IMP.pmi.tools.OrderedSet()
433  # helps you place beads by storing structure
434  self.coord_finder = _FindCloseStructure()
435  # list of OrderedSets of tempresidues set to ideal helix
436  self._ideal_helices = []
437 
438  # create root node and set it as child to passed parent hierarchy
439  self.hier = self._create_child(self.state.get_hierarchy())
440  self.hier.set_name(name)
441  IMP.atom.Copy.setup_particle(self.hier, copy_num)
442  self._name_with_copy = "%s.%d" % (name, copy_num)
443  # store the sequence
444  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
445  self.chain.set_sequence(self.sequence)
446  self.chain.set_chain_type(alphabet.get_chain_type())
447  if self.uniprot:
448  self.chain.set_uniprot_accession(self.uniprot)
449  # create TempResidues from the sequence (if passed)
450  self.residues = []
451  for ns, s in enumerate(sequence):
452  r = TempResidue(self, s, ns+1, ns, alphabet)
453  self.residues.append(r)
454 
455  def __repr__(self):
456  return self.state.__repr__() + '.' + self.get_name() + '.' + \
457  str(IMP.atom.Copy(self.hier).get_copy_index())
458 
459  def __getitem__(self, val):
460  if isinstance(val, int):
461  return self.residues[val]
462  elif isinstance(val, str):
463  return self.residues[int(val)-1]
464  elif isinstance(val, slice):
465  return IMP.pmi.tools.OrderedSet(self.residues[val])
466  else:
467  raise TypeError("Indexes must be int or str")
468 
469  def get_hierarchy(self):
470  """Return the IMP Hierarchy corresponding to this Molecule"""
471  return self.hier
472 
473  def get_name(self):
474  """Return this Molecule name"""
475  return self.hier.get_name()
476 
477  def get_state(self):
478  """Return the State containing this Molecule"""
479  return self.state
480 
481  def get_ideal_helices(self):
482  """Returns list of OrderedSets with requested ideal helices"""
483  return self._ideal_helices
484 
485  def residue_range(self, a, b, stride=1):
486  """Get residue range from a to b, inclusive.
487  Use integers to get 0-indexing, or strings to get PDB-indexing"""
488  if isinstance(a, int) and isinstance(b, int) \
489  and isinstance(stride, int):
490  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
491  elif isinstance(a, str) and isinstance(b, str) \
492  and isinstance(stride, int):
493  return IMP.pmi.tools.OrderedSet(
494  self.residues[int(a)-1:int(b):stride])
495  else:
496  raise TypeError("Range ends must be int or str. "
497  "Stride must be int.")
498 
499  def get_residues(self):
500  """Return all modeled TempResidues as a set"""
501  all_res = IMP.pmi.tools.OrderedSet(self.residues)
502  return all_res
503 
504  def get_represented(self):
505  """Return set of TempResidues that have representation"""
506  return self._represented
507 
509  """Return a set of TempResidues that have associated structure
510  coordinates"""
511  atomic_res = IMP.pmi.tools.OrderedSet()
512  for res in self.residues:
513  if res.get_has_structure():
514  atomic_res.add(res)
515  return atomic_res
516 
518  """Return a set of TempResidues that don't have associated
519  structure coordinates"""
520  non_atomic_res = IMP.pmi.tools.OrderedSet()
521  for res in self.residues:
522  if not res.get_has_structure():
523  non_atomic_res.add(res)
524  return non_atomic_res
525 
526  def create_copy(self, chain_id):
527  """Create a new Molecule with the same name and sequence but a
528  higher copy number. Returns the Molecule. No structure or
529  representation will be copied!
530 
531  @param chain_id Chain ID of the new molecule
532  """
533  mol = Molecule(
534  self.state, self.get_name(), self.sequence, chain_id,
535  copy_num=self.state.get_number_of_copies(self.get_name()))
536  self.state._register_copy(mol)
537  return mol
538 
539  def create_clone(self, chain_id):
540  """Create a Molecule clone (automatically builds same structure
541  and representation)
542 
543  @param chain_id If you want to set the chain ID of the copy
544  to something
545  @note You cannot add structure or representations to a clone!
546  """
547  mol = Molecule(
548  self.state, self.get_name(), self.sequence, chain_id,
549  copy_num=self.state.get_number_of_copies(self.get_name()),
550  mol_to_clone=self)
551  self.state._register_copy(mol)
552  return mol
553 
554  def add_structure(self, pdb_fn, chain_id, res_range=[],
555  offset=0, model_num=None, ca_only=False,
556  soft_check=False):
557  """Read a structure and store the coordinates.
558  @return the atomic residues (as a set)
559  @param pdb_fn The file to read (in PDB or mmCIF format)
560  @param chain_id Chain ID to read
561  @param res_range Add only a specific set of residues from the PDB
562  file. res_range[0] is the starting and res_range[1]
563  is the ending residue index.
564  @param offset Apply an offset to the residue indexes of the PDB
565  file. This number is added to the PDB sequence.
566  PMI uses 1-based FASTA numbering internally (the
567  first residue in the sequence is numbered 1, and
568  so on). If the PDB chain is not also numbered
569  starting from 1, apply an offset to make it match
570  the FASTA. For example, if the PDB is numbered
571  starting from -5, use an offset of 6 (-5 + 6 = 1).
572  @param model_num Read multi-model PDB and return that model
573  @param ca_only Only read the CA positions from the PDB file
574  @param soft_check If True, it only warns if there are sequence
575  mismatches between the PDB and the Molecule (FASTA)
576  sequence, and uses the sequence from the PDB.
577  If False (Default), it raises an error when there
578  are sequence mismatches.
579  @note If you are adding structure without a FASTA file, set soft_check
580  to True.
581  """
582  if self.mol_to_clone is not None:
583  raise ValueError('You cannot call add_structure() for a clone')
584 
585  self.pdb_fn = pdb_fn
586 
587  # get IMP.atom.Residues from the pdb file
588  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
589  res_range, offset,
590  ca_only=ca_only)
591  self.coord_finder.add_residues(rhs)
592 
593  if len(self.residues) == 0:
594  warnings.warn(
595  "Substituting PDB residue type with FASTA residue type. "
596  "Potentially dangerous.", IMP.pmi.StructureWarning)
597 
598  # Store info for ProtocolOutput usage later
599  self._pdb_elements.append(
600  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
601  chain_id=chain_id)))
602 
603  # load those into TempResidue object
604  # collect integer indexes of atomic residues to return
605  atomic_res = IMP.pmi.tools.OrderedSet()
606  for nrh, rh in enumerate(rhs):
607  pdb_idx = rh.get_index()
608  raw_idx = pdb_idx - 1
609 
610  # add ALA to fill in gaps
611  while len(self.residues) < pdb_idx:
612  r = TempResidue(self, 'A', len(self.residues)+1,
613  len(self.residues),
614  IMP.pmi.alphabets.amino_acid)
615  self.residues.append(r)
616  self.sequence += 'A'
617 
618  internal_res = self.residues[raw_idx]
619  if len(self.sequence) < raw_idx:
620  self.sequence += IMP.atom.get_one_letter_code(
621  rh.get_residue_type())
622  internal_res.set_structure(rh, soft_check)
623  atomic_res.add(internal_res)
624 
625  self.chain.set_sequence(self.sequence)
626  return atomic_res
627 
629  residues=None,
630  resolutions=[],
631  bead_extra_breaks=[],
632  bead_ca_centers=True,
633  bead_default_coord=[0, 0, 0],
634  density_residues_per_component=None,
635  density_prefix=None,
636  density_force_compute=False,
637  density_voxel_size=1.0,
638  setup_particles_as_densities=False,
639  ideal_helix=False,
640  color=None):
641  """Set the representation for some residues. Some options
642  (beads, ideal helix) operate along the backbone. Others (density
643  options) are volumetric.
644  Some of these you can combine e.g., beads+densities or helix+densities
645  See @ref pmi_resolution
646  @param residues Set of PMI TempResidues for adding the representation.
647  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
648  If None, will select all residues for this Molecule.
649  @param resolutions Resolutions for beads representations.
650  If structured, will average along backbone, breaking at
651  sequence breaks. If unstructured, will just create beads.
652  Pass an integer or list of integers
653  @param bead_extra_breaks Additional breakpoints for splitting beads.
654  The value can be the 0-ordered position, after which it'll
655  insert the break.
656  Alternatively pass PDB-style (1-ordered) indices as a string.
657  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
658  @param bead_ca_centers Set to True if you want the resolution=1 beads
659  to be at CA centers (otherwise will average atoms to get
660  center). Defaults to True.
661  @param bead_default_coord Advanced feature. Normally beads are placed
662  at the nearest structure. If no structure provided (like an
663  all bead molecule), the beads go here.
664  @param density_residues_per_component Create density (Gaussian
665  Mixture Model) for these residues. Must also supply
666  density_prefix
667  @param density_prefix Prefix (assuming '.txt') to read components
668  from or write to.
669  If exists, will read unless you set density_force_compute=True.
670  Will also write map (prefix+'.mrc').
671  Must also supply density_residues_per_component.
672  @param density_force_compute Set true to force overwrite density file.
673  @param density_voxel_size Advanced feature. Set larger if densities
674  taking too long to rasterize.
675  Set to 0 if you don't want to create the MRC file
676  @param setup_particles_as_densities Set to True if you want each
677  particle to be its own density.
678  Useful for all-atom models or flexible beads.
679  Mutually exclusive with density_ options
680  @param ideal_helix Create idealized helix structures for these
681  residues at resolution 1.
682  Any other resolutions passed will be coarsened from there.
683  Resolution 0 will not work; you may have to use MODELLER
684  to do that (for now).
685  @param color the color applied to the hierarchies generated.
686  Format options: tuple (r,g,b) with values 0 to 1;
687  float (from 0 to 1, a map from Blue to Green to Red);
688  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
689  a hex RGB string (e.g. "#ff0000");
690  an IMP.display.Color object
691  @note You cannot call add_representation multiple times for the
692  same residues.
693  """ # noqa: E501
694 
695  # can't customize clones
696  if self.mol_to_clone is not None:
697  raise ValueError(
698  'You cannot call add_representation() for a clone.'
699  ' Maybe use a copy instead.')
700 
701  # format input
702  if residues is None:
703  res = IMP.pmi.tools.OrderedSet(self.residues)
704  elif residues == self:
705  res = IMP.pmi.tools.OrderedSet(self.residues)
706  elif type(residues) is IMP.pmi.topology.TempResidue:
707  res = IMP.pmi.tools.OrderedSet([residues])
708  elif hasattr(residues, '__iter__'):
709  if len(residues) == 0:
710  raise Exception(
711  'You passed an empty set to add_representation')
712  if type(residues) is IMP.pmi.tools.OrderedSet \
713  and type(next(iter(residues))) is TempResidue:
714  res = residues
715  elif (type(residues) is set
716  and type(next(iter(residues))) is TempResidue):
717  res = IMP.pmi.tools.OrderedSet(residues)
718  elif type(residues) is list and type(residues[0]) is TempResidue:
719  res = IMP.pmi.tools.OrderedSet(residues)
720  else:
721  raise Exception("You passed an iterable of something other "
722  "than TempResidue", res)
723  else:
724  raise Exception("add_representation: you must pass a set of "
725  "residues or nothing(=all residues)")
726 
727  # check that each residue has not been represented yet
728  ov = res & self._represented
729  if ov:
730  raise Exception('You have already added representation for ' +
731  self.get_hierarchy().get_name() + ': ' +
732  ov.__repr__())
733  self._represented |= res
734 
735  # check you aren't creating multiple resolutions without structure
736  if not hasattr(resolutions, '__iter__'):
737  if type(resolutions) is int:
738  resolutions = [resolutions]
739  else:
740  raise Exception("you tried to pass resolutions that are not "
741  "int or list-of-int")
742  if len(resolutions) > 1 and not ideal_helix:
743  for r in res:
744  if not r.get_has_structure():
745  raise Exception(
746  'You are creating multiple resolutions for '
747  'unstructured regions. This will have unexpected '
748  'results.')
749 
750  # check density info is consistent
751  if density_residues_per_component or density_prefix:
752  if not density_residues_per_component and density_prefix:
753  raise Exception(
754  'If requesting density, must provide '
755  'density_residues_per_component AND density_prefix')
756  if density_residues_per_component and setup_particles_as_densities:
757  raise Exception(
758  'Cannot create both volumetric density '
759  '(density_residues_per_component) AND '
760  'individual densities (setup_particles_as_densities) '
761  'in the same representation')
762  if len(resolutions) > 1 and setup_particles_as_densities:
763  raise Exception(
764  'You have multiple bead resolutions but are attempting to '
765  'set them all up as individual Densities. '
766  'This could have unexpected results.')
767 
768  # check helix not accompanied by other resolutions
769  # (densities OK though!)
770  if ideal_helix:
771  if 0 in resolutions:
772  raise Exception(
773  "For ideal helices, cannot build resolution 0: "
774  "you have to do that in MODELLER")
775  if 1 not in resolutions:
776  resolutions = [1] + list(resolutions)
777  self._ideal_helices.append(res)
778 
779  # check residues are all part of this molecule:
780  for r in res:
781  if r.get_molecule() != self:
782  raise Exception(
783  'You are adding residues from a different molecule to',
784  self.__repr__())
785 
786  # unify formatting for extra breaks
787  breaks = []
788  for b in bead_extra_breaks:
789  if isinstance(b, str):
790  breaks.append(int(b)-1)
791  else:
792  breaks.append(b)
793  # store the representation group
794  self.representations.append(_Representation(
795  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
796  density_residues_per_component, density_prefix,
797  density_force_compute, density_voxel_size,
798  setup_particles_as_densities, ideal_helix, color))
799 
800  def _all_protocol_output(self):
801  return self.state._protocol_output
802 
803  def _build_protocol_output(self):
804  """Add molecule name and sequence to any ProtocolOutput objects"""
805  if not self.built:
806  name = self.hier.get_name()
807  for po, state in self._all_protocol_output():
808  po.create_component(state, name, True,
809  asym_name=self._name_with_copy)
810  po.add_component_sequence(state, name, self.sequence,
811  asym_name=self._name_with_copy,
812  alphabet=self.alphabet,
813  uniprot=self.uniprot)
814 
815  def _finalize_build(self):
816  # For clones, pass the representation of the original molecule
817  # to ProtocolOutput
818  if self.mol_to_clone:
819  rephandler = _RepresentationHandler(
820  self._name_with_copy, list(self._all_protocol_output()),
821  self.mol_to_clone._pdb_elements)
822  for res in self.mol_to_clone.residues:
823  if res.hier:
824  rephandler(res)
825 
826  def build(self, protocol_output=True):
827  """Create all parts of the IMP hierarchy
828  including Atoms, Residues, and Fragments/Representations and,
829  finally, Copies.
830  Will only build requested representations.
831  @note Any residues assigned a resolution must have an IMP.atom.Residue
832  hierarchy containing at least a CAlpha. For missing residues,
833  these can be constructed from the PDB file.
834  """
835  if not self.built:
836  if protocol_output:
837  self._build_protocol_output()
838  # if requested, clone structure and representations
839  # BEFORE building original
840  if self.mol_to_clone is not None:
841  for nr, r in enumerate(self.mol_to_clone.residues):
842  if r.get_has_structure():
843  clone = IMP.atom.create_clone(r.get_hierarchy())
844  self.residues[nr].set_structure(
845  IMP.atom.Residue(clone), soft_check=True)
846  for old_rep in self.mol_to_clone.representations:
847  new_res = IMP.pmi.tools.OrderedSet()
848  for r in old_rep.residues:
849  new_res.add(self.residues[r.get_internal_index()])
850  self._represented.add(
851  self.residues[r.get_internal_index()])
852  new_rep = _Representation(
853  new_res, old_rep.bead_resolutions,
854  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
855  old_rep.bead_default_coord,
856  old_rep.density_residues_per_component,
857  old_rep.density_prefix, False,
858  old_rep.density_voxel_size,
859  old_rep.setup_particles_as_densities,
860  old_rep.ideal_helix, old_rep.color)
861  self.representations.append(new_rep)
862  self.coord_finder = self.mol_to_clone.coord_finder
863 
864  # give a warning for all residues that don't have representation
865  no_rep = [r for r in self.residues if r not in self._represented]
866  if len(no_rep) > 0:
867  warnings.warn(
868  'Residues without representation in molecule %s: %s'
869  % (self.get_name(), system_tools.resnums2str(no_rep)),
871 
872  # first build any ideal helices (fills in structure for
873  # the TempResidues)
874  for rep in self.representations:
875  if rep.ideal_helix:
876  _build_ideal_helix(self.model, rep.residues,
877  self.coord_finder)
878 
879  # build all the representations
880  built_reps = []
881 
882  rephandler = _RepresentationHandler(
883  self._name_with_copy, list(self._all_protocol_output()),
884  self._pdb_elements)
885 
886  for rep in self.representations:
887  built_reps += system_tools.build_representation(
888  self, rep, self.coord_finder, rephandler)
889 
890  # sort them before adding as children
891  built_reps.sort(
892  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
893  for br in built_reps:
894  self.hier.add_child(br)
895  br.update_parents()
896  self.built = True
897 
898  for res in self.residues:
899  # first off, store the highest resolution available
900  # in residue.hier
901  new_ps = IMP.atom.Selection(
902  self.hier,
903  residue_index=res.get_index(),
904  resolution=1).get_selected_particles()
905  if len(new_ps) > 0:
906  new_p = new_ps[0]
907  if IMP.atom.Atom.get_is_setup(new_p):
908  # if only found atomic, store the residue
909  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
910  else:
911  # otherwise just store what you found
912  new_hier = IMP.atom.Hierarchy(new_p)
913  res.hier = new_hier
914  # Clones will be handled in _finalize_build() instead
915  # (can't handle them here as the parent of the clone
916  # isn't built yet)
917  if self.mol_to_clone is None:
918  rephandler(res)
919  else:
920  res.hier = None
921  self._represented = IMP.pmi.tools.OrderedSet(
922  [a for a in self._represented])
923  print('done building', self.get_hierarchy())
924  return self.hier
925 
926  def get_particles_at_all_resolutions(self, residue_indexes=None):
927  """Helpful utility for getting particles at all resolutions from
928  this molecule. Can optionally pass a set of residue indexes"""
929  if not self.built:
930  raise Exception(
931  "Cannot get all resolutions until you build the Molecule")
932  if residue_indexes is None:
933  residue_indexes = [r.get_index() for r in self.get_residues()]
935  self.get_hierarchy(), residue_indexes=residue_indexes)
936  return ps
937 
938 
939 class _Representation:
940  """Private class just to store a representation request"""
941  def __init__(self,
942  residues,
943  bead_resolutions,
944  bead_extra_breaks,
945  bead_ca_centers,
946  bead_default_coord,
947  density_residues_per_component,
948  density_prefix,
949  density_force_compute,
950  density_voxel_size,
951  setup_particles_as_densities,
952  ideal_helix,
953  color):
954  self.residues = residues
955  self.bead_resolutions = bead_resolutions
956  self.bead_extra_breaks = bead_extra_breaks
957  self.bead_ca_centers = bead_ca_centers
958  self.bead_default_coord = bead_default_coord
959  self.density_residues_per_component = density_residues_per_component
960  self.density_prefix = density_prefix
961  self.density_force_compute = density_force_compute
962  self.density_voxel_size = density_voxel_size
963  self.setup_particles_as_densities = setup_particles_as_densities
964  self.ideal_helix = ideal_helix
965  self.color = color
966 
967 
968 class _FindCloseStructure:
969  """Utility to get the nearest observed coordinate"""
970  def __init__(self):
971  self.coords = []
972 
973  def add_residues(self, residues):
974  for r in residues:
975  idx = IMP.atom.Residue(r).get_index()
976  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
977  ca = IMP.atom.Selection(
978  r, atom_types=catypes).get_selected_particles()
979  p = IMP.atom.Selection(
980  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
981  if len(ca) == 1:
982  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
983  self.coords.append([idx, xyz])
984  elif len(p) == 1:
985  xyz = IMP.core.XYZ(p[0]).get_coordinates()
986  self.coords.append([idx, xyz])
987  else:
988  raise ValueError("_FindCloseStructure: wrong selection")
989 
990  self.coords.sort(key=itemgetter(0))
991 
992  def find_nearest_coord(self, query):
993  if self.coords == []:
994  return None
995  keys = [r[0] for r in self.coords]
996  pos = bisect_left(keys, query)
997  if pos == 0:
998  ret = self.coords[0]
999  elif pos == len(self.coords):
1000  ret = self.coords[-1]
1001  else:
1002  before = self.coords[pos - 1]
1003  after = self.coords[pos]
1004  if after[0] - query < query - before[0]:
1005  ret = after
1006  else:
1007  ret = before
1008  return ret[1]
1009 
1010 
1012  """A dictionary-like wrapper for reading and storing sequence data.
1013  Keys are FASTA sequence names, and each value a string of one-letter
1014  codes.
1015 
1016  The FASTA header may contain multiple fields split by pipe (|)
1017  characters. If so, the FASTA sequence name is the first field and
1018  the second field (if present) is the UniProt accession.
1019  For example, ">cop9|Q13098" yields a FASTA sequence name of "cop9"
1020  and UniProt accession of "Q13098".
1021  """
1022  def __init__(self, fasta_fn, name_map=None):
1023  """Read a FASTA file and extract all the requested sequences
1024  @param fasta_fn sequence file
1025  @param name_map dictionary mapping the FASTA name to final stored name
1026  """
1027  # Mapping from sequence name to primary sequence
1028  self.sequences = IMP.pmi.tools.OrderedDict()
1029  # Mapping from sequence name to UniProt accession, if available
1030  self.uniprot = {}
1031  self.read_sequences(fasta_fn, name_map)
1032 
1033  def __len__(self):
1034  return len(self.sequences)
1035 
1036  def __contains__(self, x):
1037  return x in self.sequences
1038 
1039  def __getitem__(self, key):
1040  if type(key) is int:
1041  allseqs = list(self.sequences.keys())
1042  try:
1043  return self.sequences[allseqs[key]]
1044  except IndexError:
1045  raise IndexError("You tried to access sequence number %d "
1046  "but there's only %d" % (key, len(allseqs)))
1047  else:
1048  return self.sequences[key]
1049 
1050  def __iter__(self):
1051  return self.sequences.__iter__()
1052 
1053  def __repr__(self):
1054  ret = ''
1055  for s in self.sequences:
1056  ret += '%s\t%s\n' % (s, self.sequences[s])
1057  return ret
1058 
1059  def read_sequences(self, fasta_fn, name_map=None):
1060  code = None
1061  seq = None
1062  with open(fasta_fn, 'r') as fh:
1063  for (num, line) in enumerate(fh):
1064  if line.startswith('>'):
1065  if seq is not None:
1066  self.sequences[code] = seq.strip('*')
1067  spl = line[1:].split('|')
1068  code = spl[0].strip()
1069  if name_map is not None:
1070  try:
1071  code = name_map[code]
1072  except KeyError:
1073  pass
1074  seq = ''
1075  if len(spl) >= 2:
1076  up_accession = spl[1].strip()
1077  self.uniprot[code] = up_accession
1078  else:
1079  line = line.rstrip()
1080  if line: # Skip blank lines
1081  if seq is None:
1082  raise Exception(
1083  "Found FASTA sequence before first header "
1084  "at line %d: %s" % (num + 1, line))
1085  seq += line
1086  if seq is not None:
1087  self.sequences[code] = seq.strip('*')
1088 
1089 
1091  """Data structure for reading and storing sequence data from PDBs.
1092 
1093  @see fasta_pdb_alignments."""
1094  def __init__(self, model, pdb_fn, name_map=None):
1095  """Read a PDB file and return all sequences for each contiguous
1096  fragment
1097  @param pdb_fn file
1098  @param name_map dictionary mapping the pdb chain id to final
1099  stored name
1100  """
1101  self.model = model
1102  # self.sequences data-structure: (two-key dictionary)
1103  # it contains all contiguous fragments:
1104  # chain_id, tuples indicating first and last residue, sequence
1105  # example:
1106  # key1, key2, value
1107  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1108  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1109  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1110  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1111  self.sequences = IMP.pmi.tools.OrderedDict()
1112  self.read_sequences(pdb_fn, name_map)
1113 
1114  def read_sequences(self, pdb_fn, name_map):
1115  read_file = IMP.atom.read_pdb
1116  if pdb_fn.endswith('.cif'):
1117  read_file = IMP.atom.read_mmcif
1118  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1119  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1120  for c in cs:
1121  id = IMP.atom.Chain(c).get_id()
1122  print(id)
1123  if name_map:
1124  try:
1125  id = name_map[id]
1126  except KeyError:
1127  print("Chain ID %s not in name_map, skipping" % id)
1128  continue
1129  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1130  rids = []
1131  rids_olc_dict = {}
1132  for r in rs:
1133  dr = IMP.atom.Residue(r)
1134  rid = dr.get_index()
1135 
1136  isprotein = dr.get_is_protein()
1137  isrna = dr.get_is_rna()
1138  isdna = dr.get_is_dna()
1139  if isprotein:
1140  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1141  rids.append(rid)
1142  rids_olc_dict[rid] = olc
1143  elif isdna:
1144  if dr.get_residue_type() == IMP.atom.DADE:
1145  olc = "A"
1146  if dr.get_residue_type() == IMP.atom.DURA:
1147  olc = "U"
1148  if dr.get_residue_type() == IMP.atom.DCYT:
1149  olc = "C"
1150  if dr.get_residue_type() == IMP.atom.DGUA:
1151  olc = "G"
1152  if dr.get_residue_type() == IMP.atom.DTHY:
1153  olc = "T"
1154  rids.append(rid)
1155  rids_olc_dict[rid] = olc
1156  elif isrna:
1157  if dr.get_residue_type() == IMP.atom.ADE:
1158  olc = "A"
1159  if dr.get_residue_type() == IMP.atom.URA:
1160  olc = "U"
1161  if dr.get_residue_type() == IMP.atom.CYT:
1162  olc = "C"
1163  if dr.get_residue_type() == IMP.atom.GUA:
1164  olc = "G"
1165  if dr.get_residue_type() == IMP.atom.THY:
1166  olc = "T"
1167  rids.append(rid)
1168  rids_olc_dict[rid] = olc
1169  group_rids = self.group_indexes(rids)
1170  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1171  for group in group_rids:
1172  sequence_fragment = ""
1173  for i in range(group[0], group[1]+1):
1174  sequence_fragment += rids_olc_dict[i]
1175  contiguous_sequences[group] = sequence_fragment
1176  self.sequences[id] = contiguous_sequences
1177 
1178  def group_indexes(self, indexes):
1179  from itertools import groupby
1180  ranges = []
1181  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1182  group = [x[1] for x in g]
1183  ranges.append((group[0], group[-1]))
1184  return ranges
1185 
1186 
1187 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1188  '''This function computes and prints the alignment between the
1189  fasta file and the pdb sequence, computes the offsets for each contiguous
1190  fragment in the PDB.
1191  @param fasta_sequences IMP.pmi.topology.Sequences object
1192  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1193  @param show boolean default False, if True prints the alignments.
1194  The input objects should be generated using map_name dictionaries
1195  such that fasta_id
1196  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1197  Returns a dictionary of offsets, organized by peptide range (group):
1198  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1199  from Bio import pairwise2
1200  from Bio.pairwise2 import format_alignment
1201  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1202  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1203  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1204  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1205  offsets = IMP.pmi.tools.OrderedDict()
1206  for name in fasta_sequences.sequences:
1207  print(name)
1208  seq_fasta = fasta_sequences.sequences[name]
1209  if name not in pdb_sequences.sequences:
1210  print("Fasta id %s not in pdb names, aligning against every "
1211  "pdb chain" % name)
1212  pdbnames = pdb_sequences.sequences.keys()
1213  else:
1214  pdbnames = [name]
1215  for pdbname in pdbnames:
1216  for group in pdb_sequences.sequences[pdbname]:
1217  if group[1] - group[0] + 1 < 7:
1218  continue
1219  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1220  if show:
1221  print("########################")
1222  print(" ")
1223  print("protein name", pdbname)
1224  print("fasta id", name)
1225  print("pdb fragment", group)
1226  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1227  2, -1, -.5, -.1)[0]
1228  for a in [align]:
1229  offset = a[3] + 1 - group[0]
1230  if show:
1231  print("alignment sequence start-end",
1232  (a[3] + 1, a[4] + 1))
1233  print("offset from pdb to fasta index", offset)
1234  print(format_alignment(*a))
1235  if name not in offsets:
1236  offsets[pdbname] = {}
1237  if group not in offsets[pdbname]:
1238  offsets[pdbname][group] = offset
1239  else:
1240  if group not in offsets[pdbname]:
1241  offsets[pdbname][group] = offset
1242  return offsets
1243 
1244 
1246  "Temporarily stores residue information, even without structure available."
1247  # Consider implementing __hash__ so you can select.
1248  def __init__(self, molecule, code, index, internal_index, alphabet):
1249  """setup a TempResidue
1250  @param molecule PMI Molecule to which this residue belongs
1251  @param code one-letter residue type code
1252  @param index PDB index
1253  @param internal_index The number in the sequence
1254  """
1255  # these attributes should be immutable
1256  self.molecule = molecule
1257  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1258  self.pdb_index = index
1259  self.internal_index = internal_index
1260  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1261  self.state_index = \
1262  IMP.atom.State(self.molecule.state.hier).get_state_index()
1263  # these are expected to change
1264  self._structured = False
1265  self.hier = IMP.atom.Residue.setup_particle(
1266  IMP.Particle(molecule.model), self.rtype, index)
1267 
1268  def __str__(self):
1269  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1270  + str(self.copy_index) + "_" + self.get_code() \
1271  + str(self.get_index())
1272 
1273  def __repr__(self):
1274  return self.__str__()
1275 
1276  def __key(self):
1277  # this returns the immutable attributes only
1278  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1279  self.pdb_index, self.internal_index)
1280 
1281  def __eq__(self, other):
1282  return (type(other) == type(self) # noqa: E721
1283  and self.__key() == other.__key())
1284 
1285  def __hash__(self):
1286  return hash(self.__key())
1287 
1288  def get_index(self):
1289  return self.pdb_index
1290 
1291  def get_internal_index(self):
1292  return self.internal_index
1293 
1294  def get_code(self):
1295  return IMP.atom.get_one_letter_code(self.get_residue_type())
1296 
1297  def get_residue_type(self):
1298  return self.rtype
1299 
1300  def get_hierarchy(self):
1301  return self.hier
1302 
1303  def get_molecule(self):
1304  return self.molecule
1305 
1306  def get_has_structure(self):
1307  return self._structured
1308 
1309  def set_structure(self, res, soft_check=False):
1310  if res.get_residue_type() != self.get_residue_type():
1311  if (res.get_residue_type() == IMP.atom.MSE
1312  and self.get_residue_type() == IMP.atom.MET):
1313  # MSE in the PDB file is OK to match with MET in the FASTA
1314  # sequence
1315  pass
1316  elif soft_check:
1317  # note from commit a2c13eaa1 we give priority to the
1318  # FASTA and not the PDB
1319  warnings.warn(
1320  'Inconsistency between FASTA sequence and PDB sequence. '
1321  'FASTA type %s %s and PDB type %s'
1322  % (self.get_index(), self.hier.get_residue_type(),
1323  res.get_residue_type()),
1325  self.hier.set_residue_type((self.get_residue_type()))
1326  self.rtype = self.get_residue_type()
1327  else:
1328  raise Exception(
1329  'ERROR: PDB residue index', self.get_index(), 'is',
1330  IMP.atom.get_one_letter_code(res.get_residue_type()),
1331  'and sequence residue is', self.get_code())
1332 
1333  for a in res.get_children():
1334  self.hier.add_child(a)
1335  atype = IMP.atom.Atom(a).get_atom_type()
1336  a.get_particle().set_name(
1337  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1338  self.hier.get_index()))
1339  self._structured = True
1340 
1341 
1343  """Automatically setup System and Degrees of Freedom with a formatted
1344  text file.
1345  The file is read in and each part of the topology is stored as a
1346  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1347  The topology file should be in a simple pipe-delimited format:
1348  @code{.txt}
1349 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1350 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1351 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1352 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1353 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1354 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1355 
1356  @endcode
1357 
1358  These are the fields you can enter:
1359  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1360  hierarchy for this structure. Multiple copies of the same molecule
1361  can be created by appending a copy number after a period; if none is
1362  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1363  of Rpb2 or Rpb2.0).
1364  - `color`: The color used in the output RMF file. Uses
1365  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1366  (e.g. "red"), or R,G,B values as three comma-separated floating point
1367  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1368  starting with '#' (e.g. #ff0000).
1369  - `fasta_fn`: Name of FASTA file containing this component.
1370  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1371  from the file is assumed to be a protein sequence. If it should instead
1372  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1373  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1374  FASTA file and treat it as RNA. The FASTA header may contain multiple
1375  fields split by pipe (|) characters. If so, the FASTA sequence name is
1376  the first field and the second field (if present) is the UniProt
1377  accession. For example, ">cop9|Q13098" yields a FASTA sequence name
1378  of "cop9" and UniProt accession of "Q13098". If such an accession is
1379  present, it is added to the generated structure (and ultimately
1380  recorded in any output RMF file).
1381  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1382  If left empty, will set up as BEADS (you can also specify "BEADS")
1383  Can also write "IDEAL_HELIX".
1384  - `chain`: Chain ID of this domain in the PDB or mmCIF file. This is
1385  the "author-provided" chain ID for mmCIF files, not the asym_id.
1386  - `residue_range`: Comma delimited pair defining range.
1387  Can leave empty or use 'all' for entire sequence from PDB file.
1388  The second item in the pair can be END to select the last residue in the
1389  PDB chain.
1390  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1391  For example, an offset of -10 would match the first residue in the
1392  FASTA file (which is always numbered sequentially starting from 1) with
1393  residue 11 in the PDB file.
1394  - `bead_size`: The size (in residues) of beads used to model areas not
1395  covered by PDB coordinates. These will be built automatically.
1396  - `em_residues`: The number of Gaussians used to model the electron
1397  density of this domain. Set to zero if no EM fitting will be done.
1398  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1399  (and .mrc)
1400  - `rigid_body`: Leave empty if this object is not in a rigid body.
1401  Otherwise, this is a number corresponding to the rigid body containing
1402  this object. The number itself is just used for grouping things.
1403  - `super_rigid_body`: Add a mover that periodically moves several related
1404  domains as if they were a single large rigid body. In between such moves,
1405  the domains move independently. This can improve sampling.
1406  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1407  for all adjacent pairs of domains in the chain.
1408  - `flags` additional flags for advanced options
1409  @note All filenames are relative to the paths specified in the constructor.
1410 
1411  """ # noqa: E501
1412  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1413  gmm_dir='./'):
1414  """Constructor.
1415  @param topology_file Pipe-delimited file specifying the topology
1416  @param pdb_dir Relative path to the pdb directory
1417  @param fasta_dir Relative path to the fasta directory
1418  @param gmm_dir Relative path to the GMM directory
1419  """
1420  self.topology_file = topology_file
1421  # key=molname, value=TempMolecule
1422  self.molecules = IMP.pmi.tools.OrderedDict()
1423  self.pdb_dir = pdb_dir
1424  self.fasta_dir = fasta_dir
1425  self.gmm_dir = gmm_dir
1426  self._components = self.read(topology_file)
1427 
1428  def write_topology_file(self, outfile):
1429  with open(outfile, "w") as f:
1430  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1431  "residue_range|pdb_offset|bead_size|"
1432  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1433  "chain_of_super_rigid_bodies|\n")
1434  for c in self._components:
1435  output = c.get_str()+'\n'
1436  f.write(output)
1437  return outfile
1438 
1439  def get_components(self, topology_list="all"):
1440  """ Return list of ComponentTopologies for selected components
1441  @param topology_list List of indices to return"""
1442  if topology_list == "all":
1443  topologies = self._components
1444  else:
1445  topologies = []
1446  for i in topology_list:
1447  topologies.append(self._components[i])
1448  return topologies
1449 
1450  def get_molecules(self):
1451  return self.molecules
1452 
1453  def read(self, topology_file, append=False):
1454  """Read system components from topology file. append=False will erase
1455  current topology and overwrite with new
1456  """
1457  is_topology = False
1458  is_directories = False
1459  linenum = 1
1460  if not append:
1461  self._components = []
1462 
1463  with open(topology_file) as infile:
1464  for line in infile:
1465  if line.lstrip() == "" or line[0] == "#":
1466  continue
1467  elif line.split('|')[1].strip() in ("molecule_name"):
1468  is_topology = True
1469  is_directories = False
1470  old_format = False
1471  continue
1472  elif line.split('|')[1] == "component_name":
1473  is_topology = True
1475  "Old-style topology format (using "
1476  "|component_name|) is deprecated. Please switch to "
1477  "the new-style format (using |molecule_name|)\n")
1478  old_format = True
1479  is_directories = False
1480  continue
1481  elif line.split('|')[1] == "directories":
1483  "Setting directories in the topology file "
1484  "is deprecated. Please do so through the "
1485  "TopologyReader constructor. Note that new-style "
1486  "paths are relative to the current working "
1487  "directory, not the topology file.\n")
1488  is_directories = True
1489  elif is_directories:
1490  fields = line.split('|')
1491  setattr(self, fields[1],
1492  IMP.get_relative_path(topology_file, fields[2]))
1493  if is_topology:
1494  new_component = self._parse_line(line, linenum, old_format)
1495  self._components.append(new_component)
1496  linenum += 1
1497  return self._components
1498 
1499  def _parse_line(self, component_line, linenum, old_format):
1500  """Parse a line of topology values and matches them to their key.
1501  Checks each value for correct syntax
1502  Returns a list of Component objects
1503  fields:
1504  """
1505  c = _Component()
1506  values = [s.strip() for s in component_line.split('|')]
1507  errors = []
1508 
1509  # Required fields
1510  if old_format:
1511  c.molname = values[1]
1512  c.copyname = ''
1513  c._domain_name = values[2]
1514  c.color = 'blue'
1515  else:
1516  names = values[1].split('.')
1517  if len(names) == 1:
1518  c.molname = names[0]
1519  c.copyname = ''
1520  elif len(names) == 2:
1521  c.molname = names[0]
1522  c.copyname = names[1]
1523  else:
1524  c.molname = names[0]
1525  c.copyname = names[1]
1526  errors.append("Molecule name should be <molecule.copyID>")
1527  errors.append("For component %s line %d "
1528  % (c.molname, linenum))
1529  c._domain_name = c.molname + '.' + c.copyname
1530  colorfields = values[2].split(',')
1531  if len(colorfields) == 3:
1532  c.color = [float(x) for x in colorfields]
1533  if any([x > 1 for x in c.color]):
1534  c.color = [x/255 for x in c.color]
1535  else:
1536  c.color = values[2]
1537  c._orig_fasta_file = values[3]
1538  c.fasta_file = values[3]
1539  fasta_field = values[4].split(",")
1540  c.fasta_id = fasta_field[0]
1541  c.fasta_flag = None
1542  if len(fasta_field) > 1:
1543  c.fasta_flag = fasta_field[1]
1544  c._orig_pdb_input = values[5]
1545  pdb_input = values[5]
1546  tmp_chain = values[6]
1547  rr = values[7]
1548  offset = values[8]
1549  bead_size = values[9]
1550  emg = values[10]
1551  if old_format:
1552  rbs = srbs = csrbs = ''
1553  else:
1554  rbs = values[11]
1555  srbs = values[12]
1556  csrbs = values[13]
1557 
1558  if c.molname not in self.molecules:
1559  self.molecules[c.molname] = _TempMolecule(c)
1560  else:
1561  # COPY OR DOMAIN
1562  c._orig_fasta_file = \
1563  self.molecules[c.molname].orig_component._orig_fasta_file
1564  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1565  self.molecules[c.molname].add_component(c, c.copyname)
1566 
1567  # now cleanup input
1568  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1569  if pdb_input == "":
1570  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1571  errors.append("For component %s line %d is not correct"
1572  "|%s| was given." % (c.molname, linenum, pdb_input))
1573  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1574  c.pdb_file = pdb_input
1575  else:
1576  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1577 
1578  # PDB chain must be one or two characters
1579  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1580  c.chain = tmp_chain
1581  else:
1582  errors.append(
1583  "PDB Chain identifier must be one or two characters.")
1584  errors.append("For component %s line %d is not correct"
1585  "|%s| was given."
1586  % (c.molname, linenum, tmp_chain))
1587 
1588  # Optional fields
1589  # Residue Range
1590  if rr.strip() == 'all' or str(rr) == "":
1591  c.residue_range = None
1592  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1593  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1594  # Make sure that is residue range is given, there are only
1595  # two values and they are integers
1596  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1597  if c.residue_range[1] != 'END':
1598  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1599  # Old format used -1 for the last residue
1600  if old_format and c.residue_range[1] == -1:
1601  c.residue_range = (c.residue_range[0], 'END')
1602  else:
1603  errors.append("Residue Range format for component %s line %d is "
1604  "not correct" % (c.molname, linenum))
1605  errors.append(
1606  "Correct syntax is two comma separated integers: "
1607  "|start_res, end_res|. end_res can also be END to select the "
1608  "last residue in the chain. |%s| was given." % rr)
1609  errors.append("To select all residues, indicate |\"all\"|")
1610 
1611  # PDB Offset
1612  if self._is_int(offset):
1613  c.pdb_offset = int(offset)
1614  elif len(offset) == 0:
1615  c.pdb_offset = 0
1616  else:
1617  errors.append("PDB Offset format for component %s line %d is "
1618  "not correct" % (c.molname, linenum))
1619  errors.append("The value must be a single integer. |%s| was given."
1620  % offset)
1621 
1622  # Bead Size
1623  if self._is_int(bead_size):
1624  c.bead_size = int(bead_size)
1625  elif len(bead_size) == 0:
1626  c.bead_size = 0
1627  else:
1628  errors.append("Bead Size format for component %s line %d is "
1629  "not correct" % (c.molname, linenum))
1630  errors.append("The value must be a single integer. |%s| was given."
1631  % bead_size)
1632 
1633  # EM Residues Per Gaussian
1634  if self._is_int(emg):
1635  if int(emg) > 0:
1636  c.density_prefix = os.path.join(self.gmm_dir,
1637  c.get_unique_name())
1638  c.gmm_file = c.density_prefix + '.txt'
1639  c.mrc_file = c.density_prefix + '.gmm'
1640 
1641  c.em_residues_per_gaussian = int(emg)
1642  else:
1643  c.em_residues_per_gaussian = 0
1644  elif len(emg) == 0:
1645  c.em_residues_per_gaussian = 0
1646  else:
1647  errors.append("em_residues_per_gaussian format for component "
1648  "%s line %d is not correct" % (c.molname, linenum))
1649  errors.append("The value must be a single integer. |%s| was given."
1650  % emg)
1651 
1652  # rigid bodies
1653  if len(rbs) > 0:
1654  if not self._is_int(rbs):
1655  errors.append(
1656  "rigid bodies format for component "
1657  "%s line %d is not correct" % (c.molname, linenum))
1658  errors.append("Each RB must be a single integer, or empty. "
1659  "|%s| was given." % rbs)
1660  c.rigid_body = int(rbs)
1661 
1662  # super rigid bodies
1663  if len(srbs) > 0:
1664  srbs = srbs.split(',')
1665  for i in srbs:
1666  if not self._is_int(i):
1667  errors.append(
1668  "super rigid bodies format for component "
1669  "%s line %d is not correct" % (c.molname, linenum))
1670  errors.append(
1671  "Each SRB must be a single integer. |%s| was given."
1672  % srbs)
1673  c.super_rigid_bodies = srbs
1674 
1675  # chain of super rigid bodies
1676  if len(csrbs) > 0:
1677  if not self._is_int(csrbs):
1678  errors.append(
1679  "em_residues_per_gaussian format for component "
1680  "%s line %d is not correct" % (c.molname, linenum))
1681  errors.append(
1682  "Each CSRB must be a single integer. |%s| was given."
1683  % csrbs)
1684  c.chain_of_super_rigid_bodies = csrbs
1685 
1686  # done
1687  if errors:
1688  raise ValueError("Fix Topology File syntax errors and rerun: "
1689  + "\n".join(errors))
1690  else:
1691  return c
1692 
1693  def set_gmm_dir(self, gmm_dir):
1694  """Change the GMM dir"""
1695  self.gmm_dir = gmm_dir
1696  for c in self._components:
1697  c.gmm_file = os.path.join(self.gmm_dir,
1698  c.get_unique_name() + ".txt")
1699  c.mrc_file = os.path.join(self.gmm_dir,
1700  c.get_unique_name() + ".mrc")
1701  print('new gmm', c.gmm_file)
1702 
1703  def set_pdb_dir(self, pdb_dir):
1704  """Change the PDB dir"""
1705  self.pdb_dir = pdb_dir
1706  for c in self._components:
1707  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1708  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1709 
1710  def set_fasta_dir(self, fasta_dir):
1711  """Change the FASTA dir"""
1712  self.fasta_dir = fasta_dir
1713  for c in self._components:
1714  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1715 
1716  def _is_int(self, s):
1717  # is this string an integer?
1718  try:
1719  float(s)
1720  return float(s).is_integer()
1721  except ValueError:
1722  return False
1723 
1724  def get_rigid_bodies(self):
1725  """Return list of lists of rigid bodies (as domain name)"""
1726  rbl = defaultdict(list)
1727  for c in self._components:
1728  if c.rigid_body:
1729  rbl[c.rigid_body].append(c.get_unique_name())
1730  return rbl.values()
1731 
1733  """Return list of lists of super rigid bodies (as domain name)"""
1734  rbl = defaultdict(list)
1735  for c in self._components:
1736  for rbnum in c.super_rigid_bodies:
1737  rbl[rbnum].append(c.get_unique_name())
1738  return rbl.values()
1739 
1741  "Return list of lists of chains of super rigid bodies (as domain name)"
1742  rbl = defaultdict(list)
1743  for c in self._components:
1744  for rbnum in c.chain_of_super_rigid_bodies:
1745  rbl[rbnum].append(c.get_unique_name())
1746  return rbl.values()
1747 
1748 
1749 class _TempMolecule:
1750  """Store the Components and any requests for copies"""
1751  def __init__(self, init_c):
1752  self.molname = init_c.molname
1753  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1754  self.add_component(init_c, init_c.copyname)
1755  self.orig_copyname = init_c.copyname
1756  self.orig_component = self.domains[init_c.copyname][0]
1757 
1758  def add_component(self, component, copy_id):
1759  self.domains[copy_id].append(component)
1760  component.domainnum = len(self.domains[copy_id])-1
1761 
1762  def __repr__(self):
1763  return ','.join('%s:%i'
1764  % (k, len(self.domains[k])) for k in self.domains)
1765 
1766 
1767 class _Component:
1768  """Stores the components required to build a standard IMP hierarchy
1769  using IMP.pmi.BuildModel()
1770  """
1771  def __init__(self):
1772  self.molname = None
1773  self.copyname = None
1774  self.domainnum = 0
1775  self.fasta_file = None
1776  self._orig_fasta_file = None
1777  self.fasta_id = None
1778  self.fasta_flag = None
1779  self.pdb_file = None
1780  self._orig_pdb_input = None
1781  self.chain = None
1782  self.residue_range = None
1783  self.pdb_offset = 0
1784  self.bead_size = 10
1785  self.em_residues_per_gaussian = 0
1786  self.gmm_file = ''
1787  self.mrc_file = ''
1788  self.density_prefix = ''
1789  self.color = 0.1
1790  self.rigid_body = None
1791  self.super_rigid_bodies = []
1792  self.chain_of_super_rigid_bodies = []
1793 
1794  def _l2s(self, rng):
1795  return ",".join("%s" % x for x in rng)
1796 
1797  def __repr__(self):
1798  return self.get_str()
1799 
1800  def get_unique_name(self):
1801  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1802 
1803  def get_str(self):
1804  res_range = self.residue_range
1805  if self.residue_range is None:
1806  res_range = []
1807  name = self.molname
1808  if self.copyname != '':
1809  name += '.' + self.copyname
1810  if self.chain is None:
1811  chain = ' '
1812  else:
1813  chain = self.chain
1814  color = self.color
1815  if isinstance(color, list):
1816  color = ','.join([str(x) for x in color])
1817  fastaid = self.fasta_id
1818  if self.fasta_flag:
1819  fastaid += "," + self.fasta_flag
1820  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1821  self._orig_pdb_input, chain,
1822  self._l2s(list(res_range)),
1823  str(self.pdb_offset),
1824  str(self.bead_size),
1825  str(self.em_residues_per_gaussian),
1826  str(self.rigid_body) if self.rigid_body else '',
1827  self._l2s(self.super_rigid_bodies),
1828  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1829  return a
1830 
1831 
1833  '''Extends the functionality of IMP.atom.Molecule'''
1834 
1835  def __init__(self, hierarchy):
1836  IMP.atom.Molecule.__init__(self, hierarchy)
1837 
1838  def get_state_index(self):
1839  state = self.get_parent()
1840  return IMP.atom.State(state).get_state_index()
1841 
1842  def get_copy_index(self):
1843  return IMP.atom.Copy(self).get_copy_index()
1844 
1845  def get_extended_name(self):
1846  return self.get_name() + "." + \
1847  str(self.get_copy_index()) + \
1848  "." + str(self.get_state_index())
1849 
1850  def get_sequence(self):
1851  return IMP.atom.Chain(self).get_sequence()
1852 
1853  def get_residue_indexes(self):
1855 
1856  def get_residue_segments(self):
1857  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1858 
1859  def get_chain_id(self):
1860  return IMP.atom.Chain(self).get_id()
1861 
1862  def __repr__(self):
1863  s = 'PMIMoleculeHierarchy '
1864  s += self.get_name()
1865  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1866  s += " " + "State " + str(self.get_state_index())
1867  s += " " + "N residues " + str(len(self.get_sequence()))
1868  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:610
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
Represent the root node of the global IMP.atom.Hierarchy.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:128
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup System and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:84
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:819