IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/22
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from an mmCIF
17  or PDB file.
18  * Molecule.add_representation() to create a representation unit - here you
19  can choose bead resolutions as well as alternate representations like
20  densities or ideal helices.
21  * Molecule.create_clone() lets you set up a molecule with identical
22  representations, just a different chain ID. Use Molecule.create_copy()
23  if you want a molecule with the same sequence but that allows custom
24  representations.
25 * Once data has been added and representations chosen, call System.build()
26  to create a canonical IMP hierarchy.
27 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
28  in IMP::pmi::dof.
29 * Check your representation with a nice printout:
30  IMP::atom::show_with_representation()
31 
32 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
33 
34 Alternatively one can construct the entire topology and degrees of freedom
35 via formatted text file with TopologyReader and
36 IMP::pmi::macros::BuildSystem(). This is used in the
37 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
38 set of the full options available to PMI users
39 (rigid bodies only, fixed resolutions).
40 """ # noqa: E501
41 
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase:
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class _OurWeakRef:
137  """A simple wrapper around weakref.ref which can be pickled.
138  Note that we throw the reference away at pickle time. It should
139  be able to be reconstructed from System._all_systems anyway."""
140 
141  def __init__(self, system):
142  self._ref = weakref.ref(system)
143 
144  def __call__(self):
145  if hasattr(self, '_ref'):
146  return self._ref()
147 
148  def __getstate__(self):
149  return None
150 
151 
152 class System(_SystemBase):
153  """Represent the root node of the global IMP.atom.Hierarchy."""
154 
155  _all_systems = weakref.WeakSet()
156 
157  def __init__(self, model=None, name="System"):
158  """Constructor.
159 
160  @param model The IMP::Model in which to construct this system.
161  @param name The name of the top-level hierarchy node.
162  """
163  _SystemBase.__init__(self, model)
164  self._number_of_states = 0
165  self._protocol_output = []
166  self.states = []
167  self.built = False
168 
169  System._all_systems.add(self)
170 
171  # the root hierarchy node
172  self.hier = self._create_hierarchy()
173  self.hier.set_name(name)
174  self.hier._pmi2_system = _OurWeakRef(self)
175 
176  def get_states(self):
177  """Get a list of all State objects in this system"""
178  return self.states
179 
180  def create_state(self):
181  """Makes and returns a new IMP.pmi.topology.State in this system"""
182  self._number_of_states += 1
183  state = State(self, self._number_of_states-1)
184  self.states.append(state)
185  return state
186 
187  def __repr__(self):
188  return self.hier.get_name()
189 
191  """Returns the total number of states generated"""
192  return self._number_of_states
193 
194  def get_hierarchy(self):
195  """Return the top-level IMP.atom.Hierarchy node for this system"""
196  return self.hier
197 
198  def build(self, **kwargs):
199  """Build all states"""
200  if not self.built:
201  for state in self.states:
202  state.build(**kwargs)
203  self.built = True
204  for po in self._protocol_output:
205  po.finalize_build()
206  return self.hier
207 
208  def add_protocol_output(self, p):
209  """Capture details of the modeling protocol.
210  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
211  """
212  self._protocol_output.append(p)
213 # p._each_metadata.append(self._metadata)
214 # p._file_datasets.append(self._file_dataset)
215  for state in self.states:
216  state._add_protocol_output(p, self)
217 
218 
219 class State(_SystemBase):
220  """Stores a list of Molecules all with the same State index.
221  Also stores number of copies of each Molecule for easy selection.
222  """
223  def __init__(self, system, state_index):
224  """Define a new state
225  @param system the PMI System
226  @param state_index the index of the new state
227  @note It's expected that you will not use this constructor directly,
228  but rather create it with System.create_state()
229  """
230  self.model = system.get_hierarchy().get_model()
231  self.system = system
232  self.hier = self._create_child(system.get_hierarchy())
233  self.short_name = self.long_name = "State_" + str(state_index)
234  self.hier.set_name(self.short_name)
235  # key is molecule name. value are the molecule copies!
236  self.molecules = IMP.pmi.tools.OrderedDict()
237  IMP.atom.State.setup_particle(self.hier, state_index)
238  self.built = False
239  self._protocol_output = []
240  for p in system._protocol_output:
241  self._add_protocol_output(p, system)
242 
243  def __repr__(self):
244  return self.system.__repr__()+'.'+self.hier.get_name()
245 
246  def _add_protocol_output(self, p, system):
247  state = p._add_state(self)
248  self._protocol_output.append((p, state))
249  state.model = system.model
250  state.prot = self.hier
251 
252  def get_molecules(self):
253  """Return a dictionary where key is molecule name and value
254  is a list of all copies of that molecule in setup order"""
255  return self.molecules
256 
257  def get_molecule(self, name, copy_num=0):
258  """Access a molecule by name and copy number
259  @param name The molecule name used during setup
260  @param copy_num The copy number based on input order.
261  Default: 0. Set to 'all' to get all copies
262  """
263  if name not in self.molecules:
264  raise KeyError("Could not find molname %s" % name)
265  if copy_num == 'all':
266  return self.molecules[name]
267  else:
268  return self.molecules[name][copy_num]
269 
270  def create_molecule(self, name, sequence='', chain_id='',
271  alphabet=IMP.pmi.alphabets.amino_acid,
272  uniprot=None):
273  """Create a new Molecule within this State
274  @param name the name of the molecule (string);
275  it must not be already used
276  @param sequence sequence (string)
277  @param chain_id Chain ID to assign to this molecule
278  @param alphabet Mapping from FASTA codes to residue types
279  @param uniprot UniProt accession, if available
280  """
281  # check whether the molecule name is already assigned
282  if name in self.molecules:
283  raise ValueError('Cannot use a molecule name already used')
284 
285  # check for something that looks like a copy number
286  if re.search(r'\.\d+$', name):
287  warnings.warn(
288  "It is recommended not to end the molecule name with "
289  ".(number) as it may be confused with the copy number "
290  "(the copy number for new molecules is always 0, so to "
291  "select this molecule, use '%s.0'). Use create_clone() or "
292  "create_copy() instead if a copy of an existing molecule "
293  "is desired." % name, IMP.pmi.ParameterWarning)
294 
295  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
296  alphabet=alphabet, uniprot=uniprot)
297  self.molecules[name] = [mol]
298  return mol
299 
300  def get_hierarchy(self):
301  """Get the IMP.atom.Hierarchy node for this state"""
302  return self.hier
303 
304  def get_number_of_copies(self, molname):
305  """Get the number of copies of the given molecule (by name)
306 
307  @param molname The name of the molecule
308  """
309  return len(self.molecules[molname])
310 
311  def _register_copy(self, molecule):
312  molname = molecule.get_hierarchy().get_name()
313  self.molecules[molname].append(molecule)
314 
315  def build(self, **kwargs):
316  """Build all molecules (automatically makes clones)"""
317  if not self.built:
318  for molname in self.molecules:
319  # We want to update ProtocolOutput in forward order so
320  # that, e.g. we get nice chain IDs in the mmCIF output,
321  # but we want to build the sequence in reverse order
322  for mol in self.molecules[molname]:
323  mol._build_protocol_output()
324  for mol in reversed(self.molecules[molname]):
325  mol.build(protocol_output=False, **kwargs)
326  for mol in self.molecules[molname]:
327  mol._finalize_build()
328  self.built = True
329  return self.hier
330 
331 
332 # Track residues read from PDB files
333 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
334 
335 
336 class _RepresentationHandler:
337  """Handle PMI representation and use it to populate that of any attached
338  ProtocolOutput objects"""
339  def __init__(self, name, pos, pdb_elements):
340  self.name = name
341  self.pos = pos
342  self.last_index = None
343  self.last_pdb_index = None
344  self.pdb_for_residue = {}
345  for residues, pdb in pdb_elements:
346  for r in residues:
347  self.pdb_for_residue[r.get_index()] = pdb
348 
349  def _get_pdb(self, h):
350  """Return a PDBElement if the given hierarchy was read from a
351  PDB file"""
353  rind = IMP.atom.Residue(h).get_index()
354  return self.pdb_for_residue.get(rind, None)
355 
356  def __call__(self, res):
357  """Handle a single residue"""
358  if len(self.pos) == 0:
359  return
360  h = res.hier
361  pi = h.get_particle_index()
362  # Do nothing if we already saw this hierarchy
363  if self.last_index is None or pi != self.last_index:
364  pdb = self._get_pdb(h)
365  self.last_index = pi
366  if pdb:
367  assert IMP.atom.Fragment.get_is_setup(h.get_parent())
368  frag = IMP.atom.Fragment(h.get_parent())
369  fragi = frag.get_particle_index()
370  # Do nothing if we already saw this PDB fragment
371  if self.last_pdb_index is not None \
372  and self.last_pdb_index == fragi:
373  return
374  self.last_pdb_index = fragi
375  indices = frag.get_residue_indexes()
376  for p, state in self.pos:
377  p.add_pdb_element(state, self.name,
378  indices[0], indices[-1], pdb.offset,
379  pdb.filename, pdb.chain_id, frag)
381  frag = IMP.atom.Fragment(h)
382  indices = frag.get_residue_indexes()
383  for p, state in self.pos:
384  p.add_bead_element(state, self.name,
385  indices[0], indices[-1], 1, h)
387  resind = IMP.atom.Residue(h).get_index()
388  for p, state in self.pos:
389  p.add_bead_element(state, self.name, resind, resind, 1, h)
390  else:
391  raise TypeError("Unhandled hierarchy %s" % str(h))
392 
393 
394 class Molecule(_SystemBase):
395  """Stores a named protein chain.
396  This class is constructed from within the State class.
397  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
398  Structure is read using this class.
399  Resolutions and copies can be registered, but are only created
400  when build() is called.
401 
402  A Molecule acts like a simple Python list of residues, and can be indexed
403  by integer (starting at zero) or by string (starting at 1).
404  """
405 
406  def __init__(self, state, name, sequence, chain_id, copy_num,
407  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid,
408  uniprot=None):
409  """The user should not call this directly; instead call
410  State.create_molecule()
411 
412  @param state The parent PMI State
413  @param name The name of the molecule (string)
414  @param sequence Sequence (string)
415  @param chain_id The chain of this molecule
416  @param copy_num Store the copy number
417  @param mol_to_clone The original molecule (for cloning ONLY)
418  @note It's expected that you will not use this constructor directly,
419  but rather create a Molecule with State.create_molecule()
420  """
421  # internal data storage
422  self.model = state.get_hierarchy().get_model()
423  self.state = state
424  self.sequence = sequence
425  self.built = False
426  self.mol_to_clone = mol_to_clone
427  self.alphabet = alphabet
428  self.representations = [] # list of stuff to build
429  self._pdb_elements = []
430  self.uniprot = uniprot
431  # residues with representation
432  self._represented = IMP.pmi.tools.OrderedSet()
433  # helps you place beads by storing structure
434  self.coord_finder = _FindCloseStructure()
435  # list of OrderedSets of tempresidues set to ideal helix
436  self._ideal_helices = []
437 
438  # create root node and set it as child to passed parent hierarchy
439  self.hier = self._create_child(self.state.get_hierarchy())
440  self.hier.set_name(name)
441  IMP.atom.Copy.setup_particle(self.hier, copy_num)
442  self._name_with_copy = "%s.%d" % (name, copy_num)
443  # store the sequence
444  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
445  self.chain.set_sequence(self.sequence)
446  self.chain.set_chain_type(alphabet.get_chain_type())
447  if self.uniprot:
448  self.chain.set_uniprot_accession(self.uniprot)
449  # create TempResidues from the sequence (if passed)
450  self.residues = []
451  for ns, s in enumerate(sequence):
452  r = TempResidue(self, s, ns+1, ns, alphabet)
453  self.residues.append(r)
454 
455  def __repr__(self):
456  return self.state.__repr__() + '.' + self.get_name() + '.' + \
457  str(IMP.atom.Copy(self.hier).get_copy_index())
458 
459  def __getitem__(self, val):
460  if isinstance(val, int):
461  return self.residues[val]
462  elif isinstance(val, str):
463  return self.residues[int(val)-1]
464  elif isinstance(val, slice):
465  return IMP.pmi.tools.OrderedSet(self.residues[val])
466  else:
467  raise TypeError("Indexes must be int or str")
468 
469  def get_hierarchy(self):
470  """Return the IMP Hierarchy corresponding to this Molecule"""
471  return self.hier
472 
473  def get_name(self):
474  """Return this Molecule name"""
475  return self.hier.get_name()
476 
477  def get_state(self):
478  """Return the State containing this Molecule"""
479  return self.state
480 
481  def get_ideal_helices(self):
482  """Returns list of OrderedSets with requested ideal helices"""
483  return self._ideal_helices
484 
485  def residue_range(self, a, b, stride=1):
486  """Get residue range from a to b, inclusive.
487  Use integers to get 0-indexing, or strings to get PDB-indexing"""
488  if isinstance(a, int) and isinstance(b, int) \
489  and isinstance(stride, int):
490  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
491  elif isinstance(a, str) and isinstance(b, str) \
492  and isinstance(stride, int):
493  return IMP.pmi.tools.OrderedSet(
494  self.residues[int(a)-1:int(b):stride])
495  else:
496  raise TypeError("Range ends must be int or str. "
497  "Stride must be int.")
498 
499  def get_residues(self):
500  """Return all modeled TempResidues as a set"""
501  all_res = IMP.pmi.tools.OrderedSet(self.residues)
502  return all_res
503 
504  def get_represented(self):
505  """Return set of TempResidues that have representation"""
506  return self._represented
507 
509  """Return a set of TempResidues that have associated structure
510  coordinates"""
511  atomic_res = IMP.pmi.tools.OrderedSet()
512  for res in self.residues:
513  if res.get_has_structure():
514  atomic_res.add(res)
515  return atomic_res
516 
518  """Return a set of TempResidues that don't have associated
519  structure coordinates"""
520  non_atomic_res = IMP.pmi.tools.OrderedSet()
521  for res in self.residues:
522  if not res.get_has_structure():
523  non_atomic_res.add(res)
524  return non_atomic_res
525 
526  def create_copy(self, chain_id):
527  """Create a new Molecule with the same name and sequence but a
528  higher copy number. Returns the Molecule. No structure or
529  representation will be copied!
530 
531  @param chain_id Chain ID of the new molecule
532  """
533  mol = Molecule(
534  self.state, self.get_name(), self.sequence, chain_id,
535  copy_num=self.state.get_number_of_copies(self.get_name()))
536  self.state._register_copy(mol)
537  return mol
538 
539  def create_clone(self, chain_id):
540  """Create a Molecule clone (automatically builds same structure
541  and representation)
542 
543  @param chain_id If you want to set the chain ID of the copy
544  to something
545  @note You cannot add structure or representations to a clone!
546  """
547  mol = Molecule(
548  self.state, self.get_name(), self.sequence, chain_id,
549  copy_num=self.state.get_number_of_copies(self.get_name()),
550  mol_to_clone=self)
551  self.state._register_copy(mol)
552  return mol
553 
554  def add_structure(self, pdb_fn, chain_id, res_range=[],
555  offset=0, model_num=None, ca_only=False,
556  soft_check=False):
557  """Read a structure and store the coordinates.
558  @return the atomic residues (as a set)
559  @param pdb_fn The file to read (in PDB or mmCIF format)
560  @param chain_id Chain ID to read
561  @param res_range Add only a specific set of residues from the PDB
562  file. res_range[0] is the starting and res_range[1]
563  is the ending residue index.
564  @param offset Apply an offset to the residue indexes of the PDB
565  file. This number is added to the PDB sequence.
566  PMI uses 1-based FASTA numbering internally (the
567  first residue in the sequence is numbered 1, and
568  so on). If the PDB chain is not also numbered
569  starting from 1, apply an offset to make it match
570  the FASTA. For example, if the PDB is numbered
571  starting from -5, use an offset of 6 (-5 + 6 = 1).
572  @param model_num Read multi-model PDB and return that model
573  @param ca_only Only read the CA positions from the PDB file
574  @param soft_check If True, it only warns if there are sequence
575  mismatches between the PDB and the Molecule (FASTA)
576  sequence, and uses the sequence from the PDB.
577  If False (Default), it raises an error when there
578  are sequence mismatches.
579  @note If you are adding structure without a FASTA file, set soft_check
580  to True.
581  """
582  if self.mol_to_clone is not None:
583  raise ValueError('You cannot call add_structure() for a clone')
584 
585  self.pdb_fn = pdb_fn
586 
587  # get IMP.atom.Residues from the pdb file
588  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
589  res_range, offset,
590  ca_only=ca_only)
591  self.coord_finder.add_residues(rhs)
592 
593  if len(self.residues) == 0:
594  warnings.warn(
595  "Substituting PDB residue type with FASTA residue type. "
596  "Potentially dangerous.", IMP.pmi.StructureWarning)
597 
598  # Store info for ProtocolOutput usage later
599  self._pdb_elements.append(
600  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
601  chain_id=chain_id)))
602 
603  # load those into TempResidue object
604  # collect integer indexes of atomic residues to return
605  atomic_res = IMP.pmi.tools.OrderedSet()
606  for nrh, rh in enumerate(rhs):
607  pdb_idx = rh.get_index()
608  raw_idx = pdb_idx - 1
609 
610  # add ALA to fill in gaps
611  while len(self.residues) < pdb_idx:
612  r = TempResidue(self, 'A', len(self.residues)+1,
613  len(self.residues),
614  IMP.pmi.alphabets.amino_acid)
615  self.residues.append(r)
616  self.sequence += 'A'
617 
618  internal_res = self.residues[raw_idx]
619  if len(self.sequence) < raw_idx:
620  self.sequence += IMP.atom.get_one_letter_code(
621  rh.get_residue_type())
622  internal_res.set_structure(rh, soft_check)
623  atomic_res.add(internal_res)
624 
625  self.chain.set_sequence(self.sequence)
626  return atomic_res
627 
629  residues=None,
630  resolutions=[],
631  bead_extra_breaks=[],
632  bead_ca_centers=True,
633  bead_default_coord=[0, 0, 0],
634  density_residues_per_component=None,
635  density_prefix=None,
636  density_force_compute=False,
637  density_voxel_size=1.0,
638  setup_particles_as_densities=False,
639  ideal_helix=False,
640  color=None):
641  """Set the representation for some residues. Some options
642  (beads, ideal helix) operate along the backbone. Others (density
643  options) are volumetric.
644  Some of these you can combine e.g., beads+densities or helix+densities
645  See @ref pmi_resolution
646  @param residues Set of PMI TempResidues for adding the representation.
647  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
648  If None, will select all residues for this Molecule.
649  @param resolutions Resolutions for beads representations.
650  If structured, will average along backbone, breaking at
651  sequence breaks. If unstructured, will just create beads.
652  Pass an integer or list of integers
653  @param bead_extra_breaks Additional breakpoints for splitting beads.
654  The value can be the 0-ordered position, after which it'll
655  insert the break.
656  Alternatively pass PDB-style (1-ordered) indices as a string.
657  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
658  @param bead_ca_centers Set to True if you want the resolution=1 beads
659  to be at CA centers (otherwise will average atoms to get
660  center). Defaults to True.
661  @param bead_default_coord Advanced feature. Normally beads are placed
662  at the nearest structure. If no structure provided (like an
663  all bead molecule), the beads go here.
664  @param density_residues_per_component Create density (Gaussian
665  Mixture Model) for these residues. Must also supply
666  density_prefix
667  @param density_prefix Prefix (assuming '.txt') to read components
668  from or write to.
669  If exists, will read unless you set density_force_compute=True.
670  Will also write map (prefix+'.mrc').
671  Must also supply density_residues_per_component.
672  @param density_force_compute Set true to force overwrite density file.
673  @param density_voxel_size Advanced feature. Set larger if densities
674  taking too long to rasterize.
675  Set to 0 if you don't want to create the MRC file
676  @param setup_particles_as_densities Set to True if you want each
677  particle to be its own density.
678  Useful for all-atom models or flexible beads.
679  Mutually exclusive with density_ options
680  @param ideal_helix Create idealized helix structures for these
681  residues at resolution 1.
682  Any other resolutions passed will be coarsened from there.
683  Resolution 0 will not work; you may have to use MODELLER
684  to do that (for now).
685  @param color the color applied to the hierarchies generated.
686  Format options: tuple (r,g,b) with values 0 to 1;
687  float (from 0 to 1, a map from Blue to Green to Red);
688  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
689  a hex RGB string (e.g. "#ff0000");
690  an IMP.display.Color object
691  @note You cannot call add_representation multiple times for the
692  same residues.
693  """ # noqa: E501
694 
695  # can't customize clones
696  if self.mol_to_clone is not None:
697  raise ValueError(
698  'You cannot call add_representation() for a clone.'
699  ' Maybe use a copy instead.')
700 
701  # format input
702  if residues is None:
703  res = IMP.pmi.tools.OrderedSet(self.residues)
704  elif residues == self:
705  res = IMP.pmi.tools.OrderedSet(self.residues)
706  elif type(residues) is IMP.pmi.topology.TempResidue:
707  res = IMP.pmi.tools.OrderedSet([residues])
708  elif hasattr(residues, '__iter__'):
709  if len(residues) == 0:
710  raise Exception(
711  'You passed an empty set to add_representation')
712  if type(residues) is IMP.pmi.tools.OrderedSet \
713  and type(next(iter(residues))) is TempResidue:
714  res = residues
715  elif (type(residues) is set
716  and type(next(iter(residues))) is TempResidue):
717  res = IMP.pmi.tools.OrderedSet(residues)
718  elif type(residues) is list and type(residues[0]) is TempResidue:
719  res = IMP.pmi.tools.OrderedSet(residues)
720  else:
721  raise Exception("You passed an iterable of something other "
722  "than TempResidue", res)
723  else:
724  raise Exception("add_representation: you must pass a set of "
725  "residues or nothing(=all residues)")
726 
727  # check that each residue has not been represented yet
728  ov = res & self._represented
729  if ov:
730  raise Exception('You have already added representation for ' +
731  self.get_hierarchy().get_name() + ': ' +
732  ov.__repr__())
733  self._represented |= res
734 
735  # check you aren't creating multiple resolutions without structure
736  if not hasattr(resolutions, '__iter__'):
737  if type(resolutions) is int:
738  resolutions = [resolutions]
739  else:
740  raise Exception("you tried to pass resolutions that are not "
741  "int or list-of-int")
742  if len(resolutions) > 1 and not ideal_helix:
743  for r in res:
744  if not r.get_has_structure():
745  raise Exception(
746  'You are creating multiple resolutions for '
747  'unstructured regions. This will have unexpected '
748  'results.')
749 
750  # check density info is consistent
751  if density_residues_per_component or density_prefix:
752  if not density_residues_per_component and density_prefix:
753  raise Exception(
754  'If requesting density, must provide '
755  'density_residues_per_component AND density_prefix')
756  if density_residues_per_component and setup_particles_as_densities:
757  raise Exception(
758  'Cannot create both volumetric density '
759  '(density_residues_per_component) AND '
760  'individual densities (setup_particles_as_densities) '
761  'in the same representation')
762  if len(resolutions) > 1 and setup_particles_as_densities:
763  raise Exception(
764  'You have multiple bead resolutions but are attempting to '
765  'set them all up as individual Densities. '
766  'This could have unexpected results.')
767 
768  # check helix not accompanied by other resolutions
769  # (densities OK though!)
770  if ideal_helix:
771  if 0 in resolutions:
772  raise Exception(
773  "For ideal helices, cannot build resolution 0: "
774  "you have to do that in MODELLER")
775  if 1 not in resolutions:
776  resolutions = [1] + list(resolutions)
777  self._ideal_helices.append(res)
778 
779  # check residues are all part of this molecule:
780  for r in res:
781  if r.get_molecule() != self:
782  raise Exception(
783  'You are adding residues from a different molecule to',
784  self.__repr__())
785 
786  # unify formatting for extra breaks
787  breaks = []
788  for b in bead_extra_breaks:
789  if isinstance(b, str):
790  breaks.append(int(b)-1)
791  else:
792  breaks.append(b)
793  # store the representation group
794  self.representations.append(_Representation(
795  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
796  density_residues_per_component, density_prefix,
797  density_force_compute, density_voxel_size,
798  setup_particles_as_densities, ideal_helix, color))
799 
800  def _all_protocol_output(self):
801  return self.state._protocol_output
802 
803  def _build_protocol_output(self):
804  """Add molecule name and sequence to any ProtocolOutput objects"""
805  if not self.built:
806  name = self.hier.get_name()
807  for po, state in self._all_protocol_output():
808  po.create_component(state, name, True,
809  asym_name=self._name_with_copy)
810  po.add_component_sequence(state, name, self.sequence,
811  asym_name=self._name_with_copy,
812  alphabet=self.alphabet)
813 
814  def _finalize_build(self):
815  # For clones, pass the representation of the original molecule
816  # to ProtocolOutput
817  if self.mol_to_clone:
818  rephandler = _RepresentationHandler(
819  self._name_with_copy, list(self._all_protocol_output()),
820  self.mol_to_clone._pdb_elements)
821  for res in self.mol_to_clone.residues:
822  if res.hier:
823  rephandler(res)
824 
825  def build(self, protocol_output=True):
826  """Create all parts of the IMP hierarchy
827  including Atoms, Residues, and Fragments/Representations and,
828  finally, Copies.
829  Will only build requested representations.
830  @note Any residues assigned a resolution must have an IMP.atom.Residue
831  hierarchy containing at least a CAlpha. For missing residues,
832  these can be constructed from the PDB file.
833  """
834  if not self.built:
835  if protocol_output:
836  self._build_protocol_output()
837  # if requested, clone structure and representations
838  # BEFORE building original
839  if self.mol_to_clone is not None:
840  for nr, r in enumerate(self.mol_to_clone.residues):
841  if r.get_has_structure():
842  clone = IMP.atom.create_clone(r.get_hierarchy())
843  self.residues[nr].set_structure(
844  IMP.atom.Residue(clone), soft_check=True)
845  for old_rep in self.mol_to_clone.representations:
846  new_res = IMP.pmi.tools.OrderedSet()
847  for r in old_rep.residues:
848  new_res.add(self.residues[r.get_internal_index()])
849  self._represented.add(
850  self.residues[r.get_internal_index()])
851  new_rep = _Representation(
852  new_res, old_rep.bead_resolutions,
853  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
854  old_rep.bead_default_coord,
855  old_rep.density_residues_per_component,
856  old_rep.density_prefix, False,
857  old_rep.density_voxel_size,
858  old_rep.setup_particles_as_densities,
859  old_rep.ideal_helix, old_rep.color)
860  self.representations.append(new_rep)
861  self.coord_finder = self.mol_to_clone.coord_finder
862 
863  # give a warning for all residues that don't have representation
864  no_rep = [r for r in self.residues if r not in self._represented]
865  if len(no_rep) > 0:
866  warnings.warn(
867  'Residues without representation in molecule %s: %s'
868  % (self.get_name(), system_tools.resnums2str(no_rep)),
870 
871  # first build any ideal helices (fills in structure for
872  # the TempResidues)
873  for rep in self.representations:
874  if rep.ideal_helix:
875  _build_ideal_helix(self.model, rep.residues,
876  self.coord_finder)
877 
878  # build all the representations
879  built_reps = []
880 
881  rephandler = _RepresentationHandler(
882  self._name_with_copy, list(self._all_protocol_output()),
883  self._pdb_elements)
884 
885  for rep in self.representations:
886  built_reps += system_tools.build_representation(
887  self, rep, self.coord_finder, rephandler)
888 
889  # sort them before adding as children
890  built_reps.sort(
891  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
892  for br in built_reps:
893  self.hier.add_child(br)
894  br.update_parents()
895  self.built = True
896 
897  for res in self.residues:
898  # first off, store the highest resolution available
899  # in residue.hier
900  new_ps = IMP.atom.Selection(
901  self.hier,
902  residue_index=res.get_index(),
903  resolution=1).get_selected_particles()
904  if len(new_ps) > 0:
905  new_p = new_ps[0]
906  if IMP.atom.Atom.get_is_setup(new_p):
907  # if only found atomic, store the residue
908  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
909  else:
910  # otherwise just store what you found
911  new_hier = IMP.atom.Hierarchy(new_p)
912  res.hier = new_hier
913  # Clones will be handled in _finalize_build() instead
914  # (can't handle them here as the parent of the clone
915  # isn't built yet)
916  if self.mol_to_clone is None:
917  rephandler(res)
918  else:
919  res.hier = None
920  self._represented = IMP.pmi.tools.OrderedSet(
921  [a for a in self._represented])
922  print('done building', self.get_hierarchy())
923  return self.hier
924 
925  def get_particles_at_all_resolutions(self, residue_indexes=None):
926  """Helpful utility for getting particles at all resolutions from
927  this molecule. Can optionally pass a set of residue indexes"""
928  if not self.built:
929  raise Exception(
930  "Cannot get all resolutions until you build the Molecule")
931  if residue_indexes is None:
932  residue_indexes = [r.get_index() for r in self.get_residues()]
934  self.get_hierarchy(), residue_indexes=residue_indexes)
935  return ps
936 
937 
938 class _Representation:
939  """Private class just to store a representation request"""
940  def __init__(self,
941  residues,
942  bead_resolutions,
943  bead_extra_breaks,
944  bead_ca_centers,
945  bead_default_coord,
946  density_residues_per_component,
947  density_prefix,
948  density_force_compute,
949  density_voxel_size,
950  setup_particles_as_densities,
951  ideal_helix,
952  color):
953  self.residues = residues
954  self.bead_resolutions = bead_resolutions
955  self.bead_extra_breaks = bead_extra_breaks
956  self.bead_ca_centers = bead_ca_centers
957  self.bead_default_coord = bead_default_coord
958  self.density_residues_per_component = density_residues_per_component
959  self.density_prefix = density_prefix
960  self.density_force_compute = density_force_compute
961  self.density_voxel_size = density_voxel_size
962  self.setup_particles_as_densities = setup_particles_as_densities
963  self.ideal_helix = ideal_helix
964  self.color = color
965 
966 
967 class _FindCloseStructure:
968  """Utility to get the nearest observed coordinate"""
969  def __init__(self):
970  self.coords = []
971 
972  def add_residues(self, residues):
973  for r in residues:
974  idx = IMP.atom.Residue(r).get_index()
975  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
976  ca = IMP.atom.Selection(
977  r, atom_types=catypes).get_selected_particles()
978  p = IMP.atom.Selection(
979  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
980  if len(ca) == 1:
981  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
982  self.coords.append([idx, xyz])
983  elif len(p) == 1:
984  xyz = IMP.core.XYZ(p[0]).get_coordinates()
985  self.coords.append([idx, xyz])
986  else:
987  raise ValueError("_FindCloseStructure: wrong selection")
988 
989  self.coords.sort(key=itemgetter(0))
990 
991  def find_nearest_coord(self, query):
992  if self.coords == []:
993  return None
994  keys = [r[0] for r in self.coords]
995  pos = bisect_left(keys, query)
996  if pos == 0:
997  ret = self.coords[0]
998  elif pos == len(self.coords):
999  ret = self.coords[-1]
1000  else:
1001  before = self.coords[pos - 1]
1002  after = self.coords[pos]
1003  if after[0] - query < query - before[0]:
1004  ret = after
1005  else:
1006  ret = before
1007  return ret[1]
1008 
1009 
1011  """A dictionary-like wrapper for reading and storing sequence data.
1012  Keys are FASTA sequence names, and each value a string of one-letter
1013  codes.
1014 
1015  The FASTA header may contain multiple fields split by pipe (|)
1016  characters. If so, the FASTA sequence name is the first field and
1017  the second field (if present) is the UniProt accession.
1018  For example, ">cop9|Q13098" yields a FASTA sequence name of "cop9"
1019  and UniProt accession of "Q13098".
1020  """
1021  def __init__(self, fasta_fn, name_map=None):
1022  """Read a FASTA file and extract all the requested sequences
1023  @param fasta_fn sequence file
1024  @param name_map dictionary mapping the FASTA name to final stored name
1025  """
1026  # Mapping from sequence name to primary sequence
1027  self.sequences = IMP.pmi.tools.OrderedDict()
1028  # Mapping from sequence name to UniProt accession, if available
1029  self.uniprot = {}
1030  self.read_sequences(fasta_fn, name_map)
1031 
1032  def __len__(self):
1033  return len(self.sequences)
1034 
1035  def __contains__(self, x):
1036  return x in self.sequences
1037 
1038  def __getitem__(self, key):
1039  if type(key) is int:
1040  allseqs = list(self.sequences.keys())
1041  try:
1042  return self.sequences[allseqs[key]]
1043  except IndexError:
1044  raise IndexError("You tried to access sequence number %d "
1045  "but there's only %d" % (key, len(allseqs)))
1046  else:
1047  return self.sequences[key]
1048 
1049  def __iter__(self):
1050  return self.sequences.__iter__()
1051 
1052  def __repr__(self):
1053  ret = ''
1054  for s in self.sequences:
1055  ret += '%s\t%s\n' % (s, self.sequences[s])
1056  return ret
1057 
1058  def read_sequences(self, fasta_fn, name_map=None):
1059  code = None
1060  seq = None
1061  with open(fasta_fn, 'r') as fh:
1062  for (num, line) in enumerate(fh):
1063  if line.startswith('>'):
1064  if seq is not None:
1065  self.sequences[code] = seq.strip('*')
1066  spl = line[1:].split('|')
1067  code = spl[0].strip()
1068  if name_map is not None:
1069  try:
1070  code = name_map[code]
1071  except KeyError:
1072  pass
1073  seq = ''
1074  if len(spl) >= 2:
1075  up_accession = spl[1].strip()
1076  self.uniprot[code] = up_accession
1077  else:
1078  line = line.rstrip()
1079  if line: # Skip blank lines
1080  if seq is None:
1081  raise Exception(
1082  "Found FASTA sequence before first header "
1083  "at line %d: %s" % (num + 1, line))
1084  seq += line
1085  if seq is not None:
1086  self.sequences[code] = seq.strip('*')
1087 
1088 
1090  """Data structure for reading and storing sequence data from PDBs.
1091 
1092  @see fasta_pdb_alignments."""
1093  def __init__(self, model, pdb_fn, name_map=None):
1094  """Read a PDB file and return all sequences for each contiguous
1095  fragment
1096  @param pdb_fn file
1097  @param name_map dictionary mapping the pdb chain id to final
1098  stored name
1099  """
1100  self.model = model
1101  # self.sequences data-structure: (two-key dictionary)
1102  # it contains all contiguous fragments:
1103  # chain_id, tuples indicating first and last residue, sequence
1104  # example:
1105  # key1, key2, value
1106  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1107  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1108  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1109  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1110  self.sequences = IMP.pmi.tools.OrderedDict()
1111  self.read_sequences(pdb_fn, name_map)
1112 
1113  def read_sequences(self, pdb_fn, name_map):
1114  read_file = IMP.atom.read_pdb
1115  if pdb_fn.endswith('.cif'):
1116  read_file = IMP.atom.read_mmcif
1117  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1118  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1119  for c in cs:
1120  id = IMP.atom.Chain(c).get_id()
1121  print(id)
1122  if name_map:
1123  try:
1124  id = name_map[id]
1125  except KeyError:
1126  print("Chain ID %s not in name_map, skipping" % id)
1127  continue
1128  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1129  rids = []
1130  rids_olc_dict = {}
1131  for r in rs:
1132  dr = IMP.atom.Residue(r)
1133  rid = dr.get_index()
1134 
1135  isprotein = dr.get_is_protein()
1136  isrna = dr.get_is_rna()
1137  isdna = dr.get_is_dna()
1138  if isprotein:
1139  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1140  rids.append(rid)
1141  rids_olc_dict[rid] = olc
1142  elif isdna:
1143  if dr.get_residue_type() == IMP.atom.DADE:
1144  olc = "A"
1145  if dr.get_residue_type() == IMP.atom.DURA:
1146  olc = "U"
1147  if dr.get_residue_type() == IMP.atom.DCYT:
1148  olc = "C"
1149  if dr.get_residue_type() == IMP.atom.DGUA:
1150  olc = "G"
1151  if dr.get_residue_type() == IMP.atom.DTHY:
1152  olc = "T"
1153  rids.append(rid)
1154  rids_olc_dict[rid] = olc
1155  elif isrna:
1156  if dr.get_residue_type() == IMP.atom.ADE:
1157  olc = "A"
1158  if dr.get_residue_type() == IMP.atom.URA:
1159  olc = "U"
1160  if dr.get_residue_type() == IMP.atom.CYT:
1161  olc = "C"
1162  if dr.get_residue_type() == IMP.atom.GUA:
1163  olc = "G"
1164  if dr.get_residue_type() == IMP.atom.THY:
1165  olc = "T"
1166  rids.append(rid)
1167  rids_olc_dict[rid] = olc
1168  group_rids = self.group_indexes(rids)
1169  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1170  for group in group_rids:
1171  sequence_fragment = ""
1172  for i in range(group[0], group[1]+1):
1173  sequence_fragment += rids_olc_dict[i]
1174  contiguous_sequences[group] = sequence_fragment
1175  self.sequences[id] = contiguous_sequences
1176 
1177  def group_indexes(self, indexes):
1178  from itertools import groupby
1179  ranges = []
1180  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1181  group = [x[1] for x in g]
1182  ranges.append((group[0], group[-1]))
1183  return ranges
1184 
1185 
1186 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1187  '''This function computes and prints the alignment between the
1188  fasta file and the pdb sequence, computes the offsets for each contiguous
1189  fragment in the PDB.
1190  @param fasta_sequences IMP.pmi.topology.Sequences object
1191  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1192  @param show boolean default False, if True prints the alignments.
1193  The input objects should be generated using map_name dictionaries
1194  such that fasta_id
1195  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1196  Returns a dictionary of offsets, organized by peptide range (group):
1197  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1198  from Bio import pairwise2
1199  from Bio.pairwise2 import format_alignment
1200  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1201  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1202  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1203  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1204  offsets = IMP.pmi.tools.OrderedDict()
1205  for name in fasta_sequences.sequences:
1206  print(name)
1207  seq_fasta = fasta_sequences.sequences[name]
1208  if name not in pdb_sequences.sequences:
1209  print("Fasta id %s not in pdb names, aligning against every "
1210  "pdb chain" % name)
1211  pdbnames = pdb_sequences.sequences.keys()
1212  else:
1213  pdbnames = [name]
1214  for pdbname in pdbnames:
1215  for group in pdb_sequences.sequences[pdbname]:
1216  if group[1] - group[0] + 1 < 7:
1217  continue
1218  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1219  if show:
1220  print("########################")
1221  print(" ")
1222  print("protein name", pdbname)
1223  print("fasta id", name)
1224  print("pdb fragment", group)
1225  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1226  2, -1, -.5, -.1)[0]
1227  for a in [align]:
1228  offset = a[3] + 1 - group[0]
1229  if show:
1230  print("alignment sequence start-end",
1231  (a[3] + 1, a[4] + 1))
1232  print("offset from pdb to fasta index", offset)
1233  print(format_alignment(*a))
1234  if name not in offsets:
1235  offsets[pdbname] = {}
1236  if group not in offsets[pdbname]:
1237  offsets[pdbname][group] = offset
1238  else:
1239  if group not in offsets[pdbname]:
1240  offsets[pdbname][group] = offset
1241  return offsets
1242 
1243 
1245  "Temporarily stores residue information, even without structure available."
1246  # Consider implementing __hash__ so you can select.
1247  def __init__(self, molecule, code, index, internal_index, alphabet):
1248  """setup a TempResidue
1249  @param molecule PMI Molecule to which this residue belongs
1250  @param code one-letter residue type code
1251  @param index PDB index
1252  @param internal_index The number in the sequence
1253  """
1254  # these attributes should be immutable
1255  self.molecule = molecule
1256  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1257  self.pdb_index = index
1258  self.internal_index = internal_index
1259  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1260  self.state_index = \
1261  IMP.atom.State(self.molecule.state.hier).get_state_index()
1262  # these are expected to change
1263  self._structured = False
1264  self.hier = IMP.atom.Residue.setup_particle(
1265  IMP.Particle(molecule.model), self.rtype, index)
1266 
1267  def __str__(self):
1268  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1269  + str(self.copy_index) + "_" + self.get_code() \
1270  + str(self.get_index())
1271 
1272  def __repr__(self):
1273  return self.__str__()
1274 
1275  def __key(self):
1276  # this returns the immutable attributes only
1277  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1278  self.pdb_index, self.internal_index)
1279 
1280  def __eq__(self, other):
1281  return (type(other) == type(self) # noqa: E721
1282  and self.__key() == other.__key())
1283 
1284  def __hash__(self):
1285  return hash(self.__key())
1286 
1287  def get_index(self):
1288  return self.pdb_index
1289 
1290  def get_internal_index(self):
1291  return self.internal_index
1292 
1293  def get_code(self):
1294  return IMP.atom.get_one_letter_code(self.get_residue_type())
1295 
1296  def get_residue_type(self):
1297  return self.rtype
1298 
1299  def get_hierarchy(self):
1300  return self.hier
1301 
1302  def get_molecule(self):
1303  return self.molecule
1304 
1305  def get_has_structure(self):
1306  return self._structured
1307 
1308  def set_structure(self, res, soft_check=False):
1309  if res.get_residue_type() != self.get_residue_type():
1310  if (res.get_residue_type() == IMP.atom.MSE
1311  and self.get_residue_type() == IMP.atom.MET):
1312  # MSE in the PDB file is OK to match with MET in the FASTA
1313  # sequence
1314  pass
1315  elif soft_check:
1316  # note from commit a2c13eaa1 we give priority to the
1317  # FASTA and not the PDB
1318  warnings.warn(
1319  'Inconsistency between FASTA sequence and PDB sequence. '
1320  'FASTA type %s %s and PDB type %s'
1321  % (self.get_index(), self.hier.get_residue_type(),
1322  res.get_residue_type()),
1324  self.hier.set_residue_type((self.get_residue_type()))
1325  self.rtype = self.get_residue_type()
1326  else:
1327  raise Exception(
1328  'ERROR: PDB residue index', self.get_index(), 'is',
1329  IMP.atom.get_one_letter_code(res.get_residue_type()),
1330  'and sequence residue is', self.get_code())
1331 
1332  for a in res.get_children():
1333  self.hier.add_child(a)
1334  atype = IMP.atom.Atom(a).get_atom_type()
1335  a.get_particle().set_name(
1336  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1337  self.hier.get_index()))
1338  self._structured = True
1339 
1340 
1342  """Automatically setup System and Degrees of Freedom with a formatted
1343  text file.
1344  The file is read in and each part of the topology is stored as a
1345  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1346  The topology file should be in a simple pipe-delimited format:
1347  @code{.txt}
1348 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1349 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1350 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1351 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1352 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1353 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1354 
1355  @endcode
1356 
1357  These are the fields you can enter:
1358  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1359  hierarchy for this structure. Multiple copies of the same molecule
1360  can be created by appending a copy number after a period; if none is
1361  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1362  of Rpb2 or Rpb2.0).
1363  - `color`: The color used in the output RMF file. Uses
1364  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1365  (e.g. "red"), or R,G,B values as three comma-separated floating point
1366  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1367  starting with '#' (e.g. #ff0000).
1368  - `fasta_fn`: Name of FASTA file containing this component.
1369  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1370  from the file is assumed to be a protein sequence. If it should instead
1371  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1372  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1373  FASTA file and treat it as RNA. The FASTA header may contain multiple
1374  fields split by pipe (|) characters. If so, the FASTA sequence name is
1375  the first field and the second field (if present) is the UniProt
1376  accession. For example, ">cop9|Q13098" yields a FASTA sequence name
1377  of "cop9" and UniProt accession of "Q13098". If such an accession is
1378  present, it is added to the generated structure (and ultimately
1379  recorded in any output RMF file).
1380  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1381  If left empty, will set up as BEADS (you can also specify "BEADS")
1382  Can also write "IDEAL_HELIX".
1383  - `chain`: Chain ID of this domain in the PDB or mmCIF file. This is
1384  the "author-provided" chain ID for mmCIF files, not the asym_id.
1385  - `residue_range`: Comma delimited pair defining range.
1386  Can leave empty or use 'all' for entire sequence from PDB file.
1387  The second item in the pair can be END to select the last residue in the
1388  PDB chain.
1389  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1390  For example, an offset of -10 would match the first residue in the
1391  FASTA file (which is always numbered sequentially starting from 1) with
1392  residue 11 in the PDB file.
1393  - `bead_size`: The size (in residues) of beads used to model areas not
1394  covered by PDB coordinates. These will be built automatically.
1395  - `em_residues`: The number of Gaussians used to model the electron
1396  density of this domain. Set to zero if no EM fitting will be done.
1397  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1398  (and .mrc)
1399  - `rigid_body`: Leave empty if this object is not in a rigid body.
1400  Otherwise, this is a number corresponding to the rigid body containing
1401  this object. The number itself is just used for grouping things.
1402  - `super_rigid_body`: Add a mover that periodically moves several related
1403  domains as if they were a single large rigid body. In between such moves,
1404  the domains move independently. This can improve sampling.
1405  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1406  for all adjacent pairs of domains in the chain.
1407  - `flags` additional flags for advanced options
1408  @note All filenames are relative to the paths specified in the constructor.
1409 
1410  """ # noqa: E501
1411  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1412  gmm_dir='./'):
1413  """Constructor.
1414  @param topology_file Pipe-delimited file specifying the topology
1415  @param pdb_dir Relative path to the pdb directory
1416  @param fasta_dir Relative path to the fasta directory
1417  @param gmm_dir Relative path to the GMM directory
1418  """
1419  self.topology_file = topology_file
1420  # key=molname, value=TempMolecule
1421  self.molecules = IMP.pmi.tools.OrderedDict()
1422  self.pdb_dir = pdb_dir
1423  self.fasta_dir = fasta_dir
1424  self.gmm_dir = gmm_dir
1425  self._components = self.read(topology_file)
1426 
1427  def write_topology_file(self, outfile):
1428  with open(outfile, "w") as f:
1429  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1430  "residue_range|pdb_offset|bead_size|"
1431  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1432  "chain_of_super_rigid_bodies|\n")
1433  for c in self._components:
1434  output = c.get_str()+'\n'
1435  f.write(output)
1436  return outfile
1437 
1438  def get_components(self, topology_list="all"):
1439  """ Return list of ComponentTopologies for selected components
1440  @param topology_list List of indices to return"""
1441  if topology_list == "all":
1442  topologies = self._components
1443  else:
1444  topologies = []
1445  for i in topology_list:
1446  topologies.append(self._components[i])
1447  return topologies
1448 
1449  def get_molecules(self):
1450  return self.molecules
1451 
1452  def read(self, topology_file, append=False):
1453  """Read system components from topology file. append=False will erase
1454  current topology and overwrite with new
1455  """
1456  is_topology = False
1457  is_directories = False
1458  linenum = 1
1459  if not append:
1460  self._components = []
1461 
1462  with open(topology_file) as infile:
1463  for line in infile:
1464  if line.lstrip() == "" or line[0] == "#":
1465  continue
1466  elif line.split('|')[1].strip() in ("molecule_name"):
1467  is_topology = True
1468  is_directories = False
1469  old_format = False
1470  continue
1471  elif line.split('|')[1] == "component_name":
1472  is_topology = True
1474  "Old-style topology format (using "
1475  "|component_name|) is deprecated. Please switch to "
1476  "the new-style format (using |molecule_name|)\n")
1477  old_format = True
1478  is_directories = False
1479  continue
1480  elif line.split('|')[1] == "directories":
1482  "Setting directories in the topology file "
1483  "is deprecated. Please do so through the "
1484  "TopologyReader constructor. Note that new-style "
1485  "paths are relative to the current working "
1486  "directory, not the topology file.\n")
1487  is_directories = True
1488  elif is_directories:
1489  fields = line.split('|')
1490  setattr(self, fields[1],
1491  IMP.get_relative_path(topology_file, fields[2]))
1492  if is_topology:
1493  new_component = self._parse_line(line, linenum, old_format)
1494  self._components.append(new_component)
1495  linenum += 1
1496  return self._components
1497 
1498  def _parse_line(self, component_line, linenum, old_format):
1499  """Parse a line of topology values and matches them to their key.
1500  Checks each value for correct syntax
1501  Returns a list of Component objects
1502  fields:
1503  """
1504  c = _Component()
1505  values = [s.strip() for s in component_line.split('|')]
1506  errors = []
1507 
1508  # Required fields
1509  if old_format:
1510  c.molname = values[1]
1511  c.copyname = ''
1512  c._domain_name = values[2]
1513  c.color = 'blue'
1514  else:
1515  names = values[1].split('.')
1516  if len(names) == 1:
1517  c.molname = names[0]
1518  c.copyname = ''
1519  elif len(names) == 2:
1520  c.molname = names[0]
1521  c.copyname = names[1]
1522  else:
1523  c.molname = names[0]
1524  c.copyname = names[1]
1525  errors.append("Molecule name should be <molecule.copyID>")
1526  errors.append("For component %s line %d "
1527  % (c.molname, linenum))
1528  c._domain_name = c.molname + '.' + c.copyname
1529  colorfields = values[2].split(',')
1530  if len(colorfields) == 3:
1531  c.color = [float(x) for x in colorfields]
1532  if any([x > 1 for x in c.color]):
1533  c.color = [x/255 for x in c.color]
1534  else:
1535  c.color = values[2]
1536  c._orig_fasta_file = values[3]
1537  c.fasta_file = values[3]
1538  fasta_field = values[4].split(",")
1539  c.fasta_id = fasta_field[0]
1540  c.fasta_flag = None
1541  if len(fasta_field) > 1:
1542  c.fasta_flag = fasta_field[1]
1543  c._orig_pdb_input = values[5]
1544  pdb_input = values[5]
1545  tmp_chain = values[6]
1546  rr = values[7]
1547  offset = values[8]
1548  bead_size = values[9]
1549  emg = values[10]
1550  if old_format:
1551  rbs = srbs = csrbs = ''
1552  else:
1553  rbs = values[11]
1554  srbs = values[12]
1555  csrbs = values[13]
1556 
1557  if c.molname not in self.molecules:
1558  self.molecules[c.molname] = _TempMolecule(c)
1559  else:
1560  # COPY OR DOMAIN
1561  c._orig_fasta_file = \
1562  self.molecules[c.molname].orig_component._orig_fasta_file
1563  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1564  self.molecules[c.molname].add_component(c, c.copyname)
1565 
1566  # now cleanup input
1567  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1568  if pdb_input == "":
1569  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1570  errors.append("For component %s line %d is not correct"
1571  "|%s| was given." % (c.molname, linenum, pdb_input))
1572  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1573  c.pdb_file = pdb_input
1574  else:
1575  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1576 
1577  # PDB chain must be one or two characters
1578  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1579  c.chain = tmp_chain
1580  else:
1581  errors.append(
1582  "PDB Chain identifier must be one or two characters.")
1583  errors.append("For component %s line %d is not correct"
1584  "|%s| was given."
1585  % (c.molname, linenum, tmp_chain))
1586 
1587  # Optional fields
1588  # Residue Range
1589  if rr.strip() == 'all' or str(rr) == "":
1590  c.residue_range = None
1591  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1592  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1593  # Make sure that is residue range is given, there are only
1594  # two values and they are integers
1595  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1596  if c.residue_range[1] != 'END':
1597  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1598  # Old format used -1 for the last residue
1599  if old_format and c.residue_range[1] == -1:
1600  c.residue_range = (c.residue_range[0], 'END')
1601  else:
1602  errors.append("Residue Range format for component %s line %d is "
1603  "not correct" % (c.molname, linenum))
1604  errors.append(
1605  "Correct syntax is two comma separated integers: "
1606  "|start_res, end_res|. end_res can also be END to select the "
1607  "last residue in the chain. |%s| was given." % rr)
1608  errors.append("To select all residues, indicate |\"all\"|")
1609 
1610  # PDB Offset
1611  if self._is_int(offset):
1612  c.pdb_offset = int(offset)
1613  elif len(offset) == 0:
1614  c.pdb_offset = 0
1615  else:
1616  errors.append("PDB Offset format for component %s line %d is "
1617  "not correct" % (c.molname, linenum))
1618  errors.append("The value must be a single integer. |%s| was given."
1619  % offset)
1620 
1621  # Bead Size
1622  if self._is_int(bead_size):
1623  c.bead_size = int(bead_size)
1624  elif len(bead_size) == 0:
1625  c.bead_size = 0
1626  else:
1627  errors.append("Bead Size format for component %s line %d is "
1628  "not correct" % (c.molname, linenum))
1629  errors.append("The value must be a single integer. |%s| was given."
1630  % bead_size)
1631 
1632  # EM Residues Per Gaussian
1633  if self._is_int(emg):
1634  if int(emg) > 0:
1635  c.density_prefix = os.path.join(self.gmm_dir,
1636  c.get_unique_name())
1637  c.gmm_file = c.density_prefix + '.txt'
1638  c.mrc_file = c.density_prefix + '.gmm'
1639 
1640  c.em_residues_per_gaussian = int(emg)
1641  else:
1642  c.em_residues_per_gaussian = 0
1643  elif len(emg) == 0:
1644  c.em_residues_per_gaussian = 0
1645  else:
1646  errors.append("em_residues_per_gaussian format for component "
1647  "%s line %d is not correct" % (c.molname, linenum))
1648  errors.append("The value must be a single integer. |%s| was given."
1649  % emg)
1650 
1651  # rigid bodies
1652  if len(rbs) > 0:
1653  if not self._is_int(rbs):
1654  errors.append(
1655  "rigid bodies format for component "
1656  "%s line %d is not correct" % (c.molname, linenum))
1657  errors.append("Each RB must be a single integer, or empty. "
1658  "|%s| was given." % rbs)
1659  c.rigid_body = int(rbs)
1660 
1661  # super rigid bodies
1662  if len(srbs) > 0:
1663  srbs = srbs.split(',')
1664  for i in srbs:
1665  if not self._is_int(i):
1666  errors.append(
1667  "super rigid bodies format for component "
1668  "%s line %d is not correct" % (c.molname, linenum))
1669  errors.append(
1670  "Each SRB must be a single integer. |%s| was given."
1671  % srbs)
1672  c.super_rigid_bodies = srbs
1673 
1674  # chain of super rigid bodies
1675  if len(csrbs) > 0:
1676  if not self._is_int(csrbs):
1677  errors.append(
1678  "em_residues_per_gaussian format for component "
1679  "%s line %d is not correct" % (c.molname, linenum))
1680  errors.append(
1681  "Each CSRB must be a single integer. |%s| was given."
1682  % csrbs)
1683  c.chain_of_super_rigid_bodies = csrbs
1684 
1685  # done
1686  if errors:
1687  raise ValueError("Fix Topology File syntax errors and rerun: "
1688  + "\n".join(errors))
1689  else:
1690  return c
1691 
1692  def set_gmm_dir(self, gmm_dir):
1693  """Change the GMM dir"""
1694  self.gmm_dir = gmm_dir
1695  for c in self._components:
1696  c.gmm_file = os.path.join(self.gmm_dir,
1697  c.get_unique_name() + ".txt")
1698  c.mrc_file = os.path.join(self.gmm_dir,
1699  c.get_unique_name() + ".mrc")
1700  print('new gmm', c.gmm_file)
1701 
1702  def set_pdb_dir(self, pdb_dir):
1703  """Change the PDB dir"""
1704  self.pdb_dir = pdb_dir
1705  for c in self._components:
1706  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1707  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1708 
1709  def set_fasta_dir(self, fasta_dir):
1710  """Change the FASTA dir"""
1711  self.fasta_dir = fasta_dir
1712  for c in self._components:
1713  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1714 
1715  def _is_int(self, s):
1716  # is this string an integer?
1717  try:
1718  float(s)
1719  return float(s).is_integer()
1720  except ValueError:
1721  return False
1722 
1723  def get_rigid_bodies(self):
1724  """Return list of lists of rigid bodies (as domain name)"""
1725  rbl = defaultdict(list)
1726  for c in self._components:
1727  if c.rigid_body:
1728  rbl[c.rigid_body].append(c.get_unique_name())
1729  return rbl.values()
1730 
1732  """Return list of lists of super rigid bodies (as domain name)"""
1733  rbl = defaultdict(list)
1734  for c in self._components:
1735  for rbnum in c.super_rigid_bodies:
1736  rbl[rbnum].append(c.get_unique_name())
1737  return rbl.values()
1738 
1740  "Return list of lists of chains of super rigid bodies (as domain name)"
1741  rbl = defaultdict(list)
1742  for c in self._components:
1743  for rbnum in c.chain_of_super_rigid_bodies:
1744  rbl[rbnum].append(c.get_unique_name())
1745  return rbl.values()
1746 
1747 
1748 class _TempMolecule:
1749  """Store the Components and any requests for copies"""
1750  def __init__(self, init_c):
1751  self.molname = init_c.molname
1752  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1753  self.add_component(init_c, init_c.copyname)
1754  self.orig_copyname = init_c.copyname
1755  self.orig_component = self.domains[init_c.copyname][0]
1756 
1757  def add_component(self, component, copy_id):
1758  self.domains[copy_id].append(component)
1759  component.domainnum = len(self.domains[copy_id])-1
1760 
1761  def __repr__(self):
1762  return ','.join('%s:%i'
1763  % (k, len(self.domains[k])) for k in self.domains)
1764 
1765 
1766 class _Component:
1767  """Stores the components required to build a standard IMP hierarchy
1768  using IMP.pmi.BuildModel()
1769  """
1770  def __init__(self):
1771  self.molname = None
1772  self.copyname = None
1773  self.domainnum = 0
1774  self.fasta_file = None
1775  self._orig_fasta_file = None
1776  self.fasta_id = None
1777  self.fasta_flag = None
1778  self.pdb_file = None
1779  self._orig_pdb_input = None
1780  self.chain = None
1781  self.residue_range = None
1782  self.pdb_offset = 0
1783  self.bead_size = 10
1784  self.em_residues_per_gaussian = 0
1785  self.gmm_file = ''
1786  self.mrc_file = ''
1787  self.density_prefix = ''
1788  self.color = 0.1
1789  self.rigid_body = None
1790  self.super_rigid_bodies = []
1791  self.chain_of_super_rigid_bodies = []
1792 
1793  def _l2s(self, rng):
1794  return ",".join("%s" % x for x in rng)
1795 
1796  def __repr__(self):
1797  return self.get_str()
1798 
1799  def get_unique_name(self):
1800  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1801 
1802  def get_str(self):
1803  res_range = self.residue_range
1804  if self.residue_range is None:
1805  res_range = []
1806  name = self.molname
1807  if self.copyname != '':
1808  name += '.' + self.copyname
1809  if self.chain is None:
1810  chain = ' '
1811  else:
1812  chain = self.chain
1813  color = self.color
1814  if isinstance(color, list):
1815  color = ','.join([str(x) for x in color])
1816  fastaid = self.fasta_id
1817  if self.fasta_flag:
1818  fastaid += "," + self.fasta_flag
1819  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1820  self._orig_pdb_input, chain,
1821  self._l2s(list(res_range)),
1822  str(self.pdb_offset),
1823  str(self.bead_size),
1824  str(self.em_residues_per_gaussian),
1825  str(self.rigid_body) if self.rigid_body else '',
1826  self._l2s(self.super_rigid_bodies),
1827  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1828  return a
1829 
1830 
1832  '''Extends the functionality of IMP.atom.Molecule'''
1833 
1834  def __init__(self, hierarchy):
1835  IMP.atom.Molecule.__init__(self, hierarchy)
1836 
1837  def get_state_index(self):
1838  state = self.get_parent()
1839  return IMP.atom.State(state).get_state_index()
1840 
1841  def get_copy_index(self):
1842  return IMP.atom.Copy(self).get_copy_index()
1843 
1844  def get_extended_name(self):
1845  return self.get_name() + "." + \
1846  str(self.get_copy_index()) + \
1847  "." + str(self.get_state_index())
1848 
1849  def get_sequence(self):
1850  return IMP.atom.Chain(self).get_sequence()
1851 
1852  def get_residue_indexes(self):
1854 
1855  def get_residue_segments(self):
1856  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1857 
1858  def get_chain_id(self):
1859  return IMP.atom.Chain(self).get_id()
1860 
1861  def __repr__(self):
1862  s = 'PMIMoleculeHierarchy '
1863  s += self.get_name()
1864  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1865  s += " " + "State " + str(self.get_state_index())
1866  s += " " + "N residues " + str(len(self.get_sequence()))
1867  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:246
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:610
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
Represent the root node of the global IMP.atom.Hierarchy.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:128
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Create a decorator for the numberth copy.
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup System and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:48
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:84
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:819