IMP logo
IMP Reference Guide  2.16.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with
4  `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System
5  will store all the states.
6 * Then call System.create_state(). You can easily create a multistate system
7  by calling this function multiple times.
8 * For each State, call State.create_molecule() to add a Molecule (a uniquely
9  named polymer). This function returns the Molecule object which can be
10  passed to various PMI functions.
11 * Some useful functions to help you set up your Molecules:
12  * Access the sequence residues with slicing (Molecule[a:b]) or functions
13  like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
14  These functions all return Python sets for easy set arithmetic using
15  & (and), | (or), - (difference)
16  * Molecule.add_structure() to add structural information from a PDB file.
17  * Molecule.add_representation() to create a representation unit - here you
18  can choose bead resolutions as well as alternate representations like
19  densities or ideal helices.
20  * Molecule.create_clone() lets you set up a molecule with identical
21  representations, just a different chain ID. Use Molecule.create_copy()
22  if you want a molecule with the same sequence but that allows custom
23  representations.
24 * Once data has been added and representations chosen, call System.build()
25  to create a canonical IMP hierarchy.
26 * Following hierarchy construction, setup rigid bodies, flexible beads, etc
27  in IMP::pmi::dof.
28 * Check your representation with a nice printout:
29  IMP::atom::show_with_representation()
30 
31 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
32 
33 Alternatively one can construct the entire topology and degrees of freedom
34 via formatted text file with TopologyReader and
35 IMP::pmi::macros::BuildSystem(). This is used in the
36 [PMI tutorial](@ref rnapolii_stalk). Note that this only allows a limited
37 set of the full options available to PMI users
38 (rigid bodies only, fixed resolutions).
39 """ # noqa: E501
40 
41 from __future__ import print_function
42 import IMP
43 import IMP.atom
44 import IMP.algebra
45 import IMP.pmi
46 import IMP.pmi.tools
47 import IMP.pmi.alphabets
48 import os
49 import re
50 from collections import defaultdict, namedtuple
51 from . import system_tools
52 from bisect import bisect_left
53 from math import pi, cos, sin
54 from operator import itemgetter
55 import weakref
56 import warnings
57 
58 
59 def _build_ideal_helix(model, residues, coord_finder):
60  """Creates an ideal helix from the specified residue range
61  Residues MUST be contiguous.
62  This function actually adds them to the TempResidue hierarchy
63  """
64  created_hiers = []
65 
66  # this function creates a CAlpha helix structure (which can be used
67  # for coarsening)
68  prev_idx = -9999
69  for n, tempres in enumerate(residues):
70  if tempres.get_has_structure():
71  raise ValueError("You tried to build ideal_helix for a residue "
72  "that already has structure: %s" % tempres)
73  if n > 0 and tempres.get_index() != prev_idx + 1:
74  raise ValueError(
75  "Passed non-contiguous segment to "
76  "build_ideal_helix for %s" % tempres.get_molecule())
77 
78  # New residue particle will replace the TempResidue's existing
79  # (empty) hierarchy
80  rp = IMP.Particle(model)
81  rp.set_name("Residue_%i" % tempres.get_index())
82 
83  # Copy the original residue type and index
84  this_res = IMP.atom.Residue.setup_particle(rp, tempres.get_hierarchy())
85 
86  # Create the CAlpha
87  ap = IMP.Particle(model)
89  x = 2.3 * cos(n * 2 * pi / 3.6)
90  y = 2.3 * sin(n * 2 * pi / 3.6)
91  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
92  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
93  d.set_radius(1.0)
94  # Decorating as Atom also decorates as Mass
95  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA)
96  IMP.atom.Mass(ap).set_mass(110.0)
97  this_res.add_child(a)
98 
99  # Add this structure to the TempResidue
100  tempres.set_structure(this_res)
101  created_hiers.append(this_res)
102  prev_idx = tempres.get_index()
103  # the coord finder is for placing beads (later)
104  coord_finder.add_residues(created_hiers)
105 
106 
107 class _SystemBase(object):
108  """The base class for System, State and Molecule
109  classes. It contains shared functions in common to these classes
110  """
111 
112  def __init__(self, model=None):
113  if model is None:
114  self.model = IMP.Model()
115  else:
116  self.model = model
117 
118  def _create_hierarchy(self):
119  """create a new hierarchy"""
120  tmp_part = IMP.Particle(self.model)
121  return IMP.atom.Hierarchy.setup_particle(tmp_part)
122 
123  def _create_child(self, parent_hierarchy):
124  """create a new hierarchy, set it as child of the input
125  one, and return it"""
126  child_hierarchy = self._create_hierarchy()
127  parent_hierarchy.add_child(child_hierarchy)
128  return child_hierarchy
129 
130  def build(self):
131  """Build the coordinates of the system.
132  Loop through stored(?) hierarchies and set up coordinates!"""
133  pass
134 
135 
136 class System(_SystemBase):
137  """Represent the root node of the global IMP.atom.Hierarchy."""
138 
139  _all_systems = set()
140 
141  def __init__(self, model=None, name="System"):
142  """Constructor.
143 
144  @param model The IMP::Model in which to construct this system.
145  @param name The name of the top-level hierarchy node.
146  """
147  _SystemBase.__init__(self, model)
148  self._number_of_states = 0
149  self._protocol_output = []
150  self.states = []
151  self.built = False
152 
153  System._all_systems.add(weakref.ref(self))
154 
155  # the root hierarchy node
156  self.hier = self._create_hierarchy()
157  self.hier.set_name(name)
158  self.hier._pmi2_system = weakref.ref(self)
159 
160  def __del__(self):
161  System._all_systems = set(x for x in System._all_systems
162  if x() not in (None, self))
163 
164  def get_states(self):
165  """Get a list of all State objects in this system"""
166  return self.states
167 
168  def create_state(self):
169  """Makes and returns a new IMP.pmi.topology.State in this system"""
170  self._number_of_states += 1
171  state = State(self, self._number_of_states-1)
172  self.states.append(state)
173  return state
174 
175  def __repr__(self):
176  return self.hier.get_name()
177 
179  """Returns the total number of states generated"""
180  return self._number_of_states
181 
182  def get_hierarchy(self):
183  """Return the top-level IMP.atom.Hierarchy node for this system"""
184  return self.hier
185 
186  def build(self, **kwargs):
187  """Build all states"""
188  if not self.built:
189  for state in self.states:
190  state.build(**kwargs)
191  self.built = True
192  return self.hier
193 
194  def add_protocol_output(self, p):
195  """Capture details of the modeling protocol.
196  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
197  """
198  self._protocol_output.append(p)
199 # p._each_metadata.append(self._metadata)
200 # p._file_datasets.append(self._file_dataset)
201  for state in self.states:
202  state._add_protocol_output(p, self)
203 
204 
205 class State(_SystemBase):
206  """Stores a list of Molecules all with the same State index.
207  Also stores number of copies of each Molecule for easy selection.
208  """
209  def __init__(self, system, state_index):
210  """Define a new state
211  @param system the PMI System
212  @param state_index the index of the new state
213  @note It's expected that you will not use this constructor directly,
214  but rather create it with System.create_state()
215  """
216  self.model = system.get_hierarchy().get_model()
217  self.system = system
218  self.hier = self._create_child(system.get_hierarchy())
219  self.short_name = self.long_name = "State_" + str(state_index)
220  self.hier.set_name(self.short_name)
221  # key is molecule name. value are the molecule copies!
222  self.molecules = IMP.pmi.tools.OrderedDict()
223  IMP.atom.State.setup_particle(self.hier, state_index)
224  self.built = False
225  self._protocol_output = []
226  for p in system._protocol_output:
227  self._add_protocol_output(p, system)
228 
229  def __repr__(self):
230  return self.system.__repr__()+'.'+self.hier.get_name()
231 
232  def _add_protocol_output(self, p, system):
233  state = p._add_state(self)
234  self._protocol_output.append((p, state))
235  state.model = system.model
236  state.prot = self.hier
237 
238  def get_molecules(self):
239  """Return a dictionary where key is molecule name and value
240  is a list of all copies of that molecule in setup order"""
241  return self.molecules
242 
243  def get_molecule(self, name, copy_num=0):
244  """Access a molecule by name and copy number
245  @param name The molecule name used during setup
246  @param copy_num The copy number based on input order.
247  Default: 0. Set to 'all' to get all copies
248  """
249  if name not in self.molecules:
250  raise KeyError("Could not find molname %s" % name)
251  if copy_num == 'all':
252  return self.molecules[name]
253  else:
254  return self.molecules[name][copy_num]
255 
256  def create_molecule(self, name, sequence='', chain_id='',
257  alphabet=IMP.pmi.alphabets.amino_acid):
258  """Create a new Molecule within this State
259  @param name the name of the molecule (string);
260  it must not be already used
261  @param sequence sequence (string)
262  @param chain_id Chain ID to assign to this molecule
263  @param alphabet Mapping from FASTA codes to residue types
264  """
265  # check whether the molecule name is already assigned
266  if name in self.molecules:
267  raise ValueError('Cannot use a molecule name already used')
268 
269  # check for something that looks like a copy number
270  if re.search(r'\.\d+$', name):
271  warnings.warn(
272  "It is recommended not to end the molecule name with "
273  ".(number) as it may be confused with the copy number "
274  "(the copy number for new molecules is always 0, so to "
275  "select this molecule, use '%s.0'). Use create_clone() or "
276  "create_copy() instead if a copy of an existing molecule "
277  "is desired." % name, IMP.pmi.ParameterWarning)
278 
279  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
280  alphabet=alphabet)
281  self.molecules[name] = [mol]
282  return mol
283 
284  def get_hierarchy(self):
285  """Get the IMP.atom.Hierarchy node for this state"""
286  return self.hier
287 
288  def get_number_of_copies(self, molname):
289  """Get the number of copies of the given molecule (by name)
290 
291  @param molname The name of the molecule
292  """
293  return len(self.molecules[molname])
294 
295  def _register_copy(self, molecule):
296  molname = molecule.get_hierarchy().get_name()
297  self.molecules[molname].append(molecule)
298 
299  def build(self, **kwargs):
300  """Build all molecules (automatically makes clones)"""
301  if not self.built:
302  for molname in self.molecules:
303  for mol in reversed(self.molecules[molname]):
304  mol.build(**kwargs)
305  self.built = True
306  return self.hier
307 
308 
309 # Track residues read from PDB files
310 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
311 
312 
313 class _RepresentationHandler(object):
314  """Handle PMI representation and use it to populate that of any attached
315  ProtocolOutput objects"""
316  def __init__(self, name, pos, pdb_elements):
317  self.name = name
318  self.pos = pos
319  self.last_index = None
320  self.last_pdb_index = None
321  self.pdb_for_residue = {}
322  for residues, pdb in pdb_elements:
323  for r in residues:
324  self.pdb_for_residue[r.get_index()] = pdb
325 
326  def _get_pdb(self, h):
327  """Return a PDBElement if the given hierarchy was read from a
328  PDB file"""
330  rind = IMP.atom.Residue(h).get_index()
331  return self.pdb_for_residue.get(rind, None)
332 
333  def __call__(self, res):
334  """Handle a single residue"""
335  if len(self.pos) == 0:
336  return
337  h = res.hier
338  pi = h.get_particle_index()
339  # Do nothing if we already saw this hierarchy
340  if self.last_index is None or pi != self.last_index:
341  pdb = self._get_pdb(h)
342  self.last_index = pi
343  if pdb:
344  assert(IMP.atom.Fragment.get_is_setup(h.get_parent()))
345  frag = IMP.atom.Fragment(h.get_parent())
346  fragi = frag.get_particle_index()
347  # Do nothing if we already saw this PDB fragment
348  if self.last_pdb_index is not None \
349  and self.last_pdb_index == fragi:
350  return
351  self.last_pdb_index = fragi
352  indices = frag.get_residue_indexes()
353  for p, state in self.pos:
354  p.add_pdb_element(state, self.name,
355  indices[0], indices[-1], pdb.offset,
356  pdb.filename, pdb.chain_id, frag)
358  frag = IMP.atom.Fragment(h)
359  indices = frag.get_residue_indexes()
360  for p, state in self.pos:
361  p.add_bead_element(state, self.name,
362  indices[0], indices[-1], 1, h)
364  resind = IMP.atom.Residue(h).get_index()
365  for p, state in self.pos:
366  p.add_bead_element(state, self.name, resind, resind, 1, h)
367  else:
368  raise TypeError("Unhandled hierarchy %s" % str(h))
369 
370 
371 class Molecule(_SystemBase):
372  """Stores a named protein chain.
373  This class is constructed from within the State class.
374  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
375  Structure is read using this class.
376  Resolutions and copies can be registered, but are only created
377  when build() is called.
378 
379  A Molecule acts like a simple Python list of residues, and can be indexed
380  by integer (starting at zero) or by string (starting at 1).
381  """
382 
383  def __init__(self, state, name, sequence, chain_id, copy_num,
384  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid):
385  """The user should not call this directly; instead call
386  State.create_molecule()
387 
388  @param state The parent PMI State
389  @param name The name of the molecule (string)
390  @param sequence Sequence (string)
391  @param chain_id The chain of this molecule
392  @param copy_num Store the copy number
393  @param mol_to_clone The original molecule (for cloning ONLY)
394  @note It's expected that you will not use this constructor directly,
395  but rather create a Molecule with State.create_molecule()
396  """
397  # internal data storage
398  self.model = state.get_hierarchy().get_model()
399  self.state = state
400  self.sequence = sequence
401  self.built = False
402  self.mol_to_clone = mol_to_clone
403  self.alphabet = alphabet
404  self.representations = [] # list of stuff to build
405  self._pdb_elements = []
406  # residues with representation
407  self._represented = IMP.pmi.tools.OrderedSet()
408  # helps you place beads by storing structure
409  self.coord_finder = _FindCloseStructure()
410  # list of OrderedSets of tempresidues set to ideal helix
411  self._ideal_helices = []
412 
413  # create root node and set it as child to passed parent hierarchy
414  self.hier = self._create_child(self.state.get_hierarchy())
415  self.hier.set_name(name)
416  IMP.atom.Copy.setup_particle(self.hier, copy_num)
417  self._name_with_copy = "%s.%d" % (name, copy_num)
418  # store the sequence
419  self.chain = IMP.atom.Chain.setup_particle(self.hier, chain_id)
420  self.chain.set_sequence(self.sequence)
421  # create TempResidues from the sequence (if passed)
422  self.residues = []
423  for ns, s in enumerate(sequence):
424  r = TempResidue(self, s, ns+1, ns, alphabet)
425  self.residues.append(r)
426 
427  def __repr__(self):
428  return self.state.__repr__() + '.' + self.get_name() + '.' + \
429  str(IMP.atom.Copy(self.hier).get_copy_index())
430 
431  def __getitem__(self, val):
432  if isinstance(val, int):
433  return self.residues[val]
434  elif isinstance(val, str):
435  return self.residues[int(val)-1]
436  elif isinstance(val, slice):
437  return IMP.pmi.tools.OrderedSet(self.residues[val])
438  else:
439  raise TypeError("Indexes must be int or str")
440 
441  def get_hierarchy(self):
442  """Return the IMP Hierarchy corresponding to this Molecule"""
443  return self.hier
444 
445  def get_name(self):
446  """Return this Molecule name"""
447  return self.hier.get_name()
448 
449  def get_state(self):
450  """Return the State containing this Molecule"""
451  return self.state
452 
453  def get_ideal_helices(self):
454  """Returns list of OrderedSets with requested ideal helices"""
455  return self._ideal_helices
456 
457  def residue_range(self, a, b, stride=1):
458  """Get residue range from a to b, inclusive.
459  Use integers to get 0-indexing, or strings to get PDB-indexing"""
460  if isinstance(a, int) and isinstance(b, int) \
461  and isinstance(stride, int):
462  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
463  elif isinstance(a, str) and isinstance(b, str) \
464  and isinstance(stride, int):
465  return IMP.pmi.tools.OrderedSet(
466  self.residues[int(a)-1:int(b):stride])
467  else:
468  raise TypeError("Range ends must be int or str. "
469  "Stride must be int.")
470 
471  def get_residues(self):
472  """Return all modeled TempResidues as a set"""
473  all_res = IMP.pmi.tools.OrderedSet(self.residues)
474  return all_res
475 
476  def get_represented(self):
477  """Return set of TempResidues that have representation"""
478  return self._represented
479 
481  """Return a set of TempResidues that have associated structure
482  coordinates"""
483  atomic_res = IMP.pmi.tools.OrderedSet()
484  for res in self.residues:
485  if res.get_has_structure():
486  atomic_res.add(res)
487  return atomic_res
488 
490  """Return a set of TempResidues that don't have associated
491  structure coordinates"""
492  non_atomic_res = IMP.pmi.tools.OrderedSet()
493  for res in self.residues:
494  if not res.get_has_structure():
495  non_atomic_res.add(res)
496  return non_atomic_res
497 
498  def create_copy(self, chain_id):
499  """Create a new Molecule with the same name and sequence but a
500  higher copy number. Returns the Molecule. No structure or
501  representation will be copied!
502 
503  @param chain_id Chain ID of the new molecule
504  """
505  mol = Molecule(
506  self.state, self.get_name(), self.sequence, chain_id,
507  copy_num=self.state.get_number_of_copies(self.get_name()))
508  self.state._register_copy(mol)
509  return mol
510 
511  def create_clone(self, chain_id):
512  """Create a Molecule clone (automatically builds same structure
513  and representation)
514 
515  @param chain_id If you want to set the chain ID of the copy
516  to something
517  @note You cannot add structure or representations to a clone!
518  """
519  mol = Molecule(
520  self.state, self.get_name(), self.sequence, chain_id,
521  copy_num=self.state.get_number_of_copies(self.get_name()),
522  mol_to_clone=self)
523  self.state._register_copy(mol)
524  return mol
525 
526  def add_structure(self, pdb_fn, chain_id, res_range=[],
527  offset=0, model_num=None, ca_only=False,
528  soft_check=False):
529  """Read a structure and store the coordinates.
530  @return the atomic residues (as a set)
531  @param pdb_fn The file to read
532  @param chain_id Chain ID to read
533  @param res_range Add only a specific set of residues from the PDB
534  file. res_range[0] is the starting and res_range[1]
535  is the ending residue index.
536  @param offset Apply an offset to the residue indexes of the PDB
537  file. This number is added to the PDB sequence.
538  @param model_num Read multi-model PDB and return that model
539  @param ca_only Only read the CA positions from the PDB file
540  @param soft_check If True, it only warns if there are sequence
541  mismatches between the PDB and the Molecule (FASTA)
542  sequence, and uses the sequence from the PDB.
543  If False (Default), it raises an error when there
544  are sequence mismatches.
545  @note If you are adding structure without a FASTA file, set soft_check
546  to True.
547  """
548  if self.mol_to_clone is not None:
549  raise ValueError('You cannot call add_structure() for a clone')
550 
551  self.pdb_fn = pdb_fn
552 
553  # get IMP.atom.Residues from the pdb file
554  rhs = system_tools.get_structure(self.model, pdb_fn, chain_id,
555  res_range, offset,
556  ca_only=ca_only)
557  self.coord_finder.add_residues(rhs)
558 
559  if len(self.residues) == 0:
560  warnings.warn(
561  "Substituting PDB residue type with FASTA residue type. "
562  "Potentially dangerous.", IMP.pmi.StructureWarning)
563 
564  # Store info for ProtocolOutput usage later
565  self._pdb_elements.append(
566  (rhs, _PDBElement(offset=offset, filename=pdb_fn,
567  chain_id=chain_id)))
568 
569  # load those into TempResidue object
570  # collect integer indexes of atomic residues to return
571  atomic_res = IMP.pmi.tools.OrderedSet()
572  for nrh, rh in enumerate(rhs):
573  pdb_idx = rh.get_index()
574  raw_idx = pdb_idx - 1
575 
576  # add ALA to fill in gaps
577  while len(self.residues) < pdb_idx:
578  r = TempResidue(self, 'A', len(self.residues)+1,
579  len(self.residues),
580  IMP.pmi.alphabets.amino_acid)
581  self.residues.append(r)
582  self.sequence += 'A'
583 
584  internal_res = self.residues[raw_idx]
585  if len(self.sequence) < raw_idx:
586  self.sequence += IMP.atom.get_one_letter_code(
587  rh.get_residue_type())
588  internal_res.set_structure(rh, soft_check)
589  atomic_res.add(internal_res)
590 
591  self.chain.set_sequence(self.sequence)
592  return atomic_res
593 
595  residues=None,
596  resolutions=[],
597  bead_extra_breaks=[],
598  bead_ca_centers=True,
599  bead_default_coord=[0, 0, 0],
600  density_residues_per_component=None,
601  density_prefix=None,
602  density_force_compute=False,
603  density_voxel_size=1.0,
604  setup_particles_as_densities=False,
605  ideal_helix=False,
606  color=None):
607  """Set the representation for some residues. Some options
608  (beads, ideal helix) operate along the backbone. Others (density
609  options) are volumetric.
610  Some of these you can combine e.g., beads+densities or helix+densities
611  See @ref pmi_resolution
612  @param residues Set of PMI TempResidues for adding the representation.
613  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
614  If None, will select all residues for this Molecule.
615  @param resolutions Resolutions for beads representations.
616  If structured, will average along backbone, breaking at
617  sequence breaks. If unstructured, will just create beads.
618  Pass an integer or list of integers
619  @param bead_extra_breaks Additional breakpoints for splitting beads.
620  The value can be the 0-ordered position, after which it'll
621  insert the break.
622  Alternatively pass PDB-style (1-ordered) indices as a string.
623  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
624  @param bead_ca_centers Set to True if you want the resolution=1 beads
625  to be at CA centers (otherwise will average atoms to get
626  center). Defaults to True.
627  @param bead_default_coord Advanced feature. Normally beads are placed
628  at the nearest structure. If no structure provided (like an
629  all bead molecule), the beads go here.
630  @param density_residues_per_component Create density (Gaussian
631  Mixture Model) for these residues. Must also supply
632  density_prefix
633  @param density_prefix Prefix (assuming '.txt') to read components
634  from or write to.
635  If exists, will read unless you set density_force_compute=True.
636  Will also write map (prefix+'.mrc').
637  Must also supply density_residues_per_component.
638  @param density_force_compute Set true to force overwrite density file.
639  @param density_voxel_size Advanced feature. Set larger if densities
640  taking too long to rasterize.
641  Set to 0 if you don't want to create the MRC file
642  @param setup_particles_as_densities Set to True if you want each
643  particle to be its own density.
644  Useful for all-atom models or flexible beads.
645  Mutually exclusive with density_ options
646  @param ideal_helix Create idealized helix structures for these
647  residues at resolution 1.
648  Any other resolutions passed will be coarsened from there.
649  Resolution 0 will not work; you may have to use MODELLER
650  to do that (for now).
651  @param color the color applied to the hierarchies generated.
652  Format options: tuple (r,g,b) with values 0 to 1;
653  float (from 0 to 1, a map from Blue to Green to Red);
654  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
655  a hex RGB string (e.g. "#ff0000");
656  an IMP.display.Color object
657  @note You cannot call add_representation multiple times for the
658  same residues.
659  """ # noqa: E501
660 
661  # can't customize clones
662  if self.mol_to_clone is not None:
663  raise ValueError(
664  'You cannot call add_representation() for a clone.'
665  ' Maybe use a copy instead.')
666 
667  # format input
668  if residues is None:
669  res = IMP.pmi.tools.OrderedSet(self.residues)
670  elif residues == self:
671  res = IMP.pmi.tools.OrderedSet(self.residues)
672  elif type(residues) is IMP.pmi.topology.TempResidue:
673  res = IMP.pmi.tools.OrderedSet([residues])
674  elif hasattr(residues, '__iter__'):
675  if len(residues) == 0:
676  raise Exception(
677  'You passed an empty set to add_representation')
678  if type(residues) is IMP.pmi.tools.OrderedSet \
679  and type(next(iter(residues))) is TempResidue:
680  res = residues
681  elif (type(residues) is set
682  and type(next(iter(residues))) is TempResidue):
683  res = IMP.pmi.tools.OrderedSet(residues)
684  elif type(residues) is list and type(residues[0]) is TempResidue:
685  res = IMP.pmi.tools.OrderedSet(residues)
686  else:
687  raise Exception("You passed an iterable of something other "
688  "than TempResidue", res)
689  else:
690  raise Exception("add_representation: you must pass a set of "
691  "residues or nothing(=all residues)")
692 
693  # check that each residue has not been represented yet
694  ov = res & self._represented
695  if ov:
696  raise Exception('You have already added representation for ' +
697  self.get_hierarchy().get_name() + ': ' +
698  ov.__repr__())
699  self._represented |= res
700 
701  # check you aren't creating multiple resolutions without structure
702  if not hasattr(resolutions, '__iter__'):
703  if type(resolutions) is int:
704  resolutions = [resolutions]
705  else:
706  raise Exception("you tried to pass resolutions that are not "
707  "int or list-of-int")
708  if len(resolutions) > 1 and not ideal_helix:
709  for r in res:
710  if not r.get_has_structure():
711  raise Exception(
712  'You are creating multiple resolutions for '
713  'unstructured regions. This will have unexpected '
714  'results.')
715 
716  # check density info is consistent
717  if density_residues_per_component or density_prefix:
718  if not density_residues_per_component and density_prefix:
719  raise Exception(
720  'If requesting density, must provide '
721  'density_residues_per_component AND density_prefix')
722  if density_residues_per_component and setup_particles_as_densities:
723  raise Exception(
724  'Cannot create both volumetric density '
725  '(density_residues_per_component) AND '
726  'individual densities (setup_particles_as_densities) '
727  'in the same representation')
728  if len(resolutions) > 1 and setup_particles_as_densities:
729  raise Exception(
730  'You have multiple bead resolutions but are attempting to '
731  'set them all up as individual Densities. '
732  'This could have unexpected results.')
733 
734  # check helix not accompanied by other resolutions
735  # (densities OK though!)
736  if ideal_helix:
737  if 0 in resolutions:
738  raise Exception(
739  "For ideal helices, cannot build resolution 0: "
740  "you have to do that in MODELLER")
741  if 1 not in resolutions:
742  resolutions = [1] + list(resolutions)
743  self._ideal_helices.append(res)
744 
745  # check residues are all part of this molecule:
746  for r in res:
747  if r.get_molecule() != self:
748  raise Exception(
749  'You are adding residues from a different molecule to',
750  self.__repr__())
751 
752  # unify formatting for extra breaks
753  breaks = []
754  for b in bead_extra_breaks:
755  if type(b) == str:
756  breaks.append(int(b)-1)
757  else:
758  breaks.append(b)
759  # store the representation group
760  self.representations.append(_Representation(
761  res, resolutions, breaks, bead_ca_centers, bead_default_coord,
762  density_residues_per_component, density_prefix,
763  density_force_compute, density_voxel_size,
764  setup_particles_as_densities, ideal_helix, color))
765 
766  def _all_protocol_output(self):
767  return self.state._protocol_output
768 
769  def build(self):
770  """Create all parts of the IMP hierarchy
771  including Atoms, Residues, and Fragments/Representations and,
772  finally, Copies.
773  Will only build requested representations.
774  @note Any residues assigned a resolution must have an IMP.atom.Residue
775  hierarchy containing at least a CAlpha. For missing residues,
776  these can be constructed from the PDB file.
777  """
778  if not self.built:
779  # Add molecule name and sequence to any ProtocolOutput objects
780  name = self.hier.get_name()
781  for po, state in self._all_protocol_output():
782  po.create_component(state, name, True,
783  asym_name=self._name_with_copy)
784  po.add_component_sequence(state, name, self.sequence,
785  asym_name=self._name_with_copy,
786  alphabet=self.alphabet)
787  # if requested, clone structure and representations
788  # BEFORE building original
789  if self.mol_to_clone is not None:
790  for nr, r in enumerate(self.mol_to_clone.residues):
791  if r.get_has_structure():
792  clone = IMP.atom.create_clone(r.get_hierarchy())
793  self.residues[nr].set_structure(
794  IMP.atom.Residue(clone), soft_check=True)
795  for old_rep in self.mol_to_clone.representations:
796  new_res = IMP.pmi.tools.OrderedSet()
797  for r in old_rep.residues:
798  new_res.add(self.residues[r.get_internal_index()])
799  self._represented.add(
800  self.residues[r.get_internal_index()])
801  new_rep = _Representation(
802  new_res, old_rep.bead_resolutions,
803  old_rep.bead_extra_breaks, old_rep.bead_ca_centers,
804  old_rep.bead_default_coord,
805  old_rep.density_residues_per_component,
806  old_rep.density_prefix, False,
807  old_rep.density_voxel_size,
808  old_rep.setup_particles_as_densities,
809  old_rep.ideal_helix, old_rep.color)
810  self.representations.append(new_rep)
811  self.coord_finder = self.mol_to_clone.coord_finder
812 
813  # give a warning for all residues that don't have representation
814  no_rep = [r for r in self.residues if r not in self._represented]
815  if len(no_rep) > 0:
816  warnings.warn(
817  'Residues without representation in molecule %s: %s'
818  % (self.get_name(), system_tools.resnums2str(no_rep)),
820 
821  # first build any ideal helices (fills in structure for
822  # the TempResidues)
823  for rep in self.representations:
824  if rep.ideal_helix:
825  _build_ideal_helix(self.model, rep.residues,
826  self.coord_finder)
827 
828  # build all the representations
829  built_reps = []
830 
831  rephandler = _RepresentationHandler(
832  self._name_with_copy, list(self._all_protocol_output()),
833  self._pdb_elements)
834 
835  for rep in self.representations:
836  built_reps += system_tools.build_representation(
837  self, rep, self.coord_finder, rephandler)
838 
839  # sort them before adding as children
840  built_reps.sort(
841  key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
842  for br in built_reps:
843  self.hier.add_child(br)
844  br.update_parents()
845  self.built = True
846 
847  for res in self.residues:
848  # first off, store the highest resolution available
849  # in residue.hier
850  new_ps = IMP.atom.Selection(
851  self.hier,
852  residue_index=res.get_index(),
853  resolution=1).get_selected_particles()
854  if len(new_ps) > 0:
855  new_p = new_ps[0]
856  if IMP.atom.Atom.get_is_setup(new_p):
857  # if only found atomic, store the residue
858  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
859  else:
860  # otherwise just store what you found
861  new_hier = IMP.atom.Hierarchy(new_p)
862  res.hier = new_hier
863  rephandler(res)
864  else:
865  res.hier = None
866  self._represented = IMP.pmi.tools.OrderedSet(
867  [a for a in self._represented])
868  print('done building', self.get_hierarchy())
869  return self.hier
870 
871  def get_particles_at_all_resolutions(self, residue_indexes=None):
872  """Helpful utility for getting particles at all resolutions from
873  this molecule. Can optionally pass a set of residue indexes"""
874  if not self.built:
875  raise Exception(
876  "Cannot get all resolutions until you build the Molecule")
877  if residue_indexes is None:
878  residue_indexes = [r.get_index() for r in self.get_residues()]
880  self.get_hierarchy(), residue_indexes=residue_indexes)
881  return ps
882 
883 
884 class _Representation(object):
885  """Private class just to store a representation request"""
886  def __init__(self,
887  residues,
888  bead_resolutions,
889  bead_extra_breaks,
890  bead_ca_centers,
891  bead_default_coord,
892  density_residues_per_component,
893  density_prefix,
894  density_force_compute,
895  density_voxel_size,
896  setup_particles_as_densities,
897  ideal_helix,
898  color):
899  self.residues = residues
900  self.bead_resolutions = bead_resolutions
901  self.bead_extra_breaks = bead_extra_breaks
902  self.bead_ca_centers = bead_ca_centers
903  self.bead_default_coord = bead_default_coord
904  self.density_residues_per_component = density_residues_per_component
905  self.density_prefix = density_prefix
906  self.density_force_compute = density_force_compute
907  self.density_voxel_size = density_voxel_size
908  self.setup_particles_as_densities = setup_particles_as_densities
909  self.ideal_helix = ideal_helix
910  self.color = color
911 
912 
913 class _FindCloseStructure(object):
914  """Utility to get the nearest observed coordinate"""
915  def __init__(self):
916  self.coords = []
917 
918  def add_residues(self, residues):
919  for r in residues:
920  idx = IMP.atom.Residue(r).get_index()
921  catypes = [IMP.atom.AT_CA, system_tools._AT_HET_CA]
922  ca = IMP.atom.Selection(
923  r, atom_types=catypes).get_selected_particles()
924  p = IMP.atom.Selection(
925  r, atom_type=IMP.atom.AtomType("P")).get_selected_particles()
926  if len(ca) == 1:
927  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
928  self.coords.append([idx, xyz])
929  elif len(p) == 1:
930  xyz = IMP.core.XYZ(p[0]).get_coordinates()
931  self.coords.append([idx, xyz])
932  else:
933  raise ValueError("_FindCloseStructure: wrong selection")
934 
935  self.coords.sort(key=itemgetter(0))
936 
937  def find_nearest_coord(self, query):
938  if self.coords == []:
939  return None
940  keys = [r[0] for r in self.coords]
941  pos = bisect_left(keys, query)
942  if pos == 0:
943  ret = self.coords[0]
944  elif pos == len(self.coords):
945  ret = self.coords[-1]
946  else:
947  before = self.coords[pos - 1]
948  after = self.coords[pos]
949  if after[0] - query < query - before[0]:
950  ret = after
951  else:
952  ret = before
953  return ret[1]
954 
955 
956 class Sequences(object):
957  """A dictionary-like wrapper for reading and storing sequence data.
958  Keys are FASTA sequence names, and each value a string of one-letter
959  codes."""
960  def __init__(self, fasta_fn, name_map=None):
961  """Read a FASTA file and extract all the requested sequences
962  @param fasta_fn sequence file
963  @param name_map dictionary mapping the FASTA name to final stored name
964  """
965  self.sequences = IMP.pmi.tools.OrderedDict()
966  self.read_sequences(fasta_fn, name_map)
967 
968  def __len__(self):
969  return len(self.sequences)
970 
971  def __contains__(self, x):
972  return x in self.sequences
973 
974  def __getitem__(self, key):
975  if type(key) is int:
976  allseqs = list(self.sequences.keys())
977  try:
978  return self.sequences[allseqs[key]]
979  except IndexError:
980  raise IndexError("You tried to access sequence number %d "
981  "but there's only %d" % (key, len(allseqs)))
982  else:
983  return self.sequences[key]
984 
985  def __iter__(self):
986  return self.sequences.__iter__()
987 
988  def __repr__(self):
989  ret = ''
990  for s in self.sequences:
991  ret += '%s\t%s\n' % (s, self.sequences[s])
992  return ret
993 
994  def read_sequences(self, fasta_fn, name_map=None):
995  code = None
996  seq = None
997  with open(fasta_fn, 'r') as fh:
998  for (num, line) in enumerate(fh):
999  if line.startswith('>'):
1000  if seq is not None:
1001  self.sequences[code] = seq.strip('*')
1002  code = line.rstrip()[1:]
1003  if name_map is not None:
1004  try:
1005  code = name_map[code]
1006  except KeyError:
1007  pass
1008  seq = ''
1009  else:
1010  line = line.rstrip()
1011  if line: # Skip blank lines
1012  if seq is None:
1013  raise Exception(
1014  "Found FASTA sequence before first header "
1015  "at line %d: %s" % (num + 1, line))
1016  seq += line
1017  if seq is not None:
1018  self.sequences[code] = seq.strip('*')
1019 
1020 
1021 class PDBSequences(object):
1022  """Data structure for reading and storing sequence data from PDBs.
1023 
1024  @see fasta_pdb_alignments."""
1025  def __init__(self, model, pdb_fn, name_map=None):
1026  """Read a PDB file and return all sequences for each contiguous
1027  fragment
1028  @param pdb_fn file
1029  @param name_map dictionary mapping the pdb chain id to final
1030  stored name
1031  """
1032  self.model = model
1033  # self.sequences data-structure: (two-key dictionary)
1034  # it contains all contiguous fragments:
1035  # chain_id, tuples indicating first and last residue, sequence
1036  # example:
1037  # key1, key2, value
1038  # A (77, 149) VENPSLDLEQYAASYSGLMR....
1039  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
1040  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
1041  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
1042  self.sequences = IMP.pmi.tools.OrderedDict()
1043  self.read_sequences(pdb_fn, name_map)
1044 
1045  def read_sequences(self, pdb_fn, name_map):
1046  read_file = IMP.atom.read_pdb
1047  if pdb_fn.endswith('.cif'):
1048  read_file = IMP.atom.read_mmcif
1049  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
1050  cs = IMP.atom.get_by_type(t, IMP.atom.CHAIN_TYPE)
1051  for c in cs:
1052  id = IMP.atom.Chain(c).get_id()
1053  print(id)
1054  if name_map:
1055  try:
1056  id = name_map[id]
1057  except KeyError:
1058  print("Chain ID %s not in name_map, skipping" % id)
1059  continue
1060  rs = IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE)
1061  rids = []
1062  rids_olc_dict = {}
1063  for r in rs:
1064  dr = IMP.atom.Residue(r)
1065  rid = dr.get_index()
1066 
1067  isprotein = dr.get_is_protein()
1068  isrna = dr.get_is_rna()
1069  isdna = dr.get_is_dna()
1070  if isprotein:
1071  olc = IMP.atom.get_one_letter_code(dr.get_residue_type())
1072  rids.append(rid)
1073  rids_olc_dict[rid] = olc
1074  elif isdna:
1075  if dr.get_residue_type() == IMP.atom.DADE:
1076  olc = "A"
1077  if dr.get_residue_type() == IMP.atom.DURA:
1078  olc = "U"
1079  if dr.get_residue_type() == IMP.atom.DCYT:
1080  olc = "C"
1081  if dr.get_residue_type() == IMP.atom.DGUA:
1082  olc = "G"
1083  if dr.get_residue_type() == IMP.atom.DTHY:
1084  olc = "T"
1085  rids.append(rid)
1086  rids_olc_dict[rid] = olc
1087  elif isrna:
1088  if dr.get_residue_type() == IMP.atom.ADE:
1089  olc = "A"
1090  if dr.get_residue_type() == IMP.atom.URA:
1091  olc = "U"
1092  if dr.get_residue_type() == IMP.atom.CYT:
1093  olc = "C"
1094  if dr.get_residue_type() == IMP.atom.GUA:
1095  olc = "G"
1096  if dr.get_residue_type() == IMP.atom.THY:
1097  olc = "T"
1098  rids.append(rid)
1099  rids_olc_dict[rid] = olc
1100  group_rids = self.group_indexes(rids)
1101  contiguous_sequences = IMP.pmi.tools.OrderedDict()
1102  for group in group_rids:
1103  sequence_fragment = ""
1104  for i in range(group[0], group[1]+1):
1105  sequence_fragment += rids_olc_dict[i]
1106  contiguous_sequences[group] = sequence_fragment
1107  self.sequences[id] = contiguous_sequences
1108 
1109  def group_indexes(self, indexes):
1110  from itertools import groupby
1111  ranges = []
1112  for k, g in groupby(enumerate(indexes), lambda x: x[0]-x[1]):
1113  group = [x[1] for x in g]
1114  ranges.append((group[0], group[-1]))
1115  return ranges
1116 
1117 
1118 def fasta_pdb_alignments(fasta_sequences, pdb_sequences, show=False):
1119  '''This function computes and prints the alignment between the
1120  fasta file and the pdb sequence, computes the offsets for each contiguous
1121  fragment in the PDB.
1122  @param fasta_sequences IMP.pmi.topology.Sequences object
1123  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1124  @param show boolean default False, if True prints the alignments.
1125  The input objects should be generated using map_name dictionaries
1126  such that fasta_id
1127  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1128  Returns a dictionary of offsets, organized by peptide range (group):
1129  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1130  from Bio import pairwise2
1131  from Bio.pairwise2 import format_alignment
1132  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1133  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1134  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1135  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1136  offsets = IMP.pmi.tools.OrderedDict()
1137  for name in fasta_sequences.sequences:
1138  print(name)
1139  seq_fasta = fasta_sequences.sequences[name]
1140  if name not in pdb_sequences.sequences:
1141  print("Fasta id %s not in pdb names, aligning against every "
1142  "pdb chain" % name)
1143  pdbnames = pdb_sequences.sequences.keys()
1144  else:
1145  pdbnames = [name]
1146  for pdbname in pdbnames:
1147  for group in pdb_sequences.sequences[pdbname]:
1148  if group[1] - group[0] + 1 < 7:
1149  continue
1150  seq_frag_pdb = pdb_sequences.sequences[pdbname][group]
1151  if show:
1152  print("########################")
1153  print(" ")
1154  print("protein name", pdbname)
1155  print("fasta id", name)
1156  print("pdb fragment", group)
1157  align = pairwise2.align.localms(seq_fasta, seq_frag_pdb,
1158  2, -1, -.5, -.1)[0]
1159  for a in [align]:
1160  offset = a[3] + 1 - group[0]
1161  if show:
1162  print("alignment sequence start-end",
1163  (a[3] + 1, a[4] + 1))
1164  print("offset from pdb to fasta index", offset)
1165  print(format_alignment(*a))
1166  if name not in offsets:
1167  offsets[pdbname] = {}
1168  if group not in offsets[pdbname]:
1169  offsets[pdbname][group] = offset
1170  else:
1171  if group not in offsets[pdbname]:
1172  offsets[pdbname][group] = offset
1173  return offsets
1174 
1175 
1176 class TempResidue(object):
1177  "Temporarily stores residue information, even without structure available."
1178  # Consider implementing __hash__ so you can select.
1179  def __init__(self, molecule, code, index, internal_index, alphabet):
1180  """setup a TempResidue
1181  @param molecule PMI Molecule to which this residue belongs
1182  @param code one-letter residue type code
1183  @param index PDB index
1184  @param internal_index The number in the sequence
1185  """
1186  # these attributes should be immutable
1187  self.molecule = molecule
1188  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1189  self.pdb_index = index
1190  self.internal_index = internal_index
1191  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1192  self.state_index = \
1193  IMP.atom.State(self.molecule.state.hier).get_state_index()
1194  # these are expected to change
1195  self._structured = False
1196  self.hier = IMP.atom.Residue.setup_particle(
1197  IMP.Particle(molecule.model), self.rtype, index)
1198 
1199  def __str__(self):
1200  return str(self.state_index) + "_" + self.molecule.get_name() + "_" \
1201  + str(self.copy_index) + "_" + self.get_code() \
1202  + str(self.get_index())
1203 
1204  def __repr__(self):
1205  return self.__str__()
1206 
1207  def __key(self):
1208  # this returns the immutable attributes only
1209  return (self.state_index, self.molecule, self.copy_index, self.rtype,
1210  self.pdb_index, self.internal_index)
1211 
1212  def __eq__(self, other):
1213  return type(other) == type(self) and self.__key() == other.__key()
1214 
1215  def __hash__(self):
1216  return hash(self.__key())
1217 
1218  def get_index(self):
1219  return self.pdb_index
1220 
1221  def get_internal_index(self):
1222  return self.internal_index
1223 
1224  def get_code(self):
1225  return IMP.atom.get_one_letter_code(self.get_residue_type())
1226 
1227  def get_residue_type(self):
1228  return self.rtype
1229 
1230  def get_hierarchy(self):
1231  return self.hier
1232 
1233  def get_molecule(self):
1234  return self.molecule
1235 
1236  def get_has_structure(self):
1237  return self._structured
1238 
1239  def set_structure(self, res, soft_check=False):
1240  if res.get_residue_type() != self.get_residue_type():
1241  if (res.get_residue_type() == IMP.atom.MSE
1242  and self.get_residue_type() == IMP.atom.MET):
1243  # MSE in the PDB file is OK to match with MET in the FASTA
1244  # sequence
1245  pass
1246  elif soft_check:
1247  # note from commit a2c13eaa1 we give priority to the
1248  # FASTA and not the PDB
1249  warnings.warn(
1250  'Inconsistency between FASTA sequence and PDB sequence. '
1251  'FASTA type %s %s and PDB type %s'
1252  % (self.get_index(), self.hier.get_residue_type(),
1253  res.get_residue_type()),
1255  self.hier.set_residue_type((self.get_residue_type()))
1256  self.rtype = self.get_residue_type()
1257  else:
1258  raise Exception(
1259  'ERROR: PDB residue index', self.get_index(), 'is',
1260  IMP.atom.get_one_letter_code(res.get_residue_type()),
1261  'and sequence residue is', self.get_code())
1262 
1263  for a in res.get_children():
1264  self.hier.add_child(a)
1265  atype = IMP.atom.Atom(a).get_atom_type()
1266  a.get_particle().set_name(
1267  'Atom %s of residue %i' % (atype.__str__().strip('"'),
1268  self.hier.get_index()))
1269  self._structured = True
1270 
1271 
1272 class TopologyReader(object):
1273  """Automatically setup Sytem and Degrees of Freedom with a formatted
1274  text file.
1275  The file is read in and each part of the topology is stored as a
1276  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1277  The topology file should be in a simple pipe-delimited format:
1278  @code{.txt}
1279 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1280 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1281 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1282 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1283 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1284 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1285 
1286  @endcode
1287 
1288  These are the fields you can enter:
1289  - `molecule_name`: Name of the molecule (chain). Serves as the parent
1290  hierarchy for this structure. Multiple copies of the same molecule
1291  can be created by appending a copy number after a period; if none is
1292  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1293  of Rpb2 or Rpb2.0).
1294  - `color`: The color used in the output RMF file. Uses
1295  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1296  (e.g. "red"), or R,G,B values as three comma-separated floating point
1297  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1298  starting with '#' (e.g. 0xff0000).
1299  - `fasta_fn`: Name of FASTA file containing this component.
1300  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1301  from the file is assumed to be a protein sequence. If it should instead
1302  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1303  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1304  FASTA file and treat it as RNA.
1305  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1306  If left empty, will set up as BEADS (you can also specify "BEADS")
1307  Can also write "IDEAL_HELIX".
1308  - `chain`: Chain ID of this domain in the PDB file.
1309  - `residue_range`: Comma delimited pair defining range.
1310  Can leave empty or use 'all' for entire sequence from PDB file.
1311  The second item in the pair can be END to select the last residue in the
1312  PDB chain.
1313  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1314  For example, an offset of -10 would match the first residue in the
1315  FASTA file (which is always numbered sequentially starting from 1) with
1316  residue 11 in the PDB file.
1317  - `bead_size`: The size (in residues) of beads used to model areas not
1318  covered by PDB coordinates. These will be built automatically.
1319  - `em_residues`: The number of Gaussians used to model the electron
1320  density of this domain. Set to zero if no EM fitting will be done.
1321  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1322  (and .mrc)
1323  - `rigid_body`: Leave empty if this object is not in a rigid body.
1324  Otherwise, this is a number corresponding to the rigid body containing
1325  this object. The number itself is just used for grouping things.
1326  - `super_rigid_body`: Add a mover that periodically moves several related
1327  domains as if they were a single large rigid body. In between such moves,
1328  the domains move independently. This can improve sampling.
1329  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1330  for all adjacent pairs of domains in the chain.
1331  - `flags` additional flags for advanced options
1332  @note All filenames are relative to the paths specified in the constructor.
1333 
1334  """ # noqa: E501
1335  def __init__(self, topology_file, pdb_dir='./', fasta_dir='./',
1336  gmm_dir='./'):
1337  """Constructor.
1338  @param topology_file Pipe-delimited file specifying the topology
1339  @param pdb_dir Relative path to the pdb directory
1340  @param fasta_dir Relative path to the fasta directory
1341  @param gmm_dir Relative path to the GMM directory
1342  """
1343  self.topology_file = topology_file
1344  # key=molname, value=TempMolecule
1345  self.molecules = IMP.pmi.tools.OrderedDict()
1346  self.pdb_dir = pdb_dir
1347  self.fasta_dir = fasta_dir
1348  self.gmm_dir = gmm_dir
1349  self._components = self.read(topology_file)
1350 
1351  def write_topology_file(self, outfile):
1352  with open(outfile, "w") as f:
1353  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1354  "residue_range|pdb_offset|bead_size|"
1355  "em_residues_per_gaussian|rigid_body|super_rigid_body|"
1356  "chain_of_super_rigid_bodies|\n")
1357  for c in self._components:
1358  output = c.get_str()+'\n'
1359  f.write(output)
1360  return outfile
1361 
1362  def get_components(self, topology_list="all"):
1363  """ Return list of ComponentTopologies for selected components
1364  @param topology_list List of indices to return"""
1365  if topology_list == "all":
1366  topologies = self._components
1367  else:
1368  topologies = []
1369  for i in topology_list:
1370  topologies.append(self._components[i])
1371  return topologies
1372 
1373  def get_molecules(self):
1374  return self.molecules
1375 
1376  def read(self, topology_file, append=False):
1377  """Read system components from topology file. append=False will erase
1378  current topology and overwrite with new
1379  """
1380  is_topology = False
1381  is_directories = False
1382  linenum = 1
1383  if not append:
1384  self._components = []
1385 
1386  with open(topology_file) as infile:
1387  for line in infile:
1388  if line.lstrip() == "" or line[0] == "#":
1389  continue
1390  elif line.split('|')[1].strip() in ("molecule_name"):
1391  is_topology = True
1392  is_directories = False
1393  old_format = False
1394  continue
1395  elif line.split('|')[1] == "component_name":
1396  is_topology = True
1398  "Old-style topology format (using "
1399  "|component_name|) is deprecated. Please switch to "
1400  "the new-style format (using |molecule_name|)\n")
1401  old_format = True
1402  is_directories = False
1403  continue
1404  elif line.split('|')[1] == "directories":
1406  "Setting directories in the topology file "
1407  "is deprecated. Please do so through the "
1408  "TopologyReader constructor. Note that new-style "
1409  "paths are relative to the current working "
1410  "directory, not the topology file.\n")
1411  is_directories = True
1412  elif is_directories:
1413  fields = line.split('|')
1414  setattr(self, fields[1],
1415  IMP.get_relative_path(topology_file, fields[2]))
1416  if is_topology:
1417  new_component = self._parse_line(line, linenum, old_format)
1418  self._components.append(new_component)
1419  linenum += 1
1420  return self._components
1421 
1422  def _parse_line(self, component_line, linenum, old_format):
1423  """Parse a line of topology values and matches them to their key.
1424  Checks each value for correct syntax
1425  Returns a list of Component objects
1426  fields:
1427  """
1428  c = _Component()
1429  values = [s.strip() for s in component_line.split('|')]
1430  errors = []
1431 
1432  # Required fields
1433  if old_format:
1434  c.molname = values[1]
1435  c.copyname = ''
1436  c._domain_name = values[2]
1437  c.color = 'blue'
1438  else:
1439  names = values[1].split('.')
1440  if len(names) == 1:
1441  c.molname = names[0]
1442  c.copyname = ''
1443  elif len(names) == 2:
1444  c.molname = names[0]
1445  c.copyname = names[1]
1446  else:
1447  c.molname = names[0]
1448  c.copyname = names[1]
1449  errors.append("Molecule name should be <molecule.copyID>")
1450  errors.append("For component %s line %d "
1451  % (c.molname, linenum))
1452  c._domain_name = c.molname + '.' + c.copyname
1453  colorfields = values[2].split(',')
1454  if len(colorfields) == 3:
1455  c.color = [float(x) for x in colorfields]
1456  if any([x > 1 for x in c.color]):
1457  c.color = [x/255 for x in c.color]
1458  else:
1459  c.color = values[2]
1460  c._orig_fasta_file = values[3]
1461  c.fasta_file = values[3]
1462  fasta_field = values[4].split(",")
1463  c.fasta_id = fasta_field[0]
1464  c.fasta_flag = None
1465  if len(fasta_field) > 1:
1466  c.fasta_flag = fasta_field[1]
1467  c._orig_pdb_input = values[5]
1468  pdb_input = values[5]
1469  tmp_chain = values[6]
1470  rr = values[7]
1471  offset = values[8]
1472  bead_size = values[9]
1473  emg = values[10]
1474  if old_format:
1475  rbs = srbs = csrbs = ''
1476  else:
1477  rbs = values[11]
1478  srbs = values[12]
1479  csrbs = values[13]
1480 
1481  if c.molname not in self.molecules:
1482  self.molecules[c.molname] = _TempMolecule(c)
1483  else:
1484  # COPY OR DOMAIN
1485  c._orig_fasta_file = \
1486  self.molecules[c.molname].orig_component._orig_fasta_file
1487  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1488  self.molecules[c.molname].add_component(c, c.copyname)
1489 
1490  # now cleanup input
1491  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1492  if pdb_input == "":
1493  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1494  errors.append("For component %s line %d is not correct"
1495  "|%s| was given." % (c.molname, linenum, pdb_input))
1496  elif pdb_input in ("IDEAL_HELIX", "BEADS"):
1497  c.pdb_file = pdb_input
1498  else:
1499  c.pdb_file = os.path.join(self.pdb_dir, pdb_input)
1500 
1501  # PDB chain must be one or two characters
1502  if len(tmp_chain) == 1 or len(tmp_chain) == 2:
1503  c.chain = tmp_chain
1504  else:
1505  errors.append(
1506  "PDB Chain identifier must be one or two characters.")
1507  errors.append("For component %s line %d is not correct"
1508  "|%s| was given."
1509  % (c.molname, linenum, tmp_chain))
1510 
1511  # Optional fields
1512  # Residue Range
1513  if rr.strip() == 'all' or str(rr) == "":
1514  c.residue_range = None
1515  elif (len(rr.split(',')) == 2 and self._is_int(rr.split(',')[0]) and
1516  (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END')):
1517  # Make sure that is residue range is given, there are only
1518  # two values and they are integers
1519  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1520  if c.residue_range[1] != 'END':
1521  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1522  # Old format used -1 for the last residue
1523  if old_format and c.residue_range[1] == -1:
1524  c.residue_range = (c.residue_range[0], 'END')
1525  else:
1526  errors.append("Residue Range format for component %s line %d is "
1527  "not correct" % (c.molname, linenum))
1528  errors.append(
1529  "Correct syntax is two comma separated integers: "
1530  "|start_res, end_res|. end_res can also be END to select the "
1531  "last residue in the chain. |%s| was given." % rr)
1532  errors.append("To select all residues, indicate |\"all\"|")
1533 
1534  # PDB Offset
1535  if self._is_int(offset):
1536  c.pdb_offset = int(offset)
1537  elif len(offset) == 0:
1538  c.pdb_offset = 0
1539  else:
1540  errors.append("PDB Offset format for component %s line %d is "
1541  "not correct" % (c.molname, linenum))
1542  errors.append("The value must be a single integer. |%s| was given."
1543  % offset)
1544 
1545  # Bead Size
1546  if self._is_int(bead_size):
1547  c.bead_size = int(bead_size)
1548  elif len(bead_size) == 0:
1549  c.bead_size = 0
1550  else:
1551  errors.append("Bead Size format for component %s line %d is "
1552  "not correct" % (c.molname, linenum))
1553  errors.append("The value must be a single integer. |%s| was given."
1554  % bead_size)
1555 
1556  # EM Residues Per Gaussian
1557  if self._is_int(emg):
1558  if int(emg) > 0:
1559  c.density_prefix = os.path.join(self.gmm_dir,
1560  c.get_unique_name())
1561  c.gmm_file = c.density_prefix + '.txt'
1562  c.mrc_file = c.density_prefix + '.gmm'
1563 
1564  c.em_residues_per_gaussian = int(emg)
1565  else:
1566  c.em_residues_per_gaussian = 0
1567  elif len(emg) == 0:
1568  c.em_residues_per_gaussian = 0
1569  else:
1570  errors.append("em_residues_per_gaussian format for component "
1571  "%s line %d is not correct" % (c.molname, linenum))
1572  errors.append("The value must be a single integer. |%s| was given."
1573  % emg)
1574 
1575  # rigid bodies
1576  if len(rbs) > 0:
1577  if not self._is_int(rbs):
1578  errors.append(
1579  "rigid bodies format for component "
1580  "%s line %d is not correct" % (c.molname, linenum))
1581  errors.append("Each RB must be a single integer, or empty. "
1582  "|%s| was given." % rbs)
1583  c.rigid_body = int(rbs)
1584 
1585  # super rigid bodies
1586  if len(srbs) > 0:
1587  srbs = srbs.split(',')
1588  for i in srbs:
1589  if not self._is_int(i):
1590  errors.append(
1591  "super rigid bodies format for component "
1592  "%s line %d is not correct" % (c.molname, linenum))
1593  errors.append(
1594  "Each SRB must be a single integer. |%s| was given."
1595  % srbs)
1596  c.super_rigid_bodies = srbs
1597 
1598  # chain of super rigid bodies
1599  if len(csrbs) > 0:
1600  if not self._is_int(csrbs):
1601  errors.append(
1602  "em_residues_per_gaussian format for component "
1603  "%s line %d is not correct" % (c.molname, linenum))
1604  errors.append(
1605  "Each CSRB must be a single integer. |%s| was given."
1606  % csrbs)
1607  c.chain_of_super_rigid_bodies = csrbs
1608 
1609  # done
1610  if errors:
1611  raise ValueError("Fix Topology File syntax errors and rerun: "
1612  + "\n".join(errors))
1613  else:
1614  return c
1615 
1616  def set_gmm_dir(self, gmm_dir):
1617  """Change the GMM dir"""
1618  self.gmm_dir = gmm_dir
1619  for c in self._components:
1620  c.gmm_file = os.path.join(self.gmm_dir,
1621  c.get_unique_name() + ".txt")
1622  c.mrc_file = os.path.join(self.gmm_dir,
1623  c.get_unique_name() + ".mrc")
1624  print('new gmm', c.gmm_file)
1625 
1626  def set_pdb_dir(self, pdb_dir):
1627  """Change the PDB dir"""
1628  self.pdb_dir = pdb_dir
1629  for c in self._components:
1630  if c._orig_pdb_input not in ("", "None", "IDEAL_HELIX", "BEADS"):
1631  c.pdb_file = os.path.join(self.pdb_dir, c._orig_pdb_input)
1632 
1633  def set_fasta_dir(self, fasta_dir):
1634  """Change the FASTA dir"""
1635  self.fasta_dir = fasta_dir
1636  for c in self._components:
1637  c.fasta_file = os.path.join(self.fasta_dir, c._orig_fasta_file)
1638 
1639  def _is_int(self, s):
1640  # is this string an integer?
1641  try:
1642  float(s)
1643  return float(s).is_integer()
1644  except ValueError:
1645  return False
1646 
1647  def get_rigid_bodies(self):
1648  """Return list of lists of rigid bodies (as domain name)"""
1649  rbl = defaultdict(list)
1650  for c in self._components:
1651  if c.rigid_body:
1652  rbl[c.rigid_body].append(c.get_unique_name())
1653  return rbl.values()
1654 
1656  """Return list of lists of super rigid bodies (as domain name)"""
1657  rbl = defaultdict(list)
1658  for c in self._components:
1659  for rbnum in c.super_rigid_bodies:
1660  rbl[rbnum].append(c.get_unique_name())
1661  return rbl.values()
1662 
1664  "Return list of lists of chains of super rigid bodies (as domain name)"
1665  rbl = defaultdict(list)
1666  for c in self._components:
1667  for rbnum in c.chain_of_super_rigid_bodies:
1668  rbl[rbnum].append(c.get_unique_name())
1669  return rbl.values()
1670 
1671 
1672 class _TempMolecule(object):
1673  """Store the Components and any requests for copies"""
1674  def __init__(self, init_c):
1675  self.molname = init_c.molname
1676  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1677  self.add_component(init_c, init_c.copyname)
1678  self.orig_copyname = init_c.copyname
1679  self.orig_component = self.domains[init_c.copyname][0]
1680 
1681  def add_component(self, component, copy_id):
1682  self.domains[copy_id].append(component)
1683  component.domainnum = len(self.domains[copy_id])-1
1684 
1685  def __repr__(self):
1686  return ','.join('%s:%i'
1687  % (k, len(self.domains[k])) for k in self.domains)
1688 
1689 
1690 class _Component(object):
1691  """Stores the components required to build a standard IMP hierarchy
1692  using IMP.pmi.BuildModel()
1693  """
1694  def __init__(self):
1695  self.molname = None
1696  self.copyname = None
1697  self.domainnum = 0
1698  self.fasta_file = None
1699  self._orig_fasta_file = None
1700  self.fasta_id = None
1701  self.fasta_flag = None
1702  self.pdb_file = None
1703  self._orig_pdb_input = None
1704  self.chain = None
1705  self.residue_range = None
1706  self.pdb_offset = 0
1707  self.bead_size = 10
1708  self.em_residues_per_gaussian = 0
1709  self.gmm_file = ''
1710  self.mrc_file = ''
1711  self.density_prefix = ''
1712  self.color = 0.1
1713  self.rigid_body = None
1714  self.super_rigid_bodies = []
1715  self.chain_of_super_rigid_bodies = []
1716 
1717  def _l2s(self, rng):
1718  return ",".join("%s" % x for x in rng)
1719 
1720  def __repr__(self):
1721  return self.get_str()
1722 
1723  def get_unique_name(self):
1724  return "%s.%s.%i" % (self.molname, self.copyname, self.domainnum)
1725 
1726  def get_str(self):
1727  res_range = self.residue_range
1728  if self.residue_range is None:
1729  res_range = []
1730  name = self.molname
1731  if self.copyname != '':
1732  name += '.' + self.copyname
1733  if self.chain is None:
1734  chain = ' '
1735  else:
1736  chain = self.chain
1737  color = self.color
1738  if isinstance(color, list):
1739  color = ','.join([str(x) for x in color])
1740  fastaid = self.fasta_id
1741  if self.fasta_flag:
1742  fastaid += "," + self.fasta_flag
1743  a = '|' + '|'.join([name, color, self._orig_fasta_file, fastaid,
1744  self._orig_pdb_input, chain,
1745  self._l2s(list(res_range)),
1746  str(self.pdb_offset),
1747  str(self.bead_size),
1748  str(self.em_residues_per_gaussian),
1749  str(self.rigid_body) if self.rigid_body else '',
1750  self._l2s(self.super_rigid_bodies),
1751  self._l2s(self.chain_of_super_rigid_bodies)]) + '|'
1752  return a
1753 
1754 
1756  '''Extends the functionality of IMP.atom.Molecule'''
1757 
1758  def __init__(self, hierarchy):
1759  IMP.atom.Molecule.__init__(self, hierarchy)
1760 
1761  def get_state_index(self):
1762  state = self.get_parent()
1763  return IMP.atom.State(state).get_state_index()
1764 
1765  def get_copy_index(self):
1766  return IMP.atom.Copy(self).get_copy_index()
1767 
1768  def get_extended_name(self):
1769  return self.get_name() + "." + \
1770  str(self.get_copy_index()) + \
1771  "." + str(self.get_state_index())
1772 
1773  def get_sequence(self):
1774  return IMP.atom.Chain(self).get_sequence()
1775 
1776  def get_residue_indexes(self):
1778 
1779  def get_residue_segments(self):
1780  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1781 
1782  def get_chain_id(self):
1783  return IMP.atom.Chain(self).get_id()
1784 
1785  def __repr__(self):
1786  s = 'PMIMoleculeHierarchy '
1787  s += self.get_name()
1788  s += " " + "Copy " + str(IMP.atom.Copy(self).get_copy_index())
1789  s += " " + "State " + str(self.get_state_index())
1790  s += " " + "N residues " + str(len(self.get_sequence()))
1791  return s
def build
Build all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1138
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
Build all states.
def __init__
Read a FASTA file and extract all the requested sequences.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
Read a PDB file and return all sequences for each contiguous fragment.
def get_states
Get a list of all State objects in this system.
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_number_of_copies
Get the number of copies of the given molecule (by name)
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State.create_molecule()
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def residue_range
Get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:683
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:160
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
def get_hierarchy
Get the IMP.atom.Hierarchy node for this state.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:73
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
def get_hierarchy
Return the top-level IMP.atom.Hierarchy node for this system.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data structure for reading and storing sequence data from PDBs.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value is a list of all copies of that molecule in ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:137
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:569
Warning for probably incorrect input parameters.
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:892