IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiple times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return Python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import IMP.pmi.alphabets
27 import csv
28 import os
29 from collections import defaultdict, namedtuple
30 from . import system_tools
31 from bisect import bisect_left
32 from math import pi,cos,sin
33 from operator import itemgetter
34 import weakref
35 import warnings
36 
37 def _build_ideal_helix(model, residues, coord_finder):
38  """Creates an ideal helix from the specified residue range
39  Residues MUST be contiguous.
40  This function actually adds them to the TempResidue hierarchy
41  """
42  created_hiers = []
43 
44  # this function creates a CAlpha helix structure (which can be used for coarsening)
45  for n, tempres in enumerate(residues):
46  if tempres.get_has_structure():
47  raise ValueError("You tried to build ideal_helix for a residue "
48  "that already has structure: %s" % tempres)
49  if n > 0 and tempres.get_index() != prev_idx + 1:
50  raise ValueError("Passed non-contiguous segment to "
51  "build_ideal_helix for %s" % tempres.get_molecule())
52 
53  # New residue particle will replace the TempResidue's existing (empty) hierarchy
54  rp = IMP.Particle(model)
55  rp.set_name("Residue_%i" % tempres.get_index())
56 
57  # Copy the original residue type and index
58  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
59 
60  # Create the CAlpha
61  ap = IMP.Particle(model)
63  x = 2.3 * cos(n * 2 * pi / 3.6)
64  y = 2.3 * sin(n * 2 * pi / 3.6)
65  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
66  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
67  d.set_radius(1.0)
68  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
69  IMP.atom.Mass(ap).set_mass(110.0)
70  this_res.add_child(a)
71 
72  # Add this structure to the TempResidue
73  tempres.set_structure(this_res)
74  created_hiers.append(this_res)
75  prev_idx = tempres.get_index()
76  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
77 
78 class _SystemBase(object):
79  """The base class for System, State and Molecule
80  classes. It contains shared functions in common to these classes
81  """
82 
83  def __init__(self,model=None):
84  if model is None:
85  self.model=IMP.Model()
86  else:
87  self.model=model
88 
89  def _create_hierarchy(self):
90  """create a new hierarchy"""
91  tmp_part=IMP.Particle(self.model)
92  return IMP.atom.Hierarchy.setup_particle(tmp_part)
93 
94  def _create_child(self,parent_hierarchy):
95  """create a new hierarchy, set it as child of the input
96  one, and return it"""
97  child_hierarchy=self._create_hierarchy()
98  parent_hierarchy.add_child(child_hierarchy)
99  return child_hierarchy
100 
101  def build(self):
102  """Build the coordinates of the system.
103  Loop through stored(?) hierarchies and set up coordinates!"""
104  pass
105 
106 #------------------------
107 
108 class System(_SystemBase):
109  """This class initializes the root node of the global IMP.atom.Hierarchy."""
110 
111  _all_systems = set()
112 
113  def __init__(self,model=None,name="System"):
114  _SystemBase.__init__(self,model)
115  self._number_of_states = 0
116  self._protocol_output = []
117  self.states = []
118  self.built=False
119 
120  System._all_systems.add(weakref.ref(self))
121 
122  # the root hierarchy node
123  self.hier=self._create_hierarchy()
124  self.hier.set_name(name)
125  self.hier._pmi2_system = weakref.ref(self)
126 
127  def __del__(self):
128  System._all_systems = set(x for x in System._all_systems
129  if x() not in (None, self))
130 
131  def get_states(self):
132  return self.states
133 
134  def create_state(self):
135  """Makes and returns a new IMP.pmi.topology.State in this system"""
136  self._number_of_states+=1
137  state = State(self,self._number_of_states-1)
138  self.states.append(state)
139  return state
140 
141  def __repr__(self):
142  return self.hier.get_name()
143 
145  """Returns the total number of states generated"""
146  return self._number_of_states
147 
148  def get_hierarchy(self):
149  return self.hier
150 
151  def build(self,**kwargs):
152  """Build all states"""
153  if not self.built:
154  for state in self.states:
155  state.build(**kwargs)
156  self.built=True
157  return self.hier
158 
159  def add_protocol_output(self, p):
160  """Capture details of the modeling protocol.
161  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
162  """
163  self._protocol_output.append(p)
164 # p._each_metadata.append(self._metadata)
165 # p._file_datasets.append(self._file_dataset)
166  for state in self.states:
167  state._add_protocol_output(p, self)
168 
169 
170 #------------------------
171 
172 class State(_SystemBase):
173  """Stores a list of Molecules all with the same State index.
174  Also stores number of copies of each Molecule for easy Selection.
175  """
176  def __init__(self,system,state_index):
177  """Define a new state
178  @param system the PMI System
179  @param state_index the index of the new state
180  @note It's expected that you will not use this constructor directly,
181  but rather create it with pmi::System::create_state()
182  """
183  self.model = system.get_hierarchy().get_model()
184  self.system = system
185  self.hier = self._create_child(system.get_hierarchy())
186  self.short_name = self.long_name = "State_"+str(state_index)
187  self.hier.set_name(self.short_name)
188  self.molecules = IMP.pmi.tools.OrderedDict() # key is molecule name. value are the molecule copies!
189  IMP.atom.State.setup_particle(self.hier,state_index)
190  self.built = False
191  self._protocol_output = []
192  for p in system._protocol_output:
193  self._add_protocol_output(p, system)
194 
195  def __repr__(self):
196  return self.system.__repr__()+'.'+self.hier.get_name()
197 
198  def _add_protocol_output(self, p, system):
199  state = p._add_state(self)
200  self._protocol_output.append((p, state))
201  state.model = system.model
202  state.prot = self.hier
203 
204  def get_molecules(self):
205  """Return a dictionary where key is molecule name and value
206  are the list of all copies of that molecule in setup order"""
207  return self.molecules
208 
209  def get_molecule(self, name, copy_num=0):
210  """Access a molecule by name and copy number
211  @param name The molecule name used during setup
212  @param copy_num The copy number based on input order.
213  Default: 0. Set to 'all' to get all copies
214  """
215  if name not in self.molecules:
216  raise KeyError("Could not find molname %s" % name)
217  if copy_num == 'all':
218  return self.molecules[name]
219  else:
220  return self.molecules[name][copy_num]
221 
222  def create_molecule(self, name, sequence='', chain_id='',
223  alphabet=IMP.pmi.alphabets.amino_acid, is_nucleic=None):
224  """Create a new Molecule within this State
225  @param name the name of the molecule (string);
226  it must not be already used
227  @param sequence sequence (string)
228  @param chain_id Chain ID to assign to this molecule
229  @param alphabet Mapping from FASTA codes to residue types
230  """
231  # check whether the molecule name is already assigned
232  if name in self.molecules:
233  raise ValueError('Cannot use a molecule name already used')
234 
235  if is_nucleic:
237  "is_nucleic is deprecated. Use alphabet instead.")
238  alphabet = IMP.pmi.alphabets.rna
239 
240  mol = Molecule(self, name, sequence, chain_id, copy_num=0,
241  alphabet=alphabet)
242  self.molecules[name] = [mol]
243  return mol
244 
245  def get_hierarchy(self):
246  return self.hier
247 
248  def get_number_of_copies(self,molname):
249  return len(self.molecules[molname])
250 
251  def _register_copy(self,molecule):
252  molname = molecule.get_hierarchy().get_name()
253  self.molecules[molname].append(molecule)
254 
255  def build(self,**kwargs):
256  """call build on all molecules (automatically makes clones)"""
257  if not self.built:
258  for molname in self.molecules:
259  for mol in reversed(self.molecules[molname]):
260  mol.build(**kwargs)
261  self.built=True
262  return self.hier
263 
264 
265 # Track residues read from PDB files
266 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
267 
268 class _RepresentationHandler(object):
269  """Handle PMI representation and use it to populate that of any attached
270  ProtocolOutput objects"""
271  def __init__(self, name, pos, pdb_elements):
272  self.name = name
273  self.pos = pos
274  self.last_index = None
275  self.last_pdb_index = None
276  self.pdb_for_residue = {}
277  for residues, pdb in pdb_elements:
278  for r in residues:
279  self.pdb_for_residue[r.get_index()] = pdb
280 
281  def _get_pdb(self, h):
282  """Return a PDBElement if the given hierarchy was read from a
283  PDB file"""
285  rind = IMP.atom.Residue(h).get_index()
286  return self.pdb_for_residue.get(rind, None)
287 
288  def __call__(self, res):
289  """Handle a single residue"""
290  if len(self.pos) == 0:
291  return
292  h = res.hier
293  pi = h.get_particle_index()
294  # Do nothing if we already saw this hierarchy
295  if self.last_index is None or pi != self.last_index:
296  pdb = self._get_pdb(h)
297  self.last_index = pi
298  if pdb:
299  assert(IMP.atom.Fragment.get_is_setup(h.get_parent()))
300  frag = IMP.atom.Fragment(h.get_parent())
301  fragi = frag.get_particle_index()
302  # Do nothing if we already saw this PDB fragment
303  if self.last_pdb_index is not None \
304  and self.last_pdb_index == fragi:
305  return
306  self.last_pdb_index = fragi
307  indices = frag.get_residue_indexes()
308  for p, state in self.pos:
309  p.add_pdb_element(state, self.name,
310  indices[0], indices[-1], pdb.offset,
311  pdb.filename, pdb.chain_id, frag)
313  frag = IMP.atom.Fragment(h)
314  indices = frag.get_residue_indexes()
315  for p, state in self.pos:
316  p.add_bead_element(state, self.name,
317  indices[0], indices[-1], 1, h)
319  resind = IMP.atom.Residue(h).get_index()
320  for p, state in self.pos:
321  p.add_bead_element(state, self.name, resind, resind, 1, h)
322  else:
323  raise TypeError("Unhandled hierarchy %s" % str(h))
324 
325 
326 #------------------------
327 
328 class Molecule(_SystemBase):
329  """Stores a named protein chain.
330  This class is constructed from within the State class.
331  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
332  Structure is read using this class.
333  Resolutions and copies can be registered, but are only created
334  when build() is called
335  """
336 
337  def __init__(self, state, name, sequence, chain_id, copy_num,
338  mol_to_clone=None, alphabet=IMP.pmi.alphabets.amino_acid):
339  """The user should not call this directly; instead call State::create_molecule()
340  @param state The parent PMI State
341  @param name The name of the molecule (string)
342  @param sequence Sequence (string)
343  @param chain_id The chain of this molecule
344  @param copy_num Store the copy number
345  @param mol_to_clone The original molecule (for cloning ONLY)
346  @note It's expected that you will not use this constructor directly,
347  but rather create a Molecule with pmi::State::create_molecule()
348  """
349  # internal data storage
350  self.model = state.get_hierarchy().get_model()
351  self.state = state
352  self.sequence = sequence
353  self.built = False
354  self.mol_to_clone = mol_to_clone
355  self.alphabet = alphabet
356  self.representations = [] # list of stuff to build
357  self._pdb_elements = []
358  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
359  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
360  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
361 
362  # create root node and set it as child to passed parent hierarchy
363  self.hier = self._create_child(self.state.get_hierarchy())
364  self.hier.set_name(name)
365  IMP.atom.Copy.setup_particle(self.hier,copy_num)
366  self._name_with_copy = "%s.%d" % (name, copy_num)
367  # store the sequence
368  self.chain=IMP.atom.Chain.setup_particle(self.hier,chain_id)
369  self.chain.set_sequence(self.sequence)
370  # create TempResidues from the sequence (if passed)
371  self.residues=[]
372  for ns,s in enumerate(sequence):
373  r = TempResidue(self, s, ns+1, ns, alphabet)
374  self.residues.append(r)
375 
376  def __repr__(self):
377  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
378  str(IMP.atom.Copy(self.hier).get_copy_index())
379 
380  def __getitem__(self,val):
381  if isinstance(val,int):
382  return self.residues[val]
383  elif isinstance(val,str):
384  return self.residues[int(val)-1]
385  elif isinstance(val,slice):
386  return IMP.pmi.tools.OrderedSet(self.residues[val])
387  else:
388  raise TypeError("Indexes must be int or str")
389 
390  def get_hierarchy(self):
391  """Return the IMP Hierarchy corresponding to this Molecule"""
392  return self.hier
393 
394  def get_name(self):
395  """Return this Molecule name"""
396  return self.hier.get_name()
397 
398  def get_state(self):
399  """Return the State containing this Molecule"""
400  return self.state
401 
402  def get_ideal_helices(self):
403  """Returns list of OrderedSets with requested ideal helices"""
404  return self._ideal_helices
405 
406  def residue_range(self,a,b,stride=1):
407  """get residue range from a to b, inclusive.
408  Use integers to get 0-indexing, or strings to get PDB-indexing"""
409  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
410  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
411  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
412  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
413  else:
414  raise TypeError("Range ends must be int or str. "
415  "Stride must be int.")
416 
417  def get_residues(self):
418  """ Return all modeled TempResidues as a set"""
419  all_res = IMP.pmi.tools.OrderedSet(self.residues)
420  return all_res
421 
422  def get_represented(self):
423  """ Return set of TempResidues that have representation"""
424  return self._represented
425 
427  """ Return a set of TempResidues that have associated structure coordinates """
428  atomic_res = IMP.pmi.tools.OrderedSet()
429  for res in self.residues:
430  if res.get_has_structure():
431  atomic_res.add(res)
432  return atomic_res
433 
435  """ Return a set of TempResidues that don't have associated structure coordinates """
436  non_atomic_res = IMP.pmi.tools.OrderedSet()
437  for res in self.residues:
438  if not res.get_has_structure():
439  non_atomic_res.add(res)
440  return non_atomic_res
441 
442  def create_copy(self,chain_id):
443  """Create a new Molecule with the same name and sequence but a higher copy number.
444  Returns the Molecule. No structure or representation will be copied!
445  @param chain_id Chain ID of the new molecule
446  """
447  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
448  copy_num=self.state.get_number_of_copies(self.get_name()))
449  self.state._register_copy(mol)
450  return mol
451 
452  def create_clone(self,chain_id):
453  """Create a Molecule clone (automatically builds same structure and representation)
454  @param chain_id If you want to set the chain ID of the copy to something
455  @note You cannot add structure or representations to a clone!
456  """
457  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
458  copy_num=self.state.get_number_of_copies(self.get_name()),
459  mol_to_clone=self)
460  self.state._register_copy(mol)
461  return mol
462 
463  def add_structure(self,pdb_fn,chain_id,res_range=[],
464  offset=0,model_num=None,ca_only=False,
465  soft_check=False):
466  """Read a structure and store the coordinates.
467  @return the atomic residues (as a set)
468  @param pdb_fn The file to read
469  @param chain_id Chain ID to read
470  @param res_range Add only a specific set of residues from the PDB file.
471  res_range[0] is the starting and res_range[1] is the
472  ending residue index.
473  @param offset Apply an offset to the residue indexes of the PDB
474  file. This number is added to the PDB sequence.
475  @param model_num Read multi-model PDB and return that model
476  @param ca_only Only read the CA positions from the PDB file
477  @param soft_check If True, it only warns if there are sequence
478  mismatches between the PDB and the Molecule (FASTA)
479  sequence, and uses the sequence from the PDB.
480  If False (Default), it raises an error when there
481  are sequence mismatches.
482  @note If you are adding structure without a FASTA file, set soft_check
483  to True.
484  """
485  if self.mol_to_clone is not None:
486  raise ValueError('You cannot call add_structure() for a clone')
487 
488  self.pdb_fn = pdb_fn
489 
490  # get IMP.atom.Residues from the pdb file
491  rhs = system_tools.get_structure(self.model,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
492  self.coord_finder.add_residues(rhs)
493 
494  if len(self.residues)==0:
495  warnings.warn(
496  "Substituting PDB residue type with FASTA residue type. "
497  "Potentially dangerous.", IMP.pmi.StructureWarning)
498 
499  # Store info for ProtocolOutput usage later
500  self._pdb_elements.append((rhs,
501  _PDBElement(offset=offset, filename=pdb_fn, chain_id=chain_id)))
502 
503  # load those into TempResidue object
504  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
505  for nrh,rh in enumerate(rhs):
506  pdb_idx = rh.get_index()
507  raw_idx = pdb_idx - 1
508 
509  # add ALA to fill in gaps
510  while len(self.residues)<pdb_idx:
511  r = TempResidue(self, 'A', len(self.residues)+1,
512  len(self.residues),
513  IMP.pmi.alphabets.amino_acid)
514  self.residues.append(r)
515  self.sequence += 'A'
516 
517  internal_res = self.residues[raw_idx]
518  if len(self.sequence)<raw_idx:
519  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
520  internal_res.set_structure(rh,soft_check)
521  atomic_res.add(internal_res)
522 
523  self.chain.set_sequence(self.sequence)
524  return atomic_res
525 
527  residues=None,
528  resolutions=[],
529  bead_extra_breaks=[],
530  bead_ca_centers=True,
531  bead_default_coord=[0,0,0],
532  density_residues_per_component=None,
533  density_prefix=None,
534  density_force_compute=False,
535  density_voxel_size=1.0,
536  setup_particles_as_densities=False,
537  ideal_helix=False,
538  color=None):
539  """Set the representation for some residues. Some options (beads, ideal helix)
540  operate along the backbone. Others (density options) are volumetric.
541  Some of these you can combine e.g., beads+densities or helix+densities
542  See @ref pmi_resolution
543  @param residues Set of PMI TempResidues for adding the representation.
544  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
545  If None, will select all residues for this Molecule.
546  @param resolutions Resolutions for beads representations.
547  If structured, will average along backbone, breaking at sequence breaks.
548  If unstructured, will just create beads.
549  Pass an integer or list of integers
550  @param bead_extra_breaks Additional breakpoints for splitting beads.
551  The value can be the 0-ordered position, after which it'll insert the break.
552  Alternatively pass PDB-style (1-ordered) indices as a string.
553  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
554  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
555  (otherwise will average atoms to get center). Defaults to True.
556  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
557  If no structure provided (like an all bead molecule), the beads go here.
558  @param density_residues_per_component Create density (Gaussian Mixture Model)
559  for these residues. Must also supply density_prefix
560  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
561  If exists, will read unless you set density_force_compute=True.
562  Will also write map (prefix+'.mrc').
563  Must also supply density_residues_per_component.
564  @param density_force_compute Set true to force overwrite density file.
565  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
566  Set to 0 if you don't want to create the MRC file
567  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
568  Useful for all-atom models or flexible beads.
569  Mutually exclusive with density_ options
570  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
571  Any other resolutions passed will be coarsened from there.
572  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
573  @param color the color applied to the hierarchies generated.
574  Format options: tuple (r,g,b) with values 0 to 1;
575  float (from 0 to 1, a map from Blue to Green to Red);
576  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
577  a hex RGB string (e.g. "#ff0000");
578  an IMP.display.Color object
579  @note You cannot call add_representation multiple times for the
580  same residues.
581  """
582 
583  # can't customize clones
584  if self.mol_to_clone is not None:
585  raise ValueError('You cannot call add_representation() for a clone.'
586  ' Maybe use a copy instead.')
587 
588  # format input
589  if residues is None:
590  res = IMP.pmi.tools.OrderedSet(self.residues)
591  elif residues==self:
592  res = IMP.pmi.tools.OrderedSet(self.residues)
593  elif type(residues) is IMP.pmi.topology.TempResidue:
594  res = IMP.pmi.tools.OrderedSet([residues])
595  elif hasattr(residues,'__iter__'):
596  if len(residues)==0:
597  raise Exception('You passed an empty set to add_representation')
598  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
599  res = residues
600  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
601  res = IMP.pmi.tools.OrderedSet(residues)
602  elif type(residues) is list and type(residues[0]) is TempResidue:
603  res = IMP.pmi.tools.OrderedSet(residues)
604  else:
605  raise Exception("You passed an iteratible of something other than TempResidue",res)
606  else:
607  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
608 
609  # check that each residue has not been represented yet
610  ov = res & self._represented
611  if ov:
612  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
613  self._represented|=res
614 
615  # check you aren't creating multiple resolutions without structure
616  if not hasattr(resolutions,'__iter__'):
617  if type(resolutions) is int:
618  resolutions = [resolutions]
619  else:
620  raise Exception("you tried to pass resolutions that are not int or list-of-int")
621  if len(resolutions)>1 and not ideal_helix:
622  for r in res:
623  if not r.get_has_structure():
624  raise Exception('You are creating multiple resolutions for '
625  'unstructured regions. This will have unexpected results.')
626 
627  # check density info is consistent
628  if density_residues_per_component or density_prefix:
629  if not density_residues_per_component and density_prefix:
630  raise Exception('If requesting density, must provide '
631  'density_residues_per_component AND density_prefix')
632  if density_residues_per_component and setup_particles_as_densities:
633  raise Exception('Cannot create both volumetric density '
634  '(density_residues_per_component) AND '
635  'individual densities (setup_particles_as_densities) '
636  'in the same representation')
637  if len(resolutions)>1 and setup_particles_as_densities:
638  raise Exception('You have multiple bead resolutions but are attempting to '
639  'set them all up as individual Densities. '
640  'This could have unexpected results.')
641 
642  # check helix not accompanied by other resolutions (densities OK though!)
643  if ideal_helix:
644  if 0 in resolutions:
645  raise Exception("For ideal helices, cannot build resolution 0: "
646  "you have to do that in MODELLER")
647  if 1 not in resolutions:
648  resolutions = [1] + list(resolutions)
649  self._ideal_helices.append(res)
650 
651  # check residues are all part of this molecule:
652  for r in res:
653  if r.get_molecule()!=self:
654  raise Exception('You are adding residues from a different molecule to',self.__repr__())
655 
656  # unify formatting for extra breaks
657  breaks = []
658  for b in bead_extra_breaks:
659  if type(b)==str:
660  breaks.append(int(b)-1)
661  else:
662  breaks.append(b)
663  # store the representation group
664  self.representations.append(_Representation(res,
665  resolutions,
666  breaks,
667  bead_ca_centers,
668  bead_default_coord,
669  density_residues_per_component,
670  density_prefix,
671  density_force_compute,
672  density_voxel_size,
673  setup_particles_as_densities,
674  ideal_helix,
675  color))
676 
677  def _all_protocol_output(self):
678  return self.state._protocol_output
679 
680  def build(self):
681  """Create all parts of the IMP hierarchy
682  including Atoms, Residues, and Fragments/Representations and,
683  finally, Copies.
684  Will only build requested representations.
685  @note Any residues assigned a resolution must have an IMP.atom.Residue
686  hierarchy containing at least a CAlpha. For missing residues,
687  these can be constructed from the PDB file.
688  """
689  if not self.built:
690  # Add molecule name and sequence to any ProtocolOutput objects
691  name=self.hier.get_name()
692  for po, state in self._all_protocol_output():
693  po.create_component(state, name, True,
694  asym_name=self._name_with_copy)
695  po.add_component_sequence(state, name, self.sequence,
696  asym_name=self._name_with_copy,
697  alphabet=self.alphabet)
698  # if requested, clone structure and representations BEFORE building original
699  if self.mol_to_clone is not None:
700  for nr,r in enumerate(self.mol_to_clone.residues):
701  if r.get_has_structure():
702  clone = IMP.atom.create_clone(r.get_hierarchy())
703  self.residues[nr].set_structure(
704  IMP.atom.Residue(clone),soft_check=True)
705  for old_rep in self.mol_to_clone.representations:
706  new_res = IMP.pmi.tools.OrderedSet()
707  for r in old_rep.residues:
708  new_res.add(self.residues[r.get_internal_index()])
709  self._represented.add(self.residues[r.get_internal_index()])
710  new_rep = _Representation(new_res,
711  old_rep.bead_resolutions,
712  old_rep.bead_extra_breaks,
713  old_rep.bead_ca_centers,
714  old_rep.bead_default_coord,
715  old_rep.density_residues_per_component,
716  old_rep.density_prefix,
717  False,
718  old_rep.density_voxel_size,
719  old_rep.setup_particles_as_densities,
720  old_rep.ideal_helix,
721  old_rep.color)
722  self.representations.append(new_rep)
723  self.coord_finder = self.mol_to_clone.coord_finder
724 
725  # give a warning for all residues that don't have representation
726  no_rep = [r for r in self.residues if r not in self._represented]
727  if len(no_rep)>0:
728  warnings.warn(
729  'Residues without representation in molecule %s: %s'
730  % (self.get_name(), system_tools.resnums2str(no_rep)),
732 
733  # first build any ideal helices (fills in structure for the TempResidues)
734  for rep in self.representations:
735  if rep.ideal_helix:
736  _build_ideal_helix(self.model,rep.residues,self.coord_finder)
737 
738  # build all the representations
739  built_reps = []
740 
741  rephandler = _RepresentationHandler(self._name_with_copy,
742  list(self._all_protocol_output()),
743  self._pdb_elements)
744 
745  for rep in self.representations:
746  built_reps += system_tools.build_representation(
747  self, rep, self.coord_finder, rephandler)
748 
749  # sort them before adding as children
750  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
751  for br in built_reps:
752  self.hier.add_child(br)
753  br.update_parents()
754  self.built = True
755 
756  for res in self.residues:
757  idx = res.get_index()
758 
759  # first off, store the highest resolution available in residue.hier
760  new_ps = IMP.atom.Selection(
761  self.hier,
762  residue_index=res.get_index(),
763  resolution=1).get_selected_particles()
764  if len(new_ps)>0:
765  new_p = new_ps[0]
766  if IMP.atom.Atom.get_is_setup(new_p):
767  # if only found atomic, store the residue
768  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
769  else:
770  # otherwise just store what you found
771  new_hier = IMP.atom.Hierarchy(new_p)
772  res.hier = new_hier
773  rephandler(res)
774  else:
775  res.hier = None
776  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
777  print('done building',self.get_hierarchy())
778  return self.hier
779 
780  def get_particles_at_all_resolutions(self,residue_indexes=None):
781  """Helpful utility for getting particles at all resolutions from this molecule.
782  Can optionally pass a set of residue indexes"""
783  if not self.built:
784  raise Exception("Cannot get all resolutions until you build the Molecule")
785  if residue_indexes is None:
786  residue_indexes = [r.get_index() for r in self.get_residues()]
788  residue_indexes=residue_indexes)
789  return ps
790 
791 #------------------------
792 
793 class _Representation(object):
794  """Private class just to store a representation request"""
795  def __init__(self,
796  residues,
797  bead_resolutions,
798  bead_extra_breaks,
799  bead_ca_centers,
800  bead_default_coord,
801  density_residues_per_component,
802  density_prefix,
803  density_force_compute,
804  density_voxel_size,
805  setup_particles_as_densities,
806  ideal_helix,
807  color):
808  self.residues = residues
809  self.bead_resolutions = bead_resolutions
810  self.bead_extra_breaks = bead_extra_breaks
811  self.bead_ca_centers = bead_ca_centers
812  self.bead_default_coord = bead_default_coord
813  self.density_residues_per_component = density_residues_per_component
814  self.density_prefix = density_prefix
815  self.density_force_compute = density_force_compute
816  self.density_voxel_size = density_voxel_size
817  self.setup_particles_as_densities = setup_particles_as_densities
818  self.ideal_helix = ideal_helix
819  self.color = color
820 
821 class _FindCloseStructure(object):
822  """Utility to get the nearest observed coordinate"""
823  def __init__(self):
824  self.coords=[]
825  def add_residues(self,residues):
826  for r in residues:
827  idx = IMP.atom.Residue(r).get_index()
828  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
829  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
830  if len(ca)==1:
831  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
832  self.coords.append([idx,xyz])
833  elif len(p)==1:
834  xyz = IMP.core.XYZ(p[0]).get_coordinates()
835  self.coords.append([idx,xyz])
836  else:
837  raise("_FindCloseStructure: wrong selection")
838 
839  self.coords.sort(key=itemgetter(0))
840  def find_nearest_coord(self,query):
841  if self.coords==[]:
842  return None
843  keys = [r[0] for r in self.coords]
844  pos = bisect_left(keys,query)
845  if pos == 0:
846  ret = self.coords[0]
847  elif pos == len(self.coords):
848  ret = self.coords[-1]
849  else:
850  before = self.coords[pos - 1]
851  after = self.coords[pos]
852  if after[0] - query < query - before[0]:
853  ret = after
854  else:
855  ret = before
856  return ret[1]
857 
858 class Sequences(object):
859  """A dictionary-like wrapper for reading and storing sequence data"""
860  def __init__(self,fasta_fn,name_map=None):
861  """read a fasta file and extract all the requested sequences
862  @param fasta_fn sequence file
863  @param name_map dictionary mapping the fasta name to final stored name
864  """
865  self.sequences = IMP.pmi.tools.OrderedDict()
866  self.read_sequences(fasta_fn,name_map)
867  def __len__(self):
868  return len(self.sequences)
869  def __contains__(self,x):
870  return x in self.sequences
871  def __getitem__(self,key):
872  if type(key) is int:
873  allseqs = list(self.sequences.keys())
874  try:
875  return self.sequences[allseqs[key]]
876  except IndexError:
877  raise IndexError("You tried to access sequence number %d "
878  "but there's only %d" % (key, len(allseqs)))
879  else:
880  return self.sequences[key]
881  def __iter__(self):
882  return self.sequences.__iter__()
883  def __repr__(self):
884  ret=''
885  for s in self.sequences:
886  ret += '%s\t%s\n'%(s,self.sequences[s])
887  return ret
888  def read_sequences(self,fasta_fn,name_map=None):
889  code = None
890  seq = None
891  with open(fasta_fn,'r') as fh:
892  for (num, line) in enumerate(fh):
893  if line.startswith('>'):
894  if seq is not None:
895  self.sequences[code] = seq.strip('*')
896  code = line.rstrip()[1:]
897  if name_map is not None:
898  try:
899  code = name_map[code]
900  except:
901  pass
902  seq = ''
903  else:
904  line = line.rstrip()
905  if line: # Skip blank lines
906  if seq is None:
907  raise Exception( \
908  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
909  seq += line
910  if seq is not None:
911  self.sequences[code] = seq.strip('*')
912 
913 
914 class PDBSequences(object):
915  """Data_structure for reading and storing sequence data from pdb"""
916  def __init__(self,model,pdb_fn,name_map=None):
917  """read a pdb file and returns all sequences for each contiguous fragment
918  @param pdb_fn file
919  @param name_map dictionary mapping the pdb chain id to final stored name
920  """
921  self.model=model
922  # self.sequences data-structure: (two-key dictionary)
923  # it contains all contiguous fragments:
924  # chain_id, tuples indicating first and last residue, sequence
925  # example:
926  # key1, key2, value
927  # A (77, 149) VENPSLDLEQYAASYSGLMR....
928  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
929  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
930  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
931  self.sequences = IMP.pmi.tools.OrderedDict()
932  self.read_sequences(pdb_fn,name_map)
933 
934  def read_sequences(self,pdb_fn,name_map):
935  read_file = IMP.atom.read_pdb
936  if pdb_fn.endswith('.cif'):
937  read_file = IMP.atom.read_mmcif
938  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
939  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
940  for c in cs:
941  id=IMP.atom.Chain(c).get_id()
942  print(id)
943  if name_map:
944  try:
945  id=name_map[id]
946  except:
947  print("Chain ID %s not in name_map, skipping" % id)
948  continue
949  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
950  rids=[]
951  rids_olc_dict={}
952  for r in rs:
953  dr=IMP.atom.Residue(r)
954  rid=dr.get_index()
955 
956  isprotein=dr.get_is_protein()
957  isrna=dr.get_is_rna()
958  isdna=dr.get_is_dna()
959  if isprotein:
960  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
961  rids.append(rid)
962  rids_olc_dict[rid]=olc
963  elif isdna:
964  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
965  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
966  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
967  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
968  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
969  rids.append(rid)
970  rids_olc_dict[rid]=olc
971  elif isrna:
972  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
973  if dr.get_residue_type() == IMP.atom.URA: olc="U"
974  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
975  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
976  if dr.get_residue_type() == IMP.atom.THY: olc="T"
977  rids.append(rid)
978  rids_olc_dict[rid]=olc
979  group_rids=self.group_indexes(rids)
980  contiguous_sequences=IMP.pmi.tools.OrderedDict()
981  for group in group_rids:
982  sequence_fragment=""
983  for i in range(group[0],group[1]+1):
984  sequence_fragment+=rids_olc_dict[i]
985  contiguous_sequences[group]=sequence_fragment
986  self.sequences[id]=contiguous_sequences
987 
988  #for c in self.sequences:
989  # for group in self.sequences[c]:
990  # print(c,group,self.sequences[c][group])
991 
992  def group_indexes(self,indexes):
993  from itertools import groupby
994  ranges = []
995  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
996  group = [x[1] for x in g]
997  ranges.append((group[0], group[-1]))
998  return ranges
999 
1000 
1001 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
1002  '''This function computes and prints the alignment between the
1003  fasta file and the pdb sequence, computes the offsets for each contiguous
1004  fragment in the PDB.
1005  @param fasta_sequences IMP.pmi.topology.Sequences object
1006  @param pdb_sequences IMP.pmi.topology.PDBSequences object
1007  @param show boolean default False, if True prints the alignments.
1008  The input objects should be generated using map_name dictionaries such that fasta_id
1009  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1010  Returns a dictionary of offsets, organized by peptide range (group):
1011  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1012  from Bio import pairwise2
1013  from Bio.pairwise2 import format_alignment
1014  from Bio.SubsMat import MatrixInfo as matlist
1015  matrix = matlist.blosum62
1016  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1017  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1018  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1019  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1020  offsets=IMP.pmi.tools.OrderedDict()
1021  for name in fasta_sequences.sequences:
1022  print(name)
1023  seq_fasta=fasta_sequences.sequences[name]
1024  if name not in pdb_sequences.sequences:
1025  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
1026  pdbnames=pdb_sequences.sequences.keys()
1027  else:
1028  pdbnames=[name]
1029  for pdbname in pdbnames:
1030  for group in pdb_sequences.sequences[pdbname]:
1031  if group[1]-group[0]+1<7:continue
1032  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
1033  if show:
1034  print("########################")
1035  print(" ")
1036  print("protein name",pdbname)
1037  print("fasta id", name)
1038  print("pdb fragment",group)
1039  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
1040  for a in [align]:
1041  offset=a[3]+1-group[0]
1042  if show:
1043  print("alignment sequence start-end",(a[3]+1,a[4]+1))
1044  print("offset from pdb to fasta index",offset)
1045  print(format_alignment(*a))
1046  if name not in offsets:
1047  offsets[pdbname]={}
1048  if group not in offsets[pdbname]:
1049  offsets[pdbname][group]=offset
1050  else:
1051  if group not in offsets[pdbname]:
1052  offsets[pdbname][group]=offset
1053  return offsets
1054 
1055 
1056 
1057 #------------------------
1058 
1059 
1060 class TempResidue(object):
1061  """Temporarily stores residue information, even without structure available."""
1062  # Consider implementing __hash__ so you can select.
1063  def __init__(self, molecule, code, index, internal_index, alphabet):
1064  """setup a TempResidue
1065  @param molecule PMI Molecule to which this residue belongs
1066  @param code one-letter residue type code
1067  @param index PDB index
1068  @param internal_index The number in the sequence
1069  """
1070  #these attributes should be immutable
1071  self.molecule = molecule
1072  self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1073  self.pdb_index = index
1074  self.internal_index = internal_index
1075  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1076  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
1077  #these are expected to change
1078  self._structured = False
1079  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.model),
1080  self.rtype,
1081  index)
1082  def __str__(self):
1083  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
1084  def __repr__(self):
1085  return self.__str__()
1086  def __key(self):
1087  #this returns the immutable attributes only
1088  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
1089  def __eq__(self,other):
1090  return type(other)==type(self) and self.__key() == other.__key()
1091  def __hash__(self):
1092  return hash(self.__key())
1093  def get_index(self):
1094  return self.pdb_index
1095  def get_internal_index(self):
1096  return self.internal_index
1097  def get_code(self):
1098  return IMP.atom.get_one_letter_code(self.get_residue_type())
1099  def get_residue_type(self):
1100  return self.rtype
1101  def get_hierarchy(self):
1102  return self.hier
1103  def get_molecule(self):
1104  return self.molecule
1105  def get_has_structure(self):
1106  return self._structured
1107  def set_structure(self,res,soft_check=False):
1108  if res.get_residue_type()!=self.get_residue_type():
1109  if soft_check:
1110  # note from commit a2c13eaa1 we give priority to the FASTA and not the PDB
1111  warnings.warn(
1112  'Inconsistency between FASTA sequence and PDB sequence. '
1113  'FASTA type %s %s and PDB type %s'
1114  % (self.get_index(), self.hier.get_residue_type(),
1115  res.get_residue_type()),
1117  self.hier.set_residue_type((self.get_residue_type()))
1118  self.rtype = self.get_residue_type()
1119  else:
1120  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
1121  IMP.atom.get_one_letter_code(res.get_residue_type()),
1122  'and sequence residue is',self.get_code())
1123 
1124  for a in res.get_children():
1125  self.hier.add_child(a)
1126  atype = IMP.atom.Atom(a).get_atom_type()
1127  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
1128  self.hier.get_index()))
1129  self._structured = True
1130 
1131 class TopologyReader(object):
1132  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
1133  The file is read in and each part of the topology is stored as a
1134  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1135  The topology file should be in a simple pipe-delimited format:
1136  @code{.txt}
1137 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1138 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1139 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1140 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1141 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1142 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1143 
1144  @endcode
1145 
1146  These are the fields you can enter:
1147  - `component_name`: Name of the component (chain). Serves as the parent
1148  hierarchy for this structure. Multiple copies of the same component
1149  can be created by appending a copy number after a period; if none is
1150  specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1151  of Rpb2 or Rpb2.0).
1152  - `color`: The color used in the output RMF file. Uses
1153  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1154  (e.g. "red"), or R,G,B values as three comma-separated floating point
1155  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1156  starting with '#' (e.g. 0xff0000).
1157  - `fasta_fn`: Name of FASTA file containing this component.
1158  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1159  from the file is assumed to be a protein sequence. If it should instead
1160  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1161  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1162  FASTA file and treat it as RNA.
1163  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1164  If left empty, will set up as BEADS (you can also specify "BEADS")
1165  Can also write "IDEAL_HELIX".
1166  - `chain`: Chain ID of this domain in the PDB file.
1167  - `residue_range`: Comma delimited pair defining range.
1168  Can leave empty or use 'all' for entire sequence from PDB file.
1169  The second item in the pair can be END to select the last residue in the
1170  PDB chain.
1171  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1172  For example, an offset of -10 would match the first residue in the
1173  FASTA file (which is always numbered sequentially starting from 1) with
1174  residue 11 in the PDB file.
1175  - `bead_size`: The size (in residues) of beads used to model areas not
1176  covered by PDB coordinates. These will be built automatically.
1177  - `em_residues`: The number of Gaussians used to model the electron
1178  density of this domain. Set to zero if no EM fitting will be done.
1179  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1180  (and .mrc)
1181  - `rigid_body`: Leave empty if this object is not in a rigid body.
1182  Otherwise, this is a number corresponding to the rigid body containing
1183  this object. The number itself is just used for grouping things.
1184  - `super_rigid_body`: Add a mover that periodically moves several related
1185  domains as if they were a single large rigid body. In between such moves,
1186  the domains move independently. This can improve sampling.
1187  - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1188  for all adjacent pairs of domains in the chain.
1189  - `flags` additional flags for advanced options
1190  @note All filenames are relative to the paths specified in the constructor.
1191 
1192  """
1193  def __init__(self,
1194  topology_file,
1195  pdb_dir='./',
1196  fasta_dir='./',
1197  gmm_dir='./'):
1198  """Constructor.
1199  @param topology_file Pipe-delimited file specifying the topology
1200  @param pdb_dir Relative path to the pdb directory
1201  @param fasta_dir Relative path to the fasta directory
1202  @param gmm_dir Relative path to the GMM directory
1203  """
1204  self.topology_file = topology_file
1205  self.molecules = IMP.pmi.tools.OrderedDict() # key=molname, value=TempMolecule
1206  self.pdb_dir = pdb_dir
1207  self.fasta_dir = fasta_dir
1208  self.gmm_dir = gmm_dir
1209  self._components = self.read(topology_file)
1210 
1211  def write_topology_file(self,outfile):
1212  with open(outfile, "w") as f:
1213  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1214  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1215  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1216  for c in self._components:
1217  output = c.get_str()+'\n'
1218  f.write(output)
1219  return outfile
1220 
1221  def get_components(self, topology_list = "all"):
1222  """ Return list of ComponentTopologies for selected components
1223  @param topology_list List of indices to return"""
1224  if topology_list == "all":
1225  topologies = self._components
1226  else:
1227  topologies=[]
1228  for i in topology_list:
1229  topologies.append(self._components[i])
1230  return topologies
1231 
1232  def get_molecules(self):
1233  return self.molecules
1234 
1235  def read(self, topology_file, append=False):
1236  """Read system components from topology file. append=False will erase
1237  current topology and overwrite with new
1238  """
1239  is_topology = False
1240  is_directories = False
1241  linenum = 1
1242  if append==False:
1243  self._components=[]
1244 
1245  with open(topology_file) as infile:
1246  for line in infile:
1247  if line.lstrip()=="" or line[0]=="#":
1248  continue
1249  elif line.split('|')[1].strip() in ("molecule_name"):
1250  is_topology=True
1251  is_directories = False
1252  old_format = False
1253  continue
1254  elif line.split('|')[1] == "component_name":
1255  is_topology = True
1257  "Old-style topology format (using "
1258  "|component_name|) is deprecated. Please switch to "
1259  "the new-style format (using |molecule_name|)\n")
1260  old_format = True
1261  is_directories = False
1262  continue
1263  elif line.split('|')[1] == "directories":
1265  "Setting directories in the topology file "
1266  "is deprecated. Please do so through the "
1267  "TopologyReader constructor. Note that new-style "
1268  "paths are relative to the current working "
1269  "directory, not the topology file.\n")
1270  is_directories = True
1271  elif is_directories:
1272  fields = line.split('|')
1273  setattr(self, fields[1],
1274  IMP.get_relative_path(topology_file, fields[2]))
1275  if is_topology:
1276  new_component = self._parse_line(line, linenum, old_format)
1277  self._components.append(new_component)
1278  linenum += 1
1279  return self._components
1280 
1281  def _parse_line(self, component_line, linenum, old_format):
1282  """Parse a line of topology values and matches them to their key.
1283  Checks each value for correct syntax
1284  Returns a list of Component objects
1285  fields:
1286  """
1287  c = _Component()
1288  values = [s.strip() for s in component_line.split('|')]
1289  errors = []
1290 
1291  ### Required fields
1292  if old_format:
1293  c.molname = values[1]
1294  c.copyname = ''
1295  c._domain_name = values[2]
1296  c.color = 'blue'
1297  else:
1298  names = values[1].split('.')
1299  if len(names)==1:
1300  c.molname = names[0]
1301  c.copyname = ''
1302  elif len(names)==2:
1303  c.molname = names[0]
1304  c.copyname = names[1]
1305  else:
1306  c.molname = names[0]
1307  c.copyname = names[1]
1308  errors.append("Molecule name should be <molecule.copyID>")
1309  errors.append("For component %s line %d " % (c.molname,linenum))
1310  c._domain_name = c.molname + '.' + c.copyname
1311  colorfields = values[2].split(',')
1312  if len(colorfields)==3:
1313  c.color = [float(x) for x in colorfields]
1314  if any([x>1 for x in c.color]):
1315  c.color=[x/255 for x in c.color]
1316  else:
1317  c.color = values[2]
1318  c._orig_fasta_file = values[3]
1319  c.fasta_file = values[3]
1320  fasta_field = values[4].split(",")
1321  c.fasta_id = fasta_field[0]
1322  c.fasta_flag = None
1323  if len(fasta_field) > 1:
1324  c.fasta_flag = fasta_field[1]
1325  c._orig_pdb_input = values[5]
1326  pdb_input = values[5]
1327  tmp_chain = values[6]
1328  rr = values[7]
1329  offset = values[8]
1330  bead_size = values[9]
1331  emg = values[10]
1332  if old_format:
1333  rbs = srbs = csrbs = ''
1334  else:
1335  rbs = values[11]
1336  srbs = values[12]
1337  csrbs = values[13]
1338 
1339  if c.molname not in self.molecules:
1340  self.molecules[c.molname] = _TempMolecule(c)
1341  else:
1342  # COPY OR DOMAIN
1343  c._orig_fasta_file = self.molecules[c.molname].orig_component._orig_fasta_file
1344  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1345  self.molecules[c.molname].add_component(c,c.copyname)
1346 
1347  # now cleanup input
1348  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1349  if pdb_input=="":
1350  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1351  errors.append("For component %s line %d is not correct"
1352  "|%s| was given." % (c.molname,linenum,pdb_input))
1353  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1354  c.pdb_file = pdb_input
1355  else:
1356  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1357 
1358  # PDB chain must be one or two characters
1359  if len(tmp_chain)==1 or len(tmp_chain)==2:
1360  c.chain = tmp_chain
1361  else:
1362  errors.append("PDB Chain identifier must be one or two characters.")
1363  errors.append("For component %s line %d is not correct"
1364  "|%s| was given." % (c.molname,linenum,tmp_chain))
1365 
1366  ### Optional fields
1367  # Residue Range
1368  if rr.strip()=='all' or str(rr)=="":
1369  c.residue_range = None
1370  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1371  # Make sure that is residue range is given, there are only two values and they are integers
1372  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1373  if c.residue_range[1] != 'END':
1374  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1375  # Old format used -1 for the last residue
1376  if old_format and c.residue_range[1] == -1:
1377  c.residue_range = (c.residue_range[0], 'END')
1378  else:
1379  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1380  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1381  errors.append("To select all residues, indicate |\"all\"|")
1382 
1383  # PDB Offset
1384  if self._is_int(offset):
1385  c.pdb_offset=int(offset)
1386  elif len(offset)==0:
1387  c.pdb_offset = 0
1388  else:
1389  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1390  errors.append("The value must be a single integer. |%s| was given." % offset)
1391 
1392  # Bead Size
1393  if self._is_int(bead_size):
1394  c.bead_size=int(bead_size)
1395  elif len(bead_size)==0:
1396  c.bead_size = 0
1397  else:
1398  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1399  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1400 
1401  # EM Residues Per Gaussian
1402  if self._is_int(emg):
1403  if int(emg) > 0:
1404  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1405  c.gmm_file = c.density_prefix+'.txt'
1406  c.mrc_file = c.density_prefix+'.gmm'
1407 
1408  c.em_residues_per_gaussian=int(emg)
1409  else:
1410  c.em_residues_per_gaussian = 0
1411  elif len(emg)==0:
1412  c.em_residues_per_gaussian = 0
1413  else:
1414  errors.append("em_residues_per_gaussian format for component "
1415  "%s line %d is not correct" % (c.molname, linenum))
1416  errors.append("The value must be a single integer. |%s| was given." % emg)
1417 
1418  # rigid bodies
1419  if len(rbs)>0:
1420  if not self._is_int(rbs):
1421  errors.append("rigid bodies format for component "
1422  "%s line %d is not correct" % (c.molname, linenum))
1423  errors.append("Each RB must be a single integer, or empty. "
1424  "|%s| was given." % rbs)
1425  c.rigid_body = int(rbs)
1426 
1427  # super rigid bodies
1428  if len(srbs)>0:
1429  srbs = srbs.split(',')
1430  for i in srbs:
1431  if not self._is_int(i):
1432  errors.append("super rigid bodies format for component "
1433  "%s line %d is not correct" % (c.molname, linenum))
1434  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1435  c.super_rigid_bodies = srbs
1436 
1437  # chain of super rigid bodies
1438  if len(csrbs)>0:
1439  if not self._is_int(csrbs):
1440  errors.append("em_residues_per_gaussian format for component "
1441  "%s line %d is not correct" % (c.molname, linenum))
1442  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1443  c.chain_of_super_rigid_bodies = csrbs
1444 
1445  # done
1446  if errors:
1447  raise ValueError("Fix Topology File syntax errors and rerun: " \
1448  + "\n".join(errors))
1449  else:
1450  return c
1451 
1452 
1453  def set_gmm_dir(self,gmm_dir):
1454  """Change the GMM dir"""
1455  self.gmm_dir = gmm_dir
1456  for c in self._components:
1457  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1458  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1459  print('new gmm',c.gmm_file)
1460 
1461  def set_pdb_dir(self,pdb_dir):
1462  """Change the PDB dir"""
1463  self.pdb_dir = pdb_dir
1464  for c in self._components:
1465  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1466  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1467 
1468  def set_fasta_dir(self,fasta_dir):
1469  """Change the FASTA dir"""
1470  self.fasta_dir = fasta_dir
1471  for c in self._components:
1472  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1473 
1474  def _is_int(self, s):
1475  # is this string an integer?
1476  try:
1477  float(s)
1478  return float(s).is_integer()
1479  except ValueError:
1480  return False
1481 
1482  def get_rigid_bodies(self):
1483  """Return list of lists of rigid bodies (as domain name)"""
1484  rbl = defaultdict(list)
1485  for c in self._components:
1486  if c.rigid_body:
1487  rbl[c.rigid_body].append(c.get_unique_name())
1488  return rbl.values()
1489 
1491  """Return list of lists of super rigid bodies (as domain name)"""
1492  rbl = defaultdict(list)
1493  for c in self._components:
1494  for rbnum in c.super_rigid_bodies:
1495  rbl[rbnum].append(c.get_unique_name())
1496  return rbl.values()
1497 
1499  """Return list of lists of chains of super rigid bodies (as domain name)"""
1500  rbl = defaultdict(list)
1501  for c in self._components:
1502  for rbnum in c.chain_of_super_rigid_bodies:
1503  rbl[rbnum].append(c.get_unique_name())
1504  return rbl.values()
1505 
1506 class _TempMolecule(object):
1507  """Store the Components and any requests for copies"""
1508  def __init__(self,init_c):
1509  self.molname = init_c.molname
1510  # key=copy ID, value = list of domains
1511  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1512  self.add_component(init_c,init_c.copyname)
1513  self.orig_copyname = init_c.copyname
1514  self.orig_component = self.domains[init_c.copyname][0]
1515  def add_component(self,component,copy_id):
1516  self.domains[copy_id].append(component)
1517  component.domainnum = len(self.domains[copy_id])-1
1518  def __repr__(self):
1519  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1520 
1521 class _Component(object):
1522  """Stores the components required to build a standard IMP hierarchy
1523  using IMP.pmi.BuildModel()
1524  """
1525  def __init__(self):
1526  self.molname = None
1527  self.copyname = None
1528  self.domainnum = 0
1529  self.fasta_file = None
1530  self._orig_fasta_file = None
1531  self.fasta_id = None
1532  self.fasta_flag = None
1533  self.pdb_file = None
1534  self._orig_pdb_input = None
1535  self.chain = None
1536  self.residue_range = None
1537  self.pdb_offset = 0
1538  self.bead_size = 10
1539  self.em_residues_per_gaussian = 0
1540  self.gmm_file = ''
1541  self.mrc_file = ''
1542  self.density_prefix = ''
1543  self.color = 0.1
1544  self.rigid_body = None
1545  self.super_rigid_bodies = []
1546  self.chain_of_super_rigid_bodies = []
1547 
1548  def _l2s(self,l):
1549  return ",".join("%s" % x for x in l)
1550 
1551  def __repr__(self):
1552  return self.get_str()
1553 
1554  def get_unique_name(self):
1555  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1556 
1557  def get_str(self):
1558  res_range = self.residue_range
1559  if self.residue_range is None:
1560  res_range = []
1561  name = self.molname
1562  if self.copyname!='':
1563  name += '.'+self.copyname
1564  if self.chain is None:
1565  chain = ' '
1566  else:
1567  chain = self.chain
1568  color=self.color
1569  if isinstance(color, list):
1570  color=','.join([str(x) for x in color])
1571  fastaid = self.fasta_id
1572  if self.fasta_flag:
1573  fastaid += "," + self.fasta_flag
1574  a= '|'+'|'.join([name,color,self._orig_fasta_file,fastaid,
1575  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1576  str(self.pdb_offset),str(self.bead_size),
1577  str(self.em_residues_per_gaussian),
1578  str(self.rigid_body) if self.rigid_body else '',
1579  self._l2s(self.super_rigid_bodies),
1580  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1581  return a
1582 
1583 
1585  '''Extends the functionality of IMP.atom.Molecule'''
1586 
1587  def __init__(self,hierarchy):
1588  IMP.atom.Molecule.__init__(self,hierarchy)
1589 
1590  def get_state_index(self):
1591  state = self.get_parent()
1592  return IMP.atom.State(state).get_state_index()
1593 
1594  def get_copy_index(self):
1595  return IMP.atom.Copy(self).get_copy_index()
1596 
1597  def get_extended_name(self):
1598  return self.get_name()+"."+\
1599  str(self.get_copy_index())+\
1600  "."+str(self.get_state_index())
1601 
1602  def get_sequence(self):
1603  return IMP.atom.Chain(self).get_sequence()
1604 
1605  def get_residue_indexes(self):
1607 
1608  def get_residue_segments(self):
1609  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1610 
1611  def get_chain_id(self):
1612  return IMP.atom.Chain(self).get_id()
1613 
1614  def __repr__(self):
1615  s='PMIMoleculeHierarchy '
1616  s+=self.get_name()
1617  s+=" "+"Copy "+str(IMP.atom.Copy(self).get_copy_index())
1618  s+=" "+"State "+str(self.get_state_index())
1619  s+=" "+"N residues "+str(len(self.get_sequence()))
1620  return s
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1115
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
Build all states.
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:665
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:158
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:135
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:553
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:865