IMP logo
IMP Reference Guide  2.11.1
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiple times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return Python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict, namedtuple
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 import weakref
34 import warnings
35 
36 def _build_ideal_helix(model, residues, coord_finder):
37  """Creates an ideal helix from the specified residue range
38  Residues MUST be contiguous.
39  This function actually adds them to the TempResidue hierarchy
40  """
41  created_hiers = []
42 
43  # this function creates a CAlpha helix structure (which can be used for coarsening)
44  for n, tempres in enumerate(residues):
45  if tempres.get_has_structure():
46  raise ValueError("You tried to build ideal_helix for a residue "
47  "that already has structure: %s" % tempres)
48  if n > 0 and tempres.get_index() != prev_idx + 1:
49  raise ValueError("Passed non-contiguous segment to "
50  "build_ideal_helix for %s" % tempres.get_molecule())
51 
52  # New residue particle will replace the TempResidue's existing (empty) hierarchy
53  rp = IMP.Particle(model)
54  rp.set_name("Residue_%i" % tempres.get_index())
55 
56  # Copy the original residue type and index
57  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
58 
59  # Create the CAlpha
60  ap = IMP.Particle(model)
62  x = 2.3 * cos(n * 2 * pi / 3.6)
63  y = 2.3 * sin(n * 2 * pi / 3.6)
64  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
65  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
66  d.set_radius(1.0)
67  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
68  IMP.atom.Mass(ap).set_mass(110.0)
69  this_res.add_child(a)
70 
71  # Add this structure to the TempResidue
72  tempres.set_structure(this_res)
73  created_hiers.append(this_res)
74  prev_idx = tempres.get_index()
75  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
76 
77 class _SystemBase(object):
78  """The base class for System, State and Molecule
79  classes. It contains shared functions in common to these classes
80  """
81 
82  def __init__(self,model=None):
83  if model is None:
84  self.model=IMP.Model()
85  else:
86  self.model=model
87 
88  def _create_hierarchy(self):
89  """create a new hierarchy"""
90  tmp_part=IMP.Particle(self.model)
91  return IMP.atom.Hierarchy.setup_particle(tmp_part)
92 
93  def _create_child(self,parent_hierarchy):
94  """create a new hierarchy, set it as child of the input
95  one, and return it"""
96  child_hierarchy=self._create_hierarchy()
97  parent_hierarchy.add_child(child_hierarchy)
98  return child_hierarchy
99 
100  def build(self):
101  """Build the coordinates of the system.
102  Loop through stored(?) hierarchies and set up coordinates!"""
103  pass
104 
105 #------------------------
106 
107 class System(_SystemBase):
108  """This class initializes the root node of the global IMP.atom.Hierarchy."""
109 
110  _all_systems = set()
111 
112  def __init__(self,model=None,name="System"):
113  _SystemBase.__init__(self,model)
114  self._number_of_states = 0
115  self._protocol_output = []
116  self.states = []
117  self.built=False
118 
119  System._all_systems.add(weakref.ref(self))
120 
121  # the root hierarchy node
122  self.hier=self._create_hierarchy()
123  self.hier.set_name(name)
124  self.hier._pmi2_system = weakref.ref(self)
125 
126  def __del__(self):
127  System._all_systems = set(x for x in System._all_systems
128  if x() not in (None, self))
129 
130  def get_states(self):
131  return self.states
132 
133  def create_state(self):
134  """Makes and returns a new IMP.pmi.topology.State in this system"""
135  self._number_of_states+=1
136  state = State(self,self._number_of_states-1)
137  self.states.append(state)
138  return state
139 
140  def __repr__(self):
141  return self.hier.get_name()
142 
144  """Returns the total number of states generated"""
145  return self._number_of_states
146 
147  def get_hierarchy(self):
148  return self.hier
149 
150  def build(self,**kwargs):
151  """Build all states"""
152  if not self.built:
153  for state in self.states:
154  state.build(**kwargs)
155  self.built=True
156  return self.hier
157 
158  def add_protocol_output(self, p):
159  """Capture details of the modeling protocol.
160  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
161  """
162  self._protocol_output.append(p)
163 # p._each_metadata.append(self._metadata)
164 # p._file_datasets.append(self._file_dataset)
165  for state in self.states:
166  state._add_protocol_output(p, self)
167 
168 
169 #------------------------
170 
171 class State(_SystemBase):
172  """Stores a list of Molecules all with the same State index.
173  Also stores number of copies of each Molecule for easy Selection.
174  """
175  def __init__(self,system,state_index):
176  """Define a new state
177  @param system the PMI System
178  @param state_index the index of the new state
179  @note It's expected that you will not use this constructor directly,
180  but rather create it with pmi::System::create_state()
181  """
182  self.model = system.get_hierarchy().get_model()
183  self.system = system
184  self.hier = self._create_child(system.get_hierarchy())
185  self.short_name = self.long_name = "State_"+str(state_index)
186  self.hier.set_name(self.short_name)
187  self.molecules = IMP.pmi.tools.OrderedDict() # key is molecule name. value are the molecule copies!
188  IMP.atom.State.setup_particle(self.hier,state_index)
189  self.built = False
190  self._protocol_output = []
191  for p in system._protocol_output:
192  self._add_protocol_output(p, system)
193 
194  def __repr__(self):
195  return self.system.__repr__()+'.'+self.hier.get_name()
196 
197  def _add_protocol_output(self, p, system):
198  state = p._add_state(self)
199  self._protocol_output.append((p, state))
200  state.model = system.model
201  state.prot = self.hier
202 
203  def get_molecules(self):
204  """Return a dictionary where key is molecule name and value
205  are the list of all copies of that molecule in setup order"""
206  return self.molecules
207 
208  def get_molecule(self, name, copy_num=0):
209  """Access a molecule by name and copy number
210  @param name The molecule name used during setup
211  @param copy_num The copy number based on input order.
212  Default: 0. Set to 'all' to get all copies
213  """
214  if name not in self.molecules:
215  raise KeyError("Could not find molname %s" % name)
216  if copy_num == 'all':
217  return self.molecules[name]
218  else:
219  return self.molecules[name][copy_num]
220 
221  def create_molecule(self, name, sequence='', chain_id='', is_nucleic=None):
222  """Create a new Molecule within this State
223  @param name the name of the molecule (string);
224  it must not be already used
225  @param sequence sequence (string)
226  @param chain_id Chain ID to assign to this molecule
227  """
228  # check whether the molecule name is already assigned
229  if name in self.molecules:
230  raise ValueError('Cannot use a molecule name already used')
231 
232  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
233  self.molecules[name] = [mol]
234  return mol
235 
236  def get_hierarchy(self):
237  return self.hier
238 
239  def get_number_of_copies(self,molname):
240  return len(self.molecules[molname])
241 
242  def _register_copy(self,molecule):
243  molname = molecule.get_hierarchy().get_name()
244  self.molecules[molname].append(molecule)
245 
246  def build(self,**kwargs):
247  """call build on all molecules (automatically makes clones)"""
248  if not self.built:
249  for molname in self.molecules:
250  for mol in reversed(self.molecules[molname]):
251  mol.build(**kwargs)
252  self.built=True
253  return self.hier
254 
255 
256 # Track residues read from PDB files
257 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
258 
259 class _RepresentationHandler(object):
260  """Handle PMI representation and use it to populate that of any attached
261  ProtocolOutput objects"""
262  def __init__(self, name, pos, pdb_elements):
263  self.name = name
264  self.pos = pos
265  self.last_index = None
266  self.last_pdb_index = None
267  self.pdb_for_residue = {}
268  for residues, pdb in pdb_elements:
269  for r in residues:
270  self.pdb_for_residue[r.get_index()] = pdb
271 
272  def _get_pdb(self, h):
273  """Return a PDBElement if the given hierarchy was read from a
274  PDB file"""
276  rind = IMP.atom.Residue(h).get_index()
277  return self.pdb_for_residue.get(rind, None)
278 
279  def __call__(self, res):
280  """Handle a single residue"""
281  if len(self.pos) == 0:
282  return
283  h = res.hier
284  pi = h.get_particle_index()
285  # Do nothing if we already saw this hierarchy
286  if self.last_index is None or pi != self.last_index:
287  pdb = self._get_pdb(h)
288  self.last_index = pi
289  if pdb:
290  assert(IMP.atom.Fragment.get_is_setup(h.get_parent()))
291  frag = IMP.atom.Fragment(h.get_parent())
292  fragi = frag.get_particle_index()
293  # Do nothing if we already saw this PDB fragment
294  if self.last_pdb_index is not None \
295  and self.last_pdb_index == fragi:
296  return
297  self.last_pdb_index = fragi
298  indices = frag.get_residue_indexes()
299  for p, state in self.pos:
300  p.add_pdb_element(state, self.name,
301  indices[0], indices[-1], pdb.offset,
302  pdb.filename, pdb.chain_id, frag)
304  frag = IMP.atom.Fragment(h)
305  indices = frag.get_residue_indexes()
306  for p, state in self.pos:
307  p.add_bead_element(state, self.name,
308  indices[0], indices[-1], 1, h)
310  resind = IMP.atom.Residue(h).get_index()
311  for p, state in self.pos:
312  p.add_bead_element(state, self.name, resind, resind, 1, h)
313  else:
314  raise TypeError("Unhandled hierarchy %s" % str(h))
315 
316 
317 #------------------------
318 
319 class Molecule(_SystemBase):
320  """Stores a named protein chain.
321  This class is constructed from within the State class.
322  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
323  Structure is read using this class.
324  Resolutions and copies can be registered, but are only created
325  when build() is called
326  """
327 
328  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
329  """The user should not call this directly; instead call State::create_molecule()
330  @param state The parent PMI State
331  @param name The name of the molecule (string)
332  @param sequence Sequence (string)
333  @param chain_id The chain of this molecule
334  @param copy_num Store the copy number
335  @param mol_to_clone The original molecule (for cloning ONLY)
336  @note It's expected that you will not use this constructor directly,
337  but rather create a Molecule with pmi::State::create_molecule()
338  """
339  # internal data storage
340  self.model = state.get_hierarchy().get_model()
341  self.state = state
342  self.sequence = sequence
343  self.built = False
344  self.mol_to_clone = mol_to_clone
345  self.is_nucleic=is_nucleic
346  self.representations = [] # list of stuff to build
347  self._pdb_elements = []
348  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
349  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
350  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
351 
352  # create root node and set it as child to passed parent hierarchy
353  self.hier = self._create_child(self.state.get_hierarchy())
354  self.hier.set_name(name)
355  IMP.atom.Copy.setup_particle(self.hier,copy_num)
356  self._name_with_copy = "%s.%d" % (name, copy_num)
357  # store the sequence
358  self.chain=IMP.atom.Chain.setup_particle(self.hier,chain_id)
359  self.chain.set_sequence(self.sequence)
360  # create TempResidues from the sequence (if passed)
361  self.residues=[]
362  for ns,s in enumerate(sequence):
363  r = TempResidue(self,s,ns+1,ns,is_nucleic)
364  self.residues.append(r)
365 
366  def __repr__(self):
367  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
368  str(IMP.atom.Copy(self.hier).get_copy_index())
369 
370  def __getitem__(self,val):
371  if isinstance(val,int):
372  return self.residues[val]
373  elif isinstance(val,str):
374  return self.residues[int(val)-1]
375  elif isinstance(val,slice):
376  return IMP.pmi.tools.OrderedSet(self.residues[val])
377  else:
378  raise TypeError("Indexes must be int or str")
379 
380  def get_hierarchy(self):
381  """Return the IMP Hierarchy corresponding to this Molecule"""
382  return self.hier
383 
384  def get_name(self):
385  """Return this Molecule name"""
386  return self.hier.get_name()
387 
388  def get_state(self):
389  """Return the State containing this Molecule"""
390  return self.state
391 
392  def get_ideal_helices(self):
393  """Returns list of OrderedSets with requested ideal helices"""
394  return self._ideal_helices
395 
396  def residue_range(self,a,b,stride=1):
397  """get residue range from a to b, inclusive.
398  Use integers to get 0-indexing, or strings to get PDB-indexing"""
399  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
400  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
401  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
402  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
403  else:
404  raise TypeError("Range ends must be int or str. "
405  "Stride must be int.")
406 
407  def get_residues(self):
408  """ Return all modeled TempResidues as a set"""
409  all_res = IMP.pmi.tools.OrderedSet(self.residues)
410  return all_res
411 
412  def get_represented(self):
413  """ Return set of TempResidues that have representation"""
414  return self._represented
415 
417  """ Return a set of TempResidues that have associated structure coordinates """
418  atomic_res = IMP.pmi.tools.OrderedSet()
419  for res in self.residues:
420  if res.get_has_structure():
421  atomic_res.add(res)
422  return atomic_res
423 
425  """ Return a set of TempResidues that don't have associated structure coordinates """
426  non_atomic_res = IMP.pmi.tools.OrderedSet()
427  for res in self.residues:
428  if not res.get_has_structure():
429  non_atomic_res.add(res)
430  return non_atomic_res
431 
432  def create_copy(self,chain_id):
433  """Create a new Molecule with the same name and sequence but a higher copy number.
434  Returns the Molecule. No structure or representation will be copied!
435  @param chain_id Chain ID of the new molecule
436  """
437  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
438  copy_num=self.state.get_number_of_copies(self.get_name()))
439  self.state._register_copy(mol)
440  return mol
441 
442  def create_clone(self,chain_id):
443  """Create a Molecule clone (automatically builds same structure and representation)
444  @param chain_id If you want to set the chain ID of the copy to something
445  @note You cannot add structure or representations to a clone!
446  """
447  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
448  copy_num=self.state.get_number_of_copies(self.get_name()),
449  mol_to_clone=self)
450  self.state._register_copy(mol)
451  return mol
452 
453  def add_structure(self,pdb_fn,chain_id,res_range=[],
454  offset=0,model_num=None,ca_only=False,
455  soft_check=False):
456  """Read a structure and store the coordinates.
457  @eturn the atomic residues (as a set)
458  @param pdb_fn The file to read
459  @param chain_id Chain ID to read
460  @param res_range Add only a specific set of residues from the PDB file.
461  res_range[0] is the starting and res_range[1] is the
462  ending residue index.
463  @param offset Apply an offset to the residue indexes of the PDB
464  file. This number is added to the PDB sequence.
465  @param model_num Read multi-model PDB and return that model
466  @param ca_only Only read the CA positions from the PDB file
467  @param soft_check If True, it only warns if there are sequence
468  mismatches between the PDB and the Molecule (FASTA)
469  sequence, and uses the sequence from the PDB.
470  If False (Default), it raises an error when there
471  are sequence mismatches.
472  @note If you are adding structure without a FASTA file, set soft_check
473  to True.
474  """
475  if self.mol_to_clone is not None:
476  raise ValueError('You cannot call add_structure() for a clone')
477 
478  self.pdb_fn = pdb_fn
479 
480  # get IMP.atom.Residues from the pdb file
481  rhs = system_tools.get_structure(self.model,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
482  self.coord_finder.add_residues(rhs)
483 
484  if len(self.residues)==0:
485  warnings.warn(
486  "Substituting PDB residue type with FASTA residue type. "
487  "Potentially dangerous.", IMP.pmi.StructureWarning)
488 
489  # Store info for ProtocolOutput usage later
490  self._pdb_elements.append((rhs,
491  _PDBElement(offset=offset, filename=pdb_fn, chain_id=chain_id)))
492 
493  # load those into TempResidue object
494  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
495  for nrh,rh in enumerate(rhs):
496  pdb_idx = rh.get_index()
497  raw_idx = pdb_idx - 1
498 
499  # add ALA to fill in gaps
500  while len(self.residues)<pdb_idx:
501  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
502  self.residues.append(r)
503  self.sequence += 'A'
504 
505  internal_res = self.residues[raw_idx]
506  if len(self.sequence)<raw_idx:
507  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
508  internal_res.set_structure(rh,soft_check)
509  atomic_res.add(internal_res)
510 
511  self.chain.set_sequence(self.sequence)
512  return atomic_res
513 
515  residues=None,
516  resolutions=[],
517  bead_extra_breaks=[],
518  bead_ca_centers=True,
519  bead_default_coord=[0,0,0],
520  density_residues_per_component=None,
521  density_prefix=None,
522  density_force_compute=False,
523  density_voxel_size=1.0,
524  setup_particles_as_densities=False,
525  ideal_helix=False,
526  color=None):
527  """Set the representation for some residues. Some options (beads, ideal helix)
528  operate along the backbone. Others (density options) are volumetric.
529  Some of these you can combine e.g., beads+densities or helix+densities
530  See @ref pmi_resolution
531  @param residues Set of PMI TempResidues for adding the representation.
532  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
533  If None, will select all residues for this Molecule.
534  @param resolutions Resolutions for beads representations.
535  If structured, will average along backbone, breaking at sequence breaks.
536  If unstructured, will just create beads.
537  Pass an integer or list of integers
538  @param bead_extra_breaks Additional breakpoints for splitting beads.
539  The value can be the 0-ordered position, after which it'll insert the break.
540  Alternatively pass PDB-style (1-ordered) indices as a string.
541  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
542  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
543  (otherwise will average atoms to get center). Defaults to True.
544  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
545  If no structure provided (like an all bead molecule), the beads go here.
546  @param density_residues_per_component Create density (Gaussian Mixture Model)
547  for these residues. Must also supply density_prefix
548  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
549  If exists, will read unless you set density_force_compute=True.
550  Will also write map (prefix+'.mrc').
551  Must also supply density_residues_per_component.
552  @param density_force_compute Set true to force overwrite density file.
553  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
554  Set to 0 if you don't want to create the MRC file
555  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
556  Useful for all-atom models or flexible beads.
557  Mutually exclusive with density_ options
558  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
559  Any other resolutions passed will be coarsened from there.
560  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
561  @param color the color applied to the hierarchies generated.
562  Format options: tuple (r,g,b) with values 0 to 1;
563  float (from 0 to 1, a map from Blue to Green to Red);
564  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
565  a hex RGB string (e.g. "0xff0000");
566  an IMP.display.Color object
567  @note You cannot call add_representation multiple times for the
568  same residues.
569  """
570 
571  # can't customize clones
572  if self.mol_to_clone is not None:
573  raise ValueError('You cannot call add_representation() for a clone.'
574  ' Maybe use a copy instead.')
575 
576  # format input
577  if residues is None:
578  res = IMP.pmi.tools.OrderedSet(self.residues)
579  elif residues==self:
580  res = IMP.pmi.tools.OrderedSet(self.residues)
581  elif type(residues) is IMP.pmi.topology.TempResidue:
582  res = IMP.pmi.tools.OrderedSet([residues])
583  elif hasattr(residues,'__iter__'):
584  if len(residues)==0:
585  raise Exception('You passed an empty set to add_representation')
586  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
587  res = residues
588  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
589  res = IMP.pmi.tools.OrderedSet(residues)
590  elif type(residues) is list and type(residues[0]) is TempResidue:
591  res = IMP.pmi.tools.OrderedSet(residues)
592  else:
593  raise Exception("You passed an iteratible of something other than TempResidue",res)
594  else:
595  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
596 
597  # check that each residue has not been represented yet
598  ov = res & self._represented
599  if ov:
600  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
601  self._represented|=res
602 
603  # check you aren't creating multiple resolutions without structure
604  if not hasattr(resolutions,'__iter__'):
605  if type(resolutions) is int:
606  resolutions = [resolutions]
607  else:
608  raise Exception("you tried to pass resolutions that are not int or list-of-int")
609  if len(resolutions)>1 and not ideal_helix:
610  for r in res:
611  if not r.get_has_structure():
612  raise Exception('You are creating multiple resolutions for '
613  'unstructured regions. This will have unexpected results.')
614 
615  # check density info is consistent
616  if density_residues_per_component or density_prefix:
617  if not density_residues_per_component and density_prefix:
618  raise Exception('If requesting density, must provide '
619  'density_residues_per_component AND density_prefix')
620  if density_residues_per_component and setup_particles_as_densities:
621  raise Exception('Cannot create both volumetric density '
622  '(density_residues_per_component) AND '
623  'individual densities (setup_particles_as_densities) '
624  'in the same representation')
625  if len(resolutions)>1 and setup_particles_as_densities:
626  raise Exception('You have multiple bead resolutions but are attempting to '
627  'set them all up as individual Densities. '
628  'This could have unexpected results.')
629 
630  # check helix not accompanied by other resolutions (densities OK though!)
631  if ideal_helix:
632  if 0 in resolutions:
633  raise Exception("For ideal helices, cannot build resolution 0: "
634  "you have to do that in MODELLER")
635  if 1 not in resolutions:
636  resolutions = [1] + list(resolutions)
637  self._ideal_helices.append(res)
638 
639  # check residues are all part of this molecule:
640  for r in res:
641  if r.get_molecule()!=self:
642  raise Exception('You are adding residues from a different molecule to',self.__repr__())
643 
644  # unify formatting for extra breaks
645  breaks = []
646  for b in bead_extra_breaks:
647  if type(b)==str:
648  breaks.append(int(b)-1)
649  else:
650  breaks.append(b)
651  # store the representation group
652  self.representations.append(_Representation(res,
653  resolutions,
654  breaks,
655  bead_ca_centers,
656  bead_default_coord,
657  density_residues_per_component,
658  density_prefix,
659  density_force_compute,
660  density_voxel_size,
661  setup_particles_as_densities,
662  ideal_helix,
663  color))
664 
665  def _all_protocol_output(self):
666  return self.state._protocol_output
667 
668  def build(self):
669  """Create all parts of the IMP hierarchy
670  including Atoms, Residues, and Fragments/Representations and,
671  finally, Copies.
672  Will only build requested representations.
673  @note Any residues assigned a resolution must have an IMP.atom.Residue
674  hierarchy containing at least a CAlpha. For missing residues,
675  these can be constructed from the PDB file.
676  """
677  if not self.built:
678  # Add molecule name and sequence to any ProtocolOutput objects
679  name=self.hier.get_name()
680  for po, state in self._all_protocol_output():
681  po.create_component(state, name, True,
682  asym_name=self._name_with_copy)
683  po.add_component_sequence(state, name, self.sequence,
684  asym_name=self._name_with_copy)
685  # if requested, clone structure and representations BEFORE building original
686  if self.mol_to_clone is not None:
687  for nr,r in enumerate(self.mol_to_clone.residues):
688  if r.get_has_structure():
689  clone = IMP.atom.create_clone(r.get_hierarchy())
690  self.residues[nr].set_structure(
691  IMP.atom.Residue(clone),soft_check=True)
692  for old_rep in self.mol_to_clone.representations:
693  new_res = IMP.pmi.tools.OrderedSet()
694  for r in old_rep.residues:
695  new_res.add(self.residues[r.get_internal_index()])
696  self._represented.add(self.residues[r.get_internal_index()])
697  new_rep = _Representation(new_res,
698  old_rep.bead_resolutions,
699  old_rep.bead_extra_breaks,
700  old_rep.bead_ca_centers,
701  old_rep.bead_default_coord,
702  old_rep.density_residues_per_component,
703  old_rep.density_prefix,
704  False,
705  old_rep.density_voxel_size,
706  old_rep.setup_particles_as_densities,
707  old_rep.ideal_helix,
708  old_rep.color)
709  self.representations.append(new_rep)
710  self.coord_finder = self.mol_to_clone.coord_finder
711 
712  # give a warning for all residues that don't have representation
713  no_rep = [r for r in self.residues if r not in self._represented]
714  if len(no_rep)>0:
715  warnings.warn(
716  'Residues without representation in molecule %s: %s'
717  % (self.get_name(), system_tools.resnums2str(no_rep)),
719 
720  # first build any ideal helices (fills in structure for the TempResidues)
721  for rep in self.representations:
722  if rep.ideal_helix:
723  _build_ideal_helix(self.model,rep.residues,self.coord_finder)
724 
725  # build all the representations
726  built_reps = []
727  for rep in self.representations:
728  built_reps += system_tools.build_representation(
729  self, rep, self.coord_finder)
730 
731  # sort them before adding as children
732  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
733  for br in built_reps:
734  self.hier.add_child(br)
735  br.update_parents()
736  self.built = True
737 
738  rephandler = _RepresentationHandler(self._name_with_copy,
739  list(self._all_protocol_output()),
740  self._pdb_elements)
741  for res in self.residues:
742  idx = res.get_index()
743 
744  # first off, store the highest resolution available in residue.hier
745  new_ps = IMP.atom.Selection(
746  self.hier,
747  residue_index=res.get_index(),
748  resolution=1).get_selected_particles()
749  if len(new_ps)>0:
750  new_p = new_ps[0]
751  if IMP.atom.Atom.get_is_setup(new_p):
752  # if only found atomic, store the residue
753  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
754  else:
755  # otherwise just store what you found
756  new_hier = IMP.atom.Hierarchy(new_p)
757  res.hier = new_hier
758  rephandler(res)
759  else:
760  res.hier = None
761  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
762  print('done building',self.get_hierarchy())
763  return self.hier
764 
765  def get_particles_at_all_resolutions(self,residue_indexes=None):
766  """Helpful utility for getting particles at all resolutions from this molecule.
767  Can optionally pass a set of residue indexes"""
768  if not self.built:
769  raise Exception("Cannot get all resolutions until you build the Molecule")
770  if residue_indexes is None:
771  residue_indexes = [r.get_index() for r in self.get_residues()]
773  residue_indexes=residue_indexes)
774  return ps
775 
776 #------------------------
777 
778 class _Representation(object):
779  """Private class just to store a representation request"""
780  def __init__(self,
781  residues,
782  bead_resolutions,
783  bead_extra_breaks,
784  bead_ca_centers,
785  bead_default_coord,
786  density_residues_per_component,
787  density_prefix,
788  density_force_compute,
789  density_voxel_size,
790  setup_particles_as_densities,
791  ideal_helix,
792  color):
793  self.residues = residues
794  self.bead_resolutions = bead_resolutions
795  self.bead_extra_breaks = bead_extra_breaks
796  self.bead_ca_centers = bead_ca_centers
797  self.bead_default_coord = bead_default_coord
798  self.density_residues_per_component = density_residues_per_component
799  self.density_prefix = density_prefix
800  self.density_force_compute = density_force_compute
801  self.density_voxel_size = density_voxel_size
802  self.setup_particles_as_densities = setup_particles_as_densities
803  self.ideal_helix = ideal_helix
804  self.color = color
805 
806 class _FindCloseStructure(object):
807  """Utility to get the nearest observed coordinate"""
808  def __init__(self):
809  self.coords=[]
810  def add_residues(self,residues):
811  for r in residues:
812  idx = IMP.atom.Residue(r).get_index()
813  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
814  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
815  if len(ca)==1:
816  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
817  self.coords.append([idx,xyz])
818  elif len(p)==1:
819  xyz = IMP.core.XYZ(p[0]).get_coordinates()
820  self.coords.append([idx,xyz])
821  else:
822  raise("_FindCloseStructure: wrong selection")
823 
824  self.coords.sort(key=itemgetter(0))
825  def find_nearest_coord(self,query):
826  if self.coords==[]:
827  return None
828  keys = [r[0] for r in self.coords]
829  pos = bisect_left(keys,query)
830  if pos == 0:
831  ret = self.coords[0]
832  elif pos == len(self.coords):
833  ret = self.coords[-1]
834  else:
835  before = self.coords[pos - 1]
836  after = self.coords[pos]
837  if after[0] - query < query - before[0]:
838  ret = after
839  else:
840  ret = before
841  return ret[1]
842 
843 class Sequences(object):
844  """A dictionary-like wrapper for reading and storing sequence data"""
845  def __init__(self,fasta_fn,name_map=None):
846  """read a fasta file and extract all the requested sequences
847  @param fasta_fn sequence file
848  @param name_map dictionary mapping the fasta name to final stored name
849  """
850  self.sequences = IMP.pmi.tools.OrderedDict()
851  self.read_sequences(fasta_fn,name_map)
852  def __len__(self):
853  return len(self.sequences)
854  def __contains__(self,x):
855  return x in self.sequences
856  def __getitem__(self,key):
857  if type(key) is int:
858  allseqs = list(self.sequences.keys())
859  try:
860  return self.sequences[allseqs[key]]
861  except IndexError:
862  raise IndexError("You tried to access sequence number %d "
863  "but there's only %d" % (key, len(allseqs)))
864  else:
865  return self.sequences[key]
866  def __iter__(self):
867  return self.sequences.__iter__()
868  def __repr__(self):
869  ret=''
870  for s in self.sequences:
871  ret += '%s\t%s\n'%(s,self.sequences[s])
872  return ret
873  def read_sequences(self,fasta_fn,name_map=None):
874  code = None
875  seq = None
876  with open(fasta_fn,'r') as fh:
877  for (num, line) in enumerate(fh):
878  if line.startswith('>'):
879  if seq is not None:
880  self.sequences[code] = seq.strip('*')
881  code = line.rstrip()[1:]
882  if name_map is not None:
883  try:
884  code = name_map[code]
885  except:
886  pass
887  seq = ''
888  else:
889  line = line.rstrip()
890  if line: # Skip blank lines
891  if seq is None:
892  raise Exception( \
893  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
894  seq += line
895  if seq is not None:
896  self.sequences[code] = seq.strip('*')
897 
898 
899 class PDBSequences(object):
900  """Data_structure for reading and storing sequence data from pdb"""
901  def __init__(self,model,pdb_fn,name_map=None):
902  """read a pdb file and returns all sequences for each contiguous fragment
903  @param pdb_fn file
904  @param name_map dictionary mapping the pdb chain id to final stored name
905  """
906  self.model=model
907  # self.sequences data-structure: (two-key dictionary)
908  # it contains all contiguous fragments:
909  # chain_id, tuples indicating first and last residue, sequence
910  # example:
911  # key1, key2, value
912  # A (77, 149) VENPSLDLEQYAASYSGLMR....
913  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
914  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
915  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
916  self.sequences = IMP.pmi.tools.OrderedDict()
917  self.read_sequences(pdb_fn,name_map)
918 
919  def read_sequences(self,pdb_fn,name_map):
920  read_file = IMP.atom.read_pdb
921  if pdb_fn.endswith('.cif'):
922  read_file = IMP.atom.read_mmcif
923  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
924  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
925  for c in cs:
926  id=IMP.atom.Chain(c).get_id()
927  print(id)
928  if name_map:
929  try:
930  id=name_map[id]
931  except:
932  print("Chain ID %s not in name_map, skipping" % id)
933  continue
934  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
935  rids=[]
936  rids_olc_dict={}
937  for r in rs:
938  dr=IMP.atom.Residue(r)
939  rid=dr.get_index()
940 
941  isprotein=dr.get_is_protein()
942  isrna=dr.get_is_rna()
943  isdna=dr.get_is_dna()
944  if isprotein:
945  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
946  rids.append(rid)
947  rids_olc_dict[rid]=olc
948  elif isdna:
949  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
950  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
951  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
952  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
953  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
954  rids.append(rid)
955  rids_olc_dict[rid]=olc
956  elif isrna:
957  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
958  if dr.get_residue_type() == IMP.atom.URA: olc="U"
959  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
960  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
961  if dr.get_residue_type() == IMP.atom.THY: olc="T"
962  rids.append(rid)
963  rids_olc_dict[rid]=olc
964  group_rids=self.group_indexes(rids)
965  contiguous_sequences=IMP.pmi.tools.OrderedDict()
966  for group in group_rids:
967  sequence_fragment=""
968  for i in range(group[0],group[1]+1):
969  sequence_fragment+=rids_olc_dict[i]
970  contiguous_sequences[group]=sequence_fragment
971  self.sequences[id]=contiguous_sequences
972 
973  #for c in self.sequences:
974  # for group in self.sequences[c]:
975  # print(c,group,self.sequences[c][group])
976 
977  def group_indexes(self,indexes):
978  from itertools import groupby
979  ranges = []
980  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
981  group = [x[1] for x in g]
982  ranges.append((group[0], group[-1]))
983  return ranges
984 
985 
986 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
987  '''This function computes and prints the alignment between the
988  fasta file and the pdb sequence, computes the offsets for each contiguous
989  fragment in the PDB.
990  @param fasta_sequences IMP.pmi.topology.Sequences object
991  @param pdb_sequences IMP.pmi.topology.PDBSequences object
992  @param show boolean default False, if True prints the alignments.
993  The input objects should be generated using map_name dictionaries such that fasta_id
994  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
995  Returns a dictionary of offsets, organized by peptide range (group):
996  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
997  from Bio import pairwise2
998  from Bio.pairwise2 import format_alignment
999  from Bio.SubsMat import MatrixInfo as matlist
1000  matrix = matlist.blosum62
1001  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1002  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1003  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1004  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1005  offsets=IMP.pmi.tools.OrderedDict()
1006  for name in fasta_sequences.sequences:
1007  print(name)
1008  seq_fasta=fasta_sequences.sequences[name]
1009  if name not in pdb_sequences.sequences:
1010  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
1011  pdbnames=pdb_sequences.sequences.keys()
1012  else:
1013  pdbnames=[name]
1014  for pdbname in pdbnames:
1015  for group in pdb_sequences.sequences[pdbname]:
1016  if group[1]-group[0]+1<7:continue
1017  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
1018  if show:
1019  print("########################")
1020  print(" ")
1021  print("protein name",pdbname)
1022  print("fasta id", name)
1023  print("pdb fragment",group)
1024  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
1025  for a in [align]:
1026  offset=a[3]+1-group[0]
1027  if show:
1028  print("alignment sequence start-end",(a[3]+1,a[4]+1))
1029  print("offset from pdb to fasta index",offset)
1030  print(format_alignment(*a))
1031  if name not in offsets:
1032  offsets[pdbname]={}
1033  if group not in offsets[pdbname]:
1034  offsets[pdbname][group]=offset
1035  else:
1036  if group not in offsets[pdbname]:
1037  offsets[pdbname][group]=offset
1038  return offsets
1039 
1040 
1041 
1042 #------------------------
1043 
1044 
1045 class TempResidue(object):
1046  """Temporarily stores residue information, even without structure available."""
1047  # Consider implementing __hash__ so you can select.
1048  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
1049  """setup a TempResidue
1050  @param molecule PMI Molecule to which this residue belongs
1051  @param code one-letter residue type code
1052  @param index PDB index
1053  @param internal_index The number in the sequence
1054  """
1055  #these attributes should be immutable
1056  self.molecule = molecule
1057  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
1058  self.pdb_index = index
1059  self.internal_index = internal_index
1060  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1061  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
1062  #these are expected to change
1063  self._structured = False
1064  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.model),
1065  self.rtype,
1066  index)
1067  def __str__(self):
1068  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
1069  def __repr__(self):
1070  return self.__str__()
1071  def __key(self):
1072  #this returns the immutable attributes only
1073  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
1074  def __eq__(self,other):
1075  return type(other)==type(self) and self.__key() == other.__key()
1076  def __hash__(self):
1077  return hash(self.__key())
1078  def get_index(self):
1079  return self.pdb_index
1080  def get_internal_index(self):
1081  return self.internal_index
1082  def get_code(self):
1083  return IMP.atom.get_one_letter_code(self.get_residue_type())
1084  def get_residue_type(self):
1085  return self.rtype
1086  def get_hierarchy(self):
1087  return self.hier
1088  def get_molecule(self):
1089  return self.molecule
1090  def get_has_structure(self):
1091  return self._structured
1092  def set_structure(self,res,soft_check=False):
1093  if res.get_residue_type()!=self.get_residue_type():
1094  if soft_check:
1095  # note from commit a2c13eaa1 we give priority to the FASTA and not the PDB
1096  warnings.warn(
1097  'Inconsistency between FASTA sequence and PDB sequence. '
1098  'FASTA type %s %s and PDB type %s'
1099  % (self.get_index(), self.hier.get_residue_type(),
1100  res.get_residue_type()),
1102  self.hier.set_residue_type((self.get_residue_type()))
1103  self.rtype = self.get_residue_type()
1104  else:
1105  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
1106  IMP.atom.get_one_letter_code(res.get_residue_type()),
1107  'and sequence residue is',self.get_code())
1108 
1109  for a in res.get_children():
1110  self.hier.add_child(a)
1111  atype = IMP.atom.Atom(a).get_atom_type()
1112  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
1113  self.hier.get_index()))
1114  self._structured = True
1115 
1116 class TopologyReader(object):
1117  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
1118  The file is read in and each part of the topology is stored as a
1119  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1120  The topology file should be in a simple pipe-delimited format:
1121  @code{.txt}
1122 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1123 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1124 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1125 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1126 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1127 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1128 
1129  @endcode
1130 
1131  These are the fields you can enter:
1132  - `component_name`: Name of the component (chain). Serves as the parent
1133  hierarchy for this structure.
1134  - `color`: The color used in the output RMF file. Uses
1135  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1136  (e.g. "red"), or R,G,B values as three comma-separated floating point
1137  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1138  starting with '#' (e.g. 0xff0000).
1139  - `fasta_fn`: Name of FASTA file containing this component.
1140  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1141  from the file is assumed to be a protein sequence. If it should instead
1142  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1143  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1144  FASTA file and treat it as RNA.
1145  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1146  If left empty, will set up as BEADS (you can also specify "BEADS")
1147  Can also write "IDEAL_HELIX".
1148  - `chain`: Chain ID of this domain in the PDB file.
1149  - `residue_range`: Comma delimited pair defining range.
1150  Can leave empty or use 'all' for entire sequence from PDB file.
1151  The second item in the pair can be END to select the last residue in the
1152  PDB chain.
1153  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1154  - `bead_size`: The size (in residues) of beads used to model areas not
1155  covered by PDB coordinates. These will be automatically built.
1156  - `em_residues`: The number of Gaussians used to model the electron
1157  density of this domain. Set to zero if no EM fitting will be done.
1158  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1159  - `rigid_body`: Leave empty if this object is not in a rigid body.
1160  Otherwise, this is a number corresponding to the rigid body containing
1161  this object. The number itself is just used for grouping things.
1162  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1163  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1164  - `flags` additional flags for advanced options
1165  @note All filenames are relative to the paths specified in the constructor.
1166 
1167  """
1168  def __init__(self,
1169  topology_file,
1170  pdb_dir='./',
1171  fasta_dir='./',
1172  gmm_dir='./'):
1173  """Constructor.
1174  @param topology_file Pipe-delimited file specifying the topology
1175  @param pdb_dir Relative path to the pdb directory
1176  @param fasta_dir Relative path to the fasta directory
1177  @param gmm_dir Relative path to the GMM directory
1178  """
1179  self.topology_file = topology_file
1180  self.molecules = IMP.pmi.tools.OrderedDict() # key=molname, value=TempMolecule
1181  self.pdb_dir = pdb_dir
1182  self.fasta_dir = fasta_dir
1183  self.gmm_dir = gmm_dir
1184  self._components = self.read(topology_file)
1185 
1186  def write_topology_file(self,outfile):
1187  with open(outfile, "w") as f:
1188  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1189  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1190  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1191  for c in self._components:
1192  output = c.get_str()+'\n'
1193  f.write(output)
1194  return outfile
1195 
1196  def get_components(self, topology_list = "all"):
1197  """ Return list of ComponentTopologies for selected components
1198  @param topology_list List of indices to return"""
1199  if topology_list == "all":
1200  topologies = self._components
1201  else:
1202  topologies=[]
1203  for i in topology_list:
1204  topologies.append(self._components[i])
1205  return topologies
1206 
1207  def get_molecules(self):
1208  return self.molecules
1209 
1210  def read(self, topology_file, append=False):
1211  """Read system components from topology file. append=False will erase
1212  current topology and overwrite with new
1213  """
1214  is_topology = False
1215  is_directories = False
1216  linenum = 1
1217  if append==False:
1218  self._components=[]
1219 
1220  with open(topology_file) as infile:
1221  for line in infile:
1222  if line.lstrip()=="" or line[0]=="#":
1223  continue
1224  elif line.split('|')[1].strip() in ("molecule_name"):
1225  is_topology=True
1226  is_directories = False
1227  old_format = False
1228  continue
1229  elif line.split('|')[1] == "component_name":
1230  is_topology = True
1232  "Old-style topology format (using "
1233  "|component_name|) is deprecated. Please switch to "
1234  "the new-style format (using |molecule_name|)\n")
1235  old_format = True
1236  is_directories = False
1237  continue
1238  elif line.split('|')[1] == "directories":
1240  "Setting directories in the topology file "
1241  "is deprecated. Please do so through the "
1242  "TopologyReader constructor. Note that new-style "
1243  "paths are relative to the current working "
1244  "directory, not the topology file.\n")
1245  is_directories = True
1246  elif is_directories:
1247  fields = line.split('|')
1248  setattr(self, fields[1],
1249  IMP.get_relative_path(topology_file, fields[2]))
1250  if is_topology:
1251  new_component = self._parse_line(line, linenum, old_format)
1252  self._components.append(new_component)
1253  linenum += 1
1254  return self._components
1255 
1256  def _parse_line(self, component_line, linenum, old_format):
1257  """Parse a line of topology values and matches them to their key.
1258  Checks each value for correct syntax
1259  Returns a list of Component objects
1260  fields:
1261  """
1262  c = _Component()
1263  values = [s.strip() for s in component_line.split('|')]
1264  errors = []
1265 
1266  ### Required fields
1267  if old_format:
1268  c.molname = values[1]
1269  c.copyname = ''
1270  c._domain_name = values[2]
1271  c.color = 'blue'
1272  else:
1273  names = values[1].split('.')
1274  if len(names)==1:
1275  c.molname = names[0]
1276  c.copyname = ''
1277  elif len(names)==2:
1278  c.molname = names[0]
1279  c.copyname = names[1]
1280  else:
1281  c.molname = names[0]
1282  c.copyname = names[1]
1283  errors.append("Molecule name should be <molecule.copyID>")
1284  errors.append("For component %s line %d " % (c.molname,linenum))
1285  c._domain_name = c.molname + '.' + c.copyname
1286  colorfields = values[2].split(',')
1287  if len(colorfields)==3:
1288  c.color = [float(x) for x in colorfields]
1289  if any([x>1 for x in c.color]):
1290  c.color=[x/255 for x in c.color]
1291  else:
1292  c.color = values[2]
1293  c._orig_fasta_file = values[3]
1294  c.fasta_file = values[3]
1295  fasta_field = values[4].split(",")
1296  c.fasta_id = fasta_field[0]
1297  c.fasta_flag = None
1298  if len(fasta_field) > 1:
1299  c.fasta_flag = fasta_field[1]
1300  c._orig_pdb_input = values[5]
1301  pdb_input = values[5]
1302  tmp_chain = values[6]
1303  rr = values[7]
1304  offset = values[8]
1305  bead_size = values[9]
1306  emg = values[10]
1307  if old_format:
1308  rbs = srbs = csrbs = ''
1309  else:
1310  rbs = values[11]
1311  srbs = values[12]
1312  csrbs = values[13]
1313 
1314  if c.molname not in self.molecules:
1315  self.molecules[c.molname] = _TempMolecule(c)
1316  else:
1317  # COPY OR DOMAIN
1318  c._orig_fasta_file = self.molecules[c.molname].orig_component._orig_fasta_file
1319  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1320  self.molecules[c.molname].add_component(c,c.copyname)
1321 
1322  # now cleanup input
1323  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1324  if pdb_input=="":
1325  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1326  errors.append("For component %s line %d is not correct"
1327  "|%s| was given." % (c.molname,linenum,pdb_input))
1328  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1329  c.pdb_file = pdb_input
1330  else:
1331  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1332 
1333  # PDB chain must be one or two characters
1334  if len(tmp_chain)==1 or len(tmp_chain)==2:
1335  c.chain = tmp_chain
1336  else:
1337  errors.append("PDB Chain identifier must be one or two characters.")
1338  errors.append("For component %s line %d is not correct"
1339  "|%s| was given." % (c.molname,linenum,tmp_chain))
1340 
1341  ### Optional fields
1342  # Residue Range
1343  if rr.strip()=='all' or str(rr)=="":
1344  c.residue_range = None
1345  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1346  # Make sure that is residue range is given, there are only two values and they are integers
1347  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1348  if c.residue_range[1] != 'END':
1349  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1350  # Old format used -1 for the last residue
1351  if old_format and c.residue_range[1] == -1:
1352  c.residue_range = (c.residue_range[0], 'END')
1353  else:
1354  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1355  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1356  errors.append("To select all residues, indicate |\"all\"|")
1357 
1358  # PDB Offset
1359  if self._is_int(offset):
1360  c.pdb_offset=int(offset)
1361  elif len(offset)==0:
1362  c.pdb_offset = 0
1363  else:
1364  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1365  errors.append("The value must be a single integer. |%s| was given." % offset)
1366 
1367  # Bead Size
1368  if self._is_int(bead_size):
1369  c.bead_size=int(bead_size)
1370  elif len(bead_size)==0:
1371  c.bead_size = 0
1372  else:
1373  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1374  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1375 
1376  # EM Residues Per Gaussian
1377  if self._is_int(emg):
1378  if int(emg) > 0:
1379  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1380  c.gmm_file = c.density_prefix+'.txt'
1381  c.mrc_file = c.density_prefix+'.gmm'
1382 
1383  c.em_residues_per_gaussian=int(emg)
1384  else:
1385  c.em_residues_per_gaussian = 0
1386  elif len(emg)==0:
1387  c.em_residues_per_gaussian = 0
1388  else:
1389  errors.append("em_residues_per_gaussian format for component "
1390  "%s line %d is not correct" % (c.molname, linenum))
1391  errors.append("The value must be a single integer. |%s| was given." % emg)
1392 
1393  # rigid bodies
1394  if len(rbs)>0:
1395  if not self._is_int(rbs):
1396  errors.append("rigid bodies format for component "
1397  "%s line %d is not correct" % (c.molname, linenum))
1398  errors.append("Each RB must be a single integer, or empty. "
1399  "|%s| was given." % rbs)
1400  c.rigid_body = int(rbs)
1401 
1402  # super rigid bodies
1403  if len(srbs)>0:
1404  srbs = srbs.split(',')
1405  for i in srbs:
1406  if not self._is_int(i):
1407  errors.append("super rigid bodies format for component "
1408  "%s line %d is not correct" % (c.molname, linenum))
1409  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1410  c.super_rigid_bodies = srbs
1411 
1412  # chain of super rigid bodies
1413  if len(csrbs)>0:
1414  if not self._is_int(csrbs):
1415  errors.append("em_residues_per_gaussian format for component "
1416  "%s line %d is not correct" % (c.molname, linenum))
1417  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1418  c.chain_of_super_rigid_bodies = csrbs
1419 
1420  # done
1421  if errors:
1422  raise ValueError("Fix Topology File syntax errors and rerun: " \
1423  + "\n".join(errors))
1424  else:
1425  return c
1426 
1427 
1428  def set_gmm_dir(self,gmm_dir):
1429  """Change the GMM dir"""
1430  self.gmm_dir = gmm_dir
1431  for c in self._components:
1432  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1433  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1434  print('new gmm',c.gmm_file)
1435 
1436  def set_pdb_dir(self,pdb_dir):
1437  """Change the PDB dir"""
1438  self.pdb_dir = pdb_dir
1439  for c in self._components:
1440  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1441  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1442 
1443  def set_fasta_dir(self,fasta_dir):
1444  """Change the FASTA dir"""
1445  self.fasta_dir = fasta_dir
1446  for c in self._components:
1447  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1448 
1449  def _is_int(self, s):
1450  # is this string an integer?
1451  try:
1452  float(s)
1453  return float(s).is_integer()
1454  except ValueError:
1455  return False
1456 
1457  def get_rigid_bodies(self):
1458  """Return list of lists of rigid bodies (as domain name)"""
1459  rbl = defaultdict(list)
1460  for c in self._components:
1461  if c.rigid_body:
1462  rbl[c.rigid_body].append(c.get_unique_name())
1463  return rbl.values()
1464 
1466  """Return list of lists of super rigid bodies (as domain name)"""
1467  rbl = defaultdict(list)
1468  for c in self._components:
1469  for rbnum in c.super_rigid_bodies:
1470  rbl[rbnum].append(c.get_unique_name())
1471  return rbl.values()
1472 
1474  """Return list of lists of chains of super rigid bodies (as domain name)"""
1475  rbl = defaultdict(list)
1476  for c in self._components:
1477  for rbnum in c.chain_of_super_rigid_bodies:
1478  rbl[rbnum].append(c.get_unique_name())
1479  return rbl.values()
1480 
1481 class _TempMolecule(object):
1482  """Store the Components and any requests for copies"""
1483  def __init__(self,init_c):
1484  self.molname = init_c.molname
1485  # key=copy ID, value = list of domains
1486  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1487  self.add_component(init_c,init_c.copyname)
1488  self.orig_copyname = init_c.copyname
1489  self.orig_component = self.domains[init_c.copyname][0]
1490  def add_component(self,component,copy_id):
1491  self.domains[copy_id].append(component)
1492  component.domainnum = len(self.domains[copy_id])-1
1493  def __repr__(self):
1494  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1495 
1496 class _Component(object):
1497  """Stores the components required to build a standard IMP hierarchy
1498  using IMP.pmi.BuildModel()
1499  """
1500  def __init__(self):
1501  self.molname = None
1502  self.copyname = None
1503  self.domainnum = 0
1504  self.fasta_file = None
1505  self._orig_fasta_file = None
1506  self.fasta_id = None
1507  self.fasta_flag = None
1508  self.pdb_file = None
1509  self._orig_pdb_input = None
1510  self.chain = None
1511  self.residue_range = None
1512  self.pdb_offset = 0
1513  self.bead_size = 10
1514  self.em_residues_per_gaussian = 0
1515  self.gmm_file = ''
1516  self.mrc_file = ''
1517  self.density_prefix = ''
1518  self.color = 0.1
1519  self.rigid_body = None
1520  self.super_rigid_bodies = []
1521  self.chain_of_super_rigid_bodies = []
1522 
1523  def _l2s(self,l):
1524  return ",".join("%s" % x for x in l)
1525 
1526  def __repr__(self):
1527  return self.get_str()
1528 
1529  def get_unique_name(self):
1530  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1531 
1532  def get_str(self):
1533  res_range = self.residue_range
1534  if self.residue_range is None:
1535  res_range = []
1536  name = self.molname
1537  if self.copyname!='':
1538  name += '.'+self.copyname
1539  if self.chain is None:
1540  chain = ' '
1541  else:
1542  chain = self.chain
1543  color=self.color
1544  if isinstance(color, list):
1545  color=','.join([str(x) for x in color])
1546  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1547  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1548  str(self.pdb_offset),str(self.bead_size),
1549  str(self.em_residues_per_gaussian),
1550  str(self.rigid_body) if self.rigid_body else '',
1551  self._l2s(self.super_rigid_bodies),
1552  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1553  return a
1554 
1555 
1557  '''Extends the functionality of IMP.atom.Molecule'''
1558 
1559  def __init__(self,hierarchy):
1560  IMP.atom.Molecule.__init__(self,hierarchy)
1561 
1562  def get_state_index(self):
1563  state = self.get_parent()
1564  return IMP.atom.State(state).get_state_index()
1565 
1566  def get_copy_index(self):
1567  return IMP.atom.Copy(self).get_copy_index()
1568 
1569  def get_extended_name(self):
1570  return self.get_name()+"."+\
1571  str(self.get_copy_index())+\
1572  "."+str(self.get_state_index())
1573 
1574  def get_sequence(self):
1575  return IMP.atom.Chain(self).get_sequence()
1576 
1577  def get_residue_indexes(self):
1579 
1580  def get_residue_segments(self):
1581  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1582 
1583  def get_chain_id(self):
1584  return IMP.atom.Chain(self).get_id()
1585 
1586  def __repr__(self):
1587  s='PMIMoleculeHierarchy '
1588  s+=self.get_name()
1589  s+=" "+"Copy "+str(IMP.atom.Copy(self).get_copy_index())
1590  s+=" "+"State "+str(self.get_state_index())
1591  s+=" "+"N residues "+str(len(self.get_sequence()))
1592  return s
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1482
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
Build all states.
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:994
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:882
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:1223