IMP logo
IMP Reference Guide  2.12.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiple times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return Python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict, namedtuple
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 import weakref
34 import warnings
35 
36 def _build_ideal_helix(model, residues, coord_finder):
37  """Creates an ideal helix from the specified residue range
38  Residues MUST be contiguous.
39  This function actually adds them to the TempResidue hierarchy
40  """
41  created_hiers = []
42 
43  # this function creates a CAlpha helix structure (which can be used for coarsening)
44  for n, tempres in enumerate(residues):
45  if tempres.get_has_structure():
46  raise ValueError("You tried to build ideal_helix for a residue "
47  "that already has structure: %s" % tempres)
48  if n > 0 and tempres.get_index() != prev_idx + 1:
49  raise ValueError("Passed non-contiguous segment to "
50  "build_ideal_helix for %s" % tempres.get_molecule())
51 
52  # New residue particle will replace the TempResidue's existing (empty) hierarchy
53  rp = IMP.Particle(model)
54  rp.set_name("Residue_%i" % tempres.get_index())
55 
56  # Copy the original residue type and index
57  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
58 
59  # Create the CAlpha
60  ap = IMP.Particle(model)
62  x = 2.3 * cos(n * 2 * pi / 3.6)
63  y = 2.3 * sin(n * 2 * pi / 3.6)
64  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
65  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
66  d.set_radius(1.0)
67  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
68  IMP.atom.Mass(ap).set_mass(110.0)
69  this_res.add_child(a)
70 
71  # Add this structure to the TempResidue
72  tempres.set_structure(this_res)
73  created_hiers.append(this_res)
74  prev_idx = tempres.get_index()
75  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
76 
77 class _SystemBase(object):
78  """The base class for System, State and Molecule
79  classes. It contains shared functions in common to these classes
80  """
81 
82  def __init__(self,model=None):
83  if model is None:
84  self.model=IMP.Model()
85  else:
86  self.model=model
87 
88  def _create_hierarchy(self):
89  """create a new hierarchy"""
90  tmp_part=IMP.Particle(self.model)
91  return IMP.atom.Hierarchy.setup_particle(tmp_part)
92 
93  def _create_child(self,parent_hierarchy):
94  """create a new hierarchy, set it as child of the input
95  one, and return it"""
96  child_hierarchy=self._create_hierarchy()
97  parent_hierarchy.add_child(child_hierarchy)
98  return child_hierarchy
99 
100  def build(self):
101  """Build the coordinates of the system.
102  Loop through stored(?) hierarchies and set up coordinates!"""
103  pass
104 
105 #------------------------
106 
107 class System(_SystemBase):
108  """This class initializes the root node of the global IMP.atom.Hierarchy."""
109 
110  _all_systems = set()
111 
112  def __init__(self,model=None,name="System"):
113  _SystemBase.__init__(self,model)
114  self._number_of_states = 0
115  self._protocol_output = []
116  self.states = []
117  self.built=False
118 
119  System._all_systems.add(weakref.ref(self))
120 
121  # the root hierarchy node
122  self.hier=self._create_hierarchy()
123  self.hier.set_name(name)
124  self.hier._pmi2_system = weakref.ref(self)
125 
126  def __del__(self):
127  System._all_systems = set(x for x in System._all_systems
128  if x() not in (None, self))
129 
130  def get_states(self):
131  return self.states
132 
133  def create_state(self):
134  """Makes and returns a new IMP.pmi.topology.State in this system"""
135  self._number_of_states+=1
136  state = State(self,self._number_of_states-1)
137  self.states.append(state)
138  return state
139 
140  def __repr__(self):
141  return self.hier.get_name()
142 
144  """Returns the total number of states generated"""
145  return self._number_of_states
146 
147  def get_hierarchy(self):
148  return self.hier
149 
150  def build(self,**kwargs):
151  """Build all states"""
152  if not self.built:
153  for state in self.states:
154  state.build(**kwargs)
155  self.built=True
156  return self.hier
157 
158  def add_protocol_output(self, p):
159  """Capture details of the modeling protocol.
160  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
161  """
162  self._protocol_output.append(p)
163 # p._each_metadata.append(self._metadata)
164 # p._file_datasets.append(self._file_dataset)
165  for state in self.states:
166  state._add_protocol_output(p, self)
167 
168 
169 #------------------------
170 
171 class State(_SystemBase):
172  """Stores a list of Molecules all with the same State index.
173  Also stores number of copies of each Molecule for easy Selection.
174  """
175  def __init__(self,system,state_index):
176  """Define a new state
177  @param system the PMI System
178  @param state_index the index of the new state
179  @note It's expected that you will not use this constructor directly,
180  but rather create it with pmi::System::create_state()
181  """
182  self.model = system.get_hierarchy().get_model()
183  self.system = system
184  self.hier = self._create_child(system.get_hierarchy())
185  self.short_name = self.long_name = "State_"+str(state_index)
186  self.hier.set_name(self.short_name)
187  self.molecules = IMP.pmi.tools.OrderedDict() # key is molecule name. value are the molecule copies!
188  IMP.atom.State.setup_particle(self.hier,state_index)
189  self.built = False
190  self._protocol_output = []
191  for p in system._protocol_output:
192  self._add_protocol_output(p, system)
193 
194  def __repr__(self):
195  return self.system.__repr__()+'.'+self.hier.get_name()
196 
197  def _add_protocol_output(self, p, system):
198  state = p._add_state(self)
199  self._protocol_output.append((p, state))
200  state.model = system.model
201  state.prot = self.hier
202 
203  def get_molecules(self):
204  """Return a dictionary where key is molecule name and value
205  are the list of all copies of that molecule in setup order"""
206  return self.molecules
207 
208  def get_molecule(self, name, copy_num=0):
209  """Access a molecule by name and copy number
210  @param name The molecule name used during setup
211  @param copy_num The copy number based on input order.
212  Default: 0. Set to 'all' to get all copies
213  """
214  if name not in self.molecules:
215  raise KeyError("Could not find molname %s" % name)
216  if copy_num == 'all':
217  return self.molecules[name]
218  else:
219  return self.molecules[name][copy_num]
220 
221  def create_molecule(self, name, sequence='', chain_id='', is_nucleic=None):
222  """Create a new Molecule within this State
223  @param name the name of the molecule (string);
224  it must not be already used
225  @param sequence sequence (string)
226  @param chain_id Chain ID to assign to this molecule
227  """
228  # check whether the molecule name is already assigned
229  if name in self.molecules:
230  raise ValueError('Cannot use a molecule name already used')
231 
232  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
233  self.molecules[name] = [mol]
234  return mol
235 
236  def get_hierarchy(self):
237  return self.hier
238 
239  def get_number_of_copies(self,molname):
240  return len(self.molecules[molname])
241 
242  def _register_copy(self,molecule):
243  molname = molecule.get_hierarchy().get_name()
244  self.molecules[molname].append(molecule)
245 
246  def build(self,**kwargs):
247  """call build on all molecules (automatically makes clones)"""
248  if not self.built:
249  for molname in self.molecules:
250  for mol in reversed(self.molecules[molname]):
251  mol.build(**kwargs)
252  self.built=True
253  return self.hier
254 
255 
256 # Track residues read from PDB files
257 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
258 
259 class _RepresentationHandler(object):
260  """Handle PMI representation and use it to populate that of any attached
261  ProtocolOutput objects"""
262  def __init__(self, name, pos, pdb_elements):
263  self.name = name
264  self.pos = pos
265  self.last_index = None
266  self.last_pdb_index = None
267  self.pdb_for_residue = {}
268  for residues, pdb in pdb_elements:
269  for r in residues:
270  self.pdb_for_residue[r.get_index()] = pdb
271 
272  def _get_pdb(self, h):
273  """Return a PDBElement if the given hierarchy was read from a
274  PDB file"""
276  rind = IMP.atom.Residue(h).get_index()
277  return self.pdb_for_residue.get(rind, None)
278 
279  def __call__(self, res):
280  """Handle a single residue"""
281  if len(self.pos) == 0:
282  return
283  h = res.hier
284  pi = h.get_particle_index()
285  # Do nothing if we already saw this hierarchy
286  if self.last_index is None or pi != self.last_index:
287  pdb = self._get_pdb(h)
288  self.last_index = pi
289  if pdb:
290  assert(IMP.atom.Fragment.get_is_setup(h.get_parent()))
291  frag = IMP.atom.Fragment(h.get_parent())
292  fragi = frag.get_particle_index()
293  # Do nothing if we already saw this PDB fragment
294  if self.last_pdb_index is not None \
295  and self.last_pdb_index == fragi:
296  return
297  self.last_pdb_index = fragi
298  indices = frag.get_residue_indexes()
299  for p, state in self.pos:
300  p.add_pdb_element(state, self.name,
301  indices[0], indices[-1], pdb.offset,
302  pdb.filename, pdb.chain_id, frag)
304  frag = IMP.atom.Fragment(h)
305  indices = frag.get_residue_indexes()
306  for p, state in self.pos:
307  p.add_bead_element(state, self.name,
308  indices[0], indices[-1], 1, h)
310  resind = IMP.atom.Residue(h).get_index()
311  for p, state in self.pos:
312  p.add_bead_element(state, self.name, resind, resind, 1, h)
313  else:
314  raise TypeError("Unhandled hierarchy %s" % str(h))
315 
316 
317 #------------------------
318 
319 class Molecule(_SystemBase):
320  """Stores a named protein chain.
321  This class is constructed from within the State class.
322  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
323  Structure is read using this class.
324  Resolutions and copies can be registered, but are only created
325  when build() is called
326  """
327 
328  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
329  """The user should not call this directly; instead call State::create_molecule()
330  @param state The parent PMI State
331  @param name The name of the molecule (string)
332  @param sequence Sequence (string)
333  @param chain_id The chain of this molecule
334  @param copy_num Store the copy number
335  @param mol_to_clone The original molecule (for cloning ONLY)
336  @note It's expected that you will not use this constructor directly,
337  but rather create a Molecule with pmi::State::create_molecule()
338  """
339  # internal data storage
340  self.model = state.get_hierarchy().get_model()
341  self.state = state
342  self.sequence = sequence
343  self.built = False
344  self.mol_to_clone = mol_to_clone
345  self.is_nucleic=is_nucleic
346  self.representations = [] # list of stuff to build
347  self._pdb_elements = []
348  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
349  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
350  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
351 
352  # create root node and set it as child to passed parent hierarchy
353  self.hier = self._create_child(self.state.get_hierarchy())
354  self.hier.set_name(name)
355  IMP.atom.Copy.setup_particle(self.hier,copy_num)
356  self._name_with_copy = "%s.%d" % (name, copy_num)
357  # store the sequence
358  self.chain=IMP.atom.Chain.setup_particle(self.hier,chain_id)
359  self.chain.set_sequence(self.sequence)
360  # create TempResidues from the sequence (if passed)
361  self.residues=[]
362  for ns,s in enumerate(sequence):
363  r = TempResidue(self,s,ns+1,ns,is_nucleic)
364  self.residues.append(r)
365 
366  def __repr__(self):
367  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
368  str(IMP.atom.Copy(self.hier).get_copy_index())
369 
370  def __getitem__(self,val):
371  if isinstance(val,int):
372  return self.residues[val]
373  elif isinstance(val,str):
374  return self.residues[int(val)-1]
375  elif isinstance(val,slice):
376  return IMP.pmi.tools.OrderedSet(self.residues[val])
377  else:
378  raise TypeError("Indexes must be int or str")
379 
380  def get_hierarchy(self):
381  """Return the IMP Hierarchy corresponding to this Molecule"""
382  return self.hier
383 
384  def get_name(self):
385  """Return this Molecule name"""
386  return self.hier.get_name()
387 
388  def get_state(self):
389  """Return the State containing this Molecule"""
390  return self.state
391 
392  def get_ideal_helices(self):
393  """Returns list of OrderedSets with requested ideal helices"""
394  return self._ideal_helices
395 
396  def residue_range(self,a,b,stride=1):
397  """get residue range from a to b, inclusive.
398  Use integers to get 0-indexing, or strings to get PDB-indexing"""
399  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
400  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
401  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
402  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
403  else:
404  raise TypeError("Range ends must be int or str. "
405  "Stride must be int.")
406 
407  def get_residues(self):
408  """ Return all modeled TempResidues as a set"""
409  all_res = IMP.pmi.tools.OrderedSet(self.residues)
410  return all_res
411 
412  def get_represented(self):
413  """ Return set of TempResidues that have representation"""
414  return self._represented
415 
417  """ Return a set of TempResidues that have associated structure coordinates """
418  atomic_res = IMP.pmi.tools.OrderedSet()
419  for res in self.residues:
420  if res.get_has_structure():
421  atomic_res.add(res)
422  return atomic_res
423 
425  """ Return a set of TempResidues that don't have associated structure coordinates """
426  non_atomic_res = IMP.pmi.tools.OrderedSet()
427  for res in self.residues:
428  if not res.get_has_structure():
429  non_atomic_res.add(res)
430  return non_atomic_res
431 
432  def create_copy(self,chain_id):
433  """Create a new Molecule with the same name and sequence but a higher copy number.
434  Returns the Molecule. No structure or representation will be copied!
435  @param chain_id Chain ID of the new molecule
436  """
437  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
438  copy_num=self.state.get_number_of_copies(self.get_name()))
439  self.state._register_copy(mol)
440  return mol
441 
442  def create_clone(self,chain_id):
443  """Create a Molecule clone (automatically builds same structure and representation)
444  @param chain_id If you want to set the chain ID of the copy to something
445  @note You cannot add structure or representations to a clone!
446  """
447  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
448  copy_num=self.state.get_number_of_copies(self.get_name()),
449  mol_to_clone=self)
450  self.state._register_copy(mol)
451  return mol
452 
453  def add_structure(self,pdb_fn,chain_id,res_range=[],
454  offset=0,model_num=None,ca_only=False,
455  soft_check=False):
456  """Read a structure and store the coordinates.
457  @eturn the atomic residues (as a set)
458  @param pdb_fn The file to read
459  @param chain_id Chain ID to read
460  @param res_range Add only a specific set of residues from the PDB file.
461  res_range[0] is the starting and res_range[1] is the
462  ending residue index.
463  @param offset Apply an offset to the residue indexes of the PDB
464  file. This number is added to the PDB sequence.
465  @param model_num Read multi-model PDB and return that model
466  @param ca_only Only read the CA positions from the PDB file
467  @param soft_check If True, it only warns if there are sequence
468  mismatches between the PDB and the Molecule (FASTA)
469  sequence, and uses the sequence from the PDB.
470  If False (Default), it raises an error when there
471  are sequence mismatches.
472  @note If you are adding structure without a FASTA file, set soft_check
473  to True.
474  """
475  if self.mol_to_clone is not None:
476  raise ValueError('You cannot call add_structure() for a clone')
477 
478  self.pdb_fn = pdb_fn
479 
480  # get IMP.atom.Residues from the pdb file
481  rhs = system_tools.get_structure(self.model,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
482  self.coord_finder.add_residues(rhs)
483 
484  if len(self.residues)==0:
485  warnings.warn(
486  "Substituting PDB residue type with FASTA residue type. "
487  "Potentially dangerous.", IMP.pmi.StructureWarning)
488 
489  # Store info for ProtocolOutput usage later
490  self._pdb_elements.append((rhs,
491  _PDBElement(offset=offset, filename=pdb_fn, chain_id=chain_id)))
492 
493  # load those into TempResidue object
494  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
495  for nrh,rh in enumerate(rhs):
496  pdb_idx = rh.get_index()
497  raw_idx = pdb_idx - 1
498 
499  # add ALA to fill in gaps
500  while len(self.residues)<pdb_idx:
501  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
502  self.residues.append(r)
503  self.sequence += 'A'
504 
505  internal_res = self.residues[raw_idx]
506  if len(self.sequence)<raw_idx:
507  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
508  internal_res.set_structure(rh,soft_check)
509  atomic_res.add(internal_res)
510 
511  self.chain.set_sequence(self.sequence)
512  return atomic_res
513 
515  residues=None,
516  resolutions=[],
517  bead_extra_breaks=[],
518  bead_ca_centers=True,
519  bead_default_coord=[0,0,0],
520  density_residues_per_component=None,
521  density_prefix=None,
522  density_force_compute=False,
523  density_voxel_size=1.0,
524  setup_particles_as_densities=False,
525  ideal_helix=False,
526  color=None):
527  """Set the representation for some residues. Some options (beads, ideal helix)
528  operate along the backbone. Others (density options) are volumetric.
529  Some of these you can combine e.g., beads+densities or helix+densities
530  See @ref pmi_resolution
531  @param residues Set of PMI TempResidues for adding the representation.
532  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
533  If None, will select all residues for this Molecule.
534  @param resolutions Resolutions for beads representations.
535  If structured, will average along backbone, breaking at sequence breaks.
536  If unstructured, will just create beads.
537  Pass an integer or list of integers
538  @param bead_extra_breaks Additional breakpoints for splitting beads.
539  The value can be the 0-ordered position, after which it'll insert the break.
540  Alternatively pass PDB-style (1-ordered) indices as a string.
541  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
542  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
543  (otherwise will average atoms to get center). Defaults to True.
544  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
545  If no structure provided (like an all bead molecule), the beads go here.
546  @param density_residues_per_component Create density (Gaussian Mixture Model)
547  for these residues. Must also supply density_prefix
548  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
549  If exists, will read unless you set density_force_compute=True.
550  Will also write map (prefix+'.mrc').
551  Must also supply density_residues_per_component.
552  @param density_force_compute Set true to force overwrite density file.
553  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
554  Set to 0 if you don't want to create the MRC file
555  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
556  Useful for all-atom models or flexible beads.
557  Mutually exclusive with density_ options
558  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
559  Any other resolutions passed will be coarsened from there.
560  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
561  @param color the color applied to the hierarchies generated.
562  Format options: tuple (r,g,b) with values 0 to 1;
563  float (from 0 to 1, a map from Blue to Green to Red);
564  a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
565  a hex RGB string (e.g. "0xff0000");
566  an IMP.display.Color object
567  @note You cannot call add_representation multiple times for the
568  same residues.
569  """
570 
571  # can't customize clones
572  if self.mol_to_clone is not None:
573  raise ValueError('You cannot call add_representation() for a clone.'
574  ' Maybe use a copy instead.')
575 
576  # format input
577  if residues is None:
578  res = IMP.pmi.tools.OrderedSet(self.residues)
579  elif residues==self:
580  res = IMP.pmi.tools.OrderedSet(self.residues)
581  elif type(residues) is IMP.pmi.topology.TempResidue:
582  res = IMP.pmi.tools.OrderedSet([residues])
583  elif hasattr(residues,'__iter__'):
584  if len(residues)==0:
585  raise Exception('You passed an empty set to add_representation')
586  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
587  res = residues
588  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
589  res = IMP.pmi.tools.OrderedSet(residues)
590  elif type(residues) is list and type(residues[0]) is TempResidue:
591  res = IMP.pmi.tools.OrderedSet(residues)
592  else:
593  raise Exception("You passed an iteratible of something other than TempResidue",res)
594  else:
595  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
596 
597  # check that each residue has not been represented yet
598  ov = res & self._represented
599  if ov:
600  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
601  self._represented|=res
602 
603  # check you aren't creating multiple resolutions without structure
604  if not hasattr(resolutions,'__iter__'):
605  if type(resolutions) is int:
606  resolutions = [resolutions]
607  else:
608  raise Exception("you tried to pass resolutions that are not int or list-of-int")
609  if len(resolutions)>1 and not ideal_helix:
610  for r in res:
611  if not r.get_has_structure():
612  raise Exception('You are creating multiple resolutions for '
613  'unstructured regions. This will have unexpected results.')
614 
615  # check density info is consistent
616  if density_residues_per_component or density_prefix:
617  if not density_residues_per_component and density_prefix:
618  raise Exception('If requesting density, must provide '
619  'density_residues_per_component AND density_prefix')
620  if density_residues_per_component and setup_particles_as_densities:
621  raise Exception('Cannot create both volumetric density '
622  '(density_residues_per_component) AND '
623  'individual densities (setup_particles_as_densities) '
624  'in the same representation')
625  if len(resolutions)>1 and setup_particles_as_densities:
626  raise Exception('You have multiple bead resolutions but are attempting to '
627  'set them all up as individual Densities. '
628  'This could have unexpected results.')
629 
630  # check helix not accompanied by other resolutions (densities OK though!)
631  if ideal_helix:
632  if 0 in resolutions:
633  raise Exception("For ideal helices, cannot build resolution 0: "
634  "you have to do that in MODELLER")
635  if 1 not in resolutions:
636  resolutions = [1] + list(resolutions)
637  self._ideal_helices.append(res)
638 
639  # check residues are all part of this molecule:
640  for r in res:
641  if r.get_molecule()!=self:
642  raise Exception('You are adding residues from a different molecule to',self.__repr__())
643 
644  # unify formatting for extra breaks
645  breaks = []
646  for b in bead_extra_breaks:
647  if type(b)==str:
648  breaks.append(int(b)-1)
649  else:
650  breaks.append(b)
651  # store the representation group
652  self.representations.append(_Representation(res,
653  resolutions,
654  breaks,
655  bead_ca_centers,
656  bead_default_coord,
657  density_residues_per_component,
658  density_prefix,
659  density_force_compute,
660  density_voxel_size,
661  setup_particles_as_densities,
662  ideal_helix,
663  color))
664 
665  def _all_protocol_output(self):
666  return self.state._protocol_output
667 
668  def build(self):
669  """Create all parts of the IMP hierarchy
670  including Atoms, Residues, and Fragments/Representations and,
671  finally, Copies.
672  Will only build requested representations.
673  @note Any residues assigned a resolution must have an IMP.atom.Residue
674  hierarchy containing at least a CAlpha. For missing residues,
675  these can be constructed from the PDB file.
676  """
677  if not self.built:
678  # Add molecule name and sequence to any ProtocolOutput objects
679  name=self.hier.get_name()
680  for po, state in self._all_protocol_output():
681  po.create_component(state, name, True,
682  asym_name=self._name_with_copy)
683  po.add_component_sequence(state, name, self.sequence,
684  asym_name=self._name_with_copy)
685  # if requested, clone structure and representations BEFORE building original
686  if self.mol_to_clone is not None:
687  for nr,r in enumerate(self.mol_to_clone.residues):
688  if r.get_has_structure():
689  clone = IMP.atom.create_clone(r.get_hierarchy())
690  self.residues[nr].set_structure(
691  IMP.atom.Residue(clone),soft_check=True)
692  for old_rep in self.mol_to_clone.representations:
693  new_res = IMP.pmi.tools.OrderedSet()
694  for r in old_rep.residues:
695  new_res.add(self.residues[r.get_internal_index()])
696  self._represented.add(self.residues[r.get_internal_index()])
697  new_rep = _Representation(new_res,
698  old_rep.bead_resolutions,
699  old_rep.bead_extra_breaks,
700  old_rep.bead_ca_centers,
701  old_rep.bead_default_coord,
702  old_rep.density_residues_per_component,
703  old_rep.density_prefix,
704  False,
705  old_rep.density_voxel_size,
706  old_rep.setup_particles_as_densities,
707  old_rep.ideal_helix,
708  old_rep.color)
709  self.representations.append(new_rep)
710  self.coord_finder = self.mol_to_clone.coord_finder
711 
712  # give a warning for all residues that don't have representation
713  no_rep = [r for r in self.residues if r not in self._represented]
714  if len(no_rep)>0:
715  warnings.warn(
716  'Residues without representation in molecule %s: %s'
717  % (self.get_name(), system_tools.resnums2str(no_rep)),
719 
720  # first build any ideal helices (fills in structure for the TempResidues)
721  for rep in self.representations:
722  if rep.ideal_helix:
723  _build_ideal_helix(self.model,rep.residues,self.coord_finder)
724 
725  # build all the representations
726  built_reps = []
727 
728  rephandler = _RepresentationHandler(self._name_with_copy,
729  list(self._all_protocol_output()),
730  self._pdb_elements)
731 
732  for rep in self.representations:
733  built_reps += system_tools.build_representation(
734  self, rep, self.coord_finder, rephandler)
735 
736  # sort them before adding as children
737  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
738  for br in built_reps:
739  self.hier.add_child(br)
740  br.update_parents()
741  self.built = True
742 
743  for res in self.residues:
744  idx = res.get_index()
745 
746  # first off, store the highest resolution available in residue.hier
747  new_ps = IMP.atom.Selection(
748  self.hier,
749  residue_index=res.get_index(),
750  resolution=1).get_selected_particles()
751  if len(new_ps)>0:
752  new_p = new_ps[0]
753  if IMP.atom.Atom.get_is_setup(new_p):
754  # if only found atomic, store the residue
755  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
756  else:
757  # otherwise just store what you found
758  new_hier = IMP.atom.Hierarchy(new_p)
759  res.hier = new_hier
760  rephandler(res)
761  else:
762  res.hier = None
763  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
764  print('done building',self.get_hierarchy())
765  return self.hier
766 
767  def get_particles_at_all_resolutions(self,residue_indexes=None):
768  """Helpful utility for getting particles at all resolutions from this molecule.
769  Can optionally pass a set of residue indexes"""
770  if not self.built:
771  raise Exception("Cannot get all resolutions until you build the Molecule")
772  if residue_indexes is None:
773  residue_indexes = [r.get_index() for r in self.get_residues()]
775  residue_indexes=residue_indexes)
776  return ps
777 
778 #------------------------
779 
780 class _Representation(object):
781  """Private class just to store a representation request"""
782  def __init__(self,
783  residues,
784  bead_resolutions,
785  bead_extra_breaks,
786  bead_ca_centers,
787  bead_default_coord,
788  density_residues_per_component,
789  density_prefix,
790  density_force_compute,
791  density_voxel_size,
792  setup_particles_as_densities,
793  ideal_helix,
794  color):
795  self.residues = residues
796  self.bead_resolutions = bead_resolutions
797  self.bead_extra_breaks = bead_extra_breaks
798  self.bead_ca_centers = bead_ca_centers
799  self.bead_default_coord = bead_default_coord
800  self.density_residues_per_component = density_residues_per_component
801  self.density_prefix = density_prefix
802  self.density_force_compute = density_force_compute
803  self.density_voxel_size = density_voxel_size
804  self.setup_particles_as_densities = setup_particles_as_densities
805  self.ideal_helix = ideal_helix
806  self.color = color
807 
808 class _FindCloseStructure(object):
809  """Utility to get the nearest observed coordinate"""
810  def __init__(self):
811  self.coords=[]
812  def add_residues(self,residues):
813  for r in residues:
814  idx = IMP.atom.Residue(r).get_index()
815  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
816  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
817  if len(ca)==1:
818  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
819  self.coords.append([idx,xyz])
820  elif len(p)==1:
821  xyz = IMP.core.XYZ(p[0]).get_coordinates()
822  self.coords.append([idx,xyz])
823  else:
824  raise("_FindCloseStructure: wrong selection")
825 
826  self.coords.sort(key=itemgetter(0))
827  def find_nearest_coord(self,query):
828  if self.coords==[]:
829  return None
830  keys = [r[0] for r in self.coords]
831  pos = bisect_left(keys,query)
832  if pos == 0:
833  ret = self.coords[0]
834  elif pos == len(self.coords):
835  ret = self.coords[-1]
836  else:
837  before = self.coords[pos - 1]
838  after = self.coords[pos]
839  if after[0] - query < query - before[0]:
840  ret = after
841  else:
842  ret = before
843  return ret[1]
844 
845 class Sequences(object):
846  """A dictionary-like wrapper for reading and storing sequence data"""
847  def __init__(self,fasta_fn,name_map=None):
848  """read a fasta file and extract all the requested sequences
849  @param fasta_fn sequence file
850  @param name_map dictionary mapping the fasta name to final stored name
851  """
852  self.sequences = IMP.pmi.tools.OrderedDict()
853  self.read_sequences(fasta_fn,name_map)
854  def __len__(self):
855  return len(self.sequences)
856  def __contains__(self,x):
857  return x in self.sequences
858  def __getitem__(self,key):
859  if type(key) is int:
860  allseqs = list(self.sequences.keys())
861  try:
862  return self.sequences[allseqs[key]]
863  except IndexError:
864  raise IndexError("You tried to access sequence number %d "
865  "but there's only %d" % (key, len(allseqs)))
866  else:
867  return self.sequences[key]
868  def __iter__(self):
869  return self.sequences.__iter__()
870  def __repr__(self):
871  ret=''
872  for s in self.sequences:
873  ret += '%s\t%s\n'%(s,self.sequences[s])
874  return ret
875  def read_sequences(self,fasta_fn,name_map=None):
876  code = None
877  seq = None
878  with open(fasta_fn,'r') as fh:
879  for (num, line) in enumerate(fh):
880  if line.startswith('>'):
881  if seq is not None:
882  self.sequences[code] = seq.strip('*')
883  code = line.rstrip()[1:]
884  if name_map is not None:
885  try:
886  code = name_map[code]
887  except:
888  pass
889  seq = ''
890  else:
891  line = line.rstrip()
892  if line: # Skip blank lines
893  if seq is None:
894  raise Exception( \
895  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
896  seq += line
897  if seq is not None:
898  self.sequences[code] = seq.strip('*')
899 
900 
901 class PDBSequences(object):
902  """Data_structure for reading and storing sequence data from pdb"""
903  def __init__(self,model,pdb_fn,name_map=None):
904  """read a pdb file and returns all sequences for each contiguous fragment
905  @param pdb_fn file
906  @param name_map dictionary mapping the pdb chain id to final stored name
907  """
908  self.model=model
909  # self.sequences data-structure: (two-key dictionary)
910  # it contains all contiguous fragments:
911  # chain_id, tuples indicating first and last residue, sequence
912  # example:
913  # key1, key2, value
914  # A (77, 149) VENPSLDLEQYAASYSGLMR....
915  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
916  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
917  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
918  self.sequences = IMP.pmi.tools.OrderedDict()
919  self.read_sequences(pdb_fn,name_map)
920 
921  def read_sequences(self,pdb_fn,name_map):
922  read_file = IMP.atom.read_pdb
923  if pdb_fn.endswith('.cif'):
924  read_file = IMP.atom.read_mmcif
925  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
926  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
927  for c in cs:
928  id=IMP.atom.Chain(c).get_id()
929  print(id)
930  if name_map:
931  try:
932  id=name_map[id]
933  except:
934  print("Chain ID %s not in name_map, skipping" % id)
935  continue
936  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
937  rids=[]
938  rids_olc_dict={}
939  for r in rs:
940  dr=IMP.atom.Residue(r)
941  rid=dr.get_index()
942 
943  isprotein=dr.get_is_protein()
944  isrna=dr.get_is_rna()
945  isdna=dr.get_is_dna()
946  if isprotein:
947  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
948  rids.append(rid)
949  rids_olc_dict[rid]=olc
950  elif isdna:
951  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
952  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
953  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
954  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
955  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
956  rids.append(rid)
957  rids_olc_dict[rid]=olc
958  elif isrna:
959  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
960  if dr.get_residue_type() == IMP.atom.URA: olc="U"
961  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
962  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
963  if dr.get_residue_type() == IMP.atom.THY: olc="T"
964  rids.append(rid)
965  rids_olc_dict[rid]=olc
966  group_rids=self.group_indexes(rids)
967  contiguous_sequences=IMP.pmi.tools.OrderedDict()
968  for group in group_rids:
969  sequence_fragment=""
970  for i in range(group[0],group[1]+1):
971  sequence_fragment+=rids_olc_dict[i]
972  contiguous_sequences[group]=sequence_fragment
973  self.sequences[id]=contiguous_sequences
974 
975  #for c in self.sequences:
976  # for group in self.sequences[c]:
977  # print(c,group,self.sequences[c][group])
978 
979  def group_indexes(self,indexes):
980  from itertools import groupby
981  ranges = []
982  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
983  group = [x[1] for x in g]
984  ranges.append((group[0], group[-1]))
985  return ranges
986 
987 
988 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
989  '''This function computes and prints the alignment between the
990  fasta file and the pdb sequence, computes the offsets for each contiguous
991  fragment in the PDB.
992  @param fasta_sequences IMP.pmi.topology.Sequences object
993  @param pdb_sequences IMP.pmi.topology.PDBSequences object
994  @param show boolean default False, if True prints the alignments.
995  The input objects should be generated using map_name dictionaries such that fasta_id
996  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
997  Returns a dictionary of offsets, organized by peptide range (group):
998  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
999  from Bio import pairwise2
1000  from Bio.pairwise2 import format_alignment
1001  from Bio.SubsMat import MatrixInfo as matlist
1002  matrix = matlist.blosum62
1003  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1004  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1005  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1006  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1007  offsets=IMP.pmi.tools.OrderedDict()
1008  for name in fasta_sequences.sequences:
1009  print(name)
1010  seq_fasta=fasta_sequences.sequences[name]
1011  if name not in pdb_sequences.sequences:
1012  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
1013  pdbnames=pdb_sequences.sequences.keys()
1014  else:
1015  pdbnames=[name]
1016  for pdbname in pdbnames:
1017  for group in pdb_sequences.sequences[pdbname]:
1018  if group[1]-group[0]+1<7:continue
1019  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
1020  if show:
1021  print("########################")
1022  print(" ")
1023  print("protein name",pdbname)
1024  print("fasta id", name)
1025  print("pdb fragment",group)
1026  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
1027  for a in [align]:
1028  offset=a[3]+1-group[0]
1029  if show:
1030  print("alignment sequence start-end",(a[3]+1,a[4]+1))
1031  print("offset from pdb to fasta index",offset)
1032  print(format_alignment(*a))
1033  if name not in offsets:
1034  offsets[pdbname]={}
1035  if group not in offsets[pdbname]:
1036  offsets[pdbname][group]=offset
1037  else:
1038  if group not in offsets[pdbname]:
1039  offsets[pdbname][group]=offset
1040  return offsets
1041 
1042 
1043 
1044 #------------------------
1045 
1046 
1047 class TempResidue(object):
1048  """Temporarily stores residue information, even without structure available."""
1049  # Consider implementing __hash__ so you can select.
1050  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
1051  """setup a TempResidue
1052  @param molecule PMI Molecule to which this residue belongs
1053  @param code one-letter residue type code
1054  @param index PDB index
1055  @param internal_index The number in the sequence
1056  """
1057  #these attributes should be immutable
1058  self.molecule = molecule
1059  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
1060  self.pdb_index = index
1061  self.internal_index = internal_index
1062  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1063  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
1064  #these are expected to change
1065  self._structured = False
1066  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.model),
1067  self.rtype,
1068  index)
1069  def __str__(self):
1070  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
1071  def __repr__(self):
1072  return self.__str__()
1073  def __key(self):
1074  #this returns the immutable attributes only
1075  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
1076  def __eq__(self,other):
1077  return type(other)==type(self) and self.__key() == other.__key()
1078  def __hash__(self):
1079  return hash(self.__key())
1080  def get_index(self):
1081  return self.pdb_index
1082  def get_internal_index(self):
1083  return self.internal_index
1084  def get_code(self):
1085  return IMP.atom.get_one_letter_code(self.get_residue_type())
1086  def get_residue_type(self):
1087  return self.rtype
1088  def get_hierarchy(self):
1089  return self.hier
1090  def get_molecule(self):
1091  return self.molecule
1092  def get_has_structure(self):
1093  return self._structured
1094  def set_structure(self,res,soft_check=False):
1095  if res.get_residue_type()!=self.get_residue_type():
1096  if soft_check:
1097  # note from commit a2c13eaa1 we give priority to the FASTA and not the PDB
1098  warnings.warn(
1099  'Inconsistency between FASTA sequence and PDB sequence. '
1100  'FASTA type %s %s and PDB type %s'
1101  % (self.get_index(), self.hier.get_residue_type(),
1102  res.get_residue_type()),
1104  self.hier.set_residue_type((self.get_residue_type()))
1105  self.rtype = self.get_residue_type()
1106  else:
1107  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
1108  IMP.atom.get_one_letter_code(res.get_residue_type()),
1109  'and sequence residue is',self.get_code())
1110 
1111  for a in res.get_children():
1112  self.hier.add_child(a)
1113  atype = IMP.atom.Atom(a).get_atom_type()
1114  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
1115  self.hier.get_index()))
1116  self._structured = True
1117 
1118 class TopologyReader(object):
1119  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
1120  The file is read in and each part of the topology is stored as a
1121  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1122  The topology file should be in a simple pipe-delimited format:
1123  @code{.txt}
1124 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1125 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1126 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1127 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1128 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1129 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1130 
1131  @endcode
1132 
1133  These are the fields you can enter:
1134  - `component_name`: Name of the component (chain). Serves as the parent
1135  hierarchy for this structure.
1136  - `color`: The color used in the output RMF file. Uses
1137  [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1138  (e.g. "red"), or R,G,B values as three comma-separated floating point
1139  numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1140  starting with '#' (e.g. 0xff0000).
1141  - `fasta_fn`: Name of FASTA file containing this component.
1142  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1143  from the file is assumed to be a protein sequence. If it should instead
1144  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1145  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1146  FASTA file and treat it as RNA.
1147  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1148  If left empty, will set up as BEADS (you can also specify "BEADS")
1149  Can also write "IDEAL_HELIX".
1150  - `chain`: Chain ID of this domain in the PDB file.
1151  - `residue_range`: Comma delimited pair defining range.
1152  Can leave empty or use 'all' for entire sequence from PDB file.
1153  The second item in the pair can be END to select the last residue in the
1154  PDB chain.
1155  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1156  - `bead_size`: The size (in residues) of beads used to model areas not
1157  covered by PDB coordinates. These will be automatically built.
1158  - `em_residues`: The number of Gaussians used to model the electron
1159  density of this domain. Set to zero if no EM fitting will be done.
1160  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1161  - `rigid_body`: Leave empty if this object is not in a rigid body.
1162  Otherwise, this is a number corresponding to the rigid body containing
1163  this object. The number itself is just used for grouping things.
1164  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1165  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1166  - `flags` additional flags for advanced options
1167  @note All filenames are relative to the paths specified in the constructor.
1168 
1169  """
1170  def __init__(self,
1171  topology_file,
1172  pdb_dir='./',
1173  fasta_dir='./',
1174  gmm_dir='./'):
1175  """Constructor.
1176  @param topology_file Pipe-delimited file specifying the topology
1177  @param pdb_dir Relative path to the pdb directory
1178  @param fasta_dir Relative path to the fasta directory
1179  @param gmm_dir Relative path to the GMM directory
1180  """
1181  self.topology_file = topology_file
1182  self.molecules = IMP.pmi.tools.OrderedDict() # key=molname, value=TempMolecule
1183  self.pdb_dir = pdb_dir
1184  self.fasta_dir = fasta_dir
1185  self.gmm_dir = gmm_dir
1186  self._components = self.read(topology_file)
1187 
1188  def write_topology_file(self,outfile):
1189  with open(outfile, "w") as f:
1190  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1191  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1192  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1193  for c in self._components:
1194  output = c.get_str()+'\n'
1195  f.write(output)
1196  return outfile
1197 
1198  def get_components(self, topology_list = "all"):
1199  """ Return list of ComponentTopologies for selected components
1200  @param topology_list List of indices to return"""
1201  if topology_list == "all":
1202  topologies = self._components
1203  else:
1204  topologies=[]
1205  for i in topology_list:
1206  topologies.append(self._components[i])
1207  return topologies
1208 
1209  def get_molecules(self):
1210  return self.molecules
1211 
1212  def read(self, topology_file, append=False):
1213  """Read system components from topology file. append=False will erase
1214  current topology and overwrite with new
1215  """
1216  is_topology = False
1217  is_directories = False
1218  linenum = 1
1219  if append==False:
1220  self._components=[]
1221 
1222  with open(topology_file) as infile:
1223  for line in infile:
1224  if line.lstrip()=="" or line[0]=="#":
1225  continue
1226  elif line.split('|')[1].strip() in ("molecule_name"):
1227  is_topology=True
1228  is_directories = False
1229  old_format = False
1230  continue
1231  elif line.split('|')[1] == "component_name":
1232  is_topology = True
1234  "Old-style topology format (using "
1235  "|component_name|) is deprecated. Please switch to "
1236  "the new-style format (using |molecule_name|)\n")
1237  old_format = True
1238  is_directories = False
1239  continue
1240  elif line.split('|')[1] == "directories":
1242  "Setting directories in the topology file "
1243  "is deprecated. Please do so through the "
1244  "TopologyReader constructor. Note that new-style "
1245  "paths are relative to the current working "
1246  "directory, not the topology file.\n")
1247  is_directories = True
1248  elif is_directories:
1249  fields = line.split('|')
1250  setattr(self, fields[1],
1251  IMP.get_relative_path(topology_file, fields[2]))
1252  if is_topology:
1253  new_component = self._parse_line(line, linenum, old_format)
1254  self._components.append(new_component)
1255  linenum += 1
1256  return self._components
1257 
1258  def _parse_line(self, component_line, linenum, old_format):
1259  """Parse a line of topology values and matches them to their key.
1260  Checks each value for correct syntax
1261  Returns a list of Component objects
1262  fields:
1263  """
1264  c = _Component()
1265  values = [s.strip() for s in component_line.split('|')]
1266  errors = []
1267 
1268  ### Required fields
1269  if old_format:
1270  c.molname = values[1]
1271  c.copyname = ''
1272  c._domain_name = values[2]
1273  c.color = 'blue'
1274  else:
1275  names = values[1].split('.')
1276  if len(names)==1:
1277  c.molname = names[0]
1278  c.copyname = ''
1279  elif len(names)==2:
1280  c.molname = names[0]
1281  c.copyname = names[1]
1282  else:
1283  c.molname = names[0]
1284  c.copyname = names[1]
1285  errors.append("Molecule name should be <molecule.copyID>")
1286  errors.append("For component %s line %d " % (c.molname,linenum))
1287  c._domain_name = c.molname + '.' + c.copyname
1288  colorfields = values[2].split(',')
1289  if len(colorfields)==3:
1290  c.color = [float(x) for x in colorfields]
1291  if any([x>1 for x in c.color]):
1292  c.color=[x/255 for x in c.color]
1293  else:
1294  c.color = values[2]
1295  c._orig_fasta_file = values[3]
1296  c.fasta_file = values[3]
1297  fasta_field = values[4].split(",")
1298  c.fasta_id = fasta_field[0]
1299  c.fasta_flag = None
1300  if len(fasta_field) > 1:
1301  c.fasta_flag = fasta_field[1]
1302  c._orig_pdb_input = values[5]
1303  pdb_input = values[5]
1304  tmp_chain = values[6]
1305  rr = values[7]
1306  offset = values[8]
1307  bead_size = values[9]
1308  emg = values[10]
1309  if old_format:
1310  rbs = srbs = csrbs = ''
1311  else:
1312  rbs = values[11]
1313  srbs = values[12]
1314  csrbs = values[13]
1315 
1316  if c.molname not in self.molecules:
1317  self.molecules[c.molname] = _TempMolecule(c)
1318  else:
1319  # COPY OR DOMAIN
1320  c._orig_fasta_file = self.molecules[c.molname].orig_component._orig_fasta_file
1321  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1322  self.molecules[c.molname].add_component(c,c.copyname)
1323 
1324  # now cleanup input
1325  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1326  if pdb_input=="":
1327  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1328  errors.append("For component %s line %d is not correct"
1329  "|%s| was given." % (c.molname,linenum,pdb_input))
1330  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1331  c.pdb_file = pdb_input
1332  else:
1333  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1334 
1335  # PDB chain must be one or two characters
1336  if len(tmp_chain)==1 or len(tmp_chain)==2:
1337  c.chain = tmp_chain
1338  else:
1339  errors.append("PDB Chain identifier must be one or two characters.")
1340  errors.append("For component %s line %d is not correct"
1341  "|%s| was given." % (c.molname,linenum,tmp_chain))
1342 
1343  ### Optional fields
1344  # Residue Range
1345  if rr.strip()=='all' or str(rr)=="":
1346  c.residue_range = None
1347  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1348  # Make sure that is residue range is given, there are only two values and they are integers
1349  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1350  if c.residue_range[1] != 'END':
1351  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1352  # Old format used -1 for the last residue
1353  if old_format and c.residue_range[1] == -1:
1354  c.residue_range = (c.residue_range[0], 'END')
1355  else:
1356  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1357  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1358  errors.append("To select all residues, indicate |\"all\"|")
1359 
1360  # PDB Offset
1361  if self._is_int(offset):
1362  c.pdb_offset=int(offset)
1363  elif len(offset)==0:
1364  c.pdb_offset = 0
1365  else:
1366  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1367  errors.append("The value must be a single integer. |%s| was given." % offset)
1368 
1369  # Bead Size
1370  if self._is_int(bead_size):
1371  c.bead_size=int(bead_size)
1372  elif len(bead_size)==0:
1373  c.bead_size = 0
1374  else:
1375  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1376  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1377 
1378  # EM Residues Per Gaussian
1379  if self._is_int(emg):
1380  if int(emg) > 0:
1381  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1382  c.gmm_file = c.density_prefix+'.txt'
1383  c.mrc_file = c.density_prefix+'.gmm'
1384 
1385  c.em_residues_per_gaussian=int(emg)
1386  else:
1387  c.em_residues_per_gaussian = 0
1388  elif len(emg)==0:
1389  c.em_residues_per_gaussian = 0
1390  else:
1391  errors.append("em_residues_per_gaussian format for component "
1392  "%s line %d is not correct" % (c.molname, linenum))
1393  errors.append("The value must be a single integer. |%s| was given." % emg)
1394 
1395  # rigid bodies
1396  if len(rbs)>0:
1397  if not self._is_int(rbs):
1398  errors.append("rigid bodies format for component "
1399  "%s line %d is not correct" % (c.molname, linenum))
1400  errors.append("Each RB must be a single integer, or empty. "
1401  "|%s| was given." % rbs)
1402  c.rigid_body = int(rbs)
1403 
1404  # super rigid bodies
1405  if len(srbs)>0:
1406  srbs = srbs.split(',')
1407  for i in srbs:
1408  if not self._is_int(i):
1409  errors.append("super rigid bodies format for component "
1410  "%s line %d is not correct" % (c.molname, linenum))
1411  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1412  c.super_rigid_bodies = srbs
1413 
1414  # chain of super rigid bodies
1415  if len(csrbs)>0:
1416  if not self._is_int(csrbs):
1417  errors.append("em_residues_per_gaussian format for component "
1418  "%s line %d is not correct" % (c.molname, linenum))
1419  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1420  c.chain_of_super_rigid_bodies = csrbs
1421 
1422  # done
1423  if errors:
1424  raise ValueError("Fix Topology File syntax errors and rerun: " \
1425  + "\n".join(errors))
1426  else:
1427  return c
1428 
1429 
1430  def set_gmm_dir(self,gmm_dir):
1431  """Change the GMM dir"""
1432  self.gmm_dir = gmm_dir
1433  for c in self._components:
1434  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1435  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1436  print('new gmm',c.gmm_file)
1437 
1438  def set_pdb_dir(self,pdb_dir):
1439  """Change the PDB dir"""
1440  self.pdb_dir = pdb_dir
1441  for c in self._components:
1442  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1443  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1444 
1445  def set_fasta_dir(self,fasta_dir):
1446  """Change the FASTA dir"""
1447  self.fasta_dir = fasta_dir
1448  for c in self._components:
1449  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1450 
1451  def _is_int(self, s):
1452  # is this string an integer?
1453  try:
1454  float(s)
1455  return float(s).is_integer()
1456  except ValueError:
1457  return False
1458 
1459  def get_rigid_bodies(self):
1460  """Return list of lists of rigid bodies (as domain name)"""
1461  rbl = defaultdict(list)
1462  for c in self._components:
1463  if c.rigid_body:
1464  rbl[c.rigid_body].append(c.get_unique_name())
1465  return rbl.values()
1466 
1468  """Return list of lists of super rigid bodies (as domain name)"""
1469  rbl = defaultdict(list)
1470  for c in self._components:
1471  for rbnum in c.super_rigid_bodies:
1472  rbl[rbnum].append(c.get_unique_name())
1473  return rbl.values()
1474 
1476  """Return list of lists of chains of super rigid bodies (as domain name)"""
1477  rbl = defaultdict(list)
1478  for c in self._components:
1479  for rbnum in c.chain_of_super_rigid_bodies:
1480  rbl[rbnum].append(c.get_unique_name())
1481  return rbl.values()
1482 
1483 class _TempMolecule(object):
1484  """Store the Components and any requests for copies"""
1485  def __init__(self,init_c):
1486  self.molname = init_c.molname
1487  # key=copy ID, value = list of domains
1488  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1489  self.add_component(init_c,init_c.copyname)
1490  self.orig_copyname = init_c.copyname
1491  self.orig_component = self.domains[init_c.copyname][0]
1492  def add_component(self,component,copy_id):
1493  self.domains[copy_id].append(component)
1494  component.domainnum = len(self.domains[copy_id])-1
1495  def __repr__(self):
1496  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1497 
1498 class _Component(object):
1499  """Stores the components required to build a standard IMP hierarchy
1500  using IMP.pmi.BuildModel()
1501  """
1502  def __init__(self):
1503  self.molname = None
1504  self.copyname = None
1505  self.domainnum = 0
1506  self.fasta_file = None
1507  self._orig_fasta_file = None
1508  self.fasta_id = None
1509  self.fasta_flag = None
1510  self.pdb_file = None
1511  self._orig_pdb_input = None
1512  self.chain = None
1513  self.residue_range = None
1514  self.pdb_offset = 0
1515  self.bead_size = 10
1516  self.em_residues_per_gaussian = 0
1517  self.gmm_file = ''
1518  self.mrc_file = ''
1519  self.density_prefix = ''
1520  self.color = 0.1
1521  self.rigid_body = None
1522  self.super_rigid_bodies = []
1523  self.chain_of_super_rigid_bodies = []
1524 
1525  def _l2s(self,l):
1526  return ",".join("%s" % x for x in l)
1527 
1528  def __repr__(self):
1529  return self.get_str()
1530 
1531  def get_unique_name(self):
1532  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1533 
1534  def get_str(self):
1535  res_range = self.residue_range
1536  if self.residue_range is None:
1537  res_range = []
1538  name = self.molname
1539  if self.copyname!='':
1540  name += '.'+self.copyname
1541  if self.chain is None:
1542  chain = ' '
1543  else:
1544  chain = self.chain
1545  color=self.color
1546  if isinstance(color, list):
1547  color=','.join([str(x) for x in color])
1548  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1549  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1550  str(self.pdb_offset),str(self.bead_size),
1551  str(self.em_residues_per_gaussian),
1552  str(self.rigid_body) if self.rigid_body else '',
1553  self._l2s(self.super_rigid_bodies),
1554  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1555  return a
1556 
1557 
1559  '''Extends the functionality of IMP.atom.Molecule'''
1560 
1561  def __init__(self,hierarchy):
1562  IMP.atom.Molecule.__init__(self,hierarchy)
1563 
1564  def get_state_index(self):
1565  state = self.get_parent()
1566  return IMP.atom.State(state).get_state_index()
1567 
1568  def get_copy_index(self):
1569  return IMP.atom.Copy(self).get_copy_index()
1570 
1571  def get_extended_name(self):
1572  return self.get_name()+"."+\
1573  str(self.get_copy_index())+\
1574  "."+str(self.get_state_index())
1575 
1576  def get_sequence(self):
1577  return IMP.atom.Chain(self).get_sequence()
1578 
1579  def get_residue_indexes(self):
1581 
1582  def get_residue_segments(self):
1583  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1584 
1585  def get_chain_id(self):
1586  return IMP.atom.Chain(self).get_id()
1587 
1588  def __repr__(self):
1589  s='PMIMoleculeHierarchy '
1590  s+=self.get_name()
1591  s+=" "+"Copy "+str(IMP.atom.Copy(self).get_copy_index())
1592  s+=" "+"State "+str(self.get_state_index())
1593  s+=" "+"N residues "+str(len(self.get_sequence()))
1594  return s
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1125
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
Build all states.
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:666
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:158
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:135
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:554
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.
Store objects in order they were added, but with default type.
Definition: tools.py:866