IMP logo
IMP Reference Guide  2.10.1
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiple times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return Python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict, namedtuple
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 import weakref
34 
35 def _build_ideal_helix(model, residues, coord_finder):
36  """Creates an ideal helix from the specified residue range
37  Residues MUST be contiguous.
38  This function actually adds them to the TempResidue hierarchy
39  """
40  created_hiers = []
41 
42  # this function creates a CAlpha helix structure (which can be used for coarsening)
43  for n, tempres in enumerate(residues):
44  if tempres.get_has_structure():
45  raise ValueError("You tried to build ideal_helix for a residue "
46  "that already has structure: %s" % tempres)
47  if n > 0 and tempres.get_index() != prev_idx + 1:
48  raise ValueError("Passed non-contiguous segment to "
49  "build_ideal_helix for %s" % tempres.get_molecule())
50 
51  # New residue particle will replace the TempResidue's existing (empty) hierarchy
52  rp = IMP.Particle(model)
53  rp.set_name("Residue_%i" % tempres.get_index())
54 
55  # Copy the original residue type and index
56  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
57 
58  # Create the CAlpha
59  ap = IMP.Particle(model)
61  x = 2.3 * cos(n * 2 * pi / 3.6)
62  y = 2.3 * sin(n * 2 * pi / 3.6)
63  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
64  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
65  d.set_radius(1.0)
66  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
67  IMP.atom.Mass(ap).set_mass(110.0)
68  this_res.add_child(a)
69 
70  # Add this structure to the TempResidue
71  tempres.set_structure(this_res)
72  created_hiers.append(this_res)
73  prev_idx = tempres.get_index()
74  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
75 
76 class _SystemBase(object):
77  """The base class for System, State and Molecule
78  classes. It contains shared functions in common to these classes
79  """
80 
81  def __init__(self,model=None):
82  if model is None:
83  self.model=IMP.Model()
84  else:
85  self.model=model
86 
87  @property
88  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
89  def mdl(self):
90  return self.model
91 
92  def _create_hierarchy(self):
93  """create a new hierarchy"""
94  tmp_part=IMP.Particle(self.model)
95  return IMP.atom.Hierarchy.setup_particle(tmp_part)
96 
97  def _create_child(self,parent_hierarchy):
98  """create a new hierarchy, set it as child of the input
99  one, and return it"""
100  child_hierarchy=self._create_hierarchy()
101  parent_hierarchy.add_child(child_hierarchy)
102  return child_hierarchy
103 
104  def build(self):
105  """Build the coordinates of the system.
106  Loop through stored(?) hierarchies and set up coordinates!"""
107  pass
108 
109 #------------------------
110 
111 class System(_SystemBase):
112  """This class initializes the root node of the global IMP.atom.Hierarchy."""
113  def __init__(self,model=None,name="System"):
114  _SystemBase.__init__(self,model)
115  self._number_of_states = 0
116  self._protocol_output = []
117  self.states = []
118  self.built=False
119 
120  # the root hierarchy node
121  self.hier=self._create_hierarchy()
122  self.hier.set_name(name)
123  self.hier._pmi2_system = weakref.ref(self)
124 
125  def get_states(self):
126  return self.states
127 
128  def create_state(self):
129  """Makes and returns a new IMP.pmi.topology.State in this system"""
130  self._number_of_states+=1
131  state = State(self,self._number_of_states-1)
132  self.states.append(state)
133  return state
134 
135  def __repr__(self):
136  return self.hier.get_name()
137 
139  """Returns the total number of states generated"""
140  return self._number_of_states
141 
142  def get_hierarchy(self):
143  return self.hier
144 
145  def build(self,**kwargs):
146  """Build all states"""
147  if not self.built:
148  for state in self.states:
149  state.build(**kwargs)
150  self.built=True
151  return self.hier
152 
153  def add_protocol_output(self, p):
154  """Capture details of the modeling protocol.
155  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
156  """
157  self._protocol_output.append(p)
158 # p._each_metadata.append(self._metadata)
159 # p._file_datasets.append(self._file_dataset)
160  for state in self.states:
161  state._add_protocol_output(p, self)
162 
163 
164 #------------------------
165 
166 class State(_SystemBase):
167  """Stores a list of Molecules all with the same State index.
168  Also stores number of copies of each Molecule for easy Selection.
169  """
170  def __init__(self,system,state_index):
171  """Define a new state
172  @param system the PMI System
173  @param state_index the index of the new state
174  @note It's expected that you will not use this constructor directly,
175  but rather create it with pmi::System::create_state()
176  """
177  self.model = system.get_hierarchy().get_model()
178  self.system = system
179  self.hier = self._create_child(system.get_hierarchy())
180  self.short_name = self.long_name = "State_"+str(state_index)
181  self.hier.set_name(self.short_name)
182  self.molecules = IMP.pmi.tools.OrderedDict() # key is molecule name. value are the molecule copies!
183  IMP.atom.State.setup_particle(self.hier,state_index)
184  self.built = False
185  self._protocol_output = []
186  for p in system._protocol_output:
187  self._add_protocol_output(p, system)
188 
189  @property
190  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
191  def mdl(self):
192  return self.model
193 
194  def __repr__(self):
195  return self.system.__repr__()+'.'+self.hier.get_name()
196 
197  def _add_protocol_output(self, p, system):
198  state = p._add_state(self)
199  self._protocol_output.append((p, state))
200  state.model = system.model
201  state.prot = self.hier
202 
203  def get_molecules(self):
204  """Return a dictionary where key is molecule name and value
205  are the list of all copies of that molecule in setup order"""
206  return self.molecules
207 
208  def get_molecule(self, name, copy_num=0):
209  """Access a molecule by name and copy number
210  @param name The molecule name used during setup
211  @param copy_num The copy number based on input order.
212  Default: 0. Set to 'all' to get all copies
213  """
214  if name not in self.molecules:
215  raise KeyError("Could not find molname %s" % name)
216  if copy_num == 'all':
217  return self.molecules[name]
218  else:
219  return self.molecules[name][copy_num]
220 
221  def create_molecule(self, name, sequence='', chain_id='', is_nucleic=None):
222  """Create a new Molecule within this State
223  @param name the name of the molecule (string);
224  it must not be already used
225  @param sequence sequence (string)
226  @param chain_id Chain ID to assign to this molecule
227  """
228  # check whether the molecule name is already assigned
229  if name in self.molecules:
230  raise ValueError('Cannot use a molecule name already used')
231 
232  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
233  self.molecules[name] = [mol]
234  return mol
235 
236  def get_hierarchy(self):
237  return self.hier
238 
239  def get_number_of_copies(self,molname):
240  return len(self.molecules[molname])
241 
242  def _register_copy(self,molecule):
243  molname = molecule.get_hierarchy().get_name()
244  self.molecules[molname].append(molecule)
245 
246  def build(self,**kwargs):
247  """call build on all molecules (automatically makes clones)"""
248  if not self.built:
249  for molname in self.molecules:
250  for mol in reversed(self.molecules[molname]):
251  mol.build(**kwargs)
252  self.built=True
253  return self.hier
254 
255 
256 # Track residues read from PDB files
257 _PDBElement = namedtuple('PDBElement', ['offset', 'filename', 'chain_id'])
258 
259 class _RepresentationHandler(object):
260  """Handle PMI representation and use it to populate that of any attached
261  ProtocolOutput objects"""
262  def __init__(self, name, pos, pdb_elements):
263  self.name = name
264  self.pos = pos
265  self.last_index = None
266  self.last_pdb_index = None
267  self.pdb_for_residue = {}
268  for residues, pdb in pdb_elements:
269  for r in residues:
270  self.pdb_for_residue[r.get_index()] = pdb
271 
272  def _get_pdb(self, h):
273  """Return a PDBElement if the given hierarchy was read from a
274  PDB file"""
276  rind = IMP.atom.Residue(h).get_index()
277  return self.pdb_for_residue.get(rind, None)
278 
279  def __call__(self, res):
280  """Handle a single residue"""
281  if len(self.pos) == 0:
282  return
283  h = res.hier
284  pi = h.get_particle_index()
285  # Do nothing if we already saw this hierarchy
286  if self.last_index is None or pi != self.last_index:
287  pdb = self._get_pdb(h)
288  self.last_index = pi
289  if pdb:
290  assert(IMP.atom.Fragment.get_is_setup(h.get_parent()))
291  frag = IMP.atom.Fragment(h.get_parent())
292  fragi = frag.get_particle_index()
293  # Do nothing if we already saw this PDB fragment
294  if self.last_pdb_index is not None \
295  and self.last_pdb_index == fragi:
296  return
297  self.last_pdb_index = fragi
298  indices = frag.get_residue_indexes()
299  for p, state in self.pos:
300  p.add_pdb_element(state, self.name,
301  indices[0], indices[-1], pdb.offset,
302  pdb.filename, pdb.chain_id, frag)
304  frag = IMP.atom.Fragment(h)
305  indices = frag.get_residue_indexes()
306  for p, state in self.pos:
307  p.add_bead_element(state, self.name,
308  indices[0], indices[-1], 1, h)
310  resind = IMP.atom.Residue(h).get_index()
311  for p, state in self.pos:
312  p.add_bead_element(state, self.name, resind, resind, 1, h)
313  else:
314  raise TypeError("Unhandled hierarchy %s" % str(h))
315 
316 
317 #------------------------
318 
319 class Molecule(_SystemBase):
320  """Stores a named protein chain.
321  This class is constructed from within the State class.
322  It wraps an IMP.atom.Molecule and IMP.atom.Copy.
323  Structure is read using this class.
324  Resolutions and copies can be registered, but are only created
325  when build() is called
326  """
327 
328  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
329  """The user should not call this directly; instead call State::create_molecule()
330  @param state The parent PMI State
331  @param name The name of the molecule (string)
332  @param sequence Sequence (string)
333  @param chain_id The chain of this molecule
334  @param copy_num Store the copy number
335  @param mol_to_clone The original molecule (for cloning ONLY)
336  @note It's expected that you will not use this constructor directly,
337  but rather create a Molecule with pmi::State::create_molecule()
338  """
339  # internal data storage
340  self.model = state.get_hierarchy().get_model()
341  self.state = state
342  self.sequence = sequence
343  self.built = False
344  self.mol_to_clone = mol_to_clone
345  self.is_nucleic=is_nucleic
346  self.representations = [] # list of stuff to build
347  self._pdb_elements = []
348  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
349  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
350  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
351 
352  # create root node and set it as child to passed parent hierarchy
353  self.hier = self._create_child(self.state.get_hierarchy())
354  self.hier.set_name(name)
355  IMP.atom.Copy.setup_particle(self.hier,copy_num)
356  self._name_with_copy = "%s.%d" % (name, copy_num)
357  # store the sequence
358  self.chain=IMP.atom.Chain.setup_particle(self.hier,chain_id)
359  self.chain.set_sequence(self.sequence)
360  # create TempResidues from the sequence (if passed)
361  self.residues=[]
362  for ns,s in enumerate(sequence):
363  r = TempResidue(self,s,ns+1,ns,is_nucleic)
364  self.residues.append(r)
365 
366  @property
367  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
368  def mdl(self):
369  return self.model
370 
371  def __repr__(self):
372  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
373  str(IMP.atom.Copy(self.hier).get_copy_index())
374 
375  def __getitem__(self,val):
376  if isinstance(val,int):
377  return self.residues[val]
378  elif isinstance(val,str):
379  return self.residues[int(val)-1]
380  elif isinstance(val,slice):
381  return IMP.pmi.tools.OrderedSet(self.residues[val])
382  else:
383  raise TypeError("Indexes must be int or str")
384 
385  def get_hierarchy(self):
386  """Return the IMP Hierarchy corresponding to this Molecule"""
387  return self.hier
388 
389  def get_name(self):
390  """Return this Molecule name"""
391  return self.hier.get_name()
392 
393  def get_state(self):
394  """Return the State containing this Molecule"""
395  return self.state
396 
397  def get_ideal_helices(self):
398  """Returns list of OrderedSets with requested ideal helices"""
399  return self._ideal_helices
400 
401  def residue_range(self,a,b,stride=1):
402  """get residue range from a to b, inclusive.
403  Use integers to get 0-indexing, or strings to get PDB-indexing"""
404  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
405  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
406  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
407  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
408  else:
409  raise TypeError("Range ends must be int or str. "
410  "Stride must be int.")
411 
412  def get_residues(self):
413  """ Return all modeled TempResidues as a set"""
414  all_res = IMP.pmi.tools.OrderedSet(self.residues)
415  return all_res
416 
417  def get_represented(self):
418  """ Return set of TempResidues that have representation"""
419  return self._represented
420 
422  """ Return a set of TempResidues that have associated structure coordinates """
423  atomic_res = IMP.pmi.tools.OrderedSet()
424  for res in self.residues:
425  if res.get_has_structure():
426  atomic_res.add(res)
427  return atomic_res
428 
430  """ Return a set of TempResidues that don't have associated structure coordinates """
431  non_atomic_res = IMP.pmi.tools.OrderedSet()
432  for res in self.residues:
433  if not res.get_has_structure():
434  non_atomic_res.add(res)
435  return non_atomic_res
436 
437  def create_copy(self,chain_id):
438  """Create a new Molecule with the same name and sequence but a higher copy number.
439  Returns the Molecule. No structure or representation will be copied!
440  @param chain_id Chain ID of the new molecule
441  """
442  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
443  copy_num=self.state.get_number_of_copies(self.get_name()))
444  self.state._register_copy(mol)
445  return mol
446 
447  def create_clone(self,chain_id):
448  """Create a Molecule clone (automatically builds same structure and representation)
449  @param chain_id If you want to set the chain ID of the copy to something
450  @note You cannot add structure or representations to a clone!
451  """
452  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
453  copy_num=self.state.get_number_of_copies(self.get_name()),
454  mol_to_clone=self)
455  self.state._register_copy(mol)
456  return mol
457 
458  def add_structure(self,pdb_fn,chain_id,res_range=[],
459  offset=0,model_num=None,ca_only=False,
460  soft_check=False):
461  """Read a structure and store the coordinates.
462  @eturn the atomic residues (as a set)
463  @param pdb_fn The file to read
464  @param chain_id Chain ID to read
465  @param res_range Add only a specific set of residues from the PDB file.
466  res_range[0] is the starting and res_range[1] is the
467  ending residue index.
468  @param offset Apply an offset to the residue indexes of the PDB
469  file. This number is added to the PDB sequence.
470  @param model_num Read multi-model PDB and return that model
471  @param ca_only Only read the CA positions from the PDB file
472  @param soft_check If True, it only warns if there are sequence
473  mismatches between the PDB and the Molecule (FASTA)
474  sequence, and uses the sequence from the PDB.
475  If False (Default), it raises an error when there
476  are sequence mismatches.
477  @note If you are adding structure without a FASTA file, set soft_check
478  to True.
479  """
480  if self.mol_to_clone is not None:
481  raise ValueError('You cannot call add_structure() for a clone')
482 
483  self.pdb_fn = pdb_fn
484 
485  # get IMP.atom.Residues from the pdb file
486  rhs = system_tools.get_structure(self.model,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
487  self.coord_finder.add_residues(rhs)
488 
489  if len(self.residues)==0:
490  print("WARNING: Substituting PDB residue type with FASTA residue type. Potentially dangerous.")
491 
492  # Store info for ProtocolOutput usage later
493  self._pdb_elements.append((rhs,
494  _PDBElement(offset=offset, filename=pdb_fn, chain_id=chain_id)))
495 
496  # load those into TempResidue object
497  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
498  for nrh,rh in enumerate(rhs):
499  pdb_idx = rh.get_index()
500  raw_idx = pdb_idx - 1
501 
502  # add ALA to fill in gaps
503  while len(self.residues)<pdb_idx:
504  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
505  self.residues.append(r)
506  self.sequence += 'A'
507 
508  internal_res = self.residues[raw_idx]
509  if len(self.sequence)<raw_idx:
510  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
511  internal_res.set_structure(rh,soft_check)
512  atomic_res.add(internal_res)
513 
514  self.chain.set_sequence(self.sequence)
515  return atomic_res
516 
518  residues=None,
519  resolutions=[],
520  bead_extra_breaks=[],
521  bead_ca_centers=True,
522  bead_default_coord=[0,0,0],
523  density_residues_per_component=None,
524  density_prefix=None,
525  density_force_compute=False,
526  density_voxel_size=1.0,
527  setup_particles_as_densities=False,
528  ideal_helix=False,
529  color=None):
530  """Set the representation for some residues. Some options (beads, ideal helix)
531  operate along the backbone. Others (density options) are volumetric.
532  Some of these you can combine e.g., beads+densities or helix+densities
533  See @ref pmi_resolution
534  @param residues Set of PMI TempResidues for adding the representation.
535  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
536  If None, will select all residues for this Molecule.
537  @param resolutions Resolutions for beads representations.
538  If structured, will average along backbone, breaking at sequence breaks.
539  If unstructured, will just create beads.
540  Pass an integer or list of integers
541  @param bead_extra_breaks Additional breakpoints for splitting beads.
542  The value can be the 0-ordered position, after which it'll insert the break.
543  Alternatively pass PDB-style (1-ordered) indices as a string.
544  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
545  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
546  (otherwise will average atoms to get center). Defaults to True.
547  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
548  If no structure provided (like an all bead molecule), the beads go here.
549  @param density_residues_per_component Create density (Gaussian Mixture Model)
550  for these residues. Must also supply density_prefix
551  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
552  If exists, will read unless you set density_force_compute=True.
553  Will also write map (prefix+'.mrc').
554  Must also supply density_residues_per_component.
555  @param density_force_compute Set true to force overwrite density file.
556  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
557  Set to 0 if you don't want to create the MRC file
558  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
559  Useful for all-atom models or flexible beads.
560  Mutually exclusive with density_ options
561  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
562  Any other resolutions passed will be coarsened from there.
563  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
564  @param color the color applied to the hierarchies generated.
565  Format options: tuple (r,g,b) with values 0 to 1
566  or float (from 0 to 1, a map from Blue to Green to Red)
567  or IMP.display.Color object
568 
569  @note You cannot call add_representation multiple times for the
570  same residues.
571  """
572 
573  # can't customize clones
574  if self.mol_to_clone is not None:
575  raise ValueError('You cannot call add_representation() for a clone.'
576  ' Maybe use a copy instead.')
577 
578  # format input
579  if residues is None:
580  res = IMP.pmi.tools.OrderedSet(self.residues)
581  elif residues==self:
582  res = IMP.pmi.tools.OrderedSet(self.residues)
583  elif type(residues) is IMP.pmi.topology.TempResidue:
584  res = IMP.pmi.tools.OrderedSet([residues])
585  elif hasattr(residues,'__iter__'):
586  if len(residues)==0:
587  raise Exception('You passed an empty set to add_representation')
588  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
589  res = residues
590  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
591  res = IMP.pmi.tools.OrderedSet(residues)
592  elif type(residues) is list and type(residues[0]) is TempResidue:
593  res = IMP.pmi.tools.OrderedSet(residues)
594  else:
595  raise Exception("You passed an iteratible of something other than TempResidue",res)
596  else:
597  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
598 
599  # check that each residue has not been represented yet
600  ov = res & self._represented
601  if ov:
602  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
603  self._represented|=res
604 
605  # check you aren't creating multiple resolutions without structure
606  if not hasattr(resolutions,'__iter__'):
607  if type(resolutions) is int:
608  resolutions = [resolutions]
609  else:
610  raise Exception("you tried to pass resolutions that are not int or list-of-int")
611  if len(resolutions)>1 and not ideal_helix:
612  for r in res:
613  if not r.get_has_structure():
614  raise Exception('You are creating multiple resolutions for '
615  'unstructured regions. This will have unexpected results.')
616 
617  # check density info is consistent
618  if density_residues_per_component or density_prefix:
619  if not density_residues_per_component and density_prefix:
620  raise Exception('If requesting density, must provide '
621  'density_residues_per_component AND density_prefix')
622  if density_residues_per_component and setup_particles_as_densities:
623  raise Exception('Cannot create both volumetric density '
624  '(density_residues_per_component) AND '
625  'individual densities (setup_particles_as_densities) '
626  'in the same representation')
627  if len(resolutions)>1 and setup_particles_as_densities:
628  raise Exception('You have multiple bead resolutions but are attempting to '
629  'set them all up as individual Densities. '
630  'This could have unexpected results.')
631 
632  # check helix not accompanied by other resolutions (densities OK though!)
633  if ideal_helix:
634  if 0 in resolutions:
635  raise Exception("For ideal helices, cannot build resolution 0: "
636  "you have to do that in MODELLER")
637  if 1 not in resolutions:
638  resolutions = [1] + list(resolutions)
639  self._ideal_helices.append(res)
640 
641  # check residues are all part of this molecule:
642  for r in res:
643  if r.get_molecule()!=self:
644  raise Exception('You are adding residues from a different molecule to',self.__repr__())
645 
646  # unify formatting for extra breaks
647  breaks = []
648  for b in bead_extra_breaks:
649  if type(b)==str:
650  breaks.append(int(b)-1)
651  else:
652  breaks.append(b)
653  # store the representation group
654  self.representations.append(_Representation(res,
655  resolutions,
656  breaks,
657  bead_ca_centers,
658  bead_default_coord,
659  density_residues_per_component,
660  density_prefix,
661  density_force_compute,
662  density_voxel_size,
663  setup_particles_as_densities,
664  ideal_helix,
665  color))
666 
667  def _all_protocol_output(self):
668  return self.state._protocol_output
669 
670  def build(self):
671  """Create all parts of the IMP hierarchy
672  including Atoms, Residues, and Fragments/Representations and,
673  finally, Copies.
674  Will only build requested representations.
675  @note Any residues assigned a resolution must have an IMP.atom.Residue
676  hierarchy containing at least a CAlpha. For missing residues,
677  these can be constructed from the PDB file.
678  """
679  if not self.built:
680  # Add molecule name and sequence to any ProtocolOutput objects
681  name=self.hier.get_name()
682  for po, state in self._all_protocol_output():
683  po.create_component(state, name, True,
684  asym_name=self._name_with_copy)
685  po.add_component_sequence(state, name, self.sequence,
686  asym_name=self._name_with_copy)
687  # if requested, clone structure and representations BEFORE building original
688  if self.mol_to_clone is not None:
689  for nr,r in enumerate(self.mol_to_clone.residues):
690  if r.get_has_structure():
691  clone = IMP.atom.create_clone(r.get_hierarchy())
692  self.residues[nr].set_structure(
693  IMP.atom.Residue(clone),soft_check=True)
694  for old_rep in self.mol_to_clone.representations:
695  new_res = IMP.pmi.tools.OrderedSet()
696  for r in old_rep.residues:
697  new_res.add(self.residues[r.get_internal_index()])
698  self._represented.add(self.residues[r.get_internal_index()])
699  new_rep = _Representation(new_res,
700  old_rep.bead_resolutions,
701  old_rep.bead_extra_breaks,
702  old_rep.bead_ca_centers,
703  old_rep.bead_default_coord,
704  old_rep.density_residues_per_component,
705  old_rep.density_prefix,
706  False,
707  old_rep.density_voxel_size,
708  old_rep.setup_particles_as_densities,
709  old_rep.ideal_helix,
710  old_rep.color)
711  self.representations.append(new_rep)
712  self.coord_finder = self.mol_to_clone.coord_finder
713 
714  # give a warning for all residues that don't have representation
715  no_rep = [r for r in self.residues if r not in self._represented]
716  if len(no_rep)>0:
717  print('WARNING: Residues without representation in molecule',
718  self.get_name(),':',system_tools.resnums2str(no_rep))
719 
720  # first build any ideal helices (fills in structure for the TempResidues)
721  for rep in self.representations:
722  if rep.ideal_helix:
723  _build_ideal_helix(self.model,rep.residues,self.coord_finder)
724 
725  # build all the representations
726  built_reps = []
727  for rep in self.representations:
728  built_reps += system_tools.build_representation(
729  self, rep, self.coord_finder)
730 
731  # sort them before adding as children
732  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
733  for br in built_reps:
734  self.hier.add_child(br)
735  br.update_parents()
736  self.built = True
737 
738  rephandler = _RepresentationHandler(self._name_with_copy,
739  list(self._all_protocol_output()),
740  self._pdb_elements)
741  for res in self.residues:
742  idx = res.get_index()
743 
744  # first off, store the highest resolution available in residue.hier
745  new_ps = IMP.atom.Selection(
746  self.hier,
747  residue_index=res.get_index(),
748  resolution=1).get_selected_particles()
749  if len(new_ps)>0:
750  new_p = new_ps[0]
751  if IMP.atom.Atom.get_is_setup(new_p):
752  # if only found atomic, store the residue
753  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
754  else:
755  # otherwise just store what you found
756  new_hier = IMP.atom.Hierarchy(new_p)
757  res.hier = new_hier
758  rephandler(res)
759  else:
760  res.hier = None
761  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
762  print('done building',self.get_hierarchy())
763  return self.hier
764 
765  def get_particles_at_all_resolutions(self,residue_indexes=None):
766  """Helpful utility for getting particles at all resolutions from this molecule.
767  Can optionally pass a set of residue indexes"""
768  if not self.built:
769  raise Exception("Cannot get all resolutions until you build the Molecule")
770  if residue_indexes is None:
771  residue_indexes = [r.get_index() for r in self.get_residues()]
772  ps = IMP.pmi.tools.select_at_all_resolutions(self.get_hierarchy(),
773  residue_indexes=residue_indexes)
774  return ps
775 
776 #------------------------
777 
778 class _Representation(object):
779  """Private class just to store a representation request"""
780  def __init__(self,
781  residues,
782  bead_resolutions,
783  bead_extra_breaks,
784  bead_ca_centers,
785  bead_default_coord,
786  density_residues_per_component,
787  density_prefix,
788  density_force_compute,
789  density_voxel_size,
790  setup_particles_as_densities,
791  ideal_helix,
792  color):
793  self.residues = residues
794  self.bead_resolutions = bead_resolutions
795  self.bead_extra_breaks = bead_extra_breaks
796  self.bead_ca_centers = bead_ca_centers
797  self.bead_default_coord = bead_default_coord
798  self.density_residues_per_component = density_residues_per_component
799  self.density_prefix = density_prefix
800  self.density_force_compute = density_force_compute
801  self.density_voxel_size = density_voxel_size
802  self.setup_particles_as_densities = setup_particles_as_densities
803  self.ideal_helix = ideal_helix
804  self.color = color
805 
806 class _FindCloseStructure(object):
807  """Utility to get the nearest observed coordinate"""
808  def __init__(self):
809  self.coords=[]
810  def add_residues(self,residues):
811  for r in residues:
812  idx = IMP.atom.Residue(r).get_index()
813  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
814  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
815  if len(ca)==1:
816  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
817  self.coords.append([idx,xyz])
818  elif len(p)==1:
819  xyz = IMP.core.XYZ(p[0]).get_coordinates()
820  self.coords.append([idx,xyz])
821  else:
822  raise("_FindCloseStructure: wrong selection")
823 
824  self.coords.sort(key=itemgetter(0))
825  def find_nearest_coord(self,query):
826  if self.coords==[]:
827  return None
828  keys = [r[0] for r in self.coords]
829  pos = bisect_left(keys,query)
830  if pos == 0:
831  ret = self.coords[0]
832  elif pos == len(self.coords):
833  ret = self.coords[-1]
834  else:
835  before = self.coords[pos - 1]
836  after = self.coords[pos]
837  if after[0] - query < query - before[0]:
838  ret = after
839  else:
840  ret = before
841  return ret[1]
842 
843 class Sequences(object):
844  """A dictionary-like wrapper for reading and storing sequence data"""
845  def __init__(self,fasta_fn,name_map=None):
846  """read a fasta file and extract all the requested sequences
847  @param fasta_fn sequence file
848  @param name_map dictionary mapping the fasta name to final stored name
849  """
850  self.sequences = IMP.pmi.tools.OrderedDict()
851  self.read_sequences(fasta_fn,name_map)
852  def __len__(self):
853  return len(self.sequences)
854  def __contains__(self,x):
855  return x in self.sequences
856  def __getitem__(self,key):
857  if type(key) is int:
858  try:
859  allseqs = list(self.sequences.keys())
860  return self.sequences[allseqs[key]]
861  except:
862  raise Exception("You tried to access sequence num",key,"but there's only",len(self.sequences.keys()))
863  else:
864  return self.sequences[key]
865  def __iter__(self):
866  return self.sequences.__iter__()
867  def __repr__(self):
868  ret=''
869  for s in self.sequences:
870  ret += '%s\t%s\n'%(s,self.sequences[s])
871  return ret
872  def read_sequences(self,fasta_fn,name_map=None):
873  code = None
874  seq = None
875  with open(fasta_fn,'r') as fh:
876  for (num, line) in enumerate(fh):
877  if line.startswith('>'):
878  if seq is not None:
879  self.sequences[code] = seq.strip('*')
880  code = line.rstrip()[1:]
881  if name_map is not None:
882  try:
883  code = name_map[code]
884  except:
885  pass
886  seq = ''
887  else:
888  line = line.rstrip()
889  if line: # Skip blank lines
890  if seq is None:
891  raise Exception( \
892  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
893  seq += line
894  if seq is not None:
895  self.sequences[code] = seq.strip('*')
896 
897 
898 class PDBSequences(object):
899  """Data_structure for reading and storing sequence data from pdb"""
900  def __init__(self,model,pdb_fn,name_map=None):
901  """read a pdb file and returns all sequences for each contiguous fragment
902  @param pdb_fn file
903  @param name_map dictionary mapping the pdb chain id to final stored name
904  """
905  self.model=model
906  # self.sequences data-structure: (two-key dictionary)
907  # it contains all contiguous fragments:
908  # chain_id, tuples indicating first and last residue, sequence
909  # example:
910  # key1, key2, value
911  # A (77, 149) VENPSLDLEQYAASYSGLMR....
912  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
913  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
914  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
915  self.sequences = IMP.pmi.tools.OrderedDict()
916  self.read_sequences(pdb_fn,name_map)
917 
918  @property
919  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
920  def m(self):
921  return self.model
922 
923  def read_sequences(self,pdb_fn,name_map):
924  read_file = IMP.atom.read_pdb
925  if pdb_fn.endswith('.cif'):
926  read_file = IMP.atom.read_mmcif
927  t = read_file(pdb_fn, self.model, IMP.atom.ATOMPDBSelector())
928  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
929  for c in cs:
930  id=IMP.atom.Chain(c).get_id()
931  print(id)
932  if name_map:
933  try:
934  id=name_map[id]
935  except:
936  print("Chain ID %s not in name_map, skipping" % id)
937  continue
938  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
939  rids=[]
940  rids_olc_dict={}
941  for r in rs:
942  dr=IMP.atom.Residue(r)
943  rid=dr.get_index()
944 
945  isprotein=dr.get_is_protein()
946  isrna=dr.get_is_rna()
947  isdna=dr.get_is_dna()
948  if isprotein:
949  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
950  rids.append(rid)
951  rids_olc_dict[rid]=olc
952  elif isdna:
953  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
954  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
955  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
956  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
957  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
958  rids.append(rid)
959  rids_olc_dict[rid]=olc
960  elif isrna:
961  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
962  if dr.get_residue_type() == IMP.atom.URA: olc="U"
963  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
964  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
965  if dr.get_residue_type() == IMP.atom.THY: olc="T"
966  rids.append(rid)
967  rids_olc_dict[rid]=olc
968  group_rids=self.group_indexes(rids)
969  contiguous_sequences=IMP.pmi.tools.OrderedDict()
970  for group in group_rids:
971  sequence_fragment=""
972  for i in range(group[0],group[1]+1):
973  sequence_fragment+=rids_olc_dict[i]
974  contiguous_sequences[group]=sequence_fragment
975  self.sequences[id]=contiguous_sequences
976 
977  #for c in self.sequences:
978  # for group in self.sequences[c]:
979  # print(c,group,self.sequences[c][group])
980 
981  def group_indexes(self,indexes):
982  from itertools import groupby
983  ranges = []
984  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
985  group = [x[1] for x in g]
986  ranges.append((group[0], group[-1]))
987  return ranges
988 
989 
990 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
991  '''This function computes and prints the alignment between the
992  fasta file and the pdb sequence, computes the offsets for each contiguous
993  fragment in the PDB.
994  @param fasta_sequences IMP.pmi.topology.Sequences object
995  @param pdb_sequences IMP.pmi.topology.PDBSequences object
996  @param show boolean default False, if True prints the alignments.
997  The input objects should be generated using map_name dictionaries such that fasta_id
998  and pdb_chain_id are mapping to the same protein name. It needs BioPython.
999  Returns a dictionary of offsets, organized by peptide range (group):
1000  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1001  from Bio import pairwise2
1002  from Bio.pairwise2 import format_alignment
1003  from Bio.SubsMat import MatrixInfo as matlist
1004  matrix = matlist.blosum62
1005  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
1006  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
1007  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
1008  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
1009  offsets=IMP.pmi.tools.OrderedDict()
1010  for name in fasta_sequences.sequences:
1011  print(name)
1012  seq_fasta=fasta_sequences.sequences[name]
1013  if name not in pdb_sequences.sequences:
1014  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
1015  pdbnames=pdb_sequences.sequences.keys()
1016  else:
1017  pdbnames=[name]
1018  for pdbname in pdbnames:
1019  for group in pdb_sequences.sequences[pdbname]:
1020  if group[1]-group[0]+1<7:continue
1021  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
1022  if show:
1023  print("########################")
1024  print(" ")
1025  print("protein name",pdbname)
1026  print("fasta id", name)
1027  print("pdb fragment",group)
1028  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
1029  for a in [align]:
1030  offset=a[3]+1-group[0]
1031  if show:
1032  print("alignment sequence start-end",(a[3]+1,a[4]+1))
1033  print("offset from pdb to fasta index",offset)
1034  print(format_alignment(*a))
1035  if name not in offsets:
1036  offsets[pdbname]={}
1037  if group not in offsets[pdbname]:
1038  offsets[pdbname][group]=offset
1039  else:
1040  if group not in offsets[pdbname]:
1041  offsets[pdbname][group]=offset
1042  return offsets
1043 
1044 
1045 
1046 #------------------------
1047 
1048 
1049 class TempResidue(object):
1050  """Temporarily stores residue information, even without structure available."""
1051  # Consider implementing __hash__ so you can select.
1052  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
1053  """setup a TempResidue
1054  @param molecule PMI Molecule to which this residue belongs
1055  @param code one-letter residue type code
1056  @param index PDB index
1057  @param internal_index The number in the sequence
1058  """
1059  #these attributes should be immutable
1060  self.molecule = molecule
1061  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
1062  self.pdb_index = index
1063  self.internal_index = internal_index
1064  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
1065  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
1066  #these are expected to change
1067  self._structured = False
1068  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.model),
1069  self.rtype,
1070  index)
1071  def __str__(self):
1072  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
1073  def __repr__(self):
1074  return self.__str__()
1075  def __key(self):
1076  #this returns the immutable attributes only
1077  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
1078  def __eq__(self,other):
1079  return type(other)==type(self) and self.__key() == other.__key()
1080  def __hash__(self):
1081  return hash(self.__key())
1082  def get_index(self):
1083  return self.pdb_index
1084  def get_internal_index(self):
1085  return self.internal_index
1086  def get_code(self):
1087  return IMP.atom.get_one_letter_code(self.get_residue_type())
1088  def get_residue_type(self):
1089  return self.rtype
1090  def get_hierarchy(self):
1091  return self.hier
1092  def get_molecule(self):
1093  return self.molecule
1094  def get_has_structure(self):
1095  return self._structured
1096  def set_structure(self,res,soft_check=False):
1097  if res.get_residue_type()!=self.get_residue_type():
1098  if soft_check:
1099  # note from commit a2c13eaa1 we give priority to the FASTA and not the PDB
1100  print('WARNING: Inconsistency between FASTA sequence and PDB sequence. FASTA type',\
1101  self.get_index(),self.hier.get_residue_type(),
1102  'and PDB type',res.get_residue_type())
1103  self.hier.set_residue_type((self.get_residue_type()))
1104  self.rtype = self.get_residue_type()
1105  else:
1106  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
1107  IMP.atom.get_one_letter_code(res.get_residue_type()),
1108  'and sequence residue is',self.get_code())
1109 
1110  for a in res.get_children():
1111  self.hier.add_child(a)
1112  atype = IMP.atom.Atom(a).get_atom_type()
1113  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
1114  self.hier.get_index()))
1115  self._structured = True
1116 
1117 class TopologyReader(object):
1118  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
1119  The file is read in and each part of the topology is stored as a
1120  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1121  The topology file should be in a simple pipe-delimited format:
1122  @code{.txt}
1123 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1124 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1125 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1126 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1127 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1128 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1129 
1130  @endcode
1131 
1132  These are the fields you can enter:
1133  - `component_name`: Name of the component (chain). Serves as the parent
1134  hierarchy for this structure.
1135  - `color`: The color used in the output RMF file. Uses chimera names or R,G,B values
1136  - `fasta_fn`: Name of FASTA file containing this component.
1137  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1138  from the file is assumed to be a protein sequence. If it should instead
1139  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1140  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1141  FASTA file and treat it as RNA.
1142  - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1143  If left empty, will set up as BEADS (you can also specify "BEADS")
1144  Can also write "IDEAL_HELIX".
1145  - `chain`: Chain ID of this domain in the PDB file.
1146  - `residue_range`: Comma delimited pair defining range.
1147  Can leave empty or use 'all' for entire sequence from PDB file.
1148  The second item in the pair can be END to select the last residue in the
1149  PDB chain.
1150  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1151  - `bead_size`: The size (in residues) of beads used to model areas not
1152  covered by PDB coordinates. These will be automatically built.
1153  - `em_residues`: The number of Gaussians used to model the electron
1154  density of this domain. Set to zero if no EM fitting will be done.
1155  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1156  - `rigid_body`: Leave empty if this object is not in a rigid body.
1157  Otherwise, this is a number corresponding to the rigid body containing
1158  this object. The number itself is just used for grouping things.
1159  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1160  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1161  - `flags` additional flags for advanced options
1162  @note All filenames are relative to the paths specified in the constructor.
1163 
1164  """
1165  def __init__(self,
1166  topology_file,
1167  pdb_dir='./',
1168  fasta_dir='./',
1169  gmm_dir='./'):
1170  """Constructor.
1171  @param topology_file Pipe-delimited file specifying the topology
1172  @param pdb_dir Relative path to the pdb directory
1173  @param fasta_dir Relative path to the fasta directory
1174  @param gmm_dir Relative path to the GMM directory
1175  """
1176  self.topology_file = topology_file
1177  self.molecules = IMP.pmi.tools.OrderedDict() # key=molname, value=TempMolecule
1178  self.pdb_dir = pdb_dir
1179  self.fasta_dir = fasta_dir
1180  self.gmm_dir = gmm_dir
1181  self._components = self.read(topology_file)
1182 
1183  # Preserve old self.component_list for backwards compatibility
1184  @IMP.deprecated_method("2.7",
1185  "Use 'get_components()' instead of 'component_list'.")
1186  def __get_component_list(self): return self._components
1187  component_list = property(__get_component_list)
1188 
1189  def write_topology_file(self,outfile):
1190  with open(outfile, "w") as f:
1191  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1192  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1193  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1194  for c in self._components:
1195  output = c.get_str()+'\n'
1196  f.write(output)
1197  return outfile
1198 
1199  def get_components(self, topology_list = "all"):
1200  """ Return list of ComponentTopologies for selected components
1201  @param topology_list List of indices to return"""
1202  if topology_list == "all":
1203  topologies = self._components
1204  else:
1205  topologies=[]
1206  for i in topology_list:
1207  topologies.append(self._components[i])
1208  return topologies
1209 
1210  def get_molecules(self):
1211  return self.molecules
1212 
1213  def read(self, topology_file, append=False):
1214  """Read system components from topology file. append=False will erase
1215  current topology and overwrite with new
1216  """
1217  is_topology = False
1218  is_directories = False
1219  linenum = 1
1220  if append==False:
1221  self._components=[]
1222 
1223  with open(topology_file) as infile:
1224  for line in infile:
1225  if line.lstrip()=="" or line[0]=="#":
1226  continue
1227  elif line.split('|')[1].strip() in ("molecule_name"):
1228  is_topology=True
1229  is_directories = False
1230  old_format = False
1231  continue
1232  elif line.split('|')[1] == "component_name":
1233  is_topology = True
1235  "Old-style topology format (using "
1236  "|component_name|) is deprecated. Please switch to "
1237  "the new-style format (using |molecule_name|)\n")
1238  old_format = True
1239  is_directories = False
1240  continue
1241  elif line.split('|')[1] == "directories":
1243  "Setting directories in the topology file "
1244  "is deprecated. Please do so through the "
1245  "TopologyReader constructor. Note that new-style "
1246  "paths are relative to the current working "
1247  "directory, not the topology file.\n")
1248  is_directories = True
1249  elif is_directories:
1250  fields = line.split('|')
1251  setattr(self, fields[1],
1252  IMP.get_relative_path(topology_file, fields[2]))
1253  if is_topology:
1254  new_component = self._parse_line(line, linenum, old_format)
1255  self._components.append(new_component)
1256  linenum += 1
1257  return self._components
1258 
1259  def _parse_line(self, component_line, linenum, old_format):
1260  """Parse a line of topology values and matches them to their key.
1261  Checks each value for correct syntax
1262  Returns a list of Component objects
1263  fields:
1264  """
1265  c = _Component()
1266  values = [s.strip() for s in component_line.split('|')]
1267  errors = []
1268 
1269  ### Required fields
1270  if old_format:
1271  c.molname = values[1]
1272  c.copyname = ''
1273  c._domain_name = values[2]
1274  c.color = 'blue'
1275  else:
1276  names = values[1].split('.')
1277  if len(names)==1:
1278  c.molname = names[0]
1279  c.copyname = ''
1280  elif len(names)==2:
1281  c.molname = names[0]
1282  c.copyname = names[1]
1283  else:
1284  c.molname = names[0]
1285  c.copyname = names[1]
1286  errors.append("Molecule name should be <molecule.copyID>")
1287  errors.append("For component %s line %d " % (c.molname,linenum))
1288  c._domain_name = c.molname + '.' + c.copyname
1289  colorfields = values[2].split(',')
1290  if len(colorfields)==3:
1291  c.color = [float(x) for x in colorfields]
1292  if any([x>1 for x in c.color]):
1293  c.color=[x/255 for x in c.color]
1294  else:
1295  c.color = values[2]
1296  c._orig_fasta_file = values[3]
1297  c.fasta_file = values[3]
1298  fasta_field = values[4].split(",")
1299  c.fasta_id = fasta_field[0]
1300  c.fasta_flag = None
1301  if len(fasta_field) > 1:
1302  c.fasta_flag = fasta_field[1]
1303  c._orig_pdb_input = values[5]
1304  pdb_input = values[5]
1305  tmp_chain = values[6]
1306  rr = values[7]
1307  offset = values[8]
1308  bead_size = values[9]
1309  emg = values[10]
1310  if old_format:
1311  rbs = srbs = csrbs = ''
1312  else:
1313  rbs = values[11]
1314  srbs = values[12]
1315  csrbs = values[13]
1316 
1317  if c.molname not in self.molecules:
1318  self.molecules[c.molname] = _TempMolecule(c)
1319  else:
1320  # COPY OR DOMAIN
1321  c._orig_fasta_file = self.molecules[c.molname].orig_component.fasta_file
1322  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1323  self.molecules[c.molname].add_component(c,c.copyname)
1324 
1325  # now cleanup input
1326  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1327  if pdb_input=="":
1328  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1329  errors.append("For component %s line %d is not correct"
1330  "|%s| was given." % (c.molname,linenum,pdb_input))
1331  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1332  c.pdb_file = pdb_input
1333  else:
1334  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1335 
1336  # PDB chain must be one or two characters
1337  if len(tmp_chain)==1 or len(tmp_chain)==2:
1338  c.chain = tmp_chain
1339  else:
1340  errors.append("PDB Chain identifier must be one or two characters.")
1341  errors.append("For component %s line %d is not correct"
1342  "|%s| was given." % (c.molname,linenum,tmp_chain))
1343 
1344  ### Optional fields
1345  # Residue Range
1346  if rr.strip()=='all' or str(rr)=="":
1347  c.residue_range = None
1348  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1349  # Make sure that is residue range is given, there are only two values and they are integers
1350  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1351  if c.residue_range[1] != 'END':
1352  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1353  # Old format used -1 for the last residue
1354  if old_format and c.residue_range[1] == -1:
1355  c.residue_range = (c.residue_range[0], 'END')
1356  else:
1357  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1358  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1359  errors.append("To select all residues, indicate |\"all\"|")
1360 
1361  # PDB Offset
1362  if self._is_int(offset):
1363  c.pdb_offset=int(offset)
1364  elif len(offset)==0:
1365  c.pdb_offset = 0
1366  else:
1367  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1368  errors.append("The value must be a single integer. |%s| was given." % offset)
1369 
1370  # Bead Size
1371  if self._is_int(bead_size):
1372  c.bead_size=int(bead_size)
1373  elif len(bead_size)==0:
1374  c.bead_size = 0
1375  else:
1376  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1377  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1378 
1379  # EM Residues Per Gaussian
1380  if self._is_int(emg):
1381  if int(emg) > 0:
1382  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1383  c.gmm_file = c.density_prefix+'.txt'
1384  c.mrc_file = c.density_prefix+'.gmm'
1385 
1386  c.em_residues_per_gaussian=int(emg)
1387  else:
1388  c.em_residues_per_gaussian = 0
1389  elif len(emg)==0:
1390  c.em_residues_per_gaussian = 0
1391  else:
1392  errors.append("em_residues_per_gaussian format for component "
1393  "%s line %d is not correct" % (c.molname, linenum))
1394  errors.append("The value must be a single integer. |%s| was given." % emg)
1395 
1396  # rigid bodies
1397  if len(rbs)>0:
1398  if not self._is_int(rbs):
1399  errors.append("rigid bodies format for component "
1400  "%s line %d is not correct" % (c.molname, linenum))
1401  errors.append("Each RB must be a single integer, or empty. "
1402  "|%s| was given." % rbs)
1403  c.rigid_body = int(rbs)
1404 
1405  # super rigid bodies
1406  if len(srbs)>0:
1407  srbs = srbs.split(',')
1408  for i in srbs:
1409  if not self._is_int(i):
1410  errors.append("super rigid bodies format for component "
1411  "%s line %d is not correct" % (c.molname, linenum))
1412  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1413  c.super_rigid_bodies = srbs
1414 
1415  # chain of super rigid bodies
1416  if len(csrbs)>0:
1417  if not self._is_int(csrbs):
1418  errors.append("em_residues_per_gaussian format for component "
1419  "%s line %d is not correct" % (c.molname, linenum))
1420  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1421  c.chain_of_super_rigid_bodies = csrbs
1422 
1423  # done
1424  if errors:
1425  raise ValueError("Fix Topology File syntax errors and rerun: " \
1426  + "\n".join(errors))
1427  else:
1428  return c
1429 
1430 
1431  def set_gmm_dir(self,gmm_dir):
1432  """Change the GMM dir"""
1433  self.gmm_dir = gmm_dir
1434  for c in self._components:
1435  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1436  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1437  print('new gmm',c.gmm_file)
1438 
1439  def set_pdb_dir(self,pdb_dir):
1440  """Change the PDB dir"""
1441  self.pdb_dir = pdb_dir
1442  for c in self._components:
1443  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1444  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1445 
1446  def set_fasta_dir(self,fasta_dir):
1447  """Change the FASTA dir"""
1448  self.fasta_dir = fasta_dir
1449  for c in self._components:
1450  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1451 
1452  def _is_int(self, s):
1453  # is this string an integer?
1454  try:
1455  float(s)
1456  return float(s).is_integer()
1457  except ValueError:
1458  return False
1459 
1460  def get_rigid_bodies(self):
1461  """Return list of lists of rigid bodies (as domain name)"""
1462  rbl = defaultdict(list)
1463  for c in self._components:
1464  if c.rigid_body:
1465  rbl[c.rigid_body].append(c.get_unique_name())
1466  return rbl.values()
1467 
1469  """Return list of lists of super rigid bodies (as domain name)"""
1470  rbl = defaultdict(list)
1471  for c in self._components:
1472  for rbnum in c.super_rigid_bodies:
1473  rbl[rbnum].append(c.get_unique_name())
1474  return rbl.values()
1475 
1477  """Return list of lists of chains of super rigid bodies (as domain name)"""
1478  rbl = defaultdict(list)
1479  for c in self._components:
1480  for rbnum in c.chain_of_super_rigid_bodies:
1481  rbl[rbnum].append(c.get_unique_name())
1482  return rbl.values()
1483 
1484 class _TempMolecule(object):
1485  """Store the Components and any requests for copies"""
1486  def __init__(self,init_c):
1487  self.molname = init_c.molname
1488  # key=copy ID, value = list of domains
1489  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1490  self.add_component(init_c,init_c.copyname)
1491  self.orig_copyname = init_c.copyname
1492  self.orig_component = self.domains[init_c.copyname][0]
1493  def add_component(self,component,copy_id):
1494  self.domains[copy_id].append(component)
1495  component.domainnum = len(self.domains[copy_id])-1
1496  def __repr__(self):
1497  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1498 
1499 class _Component(object):
1500  """Stores the components required to build a standard IMP hierarchy
1501  using IMP.pmi.BuildModel()
1502  """
1503  def __init__(self):
1504  self.molname = None
1505  self.copyname = None
1506  self.domainnum = 0
1507  self.fasta_file = None
1508  self._orig_fasta_file = None
1509  self.fasta_id = None
1510  self.fasta_flag = None
1511  self.pdb_file = None
1512  self._orig_pdb_input = None
1513  self.chain = None
1514  self.residue_range = None
1515  self.pdb_offset = 0
1516  self.bead_size = 10
1517  self.em_residues_per_gaussian = 0
1518  self.gmm_file = ''
1519  self.mrc_file = ''
1520  self.density_prefix = ''
1521  self.color = 0.1
1522  self.rigid_body = None
1523  self.super_rigid_bodies = []
1524  self.chain_of_super_rigid_bodies = []
1525 
1526  def _l2s(self,l):
1527  return ",".join("%s" % x for x in l)
1528 
1529  def __repr__(self):
1530  return self.get_str()
1531 
1532  def get_unique_name(self):
1533  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1534 
1535  def get_str(self):
1536  res_range = self.residue_range
1537  if self.residue_range is None:
1538  res_range = []
1539  name = self.molname
1540  if self.copyname!='':
1541  name += '.'+self.copyname
1542  if self.chain is None:
1543  chain = ' '
1544  else:
1545  chain = self.chain
1546  color=self.color
1547  if isinstance(color, list):
1548  color=','.join([str(x) for x in color])
1549  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1550  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1551  str(self.pdb_offset),str(self.bead_size),
1552  str(self.em_residues_per_gaussian),
1553  str(self.rigid_body) if self.rigid_body else '',
1554  self._l2s(self.super_rigid_bodies),
1555  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1556  return a
1557 
1558  # Preserve old self.name for backwards compatibility
1559  @IMP.deprecated_method("2.7", "Use 'molname' instead of 'name'.")
1560  def __get_name(self): return self.molname
1561  name = property(__get_name)
1562 
1563  # Preserve old self.domain_name for backwards compatibility
1564  @IMP.deprecated_method("2.7",
1565  "Use 'get_unique_name()' instead of 'domain_name'.")
1566  def __get_domain_name(self): return self._domain_name
1567  domain_name = property(__get_domain_name)
1568 
1569 
1571  '''Extends the functionality of IMP.atom.Molecule'''
1572 
1573  def __init__(self,hierarchy):
1574  IMP.atom.Molecule.__init__(self,hierarchy)
1575 
1576  def get_state_index(self):
1577  state = self.get_parent()
1578  return IMP.atom.State(state).get_state_index()
1579 
1580  def get_copy_index(self):
1581  return IMP.atom.Copy(self).get_copy_index()
1582 
1583  def get_extended_name(self):
1584  return self.get_name()+"."+\
1585  str(self.get_copy_index())+\
1586  "."+str(self.get_state_index())
1587 
1588  def get_sequence(self):
1589  return IMP.atom.Chain(self).get_sequence()
1590 
1591  def get_residue_indexes(self):
1593 
1594  def get_residue_segments(self):
1595  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1596 
1597  def get_chain_id(self):
1598  return IMP.atom.Chain(self).get_id()
1599 
1600  def __repr__(self):
1601  s='PMIMoleculeHierarchy '
1602  s+=self.get_name()
1603  s+=" "+"Copy "+str(IMP.atom.Copy(self).get_copy_index())
1604  s+=" "+"State "+str(self.get_state_index())
1605  s+=" "+"N residues "+str(len(self.get_sequence()))
1606  return s
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
Build all states.
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9703
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1063
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.