IMP logo
IMP Reference Guide  2.9.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2 Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `mdl = IMP.Model(); s = IMP.pmi.topology.System(mdl)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiples times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 
34 def _build_ideal_helix(mdl, residues, coord_finder):
35  """Creates an ideal helix from the specified residue range
36  Residues MUST be contiguous.
37  This function actually adds them to the TempResidue hierarchy
38  """
39  created_hiers = []
40 
41  # this function creates a CAlpha helix structure (which can be used for coarsening)
42  for n, tempres in enumerate(residues):
43  if tempres.get_has_structure():
44  raise Exception("You tried to build ideal_helix for a residue "
45  "that already has structure:",tempres)
46  if n>0 and (not tempres.get_index()==prev_idx+1):
47  raise Exception("Passed non-contiguous segment to build_ideal_helix for",tempres.get_molecule())
48 
49  # New residue particle will replace the TempResidue's existing (empty) hierarchy
50  rp = IMP.Particle(mdl)
51  rp.set_name("Residue_%i" % tempres.get_index())
52 
53  # Copy the original residue type and index
54  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
55 
56  # Create the CAlpha
57  ap = IMP.Particle(mdl)
59  x = 2.3 * cos(n * 2 * pi / 3.6)
60  y = 2.3 * sin(n * 2 * pi / 3.6)
61  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
62  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
63  d.set_radius(1.0)
64  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
65  IMP.atom.Mass(ap).set_mass(110.0)
66  this_res.add_child(a)
67 
68  # Add this structure to the TempResidue
69  tempres.set_structure(this_res)
70  created_hiers.append(this_res)
71  prev_idx = tempres.get_index()
72  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
73 
74 class _SystemBase(object):
75  """The base class for System, State and Molecule
76  classes. It contains shared functions in common to these classes
77  """
78 
79  def __init__(self,mdl=None):
80  if mdl is None:
81  self.mdl=IMP.Model()
82  else:
83  self.mdl=mdl
84 
85  def _create_hierarchy(self):
86  """create a new hierarchy"""
87  tmp_part=IMP.Particle(self.mdl)
88  return IMP.atom.Hierarchy.setup_particle(tmp_part)
89 
90  def _create_child(self,parent_hierarchy):
91  """create a new hierarchy, set it as child of the input
92  one, and return it"""
93  child_hierarchy=self._create_hierarchy()
94  parent_hierarchy.add_child(child_hierarchy)
95  return child_hierarchy
96 
97  def build(self):
98  """Build the coordinates of the system.
99  Loop through stored(?) hierarchies and set up coordinates!"""
100  pass
101 
102 #------------------------
103 
104 class System(_SystemBase):
105  """This class initializes the root node of the global IMP.atom.Hierarchy."""
106  def __init__(self,mdl=None,name="System"):
107  _SystemBase.__init__(self,mdl)
108  self._number_of_states = 0
109  self.states = []
110  self.built=False
111 
112  # the root hierarchy node
113  self.hier=self._create_hierarchy()
114  self.hier.set_name(name)
115 
116  def get_states(self):
117  return self.states
118 
119  def create_state(self):
120  """returns a new IMP.pmi.representation_new.State(), increment the state index"""
121  self._number_of_states+=1
122  state = State(self,self._number_of_states-1)
123  self.states.append(state)
124  return state
125 
126  def __repr__(self):
127  return self.hier.get_name()
128 
130  """returns the total number of states generated"""
131  return self._number_of_states
132 
133  def get_hierarchy(self):
134  return self.hier
135 
136  def build(self,**kwargs):
137  """call build on all states"""
138  if not self.built:
139  for state in self.states:
140  state.build(**kwargs)
141  self.built=True
142  return self.hier
143 
144 #------------------------
145 
146 class State(_SystemBase):
147  """Stores a list of Molecules all with the same State index.
148  Also stores number of copies of each Molecule for easy Selection.
149  """
150  def __init__(self,system,state_index):
151  """Define a new state
152  @param system the PMI System
153  @param state_index the index of the new state
154  \note It's expected that you will not use this constructor directly,
155  but rather create it with pmi::System::create_molecule()
156  """
157  self.mdl = system.get_hierarchy().get_model()
158  self.system = system
159  self.hier = self._create_child(system.get_hierarchy())
160  self.hier.set_name("State_"+str(state_index))
161  self.molecules = defaultdict(list) # key is molecule name. value are the molecule copies!
162  IMP.atom.State.setup_particle(self.hier,state_index)
163  self.built = False
164 
165  def __repr__(self):
166  return self.system.__repr__()+'.'+self.hier.get_name()
167 
168  def get_molecules(self):
169  """Return a dictionary where key is molecule name and value
170  are the list of all copies of that molecule in setup order"""
171  return self.molecules
172 
173  def get_molecule(self,name,copy_num=0):
174  """Access a molecule by name and copy number
175  @param name The molecule name used during setup
176  @param copy_num The copy number based on input order.
177  Default: 0. Set to 'all' to get all copies
178  """
179  if name not in self.molecules:
180  raise Exception("get_molecule() could not find molname",name)
181  if copy_num=='all':
182  return self.molecules[name]
183  else:
184  if copy_num>len(self.molecules[name])-1:
185  raise Exception("get_molecule() copy number is too high:",copy_num)
186  return self.molecules[name][copy_num]
187 
188  def create_molecule(self,name,sequence='',chain_id='',is_nucleic=None):
189  """Create a new Molecule within this State
190  @param name the name of the molecule (string) it must not
191  be already used
192  @param sequence sequence (string)
193  @param chain_id Chain id to assign to this molecule
194  """
195  # check whether the molecule name is already assigned
196  if name in self.molecules:
197  raise Exception('Cannot use a molecule name already used')
198 
199  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
200  self.molecules[name].append(mol)
201  return mol
202 
203  def get_hierarchy(self):
204  return self.hier
205 
206  def get_number_of_copies(self,molname):
207  return len(self.molecules[molname])
208 
209  def _register_copy(self,molecule):
210  molname = molecule.get_hierarchy().get_name()
211  if molname not in self.molecules:
212  raise Exception("Trying to add a copy when the original doesn't exist!")
213  self.molecules[molname].append(molecule)
214 
215  def build(self,**kwargs):
216  """call build on all molecules (automatically makes clones)"""
217  if not self.built:
218  for molname in self.molecules:
219  for mol in reversed(self.molecules[molname]):
220  mol.build(**kwargs)
221  self.built=True
222  return self.hier
223 
224 #------------------------
225 
226 class Molecule(_SystemBase):
227  """Stores a named protein chain.
228  This class is constructed from within the State class.
229  It wraps an IMP.atom.Molecule and IMP.atom.Copy
230  Structure is read using this class
231  Resolutions and copies can be registered, but are only created when build() is called
232  """
233 
234  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
235  """The user should not call this directly; instead call State::create_molecule()
236  @param state The parent PMI State
237  @param name The name of the molecule (string)
238  @param sequence Sequence (string)
239  @param chain_id The chain of this molecule
240  @param copy_num Store the copy number
241  @param mol_to_clone The original molecule (for cloning ONLY)
242  \note It's expected that you will not use this constructor directly,
243  but rather create a Molecule with pmi::State::create_molecule()
244  """
245  # internal data storage
246  self.mdl = state.get_hierarchy().get_model()
247  self.state = state
248  self.sequence = sequence
249  self.built = False
250  self.mol_to_clone = mol_to_clone
251  self.is_nucleic=is_nucleic
252  self.representations = [] # list of stuff to build
253  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
254  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
255  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
256 
257  # create root node and set it as child to passed parent hierarchy
258  self.hier = self._create_child(self.state.get_hierarchy())
259  self.hier.set_name(name)
260  IMP.atom.Copy.setup_particle(self.hier,copy_num)
261  # store the sequence
262  self.chain=IMP.atom.Chain.setup_particle(self.hier,chain_id)
263  self.chain.set_sequence(self.sequence)
264  # create TempResidues from the sequence (if passed)
265  self.residues=[]
266  for ns,s in enumerate(sequence):
267  r = TempResidue(self,s,ns+1,ns,is_nucleic)
268  self.residues.append(r)
269 
270  def __repr__(self):
271  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
272  str(IMP.atom.Copy(self.hier).get_copy_index())
273 
274  def __getitem__(self,val):
275  if isinstance(val,int):
276  return self.residues[val]
277  elif isinstance(val,str):
278  return self.residues[int(val)-1]
279  elif isinstance(val,slice):
280  return IMP.pmi.tools.OrderedSet(self.residues[val])
281  else:
282  print("ERROR: range ends must be int or str. Stride must be int.")
283 
284  def get_hierarchy(self):
285  """Return the IMP Hierarchy corresponding to this Molecule"""
286  return self.hier
287 
288  def get_name(self):
289  """Return this Molecule name"""
290  return self.hier.get_name()
291 
292  def get_state(self):
293  """Return the State containing this Molecule"""
294  return self.state
295 
296  def get_ideal_helices(self):
297  """Returns list of OrderedSets with requested ideal helices"""
298  return self._ideal_helices
299 
300  def residue_range(self,a,b,stride=1):
301  """get residue range from a to b, inclusive.
302  Use integers to get 0-indexing, or strings to get PDB-indexing"""
303  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
304  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
305  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
306  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
307  else:
308  print("ERROR: range ends must be int or str. Stride must be int.")
309 
310  def get_residues(self):
311  """ Return all modeled TempResidues as a set"""
312  all_res = IMP.pmi.tools.OrderedSet(self.residues)
313  return all_res
314 
315  def get_represented(self):
316  """ Return set of TempResidues that have representation"""
317  return self._represented
318 
320  """ Return a set of TempResidues that have associated structure coordinates """
321  atomic_res = IMP.pmi.tools.OrderedSet()
322  for res in self.residues:
323  if res.get_has_structure():
324  atomic_res.add(res)
325  return atomic_res
326 
328  """ Return a set of TempResidues that don't have associated structure coordinates """
329  non_atomic_res = IMP.pmi.tools.OrderedSet()
330  for res in self.residues:
331  if not res.get_has_structure():
332  non_atomic_res.add(res)
333  return non_atomic_res
334 
335  def create_copy(self,chain_id):
336  """Create a new Molecule with the same name and sequence but a higher copy number.
337  Returns the Molecule. No structure or representation will be copied!
338  @param chain_id Chain ID of the new molecule
339  """
340  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
341  copy_num=self.state.get_number_of_copies(self.get_name()))
342  self.state._register_copy(mol)
343  return mol
344 
345  def create_clone(self,chain_id):
346  """Create a Molecule clone (automatically builds same structure and representation)
347  @param chain_id If you want to set the chain ID of the copy to something
348  \note You cannot add structure or representations to a clone!
349  """
350  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
351  copy_num=self.state.get_number_of_copies(self.get_name()),
352  mol_to_clone=self)
353  self.state._register_copy(mol)
354  return mol
355 
356  def add_structure(self,pdb_fn,chain_id,res_range=[],
357  offset=0,model_num=None,ca_only=False,
358  soft_check=False):
359  """Read a structure and store the coordinates.
360  Returns the atomic residues (as a set)
361  @param pdb_fn The file to read
362  @param chain_id Chain ID to read
363  @param res_range Add only a specific set of residues from the PDB file.
364  res_range[0] is the starting and res_range[1] is the ending residue index.
365  @param offset Apply an offset to the residue indexes of the PDB file.
366  This number is added to the PDB sequence.
367  @param model_num Read multi-model PDB and return that model
368  @param ca_only Only read the CA positions from the PDB file
369  @param soft_check If True, it only warns if there are sequence mismatches between the pdb and
370  the Molecules sequence. Actually replaces the fasta values.
371  If False (Default), it raises and exit when there are sequence mismatches.
372  \note If you are adding structure without a FASTA file, set soft_check to True
373  """
374  if self.mol_to_clone is not None:
375  raise Exception('You cannot call add_structure() for a clone')
376 
377  self.pdb_fn = pdb_fn
378 
379  # get IMP.atom.Residues from the pdb file
380  rhs = system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
381  self.coord_finder.add_residues(rhs)
382 
383  if len(self.residues)==0:
384  print("WARNING: Substituting PDB residue type with FASTA residue type. Potentially dangerous.")
385 
386  # load those into TempResidue object
387  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
388  for nrh,rh in enumerate(rhs):
389  pdb_idx = rh.get_index()
390  raw_idx = pdb_idx - 1
391 
392  # add ALA to fill in gaps
393  while len(self.residues)<pdb_idx:
394  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
395  self.residues.append(r)
396  self.sequence += 'A'
397 
398  internal_res = self.residues[raw_idx]
399  if len(self.sequence)<raw_idx:
400  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
401  internal_res.set_structure(rh,soft_check)
402  atomic_res.add(internal_res)
403 
404  self.chain.set_sequence(self.sequence)
405  return atomic_res
406 
408  residues=None,
409  resolutions=[],
410  bead_extra_breaks=[],
411  bead_ca_centers=True,
412  bead_default_coord=[0,0,0],
413  density_residues_per_component=None,
414  density_prefix=None,
415  density_force_compute=False,
416  density_voxel_size=1.0,
417  setup_particles_as_densities=False,
418  ideal_helix=False,
419  color=None):
420  """Set the representation for some residues. Some options (beads, ideal helix)
421  operate along the backbone. Others (density options) are volumetric.
422  Some of these you can combine e.g., beads+densities or helix+densities
423  See @ref pmi_resolution
424  @param residues Set of PMI TempResidues for adding the representation.
425  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
426  If None, will select all residues for this Molecule.
427  @param resolutions Resolutions for beads representations.
428  If structured, will average along backbone, breaking at sequence breaks.
429  If unstructured, will just create beads.
430  Pass an integer or list of integers
431  @param bead_extra_breaks Additional breakpoints for splitting beads.
432  The value can be the 0-ordered position, after which it'll insert the break.
433  Alternatively pass PDB-style (1-ordered) indices as a string.
434  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
435  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
436  (otherwise will average atoms to get center). Defaults to True.
437  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
438  If no structure provided (like an all bead molecule), the beads go here.
439  @param density_residues_per_component Create density (Gaussian Mixture Model)
440  for these residues. Must also supply density_prefix
441  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
442  If exists, will read unless you set density_force_compute=True.
443  Will also write map (prefix+'.mrc').
444  Must also supply density_residues_per_component.
445  @param density_force_compute Set true to force overwrite density file.
446  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
447  Set to 0 if you don't want to create the MRC file
448  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
449  Useful for all-atom models or flexible beads.
450  Mutually exclusive with density_ options
451  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
452  Any other resolutions passed will be coarsened from there.
453  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
454  @param color the color applied to the hierarchies generated.
455  Format options: tuple (r,g,b) with values 0 to 1
456  or float (from 0 to 1, a map from Blue to Green to Red)
457  or IMP.display.Color object
458 
459  \note You cannot call add_representation multiple times for the same residues.
460  """
461 
462  # can't customize clones
463  if self.mol_to_clone is not None:
464  raise Exception('You cannot call add_representation() for a clone.'
465  'Maybe use a copy instead')
466 
467  # format input
468  if residues is None:
469  res = IMP.pmi.tools.OrderedSet(self.residues)
470  elif residues==self:
471  res = IMP.pmi.tools.OrderedSet(self.residues)
472  elif type(residues) is IMP.pmi.topology.TempResidue:
473  res = IMP.pmi.tools.OrderedSet([residues])
474  elif hasattr(residues,'__iter__'):
475  if len(residues)==0:
476  raise Exception('You passed an empty set to add_representation')
477  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
478  res = residues
479  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
480  res = IMP.pmi.tools.OrderedSet(residues)
481  elif type(residues) is list and type(residues[0]) is TempResidue:
482  res = IMP.pmi.tools.OrderedSet(residues)
483  else:
484  raise Exception("You passed an iteratible of something other than TempResidue",res)
485  else:
486  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
487 
488  # check that each residue has not been represented yet
489  ov = res & self._represented
490  if ov:
491  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
492  self._represented|=res
493 
494  # check you aren't creating multiple resolutions without structure
495  if not hasattr(resolutions,'__iter__'):
496  if type(resolutions) is int:
497  resolutions = [resolutions]
498  else:
499  raise Exception("you tried to pass resolutions that are not int or list-of-int")
500  if len(resolutions)>1 and not ideal_helix:
501  for r in res:
502  if not r.get_has_structure():
503  raise Exception('You are creating multiple resolutions for '
504  'unstructured regions. This will have unexpected results.')
505 
506  # check density info is consistent
507  if density_residues_per_component or density_prefix:
508  if not density_residues_per_component and density_prefix:
509  raise Exception('If requesting density, must provide '
510  'density_residues_per_component AND density_prefix')
511  if density_residues_per_component and setup_particles_as_densities:
512  raise Exception('Cannot create both volumetric density '
513  '(density_residues_per_component) AND '
514  'individual densities (setup_particles_as_densities) '
515  'in the same representation')
516  if len(resolutions)>1 and setup_particles_as_densities:
517  raise Exception('You have multiple bead resolutions but are attempting to '
518  'set them all up as individual Densities. '
519  'This could have unexpected results.')
520 
521  # check helix not accompanied by other resolutions (densities OK though!)
522  if ideal_helix:
523  if 0 in resolutions:
524  raise Exception("For ideal helices, cannot build resolution 0: "
525  "you have to do that in MODELLER")
526  if 1 not in resolutions:
527  resolutions = [1] + list(resolutions)
528  self._ideal_helices.append(res)
529 
530  # check residues are all part of this molecule:
531  for r in res:
532  if r.get_molecule()!=self:
533  raise Exception('You are adding residues from a different molecule to',self.__repr__())
534 
535  # unify formatting for extra breaks
536  breaks = []
537  for b in bead_extra_breaks:
538  if type(b)==str:
539  breaks.append(int(b)-1)
540  else:
541  breaks.append(b)
542  # store the representation group
543  self.representations.append(_Representation(res,
544  resolutions,
545  breaks,
546  bead_ca_centers,
547  bead_default_coord,
548  density_residues_per_component,
549  density_prefix,
550  density_force_compute,
551  density_voxel_size,
552  setup_particles_as_densities,
553  ideal_helix,
554  color))
555 
556  def build(self):
557  """Create all parts of the IMP hierarchy
558  including Atoms, Residues, and Fragments/Representations and, finally, Copies
559  Will only build requested representations.
560  /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
561  containing at least a CAlpha. For missing residues, these can be constructed
562  from the PDB file
563  """
564  if not self.built:
565  # if requested, clone structure and representations BEFORE building original
566  if self.mol_to_clone is not None:
567  for nr,r in enumerate(self.mol_to_clone.residues):
568  if r.get_has_structure():
569  clone = IMP.atom.create_clone(r.get_hierarchy())
570  self.residues[nr].set_structure(
571  IMP.atom.Residue(clone),soft_check=True)
572  for old_rep in self.mol_to_clone.representations:
573  new_res = IMP.pmi.tools.OrderedSet()
574  for r in old_rep.residues:
575  new_res.add(self.residues[r.get_internal_index()])
576  self._represented.add(self.residues[r.get_internal_index()])
577  new_rep = _Representation(new_res,
578  old_rep.bead_resolutions,
579  old_rep.bead_extra_breaks,
580  old_rep.bead_ca_centers,
581  old_rep.bead_default_coord,
582  old_rep.density_residues_per_component,
583  old_rep.density_prefix,
584  False,
585  old_rep.density_voxel_size,
586  old_rep.setup_particles_as_densities,
587  old_rep.ideal_helix,
588  old_rep.color)
589  self.representations.append(new_rep)
590  self.coord_finder = self.mol_to_clone.coord_finder
591 
592  # give a warning for all residues that don't have representation
593  no_rep = [r for r in self.residues if r not in self._represented]
594  if len(no_rep)>0:
595  print('WARNING: Residues without representation in molecule',
596  self.get_name(),':',system_tools.resnums2str(no_rep))
597 
598  # first build any ideal helices (fills in structure for the TempResidues)
599  for rep in self.representations:
600  if rep.ideal_helix:
601  _build_ideal_helix(self.mdl,rep.residues,self.coord_finder)
602 
603  # build all the representations
604  built_reps = []
605  for rep in self.representations:
606  built_reps += system_tools.build_representation(self.hier,rep,self.coord_finder)
607 
608  # sort them before adding as children
609  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
610  for br in built_reps:
611  self.hier.add_child(br)
612  br.update_parents()
613  self.built = True
614 
615  for res in self.residues:
616  idx = res.get_index()
617 
618  # first off, store the highest resolution available in residue.hier
619  new_ps = IMP.atom.Selection(
620  self.hier,
621  residue_index=res.get_index(),
622  resolution=1).get_selected_particles()
623  if len(new_ps)>0:
624  new_p = new_ps[0]
625  if IMP.atom.Atom.get_is_setup(new_p):
626  # if only found atomic, store the residue
627  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
628  else:
629  # otherwise just store what you found
630  new_hier = IMP.atom.Hierarchy(new_p)
631  res.hier = new_hier
632  else:
633  res.hier = None
634  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
635  print('done building',self.get_hierarchy())
636  return self.hier
637 
638  def get_particles_at_all_resolutions(self,residue_indexes=None):
639  """Helpful utility for getting particles at all resolutions from this molecule.
640  Can optionally pass a set of residue indexes"""
641  if not self.built:
642  raise Exception("Cannot get all resolutions until you build the Molecule")
643  if residue_indexes is None:
644  residue_indexes = [r.get_index() for r in self.get_residues()]
646  residue_indexes=residue_indexes)
647  return ps
648 
649 #------------------------
650 
651 class _Representation(object):
652  """Private class just to store a representation request"""
653  def __init__(self,
654  residues,
655  bead_resolutions,
656  bead_extra_breaks,
657  bead_ca_centers,
658  bead_default_coord,
659  density_residues_per_component,
660  density_prefix,
661  density_force_compute,
662  density_voxel_size,
663  setup_particles_as_densities,
664  ideal_helix,
665  color):
666  self.residues = residues
667  self.bead_resolutions = bead_resolutions
668  self.bead_extra_breaks = bead_extra_breaks
669  self.bead_ca_centers = bead_ca_centers
670  self.bead_default_coord = bead_default_coord
671  self.density_residues_per_component = density_residues_per_component
672  self.density_prefix = density_prefix
673  self.density_force_compute = density_force_compute
674  self.density_voxel_size = density_voxel_size
675  self.setup_particles_as_densities = setup_particles_as_densities
676  self.ideal_helix = ideal_helix
677  self.color = color
678 
679 class _FindCloseStructure(object):
680  """Utility to get the nearest observed coordinate"""
681  def __init__(self):
682  self.coords=[]
683  def add_residues(self,residues):
684  for r in residues:
685  idx = IMP.atom.Residue(r).get_index()
686  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
687  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
688  if len(ca)==1:
689  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
690  self.coords.append([idx,xyz])
691  elif len(p)==1:
692  xyz = IMP.core.XYZ(p[0]).get_coordinates()
693  self.coords.append([idx,xyz])
694  else:
695  raise("_FindCloseStructure: wrong selection")
696 
697  self.coords.sort(key=itemgetter(0))
698  def find_nearest_coord(self,query):
699  if self.coords==[]:
700  return None
701  keys = [r[0] for r in self.coords]
702  pos = bisect_left(keys,query)
703  if pos == 0:
704  ret = self.coords[0]
705  elif pos == len(self.coords):
706  ret = self.coords[-1]
707  else:
708  before = self.coords[pos - 1]
709  after = self.coords[pos]
710  if after[0] - query < query - before[0]:
711  ret = after
712  else:
713  ret = before
714  return ret[1]
715 
716 class Sequences(object):
717  """A dictionary-like wrapper for reading and storing sequence data"""
718  def __init__(self,fasta_fn,name_map=None):
719  """read a fasta file and extract all the requested sequences
720  @param fasta_fn sequence file
721  @param name_map dictionary mapping the fasta name to final stored name
722  """
723  self.sequences = IMP.pmi.tools.OrderedDict()
724  self.read_sequences(fasta_fn,name_map)
725  def __len__(self):
726  return len(self.sequences)
727  def __contains__(self,x):
728  return x in self.sequences
729  def __getitem__(self,key):
730  if type(key) is int:
731  try:
732  allseqs = list(self.sequences.keys())
733  return self.sequences[allseqs[key]]
734  except:
735  raise Exception("You tried to access sequence num",key,"but there's only",len(self.sequences.keys()))
736  else:
737  return self.sequences[key]
738  def __iter__(self):
739  return self.sequences.__iter__()
740  def __repr__(self):
741  ret=''
742  for s in self.sequences:
743  ret += '%s\t%s\n'%(s,self.sequences[s])
744  return ret
745  def read_sequences(self,fasta_fn,name_map=None):
746  code = None
747  seq = None
748  with open(fasta_fn,'r') as fh:
749  for (num, line) in enumerate(fh):
750  if line.startswith('>'):
751  if seq is not None:
752  self.sequences[code] = seq.strip('*')
753  code = line.rstrip()[1:]
754  if name_map is not None:
755  try:
756  code = name_map[code]
757  except:
758  pass
759  seq = ''
760  else:
761  line = line.rstrip()
762  if line: # Skip blank lines
763  if seq is None:
764  raise Exception( \
765  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
766  seq += line
767  if seq is not None:
768  self.sequences[code] = seq.strip('*')
769 
770 
771 class PDBSequences(object):
772  """Data_structure for reading and storing sequence data from pdb"""
773  def __init__(self,model,pdb_fn,name_map=None):
774  """read a pdb file and returns all sequences for each contiguous fragment
775  @param pdb_fn file
776  @param name_map dictionary mapping the pdb chain id to final stored name
777  """
778  self.m=model
779  # self.sequences data-structure: (two-key dictionary)
780  # it contains all contiguous fragments:
781  # chain_id, tuples indicating first and last residue, sequence
782  # example:
783  # key1, key2, value
784  # A (77, 149) VENPSLDLEQYAASYSGLMR....
785  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
786  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
787  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
788  self.sequences = IMP.pmi.tools.OrderedDict()
789  self.read_sequences(pdb_fn,name_map)
790 
791 
792  def read_sequences(self,pdb_fn,name_map):
793  t = IMP.atom.read_pdb(pdb_fn, self.m, IMP.atom.ATOMPDBSelector())
794  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
795  for c in cs:
796  id=IMP.atom.Chain(c).get_id()
797  print(id)
798  if name_map:
799  try:
800  id=name_map[id]
801  except:
802  print("Chain ID %s not in name_map, skipping" % id)
803  continue
804  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
805  rids=[]
806  rids_olc_dict={}
807  for r in rs:
808  dr=IMP.atom.Residue(r)
809  rid=dr.get_index()
810 
811  isprotein=dr.get_is_protein()
812  isrna=dr.get_is_rna()
813  isdna=dr.get_is_dna()
814  if isprotein:
815  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
816  rids.append(rid)
817  rids_olc_dict[rid]=olc
818  elif isdna:
819  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
820  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
821  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
822  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
823  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
824  rids.append(rid)
825  rids_olc_dict[rid]=olc
826  elif isrna:
827  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
828  if dr.get_residue_type() == IMP.atom.URA: olc="U"
829  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
830  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
831  if dr.get_residue_type() == IMP.atom.THY: olc="T"
832  rids.append(rid)
833  rids_olc_dict[rid]=olc
834  group_rids=self.group_indexes(rids)
835  contiguous_sequences=IMP.pmi.tools.OrderedDict()
836  for group in group_rids:
837  sequence_fragment=""
838  for i in range(group[0],group[1]+1):
839  sequence_fragment+=rids_olc_dict[i]
840  contiguous_sequences[group]=sequence_fragment
841  self.sequences[id]=contiguous_sequences
842 
843  #for c in self.sequences:
844  # for group in self.sequences[c]:
845  # print(c,group,self.sequences[c][group])
846 
847  def group_indexes(self,indexes):
848  from itertools import groupby
849  ranges = []
850  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
851  group = [x[1] for x in g]
852  ranges.append((group[0], group[-1]))
853  return ranges
854 
855 
856 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
857  '''This function computes and prints the alignment between the
858  fasta file and the pdb sequence, computes the offsets for each contiguous
859  fragment in the PDB.
860  @param fasta_sequences IMP.pmi.topology.Sequences object
861  @param pdb_sequences IMP.pmi.topology.PDBSequences object
862  @param show boolean default False, if True prints the alignments.
863  The input objects should be generated using map_name dictionaries such that fasta_id
864  and pdb_chain_id are mapping to the same protein name. It needs Biopython.
865  Returns a dictionary of offsets, organized by peptide range (group):
866  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
867  from Bio import pairwise2
868  from Bio.pairwise2 import format_alignment
869  from Bio.SubsMat import MatrixInfo as matlist
870  matrix = matlist.blosum62
871  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
872  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
873  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
874  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
875  offsets=IMP.pmi.tools.OrderedDict()
876  for name in fasta_sequences.sequences:
877  print(name)
878  seq_fasta=fasta_sequences.sequences[name]
879  if name not in pdb_sequences.sequences:
880  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
881  pdbnames=pdb_sequences.sequences.keys()
882  else:
883  pdbnames=[name]
884  for pdbname in pdbnames:
885  for group in pdb_sequences.sequences[pdbname]:
886  if group[1]-group[0]+1<7:continue
887  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
888  if show:
889  print("########################")
890  print(" ")
891  print("protein name",pdbname)
892  print("fasta id", name)
893  print("pdb fragment",group)
894  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
895  for a in [align]:
896  offset=a[3]+1-group[0]
897  if show:
898  print("alignment sequence start-end",(a[3]+1,a[4]+1))
899  print("offset from pdb to fasta index",offset)
900  print(format_alignment(*a))
901  if name not in offsets:
902  offsets[pdbname]={}
903  if group not in offsets[pdbname]:
904  offsets[pdbname][group]=offset
905  else:
906  if group not in offsets[pdbname]:
907  offsets[pdbname][group]=offset
908  return offsets
909 
910 
911 
912 #------------------------
913 
914 
915 class TempResidue(object):
916  """Temporarily stores residue information, even without structure available."""
917  # Consider implementing __hash__ so you can select.
918  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
919  """setup a TempResidue
920  @param molecule PMI Molecule to which this residue belongs
921  @param code one-letter residue type code
922  @param index PDB index
923  @param internal_index The number in the sequence
924  """
925  #these attributes should be immutable
926  self.molecule = molecule
927  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
928  self.pdb_index = index
929  self.internal_index = internal_index
930  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
931  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
932  #these are expected to change
933  self._structured = False
934  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.mdl),
935  self.rtype,
936  index)
937  def __str__(self):
938  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
939  def __repr__(self):
940  return self.__str__()
941  def __key(self):
942  #this returns the immutable attributes only
943  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
944  def __eq__(self,other):
945  return type(other)==type(self) and self.__key() == other.__key()
946  def __hash__(self):
947  return hash(self.__key())
948  def get_index(self):
949  return self.pdb_index
950  def get_internal_index(self):
951  return self.internal_index
952  def get_code(self):
953  return IMP.atom.get_one_letter_code(self.get_residue_type())
954  def get_residue_type(self):
955  return self.rtype
956  def get_hierarchy(self):
957  return self.hier
958  def get_molecule(self):
959  return self.molecule
960  def get_has_structure(self):
961  return self._structured
962  def set_structure(self,res,soft_check=False):
963  if res.get_residue_type()!=self.get_residue_type():
964  if soft_check:
965  # note from commit a2c13eaa1 we give priority to the FASTA and not the PDB
966  print('WARNING: Inconsistency between FASTA sequence and PDB sequence. FASTA type',\
967  self.get_index(),self.hier.get_residue_type(),
968  'and PDB type',res.get_residue_type())
969  self.hier.set_residue_type((self.get_residue_type()))
970  self.rtype = self.get_residue_type()
971  else:
972  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
973  IMP.atom.get_one_letter_code(res.get_residue_type()),
974  'and sequence residue is',self.get_code())
975 
976  for a in res.get_children():
977  self.hier.add_child(a)
978  atype = IMP.atom.Atom(a).get_atom_type()
979  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
980  self.hier.get_index()))
981  self._structured = True
982 
983 class TopologyReader(object):
984  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
985  The file is read in and each part of the topology is stored as a
986  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
987  The topology file should be in a simple pipe-delimited format:
988  @code{.txt}
989 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
990 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
991 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
992 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
993 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
994 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
995 
996  @endcode
997 
998  These are the fields you can enter:
999  - `component_name`: Name of the component (chain). Serves as the parent
1000  hierarchy for this structure.
1001  - `color`: The color used in the output RMF file. Uses chimera names or R,G,B values
1002  - `fasta_fn`: Name of FASTA file containing this component.
1003  - `fasta_id`: String found in FASTA sequence header line. The sequence read
1004  from the file is assumed to be a protein sequence. If it should instead
1005  be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1006  a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1007  FASTA file and treat it as RNA.
1008  - `pdb_fn`: Name of PDB file with coordinates (if available).
1009  If left empty, will set up as BEADS (you can also specify "BEADS")
1010  Can also write "IDEAL_HELIX".
1011  - `chain`: Chain ID of this domain in the PDB file.
1012  - `residue_range`: Comma delimited pair defining range.
1013  Can leave empty or use 'all' for entire sequence from PDB file.
1014  The second item in the pair can be END to select the last residue in the
1015  PDB chain.
1016  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1017  - `bead_size`: The size (in residues) of beads used to model areas not
1018  covered by PDB coordinates. These will be automatically built.
1019  - `em_residues`: The number of Gaussians used to model the electron
1020  density of this domain. Set to zero if no EM fitting will be done.
1021  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1022  - `rigid_body`: Leave empty if this object is not in a rigid body.
1023  Otherwise, this is a number corresponding to the rigid body containing
1024  this object. The number itself is just used for grouping things.
1025  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1026  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1027  - `flags` additional flags for advanced options
1028  \note All filenames are relative to the paths specified in the constructor.
1029 
1030  """
1031  def __init__(self,
1032  topology_file,
1033  pdb_dir='./',
1034  fasta_dir='./',
1035  gmm_dir='./'):
1036  """Constructor.
1037  @param topology_file Pipe-delimited file specifying the topology
1038  @param pdb_dir Relative path to the pdb directory
1039  @param fasta_dir Relative path to the fasta directory
1040  @param gmm_dir Relative path to the GMM directory
1041  """
1042  self.topology_file = topology_file
1043  self.molecules = {} # key=molname, value=TempMolecule
1044  self.pdb_dir = pdb_dir
1045  self.fasta_dir = fasta_dir
1046  self.gmm_dir = gmm_dir
1047  self._components = self.read(topology_file)
1048 
1049  # Preserve old self.component_list for backwards compatibility
1050  @IMP.deprecated_method("2.7",
1051  "Use 'get_components()' instead of 'component_list'.")
1052  def __get_component_list(self): return self._components
1053  component_list = property(__get_component_list)
1054 
1055  def write_topology_file(self,outfile):
1056  with open(outfile, "w") as f:
1057  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1058  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1059  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1060  for c in self._components:
1061  output = c.get_str()+'\n'
1062  f.write(output)
1063  return outfile
1064 
1065  def get_components(self, topology_list = "all"):
1066  """ Return list of ComponentTopologies for selected components
1067  @param topology_list List of indices to return"""
1068  if topology_list == "all":
1069  topologies = self._components
1070  else:
1071  topologies=[]
1072  for i in topology_list:
1073  topologies.append(self._components[i])
1074  return topologies
1075 
1076  def get_molecules(self):
1077  return self.molecules
1078 
1079  def read(self, topology_file, append=False):
1080  """Read system components from topology file. append=False will erase
1081  current topology and overwrite with new
1082  """
1083  is_topology = False
1084  is_directories = False
1085  linenum = 1
1086  if append==False:
1087  self._components=[]
1088 
1089  with open(topology_file) as infile:
1090  for line in infile:
1091  if line.lstrip()=="" or line[0]=="#":
1092  continue
1093  elif line.split('|')[1].strip() in ("molecule_name"):
1094  is_topology=True
1095  is_directories = False
1096  old_format = False
1097  continue
1098  elif line.split('|')[1] == "component_name":
1099  is_topology = True
1101  "Old-style topology format (using "
1102  "|component_name|) is deprecated. Please switch to "
1103  "the new-style format (using |molecule_name|)\n")
1104  old_format = True
1105  is_directories = False
1106  continue
1107  elif line.split('|')[1] == "directories":
1109  "Setting directories in the topology file "
1110  "is deprecated. Please do so through the "
1111  "TopologyReader constructor. Note that new-style "
1112  "paths are relative to the current working "
1113  "directory, not the topology file.\n")
1114  is_directories = True
1115  elif is_directories:
1116  fields = line.split('|')
1117  setattr(self, fields[1],
1118  IMP.get_relative_path(topology_file, fields[2]))
1119  if is_topology:
1120  new_component = self._parse_line(line, linenum, old_format)
1121  self._components.append(new_component)
1122  linenum += 1
1123  return self._components
1124 
1125  def _parse_line(self, component_line, linenum, old_format):
1126  """Parse a line of topology values and matches them to their key.
1127  Checks each value for correct syntax
1128  Returns a list of Component objects
1129  fields:
1130  """
1131  c = _Component()
1132  values = [s.strip() for s in component_line.split('|')]
1133  errors = []
1134 
1135  ### Required fields
1136  if old_format:
1137  c.molname = values[1]
1138  c.copyname = ''
1139  c._domain_name = values[2]
1140  c.color = 'blue'
1141  else:
1142  names = values[1].split('.')
1143  if len(names)==1:
1144  c.molname = names[0]
1145  c.copyname = ''
1146  elif len(names)==2:
1147  c.molname = names[0]
1148  c.copyname = names[1]
1149  else:
1150  c.molname = names[0]
1151  c.copyname = names[1]
1152  errors.append("Molecule name should be <molecule.copyID>")
1153  errors.append("For component %s line %d " % (c.molname,linenum))
1154  c._domain_name = c.molname + '.' + c.copyname
1155  colorfields = values[2].split(',')
1156  if len(colorfields)==3:
1157  c.color = [float(x) for x in colorfields]
1158  if any([x>1 for x in c.color]):
1159  c.color=[x/255 for x in c.color]
1160  else:
1161  c.color = values[2]
1162  c._orig_fasta_file = values[3]
1163  c.fasta_file = values[3]
1164  fasta_field = values[4].split(",")
1165  c.fasta_id = fasta_field[0]
1166  c.fasta_flag = None
1167  if len(fasta_field) > 1:
1168  c.fasta_flag = fasta_field[1]
1169  c._orig_pdb_input = values[5]
1170  pdb_input = values[5]
1171  tmp_chain = values[6]
1172  rr = values[7]
1173  offset = values[8]
1174  bead_size = values[9]
1175  emg = values[10]
1176  if old_format:
1177  rbs = srbs = csrbs = ''
1178  else:
1179  rbs = values[11]
1180  srbs = values[12]
1181  csrbs = values[13]
1182 
1183  if c.molname not in self.molecules:
1184  self.molecules[c.molname] = _TempMolecule(c)
1185  else:
1186  # COPY OR DOMAIN
1187  c._orig_fasta_file = self.molecules[c.molname].orig_component.fasta_file
1188  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1189  self.molecules[c.molname].add_component(c,c.copyname)
1190 
1191  # now cleanup input
1192  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1193  if pdb_input=="":
1194  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1195  errors.append("For component %s line %d is not correct"
1196  "|%s| was given." % (c.molname,linenum,pdb_input))
1197  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1198  c.pdb_file = pdb_input
1199  else:
1200  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1201 
1202  # PDB chain must be one or two characters
1203  if len(tmp_chain)==1 or len(tmp_chain)==2:
1204  c.chain = tmp_chain
1205  else:
1206  errors.append("PDB Chain identifier must be one or two characters.")
1207  errors.append("For component %s line %d is not correct"
1208  "|%s| was given." % (c.molname,linenum,tmp_chain))
1209 
1210  ### Optional fields
1211  # Residue Range
1212  if rr.strip()=='all' or str(rr)=="":
1213  c.residue_range = None
1214  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1215  # Make sure that is residue range is given, there are only two values and they are integers
1216  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1217  if c.residue_range[1] != 'END':
1218  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1219  # Old format used -1 for the last residue
1220  if old_format and c.residue_range[1] == -1:
1221  c.residue_range = (c.residue_range[0], 'END')
1222  else:
1223  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1224  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1225  errors.append("To select all residues, indicate |\"all\"|")
1226 
1227  # PDB Offset
1228  if self._is_int(offset):
1229  c.pdb_offset=int(offset)
1230  elif len(offset)==0:
1231  c.pdb_offset = 0
1232  else:
1233  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1234  errors.append("The value must be a single integer. |%s| was given." % offset)
1235 
1236  # Bead Size
1237  if self._is_int(bead_size):
1238  c.bead_size=int(bead_size)
1239  elif len(bead_size)==0:
1240  c.bead_size = 0
1241  else:
1242  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1243  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1244 
1245  # EM Residues Per Gaussian
1246  if self._is_int(emg):
1247  if int(emg) > 0:
1248  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1249  c.gmm_file = c.density_prefix+'.txt'
1250  c.mrc_file = c.density_prefix+'.gmm'
1251 
1252  c.em_residues_per_gaussian=int(emg)
1253  else:
1254  c.em_residues_per_gaussian = 0
1255  elif len(emg)==0:
1256  c.em_residues_per_gaussian = 0
1257  else:
1258  errors.append("em_residues_per_gaussian format for component "
1259  "%s line %d is not correct" % (c.molname, linenum))
1260  errors.append("The value must be a single integer. |%s| was given." % emg)
1261 
1262  # rigid bodies
1263  if len(rbs)>0:
1264  if not self._is_int(rbs):
1265  errors.append("rigid bodies format for component "
1266  "%s line %d is not correct" % (c.molname, linenum))
1267  errors.append("Each RB must be a single integer, or empty. "
1268  "|%s| was given." % rbs)
1269  c.rigid_body = int(rbs)
1270 
1271  # super rigid bodies
1272  if len(srbs)>0:
1273  srbs = srbs.split(',')
1274  for i in srbs:
1275  if not self._is_int(i):
1276  errors.append("super rigid bodies format for component "
1277  "%s line %d is not correct" % (c.molname, linenum))
1278  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1279  c.super_rigid_bodies = srbs
1280 
1281  # chain of super rigid bodies
1282  if len(csrbs)>0:
1283  if not self._is_int(csrbs):
1284  errors.append("em_residues_per_gaussian format for component "
1285  "%s line %d is not correct" % (c.molname, linenum))
1286  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1287  c.chain_of_super_rigid_bodies = csrbs
1288 
1289  # done
1290  if errors:
1291  raise ValueError("Fix Topology File syntax errors and rerun: " \
1292  + "\n".join(errors))
1293  else:
1294  return c
1295 
1296 
1297  def set_gmm_dir(self,gmm_dir):
1298  """Change the GMM dir"""
1299  self.gmm_dir = gmm_dir
1300  for c in self._components:
1301  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1302  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1303  print('new gmm',c.gmm_file)
1304 
1305  def set_pdb_dir(self,pdb_dir):
1306  """Change the PDB dir"""
1307  self.pdb_dir = pdb_dir
1308  for c in self._components:
1309  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1310  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1311 
1312  def set_fasta_dir(self,fasta_dir):
1313  """Change the FASTA dir"""
1314  self.fasta_dir = fasta_dir
1315  for c in self._components:
1316  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1317 
1318  def _is_int(self, s):
1319  # is this string an integer?
1320  try:
1321  float(s)
1322  return float(s).is_integer()
1323  except ValueError:
1324  return False
1325 
1326  def get_rigid_bodies(self):
1327  """Return list of lists of rigid bodies (as domain name)"""
1328  rbl = defaultdict(list)
1329  for c in self._components:
1330  if c.rigid_body:
1331  rbl[c.rigid_body].append(c.get_unique_name())
1332  return rbl.values()
1333 
1335  """Return list of lists of super rigid bodies (as domain name)"""
1336  rbl = defaultdict(list)
1337  for c in self._components:
1338  for rbnum in c.super_rigid_bodies:
1339  rbl[rbnum].append(c.get_unique_name())
1340  return rbl.values()
1341 
1343  """Return list of lists of chains of super rigid bodies (as domain name)"""
1344  rbl = defaultdict(list)
1345  for c in self._components:
1346  for rbnum in c.chain_of_super_rigid_bodies:
1347  rbl[rbnum].append(c.get_unique_name())
1348  return rbl.values()
1349 
1350 class _TempMolecule(object):
1351  """Store the Components and any requests for copies"""
1352  def __init__(self,init_c):
1353  self.molname = init_c.molname
1354  # key=copy ID, value = list of domains
1355  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1356  self.add_component(init_c,init_c.copyname)
1357  self.orig_copyname = init_c.copyname
1358  self.orig_component = self.domains[init_c.copyname][0]
1359  def add_component(self,component,copy_id):
1360  self.domains[copy_id].append(component)
1361  component.domainnum = len(self.domains[copy_id])-1
1362  def __repr__(self):
1363  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1364 
1365 class _Component(object):
1366  """Stores the components required to build a standard IMP hierarchy
1367  using IMP.pmi.BuildModel()
1368  """
1369  def __init__(self):
1370  self.molname = None
1371  self.copyname = None
1372  self.domainnum = 0
1373  self.fasta_file = None
1374  self._orig_fasta_file = None
1375  self.fasta_id = None
1376  self.fasta_flag = None
1377  self.pdb_file = None
1378  self._orig_pdb_input = None
1379  self.chain = None
1380  self.residue_range = None
1381  self.pdb_offset = 0
1382  self.bead_size = 10
1383  self.em_residues_per_gaussian = 0
1384  self.gmm_file = ''
1385  self.mrc_file = ''
1386  self.density_prefix = ''
1387  self.color = 0.1
1388  self.rigid_body = None
1389  self.super_rigid_bodies = []
1390  self.chain_of_super_rigid_bodies = []
1391 
1392  def _l2s(self,l):
1393  return ",".join("%s" % x for x in l)
1394 
1395  def __repr__(self):
1396  return self.get_str()
1397 
1398  def get_unique_name(self):
1399  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1400 
1401  def get_str(self):
1402  res_range = self.residue_range
1403  if self.residue_range is None:
1404  res_range = []
1405  name = self.molname
1406  if self.copyname!='':
1407  name += '.'+self.copyname
1408  if self.chain is None:
1409  chain = ' '
1410  else:
1411  chain = self.chain
1412  color=self.color
1413  if isinstance(color, list):
1414  color=','.join([str(x) for x in color])
1415  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1416  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1417  str(self.pdb_offset),str(self.bead_size),
1418  str(self.em_residues_per_gaussian),
1419  str(self.rigid_body) if self.rigid_body else '',
1420  self._l2s(self.super_rigid_bodies),
1421  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1422  return a
1423 
1424  # Preserve old self.name for backwards compatibility
1425  @IMP.deprecated_method("2.7", "Use 'molname' instead of 'name'.")
1426  def __get_name(self): return self.molname
1427  name = property(__get_name)
1428 
1429  # Preserve old self.domain_name for backwards compatibility
1430  @IMP.deprecated_method("2.7",
1431  "Use 'get_unique_name()' instead of 'domain_name'.")
1432  def __get_domain_name(self): return self._domain_name
1433  domain_name = property(__get_domain_name)
1434 
1435 
1437  '''Extends the functionality of IMP.atom.Molecule'''
1438 
1439  def __init__(self,hierarchy):
1440  IMP.atom.Molecule.__init__(self,hierarchy)
1441 
1442  def get_state_index(self):
1443  state = self.get_parent()
1444  return IMP.atom.State(state).get_state_index()
1445 
1446  def get_copy_index(self):
1447  return IMP.atom.Copy(self).get_copy_index()
1448 
1449  def get_extended_name(self):
1450  return self.get_name()+"."+\
1451  str(self.get_copy_index())+\
1452  "."+str(self.get_state_index())
1453 
1454  def get_sequence(self):
1455  return IMP.atom.Chain(self).get_sequence()
1456 
1457  def get_residue_indexes(self):
1459 
1460  def get_residue_segments(self):
1461  return IMP.pmi.tools.Segments(self.get_residue_indexes())
1462 
1463  def get_chain_id(self):
1464  return IMP.atom.Chain(self).get_id()
1465 
1466  def __repr__(self):
1467  s='PMIMoleculeHierarchy '
1468  s+=self.get_name()
1469  s+=" "+"Copy "+str(IMP.atom.Copy(self).get_copy_index())
1470  s+=" "+"State "+str(self.get_state_index())
1471  s+=" "+"N residues "+str(len(self.get_sequence()))
1472  return s
def __init__
setup a TempResidue
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1821
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:1204
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
void read_pdb(TextInput input, int model, Hierarchy h)
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9819
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:61
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:81
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
returns the total number of states generated
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1061
Temporarily stores residue information, even without structure available.
def create_state
returns a new IMP.pmi.representation_new.State(), increment the state index
Store objects in order they were added, but with default type.
Definition: tools.py:1569