IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2 Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `mdl = IMP.Model(); s = IMP.pmi.topology.System(mdl)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiples times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 
34 def _build_ideal_helix(mdl, residues, coord_finder):
35  """Creates an ideal helix from the specified residue range
36  Residues MUST be contiguous.
37  This function actually adds them to the TempResidue hierarchy
38  """
39  created_hiers = []
40 
41  # this function creates a CAlpha helix structure (which can be used for coarsening)
42  for n, tempres in enumerate(residues):
43  if tempres.get_has_structure():
44  raise Exception("You tried to build ideal_helix for a residue "
45  "that already has structure:",tempres)
46  if n>0 and (not tempres.get_index()==prev_idx+1):
47  raise Exception("Passed non-contiguous segment to build_ideal_helix for",tempres.get_molecule())
48 
49  # New residue particle will replace the TempResidue's existing (empty) hierarchy
50  rp = IMP.Particle(mdl)
51  rp.set_name("Residue_%i" % tempres.get_index())
52 
53  # Copy the original residue type and index
54  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
55 
56  # Create the CAlpha
57  ap = IMP.Particle(mdl)
59  x = 2.3 * cos(n * 2 * pi / 3.6)
60  y = 2.3 * sin(n * 2 * pi / 3.6)
61  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
62  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
63  d.set_radius(1.0)
64  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
65  IMP.atom.Mass(ap).set_mass(110.0)
66  this_res.add_child(a)
67 
68  # Add this structure to the TempResidue
69  tempres.set_structure(this_res)
70  created_hiers.append(this_res)
71  prev_idx = tempres.get_index()
72  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
73 
74 class _SystemBase(object):
75  """The base class for System, State and Molecule
76  classes. It contains shared functions in common to these classes
77  """
78 
79  def __init__(self,mdl=None):
80  if mdl is None:
81  self.mdl=IMP.Model()
82  else:
83  self.mdl=mdl
84 
85  def _create_hierarchy(self):
86  """create a new hierarchy"""
87  tmp_part=IMP.Particle(self.mdl)
88  return IMP.atom.Hierarchy.setup_particle(tmp_part)
89 
90  def _create_child(self,parent_hierarchy):
91  """create a new hierarchy, set it as child of the input
92  one, and return it"""
93  child_hierarchy=self._create_hierarchy()
94  parent_hierarchy.add_child(child_hierarchy)
95  return child_hierarchy
96 
97  def build(self):
98  """Build the coordinates of the system.
99  Loop through stored(?) hierarchies and set up coordinates!"""
100  pass
101 
102 #------------------------
103 
104 class System(_SystemBase):
105  """This class initializes the root node of the global IMP.atom.Hierarchy."""
106  def __init__(self,mdl=None,name="System"):
107  _SystemBase.__init__(self,mdl)
108  self._number_of_states = 0
109  self.states = []
110  self.built=False
111 
112  # the root hierarchy node
113  self.hier=self._create_hierarchy()
114  self.hier.set_name(name)
115 
116  def get_states(self):
117  return self.states
118 
119  def create_state(self):
120  """returns a new IMP.pmi.representation_new.State(), increment the state index"""
121  self._number_of_states+=1
122  state = State(self,self._number_of_states-1)
123  self.states.append(state)
124  return state
125 
126  def __repr__(self):
127  return self.hier.get_name()
128 
130  """returns the total number of states generated"""
131  return self._number_of_states
132 
133  def get_hierarchy(self):
134  return self.hier
135 
136  def build(self,**kwargs):
137  """call build on all states"""
138  if not self.built:
139  for state in self.states:
140  state.build(**kwargs)
141  self.built=True
142  return self.hier
143 
144 #------------------------
145 
146 class State(_SystemBase):
147  """Stores a list of Molecules all with the same State index.
148  Also stores number of copies of each Molecule for easy Selection.
149  """
150  def __init__(self,system,state_index):
151  """Define a new state
152  @param system the PMI System
153  @param state_index the index of the new state
154  \note It's expected that you will not use this constructor directly,
155  but rather create it with pmi::System::create_molecule()
156  """
157  self.mdl = system.get_hierarchy().get_model()
158  self.system = system
159  self.hier = self._create_child(system.get_hierarchy())
160  self.hier.set_name("State_"+str(state_index))
161  self.molecules = defaultdict(list) # key is molecule name. value are the molecule copies!
162  IMP.atom.State.setup_particle(self.hier,state_index)
163  self.built = False
164 
165  def __repr__(self):
166  return self.system.__repr__()+'.'+self.hier.get_name()
167 
168  def get_molecules(self):
169  """Return a dictionary where key is molecule name and value
170  are the list of all copies of that molecule in setup order"""
171  return self.molecules
172 
173  def get_molecule(self,name,copy_num=0):
174  """Access a molecule by name and copy number
175  @param name The molecule name used during setup
176  @param copy_num The copy number based on input order.
177  Default: 0. Set to 'all' to get all copies
178  """
179  if name not in self.molecules:
180  raise Exception("get_molecule() could not find molname",name)
181  if copy_num=='all':
182  return self.molecules[name]
183  else:
184  if copy_num>len(self.molecules[name])-1:
185  raise Exception("get_molecule() copy number is too high:",copy_num)
186  return self.molecules[name][copy_num]
187 
188  def create_molecule(self,name,sequence='',chain_id='',is_nucleic=None):
189  """Create a new Molecule within this State
190  @param name the name of the molecule (string) it must not
191  be already used
192  @param sequence sequence (string)
193  @param chain_id Chain id to assign to this molecule
194  """
195  # check whether the molecule name is already assigned
196  if name in self.molecules:
197  raise Exception('Cannot use a molecule name already used')
198 
199  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
200  self.molecules[name].append(mol)
201  return mol
202 
203  def get_hierarchy(self):
204  return self.hier
205 
206  def get_number_of_copies(self,molname):
207  return len(self.molecules[molname])
208 
209  def _register_copy(self,molecule):
210  molname = molecule.get_hierarchy().get_name()
211  if molname not in self.molecules:
212  raise Exception("Trying to add a copy when the original doesn't exist!")
213  self.molecules[molname].append(molecule)
214 
215  def build(self,**kwargs):
216  """call build on all molecules (automatically makes clones)"""
217  if not self.built:
218  for molname in self.molecules:
219  for mol in reversed(self.molecules[molname]):
220  mol.build(**kwargs)
221  self.built=True
222  return self.hier
223 
224 #------------------------
225 
226 class Molecule(_SystemBase):
227  """Stores a named protein chain.
228  This class is constructed from within the State class.
229  It wraps an IMP.atom.Molecule and IMP.atom.Copy
230  Structure is read using this class
231  Resolutions and copies can be registered, but are only created when build() is called
232  """
233 
234  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
235  """The user should not call this directly; instead call State::create_molecule()
236  @param state The parent PMI State
237  @param name The name of the molecule (string)
238  @param sequence Sequence (string)
239  @param chain_id The chain of this molecule
240  @param copy_num Store the copy number
241  @param mol_to_clone The original molecule (for cloning ONLY)
242  \note It's expected that you will not use this constructor directly,
243  but rather create a Molecule with pmi::State::create_molecule()
244  """
245  # internal data storage
246  self.mdl = state.get_hierarchy().get_model()
247  self.state = state
248  self.sequence = sequence
249  self.built = False
250  self.mol_to_clone = mol_to_clone
251  self.is_nucleic=is_nucleic
252  self.representations = [] # list of stuff to build
253  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
254  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
255  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
256 
257  # create root node and set it as child to passed parent hierarchy
258  self.hier = self._create_child(self.state.get_hierarchy())
259  self.hier.set_name(name)
260  IMP.atom.Copy.setup_particle(self.hier,copy_num)
261  IMP.atom.Chain.setup_particle(self.hier,chain_id)
262  # create TempResidues from the sequence (if passed)
263  self.residues=[]
264  for ns,s in enumerate(sequence):
265  r = TempResidue(self,s,ns+1,ns,is_nucleic)
266  self.residues.append(r)
267 
268  def __repr__(self):
269  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
270  str(IMP.atom.Copy(self.hier).get_copy_index())
271 
272  def __getitem__(self,val):
273  if isinstance(val,int):
274  return self.residues[val]
275  elif isinstance(val,str):
276  return self.residues[int(val)-1]
277  elif isinstance(val,slice):
278  return IMP.pmi.tools.OrderedSet(self.residues[val])
279  else:
280  print("ERROR: range ends must be int or str. Stride must be int.")
281 
282  def get_hierarchy(self):
283  """Return the IMP Hierarchy corresponding to this Molecule"""
284  return self.hier
285 
286  def get_name(self):
287  """Return this Molecule name"""
288  return self.hier.get_name()
289 
290  def get_state(self):
291  """Return the State containing this Molecule"""
292  return self.state
293 
294  def get_ideal_helices(self):
295  """Returns list of OrderedSets with requested ideal helices"""
296  return self._ideal_helices
297 
298  def residue_range(self,a,b,stride=1):
299  """get residue range from a to b, inclusive.
300  Use integers to get 0-indexing, or strings to get PDB-indexing"""
301  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
302  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
303  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
304  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
305  else:
306  print("ERROR: range ends must be int or str. Stride must be int.")
307 
308  def get_residues(self):
309  """ Return all modeled TempResidues as a set"""
310  all_res = IMP.pmi.tools.OrderedSet(self.residues)
311  return all_res
312 
313  def get_represented(self):
314  """ Return set of TempResidues that have representation"""
315  return self._represented
316 
318  """ Return a set of TempResidues that have associated structure coordinates """
319  atomic_res = IMP.pmi.tools.OrderedSet()
320  for res in self.residues:
321  if res.get_has_structure():
322  atomic_res.add(res)
323  return atomic_res
324 
326  """ Return a set of TempResidues that don't have associated structure coordinates """
327  non_atomic_res = IMP.pmi.tools.OrderedSet()
328  for res in self.residues:
329  if not res.get_has_structure():
330  non_atomic_res.add(res)
331  return non_atomic_res
332 
333  def create_copy(self,chain_id):
334  """Create a new Molecule with the same name and sequence but a higher copy number.
335  Returns the Molecule. No structure or representation will be copied!
336  @param chain_id Chain ID of the new molecule
337  """
338  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
339  copy_num=self.state.get_number_of_copies(self.get_name()))
340  self.state._register_copy(mol)
341  return mol
342 
343  def create_clone(self,chain_id):
344  """Create a Molecule clone (automatically builds same structure and representation)
345  @param chain_id If you want to set the chain ID of the copy to something
346  \note You cannot add structure or representations to a clone!
347  """
348  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
349  copy_num=self.state.get_number_of_copies(self.get_name()),
350  mol_to_clone=self)
351  self.state._register_copy(mol)
352  return mol
353 
354  def add_structure(self,pdb_fn,chain_id,res_range=[],
355  offset=0,model_num=None,ca_only=False,
356  soft_check=False):
357  """Read a structure and store the coordinates.
358  Returns the atomic residues (as a set)
359  @param pdb_fn The file to read
360  @param chain_id Chain ID to read
361  @param res_range Add only a specific set of residues from the PDB file.
362  res_range[0] is the starting and res_range[1] is the ending residue index.
363  @param offset Apply an offset to the residue indexes of the PDB file.
364  This number is added to the PDB sequence.
365  @param model_num Read multi-model PDB and return that model
366  @param ca_only Only read the CA positions from the PDB file
367  @param soft_check If True, it only warns if there are sequence mismatches between the pdb and
368  the Molecules sequence. Actually replaces the fasta values.
369  If False (Default), it raises and exit when there are sequence mismatches.
370  \note If you are adding structure without a FASTA file, set soft_check to True
371  """
372  if self.mol_to_clone is not None:
373  raise Exception('You cannot call add_structure() for a clone')
374 
375  self.pdb_fn = pdb_fn
376 
377  # get IMP.atom.Residues from the pdb file
378  rhs = system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
379  self.coord_finder.add_residues(rhs)
380 
381  if len(self.residues)==0:
382  print("WARNING: Extracting sequence from structure. Potentially dangerous.")
383 
384  # load those into TempResidue object
385  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
386  for nrh,rh in enumerate(rhs):
387  pdb_idx = rh.get_index()
388  raw_idx = pdb_idx - 1
389 
390  # add ALA to fill in gaps
391  while len(self.residues)<pdb_idx:
392  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
393  self.residues.append(r)
394  self.sequence += 'A'
395 
396  internal_res = self.residues[raw_idx]
397  if len(self.sequence)<raw_idx:
398  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
399  internal_res.set_structure(rh,soft_check)
400  atomic_res.add(internal_res)
401  return atomic_res
402 
403  def add_representation(self,
404  residues=None,
405  resolutions=[],
406  bead_extra_breaks=[],
407  bead_ca_centers=True,
408  bead_default_coord=[0,0,0],
409  density_residues_per_component=None,
410  density_prefix=None,
411  density_force_compute=False,
412  density_voxel_size=1.0,
413  setup_particles_as_densities=False,
414  ideal_helix=False,
415  color=None):
416  """Set the representation for some residues. Some options (beads, ideal helix)
417  operate along the backbone. Others (density options) are volumetric.
418  Some of these you can combine e.g., beads+densities or helix+densities
419  See @ref pmi_resolution
420  @param residues Set of PMI TempResidues for adding the representation.
421  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
422  If None, will select all residues for this Molecule.
423  @param resolutions Resolutions for beads representations.
424  If structured, will average along backbone, breaking at sequence breaks.
425  If unstructured, will just create beads.
426  Pass an integer or list of integers
427  @param bead_extra_breaks Additional breakpoints for splitting beads.
428  The value can be the 0-ordered position, after which it'll insert the break.
429  Alternatively pass PDB-style (1-ordered) indices as a string.
430  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
431  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
432  (otherwise will average atoms to get center). Defaults to True.
433  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
434  If no structure provided (like an all bead molecule), the beads go here.
435  @param density_residues_per_component Create density (Gaussian Mixture Model)
436  for these residues. Must also supply density_prefix
437  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
438  If exists, will read unless you set density_force_compute=True.
439  Will also write map (prefix+'.mrc').
440  Must also supply density_residues_per_component.
441  @param density_force_compute Set true to force overwrite density file.
442  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
443  Set to 0 if you don't want to create the MRC file
444  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
445  Useful for all-atom models or flexible beads.
446  Mutually exclusive with density_ options
447  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
448  Any other resolutions passed will be coarsened from there.
449  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
450  @param color the color applied to the hierarchies generated.
451  Format options: tuple (r,g,b) with values 0 to 1
452  or float (from 0 to 1, a map from Blue to Green to Red)
453  or IMP.display.Color object
454 
455  \note You cannot call add_representation multiple times for the same residues.
456  """
457 
458  # can't customize clones
459  if self.mol_to_clone is not None:
460  raise Exception('You cannot call add_representation() for a clone.'
461  'Maybe use a copy instead')
462 
463  # format input
464  if residues is None:
465  res = IMP.pmi.tools.OrderedSet(self.residues)
466  elif residues==self:
467  res = IMP.pmi.tools.OrderedSet(self.residues)
468  elif type(residues) is IMP.pmi.topology.TempResidue:
469  res = IMP.pmi.tools.OrderedSet([residues])
470  elif hasattr(residues,'__iter__'):
471  if len(residues)==0:
472  raise Exception('You passed an empty set to add_representation')
473  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
474  res = residues
475  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
476  res = IMP.pmi.tools.OrderedSet(residues)
477  elif type(residues) is list and type(residues[0]) is TempResidue:
478  res = IMP.pmi.tools.OrderedSet(residues)
479  else:
480  raise Exception("You passed an iteratible of something other than TempResidue",res)
481  else:
482  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
483 
484  # check that each residue has not been represented yet
485  ov = res & self._represented
486  if ov:
487  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
488  self._represented|=res
489 
490  # check you aren't creating multiple resolutions without structure
491  if not hasattr(resolutions,'__iter__'):
492  if type(resolutions) is int:
493  resolutions = [resolutions]
494  else:
495  raise Exception("you tried to pass resolutions that are not int or list-of-int")
496  if len(resolutions)>1 and not ideal_helix:
497  for r in res:
498  if not r.get_has_structure():
499  raise Exception('You are creating multiple resolutions for '
500  'unstructured regions. This will have unexpected results.')
501 
502  # check density info is consistent
503  if density_residues_per_component or density_prefix:
504  if not density_residues_per_component and density_prefix:
505  raise Exception('If requesting density, must provide '
506  'density_residues_per_component AND density_prefix')
507  if density_residues_per_component and setup_particles_as_densities:
508  raise Exception('Cannot create both volumetric density '
509  '(density_residues_per_component) AND '
510  'individual densities (setup_particles_as_densities) '
511  'in the same representation')
512  if len(resolutions)>1 and setup_particles_as_densities:
513  raise Exception('You have multiple bead resolutions but are attempting to '
514  'set them all up as individual Densities. '
515  'This could have unexpected results.')
516 
517  # check helix not accompanied by other resolutions (densities OK though!)
518  if ideal_helix:
519  if 0 in resolutions:
520  raise Exception("For ideal helices, cannot build resolution 0: "
521  "you have to do that in MODELLER")
522  if 1 not in resolutions:
523  resolutions = [1] + list(resolutions)
524  self._ideal_helices.append(res)
525 
526  # check residues are all part of this molecule:
527  for r in res:
528  if r.get_molecule()!=self:
529  raise Exception('You are adding residues from a different molecule to',self.__repr__())
530 
531  # unify formatting for extra breaks
532  breaks = []
533  for b in bead_extra_breaks:
534  if type(b)==str:
535  breaks.append(int(b)-1)
536  else:
537  breaks.append(b)
538  # store the representation group
539  self.representations.append(_Representation(res,
540  resolutions,
541  breaks,
542  bead_ca_centers,
543  bead_default_coord,
544  density_residues_per_component,
545  density_prefix,
546  density_force_compute,
547  density_voxel_size,
548  setup_particles_as_densities,
549  ideal_helix,
550  color))
551 
552  def build(self):
553  """Create all parts of the IMP hierarchy
554  including Atoms, Residues, and Fragments/Representations and, finally, Copies
555  Will only build requested representations.
556  /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
557  containing at least a CAlpha. For missing residues, these can be constructed
558  from the PDB file
559  """
560  if not self.built:
561  # if requested, clone structure and representations BEFORE building original
562  if self.mol_to_clone is not None:
563  for nr,r in enumerate(self.mol_to_clone.residues):
564  if r.get_has_structure():
565  clone = IMP.atom.create_clone(r.get_hierarchy())
566  self.residues[nr].set_structure(
567  IMP.atom.Residue(clone),soft_check=True)
568  for old_rep in self.mol_to_clone.representations:
569  new_res = IMP.pmi.tools.OrderedSet()
570  for r in old_rep.residues:
571  new_res.add(self.residues[r.get_internal_index()])
572  self._represented.add(self.residues[r.get_internal_index()])
573  new_rep = _Representation(new_res,
574  old_rep.bead_resolutions,
575  old_rep.bead_extra_breaks,
576  old_rep.bead_ca_centers,
577  old_rep.bead_default_coord,
578  old_rep.density_residues_per_component,
579  old_rep.density_prefix,
580  False,
581  old_rep.density_voxel_size,
582  old_rep.setup_particles_as_densities,
583  old_rep.ideal_helix,
584  old_rep.color)
585  self.representations.append(new_rep)
586  self.coord_finder = self.mol_to_clone.coord_finder
587 
588  # give a warning for all residues that don't have representation
589  no_rep = [r for r in self.residues if r not in self._represented]
590  if len(no_rep)>0:
591  print('WARNING: Residues without representation in molecule',
592  self.get_name(),':',system_tools.resnums2str(no_rep))
593 
594  # first build any ideal helices (fills in structure for the TempResidues)
595  for rep in self.representations:
596  if rep.ideal_helix:
597  _build_ideal_helix(self.mdl,rep.residues,self.coord_finder)
598 
599  # build all the representations
600  built_reps = []
601  for rep in self.representations:
602  built_reps += system_tools.build_representation(self.hier,rep,self.coord_finder)
603 
604  # sort them before adding as children
605  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
606  for br in built_reps:
607  self.hier.add_child(br)
608  br.update_parents()
609  self.built = True
610 
611  for res in self.residues:
612  idx = res.get_index()
613 
614  # first off, store the highest resolution available in residue.hier
615  new_ps = IMP.atom.Selection(
616  self.hier,
617  residue_index=res.get_index(),
618  resolution=1).get_selected_particles()
619  if len(new_ps)>0:
620  new_p = new_ps[0]
621  if IMP.atom.Atom.get_is_setup(new_p):
622  # if only found atomic, store the residue
623  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
624  else:
625  # otherwise just store what you found
626  new_hier = IMP.atom.Hierarchy(new_p)
627  res.hier = new_hier
628  else:
629  res.hier = None
630  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
631  print('done building',self.get_hierarchy())
632  return self.hier
633 
634  def get_particles_at_all_resolutions(self,residue_indexes=None):
635  """Helpful utility for getting particles at all resolutions from this molecule.
636  Can optionally pass a set of residue indexes"""
637  if not self.built:
638  raise Exception("Cannot get all resolutions until you build the Molecule")
639  if residue_indexes is None:
640  residue_indexes = [r.get_index() for r in self.get_residues()]
642  residue_indexes=residue_indexes)
643  return ps
644 
645 #------------------------
646 
647 class _Representation(object):
648  """Private class just to store a representation request"""
649  def __init__(self,
650  residues,
651  bead_resolutions,
652  bead_extra_breaks,
653  bead_ca_centers,
654  bead_default_coord,
655  density_residues_per_component,
656  density_prefix,
657  density_force_compute,
658  density_voxel_size,
659  setup_particles_as_densities,
660  ideal_helix,
661  color):
662  self.residues = residues
663  self.bead_resolutions = bead_resolutions
664  self.bead_extra_breaks = bead_extra_breaks
665  self.bead_ca_centers = bead_ca_centers
666  self.bead_default_coord = bead_default_coord
667  self.density_residues_per_component = density_residues_per_component
668  self.density_prefix = density_prefix
669  self.density_force_compute = density_force_compute
670  self.density_voxel_size = density_voxel_size
671  self.setup_particles_as_densities = setup_particles_as_densities
672  self.ideal_helix = ideal_helix
673  self.color = color
674 
675 class _FindCloseStructure(object):
676  """Utility to get the nearest observed coordinate"""
677  def __init__(self):
678  self.coords=[]
679  def add_residues(self,residues):
680  for r in residues:
681  idx = IMP.atom.Residue(r).get_index()
682  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
683  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
684  if len(ca)==1:
685  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
686  self.coords.append([idx,xyz])
687  elif len(p)==1:
688  xyz = IMP.core.XYZ(p[0]).get_coordinates()
689  self.coords.append([idx,xyz])
690  else:
691  raise("_FindCloseStructure: wrong selection")
692 
693  self.coords.sort(key=itemgetter(0))
694  def find_nearest_coord(self,query):
695  if self.coords==[]:
696  return None
697  keys = [r[0] for r in self.coords]
698  pos = bisect_left(keys,query)
699  if pos == 0:
700  ret = self.coords[0]
701  elif pos == len(self.coords):
702  ret = self.coords[-1]
703  else:
704  before = self.coords[pos - 1]
705  after = self.coords[pos]
706  if after[0] - query < query - before[0]:
707  ret = after
708  else:
709  ret = before
710  return ret[1]
711 
712 class Sequences(object):
713  """A dictionary-like wrapper for reading and storing sequence data"""
714  def __init__(self,fasta_fn,name_map=None):
715  """read a fasta file and extract all the requested sequences
716  @param fasta_fn sequence file
717  @param name_map dictionary mapping the fasta name to final stored name
718  """
719  self.sequences = IMP.pmi.tools.OrderedDict()
720  self.read_sequences(fasta_fn,name_map)
721  def __len__(self):
722  return len(self.sequences)
723  def __contains__(self,x):
724  return x in self.sequences
725  def __getitem__(self,key):
726  if type(key) is int:
727  try:
728  allseqs = list(self.sequences.keys())
729  return self.sequences[allseqs[key]]
730  except:
731  raise Exception("You tried to access sequence num",key,"but there's only",len(self.sequences.keys()))
732  else:
733  return self.sequences[key]
734  def __iter__(self):
735  return self.sequences.__iter__()
736  def __repr__(self):
737  ret=''
738  for s in self.sequences:
739  ret += '%s\t%s\n'%(s,self.sequences[s])
740  return ret
741  def read_sequences(self,fasta_fn,name_map=None):
742  code = None
743  seq = None
744  with open(fasta_fn,'r') as fh:
745  for (num, line) in enumerate(fh):
746  if line.startswith('>'):
747  if seq is not None:
748  self.sequences[code] = seq.strip('*')
749  code = line.rstrip()[1:]
750  if name_map is not None:
751  try:
752  code = name_map[code]
753  except:
754  pass
755  seq = ''
756  else:
757  line = line.rstrip()
758  if line: # Skip blank lines
759  if seq is None:
760  raise Exception( \
761  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
762  seq += line
763  if seq is not None:
764  self.sequences[code] = seq.strip('*')
765 
766 
767 class PDBSequences(object):
768  """Data_structure for reading and storing sequence data from pdb"""
769  def __init__(self,model,pdb_fn,name_map=None):
770  """read a pdb file and returns all sequences for each contiguous fragment
771  @param pdb_fn file
772  @param name_map dictionary mapping the pdb chain id to final stored name
773  """
774  self.m=model
775  # self.sequences data-structure: (two-key dictionary)
776  # it contains all contiguous fragments:
777  # chain_id, tuples indicating first and last residue, sequence
778  # example:
779  # key1, key2, value
780  # A (77, 149) VENPSLDLEQYAASYSGLMR....
781  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
782  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
783  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
784  self.sequences = IMP.pmi.tools.OrderedDict()
785  self.read_sequences(pdb_fn,name_map)
786 
787 
788  def read_sequences(self,pdb_fn,name_map):
789  t = IMP.atom.read_pdb(pdb_fn, self.m, IMP.atom.ATOMPDBSelector())
790  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
791  for c in cs:
792  id=IMP.atom.Chain(c).get_id()
793  print(id)
794  if name_map:
795  try:
796  id=name_map[id]
797  except:
798  print("Chain ID %s not in name_map, skipping" % id)
799  continue
800  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
801  rids=[]
802  rids_olc_dict={}
803  for r in rs:
804  dr=IMP.atom.Residue(r)
805  rid=dr.get_index()
806 
807  isprotein=dr.get_is_protein()
808  isrna=dr.get_is_rna()
809  isdna=dr.get_is_dna()
810  if isprotein:
811  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
812  rids.append(rid)
813  rids_olc_dict[rid]=olc
814  elif isdna:
815  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
816  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
817  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
818  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
819  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
820  rids.append(rid)
821  rids_olc_dict[rid]=olc
822  elif isrna:
823  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
824  if dr.get_residue_type() == IMP.atom.URA: olc="U"
825  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
826  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
827  if dr.get_residue_type() == IMP.atom.THY: olc="T"
828  rids.append(rid)
829  rids_olc_dict[rid]=olc
830  group_rids=self.group_indexes(rids)
831  contiguous_sequences=IMP.pmi.tools.OrderedDict()
832  for group in group_rids:
833  sequence_fragment=""
834  for i in range(group[0],group[1]+1):
835  sequence_fragment+=rids_olc_dict[i]
836  contiguous_sequences[group]=sequence_fragment
837  self.sequences[id]=contiguous_sequences
838 
839  #for c in self.sequences:
840  # for group in self.sequences[c]:
841  # print(c,group,self.sequences[c][group])
842 
843  def group_indexes(self,indexes):
844  from itertools import groupby
845  ranges = []
846  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
847  group = [x[1] for x in g]
848  ranges.append((group[0], group[-1]))
849  return ranges
850 
851 
852 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
853  '''This function computes and prints the alignment between the
854  fasta file and the pdb sequence, computes the offsets for each contiguous
855  fragment in the PDB.
856  @param fasta_sequences IMP.pmi.topology.Sequences object
857  @param pdb_sequences IMP.pmi.topology.PDBSequences object
858  @param show boolean default False, if True prints the alignments.
859  The input objects should be generated using map_name dictionaries such that fasta_id
860  and pdb_chain_id are mapping to the same protein name. It needs Biopython.
861  Returns a dictionary of offsets, organized by peptide range (group):
862  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
863  from Bio import pairwise2
864  from Bio.pairwise2 import format_alignment
865  from Bio.SubsMat import MatrixInfo as matlist
866  matrix = matlist.blosum62
867  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
868  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
869  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
870  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
871  offsets=IMP.pmi.tools.OrderedDict()
872  for name in fasta_sequences.sequences:
873  print(name)
874  seq_fasta=fasta_sequences.sequences[name]
875  if name not in pdb_sequences.sequences:
876  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
877  pdbnames=pdb_sequences.sequences.keys()
878  else:
879  pdbnames=[name]
880  for pdbname in pdbnames:
881  for group in pdb_sequences.sequences[pdbname]:
882  if group[1]-group[0]+1<7:continue
883  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
884  if show:
885  print("########################")
886  print(" ")
887  print("protein name",pdbname)
888  print("fasta id", name)
889  print("pdb fragment",group)
890  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
891  for a in [align]:
892  offset=a[3]+1-group[0]
893  if show:
894  print("alignment sequence start-end",(a[3]+1,a[4]+1))
895  print("offset from pdb to fasta index",offset)
896  print(format_alignment(*a))
897  if name not in offsets:
898  offsets[pdbname]={}
899  if group not in offsets[pdbname]:
900  offsets[pdbname][group]=offset
901  else:
902  if group not in offsets[pdbname]:
903  offsets[pdbname][group]=offset
904  return offsets
905 
906 
907 
908 #------------------------
909 
910 
911 class TempResidue(object):
912  """Temporarily stores residue information, even without structure available."""
913  # Consider implementing __hash__ so you can select.
914  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
915  """setup a TempResidue
916  @param molecule PMI Molecule to which this residue belongs
917  @param code one-letter residue type code
918  @param index PDB index
919  @param internal_index The number in the sequence
920  """
921  #these attributes should be immutable
922  self.molecule = molecule
923  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
924  self.pdb_index = index
925  self.internal_index = internal_index
926  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
927  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
928  #these are expected to change
929  self._structured = False
930  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.mdl),
931  self.rtype,
932  index)
933  def __str__(self):
934  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
935  def __repr__(self):
936  return self.__str__()
937  def __key(self):
938  #this returns the immutable attributes only
939  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
940  def __eq__(self,other):
941  return type(other)==type(self) and self.__key() == other.__key()
942  def __hash__(self):
943  return hash(self.__key())
944  def get_index(self):
945  return self.pdb_index
946  def get_internal_index(self):
947  return self.internal_index
948  def get_code(self):
949  return IMP.atom.get_one_letter_code(self.get_residue_type())
950  def get_residue_type(self):
951  return self.rtype
952  def get_hierarchy(self):
953  return self.hier
954  def get_molecule(self):
955  return self.molecule
956  def get_has_structure(self):
957  return self._structured
958  def set_structure(self,res,soft_check=False):
959  if res.get_residue_type()!=self.get_residue_type():
960  if soft_check:
961  print('WARNING: Replacing sequence residue',self.get_index(),self.hier.get_residue_type(),
962  'with PDB type',res.get_residue_type())
963  self.hier.set_residue_type((res.get_residue_type()))
964  self.rtype = res.get_residue_type()
965  else:
966  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
967  IMP.atom.get_one_letter_code(res.get_residue_type()),
968  'and sequence residue is',self.get_code())
969 
970  for a in res.get_children():
971  self.hier.add_child(a)
972  atype = IMP.atom.Atom(a).get_atom_type()
973  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
974  self.hier.get_index()))
975  self._structured = True
976 
977 class TopologyReader(object):
978  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
979  The file is read in and each part of the topology is stored as a
980  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
981  The topology file should be in a simple pipe-delimited format:
982  @code{.txt}
983 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
984 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
985 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
986 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
987 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
988 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
989 
990  @endcode
991 
992  These are the fields you can enter:
993  - `component_name`: Name of the component (chain). Serves as the parent
994  hierarchy for this structure.
995  - `color`: The color used in the output RMF file. Uses chimera names or R,G,B values
996  - `fasta_fn`: Name of FASTA file containing this component.
997  - `fasta_id`: String found in FASTA sequence header line.
998  - `pdb_fn`: Name of PDB file with coordinates (if available).
999  If left empty, will set up as BEADS (you can also specify "BEADS")
1000  Can also write "IDEAL_HELIX".
1001  - `chain`: Chain ID of this domain in the PDB file.
1002  - `residue_range`: Comma delimited pair defining range.
1003  Can leave empty or use 'all' for entire sequence from PDB file.
1004  The second item in the pair can be END to select the last residue in the
1005  PDB chain.
1006  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1007  - `bead_size`: The size (in residues) of beads used to model areas not
1008  covered by PDB coordinates. These will be automatically built.
1009  - `em_residues`: The number of Gaussians used to model the electron
1010  density of this domain. Set to zero if no EM fitting will be done.
1011  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1012  - `rigid_body`: Leave empty if this object is not in a rigid body.
1013  Otherwise, this is a number corresponding to the rigid body containing
1014  this object. The number itself is just used for grouping things.
1015  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1016  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1017  - `flags` additional flags for advanced options
1018  \note All filenames are relative to the paths specified in the constructor.
1019 
1020  """
1021  def __init__(self,
1022  topology_file,
1023  pdb_dir='./',
1024  fasta_dir='./',
1025  gmm_dir='./'):
1026  """Constructor.
1027  @param topology_file Pipe-delimited file specifying the topology
1028  @param pdb_dir Relative path to the pdb directory
1029  @param fasta_dir Relative path to the fasta directory
1030  @param gmm_dir Relative path to the GMM directory
1031  """
1032  self.topology_file = topology_file
1033  self.molecules = {} # key=molname, value=TempMolecule
1034  self.pdb_dir = pdb_dir
1035  self.fasta_dir = fasta_dir
1036  self.gmm_dir = gmm_dir
1037  self._components = self.read(topology_file)
1038 
1039  # Preserve old self.component_list for backwards compatibility
1040  @IMP.deprecated_method("2.7",
1041  "Use 'get_components()' instead of 'component_list'.")
1042  def __get_component_list(self): return self._components
1043  component_list = property(__get_component_list)
1044 
1045  def write_topology_file(self,outfile):
1046  with open(outfile, "w") as f:
1047  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1048  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1049  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1050  for c in self._components:
1051  output = c.get_str()+'\n'
1052  f.write(output)
1053  return outfile
1054 
1055  def get_components(self, topology_list = "all"):
1056  """ Return list of ComponentTopologies for selected components
1057  @param topology_list List of indices to return"""
1058  if topology_list == "all":
1059  topologies = self._components
1060  else:
1061  topologies=[]
1062  for i in topology_list:
1063  topologies.append(self._components[i])
1064  return topologies
1065 
1066  def get_molecules(self):
1067  return self.molecules
1068 
1069  def read(self, topology_file, append=False):
1070  """Read system components from topology file. append=False will erase
1071  current topology and overwrite with new
1072  """
1073  is_topology = False
1074  is_directories = False
1075  linenum = 1
1076  if append==False:
1077  self._components=[]
1078 
1079  with open(topology_file) as infile:
1080  for line in infile:
1081  if line.lstrip()=="" or line[0]=="#":
1082  continue
1083  elif line.split('|')[1].strip() in ("molecule_name"):
1084  is_topology=True
1085  is_directories = False
1086  old_format = False
1087  continue
1088  elif line.split('|')[1] == "component_name":
1089  is_topology = True
1091  "Old-style topology format (using "
1092  "|component_name|) is deprecated. Please switch to "
1093  "the new-style format (using |molecule_name|)\n")
1094  old_format = True
1095  is_directories = False
1096  continue
1097  elif line.split('|')[1] == "directories":
1099  "Setting directories in the topology file "
1100  "is deprecated. Please do so through the "
1101  "TopologyReader constructor. Note that new-style "
1102  "paths are relative to the current working "
1103  "directory, not the topology file.\n")
1104  is_directories = True
1105  elif is_directories:
1106  fields = line.split('|')
1107  setattr(self, fields[1],
1108  IMP.get_relative_path(topology_file, fields[2]))
1109  if is_topology:
1110  new_component = self._parse_line(line, linenum, old_format)
1111  self._components.append(new_component)
1112  linenum += 1
1113  return self._components
1114 
1115  def _parse_line(self, component_line, linenum, old_format):
1116  """Parse a line of topology values and matches them to their key.
1117  Checks each value for correct syntax
1118  Returns a list of Component objects
1119  fields:
1120  """
1121  c = _Component()
1122  values = [s.strip() for s in component_line.split('|')]
1123  errors = []
1124 
1125  ### Required fields
1126  if old_format:
1127  c.molname = values[1]
1128  c.copyname = ''
1129  c._domain_name = values[2]
1130  c.color = 'blue'
1131  else:
1132  names = values[1].split('.')
1133  if len(names)==1:
1134  c.molname = names[0]
1135  c.copyname = ''
1136  elif len(names)==2:
1137  c.molname = names[0]
1138  c.copyname = names[1]
1139  else:
1140  c.molname = names[0]
1141  c.copyname = names[1]
1142  errors.append("Molecule name should be <molecule.copyID>")
1143  errors.append("For component %s line %d " % (c.molname,linenum))
1144  c._domain_name = c.molname + '.' + c.copyname
1145  colorfields = values[2].split(',')
1146  if len(colorfields)==3:
1147  c.color = [float(x) for x in colorfields]
1148  else:
1149  c.color = values[2]
1150  c._orig_fasta_file = values[3]
1151  c.fasta_file = values[3]
1152  fasta_field = values[4].split(",")
1153  c.fasta_id = fasta_field[0]
1154  c.fasta_flag = None
1155  if len(fasta_field) > 1:
1156  c.fasta_flag = fasta_field[1]
1157  c._orig_pdb_input = values[5]
1158  pdb_input = values[5]
1159  tmp_chain = values[6]
1160  rr = values[7]
1161  offset = values[8]
1162  bead_size = values[9]
1163  emg = values[10]
1164  if old_format:
1165  rbs = srbs = csrbs = ''
1166  else:
1167  rbs = values[11]
1168  srbs = values[12]
1169  csrbs = values[13]
1170 
1171  if c.molname not in self.molecules:
1172  self.molecules[c.molname] = _TempMolecule(c)
1173  else:
1174  # COPY OR DOMAIN
1175  c._orig_fasta_file = self.molecules[c.molname].orig_component.fasta_file
1176  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1177  self.molecules[c.molname].add_component(c,c.copyname)
1178 
1179  # now cleanup input
1180  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1181  if pdb_input=="":
1182  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1183  errors.append("For component %s line %d is not correct"
1184  "|%s| was given." % (c.molname,linenum,pdb_input))
1185  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1186  c.pdb_file = pdb_input
1187  else:
1188  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1189 
1190  # PDB chain must be one or two characters
1191  if len(tmp_chain)==1 or len(tmp_chain)==2:
1192  c.chain = tmp_chain
1193  else:
1194  errors.append("PDB Chain identifier must be one or two characters.")
1195  errors.append("For component %s line %d is not correct"
1196  "|%s| was given." % (c.molname,linenum,tmp_chain))
1197 
1198  ### Optional fields
1199  # Residue Range
1200  if rr.strip()=='all' or str(rr)=="":
1201  c.residue_range = None
1202  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1203  # Make sure that is residue range is given, there are only two values and they are integers
1204  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1205  if c.residue_range[1] != 'END':
1206  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1207  # Old format used -1 for the last residue
1208  if old_format and c.residue_range[1] == -1:
1209  c.residue_range = (c.residue_range[0], 'END')
1210  else:
1211  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1212  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1213  errors.append("To select all residues, indicate |\"all\"|")
1214 
1215  # PDB Offset
1216  if self._is_int(offset):
1217  c.pdb_offset=int(offset)
1218  elif len(offset)==0:
1219  c.pdb_offset = 0
1220  else:
1221  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1222  errors.append("The value must be a single integer. |%s| was given." % offset)
1223 
1224  # Bead Size
1225  if self._is_int(bead_size):
1226  c.bead_size=int(bead_size)
1227  elif len(bead_size)==0:
1228  c.bead_size = 0
1229  else:
1230  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1231  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1232 
1233  # EM Residues Per Gaussian
1234  if self._is_int(emg):
1235  if int(emg) > 0:
1236  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1237  c.gmm_file = c.density_prefix+'.txt'
1238  c.mrc_file = c.density_prefix+'.gmm'
1239 
1240  c.em_residues_per_gaussian=int(emg)
1241  else:
1242  c.em_residues_per_gaussian = 0
1243  elif len(emg)==0:
1244  c.em_residues_per_gaussian = 0
1245  else:
1246  errors.append("em_residues_per_gaussian format for component "
1247  "%s line %d is not correct" % (c.molname, linenum))
1248  errors.append("The value must be a single integer. |%s| was given." % emg)
1249 
1250  # rigid bodies
1251  if len(rbs)>0:
1252  if not self._is_int(rbs):
1253  errors.append("rigid bodies format for component "
1254  "%s line %d is not correct" % (c.molname, linenum))
1255  errors.append("Each RB must be a single integer, or empty. "
1256  "|%s| was given." % rbs)
1257  c.rigid_body = int(rbs)
1258 
1259  # super rigid bodies
1260  if len(srbs)>0:
1261  srbs = srbs.split(',')
1262  for i in srbs:
1263  if not self._is_int(i):
1264  errors.append("super rigid bodies format for component "
1265  "%s line %d is not correct" % (c.molname, linenum))
1266  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1267  c.super_rigid_bodies = srbs
1268 
1269  # chain of super rigid bodies
1270  if len(csrbs)>0:
1271  if not self._is_int(csrbs):
1272  errors.append("em_residues_per_gaussian format for component "
1273  "%s line %d is not correct" % (c.molname, linenum))
1274  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1275  c.chain_of_super_rigid_bodies = csrbs
1276 
1277  # done
1278  if errors:
1279  raise ValueError("Fix Topology File syntax errors and rerun: " \
1280  + "\n".join(errors))
1281  else:
1282  return c
1283 
1284 
1285  def set_gmm_dir(self,gmm_dir):
1286  """Change the GMM dir"""
1287  self.gmm_dir = gmm_dir
1288  for c in self._components:
1289  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1290  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1291  print('new gmm',c.gmm_file)
1292 
1293  def set_pdb_dir(self,pdb_dir):
1294  """Change the PDB dir"""
1295  self.pdb_dir = pdb_dir
1296  for c in self._components:
1297  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1298  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1299 
1300  def set_fasta_dir(self,fasta_dir):
1301  """Change the FASTA dir"""
1302  self.fasta_dir = fasta_dir
1303  for c in self._components:
1304  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1305 
1306  def _is_int(self, s):
1307  # is this string an integer?
1308  try:
1309  float(s)
1310  return float(s).is_integer()
1311  except ValueError:
1312  return False
1313 
1314  def get_rigid_bodies(self):
1315  """Return list of lists of rigid bodies (as domain name)"""
1316  rbl = defaultdict(list)
1317  for c in self._components:
1318  if c.rigid_body:
1319  rbl[c.rigid_body].append(c.get_unique_name())
1320  return rbl.values()
1321 
1323  """Return list of lists of super rigid bodies (as domain name)"""
1324  rbl = defaultdict(list)
1325  for c in self._components:
1326  for rbnum in c.super_rigid_bodies:
1327  rbl[rbnum].append(c.get_unique_name())
1328  return rbl.values()
1329 
1331  """Return list of lists of chains of super rigid bodies (as domain name)"""
1332  rbl = defaultdict(list)
1333  for c in self._components:
1334  for rbnum in c.chain_of_super_rigid_bodies:
1335  rbl[rbnum].append(c.get_unique_name())
1336  return rbl.values()
1337 
1338 class _TempMolecule(object):
1339  """Store the Components and any requests for copies"""
1340  def __init__(self,init_c):
1341  self.molname = init_c.molname
1342  # key=copy ID, value = list of domains
1343  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1344  self.add_component(init_c,init_c.copyname)
1345  self.orig_copyname = init_c.copyname
1346  self.orig_component = self.domains[init_c.copyname][0]
1347  def add_component(self,component,copy_id):
1348  self.domains[copy_id].append(component)
1349  component.domainnum = len(self.domains[copy_id])-1
1350  def __repr__(self):
1351  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1352 
1353 class _Component(object):
1354  """Stores the components required to build a standard IMP hierarchy
1355  using IMP.pmi.BuildModel()
1356  """
1357  def __init__(self):
1358  self.molname = None
1359  self.copyname = None
1360  self.domainnum = 0
1361  self.fasta_file = None
1362  self._orig_fasta_file = None
1363  self.fasta_id = None
1364  self.fasta_flag = None
1365  self.pdb_file = None
1366  self._orig_pdb_input = None
1367  self.chain = None
1368  self.residue_range = None
1369  self.pdb_offset = 0
1370  self.bead_size = 10
1371  self.em_residues_per_gaussian = 0
1372  self.gmm_file = ''
1373  self.mrc_file = ''
1374  self.density_prefix = ''
1375  self.color = 0.1
1376  self.rigid_body = None
1377  self.super_rigid_bodies = []
1378  self.chain_of_super_rigid_bodies = []
1379 
1380  def _l2s(self,l):
1381  return ",".join("%s" % x for x in l)
1382 
1383  def __repr__(self):
1384  return self.get_str()
1385 
1386  def get_unique_name(self):
1387  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1388 
1389  def get_str(self):
1390  res_range = self.residue_range
1391  if self.residue_range is None:
1392  res_range = []
1393  name = self.molname
1394  if self.copyname!='':
1395  name += '.'+self.copyname
1396  if self.chain is None:
1397  chain = ' '
1398  else:
1399  chain = self.chain
1400  color=self.color
1401  if isinstance(color, list):
1402  color=','.join([str(x) for x in color])
1403  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1404  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1405  str(self.pdb_offset),str(self.bead_size),
1406  str(self.em_residues_per_gaussian),
1407  str(self.rigid_body) if self.rigid_body else '',
1408  self._l2s(self.super_rigid_bodies),
1409  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1410  return a
1411 
1412  # Preserve old self.name for backwards compatibility
1413  @IMP.deprecated_method("2.7", "Use 'molname' instead of 'name'.")
1414  def __get_name(self): return self.molname
1415  name = property(__get_name)
1416 
1417  # Preserve old self.domain_name for backwards compatibility
1418  @IMP.deprecated_method("2.7",
1419  "Use 'get_unique_name()' instead of 'domain_name'.")
1420  def __get_domain_name(self): return self._domain_name
1421  domain_name = property(__get_domain_name)
def __init__
setup a TempResidue
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1782
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
void read_pdb(TextInput input, int model, Hierarchy h)
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9599
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:21
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:41
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
returns the total number of states generated
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
Temporarily stores residue information, even without structure available.
def create_state
returns a new IMP.pmi.representation_new.State(), increment the state index
Store objects in order they were added, but with default type.
Definition: tools.py:1530