IMP logo
IMP Reference Guide  2.7.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2 Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `mdl = IMP.Model(); s = IMP.pmi.topology.System(mdl)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiples times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7  * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return python sets for easy set arithmetic using & (and), | (or), - (difference)
8  * Molecule.add_structure() to add structural information from a PDB file.
9  * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10  * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
15 
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
18 """
19 
20 from __future__ import print_function
21 import IMP
22 import IMP.atom
23 import IMP.algebra
24 import IMP.pmi
25 import IMP.pmi.tools
26 import csv
27 import os
28 from collections import defaultdict
29 from . import system_tools
30 from bisect import bisect_left
31 from math import pi,cos,sin
32 from operator import itemgetter
33 
34 def _build_ideal_helix(mdl, residues, coord_finder):
35  """Creates an ideal helix from the specified residue range
36  Residues MUST be contiguous.
37  This function actually adds them to the TempResidue hierarchy
38  """
39  created_hiers = []
40 
41  # this function creates a CAlpha helix structure (which can be used for coarsening)
42  for n, tempres in enumerate(residues):
43  if tempres.get_has_structure():
44  raise Exception("You tried to build ideal_helix for a residue "
45  "that already has structure:",tempres)
46  if n>0 and (not tempres.get_index()==prev_idx+1):
47  raise Exception("Passed non-contiguous segment to build_ideal_helix for",tempres.get_molecule())
48 
49  # New residue particle will replace the TempResidue's existing (empty) hierarchy
50  rp = IMP.Particle(mdl)
51  rp.set_name("Residue_%i" % tempres.get_index())
52 
53  # Copy the original residue type and index
54  this_res = IMP.atom.Residue.setup_particle(rp,tempres.get_hierarchy())
55 
56  # Create the CAlpha
57  ap = IMP.Particle(mdl)
59  x = 2.3 * cos(n * 2 * pi / 3.6)
60  y = 2.3 * sin(n * 2 * pi / 3.6)
61  z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
62  d.set_coordinates(IMP.algebra.Vector3D(x, y, z))
63  d.set_radius(1.0)
64  a = IMP.atom.Atom.setup_particle(ap, IMP.atom.AT_CA) # Decorating as Atom also decorates as Mass
65  IMP.atom.Mass(ap).set_mass(110.0)
66  this_res.add_child(a)
67 
68  # Add this structure to the TempResidue
69  tempres.set_structure(this_res)
70  created_hiers.append(this_res)
71  prev_idx = tempres.get_index()
72  coord_finder.add_residues(created_hiers) #the coord finder is for placing beads (later)
73 
74 class _SystemBase(object):
75  """The base class for System, State and Molecule
76  classes. It contains shared functions in common to these classes
77  """
78 
79  def __init__(self,mdl=None):
80  if mdl is None:
81  self.mdl=IMP.Model()
82  else:
83  self.mdl=mdl
84 
85  def _create_hierarchy(self):
86  """create a new hierarchy"""
87  tmp_part=IMP.Particle(self.mdl)
88  return IMP.atom.Hierarchy.setup_particle(tmp_part)
89 
90  def _create_child(self,parent_hierarchy):
91  """create a new hierarchy, set it as child of the input
92  one, and return it"""
93  child_hierarchy=self._create_hierarchy()
94  parent_hierarchy.add_child(child_hierarchy)
95  return child_hierarchy
96 
97  def build(self):
98  """Build the coordinates of the system.
99  Loop through stored(?) hierarchies and set up coordinates!"""
100  pass
101 
102 #------------------------
103 
104 class System(_SystemBase):
105  """This class initializes the root node of the global IMP.atom.Hierarchy."""
106  def __init__(self,mdl=None,name="System"):
107  _SystemBase.__init__(self,mdl)
108  self._number_of_states = 0
109  self.states = []
110  self.built=False
111 
112  # the root hierarchy node
113  self.hier=self._create_hierarchy()
114  self.hier.set_name(name)
115 
116  def get_states(self):
117  return self.states
118 
119  def create_state(self):
120  """returns a new IMP.pmi.representation_new.State(), increment the state index"""
121  self._number_of_states+=1
122  state = State(self,self._number_of_states-1)
123  self.states.append(state)
124  return state
125 
126  def __repr__(self):
127  return self.hier.get_name()
128 
130  """returns the total number of states generated"""
131  return self._number_of_states
132 
133  def get_hierarchy(self):
134  return self.hier
135 
136  def build(self,**kwargs):
137  """call build on all states"""
138  if not self.built:
139  for state in self.states:
140  state.build(**kwargs)
141  self.built=True
142  return self.hier
143 
144 #------------------------
145 
146 class State(_SystemBase):
147  """Stores a list of Molecules all with the same State index.
148  Also stores number of copies of each Molecule for easy Selection.
149  """
150  def __init__(self,system,state_index):
151  """Define a new state
152  @param system the PMI System
153  @param state_index the index of the new state
154  \note It's expected that you will not use this constructor directly,
155  but rather create it with pmi::System::create_molecule()
156  """
157  self.mdl = system.get_hierarchy().get_model()
158  self.system = system
159  self.hier = self._create_child(system.get_hierarchy())
160  self.hier.set_name("State_"+str(state_index))
161  self.molecules = defaultdict(list) # key is molecule name. value are the molecule copies!
162  IMP.atom.State.setup_particle(self.hier,state_index)
163  self.built = False
164 
165  def __repr__(self):
166  return self.system.__repr__()+'.'+self.hier.get_name()
167 
168  def get_molecules(self):
169  """Return a dictionary where key is molecule name and value
170  are the list of all copies of that molecule in setup order"""
171  return self.molecules
172 
173  def get_molecule(self,name,copy_num=0):
174  """Access a molecule by name and copy number
175  @param name The molecule name used during setup
176  @param copy_num The copy number based on input order.
177  Default: 0. Set to 'all' to get all copies
178  """
179  if name not in self.molecules:
180  raise Exception("get_molecule() could not find molname",name)
181  if copy_num=='all':
182  return self.molecules[name]
183  else:
184  if copy_num>len(self.molecules[name])-1:
185  raise Exception("get_molecule() copy number is too high:",copy_num)
186  return self.molecules[name][copy_num]
187 
188  def create_molecule(self,name,sequence='',chain_id='',is_nucleic=None):
189  """Create a new Molecule within this State
190  @param name the name of the molecule (string) it must not
191  be already used
192  @param sequence sequence (string)
193  @param chain_id Chain id to assign to this molecule
194  """
195  # check whether the molecule name is already assigned
196  if name in self.molecules:
197  raise Exception('Cannot use a molecule name already used')
198 
199  mol = Molecule(self,name,sequence,chain_id,copy_num=0,is_nucleic=is_nucleic)
200  self.molecules[name].append(mol)
201  return mol
202 
203  def get_hierarchy(self):
204  return self.hier
205 
206  def get_number_of_copies(self,molname):
207  return len(self.molecules[molname])
208 
209  def _register_copy(self,molecule):
210  molname = molecule.get_hierarchy().get_name()
211  if molname not in self.molecules:
212  raise Exception("Trying to add a copy when the original doesn't exist!")
213  self.molecules[molname].append(molecule)
214 
215  def build(self,**kwargs):
216  """call build on all molecules (automatically makes clones)"""
217  if not self.built:
218  for molname in self.molecules:
219  for mol in reversed(self.molecules[molname]):
220  mol.build(**kwargs)
221  self.built=True
222  return self.hier
223 
224 #------------------------
225 
226 class Molecule(_SystemBase):
227  """Stores a named protein chain.
228  This class is constructed from within the State class.
229  It wraps an IMP.atom.Molecule and IMP.atom.Copy
230  Structure is read using this class
231  Resolutions and copies can be registered, but are only created when build() is called
232  """
233 
234  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,is_nucleic=None):
235  """The user should not call this directly; instead call State::create_molecule()
236  @param state The parent PMI State
237  @param name The name of the molecule (string)
238  @param sequence Sequence (string)
239  @param chain_id The chain of this molecule
240  @param copy_num Store the copy number
241  @param mol_to_clone The original molecule (for cloning ONLY)
242  \note It's expected that you will not use this constructor directly,
243  but rather create a Molecule with pmi::State::create_molecule()
244  """
245  # internal data storage
246  self.mdl = state.get_hierarchy().get_model()
247  self.state = state
248  self.sequence = sequence
249  self.built = False
250  self.mol_to_clone = mol_to_clone
251  self.is_nucleic=is_nucleic
252  self.representations = [] # list of stuff to build
253  self._represented = IMP.pmi.tools.OrderedSet() # residues with representation
254  self.coord_finder = _FindCloseStructure() # helps you place beads by storing structure
255  self._ideal_helices = [] # list of OrderedSets of tempresidues set to ideal helix
256 
257  # create root node and set it as child to passed parent hierarchy
258  self.hier = self._create_child(self.state.get_hierarchy())
259  self.hier.set_name(name)
260  IMP.atom.Copy.setup_particle(self.hier,copy_num)
261  IMP.atom.Chain.setup_particle(self.hier,chain_id)
262  # create TempResidues from the sequence (if passed)
263  self.residues=[]
264  for ns,s in enumerate(sequence):
265  r = TempResidue(self,s,ns+1,ns,is_nucleic)
266  self.residues.append(r)
267 
268  def __repr__(self):
269  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
270  str(IMP.atom.Copy(self.hier).get_copy_index())
271 
272  def __getitem__(self,val):
273  if isinstance(val,int):
274  return self.residues[val]
275  elif isinstance(val,str):
276  return self.residues[int(val)-1]
277  elif isinstance(val,slice):
278  return IMP.pmi.tools.OrderedSet(self.residues[val])
279  else:
280  print("ERROR: range ends must be int or str. Stride must be int.")
281 
282  def get_hierarchy(self):
283  """Return the IMP Hierarchy corresponding to this Molecule"""
284  return self.hier
285 
286  def get_name(self):
287  """Return this Molecule name"""
288  return self.hier.get_name()
289 
290  def get_state(self):
291  """Return the State containing this Molecule"""
292  return self.state
293 
294  def get_ideal_helices(self):
295  """Returns list of OrderedSets with requested ideal helices"""
296  return self._ideal_helices
297 
298  def residue_range(self,a,b,stride=1):
299  """get residue range from a to b, inclusive.
300  Use integers to get 0-indexing, or strings to get PDB-indexing"""
301  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
302  return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
303  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
304  return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
305  else:
306  print("ERROR: range ends must be int or str. Stride must be int.")
307 
308  def get_residues(self):
309  """ Return all modeled TempResidues as a set"""
310  all_res = IMP.pmi.tools.OrderedSet(self.residues)
311  return all_res
312 
313  def get_represented(self):
314  """ Return set of TempResidues that have representation"""
315  return self._represented
316 
318  """ Return a set of TempResidues that have associated structure coordinates """
319  atomic_res = IMP.pmi.tools.OrderedSet()
320  for res in self.residues:
321  if res.get_has_structure():
322  atomic_res.add(res)
323  return atomic_res
324 
326  """ Return a set of TempResidues that don't have associated structure coordinates """
327  non_atomic_res = IMP.pmi.tools.OrderedSet()
328  for res in self.residues:
329  if not res.get_has_structure():
330  non_atomic_res.add(res)
331  return non_atomic_res
332 
333  def create_copy(self,chain_id):
334  """Create a new Molecule with the same name and sequence but a higher copy number.
335  Returns the Molecule. No structure or representation will be copied!
336  @param chain_id Chain ID of the new molecule
337  """
338  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
339  copy_num=self.state.get_number_of_copies(self.get_name()))
340  self.state._register_copy(mol)
341  return mol
342 
343  def create_clone(self,chain_id):
344  """Create a Molecule clone (automatically builds same structure and representation)
345  @param chain_id If you want to set the chain ID of the copy to something
346  \note You cannot add structure or representations to a clone!
347  """
348  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
349  copy_num=self.state.get_number_of_copies(self.get_name()),
350  mol_to_clone=self)
351  self.state._register_copy(mol)
352  return mol
353 
354  def add_structure(self,pdb_fn,chain_id,res_range=[],
355  offset=0,model_num=None,ca_only=False,
356  soft_check=False):
357  """Read a structure and store the coordinates.
358  Returns the atomic residues (as a set)
359  @param pdb_fn The file to read
360  @param chain_id Chain ID to read
361  @param res_range Add only a specific set of residues from the PDB file.
362  res_range[0] is the starting and res_range[1] is the ending residue index.
363  @param offset Apply an offset to the residue indexes of the PDB file.
364  This number is added to the PDB sequence.
365  @param model_num Read multi-model PDB and return that model
366  @param ca_only Only read the CA positions from the PDB file
367  @param soft_check If True, it only warns if there are sequence mismatches between the pdb and
368  the Molecules sequence. Actually replaces the fasta values.
369  If False (Default), it raises and exit when there are sequence mismatches.
370  \note If you are adding structure without a FASTA file, set soft_check to True
371  """
372  if self.mol_to_clone is not None:
373  raise Exception('You cannot call add_structure() for a clone')
374 
375  self.pdb_fn = pdb_fn
376 
377  # get IMP.atom.Residues from the pdb file
378  rhs = system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
379  self.coord_finder.add_residues(rhs)
380 
381  if len(self.residues)==0:
382  print("WARNING: Extracting sequence from structure. Potentially dangerous.")
383 
384  # load those into TempResidue object
385  atomic_res = IMP.pmi.tools.OrderedSet() # collect integer indexes of atomic residues to return
386  for nrh,rh in enumerate(rhs):
387  pdb_idx = rh.get_index()
388  raw_idx = pdb_idx - 1
389 
390  # add ALA to fill in gaps
391  while len(self.residues)<pdb_idx:
392  r = TempResidue(self,'A',len(self.residues)+1,len(self.residues))
393  self.residues.append(r)
394  self.sequence += 'A'
395 
396  internal_res = self.residues[raw_idx]
397  if len(self.sequence)<raw_idx:
398  self.sequence += IMP.atom.get_one_letter_code(rh.get_residue_type())
399  internal_res.set_structure(rh,soft_check)
400  atomic_res.add(internal_res)
401  return atomic_res
402 
403  def add_representation(self,
404  residues=None,
405  resolutions=[],
406  bead_extra_breaks=[],
407  bead_ca_centers=True,
408  bead_default_coord=[0,0,0],
409  density_residues_per_component=None,
410  density_prefix=None,
411  density_force_compute=False,
412  density_voxel_size=1.0,
413  setup_particles_as_densities=False,
414  ideal_helix=False,
415  color=None):
416  """Set the representation for some residues. Some options (beads, ideal helix)
417  operate along the backbone. Others (density options) are volumetric.
418  Some of these you can combine e.g., beads+densities or helix+densities
419  See @ref pmi_resolution
420  @param residues Set of PMI TempResidues for adding the representation.
421  Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
422  If None, will select all residues for this Molecule.
423  @param resolutions Resolutions for beads representations.
424  If structured, will average along backbone, breaking at sequence breaks.
425  If unstructured, will just create beads.
426  Pass an integer or list of integers
427  @param bead_extra_breaks Additional breakpoints for splitting beads.
428  The value can be the 0-ordered position, after which it'll insert the break.
429  Alternatively pass PDB-style (1-ordered) indices as a string.
430  I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
431  @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
432  (otherwise will average atoms to get center). Defaults to True.
433  @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
434  If no structure provided (like an all bead molecule), the beads go here.
435  @param density_residues_per_component Create density (Gaussian Mixture Model)
436  for these residues. Must also supply density_prefix
437  @param density_prefix Prefix (assuming '.txt') to read components from or write to.
438  If exists, will read unless you set density_force_compute=True.
439  Will also write map (prefix+'.mrc').
440  Must also supply density_residues_per_component.
441  @param density_force_compute Set true to force overwrite density file.
442  @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
443  Set to 0 if you don't want to create the MRC file
444  @param setup_particles_as_densities Set to True if you want each particle to be its own density.
445  Useful for all-atom models or flexible beads.
446  Mutually exclusive with density_ options
447  @param ideal_helix Create idealized helix structures for these residues at resolution 1.
448  Any other resolutions passed will be coarsened from there.
449  Resolution 0 will not work, you may have to use MODELLER to do that (for now).
450  @param color the color applied to the hierarchies generated.
451  Format options: tuple (r,g,b) with values 0 to 1
452  or float (from 0 to 1, a map from Blue to Green to Red)
453  or IMP.display.Color object
454 
455  \note You cannot call add_representation multiple times for the same residues.
456  """
457 
458  # can't customize clones
459  if self.mol_to_clone is not None:
460  raise Exception('You cannot call add_representation() for a clone.'
461  'Maybe use a copy instead')
462 
463  # format input
464  if residues is None:
465  res = IMP.pmi.tools.OrderedSet(self.residues)
466  elif residues==self:
467  res = IMP.pmi.tools.OrderedSet(self.residues)
468  elif type(residues) is IMP.pmi.topology.TempResidue:
469  res = IMP.pmi.tools.OrderedSet([residues])
470  elif hasattr(residues,'__iter__'):
471  if len(residues)==0:
472  raise Exception('You passed an empty set to add_representation')
473  if type(residues) is IMP.pmi.tools.OrderedSet and type(next(iter(residues))) is TempResidue:
474  res = residues
475  elif type(residues) is set and type(next(iter(residues))) is TempResidue:
476  res = IMP.pmi.tools.OrderedSet(residues)
477  elif type(residues) is list and type(residues[0]) is TempResidue:
478  res = IMP.pmi.tools.OrderedSet(residues)
479  else:
480  raise Exception("You passed an iteratible of something other than TempResidue",res)
481  else:
482  raise Exception("add_representation: you must pass a set of residues or nothing(=all residues)")
483 
484  # check that each residue has not been represented yet
485  ov = res & self._represented
486  if ov:
487  raise Exception('You have already added representation for '+self.get_hierarchy().get_name()+': '+ov.__repr__())
488  self._represented|=res
489 
490  # check you aren't creating multiple resolutions without structure
491  if not hasattr(resolutions,'__iter__'):
492  if type(resolutions) is int:
493  resolutions = [resolutions]
494  else:
495  raise Exception("you tried to pass resolutions that are not int or list-of-int")
496  if len(resolutions)>1 and not ideal_helix:
497  for r in res:
498  if not r.get_has_structure():
499  raise Exception('You are creating multiple resolutions for '
500  'unstructured regions. This will have unexpected results.')
501 
502  # check density info is consistent
503  if density_residues_per_component or density_prefix:
504  if not density_residues_per_component and density_prefix:
505  raise Exception('If requesting density, must provide '
506  'density_residues_per_component AND density_prefix')
507  if density_residues_per_component and setup_particles_as_densities:
508  raise Exception('Cannot create both volumetric density '
509  '(density_residues_per_component) AND '
510  'individual densities (setup_particles_as_densities) '
511  'in the same representation')
512  if len(resolutions)>1 and setup_particles_as_densities:
513  raise Exception('You have multiple bead resolutions but are attempting to '
514  'set them all up as individual Densities. '
515  'This could have unexpected results.')
516 
517  # check helix not accompanied by other resolutions (densities OK though!)
518  if ideal_helix:
519  if 0 in resolutions:
520  raise Exception("For ideal helices, cannot build resolution 0: "
521  "you have to do that in MODELLER")
522  if 1 not in resolutions:
523  resolutions = [1] + list(resolutions)
524  self._ideal_helices.append(res)
525 
526  # check residues are all part of this molecule:
527  for r in res:
528  if r.get_molecule()!=self:
529  raise Exception('You are adding residues from a different molecule to',self.__repr__())
530 
531  # unify formatting for extra breaks
532  breaks = []
533  for b in bead_extra_breaks:
534  if type(b)==str:
535  breaks.append(int(b)-1)
536  else:
537  breaks.append(b)
538  # store the representation group
539  self.representations.append(_Representation(res,
540  resolutions,
541  breaks,
542  bead_ca_centers,
543  bead_default_coord,
544  density_residues_per_component,
545  density_prefix,
546  density_force_compute,
547  density_voxel_size,
548  setup_particles_as_densities,
549  ideal_helix,
550  color))
551 
552  def build(self):
553  """Create all parts of the IMP hierarchy
554  including Atoms, Residues, and Fragments/Representations and, finally, Copies
555  Will only build requested representations.
556  /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
557  containing at least a CAlpha. For missing residues, these can be constructed
558  from the PDB file
559  """
560  if not self.built:
561  # if requested, clone structure and representations BEFORE building original
562  if self.mol_to_clone is not None:
563  for nr,r in enumerate(self.mol_to_clone.residues):
564  if r.get_has_structure():
565  clone = IMP.atom.create_clone(r.get_hierarchy())
566  self.residues[nr].set_structure(
567  IMP.atom.Residue(clone),soft_check=True)
568  for old_rep in self.mol_to_clone.representations:
569  new_res = IMP.pmi.tools.OrderedSet()
570  for r in old_rep.residues:
571  new_res.add(self.residues[r.get_internal_index()])
572  self._represented.add(self.residues[r.get_internal_index()])
573  new_rep = _Representation(new_res,
574  old_rep.bead_resolutions,
575  old_rep.bead_extra_breaks,
576  old_rep.bead_ca_centers,
577  old_rep.bead_default_coord,
578  old_rep.density_residues_per_component,
579  old_rep.density_prefix,
580  False,
581  old_rep.density_voxel_size,
582  old_rep.setup_particles_as_densities,
583  old_rep.ideal_helix,
584  old_rep.color)
585  self.representations.append(new_rep)
586  self.coord_finder = self.mol_to_clone.coord_finder
587 
588  # give a warning for all residues that don't have representation
589  no_rep = [r for r in self.residues if r not in self._represented]
590  if len(no_rep)>0:
591  print('WARNING: Residues without representation in molecule',
592  self.get_name(),':',system_tools.resnums2str(no_rep))
593 
594  # first build any ideal helices (fills in structure for the TempResidues)
595  for rep in self.representations:
596  if rep.ideal_helix:
597  _build_ideal_helix(self.mdl,rep.residues,self.coord_finder)
598 
599  # build all the representations
600  built_reps = []
601  for rep in self.representations:
602  built_reps += system_tools.build_representation(self.hier,rep,self.coord_finder)
603 
604  # sort them before adding as children
605  built_reps.sort(key=lambda r: IMP.atom.Fragment(r).get_residue_indexes()[0])
606  for br in built_reps:
607  self.hier.add_child(br)
608  br.update_parents()
609  self.built = True
610 
611  for res in self.residues:
612  idx = res.get_index()
613 
614  # first off, store the highest resolution available in residue.hier
615  new_ps = IMP.atom.Selection(
616  self.hier,
617  residue_index=res.get_index(),
618  resolution=1).get_selected_particles()
619  if len(new_ps)>0:
620  new_p = new_ps[0]
621  if IMP.atom.Atom.get_is_setup(new_p):
622  # if only found atomic, store the residue
623  new_hier = IMP.atom.get_residue(IMP.atom.Atom(new_p))
624  else:
625  # otherwise just store what you found
626  new_hier = IMP.atom.Hierarchy(new_p)
627  res.hier = new_hier
628  else:
629  res.hier = None
630  self._represented = IMP.pmi.tools.OrderedSet([a for a in self._represented])
631  print('done building',self.get_hierarchy())
632  return self.hier
633 
634  def get_particles_at_all_resolutions(self,residue_indexes=None):
635  """Helpful utility for getting particles at all resolutions from this molecule.
636  Can optionally pass a set of residue indexes"""
637  if not self.built:
638  raise Exception("Cannot get all resolutions until you build the Molecule")
639  if residue_indexes is None:
640  residue_indexes = [r.get_index() for r in self.get_residues()]
642  residue_indexes=residue_indexes)
643  return ps
644 
645 #------------------------
646 
647 class _Representation(object):
648  """Private class just to store a representation request"""
649  def __init__(self,
650  residues,
651  bead_resolutions,
652  bead_extra_breaks,
653  bead_ca_centers,
654  bead_default_coord,
655  density_residues_per_component,
656  density_prefix,
657  density_force_compute,
658  density_voxel_size,
659  setup_particles_as_densities,
660  ideal_helix,
661  color):
662  self.residues = residues
663  self.bead_resolutions = bead_resolutions
664  self.bead_extra_breaks = bead_extra_breaks
665  self.bead_ca_centers = bead_ca_centers
666  self.bead_default_coord = bead_default_coord
667  self.density_residues_per_component = density_residues_per_component
668  self.density_prefix = density_prefix
669  self.density_force_compute = density_force_compute
670  self.density_voxel_size = density_voxel_size
671  self.setup_particles_as_densities = setup_particles_as_densities
672  self.ideal_helix = ideal_helix
673  self.color = color
674 
675 class _FindCloseStructure(object):
676  """Utility to get the nearest observed coordinate"""
677  def __init__(self):
678  self.coords=[]
679  def add_residues(self,residues):
680  for r in residues:
681  idx = IMP.atom.Residue(r).get_index()
682  ca = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("CA")).get_selected_particles()
683  p = IMP.atom.Selection(r,atom_type=IMP.atom.AtomType("P")).get_selected_particles()
684  if len(ca)==1:
685  xyz = IMP.core.XYZ(ca[0]).get_coordinates()
686  self.coords.append([idx,xyz])
687  elif len(p)==1:
688  xyz = IMP.core.XYZ(p[0]).get_coordinates()
689  self.coords.append([idx,xyz])
690  else:
691  raise("_FindCloseStructure: wrong selection")
692 
693  self.coords.sort(key=itemgetter(0))
694  def find_nearest_coord(self,query):
695  if self.coords==[]:
696  return None
697  keys = [r[0] for r in self.coords]
698  pos = bisect_left(keys,query)
699  if pos == 0:
700  ret = self.coords[0]
701  elif pos == len(self.coords):
702  ret = self.coords[-1]
703  else:
704  before = self.coords[pos - 1]
705  after = self.coords[pos]
706  if after[0] - query < query - before[0]:
707  ret = after
708  else:
709  ret = before
710  return ret[1]
711 
712 class Sequences(object):
713  """A dictionary-like wrapper for reading and storing sequence data"""
714  def __init__(self,fasta_fn,name_map=None):
715  """read a fasta file and extract all the requested sequences
716  @param fasta_fn sequence file
717  @param name_map dictionary mapping the fasta name to final stored name
718  """
719  self.sequences = IMP.pmi.tools.OrderedDict()
720  self.read_sequences(fasta_fn,name_map)
721  def __len__(self):
722  return len(self.sequences)
723  def __contains__(self,x):
724  return x in self.sequences
725  def __getitem__(self,key):
726  if type(key) is int:
727  try:
728  allseqs = list(self.sequences.keys())
729  return self.sequences[allseqs[key]]
730  except:
731  raise Exception("You tried to access sequence num",key,"but there's only",len(self.sequences.keys()))
732  else:
733  return self.sequences[key]
734  def __iter__(self):
735  return self.sequences.__iter__()
736  def __repr__(self):
737  ret=''
738  for s in self.sequences:
739  ret += '%s\t%s\n'%(s,self.sequences[s])
740  return ret
741  def read_sequences(self,fasta_fn,name_map=None):
742  code = None
743  seq = None
744  with open(fasta_fn,'r') as fh:
745  for (num, line) in enumerate(fh):
746  if line.startswith('>'):
747  if seq is not None:
748  self.sequences[code] = seq.strip('*')
749  code = line.rstrip()[1:]
750  if name_map is not None:
751  try:
752  code = name_map[code]
753  except:
754  pass
755  seq = ''
756  else:
757  line = line.rstrip()
758  if line: # Skip blank lines
759  if seq is None:
760  raise Exception( \
761  "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
762  seq += line
763  if seq is not None:
764  self.sequences[code] = seq.strip('*')
765 
766 
767 class PDBSequences(object):
768  """Data_structure for reading and storing sequence data from pdb"""
769  def __init__(self,model,pdb_fn,name_map=None):
770  """read a pdb file and returns all sequences for each contiguous fragment
771  @param pdb_fn file
772  @param name_map dictionary mapping the pdb chain id to final stored name
773  """
774  self.m=model
775  # self.sequences data-structure: (two-key dictionary)
776  # it contains all contiguous fragments:
777  # chain_id, tuples indicating first and last residue, sequence
778  # example:
779  # key1, key2, value
780  # A (77, 149) VENPSLDLEQYAASYSGLMR....
781  # A (160, 505) PALDTAWVEATRKKALLKLEKLDTDLKNYKGNSIK.....
782  # B (30, 180) VDLENQYYNSKALKEDDPKAALSSFQKVLELEGEKGEWGF...
783  # B (192, 443) TQLLEIYALEIQMYTAQKNNKKLKALYEQSLHIKSAIPHPL
784  self.sequences = IMP.pmi.tools.OrderedDict()
785  self.read_sequences(pdb_fn,name_map)
786 
787 
788  def read_sequences(self,pdb_fn,name_map):
789  t = IMP.atom.read_pdb(pdb_fn, self.m, IMP.atom.ATOMPDBSelector())
790  cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
791  for c in cs:
792  id=IMP.atom.Chain(c).get_id()
793  print(id)
794  if name_map:
795  try:
796  id=name_map[id]
797  except:
798  print("Chain ID %s not in name_map, skipping" % id)
799  continue
800  rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
801  rids=[]
802  rids_olc_dict={}
803  for r in rs:
804  dr=IMP.atom.Residue(r)
805  rid=dr.get_index()
806 
807  isprotein=dr.get_is_protein()
808  isrna=dr.get_is_rna()
809  isdna=dr.get_is_dna()
810  if isprotein:
811  olc=IMP.atom.get_one_letter_code(dr.get_residue_type())
812  rids.append(rid)
813  rids_olc_dict[rid]=olc
814  elif isdna:
815  if dr.get_residue_type() == IMP.atom.DADE: olc="A"
816  if dr.get_residue_type() == IMP.atom.DURA: olc="U"
817  if dr.get_residue_type() == IMP.atom.DCYT: olc="C"
818  if dr.get_residue_type() == IMP.atom.DGUA: olc="G"
819  if dr.get_residue_type() == IMP.atom.DTHY: olc="T"
820  rids.append(rid)
821  rids_olc_dict[rid]=olc
822  elif isrna:
823  if dr.get_residue_type() == IMP.atom.ADE: olc="A"
824  if dr.get_residue_type() == IMP.atom.URA: olc="U"
825  if dr.get_residue_type() == IMP.atom.CYT: olc="C"
826  if dr.get_residue_type() == IMP.atom.GUA: olc="G"
827  if dr.get_residue_type() == IMP.atom.THY: olc="T"
828  rids.append(rid)
829  rids_olc_dict[rid]=olc
830  group_rids=self.group_indexes(rids)
831  contiguous_sequences=IMP.pmi.tools.OrderedDict()
832  for group in group_rids:
833  sequence_fragment=""
834  for i in range(group[0],group[1]+1):
835  sequence_fragment+=rids_olc_dict[i]
836  contiguous_sequences[group]=sequence_fragment
837  self.sequences[id]=contiguous_sequences
838 
839  #for c in self.sequences:
840  # for group in self.sequences[c]:
841  # print(c,group,self.sequences[c][group])
842 
843  def group_indexes(self,indexes):
844  from itertools import groupby
845  ranges = []
846  for k, g in groupby(enumerate(indexes), lambda x:x[0]-x[1]):
847  group = [x[1] for x in g]
848  ranges.append((group[0], group[-1]))
849  return ranges
850 
851 
852 def fasta_pdb_alignments(fasta_sequences,pdb_sequences,show=False):
853  '''This function computes and prints the alignment between the
854  fasta file and the pdb sequence, computes the offsets for each contiguous
855  fragment in the PDB.
856  @param fasta_sequences IMP.pmi.topology.Sequences object
857  @param pdb_sequences IMP.pmi.topology.PDBSequences object
858  @param show boolean default False, if True prints the alignments.
859  The input objects should be generated using map_name dictionaries such that fasta_id
860  and pdb_chain_id are mapping to the same protein name. It needs Biopython.
861  Returns a dictionary of offsets, organized by peptide range (group):
862  example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
863  from Bio import pairwise2
864  from Bio.pairwise2 import format_alignment
865  from Bio.SubsMat import MatrixInfo as matlist
866  matrix = matlist.blosum62
867  if type(fasta_sequences) is not IMP.pmi.topology.Sequences:
868  raise Exception("Fasta sequences not type IMP.pmi.topology.Sequences")
869  if type(pdb_sequences) is not IMP.pmi.topology.PDBSequences:
870  raise Exception("pdb sequences not type IMP.pmi.topology.PDBSequences")
871  offsets=IMP.pmi.tools.OrderedDict()
872  for name in fasta_sequences.sequences:
873  print(name)
874  seq_fasta=fasta_sequences.sequences[name]
875  if name not in pdb_sequences.sequences:
876  print("Fasta id %s not in pdb names, aligning against every pdb chain" % name)
877  pdbnames=pdb_sequences.sequences.keys()
878  else:
879  pdbnames=[name]
880  for pdbname in pdbnames:
881  for group in pdb_sequences.sequences[pdbname]:
882  seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
883  if show:
884  print("########################")
885  print(" ")
886  print("protein name",pdbname)
887  print("fasta id", name)
888  print("pdb fragment",group)
889  align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
890  for a in [align]:
891  offset=a[3]+1-group[0]
892  if show:
893  print("alignment sequence start-end",(a[3]+1,a[4]+1))
894  print("offset from pdb to fasta index",offset)
895  print(format_alignment(*a))
896  if name not in offsets:
897  offsets[pdbname]={}
898  if group not in offsets[pdbname]:
899  offsets[pdbname][group]=offset
900  else:
901  if group not in offsets[pdbname]:
902  offsets[pdbname][group]=offset
903  return offsets
904 
905 
906 
907 #------------------------
908 
909 
910 class TempResidue(object):
911  """Temporarily stores residue information, even without structure available."""
912  # Consider implementing __hash__ so you can select.
913  def __init__(self,molecule,code,index,internal_index,is_nucleic=None):
914  """setup a TempResidue
915  @param molecule PMI Molecule to which this residue belongs
916  @param code one-letter residue type code
917  @param index PDB index
918  @param internal_index The number in the sequence
919  """
920  #these attributes should be immutable
921  self.molecule = molecule
922  self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code,is_nucleic)
923  self.pdb_index = index
924  self.internal_index = internal_index
925  self.copy_index = IMP.atom.Copy(self.molecule.hier).get_copy_index()
926  self.state_index = IMP.atom.State(self.molecule.state.hier).get_state_index()
927  #these are expected to change
928  self._structured = False
929  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.mdl),
930  self.rtype,
931  index)
932  def __str__(self):
933  return str(self.state_index)+"_"+self.molecule.get_name()+"_"+str(self.copy_index)+"_"+self.get_code()+str(self.get_index())
934  def __repr__(self):
935  return self.__str__()
936  def __key(self):
937  #this returns the immutable attributes only
938  return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
939  def __eq__(self,other):
940  return type(other)==type(self) and self.__key() == other.__key()
941  def __hash__(self):
942  return hash(self.__key())
943  def get_index(self):
944  return self.pdb_index
945  def get_internal_index(self):
946  return self.internal_index
947  def get_code(self):
948  return IMP.atom.get_one_letter_code(self.get_residue_type())
949  def get_residue_type(self):
950  return self.rtype
951  def get_hierarchy(self):
952  return self.hier
953  def get_molecule(self):
954  return self.molecule
955  def get_has_structure(self):
956  return self._structured
957  def set_structure(self,res,soft_check=False):
958  if res.get_residue_type()!=self.get_residue_type():
959  if soft_check:
960  print('WARNING: Replacing sequence residue',self.get_index(),self.hier.get_residue_type(),
961  'with PDB type',res.get_residue_type())
962  self.hier.set_residue_type((res.get_residue_type()))
963  self.rtype = res.get_residue_type()
964  else:
965  raise Exception('ERROR: PDB residue index',self.get_index(),'is',
966  IMP.atom.get_one_letter_code(res.get_residue_type()),
967  'and sequence residue is',self.get_code())
968 
969  for a in res.get_children():
970  self.hier.add_child(a)
971  atype = IMP.atom.Atom(a).get_atom_type()
972  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
973  self.hier.get_index()))
974  self._structured = True
975 
976 class TopologyReader(object):
977  """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
978  The file is read in and each part of the topology is stored as a
979  ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
980  The topology file should be in a simple pipe-delimited format:
981  @code{.txt}
982 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
983 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
984 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
985 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
986 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
987 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
988 
989  @endcode
990 
991  These are the fields you can enter:
992  - `component_name`: Name of the component (chain). Serves as the parent
993  hierarchy for this structure.
994  - `color`: The color used in the output RMF file. Uses chimera names or R,G,B values
995  - `fasta_fn`: Name of FASTA file containing this component.
996  - `fasta_id`: String found in FASTA sequence header line.
997  - `pdb_fn`: Name of PDB file with coordinates (if available).
998  If left empty, will set up as BEADS (you can also specify "BEADS")
999  Can also write "IDEAL_HELIX".
1000  - `chain`: Chain ID of this domain in the PDB file.
1001  - `residue_range`: Comma delimited pair defining range.
1002  Can leave empty or use 'all' for entire sequence from PDB file.
1003  The second item in the pair can be END to select the last residue in the
1004  PDB chain.
1005  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1006  - `bead_size`: The size (in residues) of beads used to model areas not
1007  covered by PDB coordinates. These will be automatically built.
1008  - `em_residues`: The number of Gaussians used to model the electron
1009  density of this domain. Set to zero if no EM fitting will be done.
1010  The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
1011  - `rigid_body`: Leave empty if this object is not in a rigid body.
1012  Otherwise, this is a number corresponding to the rigid body containing
1013  this object. The number itself is just used for grouping things.
1014  - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
1015  - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
1016  - `flags` additional flags for advanced options
1017  \note All filenames are relative to the paths specified in the constructor.
1018 
1019  """
1020  def __init__(self,
1021  topology_file,
1022  pdb_dir='./',
1023  fasta_dir='./',
1024  gmm_dir='./'):
1025  """Constructor.
1026  @param topology_file Pipe-delimited file specifying the topology
1027  @param pdb_dir Relative path to the pdb directory
1028  @param fasta_dir Relative path to the fasta directory
1029  @param gmm_dir Relative path to the GMM directory
1030  """
1031  self.topology_file = topology_file
1032  self.molecules = {} # key=molname, value=TempMolecule
1033  self.pdb_dir = pdb_dir
1034  self.fasta_dir = fasta_dir
1035  self.gmm_dir = gmm_dir
1036  self._components = self.read(topology_file)
1037 
1038  # Preserve old self.component_list for backwards compatibility
1039  @IMP.deprecated_method("2.7",
1040  "Use 'get_components()' instead of 'component_list'.")
1041  def __get_component_list(self): return self._components
1042  component_list = property(__get_component_list)
1043 
1044  def write_topology_file(self,outfile):
1045  with open(outfile, "w") as f:
1046  f.write("|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1047  "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1048  "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1049  for c in self._components:
1050  output = c.get_str()+'\n'
1051  f.write(output)
1052  return outfile
1053 
1054  def get_components(self, topology_list = "all"):
1055  """ Return list of ComponentTopologies for selected components
1056  @param topology_list List of indices to return"""
1057  if topology_list == "all":
1058  topologies = self._components
1059  else:
1060  topologies=[]
1061  for i in topology_list:
1062  topologies.append(self._components[i])
1063  return topologies
1064 
1065  def get_molecules(self):
1066  return self.molecules
1067 
1068  def read(self, topology_file, append=False):
1069  """Read system components from topology file. append=False will erase
1070  current topology and overwrite with new
1071  """
1072  is_topology = False
1073  is_directories = False
1074  linenum = 1
1075  if append==False:
1076  self._components=[]
1077 
1078  with open(topology_file) as infile:
1079  for line in infile:
1080  if line.lstrip()=="" or line[0]=="#":
1081  continue
1082  elif line.split('|')[1].strip() in ("molecule_name"):
1083  is_topology=True
1084  is_directories = False
1085  old_format = False
1086  continue
1087  elif line.split('|')[1] == "component_name":
1088  is_topology = True
1090  "Old-style topology format (using "
1091  "|component_name|) is deprecated. Please switch to "
1092  "the new-style format (using |molecule_name|)\n")
1093  old_format = True
1094  is_directories = False
1095  continue
1096  elif line.split('|')[1] == "directories":
1098  "Setting directories in the topology file "
1099  "is deprecated. Please do so through the "
1100  "TopologyReader constructor. Note that new-style "
1101  "paths are relative to the current working "
1102  "directory, not the topology file.\n")
1103  is_directories = True
1104  elif is_directories:
1105  fields = line.split('|')
1106  setattr(self, fields[1],
1107  IMP.get_relative_path(topology_file, fields[2]))
1108  if is_topology:
1109  new_component = self._parse_line(line, linenum, old_format)
1110  self._components.append(new_component)
1111  linenum += 1
1112  return self._components
1113 
1114  def _parse_line(self, component_line, linenum, old_format):
1115  """Parse a line of topology values and matches them to their key.
1116  Checks each value for correct syntax
1117  Returns a list of Component objects
1118  fields:
1119  """
1120  c = _Component()
1121  values = [s.strip() for s in component_line.split('|')]
1122  errors = []
1123 
1124  ### Required fields
1125  if old_format:
1126  c.molname = values[1]
1127  c.copyname = ''
1128  c._domain_name = values[2]
1129  c.color = 'blue'
1130  else:
1131  names = values[1].split('.')
1132  if len(names)==1:
1133  c.molname = names[0]
1134  c.copyname = ''
1135  elif len(names)==2:
1136  c.molname = names[0]
1137  c.copyname = names[1]
1138  else:
1139  c.molname = names[0]
1140  c.copyname = names[1]
1141  errors.append("Molecule name should be <molecule.copyID>")
1142  errors.append("For component %s line %d " % (c.molname,linenum))
1143  c._domain_name = c.molname + '.' + c.copyname
1144  colorfields = values[2].split(',')
1145  if len(colorfields)==3:
1146  c.color = [float(x) for x in colorfields]
1147  else:
1148  c.color = values[2]
1149  c._orig_fasta_file = values[3]
1150  c.fasta_file = values[3]
1151  fasta_field = values[4].split(",")
1152  c.fasta_id = fasta_field[0]
1153  c.fasta_flag = None
1154  if len(fasta_field) > 1:
1155  c.fasta_flag = fasta_field[1]
1156  c._orig_pdb_input = values[5]
1157  pdb_input = values[5]
1158  tmp_chain = values[6]
1159  rr = values[7]
1160  offset = values[8]
1161  bead_size = values[9]
1162  emg = values[10]
1163  if old_format:
1164  rbs = srbs = csrbs = ''
1165  else:
1166  rbs = values[11]
1167  srbs = values[12]
1168  csrbs = values[13]
1169 
1170  if c.molname not in self.molecules:
1171  self.molecules[c.molname] = _TempMolecule(c)
1172  else:
1173  # COPY OR DOMAIN
1174  c._orig_fasta_file = self.molecules[c.molname].orig_component.fasta_file
1175  c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1176  self.molecules[c.molname].add_component(c,c.copyname)
1177 
1178  # now cleanup input
1179  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1180  if pdb_input=="":
1181  errors.append("PDB must have BEADS, IDEAL_HELIX, or filename")
1182  errors.append("For component %s line %d is not correct"
1183  "|%s| was given." % (c.molname,linenum,pdb_input))
1184  elif pdb_input in ("IDEAL_HELIX","BEADS"):
1185  c.pdb_file = pdb_input
1186  else:
1187  c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1188 
1189  # PDB chain must be one or two characters
1190  if len(tmp_chain)==1 or len(tmp_chain)==2:
1191  c.chain = tmp_chain
1192  else:
1193  errors.append("PDB Chain identifier must be one or two characters.")
1194  errors.append("For component %s line %d is not correct"
1195  "|%s| was given." % (c.molname,linenum,tmp_chain))
1196 
1197  ### Optional fields
1198  # Residue Range
1199  if rr.strip()=='all' or str(rr)=="":
1200  c.residue_range = None
1201  elif len(rr.split(','))==2 and self._is_int(rr.split(',')[0]) and (self._is_int(rr.split(',')[1]) or rr.split(',')[1] == 'END'):
1202  # Make sure that is residue range is given, there are only two values and they are integers
1203  c.residue_range = (int(rr.split(',')[0]), rr.split(',')[1])
1204  if c.residue_range[1] != 'END':
1205  c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1206  # Old format used -1 for the last residue
1207  if old_format and c.residue_range[1] == -1:
1208  c.residue_range = (c.residue_range[0], 'END')
1209  else:
1210  errors.append("Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1211  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1212  errors.append("To select all residues, indicate |\"all\"|")
1213 
1214  # PDB Offset
1215  if self._is_int(offset):
1216  c.pdb_offset=int(offset)
1217  elif len(offset)==0:
1218  c.pdb_offset = 0
1219  else:
1220  errors.append("PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1221  errors.append("The value must be a single integer. |%s| was given." % offset)
1222 
1223  # Bead Size
1224  if self._is_int(bead_size):
1225  c.bead_size=int(bead_size)
1226  elif len(bead_size)==0:
1227  c.bead_size = 0
1228  else:
1229  errors.append("Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1230  errors.append("The value must be a single integer. |%s| was given." % bead_size)
1231 
1232  # EM Residues Per Gaussian
1233  if self._is_int(emg):
1234  if int(emg) > 0:
1235  c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1236  c.gmm_file = c.density_prefix+'.txt'
1237  c.mrc_file = c.density_prefix+'.gmm'
1238 
1239  c.em_residues_per_gaussian=int(emg)
1240  else:
1241  c.em_residues_per_gaussian = 0
1242  elif len(emg)==0:
1243  c.em_residues_per_gaussian = 0
1244  else:
1245  errors.append("em_residues_per_gaussian format for component "
1246  "%s line %d is not correct" % (c.molname, linenum))
1247  errors.append("The value must be a single integer. |%s| was given." % emg)
1248 
1249  # rigid bodies
1250  if len(rbs)>0:
1251  if not self._is_int(rbs):
1252  errors.append("rigid bodies format for component "
1253  "%s line %d is not correct" % (c.molname, linenum))
1254  errors.append("Each RB must be a single integer, or empty. "
1255  "|%s| was given." % rbs)
1256  c.rigid_body = int(rbs)
1257 
1258  # super rigid bodies
1259  if len(srbs)>0:
1260  srbs = srbs.split(',')
1261  for i in srbs:
1262  if not self._is_int(i):
1263  errors.append("super rigid bodies format for component "
1264  "%s line %d is not correct" % (c.molname, linenum))
1265  errors.append("Each SRB must be a single integer. |%s| was given." % srbs)
1266  c.super_rigid_bodies = srbs
1267 
1268  # chain of super rigid bodies
1269  if len(csrbs)>0:
1270  if not self._is_int(csrbs):
1271  errors.append("em_residues_per_gaussian format for component "
1272  "%s line %d is not correct" % (c.molname, linenum))
1273  errors.append("Each CSRB must be a single integer. |%s| was given." % csrbs)
1274  c.chain_of_super_rigid_bodies = csrbs
1275 
1276  # done
1277  if errors:
1278  raise ValueError("Fix Topology File syntax errors and rerun: " \
1279  + "\n".join(errors))
1280  else:
1281  return c
1282 
1283 
1284  def set_gmm_dir(self,gmm_dir):
1285  """Change the GMM dir"""
1286  self.gmm_dir = gmm_dir
1287  for c in self._components:
1288  c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+".txt")
1289  c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+".mrc")
1290  print('new gmm',c.gmm_file)
1291 
1292  def set_pdb_dir(self,pdb_dir):
1293  """Change the PDB dir"""
1294  self.pdb_dir = pdb_dir
1295  for c in self._components:
1296  if not c._orig_pdb_input in ("","None","IDEAL_HELIX","BEADS"):
1297  c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1298 
1299  def set_fasta_dir(self,fasta_dir):
1300  """Change the FASTA dir"""
1301  self.fasta_dir = fasta_dir
1302  for c in self._components:
1303  c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1304 
1305  def _is_int(self, s):
1306  # is this string an integer?
1307  try:
1308  float(s)
1309  return float(s).is_integer()
1310  except ValueError:
1311  return False
1312 
1313  def get_rigid_bodies(self):
1314  """Return list of lists of rigid bodies (as domain name)"""
1315  rbl = defaultdict(list)
1316  for c in self._components:
1317  if c.rigid_body:
1318  rbl[c.rigid_body].append(c.get_unique_name())
1319  return rbl.values()
1320 
1322  """Return list of lists of super rigid bodies (as domain name)"""
1323  rbl = defaultdict(list)
1324  for c in self._components:
1325  for rbnum in c.super_rigid_bodies:
1326  rbl[rbnum].append(c.get_unique_name())
1327  return rbl.values()
1328 
1330  """Return list of lists of chains of super rigid bodies (as domain name)"""
1331  rbl = defaultdict(list)
1332  for c in self._components:
1333  for rbnum in c.chain_of_super_rigid_bodies:
1334  rbl[rbnum].append(c.get_unique_name())
1335  return rbl.values()
1336 
1337 class _TempMolecule(object):
1338  """Store the Components and any requests for copies"""
1339  def __init__(self,init_c):
1340  self.molname = init_c.molname
1341  # key=copy ID, value = list of domains
1342  self.domains = IMP.pmi.tools.OrderedDefaultDict(list)
1343  self.add_component(init_c,init_c.copyname)
1344  self.orig_copyname = init_c.copyname
1345  self.orig_component = self.domains[init_c.copyname][0]
1346  def add_component(self,component,copy_id):
1347  self.domains[copy_id].append(component)
1348  component.domainnum = len(self.domains[copy_id])-1
1349  def __repr__(self):
1350  return ','.join('%s:%i'%(k,len(self.domains[k])) for k in self.domains)
1351 
1352 class _Component(object):
1353  """Stores the components required to build a standard IMP hierarchy
1354  using IMP.pmi.BuildModel()
1355  """
1356  def __init__(self):
1357  self.molname = None
1358  self.copyname = None
1359  self.domainnum = 0
1360  self.fasta_file = None
1361  self._orig_fasta_file = None
1362  self.fasta_id = None
1363  self.fasta_flag = None
1364  self.pdb_file = None
1365  self._orig_pdb_input = None
1366  self.chain = None
1367  self.residue_range = None
1368  self.pdb_offset = 0
1369  self.bead_size = 10
1370  self.em_residues_per_gaussian = 0
1371  self.gmm_file = ''
1372  self.mrc_file = ''
1373  self.density_prefix = ''
1374  self.color = 0.1
1375  self.rigid_body = None
1376  self.super_rigid_bodies = []
1377  self.chain_of_super_rigid_bodies = []
1378 
1379  def _l2s(self,l):
1380  return ",".join("%s" % x for x in l)
1381 
1382  def __repr__(self):
1383  return self.get_str()
1384 
1385  def get_unique_name(self):
1386  return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1387 
1388  def get_str(self):
1389  res_range = self.residue_range
1390  if self.residue_range is None:
1391  res_range = []
1392  name = self.molname
1393  if self.copyname!='':
1394  name += '.'+self.copyname
1395  if self.chain is None:
1396  chain = ' '
1397  else:
1398  chain = self.chain
1399  color=self.color
1400  if isinstance(color, list):
1401  color=','.join([str(x) for x in color])
1402  a= '|'+'|'.join([name,color,self._orig_fasta_file,self.fasta_id,
1403  self._orig_pdb_input,chain,self._l2s(list(res_range)),
1404  str(self.pdb_offset),str(self.bead_size),
1405  str(self.em_residues_per_gaussian),
1406  str(self.rigid_body) if self.rigid_body else '',
1407  self._l2s(self.super_rigid_bodies),
1408  self._l2s(self.chain_of_super_rigid_bodies)])+'|'
1409  return a
1410 
1411  # Preserve old self.name for backwards compatibility
1412  @IMP.deprecated_method("2.7", "Use 'molname' instead of 'name'.")
1413  def __get_name(self): return self.molname
1414  name = property(__get_name)
1415 
1416  # Preserve old self.domain_name for backwards compatibility
1417  @IMP.deprecated_method("2.7",
1418  "Use 'get_unique_name()' instead of 'domain_name'.")
1419  def __get_domain_name(self): return self._domain_name
1420  domain_name = property(__get_domain_name)
def __init__
setup a TempResidue
def build
call build on all molecules (automatically makes clones)
Add mass to a particle.
Definition: Mass.h:23
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1794
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
Definition: atom/Atom.h:242
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
Miscellaneous utilities.
Definition: tools.py:1
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:39
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
void read_pdb(TextInput input, int model, Hierarchy h)
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
Stores a named protein chain.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
Select all non-alternative ATOM records.
Definition: pdb.h:64
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9635
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:42
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
Definition: exception.h:49
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Definition: State.h:27
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
Definition: Chain.h:21
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:41
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def get_number_of_states
returns the total number of states generated
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
Temporarily stores residue information, even without structure available.
def create_state
returns a new IMP.pmi.representation_new.State(), increment the state index
Store objects in order they were added, but with default type.
Definition: tools.py:1561