IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set up of system representation from topology files.
3 
4  * Class for storing topology elements of PMI components
5  * Functions for reading these elementsfrom a formatted PMI topology file
6  * Functions for converting an existing IMP hierarchy into PMI topology
7  * TopologyWriter for writing PMI topology files
8 """
9 
10 from __future__ import print_function
11 import IMP
12 import IMP.atom
13 import IMP.algebra
14 import IMP.pmi
15 import csv
16 import os
17 from collections import defaultdict
18 from . import system_tools
19 from Bio import SeqIO
20 
21 def get_residue_type_from_one_letter_code(code):
22  threetoone = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
23  'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G',
24  'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
25  'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
26  'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'UNK': 'X'}
27  one_to_three={}
28  for k in threetoone:
29  one_to_three[threetoone[k]] = k
30  return IMP.atom.ResidueType(one_to_three[code])
31 
33  target_ps,
34  sel_zone,
35  entire_residues,
36  exclude_backbone):
37  """Utility to retrieve particles from a hierarchy within a
38  zone around a set of ps.
39  @param hier The hierarchy in which to look for neighbors
40  @param target_ps The particles for zoning
41  @param sel_zone The maximum distance
42  @param entire_residues If True, will grab entire residues
43  @param exclude_backbone If True, will only return sidechain particles
44  """
45 
46  test_sel = IMP.atom.Selection(hier)
47  backbone_types=['C','N','CB','O']
48  if exclude_backbone:
49  test_sel -= IMP.atom.Selection(hier,atom_types=[IMP.atom.AtomType(n)
50  for n in backbone_types])
51  test_ps = test_sel.get_selected_particles()
52  nn = IMP.algebra.NearestNeighbor3D([IMP.core.XYZ(p).get_coordinates()
53  for p in test_ps])
54  zone = set()
55  for target in target_ps:
56  zone|=set(nn.get_in_ball(IMP.core.XYZ(target).get_coordinates(),sel_zone))
57  zone_ps = [test_ps[z] for z in zone]
58  if entire_residues:
59  final_ps = set()
60  for z in zone_ps:
61  final_ps|=set(IMP.atom.Hierarchy(z).get_parent().get_children())
62  zone_ps = [h.get_particle() for h in final_ps]
63  return zone_ps
64 
65 class StructureError(Exception):
66  pass
67 
68 #------------------------
69 
70 class SystemBase(object):
71  """The base class for System, State and Molecule
72  classes. It contains shared functions in common to these classes
73  """
74 
75  def __init__(self,mdl=None):
76  if mdl is None:
77  self.mdl=IMP.Model()
78  else:
79  self.mdl=mdl
80 
81  def _create_hierarchy(self):
82  """create a new hierarchy"""
83  tmp_part=IMP.Particle(self.mdl)
84  return IMP.atom.Hierarchy.setup_particle(tmp_part)
85 
86  def _create_child(self,parent_hierarchy):
87  """create a new hierarchy, set it as child of the input
88  one, and return it"""
89  child_hierarchy=self._create_hierarchy()
90  parent_hierarchy.add_child(child_hierarchy)
91  return child_hierarchy
92 
93  def build(self):
94  """Build the coordinates of the system.
95  Loop through stored(?) hierarchies and set up coordinates!"""
96  pass
97 
98 #------------------------
99 
100 class System(SystemBase):
101  """This class initializes the root node of the global IMP.atom.Hierarchy."""
102  def __init__(self,mdl=None,name="System"):
103  SystemBase.__init__(self,mdl)
104  self._number_of_states = 0
105  self.states = []
106  self.built=False
107 
108  # the root hierarchy node
109  self.hier=self._create_hierarchy()
110  self.hier.set_name(name)
111 
112  def create_state(self):
113  """returns a new IMP.pmi.representation_new.State(), increment the state index"""
114  self._number_of_states+=1
115  state = State(self,self._number_of_states-1)
116  self.states.append(state)
117  return state
118 
119  def __repr__(self):
120  return self.hier.get_name()
121 
123  """returns the total number of states generated"""
124  return self._number_of_states
125 
126  def get_hierarchy(self):
127  return self.hier
128 
129  def build(self,**kwargs):
130  """call build on all states"""
131  if not self.built:
132  for state in self.states:
133  state.build(**kwargs)
134  self.built=True
135  return self.hier
136 
137 #------------------------
138 
140  """This private class is constructed from within the System class.
141  It wraps an IMP.atom.State
142  """
143  def __init__(self,system,state_index):
144  """Define a new state
145  @param system the PMI System
146  @param state_index the index of the new state
147  """
148  self.mdl = system.get_hierarchy().get_model()
149  self.system = system
150  self.hier = self._create_child(system.get_hierarchy())
151  self.hier.set_name("State_"+str(state_index))
152  self.molecules = defaultdict(list) # key is molecule name. value are the molecule copies!
153  IMP.atom.State.setup_particle(self.hier,state_index)
154  self.built=False
155 
156  def __repr__(self):
157  return self.system.__repr__()+'.'+self.hier.get_name()
158 
159  def create_molecule(self,name,sequence=None,chain_id=''):
160  """Create a new Molecule within this State
161  @param name the name of the molecule (string) it must not
162  be already used
163  @param sequence sequence (string)
164  @param chain_id Chain id to assign to this molecule
165  """
166  # check whether the molecule name is already assigned
167  if name in self.molecules:
168  raise WrongMoleculeName('Cannot use a molecule name already used')
169 
170  mol = Molecule(self,name,sequence,chain_id,copy_num=0)
171  self.molecules[name].append(mol)
172  return mol
173 
174  def get_hierarchy(self):
175  return self.hier
176 
177  def get_number_of_copies(self,molname):
178  return len(self.molecules[molname])
179 
180  def _register_copy(self,molecule):
181  molname = molecule.get_hierarchy().get_name()
182  if molname not in self.molecules:
183  raise StructureError("Trying to add a copy when the original doesn't exist!")
184  self.molecules[molname].append(molecule)
185 
186  def build(self,**kwargs):
187  """call build on all molecules (automatically makes clones)"""
188  if not self.built:
189  for molname in self.molecules:
190  for mol in self.molecules[molname]:
191  mol.build(**kwargs)
192  self.built=True
193  return self.hier
194 
195 #------------------------
196 
198  """This class is constructed from within the State class.
199  It wraps an IMP.atom.Molecule and IMP.atom.Copy
200  Structure is read using this class
201  Resolutions and copies can be registered, but are only created when build() is called
202  """
203 
204  def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None,transformation=None):
205  """The user should not call this direclty, instead call State::create_molecule()
206  @param state The parent PMI State
207  @param name The name of the molecule (string)
208  @param sequence Sequence (string)
209  @param mol_to_clone The original molecule (for cloning ONLY)
210  @param transformation A transform to apply during building (primarily for cloning)
211  """
212  # internal data storage
213  self.mdl = state.get_hierarchy().get_model()
214  self.state = state
215  self.sequence = sequence
216  self.built = False
217  self.mol_to_clone = mol_to_clone
218  self.transformation = transformation
219 
220  # create root node and set it as child to passed parent hierarchy
221  self.hier = self._create_child(self.state.get_hierarchy())
222  self.hier.set_name(name)
223  IMP.atom.Copy.setup_particle(self.hier,copy_num)
224  IMP.atom.Chain.setup_particle(self.hier,chain_id)
225 
226  # create Residues from the sequence
227  self.residues=[]
228  for ns,s in enumerate(sequence):
229  r=_Residue(self,s,ns+1)
230  self.residues.append(r)
231 
232  def __repr__(self):
233  return self.state.__repr__()+'.'+self.get_name()+'.'+ \
234  str(IMP.atom.Copy(self.hier).get_copy_index())
235 
236 
237  def __getitem__(self,val):
238  if isinstance(val,int):
239  return self.residues[val]
240  elif isinstance(val,str):
241  return self.residues[int(val)-1]
242  elif isinstance(val,slice):
243  return set(self.residues[val])
244  else:
245  print("ERROR: range ends must be int or str. Stride must be int.")
246 
247  def get_hierarchy(self):
248  return self.hier
249 
250  def get_name(self):
251  return self.hier.get_name()
252 
253  def residue_range(self,a,b,stride=1):
254  """get residue range. Use integers to get 0-indexing, or strings to get PDB-indexing"""
255  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
256  return set(self.residues[a:b:stride])
257  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
258  return set(self.residues[int(a)-1:int(b)-1:stride])
259  else:
260  print("ERROR: range ends must be int or str. Stride must be int.")
261 
262  def get_residues(self):
263  """ Return all Residues as a set"""
264  all_res=set()
265  for res in self.residues:
266  all_res.add(res)
267  return all_res
268 
270  """ Return a set of Residues that have associated structure coordinates """
271  atomic_res=set()
272  for res in self.residues:
273  if res.get_has_coordinates():
274  atomic_res.add(res)
275  return atomic_res
276 
278  """ Return a set of Residues that don't have associated structure coordinates """
279  non_atomic_res=set()
280  for res in self.residues:
281  if not res.get_has_coordinates():
282  non_atomic_res.add(res)
283  return non_atomic_res
284 
285  def create_copy(self,chain_id):
286  """Create a new Molecule with the same name and sequence but a higher copy number.
287  Returns the Molecule. No structure or representation will be copied!
288  @param chain_id Chain ID of the new molecule
289  """
290  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
291  copy_num=self.state.get_number_of_copies(self.get_name()))
292  self.state._register_copy(mol)
293  return mol
294 
295  def create_clone(self,chain_id,transformation=None):
296  """Create a Molecule clone (automatically builds same structure and representation)
297  @param chain_id If you want to set the chain ID of the copy to something
298  @param transformation Apply transformation after building (at the end)
299  """
300  mol = Molecule(self.state,self.get_name(),self.sequence,chain_id,
301  copy_num=self.state.get_number_of_copies(self.get_name()),
302  mol_to_clone=self,transformation=transformation)
303  self.state._register_copy(mol)
304 
305  def add_structure(self,pdb_fn,chain_id,res_range=[],offset=0,model_num=None,ca_only=False,soft_check=False):
306  """Read a structure and store the coordinates.
307  Returns the atomic residues (as a set)
308  @param pdb_fn The file to read
309  @param chain_id Chain ID to read
310  @param res_range Add only a specific set of residues
311  @param offset Apply an offset to the residue indexes of the PDB file
312  @param model_num Read multi-model PDB and return that model
313  @param soft_check If True, it only warns if there are sequence mismatches between the pdb and the Molecules sequence
314  If False (Default), it raises and exit when there are sequence mismatches.
315  \note After offset, we expect the PDB residue numbering to match the FASTA file
316  """
317  # get IMP.atom.Residues from the pdb file
318  rhs=system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
319  if len(rhs)>len(self.residues):
320  print('ERROR: You are loading',len(rhs), \
321  'pdb residues for a sequence of length',len(self.residues),'(too many)')
322 
323  # load those into the existing pmi Residue objects, and return contiguous regions
324  atomic_res=set() # collect integer indexes of atomic residues!
325  for nrh,rh in enumerate(rhs):
326  idx=rh.get_index()
327  internal_res=self.residues[idx-1]
328  if internal_res.get_code()!=IMP.atom.get_one_letter_code(rh.get_residue_type()):
329  if not soft_check:
330  raise StructureError('ERROR: PDB residue index',idx,'is',
331  IMP.atom.get_one_letter_code(rh.get_residue_type()),
332  'and sequence residue is',internal_res.get_code())
333  else:
334  print('WARNING: PDB residue index',idx,'is',
335  IMP.atom.get_one_letter_code(rh.get_residue_type()),
336  'and sequence residue is',internal_res.get_code())
337  internal_res.set_structure(rh,soft_check)
338  atomic_res.add(internal_res)
339  return atomic_res
340 
341  def add_representation(self,res_set=None,representation_type="balls",resolutions=[]):
342  """handles the IMP.atom.Representation decorators, such as multi-scale,
343  density, etc.
344  @param res_set set of PMI residues for adding the representation
345  @param representation_type currently supports only balls
346  @param resolutions what resolutions to add to the
347  residues (see @ref pmi_resolution)
348  """
349  allowed_types=["balls"]
350  if representation_type not in allowed_types:
351  print("ERROR: Allowed representation types:",allowed_types)
352  return
353  if res_set is None:
354  res_set=set(self.residues)
355  for res in res_set:
356  res.add_representation(representation_type,resolutions)
357 
358  def build(self,merge_type="backbone",ca_centers=True,fill_in_missing_residues=True):
359  """Create all parts of the IMP hierarchy
360  including Atoms, Residues, and Fragments/Representations and, finally, Copies
361  /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
362  containing at least a CAlpha. For missing residues, these can be constructed
363  from the PDB file
364  @param merge_type Principle for grouping into fragments.
365  "backbone": linear sequences along backbone are grouped
366  into fragments if they have identical sets of representations.
367  "volume": at each resolution, groups are made based on
368  spatial distance (not currently implemented)
369  @param ca_centers For single-bead-per-residue only. Set the center over the CA position.
370  """
371  allowed_types=("backbone")
372  if merge_type not in allowed_types:
373  print("ERROR: Allowed merge types:",allowed_types)
374  return
375  if not self.built:
376 
377  # if this is a clone, first copy all representations and structure
378  if self.mol_to_clone is not None:
379  for nr,r in enumerate(self.mol_to_clone.residues):
380  self.residues[nr].set_structure(IMP.atom.Residue(IMP.atom.create_clone(r.hier)))
381  if self.transformation is not None:
382  for r in self.residues:
383  IMP.atom.transform(r.hier,self.transformation)
384  for orig,new in zip(self.mol_to_clone.residues,self.residues):
385  new.representations=orig.representations
386 
387 
388  # group into Fragments along backbone
389  if merge_type=="backbone":
390  system_tools.build_along_backbone(self.mdl,self.hier,self.residues,
391  IMP.atom.BALLS,ca_centers)
392 
393  # group into Fragments by volume
394  elif merge_type=="volume":
395  pass
396 
397  self.built=True
398 
399  return self.hier
400 
401 
402 #------------------------
403 
404 class Sequences(object):
405  """A dictionary-like wrapper for reading and storing sequence data"""
406  def __init__(self,fasta_fn,name_map=None):
407  """read a fasta file and extract all the requested sequences
408  @param fasta_fn sequence file
409  @param name_map dictionary mapping the fasta name to the stored name
410  """
411  self.sequences={}
412  self.read_sequences(fasta_fn,name_map)
413  def __len__(self):
414  return len(self.sequences)
415  def __contains__(self,x):
416  return x in self.sequences
417  def __getitem__(self,key):
418  return self.sequences[key]
419  def __repr__(self):
420  ret=''
421  for s in self.sequences:
422  ret+='%s\t%s\n'%(s,self.sequences[s])
423  return ret
424  def read_sequences(self,fasta_fn,name_map=None):
425  # read all sequences
426  handle = open(fasta_fn, "rU")
427  record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
428  handle.close()
429  if name_map is None:
430  for pn in record_dict:
431  self.sequences[pn]=str(record_dict[pn].seq).replace("*", "")
432  else:
433  for pn in name_map:
434  try:
435  self.sequences[name_map[pn]]=str(record_dict[pn].seq).replace("*", "")
436  except:
437  print("tried to add sequence but: id %s not found in fasta file" % pn)
438  exit()
439 
440 #------------------------
441 
442 
443 class _Residue(object):
444  """Stores basic residue information, even without structure available."""
445  # Consider implementing __hash__ so you can select.
446  def __init__(self,molecule,code,index):
447  """setup a Residue
448  @param molecule PMI Molecule to which this residue belongs
449  @param code one-letter residue type code
450  @param index PDB index
451  """
452  self.molecule = molecule
453  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.mdl),
454  get_residue_type_from_one_letter_code(code),
455  index)
456  self.representations = defaultdict(set)
457  def __str__(self):
458  return self.get_code()+str(self.get_index())
459  def __repr__(self):
460  return self.__str__()
461  def __key(self):
462  return (self.molecule,self.hier,
463  frozenset((k,tuple(self.representations[k])) for k in self.representations))
464  def __eq__(self,other):
465  return type(other)==type(self) and self.__key() == other.__key()
466  def __hash__(self):
467  return hash(self.__key())
468  def get_index(self):
469  return self.hier.get_index()
470  def get_code(self):
471  return IMP.atom.get_one_letter_code(self.hier.get_residue_type())
472  def get_residue_type(self):
473  return self.hier.get_residue_type()
474  def get_hierarchy(self):
475  return self.hier
476  def get_has_coordinates(self):
477  if len(self.hier.get_children())>0:
478  return True
479  else:
480  return False
481  def set_structure(self,res,soft_check=False):
482  if res.get_residue_type()!=self.hier.get_residue_type():
483  if not soft_check:
484  raise StructureError("Adding structure to this residue, but it's the wrong type!")
485  for a in res.get_children():
486  self.hier.add_child(a)
487  atype=IMP.atom.Atom(a).get_atom_type()
488  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
489  self.hier.get_index()))
490  def add_representation(self,rep_type,resolutions):
491  self.representations[rep_type] |= set(resolutions)
492 
493 
494 class TopologyReader(object):
495  '''
496  Read a pipe-delimited PMI topology file.
497 
498  The topology file should be in a simple pipe-delimited format, such as
499  @code{.txt}
500 |directories|
501 |pdb_dir|./|
502 |fasta_dir|./|
503 |gmm_dir|./|
504 
505 |topology_dictionary|
506 |component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rmf_file|rmf_frame_number|
507 |Rpb1 |Rpb1_1|1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0 |None | None|
508 |Rpb1 |Rpb1_2|1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0 |0.rmf3 | 0 |
509 |Rpb1 |Rpb1_3|1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,-1 |0|10|0 |None | None|
510 |Rpb2 |Rpb2 |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0 |None | None|
511  @endcode
512 
513  The `|directories|` section lists paths (relative to the topology file)
514  where various inputs can be found.
515 
516  The columns under `|topology_dictionary|`:
517  - `component_name`: Name of the component (chain). Serves as the parent
518  hierarchy for this structure.
519  - `domain_name`: Allows subdivision of chains into individual domains.
520  A model consists of a number of individual units, referred to as
521  domains. Each domain can be an individual chain, or a subset of a
522  chain, and these domains are used to set rigid body movers. A chain
523  may be separated into multiple domains if the user wishes different
524  sections to move independently, and/or analyze the portions separately.
525  - `fasta_fn`: Name of FASTA file containing this component.
526  - `fasta_id`: String found in FASTA sequence header line.
527  - `pdb_fn`: Name of PDB file with coordinates (if available).
528  - `chain`: Chain ID of this domain in the PDB file.
529  - `residue_range`: Comma delimited pair defining range. -1 = last residue.
530  all = [1,-1]
531  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
532  - `bead_size`: The size (in residues) of beads used to model areas not
533  covered by PDB coordinates.
534  - `em_residues`: The number of Gaussians used to model the electron
535  density of this domain. Set to zero if no EM fitting will be done.
536  - `rmf_file`: File path of rmf file with coordinates (if available).
537  - `rmf_frame_number`: File path of rmf file.
538 
539  The file is read in and each part of the topology is stored as a
540  ComponentTopology object for input into IMP::pmi::macros::BuildModel.
541  '''
542  def __init__(self, topology_file):
543  self.topology_file=topology_file
544  self.component_list=[]
545  self.defaults={'bead_size' : 10,
546  'residue_range' : 'all',
547  'pdb_offset' : 0,
548  'em_residues_per_gaussian' : 0,
549  'rmf_file' : None,
550  'rmf_frame_number' : None};
551  self.component_list=self.import_topology_file(topology_file)
552 
553 
554  def write_topology_file(self,outfile):
555  f=open(outfile, "w")
556  f.write("|directories|\n")
557  #print self.defaults
558  for key, value in self.defaults.items():
559  output="|"+str(key)+"|"+str(value)+"|\n"
560  f.write(output)
561  f.write("\n\n")
562  f.write("|topology_dictionary|\n")
563  f.write("|component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rmf_file|rmf_frame_number|\n")
564  for c in self.component_list:
565  output="|"+str(c.name)+"|"+str(c.domain_name)+"|"+str(c.fasta_file)+"|"+str(c.fasta_id)+"|"+str(c.pdb_file)+"|"+str(c.chain)+"|"+str(c.residue_range).strip("(").strip(")")+"|"+str(c.pdb_offset)+"|"+str(c.bead_size)+"|"+str(c.em_residues_per_gaussian)+"|"+str(c.rmf_file)+"|"+str(c.rmf_frame_number)+"|\n"
566  f.write(output)
567  return outfile
568 
569  def get_component_topologies(self, topology_list = "all"):
570  """ Return list of ComponentTopologies for selected components given a list of indices"""
571  if topology_list == "all":
572  topologies = self.component_list
573  else:
574  topologies=[]
575  for i in topology_list:
576  topologies.append(self.component_list[i])
577  return topologies
578 
579  def set_dir(self, default_dir, new_dir):
580  """ Changes the default directories and renames the files for each ComponentTopology object """
581  if default_dir in self.defaults.keys():
582  self.defaults[default_dir]=new_dir
583  else:
584  print(default_dir, "is not a correct directory key")
585  exit()
586  for c in self.component_list:
587  pdb_file=c.pdb_file.split("/")[-1]
588  c.pdb_file=self._make_path(self.defaults['pdb_dir'],
589  pdb_file)
590  fasta_file=c.fasta_file.split("/")[-1]
591  c.fasta_file=self._make_path(self.defaults['fasta_dir'],
592  fasta_file)
593  if c.gmm_file is not None:
594  gmm_file=c.gmm_file.split("/")[-1]
595  c.gmm_file=self._make_path(self.defaults['gmm_dir'],
596  gmm_file)
597  mrc_file=c.mrc_file.split("/")[-1]
598  c.mrc_file=self._make_path(self.defaults['gmm_dir'],
599  mrc_file)
600 
601 
602  def import_topology_file(self, topology_file, append=False):
603  """ Import system components from topology file. append=False will erase current topology and overwrite with new """
604  is_defaults=False
605  is_topology=False
606  defaults_dict={}
607  linenum=1
608 
609  if append==False:
610  self.component_list=[]
611 
612  with open(topology_file) as infile:
613  for line in infile:
614 
615  if line.lstrip()=="" or line[0]=="#":
616  continue
617 
618  elif line.split('|')[1]=="topology_dictionary":
619  is_topology=True
620 
621  elif is_topology==True and is_defaults==True:
622  # Store the field names for this topology grid
623  topology_fields=line
624  is_defaults=False
625 
626  elif is_topology==True:
627  # create a component_topology from this line
628  new_component=self.create_component_topology(line, topology_fields, self.defaults, linenum)
629  self.component_list.append(new_component)
630 
631  elif is_defaults==True:
632  # grab value for default and put into class attribute
633  self.add_default_parameter(line, linenum)
634 
635  elif line.split('|')[1]=="directories":
636  is_defaults=True
637 
638  #print line, is_defaults, is_topology
639  linenum=linenum+1
640  #print self.defaults
641  return self.component_list
642 
643  def _make_path(self, dirname, fname):
644  "Get the full path to a file, possibly relative to the topology file"
645  dirname = IMP.get_relative_path(self.topology_file, dirname)
646  return os.path.join(dirname, fname)
647 
648  def create_component_topology(self, component_line, topology_fields, defaults, linenum, color="0.1"):
649 
650  #Reads a grid of topology values and matches them to their key.
651  #Checks each value for correct syntax
652  #Returns a list of ComponentTopology objects
653 
654  fields=topology_fields.split('|')
655  values=component_line.split('|')
657  errors=[]
658  ##### Required fields
659  c.name = values[fields.index("component_name")].strip()
660  c.domain_name = values[fields.index("domain_name")].strip()
661  c.fasta_file = self._make_path(defaults['fasta_dir'],
662  values[fields.index("fasta_fn")])
663  c.fasta_id = values[fields.index("fasta_id")].strip()
664  c.pdb_file = self._make_path(defaults['pdb_dir'],
665  values[fields.index("pdb_fn")])
666  # Need to find a way to define color
667  c.color = 0.1
668 
669  t_chain = values[fields.index("chain")].strip()
670  # PDB Chain
671  # Must be one or two characters
672  if len(t_chain)==1 or len(t_chain)==2:
673  c.chain = t_chain
674  else:
675  errors.append("PDB Chain identifier must be one or two characters.")
676  errors.append("For component %s line %d is not correct |%s| was given." % (c.name,linenum,t_chain))
677 
678  ##### Optional fields
679  # Residue Range
680  if "residue_range" in fields:
681  f=values[fields.index("residue_range")].strip()
682  if f.strip()=='all' or str(f)=="":
683  c.residue_range=(1,-1)
684  # Make sure that is residue range is given, there are only two values and they are integers
685  elif len(f.split(','))==2 and self.is_int(f.split(',')[0]) and self.is_int(f.split(',')[1]):
686  c.residue_range=(int(f.split(',')[0]), int(f.split(',')[1]))
687  else:
688  errors.append("Residue Range format for component %s line %d is not correct" % (c.name, linenum))
689  errors.append("Correct syntax is two comma separated integers: |start_res, end_res|. |%s| was given." % f)
690  errors.append("To select all residues, indicate |\"all\"|")
691  else:
692  c.residue_range=defaults["residue_range"]
693 
694 
695  # PDB Offset
696  if "pdb_offset" in fields:
697  f=values[fields.index("pdb_offset")].strip()
698  if self.is_int(f):
699  c.pdb_offset=int(f)
700  else:
701  errors.append("PDB Offset format for component %s line %d is not correct" % (c.name, linenum))
702  errors.append("The value must be a single integer. |%s| was given." % f)
703  else:
704  c.pdb_offset=defaults["pdb_offset"]
705 
706  # Bead Size
707  if "bead_size" in fields:
708  f=values[fields.index("bead_size")].strip()
709  if self.is_int(f):
710  c.bead_size=int(f)
711  else:
712  errors.append("Bead Size format for component %s line %d is not correct" % (c.name, linenum))
713  errors.append("The value must be a single integer. |%s| was given." % f)
714  else:
715  c.bead_size=defaults["bead_size"]
716 
717  # EM Residues Per Gaussian
718  if "em_residues_per_gaussian" in fields:
719  f=values[fields.index("em_residues_per_gaussian")].strip()
720  if self.is_int(f):
721  if int(f) > 0:
722  c.gmm_file=self._make_path(defaults['gmm_dir'],
723  c.domain_name.strip() + ".txt")
724  c.mrc_file=self._make_path(defaults['gmm_dir'],
725  c.domain_name.strip() + ".mrc")
726  c.em_residues_per_gaussian=int(f)
727  else:
728  errors.append("em_residues_per_gaussian format for component %s line %d is not correct" % (c.name, linenum))
729  errors.append("The value must be a single integer. |%s| was given." % f)
730  else:
731  c.em_residues_per_gaussian=defaults["em_residues_per_gaussian"]
732 
733  if "rmf_file" in fields:
734  f=values[fields.index("rmf_file")].strip()
735  if f == "None":
736  c.rmf_file=f
737  else:
738  if not os.path.isfile(f):
739  errors.append("rmf_file %s must be an existing file or None" % c.name)
740  else:
741  c.rmf_file=f
742  else:
743  c.rmf_file=defaults["rmf_file"]
744 
745  if "rmf_frame_number" in fields:
746  f=values[fields.index("rmf_frame_number")].strip()
747  if f == "None":
748  c.rmf_frame_number=f
749  else:
750  if not self.is_int(f):
751  errors.append("rmf_frame_number %s must be an integer or None" % c.name)
752  else:
753  c.rmf_file=f
754  else:
755  c.rmf_frame_number=defaults["rmf_frame_number"]
756 
757  if errors:
758  raise ValueError("Fix Topology File syntax errors and rerun: " \
759  + "\n".join(errors))
760  else:
761  return c
762 
763  def is_int(self, s):
764  # is this string an integer?
765  try:
766  float(s)
767  return float(s).is_integer()
768  except ValueError:
769  return False
770 
771 
772  def add_default_parameter(self,line, linenum):
773  #Separates a line into a key:value pair.
774 
775  f=line.split('|')
776  if len(f) != 4:
777  print("Default value syntax not correct for ", line)
778  print("Line number", linenum," contains ", len(f)-2, " fields.")
779  print("Please reformat to |KEY|VALUE|")
780  self.defaults[f[1]]=f[2]
781 
782 
783 
784 class ComponentTopology(object):
785  '''
786  Topology class stores the components required to build a standard IMP hierarchy
787  using IMP.pmi.autobuild_model()
788  '''
789  def __init__(self):
790  self.name=None
791  self.domain_name=None
792  self.fasta_file=None
793  self.fasta_id=None
794  self.pdb_file=None
795  self.chain=None
796  self.residue_range=None
797  self.pdb_offset=None
798  self.bead_size=None
799  self.em_residues_per_gaussian=None
800  self.gmm_file=None
801  self.mrc_file=None
802  self.color=None
803  self.rmf_file_name=None
804  self.rmf_frame_number=None
805 
806  def recompute_default_dirs(self, topology):
807  pdb_filename=self.pdb_file.split("/")[-1]
808  self.pdb_filename=IMP.get_relative_path(topology.topology_file, topology.defaults)
Topology class stores the components required to build a standard IMP hierarchy using IMP...
def build
call build on all molecules (automatically makes clones)
def get_atomic_residues
Return a set of Residues that have associated structure coordinates.
def get_residues
Return all Residues as a set.
def get_particles_within_zone
Utility to retrieve particles from a hierarchy within a zone around a set of ps.
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
Hierarchy create_clone(Hierarchy d)
Clone the Hierarchy.
def __init__
The user should not call this direclty, instead call State::create_molecule()
def residue_range
get residue range.
def __init__
Define a new state.
def add_representation
handles the IMP.atom.Representation decorators, such as multi-scale, density, etc.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:36
The type of an atom.
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of Residues that don't have associated structure coordinates.
The base class for System, State and Molecule classes.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
This class is constructed from within the State class.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
void transform(Hierarchy h, const algebra::Transformation3D &tr)
Transform a hierarchy. This is aware of rigid bodies.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
The type for a residue.
def get_component_topologies
Return list of ComponentTopologies for selected components given a list of indices.
def build
Build the coordinates of the system.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:40
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Read a pipe-delimited PMI topology file.
The general base class for IMP exceptions.
Definition: exception.h:49
Class to handle individual model particles.
Definition: Particle.h:37
This private class is constructed from within the System class.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:41
Select hierarchy particles identified by the biological name.
Definition: Selection.h:65
def get_number_of_states
returns the total number of states generated
def set_dir
Changes the default directories and renames the files for each ComponentTopology object.
def import_topology_file
Import system components from topology file.
def create_state
returns a new IMP.pmi.representation_new.State(), increment the state index