IMP  2.4.0
The Integrative Modeling Platform
pmi/topology/__init__.py
1 """@namespace IMP.pmi.topology
2  Set up of system representation from topology files.
3 
4  * Class for storing topology elements of PMI components
5  * Functions for reading these elementsfrom a formatted PMI topology file
6  * Functions for converting an existing IMP hierarchy into PMI topology
7  * TopologyWriter for writing PMI topology files
8 """
9 
10 from __future__ import print_function
11 import IMP
12 import IMP.atom
13 import IMP.algebra
14 import IMP.pmi
15 import IMP.base
16 import csv
17 import os
18 from collections import defaultdict
19 from . import system_tools
20 from Bio import SeqIO
21 
22 def get_residue_type_from_one_letter_code(code):
23  threetoone = {'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
24  'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G',
25  'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
26  'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
27  'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'UNK': 'X'}
28  one_to_three={}
29  for k in threetoone:
30  one_to_three[threetoone[k]] = k
31  return IMP.atom.ResidueType(one_to_three[code])
32 
34  target_ps,
35  sel_zone,
36  entire_residues,
37  exclude_backbone):
38  """Utility to retrieve particles from a hierarchy within a
39  zone around a set of ps.
40  @param hier The hierarchy in which to look for neighbors
41  @param target_ps The particles for zoning
42  @param sel_zone The maximum distance
43  @param entire_residues If True, will grab entire residues
44  @param exclude_backbone If True, will only return sidechain particles
45  """
46 
47  test_sel = IMP.atom.Selection(hier)
48  backbone_types=['C','N','CB','O']
49  if exclude_backbone:
50  test_sel -= IMP.atom.Selection(hier,atom_types=[IMP.atom.AtomType(n)
51  for n in backbone_types])
52  test_ps = test_sel.get_selected_particles()
53  nn = IMP.algebra.NearestNeighbor3D([IMP.core.XYZ(p).get_coordinates()
54  for p in test_ps])
55  zone = set()
56  for target in target_ps:
57  zone|=set(nn.get_in_ball(IMP.core.XYZ(target).get_coordinates(),sel_zone))
58  zone_ps = [test_ps[z] for z in zone]
59  if entire_residues:
60  final_ps = set()
61  for z in zone_ps:
62  final_ps|=set(IMP.atom.Hierarchy(z).get_parent().get_children())
63  zone_ps = [h.get_particle() for h in final_ps]
64  return zone_ps
65 
66 class StructureError(Exception):
67  pass
68 
69 #------------------------
70 
71 class SystemBase(object):
72  """This is the base class for System, _State and _Molecule
73  classes. It contains shared functions in common to these classes
74  """
75 
76  def __init__(self,mdl=None):
77  if mdl is None:
78  self.mdl=IMP.Model()
79  else:
80  self.mdl=mdl
81 
82  def _create_hierarchy(self):
83  """create a new hierarchy"""
84  tmp_part=IMP.kernel.Particle(self.mdl)
85  return IMP.atom.Hierarchy.setup_particle(tmp_part)
86 
87  def _create_child(self,parent_hierarchy):
88  """create a new hierarchy, set it as child of the input
89  one, and return it"""
90  child_hierarchy=self._create_hierarchy()
91  parent_hierarchy.add_child(child_hierarchy)
92  return child_hierarchy
93 
94  def build(self):
95  """Build the coordinates of the system.
96  Loop through stored(?) hierarchies and set up coordinates!"""
97  pass
98 
99 #------------------------
100 
101 class System(SystemBase):
102  """This class initializes the root node of the global IMP.atom.Hierarchy."""
103  def __init__(self,mdl=None,name="System"):
104  SystemBase.__init__(self,mdl)
105  self._number_of_states = 0
106  self.states = []
107  self.built=False
108 
109  # the root hierarchy node
110  self.hier=self._create_hierarchy()
111  self.hier.set_name(name)
112 
113  def create_state(self):
114  """returns a new IMP.pmi.representation_new._State(), increment the state index"""
115  self._number_of_states+=1
116  state = _State(self,self._number_of_states-1)
117  self.states.append(state)
118  return state
119 
120  def __repr__(self):
121  return self.hier.get_name()
122 
124  """returns the total number of states generated"""
125  return self._number_of_states
126 
127  def get_hierarchy(self):
128  return self.hier
129 
130  def build(self,**kwargs):
131  """call build on all states"""
132  if not self.built:
133  for state in self.states:
134  state.build(**kwargs)
135  self.built=True
136  return self.hier
137 
138 #------------------------
139 
140 class _State(SystemBase):
141  """This private class is constructed from within the System class.
142  It wraps an IMP.atom.State
143  """
144  def __init__(self,system,state_index):
145  """Define a new state
146  @param system the PMI System
147  @param state_index the index of the new state
148  """
149  self.mdl = system.get_hierarchy().get_model()
150  self.system = system
151  self.hier = self._create_child(system.get_hierarchy())
152  self.hier.set_name("State_"+str(state_index))
153  self.molecules = []
154  IMP.atom.State.setup_particle(self.hier,state_index)
155  self.built=False
156 
157  def __repr__(self):
158  return self.system.__repr__()+'.'+self.hier.get_name()
159 
160  def create_molecule(self,name,sequence=None,chain_id='',molecule_to_copy=None):
161  """Create a new Molecule within this State
162  @param name the name of the molecule (string) it must not
163  contain underscores characters "_" and must not
164  be already used
165  @param sequence sequence (string)
166  @param chain_id Chain id to assign to this molecule
167  @param molecule_to_copy Copy everything from an existing molecule. NOT IMPLEMENTED
168  """
169  # check the presence of underscores
170  if "_" in name:
171  raise WrongMoleculeName('A molecule name should not contain underscores characters')
172 
173  # check whether the molecule name is already assigned
174  if name in [mol.get_name() for mol in self.molecules]:
175  raise WrongMoleculeName('Cannot use a molecule name already used')
176 
177  mol = _Molecule(self,name,sequence,chain_id,copy_num=0)
178  self.molecules.append(mol)
179  return mol
180 
181  def get_hierarchy(self):
182  return self.hier
183 
184  def build(self,**kwargs):
185  """call build on all molecules (automatically makes copies)"""
186  if not self.built:
187  for mol in self.molecules:
188  mol.build(**kwargs)
189  self.built=True
190  return self.hier
191 
192 #------------------------
193 
194 class _Molecule(SystemBase):
195  """This private class is constructed from within the State class.
196  It wraps an IMP.atom.Molecule and IMP.atom.Copy
197  Structure is read using this class
198  Resolutions and copies can be registered, but are only created when build() is called
199  """
200 
201  def __init__(self,state,name,sequence,chain_id,copy_num):
202  """Create copy 0 of this molecule.
203  Arguments:
204  @param state the parent PMI State
205  @param name the name of the molecule (string)
206  @param sequence sequence (string)
207  """
208  # internal data storage
209  self.mdl=state.get_hierarchy().get_model()
210  self.state=state
211  self.sequence=sequence
212  self.copies=[]
213  self.built=False
214 
215  # create root node and set it as child to passed parent hierarchy
216  self.hier = self._create_child(self.state.get_hierarchy())
217  self.hier.set_name(name)
218  IMP.atom.Copy.setup_particle(self.hier,copy_num)
219  IMP.atom.Chain.setup_particle(self.hier,chain_id)
220 
221  # create Residues from the sequence
222  self.residues=[]
223  for ns,s in enumerate(sequence):
224  r=_Residue(self,s,ns+1)
225  self.residues.append(r)
226 
227  def __repr__(self):
228  return self.state.__repr__()+'.'+self.hier.get_name()+'.'+ \
229  str(IMP.atom.Copy(self.hier).get_copy_index())
230 
231 
232  def __getitem__(self,val):
233  if isinstance(val,int):
234  return self.residues[val]
235  elif isinstance(val,str):
236  return self.residues[int(val)-1]
237  elif isinstance(val,slice):
238  return set(self.residues[val])
239  else:
240  print("ERROR: range ends must be int or str. Stride must be int.")
241 
242  def get_hierarchy(self):
243  return self.hier
244 
245  def get_name(self):
246  return self.hier.get_name()
247 
248  def residue_range(self,a,b,stride=1):
249  """get residue range. Use integers to get 0-indexing, or strings to get PDB-indexing"""
250  if isinstance(a,int) and isinstance(b,int) and isinstance(stride,int):
251  return set(self.residues[a:b:stride])
252  elif isinstance(a,str) and isinstance(b,str) and isinstance(stride,int):
253  return set(self.residues[int(a)-1:int(b)-1:stride])
254  else:
255  print("ERROR: range ends must be int or str. Stride must be int.")
256 
257  def get_residues(self):
258  """ Return all Residues as a set"""
259  all_res=set()
260  for res in self.residues:
261  all_res.add(res)
262  return all_res
263 
264  def get_atomic_residues(self):
265  """ Return a set of Residues that have associated structure coordinates """
266  atomic_res=set()
267  for res in self.residues:
268  if len(res.hier.get_children())>0:
269  atomic_res.add(res)
270  return atomic_res
271 
272  def get_non_atomic_residues(self):
273  """ Return a set of Residues that don't have associated structure coordinates """
274  non_atomic_res=set()
275  for res in self.residues:
276  if len(res.hier.get_children())==0:
277  non_atomic_res.add(res)
278  return non_atomic_res
279 
280  def add_copy(self,pdb_fn=None,chain_id='',res_range=[],offset=0,new_chain_id=None,
281  transformation=None,ca_only=False):
282  """Create a new Molecule storing the new coordinates.
283  Ensures that representations are identical to original molecule
284  Will verify that the sequence is the same as that of the first structure.
285  @param pdb_fn The file to read. If None, will just clone
286  coordinates from original
287  @param chain_id Chain ID for reading. Or, if copying, this will be the new chain id.
288  @param res_range Add only a specific set of residues
289  @param offset Apply an offset to the residue indexes of the PDB file
290  @param new_chain_id If you want to set the chain ID of the copy to something
291  (defaults to what you extract from the PDB file)
292  @param transformation Apply transformation after reading/cloning structure
293  """
294  if new_chain_id is None:
295  new_chain_id=chain_id
296  mol=_Molecule(self.state,self.get_hierarchy().get_name(),
297  self.sequence,new_chain_id,copy_num=len(self.copies)+1)
298  self.copies.append(mol)
299  if pdb_fn is None:
300  for nr,r in enumerate(self.residues):
301  mol.residues[nr].set_structure(IMP.atom.Residue(IMP.atom.create_clone(r.hier)))
302  else:
303  new_atomic_res = mol.add_structure(pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
304  new_idxs = set([r.get_index() for r in new_atomic_res])
305  orig_idxs = set([r.get_index() for r in self.get_atomic_residues()])
306  if new_idxs!=orig_idxs:
307  raise Exception("You added a structure for the copy with different residues")
308 
309  if transformation is not None:
310  for r in mol.residues:
311  IMP.atom.transform(r.hier,transformation)
312  for orig,new in zip(self.residues,mol.residues):
313  new.representations=orig.representations
314 
315  def add_structure(self,pdb_fn,chain_id,res_range=[],offset=0,model_num=None,ca_only=False):
316  """Read a structure and store the coordinates.
317  Returns the atomic residues (as a set)
318  @param pdb_fn The file to read
319  @param chain_id Chain ID to read
320  @param res_range Add only a specific set of residues
321  @param offset Apply an offset to the residue indexes of the PDB file
322  @param model_num Read multi-model PDB and return that model
323  \note After offset, we expect the PDB residue numbering to match the FASTA file
324  """
325  # get IMP.atom.Residues from the pdb file
326  rhs=system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
327  if len(rhs)>len(self.residues):
328  print('ERROR: You are loading',len(rhs), \
329  'pdb residues for a sequence of length',len(self.residues),'(too many)')
330 
331  # load those into the existing pmi Residue objects, and return contiguous regions
332  atomic_res=set() # collect integer indexes of atomic residues!
333  for nrh,rh in enumerate(rhs):
334  idx=rh.get_index()
335  internal_res=self.residues[idx-1]
336  if internal_res.get_code()!=IMP.atom.get_one_letter_code(rh.get_residue_type()):
337  raise StructureError('ERROR: PDB residue index',idx,'is',
338  IMP.atom.get_one_letter_code(rh.get_residue_type()),
339  'and sequence residue is',internal_res.get_code())
340  internal_res.set_structure(rh)
341  atomic_res.add(internal_res)
342  return atomic_res
343 
344  def add_representation(self,res_set=None,representation_type="balls",resolutions=[]):
345  """handles the IMP.atom.Representation decorators, such as multi-scale,
346  density, etc.
347  @param res_set set of PMI residues for adding the representation
348  @param representation_type currently supports only balls
349  @param resolutions what resolutions to add to the
350  residues (see @ref pmi_resolution)
351  """
352  allowed_types=["balls"]
353  if representation_type not in allowed_types:
354  print("ERROR: Allowed representation types:",allowed_types)
355  return
356  if res_set is None:
357  res_set=set(self.residues)
358  for res in res_set:
359  res.add_representation(representation_type,resolutions)
360 
361  def build(self,merge_type="backbone",ca_centers=True,fill_in_missing_residues=True):
362  """Create all parts of the IMP hierarchy
363  including Atoms, Residues, and Fragments/Representations and, finally, Copies
364  /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
365  containing at least a CAlpha. For missing residues, these can be constructed
366  from the PDB file
367  @param merge_type Principle for grouping into fragments.
368  "backbone": linear sequences along backbone are grouped
369  into fragments if they have identical sets of representations.
370  "volume": at each resolution, groups are made based on
371  spatial distance (not currently implemented)
372  @param ca_centers For single-bead-per-residue only. Set the center over the CA position.
373  """
374  allowed_types=("backbone")
375  if merge_type not in allowed_types:
376  print("ERROR: Allowed merge types:",allowed_types)
377  return
378  if not self.built:
379 
380  # group into Fragments along backbone
381  if merge_type=="backbone":
382  system_tools.build_along_backbone(self.mdl,self.hier,self.residues,
383  IMP.atom.BALLS,ca_centers)
384 
385 
386  # group into Fragments by volume
387  elif merge_type=="volume":
388  pass
389 
390  # build copies
391  for copy in self.copies:
392  copy.build()
393 
394  self.built=True
395 
396  return self.hier
397 
398 
399 #------------------------
400 
401 class Sequences(object):
402  """A dictionary-like wrapper for reading and storing sequence data"""
403  def __init__(self,fasta_fn,name_map=None):
404  """read a fasta file and extract all the requested sequences
405  @param fasta_fn sequence file
406  @param name_map dictionary mapping the fasta name to the stored name
407  """
408  self.sequences={}
409  self.read_sequences(fasta_fn,name_map)
410  def __len__(self):
411  return len(self.sequences)
412  def __contains__(self,x):
413  return x in self.sequences
414  def __getitem__(self,key):
415  return self.sequences[key]
416  def __repr__(self):
417  ret=''
418  for s in self.sequences:
419  ret+='%s\t%s\n'%(s,self.sequences[s])
420  return ret
421  def read_sequences(self,fasta_fn,name_map=None):
422  # read all sequences
423  handle = open(fasta_fn, "rU")
424  record_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
425  handle.close()
426  if name_map is None:
427  for pn in record_dict:
428  self.sequences[pn]=str(record_dict[pn].seq).replace("*", "")
429  else:
430  for pn in name_map:
431  try:
432  self.sequences[name_map[pn]]=str(record_dict[pn].seq).replace("*", "")
433  except:
434  print("tried to add sequence but: id %s not found in fasta file" % pn)
435  exit()
436 
437 #------------------------
438 
439 
440 class _Residue(object):
441  """Stores basic residue information, even without structure available."""
442  # Consider implementing __hash__ so you can select.
443  def __init__(self,molecule,code,index):
444  """setup a Residue
445  @param molecule PMI Molecule to which this residue belongs
446  @param code one-letter residue type code
447  @param index PDB index
448  """
449  self.molecule = molecule
450  self.hier = IMP.atom.Residue.setup_particle(IMP.Particle(molecule.mdl),
451  get_residue_type_from_one_letter_code(code),
452  index)
453  self.representations = defaultdict(set)
454  def __str__(self):
455  return self.get_code()+str(self.get_index())
456  def __repr__(self):
457  return self.__str__()
458  def __key(self):
459  return (self.molecule,self.hier,
460  frozenset((k,tuple(self.representations[k])) for k in self.representations))
461  def __eq__(self,other):
462  return type(other)==type(self) and self.__key() == other.__key()
463  def __hash__(self):
464  return hash(self.__key())
465  def get_index(self):
466  return self.hier.get_index()
467  def get_code(self):
468  return IMP.atom.get_one_letter_code(self.hier.get_residue_type())
469  def get_residue_type(self):
470  return self.hier.get_residue_type()
471  def get_hierarchy(self):
472  return self.hier
473  def get_has_coordinates(self):
474  if len(self.hier.get_children())>0:
475  return True
476  else:
477  return False
478  def set_structure(self,res):
479  if res.get_residue_type()!=self.hier.get_residue_type():
480  raise StructureError("Adding structure to this residue, but it's the wrong type!")
481  for a in res.get_children():
482  self.hier.add_child(a)
483  atype=IMP.atom.Atom(a).get_atom_type()
484  a.get_particle().set_name('Atom %s of residue %i'%(atype.__str__().strip('"'),
485  self.hier.get_index()))
486  def add_representation(self,rep_type,resolutions):
487  self.representations[rep_type] |= set(resolutions)
488 
489 
490 class TopologyReader(object):
491  '''
492  Read a pipe-delimited PMI topology file.
493 
494  The topology file should be in a simple pipe-delimited format, such as
495  @code{.txt}
496 |directories|
497 |pdb_dir|./|
498 |fasta_dir|./|
499 |gmm_dir|./|
500 
501 |topology_dictionary|
502 |component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rmf_file|rmf_frame_number|
503 |Rpb1 |Rpb1_1|1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0 |None | None|
504 |Rpb1 |Rpb1_2|1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0 |0.rmf3 | 0 |
505 |Rpb1 |Rpb1_3|1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,-1 |0|10|0 |None | None|
506 |Rpb2 |Rpb2 |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0 |None | None|
507  @endcode
508 
509  The `|directories|` section lists paths (relative to the topology file)
510  where various inputs can be found.
511 
512  The columns under `|topology_dictionary|`:
513  - `component_name`: Name of the component (chain). Serves as the parent
514  hierarchy for this structure.
515  - `domain_name`: Allows subdivision of chains into individual domains.
516  A model consists of a number of individual units, referred to as
517  domains. Each domain can be an individual chain, or a subset of a
518  chain, and these domains are used to set rigid body movers. A chain
519  may be separated into multiple domains if the user wishes different
520  sections to move independently, and/or analyze the portions separately.
521  - `fasta_fn`: Name of FASTA file containing this component.
522  - `fasta_id`: String found in FASTA sequence header line.
523  - `pdb_fn`: Name of PDB file with coordinates (if available).
524  - `chain`: Chain ID of this domain in the PDB file.
525  - `residue_range`: Comma delimited pair defining range. -1 = last residue.
526  all = [1,-1]
527  - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
528  - `bead_size`: The size (in residues) of beads used to model areas not
529  covered by PDB coordinates.
530  - `em_residues`: The number of Gaussians used to model the electron
531  density of this domain. Set to zero if no EM fitting will be done.
532  - `rmf_file`: File path of rmf file with coordinates (if available).
533  - `rmf_frame_number`: File path of rmf file.
534 
535  The file is read in and each part of the topology is stored as a
536  ComponentTopology object for input into IMP::pmi::macros::BuildModel.
537  '''
538  def __init__(self, topology_file):
539  self.topology_file=topology_file
540  self.component_list=[]
541  self.defaults={'bead_size' : 10,
542  'residue_range' : 'all',
543  'pdb_offset' : 0,
544  'em_residues_per_gaussian' : 0,
545  'rmf_file' : None,
546  'rmf_frame_number' : None};
547  self.component_list=self.import_topology_file(topology_file)
548 
549 
550  def write_topology_file(self,outfile):
551  f=open(outfile, "w")
552  f.write("|directories|\n")
553  #print self.defaults
554  for key, value in self.defaults.items():
555  output="|"+str(key)+"|"+str(value)+"|\n"
556  f.write(output)
557  f.write("\n\n")
558  f.write("|topology_dictionary|\n")
559  f.write("|component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rmf_file|rmf_frame_number|\n")
560  for c in self.component_list:
561  output="|"+str(c.name)+"|"+str(c.domain_name)+"|"+str(c.fasta_file)+"|"+str(c.fasta_id)+"|"+str(c.pdb_file)+"|"+str(c.chain)+"|"+str(c.residue_range).strip("(").strip(")")+"|"+str(c.pdb_offset)+"|"+str(c.bead_size)+"|"+str(c.em_residues_per_gaussian)+"|"+str(c.rmf_file)+"|"+str(c.rmf_frame_number)+"|\n"
562  f.write(output)
563  return outfile
564 
565  def get_component_topologies(self, topology_list = "all"):
566  """ Return list of ComponentTopologies for selected components given a list of indices"""
567  if topology_list == "all":
568  topologies = self.component_list
569  else:
570  topologies=[]
571  for i in topology_list:
572  topologies.append(self.component_list[i])
573  return topologies
574 
575  def set_dir(self, default_dir, new_dir):
576  """ Changes the default directories and renames the files for each ComponentTopology object """
577  if default_dir in self.defaults.keys():
578  self.defaults[default_dir]=new_dir
579  else:
580  print(default_dir, "is not a correct directory key")
581  exit()
582  for c in self.component_list:
583  pdb_file=c.pdb_file.split("/")[-1]
584  c.pdb_file=self._make_path(self.defaults['pdb_dir'],
585  pdb_file)
586  fasta_file=c.fasta_file.split("/")[-1]
587  c.fasta_file=self._make_path(self.defaults['fasta_dir'],
588  fasta_file)
589  if c.gmm_file is not None:
590  gmm_file=c.gmm_file.split("/")[-1]
591  c.gmm_file=self._make_path(self.defaults['gmm_dir'],
592  gmm_file)
593  mrc_file=c.mrc_file.split("/")[-1]
594  c.mrc_file=self._make_path(self.defaults['gmm_dir'],
595  mrc_file)
596 
597 
598  def import_topology_file(self, topology_file, append=False):
599  """ Import system components from topology file. append=False will erase current topology and overwrite with new """
600  is_defaults=False
601  is_topology=False
602  defaults_dict={}
603  linenum=1
604 
605  if append==False:
606  self.component_list=[]
607 
608  with open(topology_file) as infile:
609  for line in infile:
610 
611  if line.lstrip()=="" or line[0]=="#":
612  continue
613 
614  elif line.split('|')[1]=="topology_dictionary":
615  is_topology=True
616 
617  elif is_topology==True and is_defaults==True:
618  # Store the field names for this topology grid
619  topology_fields=line
620  is_defaults=False
621 
622  elif is_topology==True:
623  # create a component_topology from this line
624  new_component=self.create_component_topology(line, topology_fields, self.defaults, linenum)
625  self.component_list.append(new_component)
626 
627  elif is_defaults==True:
628  # grab value for default and put into class attribute
629  self.add_default_parameter(line, linenum)
630 
631  elif line.split('|')[1]=="directories":
632  is_defaults=True
633 
634  #print line, is_defaults, is_topology
635  linenum=linenum+1
636  #print self.defaults
637  return self.component_list
638 
639  def _make_path(self, dirname, fname):
640  "Get the full path to a file, possibly relative to the topology file"
641  dirname = IMP.base.get_relative_path(self.topology_file, dirname)
642  return os.path.join(dirname, fname)
643 
644  def create_component_topology(self, component_line, topology_fields, defaults, linenum, color="0.1"):
645 
646  #Reads a grid of topology values and matches them to their key.
647  #Checks each value for correct syntax
648  #Returns a list of ComponentTopology objects
649 
650  fields=topology_fields.split('|')
651  values=component_line.split('|')
653  no_error=True
654  ##### Required fields
655  c.name = values[fields.index("component_name")].strip()
656  c.domain_name = values[fields.index("domain_name")].strip()
657  c.fasta_file = self._make_path(defaults['fasta_dir'],
658  values[fields.index("fasta_fn")])
659  c.fasta_id = values[fields.index("fasta_id")].strip()
660  c.pdb_file = self._make_path(defaults['pdb_dir'],
661  values[fields.index("pdb_fn")])
662  # Need to find a way to define color
663  c.color = 0.1
664 
665  # PDB Chain
666  if len(values[fields.index("chain")])==1 and values[fields.index("chain")].isupper()==True:
667  c.chain = values[fields.index("chain")]
668  else:
669  print("PDB Chain format for component ", c.name, ", line ", linenum, " is not correct")
670  print("Correct syntax is a single uppercase letter. |", values[fields.index("chain")], "| was given.")
671  no_error=False
672 
673  ##### Optional fields
674  # Residue Range
675  if "residue_range" in fields:
676  f=values[fields.index("residue_range")]
677  if f.strip()=='all' or str(f)=="":
678  c.residue_range=(1,-1)
679  # Make sure that is residue range is given, there are only two values and they are integers
680  elif len(f.split(','))==2 and self.is_int(f.split(',')[0]) and self.is_int(f.split(',')[1]):
681  c.residue_range=(int(f.split(',')[0]), int(f.split(',')[1]))
682  else:
683  print("Residue Range format for component ", c.name, ", line ", linenum, " is not correct")
684  print("Correct syntax is two comma separated integers: |start_res, end_res|. |", f, "| was given.")
685  print("To select all residues, indicate |\"all\"|")
686  no_error=False
687  else:
688  c.residue_range=defaults["residue_range"]
689 
690 
691  # PDB Offset
692  if "pdb_offset" in fields:
693  f=values[fields.index("pdb_offset")]
694  if self.is_int(f):
695  c.pdb_offset=int(f)
696  else:
697  print("PDB Offset format for component ", c.name, ", line ", linenum, " is not correct")
698  print("The value must be a single integer. |", f, "| was given.")
699  no_error=False
700  else:
701  c.pdb_offset=defaults["pdb_offset"]
702 
703  # Bead Size
704  if "bead_size" in fields:
705  f=values[fields.index("bead_size")]
706  if self.is_int(f):
707  c.bead_size=int(f)
708  else:
709  print("Bead Size format for component ", c.name, ", line ", linenum, " is not correct")
710  print("The value must be a single integer. |", f, "| was given.")
711  no_error=False
712  else:
713  c.bead_size=defaults["bead_size"]
714 
715  # EM Residues Per Gaussian
716  if "em_residues_per_gaussian" in fields:
717  f=values[fields.index("em_residues_per_gaussian")]
718  if self.is_int(f):
719  if int(f) > 0:
720  c.gmm_file=self._make_path(defaults['gmm_dir'],
721  c.domain_name.strip() + ".txt")
722  c.mrc_file=self._make_path(defaults['gmm_dir'],
723  c.domain_name.strip() + ".mrc")
724  c.em_residues_per_gaussian=int(f)
725  else:
726  print("em_residues_per_gaussian format for component ", c.name, ", line ", linenum, " is not correct")
727  print("The value must be a single integer. |", f, "| was given.")
728  no_error=False
729  else:
730  c.em_residues_per_gaussian=defaults["em_residues_per_gaussian"]
731 
732  if "rmf_file" in fields:
733  f=values[fields.index("rmf_file")]
734  if f is None:
735  c.rmf_file=f
736  else:
737  if not os.path.isfile(f):
738  print("rmf_file ", c.name, "must be an existing file or None")
739  no_error=False
740  else:
741  c.rmf_file=f
742  else:
743  c.rmf_file=defaults["rmf_file"]
744 
745  if "rmf_frame_number" in fields:
746  f=values[fields.index("rmf_frame_number")]
747  if f is None:
748  c.rmf_frame_number=f
749  else:
750  if not self.is_int(f):
751  print("rmf_frame_number ", c.name, "must be an integer or None")
752  no_error=False
753  else:
754  c.rmf_file=f
755  else:
756  c.rmf_frame_number=defaults["rmf_frame_number"]
757 
758 
759  if no_error==True:
760  return c
761  else:
762  print("Fix Topology File syntax errors and rerun. Exiting...")
763  exit()
764 
765 
766  def is_int(self, s):
767  # is this string an integer?
768  try:
769  float(s)
770  return float(s).is_integer()
771  except ValueError:
772  return False
773 
774 
775  def add_default_parameter(self,line, linenum):
776  #Separates a line into a key:value pair.
777 
778  f=line.split('|')
779  if len(f) != 4:
780  print("Default value syntax not correct for ", line)
781  print("Line number", linenum," contains ", len(f)-2, " fields.")
782  print("Please reformat to |KEY|VALUE|")
783  self.defaults[f[1]]=f[2]
784 
785 
786 
787 class ComponentTopology(object):
788  '''
789  Topology class stores the components required to build a standard IMP hierarchy
790  using IMP.pmi.autobuild_model()
791  '''
792  def __init__(self):
793  self.name=None
794  self.domain_name=None
795  self.fasta_file=None
796  self.fasta_id=None
797  self.pdb_file=None
798  self.chain=None
799  self.residue_range=None
800  self.pdb_offset=None
801  self.bead_size=None
802  self.em_residues_per_gaussian=None
803  self.gmm_file=None
804  self.mrc_file=None
805  self.color=None
806  self.rmf_file_name=None
807  self.rmf_frame_number=None
808 
809  def recompute_default_dirs(self, topology):
810  pdb_filename=self.pdb_file.split("/")[-1]
811  self.pdb_filename=IMP.base.get_relative_path(topology.topology_file, topology.defaults)
static Residue setup_particle(kernel::Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
Definition: Residue.h:157
static State setup_particle(kernel::Model *m, ParticleIndex pi, unsigned int index)
Definition: State.h:36
Topology class stores the components required to build a standard IMP hierarchy using IMP...
Ints get_index(const kernel::ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_particles_within_zone
Utility to retrieve particles from a hierarchy within a zone around a set of ps.
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
Hierarchy create_clone(Hierarchy d)
Clone the Hierarchy.
Low level functionality (logging, error handling, profiling, command line flags etc) that is used by ...
The type of an atom.
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
This is the base class for System, _State and _Molecule classes.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
void transform(Hierarchy h, const algebra::Transformation3D &tr)
Transform a hierarchy. This is aware of rigid bodies.
The standard decorator for manipulating molecular structures.
static Hierarchy setup_particle(kernel::Model *m, kernel::ParticleIndex pi, kernel::ParticleIndexesAdaptor children=kernel::ParticleIndexesAdaptor())
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
The type for a residue.
def get_component_topologies
Return list of ComponentTopologies for selected components given a list of indices.
def build
Build the coordinates of the system.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
static Chain setup_particle(kernel::Model *m, ParticleIndex pi, std::string id)
Definition: Chain.h:41
Class to handle individual model particles.
static Copy setup_particle(kernel::Model *m, ParticleIndex pi, Int number)
Definition: Copy.h:40
A decorator for a residue.
Definition: Residue.h:134
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Read a pipe-delimited PMI topology file.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
def get_number_of_states
returns the total number of states generated
def set_dir
Changes the default directories and renames the files for each ComponentTopology object.
def import_topology_file
Import system components from topology file.
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
def create_state
returns a new IMP.pmi.representation_new._State(), increment the state index