1 """@namespace IMP.pmi.topology
2 Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `mdl = IMP.Model(); s = IMP.pmi.topology.System(mdl)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiples times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7 * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return python sets for easy set arithmetic using & (and), | (or), - (difference)
8 * Molecule.add_structure() to add structural information from a PDB file.
9 * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10 * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
20 from __future__
import print_function
28 from collections
import defaultdict
29 from .
import system_tools
30 from bisect
import bisect_left
31 from math
import pi,cos,sin
32 from operator
import itemgetter
34 def _build_ideal_helix(mdl, residues, coord_finder):
35 """Creates an ideal helix from the specified residue range
36 Residues MUST be contiguous.
37 This function actually adds them to the TempResidue hierarchy
40 for n, res
in enumerate(residues):
41 if res.get_has_structure():
42 raise Exception(
"You tried to build ideal_helix for a residue "
43 "that already has structure:",res)
44 if n>0
and (
not res.get_index()==prev_idx+1):
45 raise Exception(
"Passed non-contiguous segment to build_ideal_helix for",res.get_molecule())
47 rt = res.get_residue_type()
50 rp.set_name(
"Residue_%i" % res.get_index())
55 x = 2.3 * cos(n * 2 * pi / 3.6)
56 y = 2.3 * sin(n * 2 * pi / 3.6)
57 z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
63 res.set_structure(this_res)
64 all_res.append(this_res)
65 prev_idx = res.get_index()
66 coord_finder.add_residues(all_res)
68 class _SystemBase(object):
69 """The base class for System, State and Molecule
70 classes. It contains shared functions in common to these classes
73 def __init__(self,mdl=None):
79 def _create_hierarchy(self):
80 """create a new hierarchy"""
84 def _create_child(self,parent_hierarchy):
85 """create a new hierarchy, set it as child of the input
87 child_hierarchy=self._create_hierarchy()
88 parent_hierarchy.add_child(child_hierarchy)
89 return child_hierarchy
92 """Build the coordinates of the system.
93 Loop through stored(?) hierarchies and set up coordinates!"""
98 class System(_SystemBase):
99 """This class initializes the root node of the global IMP.atom.Hierarchy."""
100 def __init__(self,mdl=None,name="System"):
101 _SystemBase.__init__(self,mdl)
102 self._number_of_states = 0
107 self.hier=self._create_hierarchy()
108 self.hier.set_name(name)
110 def get_states(self):
114 """returns a new IMP.pmi.representation_new.State(), increment the state index"""
115 self._number_of_states+=1
116 state =
State(self,self._number_of_states-1)
117 self.states.append(state)
121 return self.hier.get_name()
124 """returns the total number of states generated"""
125 return self._number_of_states
127 def get_hierarchy(self):
131 """call build on all states"""
133 for state
in self.states:
134 state.build(**kwargs)
141 """Stores a list of Molecules all with the same State index.
142 Also stores number of copies of each Molecule for easy Selection.
144 def __init__(self,system,state_index):
145 """Define a new state
146 @param system the PMI System
147 @param state_index the index of the new state
148 \note It's expected that you will not use this constructor directly,
149 but rather create it with pmi::System::create_molecule()
151 self.mdl = system.get_hierarchy().get_model()
153 self.hier = self._create_child(system.get_hierarchy())
154 self.hier.set_name(
"State_"+str(state_index))
155 self.molecules = defaultdict(list)
160 return self.system.__repr__()+
'.'+self.hier.get_name()
163 """Return a dictionary where key is molecule name and value
164 are the list of all copies of that molecule in setup order"""
165 return self.molecules
168 """Access a molecule by name and copy number
169 @param name The molecule name used during setup
170 @param copy_num The copy number based on input order.
171 Default: 0. Set to 'all' to get all copies
173 if name
not in self.molecules:
174 raise Exception(
"get_molecule() could not find molname",name)
176 return self.molecules[name]
178 if copy_num>len(self.molecules[name])-1:
179 raise Exception(
"get_molecule() copy number is too high:",copy_num)
180 return self.molecules[name][copy_num]
183 """Create a new Molecule within this State
184 @param name the name of the molecule (string) it must not
186 @param sequence sequence (string)
187 @param chain_id Chain id to assign to this molecule
190 if name
in self.molecules:
191 raise Exception(
'Cannot use a molecule name already used')
193 mol =
Molecule(self,name,sequence,chain_id,copy_num=0)
194 self.molecules[name].append(mol)
197 def get_hierarchy(self):
200 def get_number_of_copies(self,molname):
201 return len(self.molecules[molname])
203 def _register_copy(self,molecule):
204 molname = molecule.get_hierarchy().get_name()
205 if molname
not in self.molecules:
206 raise Exception(
"Trying to add a copy when the original doesn't exist!")
207 self.molecules[molname].append(molecule)
210 """call build on all molecules (automatically makes clones)"""
212 for molname
in self.molecules:
213 for mol
in reversed(self.molecules[molname]):
221 """Stores a named protein chain.
222 This class is constructed from within the State class.
223 It wraps an IMP.atom.Molecule and IMP.atom.Copy
224 Structure is read using this class
225 Resolutions and copies can be registered, but are only created when build() is called
228 def __init__(self,state,name,sequence,chain_id,copy_num,mol_to_clone=None):
229 """The user should not call this directly; instead call State::create_molecule()
230 @param state The parent PMI State
231 @param name The name of the molecule (string)
232 @param sequence Sequence (string)
233 @param chain_id The chain of this molecule
234 @param copy_num Store the copy number
235 @param mol_to_clone The original molecule (for cloning ONLY)
236 \note It's expected that you will not use this constructor directly,
237 but rather create a Molecule with pmi::State::create_molecule()
240 self.mdl = state.get_hierarchy().get_model()
242 self.sequence = sequence
244 self.mol_to_clone = mol_to_clone
245 self.representations = []
246 self.represented = IMP.pmi.tools.OrderedSet()
247 self.coord_finder = _FindCloseStructure()
248 self._ideal_helices = []
251 self.hier = self._create_child(self.state.get_hierarchy())
252 self.hier.set_name(name)
257 for ns,s
in enumerate(sequence):
259 self.residues.append(r)
262 return self.state.__repr__()+
'.'+self.
get_name()+
'.'+ \
265 def __getitem__(self,val):
266 if isinstance(val,int):
267 return self.residues[val]
268 elif isinstance(val,str):
269 return self.residues[int(val)-1]
270 elif isinstance(val,slice):
271 return IMP.pmi.tools.OrderedSet(self.residues[val])
273 print(
"ERROR: range ends must be int or str. Stride must be int.")
276 """Return the IMP Hierarchy corresponding to this Molecule"""
280 """Return this Molecule name"""
281 return self.hier.get_name()
284 """Return the State containing this Molecule"""
288 """Returns list of OrderedSets with requested ideal helices"""
289 return self._ideal_helices
292 """get residue range. Use integers to get 0-indexing, or strings to get PDB-indexing"""
293 if isinstance(a,int)
and isinstance(b,int)
and isinstance(stride,int):
294 return IMP.pmi.tools.OrderedSet(self.residues[a:b:stride])
295 elif isinstance(a,str)
and isinstance(b,str)
and isinstance(stride,int):
296 return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b)-1:stride])
298 print(
"ERROR: range ends must be int or str. Stride must be int.")
301 """ Return all modeled TempResidues as a set"""
302 all_res = IMP.pmi.tools.OrderedSet(self.residues)
306 """ Return a set of TempResidues that have associated structure coordinates """
307 atomic_res = IMP.pmi.tools.OrderedSet()
308 for res
in self.residues:
309 if res.get_has_structure():
314 """ Return a set of TempResidues that don't have associated structure coordinates """
315 non_atomic_res = IMP.pmi.tools.OrderedSet()
316 for res
in self.residues:
317 if not res.get_has_structure():
318 non_atomic_res.add(res)
319 return non_atomic_res
322 """Create a new Molecule with the same name and sequence but a higher copy number.
323 Returns the Molecule. No structure or representation will be copied!
324 @param chain_id Chain ID of the new molecule
327 copy_num=self.state.get_number_of_copies(self.
get_name()))
328 self.state._register_copy(mol)
332 """Create a Molecule clone (automatically builds same structure and representation)
333 @param chain_id If you want to set the chain ID of the copy to something
334 \note You cannot add structure or representations to a clone!
337 copy_num=self.state.get_number_of_copies(self.
get_name()),
339 self.state._register_copy(mol)
343 offset=0,model_num=
None,ca_only=
False,
345 """Read a structure and store the coordinates.
346 Returns the atomic residues (as a set)
347 @param pdb_fn The file to read
348 @param chain_id Chain ID to read
349 @param res_range Add only a specific set of residues from the PDB file.
350 @param offset Apply an offset to the residue indexes of the PDB file.
351 This number is added to the PDB sequence.
352 @param model_num Read multi-model PDB and return that model
353 @param ca_only Only read the CA positions from the PDB file
354 @param soft_check If True, it only warns if there are sequence mismatches between the pdb and the Molecules sequence. Actually replaces the fasta values.
355 If False (Default), it raises and exit when there are sequence mismatches.
356 \note If you are adding structure without a FASTA file, set soft_check to True
358 if self.mol_to_clone
is not None:
359 raise Exception(
'You cannot call add_structure() for a clone')
364 rhs = system_tools.get_structure(self.mdl,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
365 self.coord_finder.add_residues(rhs)
367 if len(self.residues)==0:
368 print(
"WARNING: Extracting sequence from structure. Potentially dangerous.")
371 atomic_res = IMP.pmi.tools.OrderedSet()
372 for nrh,rh
in enumerate(rhs):
373 pdb_idx = rh.get_index()
374 raw_idx = pdb_idx - 1
377 while len(self.residues)<pdb_idx:
378 r =
TempResidue(self,
'A',len(self.residues)+1,len(self.residues))
379 self.residues.append(r)
381 internal_res = self.residues[raw_idx]
382 internal_res.set_structure(rh,soft_check)
383 atomic_res.add(internal_res)
389 bead_extra_breaks=[],
390 bead_ca_centers=
True,
391 bead_default_coord=[0,0,0],
392 density_residues_per_component=
None,
394 density_force_compute=
False,
395 density_voxel_size=1.0,
396 setup_particles_as_densities=
False,
399 """Set the representation for some residues. Some options (beads, ideal helix)
400 operate along the backbone. Others (density options) are volumetric.
401 Some of these you can combine e.g., beads+densities or helix+densities
402 See @ref pmi_resolution
403 @param residues Set of PMI TempResidues for adding the representation.
404 Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
405 If None, will select all residues for this Molecule.
406 @param resolutions Resolutions for beads representations.
407 If structured, will average along backbone, breaking at sequence breaks.
408 If unstructured, will just create beads.
409 Pass an integer or list of integers
410 @param bead_extra_breaks Additional breakpoints for splitting beads.
411 The number is the first PDB-style index that belongs in the second bead
412 @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
413 (otherwise will average atoms to get center). Defaults to True.
414 @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
415 If no structure provided (like an all bead molecule), the beads go here.
416 @param density_residues_per_component Create density (Gaussian Mixture Model)
417 for these residues. Must also supply density_prefix
418 @param density_prefix Prefix (assuming '.txt') to read components from or write to.
419 If exists, will read unless you set density_force_compute=True.
420 Will also write map (prefix+'.mrc').
421 Must also supply density_residues_per_component.
422 @param density_force_compute Set true to force overwrite density file.
423 @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
424 Set to 0 if you don't want to create the MRC file
425 @param setup_particles_as_densities Set to True if you want each particle to be its own density.
426 Useful for all-atom models or flexible beads.
427 Mutually exclusive with density_ options
428 @param ideal_helix Create idealized helix structures for these residues at resolution 1.
429 Any other resolutions passed will be coarsened from there.
430 Resolution 0 will not work, you may have to use MODELLER to do that (for now).
431 @param color the color applied to the hierarchies generated.
432 Format options: tuple (r,g,b) with values 0 to 1
433 or float (from 0 to 1, a map from Blue to Green to Red)
434 or IMP.display.Color object
436 \note You cannot call add_representation multiple times for the same residues.
440 if self.mol_to_clone
is not None:
441 raise Exception(
'You cannot call add_representation() for a clone.'
442 'Maybe use a copy instead')
446 res = IMP.pmi.tools.OrderedSet(self.residues)
448 res = IMP.pmi.tools.OrderedSet(self.residues)
449 elif hasattr(residues,
'__iter__'):
451 raise Exception(
'You passed an empty set to add_representation')
452 if type(residues)
is IMP.pmi.tools.OrderedSet
and type(next(iter(residues)))
is TempResidue:
454 elif type(residues)
is set
and type(next(iter(residues)))
is TempResidue:
455 res = IMP.pmi.tools.OrderedSet(residues)
456 elif type(residues)
is list
and type(residues[0])
is TempResidue:
457 res = IMP.pmi.tools.OrderedSet(residues)
459 raise Exception(
"You passed an iteratible of something other than TempResidue",res)
461 raise Exception(
"add_representation: you must pass a set of residues or nothing(=all residues)")
464 ov = res & self.represented
467 self.represented|=res
470 if not hasattr(resolutions,
'__iter__'):
471 if type(resolutions)
is int:
472 resolutions = [resolutions]
474 raise Exception(
"you tried to pass resolutions that are not int or list-of-int")
475 if len(resolutions)>1
and not ideal_helix:
477 if not r.get_has_structure():
478 raise Exception(
'You are creating multiple resolutions for '
479 'unstructured regions. This will have unexpected results.')
482 if density_residues_per_component
or density_prefix:
483 if not density_residues_per_component
and density_prefix:
484 raise Exception(
'If requesting density, must provide '
485 'density_residues_per_component AND density_prefix')
486 if density_residues_per_component
and setup_particles_as_densities:
487 raise Exception(
'Cannot create both volumetric density '
488 '(density_residues_per_component) AND '
489 'individual densities (setup_particles_as_densities) '
490 'in the same representation')
491 if len(resolutions)>1
and setup_particles_as_densities:
492 raise Exception(
'You have multiple bead resolutions but are attempting to '
493 'set them all up as individual Densities. '
494 'This could have unexpected results.')
499 raise Exception(
"For ideal helices, cannot build resolution 0: "
500 "you have to do that in MODELLER")
501 if 1
not in resolutions:
502 resolutions = [1] + list(resolutions)
503 self._ideal_helices.append(res)
507 if r.get_molecule()!=self:
508 raise Exception(
'You are adding residues from a different molecule to',self.__repr__())
511 self.representations.append(_Representation(res,
516 density_residues_per_component,
518 density_force_compute,
520 setup_particles_as_densities,
525 """Create all parts of the IMP hierarchy
526 including Atoms, Residues, and Fragments/Representations and, finally, Copies
527 Will only build requested representations.
528 /note Any residues assigned a resolution must have an IMP.atom.Residue hierarchy
529 containing at least a CAlpha. For missing residues, these can be constructed
534 if self.mol_to_clone
is not None:
535 for nr,r
in enumerate(self.mol_to_clone.residues):
536 if r.get_has_structure():
537 clone = IMP.atom.create_clone(r.get_hierarchy())
538 self.residues[nr].set_structure(
540 for old_rep
in self.mol_to_clone.representations:
541 new_res = IMP.pmi.tools.OrderedSet()
542 for r
in old_rep.residues:
543 new_res.add(self.residues[r.get_internal_index()])
544 self.represented.add(self.residues[r.get_internal_index()])
545 new_rep = _Representation(new_res,
546 old_rep.bead_resolutions,
547 old_rep.bead_extra_breaks,
548 old_rep.bead_ca_centers,
549 old_rep.bead_default_coord,
550 old_rep.density_residues_per_component,
551 old_rep.density_prefix,
552 old_rep.density_voxel_size,
554 old_rep.setup_particles_as_densities,
557 self.representations.append(new_rep)
558 self.coord_finder = self.mol_to_clone.coord_finder
561 no_rep = [r
for r
in self.residues
if r
not in self.represented]
563 print(
'WARNING: Residues without representation in molecule',
564 self.
get_name(),
':',system_tools.resnums2str(no_rep))
567 for rep
in self.representations:
569 _build_ideal_helix(self.mdl,rep.residues,self.coord_finder)
572 for rep
in self.representations:
573 system_tools.build_representation(self.hier,rep,self.coord_finder)
576 for res
in self.residues:
577 idx = res.get_index()
582 residue_index=res.get_index(),
583 resolution=1).get_selected_particles()
600 """Helpful utility for getting particles at all resolutions from this molecule.
601 Can optionally pass a set of residue indexes"""
603 raise Exception(
"Cannot get all resolutions until you build the Molecule")
604 if residue_indexes
is None:
605 residue_indexes = [r.get_index()
for r
in self.
get_residues()]
607 residue_indexes=residue_indexes)
612 class _Representation(object):
613 """Private class just to store a representation request"""
620 density_residues_per_component,
622 density_force_compute,
624 setup_particles_as_densities,
627 self.residues = residues
628 self.bead_resolutions = bead_resolutions
629 self.bead_extra_breaks = bead_extra_breaks
630 self.bead_ca_centers = bead_ca_centers
631 self.bead_default_coord = bead_default_coord
632 self.density_residues_per_component = density_residues_per_component
633 self.density_prefix = density_prefix
634 self.density_force_compute = density_force_compute
635 self.density_voxel_size = density_voxel_size
636 self.setup_particles_as_densities = setup_particles_as_densities
637 self.ideal_helix = ideal_helix
640 class _FindCloseStructure(object):
641 """Utility to get the nearest observed coordinate"""
644 def add_residues(self,residues):
649 self.coords.append([idx,xyz])
650 self.coords.sort(key=itemgetter(0))
651 def find_nearest_coord(self,query):
654 keys = [r[0]
for r
in self.coords]
655 pos = bisect_left(keys,query)
658 elif pos == len(self.coords):
659 ret = self.coords[-1]
661 before = self.coords[pos - 1]
662 after = self.coords[pos]
663 if after[0] - query < query - before[0]:
670 """A dictionary-like wrapper for reading and storing sequence data"""
671 def __init__(self,fasta_fn,name_map=None):
672 """read a fasta file and extract all the requested sequences
673 @param fasta_fn sequence file
674 @param name_map dictionary mapping the fasta name to final stored name
676 self.sequences = IMP.pmi.tools.OrderedDict()
677 self.read_sequences(fasta_fn,name_map)
679 return len(self.sequences)
680 def __contains__(self,x):
681 return x
in self.sequences
682 def __getitem__(self,key):
685 allseqs = list(self.sequences.keys())
686 return self.sequences[allseqs[key]]
688 raise Exception(
"You tried to access sequence num",key,
"but there's only",len(self.sequences.keys()))
690 return self.sequences[key]
692 return self.sequences.__iter__()
695 for s
in self.sequences:
696 ret +=
'%s\t%s\n'%(s,self.sequences[s])
698 def read_sequences(self,fasta_fn,name_map=None):
701 with open(fasta_fn,
'r') as fh:
702 for (num, line)
in enumerate(fh):
703 if line.startswith(
'>'):
705 self.sequences[code] = seq.strip(
'*')
706 code = line.rstrip()[1:]
707 if name_map
is not None:
709 code = name_map[code]
718 "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
721 self.sequences[code] = seq.strip(
'*')
727 """Temporarily stores residue information, even without structure available."""
729 def __init__(self,molecule,code,index,internal_index):
730 """setup a TempResidue
731 @param molecule PMI Molecule to which this residue belongs
732 @param code one-letter residue type code
733 @param index PDB index
734 @param internal_index The number in the sequence
736 self.molecule = molecule
737 self.rtype = IMP.pmi.tools.get_residue_type_from_one_letter_code(code)
741 self.pdb_index = index
742 self.internal_index = internal_index
743 self._structured =
False
745 return self.get_code()+str(self.get_index())
747 return self.__str__()
749 return (self.molecule,self.hier)
750 def __eq__(self,other):
751 return type(other)==type(self)
and self.__key() == other.__key()
753 return hash(self.__key())
755 return self.pdb_index
756 def get_internal_index(self):
757 return self.internal_index
760 def get_residue_type(self):
762 def get_hierarchy(self):
764 def get_molecule(self):
766 def get_has_structure(self):
767 return self._structured
768 def set_structure(self,res,soft_check=False):
769 if res.get_residue_type()!=self.get_residue_type():
771 print(
'WARNING: Replacing sequence residue',self.get_index(),self.hier.get_residue_type(),
772 'with PDB type',res.get_residue_type())
773 self.hier.set_residue_type((res.get_residue_type()))
774 self.rtype = res.get_residue_type()
776 raise Exception(
'ERROR: PDB residue index',self.get_index(),
'is',
778 'and sequence residue is',self.get_code())
780 for a
in res.get_children():
781 self.hier.add_child(a)
783 a.get_particle().set_name(
'Atom %s of residue %i'%(atype.__str__().strip(
'"'),
784 self.hier.get_index()))
785 self._structured =
True
788 """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
789 The topology file should be in a simple pipe-delimited format:
791 |component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|
792 |Rpb1 |Rpb1_1|1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1|
793 |Rpb1 |Rpb1_2|1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1|
794 |Rpb1 |Rpb1_3|1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,-1 |0|10|0|3|1,3|1|
795 |Rpb2 |Rpb2 |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2|
798 All filenames are relative to the paths specified in the constructor.
799 These are the fields you can enter:
800 - `component_name`: Name of the component (chain). Serves as the parent
801 hierarchy for this structure.
802 - `domain_name`: Allows subdivision of chains into individual domains.
803 A model consists of a number of individual units, referred to as
804 domains. Each domain can be an individual chain, or a subset of a
805 chain, and these domains are used to set rigid body movers. A chain
806 may be separated into multiple domains if the user wishes different
807 sections to move independently, and/or analyze the portions separately.
808 - `fasta_fn`: Name of FASTA file containing this component.
809 - `fasta_id`: String found in FASTA sequence header line.
810 - `pdb_fn`: Name of PDB file with coordinates (if available).
811 If left empty, will set up as BEADS (you can also specify "BEADS")
812 Can also write "IDEAL_HELIX".
813 - `chain`: Chain ID of this domain in the PDB file.
814 - `residue_range`: Comma delimited pair defining range.
815 Can leave empty or use 'all' for entire sequence from PDB file.
816 - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
817 - `bead_size`: The size (in residues) of beads used to model areas not
818 covered by PDB coordinates. These will be automatically built.
819 - `em_residues`: The number of Gaussians used to model the electron
820 density of this domain. Set to zero if no EM fitting will be done.
821 The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt (and .mrc)
822 - `rigid_body`: Number corresponding to the rigid body containing this object.
823 The number itself is used for grouping things.
824 - `super_rigid_body`: Like a rigid_body, except things are only occasionally rigid
825 - `chain_of_super_rigid_bodies` For a polymer, create SRBs from groups.
827 The file is read in and each part of the topology is stored as a
828 ComponentTopology object for input into IMP::pmi::macros::BuildModel.
837 @param topology_file Pipe-delimited file specifying the topology
838 @param resolutions What resolutions to build for ALL structured components
839 @param pdb_dir Relative path to the pdb directory
840 @param fasta_dir Relative path to the fasta directory
841 @param gmm_dir Relative path to the GMM directory
843 self.topology_file = topology_file
844 self.component_list = []
845 self.unique_molecules = {}
846 self.resolutions = resolutions
847 self.pdb_dir = pdb_dir
848 self.fasta_dir = fasta_dir
849 self.gmm_dir = gmm_dir
852 def write_topology_file(self,outfile):
853 with open(outfile,
"w")
as f:
854 f.write(
"|component_name|domain_name|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
855 for c
in self.component_list:
856 output = c.get_str()+
'\n'
861 """ Return list of ComponentTopologies for selected components
862 @param topology_list List of indices to return"""
863 if topology_list ==
"all":
864 topologies = self.component_list
867 for i
in topology_list:
868 topologies.append(self.component_list[i])
871 def get_unique_molecules(self):
872 return self.unique_molecules
875 """Read system components from topology file. append=False will erase
876 current topology and overwrite with new
882 self.component_list=[]
883 with open(topology_file)
as infile:
885 if line.lstrip()==
"" or line[0]==
"#":
887 elif line.split(
'|')[1]
in (
"topology_dictionary",
"component_name"):
890 elif line.split(
'|')[1]==
"directories":
892 print(
"WARNING: You no longer need to set directories in the topology file. "
893 "Please do so through the TopologyReader constructor. "
894 "Note that new-style paths are relative to the current working directory, "
895 "not the topology file")
898 new_component = self._create_component_topology(line, linenum)
899 self.component_list.append(new_component)
902 fields = line.split(
'|')
907 return self.component_list
909 def _create_component_topology(self, component_line, linenum, color="0.1"):
910 """Parse a line of topology values and matches them to their key.
911 Checks each value for correct syntax
912 Returns a list of ComponentTopology objects
917 values = [s.strip()
for s
in component_line.split(
'|')]
922 c.domain_name = values[2]
923 c._orig_fasta_file = values[3]
924 c.fasta_file = os.path.join(self.fasta_dir,values[3])
925 c.fasta_id = values[4]
926 c._orig_pdb_input = values[5]
927 pdb_input = values[5]
928 if pdb_input==
"None" or pdb_input==
"":
930 elif pdb_input==
"IDEAL_HELIX":
931 c.pdb_file =
"IDEAL_HELIX"
932 elif pdb_input==
"BEADS":
935 c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
939 if len(t_chain)==1
or len(t_chain)==2:
942 errors.append(
"PDB Chain identifier must be one or two characters.")
943 errors.append(
"For component %s line %d is not correct |%s| was given." % (c.name,linenum,t_chain))
946 if c.name
not in self.unique_molecules:
947 self.unique_molecules[c.name] = [c]
949 if (c.fasta_file!=self.unique_molecules[c.name][0].fasta_file
or \
950 c.fasta_id!=self.unique_molecules[c.name][0].fasta_id
or \
951 c.chain!=self.unique_molecules[c.name][0].chain):
952 errors.append(
"All domains with the same component name must have the same sequence. %s doesn't match %s"%(c.domain_name,c.name))
953 self.unique_molecules[c.name].append(c)
958 if f.strip()==
'all' or str(f)==
"":
959 c.residue_range =
None
960 elif len(f.split(
','))==2
and self._is_int(f.split(
',')[0])
and self._is_int(f.split(
',')[1]):
962 c.residue_range = (int(f.split(
',')[0]), int(f.split(
',')[1]))
964 errors.append(
"Residue Range format for component %s line %d is not correct" % (c.name, linenum))
965 errors.append(
"Correct syntax is two comma separated integers: |start_res, end_res|. |%s| was given." % f)
966 errors.append(
"To select all residues, indicate |\"all\"|")
975 errors.append(
"PDB Offset format for component %s line %d is not correct" % (c.name, linenum))
976 errors.append(
"The value must be a single integer. |%s| was given." % f)
985 errors.append(
"Bead Size format for component %s line %d is not correct" % (c.name, linenum))
986 errors.append(
"The value must be a single integer. |%s| was given." % f)
992 c.density_prefix = os.path.join(self.gmm_dir,c.domain_name)
993 c.gmm_file = c.density_prefix+
'.txt'
994 c.mrc_file = c.density_prefix+
'.gmm'
996 c.em_residues_per_gaussian=int(f)
998 c.em_residues_per_gaussian = 0
1000 c.em_residues_per_gaussian = 0
1002 errors.append(
"em_residues_per_gaussian format for component "
1003 "%s line %d is not correct" % (c.name, linenum))
1004 errors.append(
"The value must be a single integer. |%s| was given." % f)
1011 if not self._is_int(f):
1012 errors.append(
"rigid bodies format for component "
1013 "%s line %d is not correct" % (c.name, linenum))
1014 errors.append(
"Each RB must be a single integer. |%s| was given." % f)
1022 if not self._is_int(i):
1023 errors.append(
"super rigid bodies format for component "
1024 "%s line %d is not correct" % (c.name, linenum))
1025 errors.append(
"Each SRB must be a single integer. |%s| was given." % f)
1026 c.super_rigid_bodies = f
1031 if not self._is_int(f):
1032 errors.append(
"em_residues_per_gaussian format for component "
1033 "%s line %d is not correct" % (c.name, linenum))
1034 errors.append(
"Each CSRB must be a single integer. |%s| was given." % f)
1035 c.chain_of_super_rigid_bodies = f
1039 raise ValueError(
"Fix Topology File syntax errors and rerun: " \
1040 +
"\n".join(errors))
1046 """Change the GMM dir"""
1047 self.gmm_dir = gmm_dir
1048 for c
in self.component_list:
1049 c.gmm_file = os.path.join(self.gmm_dir,c.domain_name+
".txt")
1050 c.mrc_file = os.path.join(self.gmm_dir,c.domain_name+
".mrc")
1051 print(
'new gmm',c.gmm_file)
1054 """Change the PDB dir"""
1055 self.pdb_dir = pdb_dir
1056 for c
in self.component_list:
1057 if not c._orig_pdb_input
in (
"",
"None",
"IDEAL_HELIX",
"BEADS"):
1058 c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1061 """Change the FASTA dir"""
1062 self.fasta_dir = fasta_dir
1063 for c
in self.component_list:
1064 c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1067 """DEPRECATED: This old function sets things relative to topology file"""
1068 print(
"WARNING: set_dir() is deprecated, use set_gmm_dir, set_pdb_dir, or set_fasta_dir. "
1069 "Paths in the TopologyReader constructor or in those functions are relative "
1070 "to the current working directory, not the topology file.")
1071 if default_dir==
"gmm_dir":
1073 elif default_dir==
"pdb_dir":
1075 elif default_dir==
"fasta_dir":
1076 self.
set_fasta_dir(nIMP.get_relative_path(self.topology_file,new_dir))
1078 raise Exception(default_dir,
"is not a correct directory key")
1080 def _is_int(self, s):
1084 return float(s).is_integer()
1089 """Return list of lists of rigid bodies (as domain name)"""
1090 rbl = defaultdict(list)
1091 for c
in self.component_list:
1092 for rbnum
in c.rigid_bodies:
1093 rbl[rbnum].append(c.domain_name)
1097 """Return list of lists of super rigid bodies (as domain name)"""
1098 rbl = defaultdict(list)
1099 for c
in self.component_list:
1100 for rbnum
in c.super_rigid_bodies:
1101 rbl[rbnum].append(c.domain_name)
1105 """Return list of lists of chains of super rigid bodies (as domain name)"""
1106 rbl = defaultdict(list)
1107 for c
in self.component_list:
1108 for rbnum
in c.chain_of_super_rigid_bodies:
1109 rbl[rbnum].append(c.domain_name)
1113 """Stores the components required to build a standard IMP hierarchy
1114 using IMP.pmi.BuildModel()
1118 self.num_clones =
None
1119 self.domain_name =
None
1120 self.fasta_file =
None
1121 self._orig_fasta_file =
None
1122 self.fasta_id =
None
1123 self.pdb_file =
None
1124 self._orig_pdb_input =
None
1126 self.residue_range =
None
1129 self.em_residues_per_gaussian = 0
1132 self.density_prefix =
''
1134 self.rigid_bodies = []
1135 self.super_rigid_bodies = []
1136 self.chain_of_super_rigid_bodies = []
1138 l = str(l).strip(
'[').strip(
']')
1141 return self.get_str()
1143 res_range = self.residue_range
1144 if self.residue_range
is None:
1146 return '|'+
'|'.join([self.name,self.domain_name,self._orig_fasta_file,self.fasta_id,
1147 self._orig_pdb_input,self.chain,self._l2s(list(res_range)),str(self.pdb_offset),
1148 str(self.bead_size),str(self.em_residues_per_gaussian),self._l2s(self.rigid_bodies),
1149 self._l2s(self.super_rigid_bodies),self._l2s(self.chain_of_super_rigid_bodies)])+
'|'
Stores the components required to build a standard IMP hierarchy using IMP.pmi.BuildModel() ...
def __init__
setup a TempResidue
def build
call build on all molecules (automatically makes clones)
def set_pdb_dir
Change the PDB dir.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
def get_residues
Return all modeled TempResidues as a set.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
def build
call build on all states
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
def set_gmm_dir
Change the GMM dir.
def residue_range
get residue range.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
Class for storing model, its restraints, constraints, and particles.
Stores a named protein chain.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
def get_component_topologies
Return list of ComponentTopologies for selected components.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
def get_state
Return the State containing this Molecule.
A decorator for a residue.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual model particles.
Stores a list of Molecules all with the same State index.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
Select hierarchy particles identified by the biological name.
def get_number_of_states
returns the total number of states generated
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
def set_dir
DEPRECATED: This old function sets things relative to topology file.
def import_topology_file
Read system components from topology file.
Temporarily stores residue information, even without structure available.
def create_state
returns a new IMP.pmi.representation_new.State(), increment the state index