1 """@namespace IMP.pmi.topology
2 Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
3 * Start by creating a System with `model = IMP.Model(); s = IMP.pmi.topology.System(model)`. The System will store all the states.
4 * Then call System.create_state(). You can easily create a multistate system by calling this function multiple times.
5 * For each State, call State.create_molecule() to add a Molecule (a uniquely named polymer). This function returns the Molecule object which can be passed to various PMI functions.
6 * Some useful functions to help you set up your Molecules:
7 * Access the sequence residues with slicing (Molecule[a:b]) or functions like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues(). These functions all return Python sets for easy set arithmetic using & (and), | (or), - (difference)
8 * Molecule.add_structure() to add structural information from a PDB file.
9 * Molecule.add_representation() to create a representation unit - here you can choose bead resolutions as well as alternate representations like densities or ideal helices.
10 * Molecule.create_clone() lets you set up a molecule with identical representations, just a different chain ID. Use Molecule.create_copy() if you want a molecule with the same sequence but that allows custom representations.
11 * Once data has been added and representations chosen, call System.build() to create a canonical IMP hierarchy.
12 * Following hierarchy construction, setup rigid bodies, flexible beads, etc in IMP::pmi::dof.
13 * Check your representation with a nice printout: IMP::atom::show_with_representation()
14 See a [comprehensive example](https://integrativemodeling.org/nightly/doc/ref/pmi_2multiscale_8py-example.html) for using these classes.
16 Alternatively one can construct the entire topology and degrees of freedom via formatted text file with TopologyReader and IMP::pmi::macros::BuildSystem(). This is used in the [PMI tutorial](@ref rnapolii_stalk).
17 Note that this only allows a limited set of the full options available to PMI users (rigid bodies only, fixed resolutions).
20 from __future__
import print_function
29 from collections
import defaultdict, namedtuple
30 from .
import system_tools
31 from bisect
import bisect_left
32 from math
import pi,cos,sin
33 from operator
import itemgetter
37 def _build_ideal_helix(model, residues, coord_finder):
38 """Creates an ideal helix from the specified residue range
39 Residues MUST be contiguous.
40 This function actually adds them to the TempResidue hierarchy
45 for n, tempres
in enumerate(residues):
46 if tempres.get_has_structure():
47 raise ValueError(
"You tried to build ideal_helix for a residue "
48 "that already has structure: %s" % tempres)
49 if n > 0
and tempres.get_index() != prev_idx + 1:
50 raise ValueError(
"Passed non-contiguous segment to "
51 "build_ideal_helix for %s" % tempres.get_molecule())
55 rp.set_name(
"Residue_%i" % tempres.get_index())
63 x = 2.3 * cos(n * 2 * pi / 3.6)
64 y = 2.3 * sin(n * 2 * pi / 3.6)
65 z = 6.2 / 3.6 / 2 * n * 2 * pi / 3.6
73 tempres.set_structure(this_res)
74 created_hiers.append(this_res)
75 prev_idx = tempres.get_index()
76 coord_finder.add_residues(created_hiers)
78 class _SystemBase(object):
79 """The base class for System, State and Molecule
80 classes. It contains shared functions in common to these classes
83 def __init__(self,model=None):
89 def _create_hierarchy(self):
90 """create a new hierarchy"""
94 def _create_child(self,parent_hierarchy):
95 """create a new hierarchy, set it as child of the input
97 child_hierarchy=self._create_hierarchy()
98 parent_hierarchy.add_child(child_hierarchy)
99 return child_hierarchy
102 """Build the coordinates of the system.
103 Loop through stored(?) hierarchies and set up coordinates!"""
108 class System(_SystemBase):
109 """This class initializes the root node of the global IMP.atom.Hierarchy."""
113 def __init__(self,model=None,name="System"):
114 _SystemBase.__init__(self,model)
115 self._number_of_states = 0
116 self._protocol_output = []
120 System._all_systems.add(weakref.ref(self))
123 self.hier=self._create_hierarchy()
124 self.hier.set_name(name)
125 self.hier._pmi2_system = weakref.ref(self)
128 System._all_systems = set(x
for x
in System._all_systems
129 if x()
not in (
None, self))
131 def get_states(self):
135 """Makes and returns a new IMP.pmi.topology.State in this system"""
136 self._number_of_states+=1
137 state =
State(self,self._number_of_states-1)
138 self.states.append(state)
142 return self.hier.get_name()
145 """Returns the total number of states generated"""
146 return self._number_of_states
148 def get_hierarchy(self):
152 """Build all states"""
154 for state
in self.states:
155 state.build(**kwargs)
160 """Capture details of the modeling protocol.
161 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
163 self._protocol_output.append(p)
166 for state
in self.states:
167 state._add_protocol_output(p, self)
173 """Stores a list of Molecules all with the same State index.
174 Also stores number of copies of each Molecule for easy Selection.
176 def __init__(self,system,state_index):
177 """Define a new state
178 @param system the PMI System
179 @param state_index the index of the new state
180 @note It's expected that you will not use this constructor directly,
181 but rather create it with pmi::System::create_state()
183 self.model = system.get_hierarchy().get_model()
185 self.hier = self._create_child(system.get_hierarchy())
186 self.short_name = self.long_name =
"State_"+str(state_index)
187 self.hier.set_name(self.short_name)
188 self.molecules = IMP.pmi.tools.OrderedDict()
191 self._protocol_output = []
192 for p
in system._protocol_output:
193 self._add_protocol_output(p, system)
196 return self.system.__repr__()+
'.'+self.hier.get_name()
198 def _add_protocol_output(self, p, system):
199 state = p._add_state(self)
200 self._protocol_output.append((p, state))
201 state.model = system.model
202 state.prot = self.hier
205 """Return a dictionary where key is molecule name and value
206 are the list of all copies of that molecule in setup order"""
207 return self.molecules
210 """Access a molecule by name and copy number
211 @param name The molecule name used during setup
212 @param copy_num The copy number based on input order.
213 Default: 0. Set to 'all' to get all copies
215 if name
not in self.molecules:
216 raise KeyError(
"Could not find molname %s" % name)
217 if copy_num ==
'all':
218 return self.molecules[name]
220 return self.molecules[name][copy_num]
223 alphabet=IMP.pmi.alphabets.amino_acid, is_nucleic=
None):
224 """Create a new Molecule within this State
225 @param name the name of the molecule (string);
226 it must not be already used
227 @param sequence sequence (string)
228 @param chain_id Chain ID to assign to this molecule
229 @param alphabet Mapping from FASTA codes to residue types
232 if name
in self.molecules:
233 raise ValueError(
'Cannot use a molecule name already used')
237 "is_nucleic is deprecated. Use alphabet instead.")
238 alphabet = IMP.pmi.alphabets.rna
240 mol =
Molecule(self, name, sequence, chain_id, copy_num=0,
242 self.molecules[name] = [mol]
245 def get_hierarchy(self):
248 def get_number_of_copies(self,molname):
249 return len(self.molecules[molname])
251 def _register_copy(self,molecule):
252 molname = molecule.get_hierarchy().get_name()
253 self.molecules[molname].append(molecule)
256 """call build on all molecules (automatically makes clones)"""
258 for molname
in self.molecules:
259 for mol
in reversed(self.molecules[molname]):
266 _PDBElement = namedtuple(
'PDBElement', [
'offset',
'filename',
'chain_id'])
268 class _RepresentationHandler(object):
269 """Handle PMI representation and use it to populate that of any attached
270 ProtocolOutput objects"""
271 def __init__(self, name, pos, pdb_elements):
274 self.last_index =
None
275 self.last_pdb_index =
None
276 self.pdb_for_residue = {}
277 for residues, pdb
in pdb_elements:
279 self.pdb_for_residue[r.get_index()] = pdb
281 def _get_pdb(self, h):
282 """Return a PDBElement if the given hierarchy was read from a
286 return self.pdb_for_residue.get(rind,
None)
288 def __call__(self, res):
289 """Handle a single residue"""
290 if len(self.pos) == 0:
293 pi = h.get_particle_index()
295 if self.last_index
is None or pi != self.last_index:
296 pdb = self._get_pdb(h)
301 fragi = frag.get_particle_index()
303 if self.last_pdb_index
is not None \
304 and self.last_pdb_index == fragi:
306 self.last_pdb_index = fragi
307 indices = frag.get_residue_indexes()
308 for p, state
in self.pos:
309 p.add_pdb_element(state, self.name,
310 indices[0], indices[-1], pdb.offset,
311 pdb.filename, pdb.chain_id, frag)
314 indices = frag.get_residue_indexes()
315 for p, state
in self.pos:
316 p.add_bead_element(state, self.name,
317 indices[0], indices[-1], 1, h)
320 for p, state
in self.pos:
321 p.add_bead_element(state, self.name, resind, resind, 1, h)
323 raise TypeError(
"Unhandled hierarchy %s" % str(h))
329 """Stores a named protein chain.
330 This class is constructed from within the State class.
331 It wraps an IMP.atom.Molecule and IMP.atom.Copy.
332 Structure is read using this class.
333 Resolutions and copies can be registered, but are only created
334 when build() is called
337 def __init__(self, state, name, sequence, chain_id, copy_num,
338 mol_to_clone=
None, alphabet=IMP.pmi.alphabets.amino_acid):
339 """The user should not call this directly; instead call State::create_molecule()
340 @param state The parent PMI State
341 @param name The name of the molecule (string)
342 @param sequence Sequence (string)
343 @param chain_id The chain of this molecule
344 @param copy_num Store the copy number
345 @param mol_to_clone The original molecule (for cloning ONLY)
346 @note It's expected that you will not use this constructor directly,
347 but rather create a Molecule with pmi::State::create_molecule()
350 self.model = state.get_hierarchy().get_model()
352 self.sequence = sequence
354 self.mol_to_clone = mol_to_clone
355 self.alphabet = alphabet
356 self.representations = []
357 self._pdb_elements = []
358 self._represented = IMP.pmi.tools.OrderedSet()
359 self.coord_finder = _FindCloseStructure()
360 self._ideal_helices = []
363 self.hier = self._create_child(self.state.get_hierarchy())
364 self.hier.set_name(name)
366 self._name_with_copy =
"%s.%d" % (name, copy_num)
369 self.chain.set_sequence(self.sequence)
372 for ns,s
in enumerate(sequence):
374 self.residues.append(r)
377 return self.state.__repr__()+
'.'+self.
get_name()+
'.'+ \
380 def __getitem__(self,val):
381 if isinstance(val,int):
382 return self.residues[val]
383 elif isinstance(val,str):
384 return self.residues[int(val)-1]
385 elif isinstance(val,slice):
386 return IMP.pmi.tools.OrderedSet(self.residues[val])
388 raise TypeError(
"Indexes must be int or str")
391 """Return the IMP Hierarchy corresponding to this Molecule"""
395 """Return this Molecule name"""
396 return self.hier.get_name()
399 """Return the State containing this Molecule"""
403 """Returns list of OrderedSets with requested ideal helices"""
404 return self._ideal_helices
407 """get residue range from a to b, inclusive.
408 Use integers to get 0-indexing, or strings to get PDB-indexing"""
409 if isinstance(a,int)
and isinstance(b,int)
and isinstance(stride,int):
410 return IMP.pmi.tools.OrderedSet(self.residues[a:b+1:stride])
411 elif isinstance(a,str)
and isinstance(b,str)
and isinstance(stride,int):
412 return IMP.pmi.tools.OrderedSet(self.residues[int(a)-1:int(b):stride])
414 raise TypeError(
"Range ends must be int or str. "
415 "Stride must be int.")
418 """ Return all modeled TempResidues as a set"""
419 all_res = IMP.pmi.tools.OrderedSet(self.residues)
423 """ Return set of TempResidues that have representation"""
424 return self._represented
427 """ Return a set of TempResidues that have associated structure coordinates """
428 atomic_res = IMP.pmi.tools.OrderedSet()
429 for res
in self.residues:
430 if res.get_has_structure():
435 """ Return a set of TempResidues that don't have associated structure coordinates """
436 non_atomic_res = IMP.pmi.tools.OrderedSet()
437 for res
in self.residues:
438 if not res.get_has_structure():
439 non_atomic_res.add(res)
440 return non_atomic_res
443 """Create a new Molecule with the same name and sequence but a higher copy number.
444 Returns the Molecule. No structure or representation will be copied!
445 @param chain_id Chain ID of the new molecule
448 copy_num=self.state.get_number_of_copies(self.
get_name()))
449 self.state._register_copy(mol)
453 """Create a Molecule clone (automatically builds same structure and representation)
454 @param chain_id If you want to set the chain ID of the copy to something
455 @note You cannot add structure or representations to a clone!
458 copy_num=self.state.get_number_of_copies(self.
get_name()),
460 self.state._register_copy(mol)
464 offset=0,model_num=
None,ca_only=
False,
466 """Read a structure and store the coordinates.
467 @return the atomic residues (as a set)
468 @param pdb_fn The file to read
469 @param chain_id Chain ID to read
470 @param res_range Add only a specific set of residues from the PDB file.
471 res_range[0] is the starting and res_range[1] is the
472 ending residue index.
473 @param offset Apply an offset to the residue indexes of the PDB
474 file. This number is added to the PDB sequence.
475 @param model_num Read multi-model PDB and return that model
476 @param ca_only Only read the CA positions from the PDB file
477 @param soft_check If True, it only warns if there are sequence
478 mismatches between the PDB and the Molecule (FASTA)
479 sequence, and uses the sequence from the PDB.
480 If False (Default), it raises an error when there
481 are sequence mismatches.
482 @note If you are adding structure without a FASTA file, set soft_check
485 if self.mol_to_clone
is not None:
486 raise ValueError(
'You cannot call add_structure() for a clone')
491 rhs = system_tools.get_structure(self.model,pdb_fn,chain_id,res_range,offset,ca_only=ca_only)
492 self.coord_finder.add_residues(rhs)
494 if len(self.residues)==0:
496 "Substituting PDB residue type with FASTA residue type. "
500 self._pdb_elements.append((rhs,
501 _PDBElement(offset=offset, filename=pdb_fn, chain_id=chain_id)))
504 atomic_res = IMP.pmi.tools.OrderedSet()
505 for nrh,rh
in enumerate(rhs):
506 pdb_idx = rh.get_index()
507 raw_idx = pdb_idx - 1
510 while len(self.residues)<pdb_idx:
513 IMP.pmi.alphabets.amino_acid)
514 self.residues.append(r)
517 internal_res = self.residues[raw_idx]
518 if len(self.sequence)<raw_idx:
520 internal_res.set_structure(rh,soft_check)
521 atomic_res.add(internal_res)
523 self.chain.set_sequence(self.sequence)
529 bead_extra_breaks=[],
530 bead_ca_centers=
True,
531 bead_default_coord=[0,0,0],
532 density_residues_per_component=
None,
534 density_force_compute=
False,
535 density_voxel_size=1.0,
536 setup_particles_as_densities=
False,
539 """Set the representation for some residues. Some options (beads, ideal helix)
540 operate along the backbone. Others (density options) are volumetric.
541 Some of these you can combine e.g., beads+densities or helix+densities
542 See @ref pmi_resolution
543 @param residues Set of PMI TempResidues for adding the representation.
544 Can use Molecule slicing to get these, e.g. mol[a:b]+mol[c:d]
545 If None, will select all residues for this Molecule.
546 @param resolutions Resolutions for beads representations.
547 If structured, will average along backbone, breaking at sequence breaks.
548 If unstructured, will just create beads.
549 Pass an integer or list of integers
550 @param bead_extra_breaks Additional breakpoints for splitting beads.
551 The value can be the 0-ordered position, after which it'll insert the break.
552 Alternatively pass PDB-style (1-ordered) indices as a string.
553 I.e., bead_extra_breaks=[5,25] is the same as ['6','26']
554 @param bead_ca_centers Set to True if you want the resolution=1 beads to be at CA centers
555 (otherwise will average atoms to get center). Defaults to True.
556 @param bead_default_coord Advanced feature. Normally beads are placed at the nearest structure.
557 If no structure provided (like an all bead molecule), the beads go here.
558 @param density_residues_per_component Create density (Gaussian Mixture Model)
559 for these residues. Must also supply density_prefix
560 @param density_prefix Prefix (assuming '.txt') to read components from or write to.
561 If exists, will read unless you set density_force_compute=True.
562 Will also write map (prefix+'.mrc').
563 Must also supply density_residues_per_component.
564 @param density_force_compute Set true to force overwrite density file.
565 @param density_voxel_size Advanced feature. Set larger if densities taking too long to rasterize.
566 Set to 0 if you don't want to create the MRC file
567 @param setup_particles_as_densities Set to True if you want each particle to be its own density.
568 Useful for all-atom models or flexible beads.
569 Mutually exclusive with density_ options
570 @param ideal_helix Create idealized helix structures for these residues at resolution 1.
571 Any other resolutions passed will be coarsened from there.
572 Resolution 0 will not work, you may have to use MODELLER to do that (for now).
573 @param color the color applied to the hierarchies generated.
574 Format options: tuple (r,g,b) with values 0 to 1;
575 float (from 0 to 1, a map from Blue to Green to Red);
576 a [Chimera name](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html);
577 a hex RGB string (e.g. "#ff0000");
578 an IMP.display.Color object
579 @note You cannot call add_representation multiple times for the
584 if self.mol_to_clone
is not None:
585 raise ValueError(
'You cannot call add_representation() for a clone.'
586 ' Maybe use a copy instead.')
590 res = IMP.pmi.tools.OrderedSet(self.residues)
592 res = IMP.pmi.tools.OrderedSet(self.residues)
594 res = IMP.pmi.tools.OrderedSet([residues])
595 elif hasattr(residues,
'__iter__'):
597 raise Exception(
'You passed an empty set to add_representation')
598 if type(residues)
is IMP.pmi.tools.OrderedSet
and type(next(iter(residues)))
is TempResidue:
600 elif type(residues)
is set
and type(next(iter(residues)))
is TempResidue:
601 res = IMP.pmi.tools.OrderedSet(residues)
602 elif type(residues)
is list
and type(residues[0])
is TempResidue:
603 res = IMP.pmi.tools.OrderedSet(residues)
605 raise Exception(
"You passed an iteratible of something other than TempResidue",res)
607 raise Exception(
"add_representation: you must pass a set of residues or nothing(=all residues)")
610 ov = res & self._represented
613 self._represented|=res
616 if not hasattr(resolutions,
'__iter__'):
617 if type(resolutions)
is int:
618 resolutions = [resolutions]
620 raise Exception(
"you tried to pass resolutions that are not int or list-of-int")
621 if len(resolutions)>1
and not ideal_helix:
623 if not r.get_has_structure():
624 raise Exception(
'You are creating multiple resolutions for '
625 'unstructured regions. This will have unexpected results.')
628 if density_residues_per_component
or density_prefix:
629 if not density_residues_per_component
and density_prefix:
630 raise Exception(
'If requesting density, must provide '
631 'density_residues_per_component AND density_prefix')
632 if density_residues_per_component
and setup_particles_as_densities:
633 raise Exception(
'Cannot create both volumetric density '
634 '(density_residues_per_component) AND '
635 'individual densities (setup_particles_as_densities) '
636 'in the same representation')
637 if len(resolutions)>1
and setup_particles_as_densities:
638 raise Exception(
'You have multiple bead resolutions but are attempting to '
639 'set them all up as individual Densities. '
640 'This could have unexpected results.')
645 raise Exception(
"For ideal helices, cannot build resolution 0: "
646 "you have to do that in MODELLER")
647 if 1
not in resolutions:
648 resolutions = [1] + list(resolutions)
649 self._ideal_helices.append(res)
653 if r.get_molecule()!=self:
654 raise Exception(
'You are adding residues from a different molecule to',self.__repr__())
658 for b
in bead_extra_breaks:
660 breaks.append(int(b)-1)
664 self.representations.append(_Representation(res,
669 density_residues_per_component,
671 density_force_compute,
673 setup_particles_as_densities,
677 def _all_protocol_output(self):
678 return self.state._protocol_output
681 """Create all parts of the IMP hierarchy
682 including Atoms, Residues, and Fragments/Representations and,
684 Will only build requested representations.
685 @note Any residues assigned a resolution must have an IMP.atom.Residue
686 hierarchy containing at least a CAlpha. For missing residues,
687 these can be constructed from the PDB file.
691 name=self.hier.get_name()
692 for po, state
in self._all_protocol_output():
693 po.create_component(state, name,
True,
694 asym_name=self._name_with_copy)
695 po.add_component_sequence(state, name, self.sequence,
696 asym_name=self._name_with_copy,
697 alphabet=self.alphabet)
699 if self.mol_to_clone
is not None:
700 for nr,r
in enumerate(self.mol_to_clone.residues):
701 if r.get_has_structure():
702 clone = IMP.atom.create_clone(r.get_hierarchy())
703 self.residues[nr].set_structure(
705 for old_rep
in self.mol_to_clone.representations:
706 new_res = IMP.pmi.tools.OrderedSet()
707 for r
in old_rep.residues:
708 new_res.add(self.residues[r.get_internal_index()])
709 self._represented.add(self.residues[r.get_internal_index()])
710 new_rep = _Representation(new_res,
711 old_rep.bead_resolutions,
712 old_rep.bead_extra_breaks,
713 old_rep.bead_ca_centers,
714 old_rep.bead_default_coord,
715 old_rep.density_residues_per_component,
716 old_rep.density_prefix,
718 old_rep.density_voxel_size,
719 old_rep.setup_particles_as_densities,
722 self.representations.append(new_rep)
723 self.coord_finder = self.mol_to_clone.coord_finder
726 no_rep = [r
for r
in self.residues
if r
not in self._represented]
729 'Residues without representation in molecule %s: %s'
730 % (self.
get_name(), system_tools.resnums2str(no_rep)),
734 for rep
in self.representations:
736 _build_ideal_helix(self.model,rep.residues,self.coord_finder)
741 rephandler = _RepresentationHandler(self._name_with_copy,
742 list(self._all_protocol_output()),
745 for rep
in self.representations:
746 built_reps += system_tools.build_representation(
747 self, rep, self.coord_finder, rephandler)
751 for br
in built_reps:
752 self.hier.add_child(br)
756 for res
in self.residues:
757 idx = res.get_index()
762 residue_index=res.get_index(),
763 resolution=1).get_selected_particles()
776 self._represented = IMP.pmi.tools.OrderedSet([a
for a
in self._represented])
781 """Helpful utility for getting particles at all resolutions from this molecule.
782 Can optionally pass a set of residue indexes"""
784 raise Exception(
"Cannot get all resolutions until you build the Molecule")
785 if residue_indexes
is None:
786 residue_indexes = [r.get_index()
for r
in self.
get_residues()]
788 residue_indexes=residue_indexes)
793 class _Representation(object):
794 """Private class just to store a representation request"""
801 density_residues_per_component,
803 density_force_compute,
805 setup_particles_as_densities,
808 self.residues = residues
809 self.bead_resolutions = bead_resolutions
810 self.bead_extra_breaks = bead_extra_breaks
811 self.bead_ca_centers = bead_ca_centers
812 self.bead_default_coord = bead_default_coord
813 self.density_residues_per_component = density_residues_per_component
814 self.density_prefix = density_prefix
815 self.density_force_compute = density_force_compute
816 self.density_voxel_size = density_voxel_size
817 self.setup_particles_as_densities = setup_particles_as_densities
818 self.ideal_helix = ideal_helix
821 class _FindCloseStructure(object):
822 """Utility to get the nearest observed coordinate"""
825 def add_residues(self,residues):
832 self.coords.append([idx,xyz])
835 self.coords.append([idx,xyz])
837 raise(
"_FindCloseStructure: wrong selection")
839 self.coords.sort(key=itemgetter(0))
840 def find_nearest_coord(self,query):
843 keys = [r[0]
for r
in self.coords]
844 pos = bisect_left(keys,query)
847 elif pos == len(self.coords):
848 ret = self.coords[-1]
850 before = self.coords[pos - 1]
851 after = self.coords[pos]
852 if after[0] - query < query - before[0]:
859 """A dictionary-like wrapper for reading and storing sequence data"""
860 def __init__(self,fasta_fn,name_map=None):
861 """read a fasta file and extract all the requested sequences
862 @param fasta_fn sequence file
863 @param name_map dictionary mapping the fasta name to final stored name
865 self.sequences = IMP.pmi.tools.OrderedDict()
866 self.read_sequences(fasta_fn,name_map)
868 return len(self.sequences)
869 def __contains__(self,x):
870 return x
in self.sequences
871 def __getitem__(self,key):
873 allseqs = list(self.sequences.keys())
875 return self.sequences[allseqs[key]]
877 raise IndexError(
"You tried to access sequence number %d "
878 "but there's only %d" % (key, len(allseqs)))
880 return self.sequences[key]
882 return self.sequences.__iter__()
885 for s
in self.sequences:
886 ret +=
'%s\t%s\n'%(s,self.sequences[s])
888 def read_sequences(self,fasta_fn,name_map=None):
891 with open(fasta_fn,
'r') as fh:
892 for (num, line)
in enumerate(fh):
893 if line.startswith(
'>'):
895 self.sequences[code] = seq.strip(
'*')
896 code = line.rstrip()[1:]
897 if name_map
is not None:
899 code = name_map[code]
908 "Found FASTA sequence before first header at line %d: %s" % (num + 1, line))
911 self.sequences[code] = seq.strip(
'*')
915 """Data_structure for reading and storing sequence data from pdb"""
916 def __init__(self,model,pdb_fn,name_map=None):
917 """read a pdb file and returns all sequences for each contiguous fragment
919 @param name_map dictionary mapping the pdb chain id to final stored name
931 self.sequences = IMP.pmi.tools.OrderedDict()
932 self.read_sequences(pdb_fn,name_map)
934 def read_sequences(self,pdb_fn,name_map):
935 read_file = IMP.atom.read_pdb
936 if pdb_fn.endswith(
'.cif'):
937 read_file = IMP.atom.read_mmcif
939 cs=IMP.atom.get_by_type(t,IMP.atom.CHAIN_TYPE)
947 print(
"Chain ID %s not in name_map, skipping" % id)
949 rs=IMP.atom.get_by_type(c,IMP.atom.RESIDUE_TYPE)
956 isprotein=dr.get_is_protein()
957 isrna=dr.get_is_rna()
958 isdna=dr.get_is_dna()
962 rids_olc_dict[rid]=olc
964 if dr.get_residue_type() == IMP.atom.DADE: olc=
"A"
965 if dr.get_residue_type() == IMP.atom.DURA: olc=
"U"
966 if dr.get_residue_type() == IMP.atom.DCYT: olc=
"C"
967 if dr.get_residue_type() == IMP.atom.DGUA: olc=
"G"
968 if dr.get_residue_type() == IMP.atom.DTHY: olc=
"T"
970 rids_olc_dict[rid]=olc
972 if dr.get_residue_type() == IMP.atom.ADE: olc=
"A"
973 if dr.get_residue_type() == IMP.atom.URA: olc=
"U"
974 if dr.get_residue_type() == IMP.atom.CYT: olc=
"C"
975 if dr.get_residue_type() == IMP.atom.GUA: olc=
"G"
976 if dr.get_residue_type() == IMP.atom.THY: olc=
"T"
978 rids_olc_dict[rid]=olc
979 group_rids=self.group_indexes(rids)
980 contiguous_sequences=IMP.pmi.tools.OrderedDict()
981 for group
in group_rids:
983 for i
in range(group[0],group[1]+1):
984 sequence_fragment+=rids_olc_dict[i]
985 contiguous_sequences[group]=sequence_fragment
986 self.sequences[id]=contiguous_sequences
992 def group_indexes(self,indexes):
993 from itertools
import groupby
995 for k, g
in groupby(enumerate(indexes),
lambda x:x[0]-x[1]):
996 group = [x[1]
for x
in g]
997 ranges.append((group[0], group[-1]))
1002 '''This function computes and prints the alignment between the
1003 fasta file and the pdb sequence, computes the offsets for each contiguous
1004 fragment in the PDB.
1005 @param fasta_sequences IMP.pmi.topology.Sequences object
1006 @param pdb_sequences IMP.pmi.topology.PDBSequences object
1007 @param show boolean default False, if True prints the alignments.
1008 The input objects should be generated using map_name dictionaries such that fasta_id
1009 and pdb_chain_id are mapping to the same protein name. It needs BioPython.
1010 Returns a dictionary of offsets, organized by peptide range (group):
1011 example: offsets={"ProtA":{(1,10):1,(20,30):10}}'''
1012 from Bio
import pairwise2
1013 from Bio.pairwise2
import format_alignment
1014 from Bio.SubsMat
import MatrixInfo
as matlist
1015 matrix = matlist.blosum62
1017 raise Exception(
"Fasta sequences not type IMP.pmi.topology.Sequences")
1019 raise Exception(
"pdb sequences not type IMP.pmi.topology.PDBSequences")
1020 offsets=IMP.pmi.tools.OrderedDict()
1021 for name
in fasta_sequences.sequences:
1023 seq_fasta=fasta_sequences.sequences[name]
1024 if name
not in pdb_sequences.sequences:
1025 print(
"Fasta id %s not in pdb names, aligning against every pdb chain" % name)
1026 pdbnames=pdb_sequences.sequences.keys()
1029 for pdbname
in pdbnames:
1030 for group
in pdb_sequences.sequences[pdbname]:
1031 if group[1]-group[0]+1<7:
continue
1032 seq_frag_pdb=pdb_sequences.sequences[pdbname][group]
1034 print(
"########################")
1036 print(
"protein name",pdbname)
1037 print(
"fasta id", name)
1038 print(
"pdb fragment",group)
1039 align=pairwise2.align.localms(seq_fasta, seq_frag_pdb, 2, -1, -.5, -.1)[0]
1041 offset=a[3]+1-group[0]
1043 print(
"alignment sequence start-end",(a[3]+1,a[4]+1))
1044 print(
"offset from pdb to fasta index",offset)
1045 print(format_alignment(*a))
1046 if name
not in offsets:
1048 if group
not in offsets[pdbname]:
1049 offsets[pdbname][group]=offset
1051 if group
not in offsets[pdbname]:
1052 offsets[pdbname][group]=offset
1061 """Temporarily stores residue information, even without structure available."""
1063 def __init__(self, molecule, code, index, internal_index, alphabet):
1064 """setup a TempResidue
1065 @param molecule PMI Molecule to which this residue belongs
1066 @param code one-letter residue type code
1067 @param index PDB index
1068 @param internal_index The number in the sequence
1071 self.molecule = molecule
1072 self.rtype = alphabet.get_residue_type_from_one_letter_code(code)
1073 self.pdb_index = index
1074 self.internal_index = internal_index
1078 self._structured =
False
1083 return str(self.state_index)+
"_"+self.molecule.get_name()+
"_"+str(self.copy_index)+
"_"+self.get_code()+str(self.get_index())
1085 return self.__str__()
1088 return (self.state_index, self.molecule, self.copy_index, self.rtype, self.pdb_index, self.internal_index)
1089 def __eq__(self,other):
1090 return type(other)==type(self)
and self.__key() == other.__key()
1092 return hash(self.__key())
1094 return self.pdb_index
1095 def get_internal_index(self):
1096 return self.internal_index
1099 def get_residue_type(self):
1101 def get_hierarchy(self):
1103 def get_molecule(self):
1104 return self.molecule
1105 def get_has_structure(self):
1106 return self._structured
1107 def set_structure(self,res,soft_check=False):
1108 if res.get_residue_type()!=self.get_residue_type():
1112 'Inconsistency between FASTA sequence and PDB sequence. '
1113 'FASTA type %s %s and PDB type %s'
1114 % (self.get_index(), self.hier.get_residue_type(),
1115 res.get_residue_type()),
1117 self.hier.set_residue_type((self.get_residue_type()))
1118 self.rtype = self.get_residue_type()
1120 raise Exception(
'ERROR: PDB residue index',self.get_index(),
'is',
1122 'and sequence residue is',self.get_code())
1124 for a
in res.get_children():
1125 self.hier.add_child(a)
1127 a.get_particle().set_name(
'Atom %s of residue %i'%(atype.__str__().strip(
'"'),
1128 self.hier.get_index()))
1129 self._structured =
True
1132 """Automatically setup Sytem and Degrees of Freedom with a formatted text file.
1133 The file is read in and each part of the topology is stored as a
1134 ComponentTopology object for input into IMP::pmi::macros::BuildSystem.
1135 The topology file should be in a simple pipe-delimited format:
1137 |molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|residue_range|pdb_offset|bead_size|em_residues_per_gaussian|rigid_body|super_rigid_body|chain_of_super_rigid_bodies|flags|
1138 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1,1140 |0|10|0|1|1,3|1||
1139 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1141,1274|0|10|0|2|1,3|1||
1140 |Rpb1 |blue |1WCM.fasta|1WCM:A|1WCM.pdb|A|1275,END |0|10|0|3|1,3|1||
1141 |Rpb2 |red |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1142 |Rpb2.1 |green |1WCM.fasta|1WCM:B|1WCM.pdb|B|all |0|10|0|4|2,3|2||
1146 These are the fields you can enter:
1147 - `component_name`: Name of the component (chain). Serves as the parent
1148 hierarchy for this structure. Multiple copies of the same component
1149 can be created by appending a copy number after a period; if none is
1150 specified, a copy number of 0 is assumed (e.g. Rpb2.1 is the second copy
1152 - `color`: The color used in the output RMF file. Uses
1153 [Chimera names](https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/colortables.html),
1154 (e.g. "red"), or R,G,B values as three comma-separated floating point
1155 numbers from 0 to 1 (e.g. "1.0, 0.0, 0.0") or a 6-digit hex string
1156 starting with '#' (e.g. 0xff0000).
1157 - `fasta_fn`: Name of FASTA file containing this component.
1158 - `fasta_id`: String found in FASTA sequence header line. The sequence read
1159 from the file is assumed to be a protein sequence. If it should instead
1160 be treated as RNA or DNA, add an ',RNA' or ',DNA' suffix. For example,
1161 a `fasta_id` of 'myseq,RNA' will read the sequence 'myseq' from the
1162 FASTA file and treat it as RNA.
1163 - `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
1164 If left empty, will set up as BEADS (you can also specify "BEADS")
1165 Can also write "IDEAL_HELIX".
1166 - `chain`: Chain ID of this domain in the PDB file.
1167 - `residue_range`: Comma delimited pair defining range.
1168 Can leave empty or use 'all' for entire sequence from PDB file.
1169 The second item in the pair can be END to select the last residue in the
1171 - `pdb_offset`: Offset to sync PDB residue numbering with FASTA numbering.
1172 For example, an offset of -10 would match the first residue in the
1173 FASTA file (which is always numbered sequentially starting from 1) with
1174 residue 11 in the PDB file.
1175 - `bead_size`: The size (in residues) of beads used to model areas not
1176 covered by PDB coordinates. These will be built automatically.
1177 - `em_residues`: The number of Gaussians used to model the electron
1178 density of this domain. Set to zero if no EM fitting will be done.
1179 The GMM files will be written to <gmm_dir>/<component_name>_<em_res>.txt
1181 - `rigid_body`: Leave empty if this object is not in a rigid body.
1182 Otherwise, this is a number corresponding to the rigid body containing
1183 this object. The number itself is just used for grouping things.
1184 - `super_rigid_body`: Add a mover that periodically moves several related
1185 domains as if they were a single large rigid body. In between such moves,
1186 the domains move independently. This can improve sampling.
1187 - `chain_of_super_rigid_bodies`: Do super-rigid-body moves (as above)
1188 for all adjacent pairs of domains in the chain.
1189 - `flags` additional flags for advanced options
1190 @note All filenames are relative to the paths specified in the constructor.
1199 @param topology_file Pipe-delimited file specifying the topology
1200 @param pdb_dir Relative path to the pdb directory
1201 @param fasta_dir Relative path to the fasta directory
1202 @param gmm_dir Relative path to the GMM directory
1204 self.topology_file = topology_file
1205 self.molecules = IMP.pmi.tools.OrderedDict()
1206 self.pdb_dir = pdb_dir
1207 self.fasta_dir = fasta_dir
1208 self.gmm_dir = gmm_dir
1209 self._components = self.
read(topology_file)
1211 def write_topology_file(self,outfile):
1212 with open(outfile,
"w")
as f:
1213 f.write(
"|molecule_name|color|fasta_fn|fasta_id|pdb_fn|chain|"
1214 "residue_range|pdb_offset|bead_size|em_residues_per_gaussian|"
1215 "rigid_body|super_rigid_body|chain_of_super_rigid_bodies|\n")
1216 for c
in self._components:
1217 output = c.get_str()+
'\n'
1222 """ Return list of ComponentTopologies for selected components
1223 @param topology_list List of indices to return"""
1224 if topology_list ==
"all":
1225 topologies = self._components
1228 for i
in topology_list:
1229 topologies.append(self._components[i])
1232 def get_molecules(self):
1233 return self.molecules
1235 def read(self, topology_file, append=False):
1236 """Read system components from topology file. append=False will erase
1237 current topology and overwrite with new
1240 is_directories =
False
1245 with open(topology_file)
as infile:
1247 if line.lstrip()==
"" or line[0]==
"#":
1249 elif line.split(
'|')[1].strip()
in (
"molecule_name"):
1251 is_directories =
False
1254 elif line.split(
'|')[1] ==
"component_name":
1257 "Old-style topology format (using "
1258 "|component_name|) is deprecated. Please switch to "
1259 "the new-style format (using |molecule_name|)\n")
1261 is_directories =
False
1263 elif line.split(
'|')[1] ==
"directories":
1265 "Setting directories in the topology file "
1266 "is deprecated. Please do so through the "
1267 "TopologyReader constructor. Note that new-style "
1268 "paths are relative to the current working "
1269 "directory, not the topology file.\n")
1270 is_directories =
True
1271 elif is_directories:
1272 fields = line.split(
'|')
1273 setattr(self, fields[1],
1276 new_component = self._parse_line(line, linenum, old_format)
1277 self._components.append(new_component)
1279 return self._components
1281 def _parse_line(self, component_line, linenum, old_format):
1282 """Parse a line of topology values and matches them to their key.
1283 Checks each value for correct syntax
1284 Returns a list of Component objects
1288 values = [s.strip()
for s
in component_line.split(
'|')]
1293 c.molname = values[1]
1295 c._domain_name = values[2]
1298 names = values[1].split(
'.')
1300 c.molname = names[0]
1303 c.molname = names[0]
1304 c.copyname = names[1]
1306 c.molname = names[0]
1307 c.copyname = names[1]
1308 errors.append(
"Molecule name should be <molecule.copyID>")
1309 errors.append(
"For component %s line %d " % (c.molname,linenum))
1310 c._domain_name = c.molname +
'.' + c.copyname
1311 colorfields = values[2].split(
',')
1312 if len(colorfields)==3:
1313 c.color = [float(x)
for x
in colorfields]
1314 if any([x>1
for x
in c.color]):
1315 c.color=[x/255
for x
in c.color]
1318 c._orig_fasta_file = values[3]
1319 c.fasta_file = values[3]
1320 fasta_field = values[4].split(
",")
1321 c.fasta_id = fasta_field[0]
1323 if len(fasta_field) > 1:
1324 c.fasta_flag = fasta_field[1]
1325 c._orig_pdb_input = values[5]
1326 pdb_input = values[5]
1327 tmp_chain = values[6]
1330 bead_size = values[9]
1333 rbs = srbs = csrbs =
''
1339 if c.molname
not in self.molecules:
1340 self.molecules[c.molname] = _TempMolecule(c)
1343 c._orig_fasta_file = self.molecules[c.molname].orig_component._orig_fasta_file
1344 c.fasta_id = self.molecules[c.molname].orig_component.fasta_id
1345 self.molecules[c.molname].add_component(c,c.copyname)
1348 c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1350 errors.append(
"PDB must have BEADS, IDEAL_HELIX, or filename")
1351 errors.append(
"For component %s line %d is not correct"
1352 "|%s| was given." % (c.molname,linenum,pdb_input))
1353 elif pdb_input
in (
"IDEAL_HELIX",
"BEADS"):
1354 c.pdb_file = pdb_input
1356 c.pdb_file = os.path.join(self.pdb_dir,pdb_input)
1359 if len(tmp_chain)==1
or len(tmp_chain)==2:
1362 errors.append(
"PDB Chain identifier must be one or two characters.")
1363 errors.append(
"For component %s line %d is not correct"
1364 "|%s| was given." % (c.molname,linenum,tmp_chain))
1368 if rr.strip()==
'all' or str(rr)==
"":
1369 c.residue_range =
None
1370 elif len(rr.split(
','))==2
and self._is_int(rr.split(
',')[0])
and (self._is_int(rr.split(
',')[1])
or rr.split(
',')[1] ==
'END'):
1372 c.residue_range = (int(rr.split(
',')[0]), rr.split(
',')[1])
1373 if c.residue_range[1] !=
'END':
1374 c.residue_range = (c.residue_range[0], int(c.residue_range[1]))
1376 if old_format
and c.residue_range[1] == -1:
1377 c.residue_range = (c.residue_range[0],
'END')
1379 errors.append(
"Residue Range format for component %s line %d is not correct" % (c.molname, linenum))
1380 errors.append(
"Correct syntax is two comma separated integers: |start_res, end_res|. end_res can also be END to select the last residue in the chain. |%s| was given." % rr)
1381 errors.append(
"To select all residues, indicate |\"all\"|")
1384 if self._is_int(offset):
1385 c.pdb_offset=int(offset)
1386 elif len(offset)==0:
1389 errors.append(
"PDB Offset format for component %s line %d is not correct" % (c.molname, linenum))
1390 errors.append(
"The value must be a single integer. |%s| was given." % offset)
1393 if self._is_int(bead_size):
1394 c.bead_size=int(bead_size)
1395 elif len(bead_size)==0:
1398 errors.append(
"Bead Size format for component %s line %d is not correct" % (c.molname, linenum))
1399 errors.append(
"The value must be a single integer. |%s| was given." % bead_size)
1402 if self._is_int(emg):
1404 c.density_prefix = os.path.join(self.gmm_dir,c.get_unique_name())
1405 c.gmm_file = c.density_prefix+
'.txt'
1406 c.mrc_file = c.density_prefix+
'.gmm'
1408 c.em_residues_per_gaussian=int(emg)
1410 c.em_residues_per_gaussian = 0
1412 c.em_residues_per_gaussian = 0
1414 errors.append(
"em_residues_per_gaussian format for component "
1415 "%s line %d is not correct" % (c.molname, linenum))
1416 errors.append(
"The value must be a single integer. |%s| was given." % emg)
1420 if not self._is_int(rbs):
1421 errors.append(
"rigid bodies format for component "
1422 "%s line %d is not correct" % (c.molname, linenum))
1423 errors.append(
"Each RB must be a single integer, or empty. "
1424 "|%s| was given." % rbs)
1425 c.rigid_body = int(rbs)
1429 srbs = srbs.split(
',')
1431 if not self._is_int(i):
1432 errors.append(
"super rigid bodies format for component "
1433 "%s line %d is not correct" % (c.molname, linenum))
1434 errors.append(
"Each SRB must be a single integer. |%s| was given." % srbs)
1435 c.super_rigid_bodies = srbs
1439 if not self._is_int(csrbs):
1440 errors.append(
"em_residues_per_gaussian format for component "
1441 "%s line %d is not correct" % (c.molname, linenum))
1442 errors.append(
"Each CSRB must be a single integer. |%s| was given." % csrbs)
1443 c.chain_of_super_rigid_bodies = csrbs
1447 raise ValueError(
"Fix Topology File syntax errors and rerun: " \
1448 +
"\n".join(errors))
1454 """Change the GMM dir"""
1455 self.gmm_dir = gmm_dir
1456 for c
in self._components:
1457 c.gmm_file = os.path.join(self.gmm_dir,c.get_unique_name()+
".txt")
1458 c.mrc_file = os.path.join(self.gmm_dir,c.get_unique_name()+
".mrc")
1459 print(
'new gmm',c.gmm_file)
1462 """Change the PDB dir"""
1463 self.pdb_dir = pdb_dir
1464 for c
in self._components:
1465 if not c._orig_pdb_input
in (
"",
"None",
"IDEAL_HELIX",
"BEADS"):
1466 c.pdb_file = os.path.join(self.pdb_dir,c._orig_pdb_input)
1469 """Change the FASTA dir"""
1470 self.fasta_dir = fasta_dir
1471 for c
in self._components:
1472 c.fasta_file = os.path.join(self.fasta_dir,c._orig_fasta_file)
1474 def _is_int(self, s):
1478 return float(s).is_integer()
1483 """Return list of lists of rigid bodies (as domain name)"""
1484 rbl = defaultdict(list)
1485 for c
in self._components:
1487 rbl[c.rigid_body].append(c.get_unique_name())
1491 """Return list of lists of super rigid bodies (as domain name)"""
1492 rbl = defaultdict(list)
1493 for c
in self._components:
1494 for rbnum
in c.super_rigid_bodies:
1495 rbl[rbnum].append(c.get_unique_name())
1499 """Return list of lists of chains of super rigid bodies (as domain name)"""
1500 rbl = defaultdict(list)
1501 for c
in self._components:
1502 for rbnum
in c.chain_of_super_rigid_bodies:
1503 rbl[rbnum].append(c.get_unique_name())
1506 class _TempMolecule(object):
1507 """Store the Components and any requests for copies"""
1509 self.molname = init_c.molname
1512 self.add_component(init_c,init_c.copyname)
1513 self.orig_copyname = init_c.copyname
1514 self.orig_component = self.domains[init_c.copyname][0]
1515 def add_component(self,component,copy_id):
1516 self.domains[copy_id].append(component)
1517 component.domainnum = len(self.domains[copy_id])-1
1519 return ','.join(
'%s:%i'%(k,len(self.domains[k]))
for k
in self.domains)
1521 class _Component(object):
1522 """Stores the components required to build a standard IMP hierarchy
1523 using IMP.pmi.BuildModel()
1527 self.copyname =
None
1529 self.fasta_file =
None
1530 self._orig_fasta_file =
None
1531 self.fasta_id =
None
1532 self.fasta_flag =
None
1533 self.pdb_file =
None
1534 self._orig_pdb_input =
None
1536 self.residue_range =
None
1539 self.em_residues_per_gaussian = 0
1542 self.density_prefix =
''
1544 self.rigid_body =
None
1545 self.super_rigid_bodies = []
1546 self.chain_of_super_rigid_bodies = []
1549 return ",".join(
"%s" % x
for x
in l)
1552 return self.get_str()
1555 return "%s.%s.%i"%(self.molname,self.copyname,self.domainnum)
1558 res_range = self.residue_range
1559 if self.residue_range
is None:
1562 if self.copyname!=
'':
1563 name +=
'.'+self.copyname
1564 if self.chain
is None:
1569 if isinstance(color, list):
1570 color=
','.join([str(x)
for x
in color])
1571 fastaid = self.fasta_id
1573 fastaid +=
"," + self.fasta_flag
1574 a=
'|'+
'|'.join([name,color,self._orig_fasta_file,fastaid,
1575 self._orig_pdb_input,chain,self._l2s(list(res_range)),
1576 str(self.pdb_offset),str(self.bead_size),
1577 str(self.em_residues_per_gaussian),
1578 str(self.rigid_body)
if self.rigid_body
else '',
1579 self._l2s(self.super_rigid_bodies),
1580 self._l2s(self.chain_of_super_rigid_bodies)])+
'|'
1585 '''Extends the functionality of IMP.atom.Molecule'''
1587 def __init__(self,hierarchy):
1588 IMP.atom.Molecule.__init__(self,hierarchy)
1597 def get_extended_name(self):
1598 return self.get_name()+
"."+\
1599 str(self.get_copy_index())+\
1600 "."+str(self.get_state_index())
1602 def get_sequence(self):
1605 def get_residue_indexes(self):
1608 def get_residue_segments(self):
1615 s=
'PMIMoleculeHierarchy '
1618 s+=
" "+
"State "+str(self.get_state_index())
1619 s+=
" "+
"N residues "+str(len(self.get_sequence()))
def __init__
setup a TempResidue
def build
call build on all molecules (automatically makes clones)
def set_pdb_dir
Change the PDB dir.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Hierarchy get_parent() const
Get the parent particle.
def get_atomic_residues
Return a set of TempResidues that have associated structure coordinates.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Extends the functionality of IMP.atom.Molecule.
def get_residues
Return all modeled TempResidues as a set.
std::string get_unique_name(std::string templ)
Return a unique name produced from the string.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static Atom setup_particle(Model *m, ParticleIndex pi, Atom other)
def build
Build all states.
def __init__
read a fasta file and extract all the requested sequences
static XYZR setup_particle(Model *m, ParticleIndex pi)
def __init__
read a pdb file and returns all sequences for each contiguous fragment
def fasta_pdb_alignments
This function computes and prints the alignment between the fasta file and the pdb sequence...
def get_ideal_helices
Returns list of OrderedSets with requested ideal helices.
def get_chains_of_super_rigid_bodies
Return list of lists of chains of super rigid bodies (as domain name)
def __init__
The user should not call this directly; instead call State::create_molecule()
void handle_use_deprecated(std::string message)
def set_gmm_dir
Change the GMM dir.
def residue_range
get residue range from a to b, inclusive.
def get_molecule
Access a molecule by name and copy number.
def __init__
Define a new state.
def add_representation
Set the representation for some residues.
static State setup_particle(Model *m, ParticleIndex pi, unsigned int index)
def create_molecule
Create a new Molecule within this State.
def build
Create all parts of the IMP hierarchy including Atoms, Residues, and Fragments/Representations and...
static Residue setup_particle(Model *m, ParticleIndex pi, ResidueType t, int index, int insertion_code)
char get_one_letter_code(ResidueType c)
Get the 1-letter amino acid code from the residue type.
def get_non_atomic_residues
Return a set of TempResidues that don't have associated structure coordinates.
def get_name
Return this Molecule name.
def get_hierarchy
Return the IMP Hierarchy corresponding to this Molecule.
def get_components
Return list of ComponentTopologies for selected components.
Class for storing model, its restraints, constraints, and particles.
Stores a named protein chain.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
A decorator for keeping track of copies of a molecule.
Select all non-alternative ATOM records.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def set_fasta_dir
Change the FASTA dir.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Data_structure for reading and storing sequence data from pdb.
A decorator for a particle representing an atom.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
A decorator for a particle with x,y,z coordinates.
def create_clone
Create a Molecule clone (automatically builds same structure and representation)
def add_structure
Read a structure and store the coordinates.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def add_protocol_output
Capture details of the modeling protocol.
def get_molecules
Return a dictionary where key is molecule name and value are the list of all copies of that molecule ...
static Copy setup_particle(Model *m, ParticleIndex pi, Int number)
def read
Read system components from topology file.
def get_state
Return the State containing this Molecule.
A decorator for a residue.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Automatically setup Sytem and Degrees of Freedom with a formatted text file.
The general base class for IMP exceptions.
def get_rigid_bodies
Return list of lists of rigid bodies (as domain name)
Associate an integer "state" index with a hierarchy node.
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Mapping between FASTA one-letter codes and residue types.
Class to handle individual particles of a Model object.
Stores a list of Molecules all with the same State index.
def get_represented
Return set of TempResidues that have representation.
Store info for a chain of a protein.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
def create_copy
Create a new Molecule with the same name and sequence but a higher copy number.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_particles_at_all_resolutions
Helpful utility for getting particles at all resolutions from this molecule.
static Chain setup_particle(Model *m, ParticleIndex pi, std::string id)
A decorator for a molecule.
Select hierarchy particles identified by the biological name.
def get_number_of_states
Returns the total number of states generated.
def get_super_rigid_bodies
Return list of lists of super rigid bodies (as domain name)
Temporarily stores residue information, even without structure available.
def create_state
Makes and returns a new IMP.pmi.topology.State in this system.