doc/ref/data_8py_source.html

 """@namespace IMP.mmcif.data

    @brief Classes to represent data structures used in mmCIF.

 """


 import IMP.atom

 import ihm.location

 import ihm.metadata

 import ihm.startmodel

 import ihm.analysis

 import ihm.protocol

 import ihm.model

 import ihm.citations

 import ihm.reference

 import operator

 import inspect


 # Lookup table translating each IMP ResidueType into its ihm ChemComp
 _imp_to_ihm = {}


 def _fill_imp_to_ihm():
     """Populate the module-level _imp_to_ihm lookup table.

        Every attribute of the IMP.atom module that is a ResidueType is
        collected by attribute name. The table is then filled for the
        L-peptide components whose id matches one of those names, and for
        the four RNA and four DNA residue types listed below. Finally None
        is mapped to None so a missing IMP residue passes straight through.
     """
     restype_by_name = {
         attr_name: attr
         for attr_name, attr in inspect.getmembers(IMP.atom)
         if isinstance(attr, IMP.atom.ResidueType)}

     # Standard amino acids, plus some extras like MSE, UNK
     for chem_comp in ihm.LPeptideAlphabet._comps.values():
         if chem_comp.id in restype_by_name:
             _imp_to_ihm[restype_by_name[chem_comp.id]] = chem_comp

     # Nucleic acids: each alphabet is indexed by the string form of the
     # IMP residue type
     nucleic = ((ihm.RNAAlphabet, ('ADE', 'CYT', 'GUA', 'URA')),
                (ihm.DNAAlphabet, ('DADE', 'DCYT', 'DGUA', 'DTHY')))
     for alphabet_class, attr_names in nucleic:
         alphabet = alphabet_class()
         for attr_name in attr_names:
             imp_type = restype_by_name[attr_name]
             _imp_to_ihm[imp_type] = alphabet[imp_type.get_string()]

     # A missing IMP residue maps to a missing ChemComp
     _imp_to_ihm[None] = None


 _fill_imp_to_ihm()


 def get_molecule(h):
     """Return the closest Molecule at or above the given Hierarchy node.

        Starting at h itself, follow get_parent() links until a node that
        is set up as an IMP.atom.Molecule is found. Return None if a falsy
        node is reached first."""
     node = h
     while True:
         if not node:
             return None
         if IMP.atom.Molecule.get_is_setup(node):
             return IMP.atom.Molecule(node)
         node = node.get_parent()


 def _check_sequential(fragment, resinds):
     """Raise ValueError unless each residue index is exactly one more
        than the index before it. `fragment` is only used (via str()) to
        label the error message."""
     previous = None
     for position, current in enumerate(resinds):
         # The first index has no predecessor to compare against
         if position and previous + 1 != current:
             raise ValueError(
                 "%s: non-sequential residue indices are not supported"
                 % str(fragment))
         previous = current


 def _get_all_state_provenance(state_h, top_h, types):
     """Generate the provenance objects of the given types for a state.

        Everything IMP.core.get_all_provenance reports for state_h is
        yielded. Only when that produces nothing, and top_h is not None,
        the provenance of top_h is yielded instead."""
     found_any = False
     for prov in IMP.core.get_all_provenance(state_h, types=types):
         found_any = True
         yield prov
     if found_any or top_h is None:
         return
     # Fall back to the top-level node
     yield from IMP.core.get_all_provenance(top_h, types=types)


 class _CustomDNAAlphabet(ihm.Alphabet):
     """DNA alphabet keyed on A,C,G,T instead of the DA,DC,DG,DT codes
        used by python-ihm's own DNA alphabet."""

     # Re-key every component of ihm.DNAAlphabet by its canonical code
     _comps = {cc.code_canonical: cc
               for cc in ihm.DNAAlphabet._comps.values()}


 class _EntityMapper(dict):
     """Map IMP chains to CIF entities.

        The chain -> ihm.Entity mapping itself lives in the dict base
        class. Chains that share a sequence share a single entity."""

     def __init__(self, system):
         self.system = system
         super().__init__()
         # sequence (tuple of ChemComp) -> entity
         self._sequence_dict = {}
         # entities in the order they were created
         self._entities = []
         # IMP chain type -> alphabet class used to decode one-letter codes
         self._alphabet_map = {
             IMP.atom.UnknownChainType: ihm.LPeptideAlphabet,
             IMP.atom.Protein: ihm.LPeptideAlphabet,
             IMP.atom.RNA: ihm.RNAAlphabet,
             IMP.atom.DNA: _CustomDNAAlphabet}

     def _get_sequence_from_residues(self, chain, seq_from_res):
         """Turn a (first index, list of components) pair into the
            (offset, sequence tuple) pair used by add().

            Raise ValueError if the list is empty or contains any None
            entry; `chain` is only used to label the error message."""
         seq_id_begin, seq = seq_from_res
         if not seq:
             raise ValueError("Chain %s has no sequence and no residues"
                              % chain)
         untyped = [seq_id_begin + position
                    for position, comp in enumerate(seq) if comp is None]
         if untyped:
             raise ValueError(
                 "Chain %s has no declared sequence; tried to determine the "
                 "sequence from Residues, but the following residue indices "
                 "have no residue type (perhaps covered only by Fragments): %s"
                 % (chain, str(untyped)))
         return seq_id_begin - 1, tuple(seq)

     def add(self, chain, seq_from_res=None):
         """Register a chain and return its (entity, offset) pair.

            The chain's declared sequence is used when it is non-empty;
            otherwise the sequence comes from seq_from_res, and ValueError
            is raised if that is None. A new ihm.Entity is created and
            added to the system only for a sequence not seen before."""
         declared = chain.get_sequence()
         offset = chain.get_sequence_offset()
         if declared != '':
             # Map one-letter codes to ihm.ChemComp
             alphabet = self._alphabet_map[chain.get_chain_type()]()
             sequence = tuple(alphabet[code] for code in declared)
         elif seq_from_res is None:
             raise ValueError("Chain %s has no sequence" % chain)
         else:
             offset, sequence = self._get_sequence_from_residues(
                 chain, seq_from_res)

         if sequence in self._sequence_dict:
             entity = self._sequence_dict[sequence]
         else:
             entity = ihm.Entity(sequence)
             self.system.entities.append(entity)
             self._entities.append(entity)
             self._sequence_dict[sequence] = entity
             accession = chain.get_uniprot_accession()
             if accession:
                 reference = ihm.reference.UniProtSequence.from_accession(
                     accession)
                 entity.references.append(reference)
         self[chain] = entity
         return entity, offset

     def get_all(self):
         """Return all entities, in creation order"""
         return self._entities


 def _assign_id(obj, seen_objs, obj_by_id):
     """Give obj an `id` attribute and record it in seen_objs.

        An object already present in seen_objs gets the id stored there.
        Otherwise, an object without an `id` attribute is appended to
        obj_by_id and numbered by its 1-based position in that list; in
        both cases the object's id is then recorded in seen_objs."""
     if obj in seen_objs:
         obj.id = seen_objs[obj]
         return
     if not hasattr(obj, 'id'):
         obj_by_id.append(obj)
         obj.id = len(obj_by_id)
     seen_objs[obj] = obj.id


 class _Component:
     """An mmCIF component, i.e. one instance of an _Entity.

        Several _Components may share an _Entity, but each must have its
        own asym_id. A _Component resembles an IMP Chain, except that
        several Chains may map to one _Component (the same structure in
        different states, possibly in different IMP Models). A _Component
        may also stand for something described by an experiment but not
        modeled by IMP; then no Chains map to it, only a string name."""

     def __init__(self, entity, asym_id, name):
         self.entity = entity
         self.asym_id = asym_id
         self.name = name


 class _ComponentMapper:
     """Map IMP Chains to CIF AsymUnits (wrapped in _Component objects)."""

     def __init__(self, system):
         super().__init__()
         self.system = system
         # Entities that already had a description assigned
         self._used_entities = set()
         # Components in creation order, and by lookup key
         self._all_components = []
         self._map = {}

     def __getitem__(self, chain):
         """Return the component previously added for this chain"""
         return self._map[self._handle_chain(chain)[1]]

     def _handle_chain(self, chain):
         """Return (asym_id, map_key, name) for a chain.

            The name is that of the chain's parent Molecule, if any. The
            key is tagged with its kind so that a name "A" and an asym_id
            "A" cannot collide."""
         mol = get_molecule(chain)
         asym_id = chain.get_id()
         name = mol.get_name() if mol else None
         map_key = ("name", name) if name else ("asym_id", asym_id)
         return asym_id, map_key, name

     def add(self, chain, entity, offset):
         """Add a chain (an IMP Chain object) and return its component.

            A chain whose key was already added must have the same entity
            and offset as before, otherwise ValueError is raised."""
         asym_id, map_key, name = self._handle_chain(chain)

         if map_key in self._map:
             component = self._map[map_key]
             if component.entity != entity:
                 raise ValueError("Two chains have the same ID (%s) but "
                                  "different sequences - rename one of the "
                                  "chains" % component.asym_unit.id)
             if component.asym_unit.auth_seq_id_map != offset:
                 raise ValueError(
                     "Two chains have the same ID (%s) but different offsets "
                     "(%d, %d) - this is not supported"
                     % (component.asym_unit.id,
                        component.asym_unit.auth_seq_id_map, offset))
             return component

         component = _Component(entity, asym_id, name)
         if entity not in self._used_entities:
             self._used_entities.add(entity)
             # The first component to use an entity names it; anything
             # from the first @ or . onwards is dropped
             if component.name:
                 before_at = component.name.partition("@")[0]
                 entity.description = before_at.partition(".")[0]
         self._all_components.append(component)
         asym = ihm.AsymUnit(entity, name, id=asym_id,
                             auth_seq_id_map=offset)
         self.system.asym_units.append(asym)
         component.asym_unit = asym
         self._map[map_key] = component
         return component

     def get_all(self):
         """Return the list of all components"""
         return self._all_components


 class _RepSegmentFactory:
     """Build ihm.representation segment objects, one for each run of
        contiguous particles that share the same representation."""

     def __init__(self, asym):
         self.asym = asym
         # Offset from IHM to IMP numbering
         self.offset = asym.auth_seq_id_map
         # Particles of the segment currently being accumulated
         self.particles = []
         # Inclusive residue range of that segment, using IMP numbering
         self.imp_residue_range = ()

     def add(self, particle, starting_model):
         """Try to extend the current segment with a particle.

            Return None if the particle started or extended the current
            segment. Otherwise return the segment built from the particles
            accumulated so far, and begin a new segment with this one."""
         info = self._get_particle_info(particle)
         resrange, rigid_body, is_res, is_atom = info

         def begin_segment():
             self.particles = [particle]
             self.imp_residue_range = resrange
             self.rigid_body = rigid_body
             self.is_res = is_res
             self.is_atom = is_atom
             self.starting_model = starting_model

         if not self.particles:
             # Very first particle
             begin_segment()
             return None

         extends_current = (
             type(particle) == type(self.particles[0])  # noqa: E721
             and is_res == self.is_res
             and is_atom == self.is_atom
             and resrange[0] <= self.imp_residue_range[1] + 1
             and starting_model == self.starting_model
             and self._same_rigid_body(rigid_body))
         if extends_current:
             self.particles.append(particle)
             self.imp_residue_range = (self.imp_residue_range[0],
                                       resrange[1])
             return None

         finished = self.get_last()
         begin_segment()
         return finished

     def get_last(self):
         """Return a segment for the accumulated particles, or None if
            there are none"""
         if not self.particles:
             return None
         # Convert residue_range from IMP to IHM
         seq_begin = self.imp_residue_range[0] - self.offset
         seq_end = self.imp_residue_range[1] - self.offset
         asym = self.asym(seq_begin, seq_end)
         rigid = self.rigid_body is not None
         if self.is_atom:
             return ihm.representation.AtomicSegment(
                 asym_unit=asym, rigid=rigid,
                 starting_model=self.starting_model)
         if self.is_res:
             return ihm.representation.ResidueSegment(
                 asym_unit=asym, rigid=rigid, primitive='sphere',
                 starting_model=self.starting_model)
         return ihm.representation.FeatureSegment(
             asym_unit=asym, rigid=rigid, primitive='sphere',
             count=len(self.particles),
             starting_model=self.starting_model)

     def _same_rigid_body(self, rigid_body):
         """Return True iff rigid_body matches that of the current segment"""
         # None is handled separately rather than via ==, as IMP may crash
         # when comparing a RigidBody object against None
         mine = self.rigid_body
         if mine is None or rigid_body is None:
             return mine is None and rigid_body is None
         return mine == rigid_body

     def _get_particle_info(self, p):
         """Return (residue range, rigid body or None, is_res, is_atom)
            for a Residue, Atom or Fragment; raise TypeError otherwise."""
         # Note that we consider nonrigid members to not be rigid here
         rigid_body = None
         if IMP.core.RigidMember.get_is_setup(p):
             rigid_body = IMP.core.RigidMember(p).get_rigid_body()

         if isinstance(p, IMP.atom.Residue):
             index = p.get_index()
             return (index, index), rigid_body, True, False
         if isinstance(p, IMP.atom.Atom):
             index = IMP.atom.get_residue(p).get_index()
             return (index, index), rigid_body, False, True
         if isinstance(p, IMP.atom.Fragment):
             indexes = p.get_residue_indexes()
             return (indexes[0], indexes[-1]), rigid_body, False, False
         raise TypeError("Unknown particle ", p)


 def _get_all_structure_provenance(p):
     """Return whatever IMP.core.get_all_provenance produces for the given
        particle when restricted to StructureProvenance."""
     wanted_types = [IMP.core.StructureProvenance]
     return IMP.core.get_all_provenance(p, types=wanted_types)


 class _StartingModelAtomHandler:
     """Convert starting-model atoms into ihm.model.Atom objects, and
        collect sequence differences between the starting model and the
        output model along the way."""

     def __init__(self, templates, asym):
         self.templates = templates
         self.asym = asym
         # Sequence-difference records gathered by handle_residue()
         self._seq_dif = []
         # Index of the residue most recently reported as "first atom"
         self._last_res_index = None

     def _residue_first_atom(self, res):
         """Return True iff we're looking at the first atom in this residue"""
         # Used so that only one seq_dif record is added per residue
         index = res.get_index()
         if index == self._last_res_index:
             return None
         self._last_res_index = index
         return True

     def handle_residue(self, res, comp_id, seq_id, offset):
         """Record a sequence difference if the starting model residue type
            differs from comp_id, the component in the output model.
            At most one record is added per run of atoms in one residue."""
         res_name = res.get_residue_type().get_string()
         if res_name == comp_id:
             return
         if not self._residue_first_atom(res):
             return

         if res_name == 'MSE' and comp_id == 'MET':
             # MSE in the original PDB is automatically mutated by IMP to
             # MET, so reflect that in the output and populate the seq_dif
             # category. This should only happen when we're using a crystal
             # structure as the source (a comparative model would use MET
             # in the sequence)
             assert len(self.templates) == 0
             self._seq_dif.append(ihm.startmodel.MSESeqDif(
                 res.get_index(), seq_id))
             return

         print("WARNING: Starting model residue %s does not match "
               "that in the output model (%s) for chain %s residue %d. "
               "Check offset (currently %d)."
               % (res_name, comp_id, self.asym._id, seq_id, offset))
         self._seq_dif.append(ihm.startmodel.SeqDif(
             db_seq_id=res.get_index(), seq_id=seq_id,
             db_comp_id=res_name,
             details="Mutation of %s to %s" % (res_name, comp_id)))

     def get_ihm_atoms(self, particles, offset):
         """Yield an ihm.model.Atom for each of the given atom particles.

            seq_id is the residue index plus offset. Each residue is also
            passed through handle_residue()."""
         for particle in particles:
             xyz = IMP.core.XYZ(particle).get_coordinates()
             atom = IMP.atom.Atom(particle)
             element_id = atom.get_element()
             type_symbol = IMP.atom.get_element_table().get_name(element_id)
             raw_name = atom.get_atom_type().get_string()
             het = raw_name.startswith('HET:')
             # Drop the HET: prefix from the atom name, if present
             atom_id = raw_name[len('HET:'):] if het else raw_name
             res = IMP.atom.get_residue(atom)

             seq_id = res.get_index() + offset
             comp_id = self.asym.entity.sequence[seq_id-1].id
             self.handle_residue(res, comp_id, seq_id, offset)
             yield ihm.model.Atom(asym_unit=self.asym, seq_id=seq_id,
                                  atom_id=atom_id, type_symbol=type_symbol,
                                  x=xyz[0], y=xyz[1], z=xyz[2],
                                  het=het, biso=atom.get_temperature_factor())


 class _StartingModel(ihm.startmodel.StartingModel):
     """Starting model described by the first of a list of structure
        provenance objects (filename, chain ID and residue offset)."""

     # Attributes that determine equality and hashing
     _eq_keys = ['filename', 'asym_id', 'offset']

     def __init__(self, asym_unit, struc_prov):
         first_prov = struc_prov[0]
         self.filename = first_prov.get_filename()
         super().__init__(
             # placeholder range; will update in _add_residue()
             asym_unit=asym_unit(0, 0),
             # will fill in later with _set_sources_datasets()
             dataset=None,
             asym_id=first_prov.get_chain_id(),
             offset=first_prov.get_residue_offset())

     def _add_residue(self, resind):
         """Update seq_id_range to accommodate this residue"""
         range_begin = self.asym_unit.seq_id_range[0]
         if range_begin == 0:
             # Still the (0, 0) placeholder: start the range here
             range_begin = resind
         self.asym_unit = self.asym_unit.asym(range_begin, resind)

     # Two starting models with same filename, chain ID, and offset
     # compare identical
     # note: this results in separate starting models if only the
     # offset differs; maybe consolidate into one?
     def _eq_vals(self):
         """Return the tuple of values used for equality and hashing"""
         return (self.__class__,) + tuple(getattr(self, key)
                                          for key in self._eq_keys)

     def __eq__(self, other):
         if other is None:
             return False
         return self._eq_vals() == other._eq_vals()

     def __hash__(self):
         return hash(self._eq_vals())

     def _set_sources_datasets(self, system, datasets):
         """Parse the starting model file and fill in dataset, templates
            and metadata; software, datasets and alignment file locations
            found by the parser are added to system/datasets."""
         # Attempt to identify PDB file vs. comparative model
         use_cif = (hasattr(ihm.metadata, 'CIFParser')
                    and self.filename.endswith('.cif'))
         if use_cif:
             parser = ihm.metadata.CIFParser()
         else:
             parser = ihm.metadata.PDBParser()
         parsed = parser.parse_file(self.filename)
         system.software.extend(parsed.get('software', []))
         dataset = datasets.add(parsed['dataset'])
         # We only want the templates that model the starting model chain
         templates = parsed.get('templates', {}).get(self.asym_id, [])
         for template in templates:
             if template.alignment_file:
                 system.locations.append(template.alignment_file)
             if template.dataset:
                 datasets.add(template.dataset)
         self.dataset = dataset
         self.templates = templates
         self.metadata = parsed.get('metadata', [])

     def _read_coords(self):
         """Read the coordinates for this starting model.

            Return the new IMP Model and a Selection covering the residues
            of this model's seq_id_range, shifted back by the offset."""
         model = IMP.Model()
         # todo: support reading other subsets of the atoms (e.g. CA/CB)
         selector = IMP.atom.ChainPDBSelector([self.asym_id]) \
             & IMP.atom.NonWaterNonHydrogenPDBSelector()
         hier = IMP.atom.read_pdb_or_mmcif(self.filename, model, selector)
         seq_range = self.asym_unit.seq_id_range
         first_index = seq_range[0] - self.offset
         stop_index = seq_range[1] + 1 - self.offset
         selection = IMP.atom.Selection(
             hier, residue_indexes=list(range(first_index, stop_index)))
         return model, selection

     def get_seq_dif(self):
         """Return the sequence differences gathered by get_atoms()"""
         return self._seq_dif  # filled in by get_atoms()

     def get_atoms(self):
         """Yield the atoms of this starting model; once they have all
            been yielded, store the sequence differences that were found."""
         handler = _StartingModelAtomHandler(self.templates, self.asym_unit)
         # The model is kept in a local for as long as the atoms are read
         model, selection = self._read_coords()
         yield from handler.get_ihm_atoms(
             selection.get_selected_particles(), self.offset)
         self._seq_dif = handler._seq_dif


 class _StartingModelFinder:
     """Map IMP particles to starting model objects.

        Starting models are looked up via the structure provenance of a
        particle or of its parents in the hierarchy. Results for inspected
        parent nodes are cached by particle index."""

     def __init__(self, asym, existing_starting_models, system, datasets):
         # particle index -> list that is either empty (no starting model
         # recorded for that node) or holds the node's starting model
         self._seen_particles = {}
         self._asym = asym
         # Maps each starting model to itself, so that an equal model
         # created later can be replaced by the one already known
         self._seen_starting_models = {}
         for sm in existing_starting_models:
             self._seen_starting_models[sm] = sm
         self._system = system
         self._datasets = datasets

     def find(self, particle):
         """Return a StartingModel object, or None, for this particle.

            The particle's own structure provenance is used if it has any.
            Otherwise, if the particle is a Hierarchy, its parents are
            searched upwards for provenance. The residue range of the
            starting model that is found is extended via _add_residue()."""
         def _get_starting_model(sp, resind):
             # Build a candidate and swap it for an equal, already-known
             # model if there is one; only a genuinely new model is parsed
             # for sources/datasets and added to the system
             s = _StartingModel(self._asym, sp)
             if s not in self._seen_starting_models:
                 self._seen_starting_models[s] = s
                 s._set_sources_datasets(self._system, self._datasets)
                 self._system.orphan_starting_models.append(s)
             s = self._seen_starting_models[s]
             if s:
                 # NOTE(review): resind is not checked against None here,
                 # unlike in the cached path below - confirm this is
                 # intended for particles that are not Residues
                 s._add_residue(resind)
             return s
         resind = None
         if IMP.atom.Residue.get_is_setup(particle):
             resind = IMP.atom.Residue(particle).get_index()
         sp = list(_get_all_structure_provenance(particle))
         if sp:
             return _get_starting_model(sp, resind)
         elif IMP.atom.Hierarchy.get_is_setup(particle):
             h = IMP.atom.Hierarchy(particle).get_parent()
             # Remember all nodes we inspect
             seen_parents = []
             while h:
                 # A Residue parent provides the residue index
                 if IMP.atom.Residue.get_is_setup(h):
                     resind = IMP.atom.Residue(h).get_index()
                 pi = h.get_particle_index()
                 seen_parents.append(pi)
                 # If we inspected this node before, return the cached result
                 if pi in self._seen_particles:
                     sp = self._seen_particles[pi]
                     if sp and sp[0] and resind is not None:
                         sp[0]._add_residue(resind)
                     return sp[0] if sp else None
                 else:
                     sp = list(_get_all_structure_provenance(h))
                     # Start with an empty cache entry; it is filled in
                     # below if this node or one above it has provenance
                     self._seen_particles[pi] = []
                     if sp:
                         s = _get_starting_model(sp, resind)
                         # Set cache for this node and all the children we
                         # inspected on the way up
                         for spi in seen_parents:
                             self._seen_particles[spi].append(s)
                         return s
                 h = h.get_parent()


 class _Datasets:
     """Keep track of every dataset (and dataset group) that is used."""

     def __init__(self, system):
         super().__init__()
         self.system = system
         # dataset -> the first equal dataset that was added
         self._datasets = {}
         # tuple of datasets -> DatasetGroup
         self._groups = {}

     def add(self, d):
         """Add a dataset and return it, or return the equal dataset that
            was added earlier."""
         if d in self._datasets:
             return self._datasets[d]
         self._datasets[d] = d
         self.system.orphan_datasets.append(d)
         return d

     def add_group(self, datasets, name):
         """Add and return a group of datasets, reusing the existing group
            if the same datasets were grouped before."""
         # Drop duplicates but keep first-seen order
         unique = []
         already_seen = set()
         for dataset in datasets:
             if dataset in already_seen:
                 continue
             already_seen.add(dataset)
             unique.append(dataset)
         key = tuple(unique)
         if key not in self._groups:
             group = ihm.dataset.DatasetGroup(key, name=name)
             self._groups[key] = group
             self.system.orphan_dataset_groups.append(group)
         return self._groups[key]

     def get_all(self):
         """Return a view of all datasets added so far"""
         return self._datasets.keys()


 class _AllSoftware:
     """Track every Software object that is referenced."""

     # IMP/RMF doesn't store citation info for software, so provide it
     # for known software packages
     cites = {'Integrative Modeling Platform (IMP)': ihm.citations.imp,
              'IMP PMI module': ihm.citations.pmi}

     def __init__(self, system):
         self.system = system
         # (name, version) -> ihm.Software
         self._by_namever = {}
         super().__init__()

     def add_hierarchy(self, h, top_h=None):
         """Add software for each SoftwareProvenance of the hierarchy"""
         # todo: if no SoftwareProvenance available, use RMF producer field
         software_types = [IMP.core.SoftwareProvenance]
         for prov in _get_all_state_provenance(h, top_h,
                                               types=software_types):
             self._add_provenance(prov)

     def _add_provenance(self, p):
         """Return the Software for a SoftwareProvenance, creating it the
            first time a given (name, version) pair is seen."""
         name = p.get_software_name()
         version = p.get_version()
         key = (name, version)
         # Only reference the same version of a given package once
         if key in self._by_namever:
             return self._by_namever[key]
         software = ihm.Software(
             name=name, classification='integrative model building',
             description=None, version=version,
             location=p.get_location(),
             citation=self.cites.get(name))
         self.system.software.append(software)
         self._by_namever[key] = software
         return software

     def _add_previous_provenance(self, prov):
         """Add Software from a previous SoftwareProvenance, if any.

            Follow get_previous() links starting at prov and return the
            Software for the first SoftwareProvenance found, else None."""
         current = prov
         while current:
             if IMP.core.SoftwareProvenance.get_is_setup(current):
                 return self._add_provenance(
                     IMP.core.SoftwareProvenance(current))
             current = current.get_previous()
         return None


 class _ExternalFiles:
     """Keep track of externally-referenced files, i.e. anything that
        refers to a Location that isn't a DatabaseLocation."""

     def __init__(self, system):
         self.system = system
         # path -> location object created for it
         self._by_path = {}

     def add_hierarchy(self, h, top_h=None):
         """Add all Python scripts that were used in the modeling"""
         script_types = [IMP.core.ScriptProvenance]
         for prov in _get_all_state_provenance(h, top_h,
                                               types=script_types):
             self._add_provenance(prov)

     def _add_provenance(self, p):
         """Return the file location for a ScriptProvenance, creating it
            the first time a given path is seen."""
         path = p.get_filename()
         # Only reference the same path once
         if path in self._by_path:
             return self._by_path[path]
         location = ihm.location.WorkflowFileLocation(
             path=path, details='Integrative modeling Python script')
         self.system.locations.append(location)
         self._by_path[path] = location
         return location


 class _ProtocolStep(ihm.protocol.Step):
     """One step (e.g. sampling, refinement) of a modeling protocol,
        built from a provenance object."""

     def __init__(self, prov, num_models_begin, assembly, all_software):
         method = prov.get_method()
         uses_replicas = prov.get_number_of_replicas() > 1
         if uses_replicas:
             method = "Replica exchange " + method
         num_models_end = prov.get_number_of_frames()
         software = all_software._add_previous_provenance(prov)
         super().__init__(
             assembly=assembly,
             # todo: fill in useful value for dataset_group
             dataset_group=None,
             method=method, name='Sampling',
             num_models_begin=num_models_begin,
             num_models_end=num_models_end,
             # todo: support multiple states, time ordered
             multi_state=False, ordered=False,
             # todo: revisit assumption all models are multiscale
             multi_scale=True,
             software=software)

     def add_combine(self, prov):
         """Take the final model count from the given provenance and
            return it."""
         self.num_models_end = prov.get_number_of_frames()
         return self.num_models_end


 class _Protocol(ihm.protocol.Protocol):
     """A modeling protocol.

        A protocol is a number of protocol steps (e.g. sampling,
        refinement) followed by a number of postprocessing steps (e.g.
        filtering, rescoring, clustering)."""

     def add_step(self, prov, num_models, assembly, all_software):
         """Add a protocol step for the given provenance and return the
            number of models at its end.

            A CombineProvenance does not create a step; it is folded into
            the previous step, and ValueError is raised if there is none."""
         if not isinstance(prov, IMP.core.CombineProvenance):
             step = _ProtocolStep(prov, num_models, assembly, all_software)
             self.steps.append(step)
             return step.num_models_end
         if len(self.steps) == 0:
             raise ValueError("CombineProvenance with no previous sampling")
         return self.steps[-1].add_combine(prov)

     def add_postproc(self, prov, num_models, assembly):
         """Add a postprocessing step for a Filter or Cluster provenance
            to the most recent analysis (creating one if needed), and
            return the number of models at its end."""
         if not self.analyses:
             self.analyses.append(ihm.analysis.Analysis())
         if isinstance(prov, IMP.core.FilterProvenance):
             postproc = ihm.analysis.FilterStep(
                 feature='energy/score', assembly=assembly,
                 num_models_begin=num_models,
                 num_models_end=prov.get_number_of_frames())
         elif isinstance(prov, IMP.core.ClusterProvenance):
             # Assume clustering uses all models
             postproc = ihm.analysis.ClusterStep(
                 feature='RMSD', assembly=assembly,
                 num_models_begin=num_models, num_models_end=num_models)
         else:
             raise ValueError("Unhandled provenance", prov)
         self.analyses[-1].steps.append(postproc)
         return postproc.num_models_end


 class _Protocols:
     """Keep track of every modeling protocol that is used."""

     def __init__(self, system):
         self.system = system

     def _add_protocol(self, prot):
         """Add a protocol to the system unless an equivalent one is
            already there; return the protocol that ends up in the system."""
         # Protocol isn't hashable or sortable, so the attribute dicts of
         # its steps are compared with those of each existing protocol.
         # This should still be performant as we generally don't have more
         # than one or two protocols.

         def comparable_attrs(step):
             # dataset_group is excluded from the comparison as this is
             # typically filled in later
             return {key: value for key, value in step.__dict__.items()
                     if key != 'dataset_group'}

         def step_equal(a, b):
             if type(a) != type(b):  # noqa: E721
                 return False
             return comparable_attrs(a) == comparable_attrs(b)

         def analysis_equal(a, b):
             if len(a.steps) != len(b.steps):
                 return False
             return all(step_equal(s, t) for (s, t) in zip(a.steps, b.steps))

         def protocol_equal(a, b):
             if len(a.steps) != len(b.steps):
                 return False
             if len(a.analyses) != len(b.analyses):
                 return False
             if not all(step_equal(s, t)
                        for (s, t) in zip(a.steps, b.steps)):
                 return False
             return all(analysis_equal(s, t)
                        for (s, t) in zip(a.analyses, b.analyses))

         for existing in self.system.orphan_protocols:
             if protocol_equal(existing, prot):
                 return existing
         self.system.orphan_protocols.append(prot)
         return prot

     def _add_hierarchy(self, h, top_h, modeled_assembly, all_software):
         """Build protocols from the sampling and postprocessing provenance
            of a hierarchy, visited in reverse of the order in which it is
            reported.

            A sampling step seen after postprocessing starts a new
            protocol. Return the last protocol if it has any steps,
            otherwise None."""
         sampling_types = (IMP.core.SampleProvenance,
                           IMP.core.CombineProvenance)
         postproc_types = (IMP.core.FilterProvenance,
                           IMP.core.ClusterProvenance)
         model_count = 0  # assume we always start with no models
         after_postproc = False
         protocol = _Protocol()
         provenance = list(_get_all_state_provenance(
             h, top_h, types=sampling_types + postproc_types))
         provenance.reverse()
         for prov in provenance:
             if not isinstance(prov, postproc_types):
                 if after_postproc:
                     # Start a new protocol
                     self._add_protocol(protocol)
                     protocol = _Protocol()
                 model_count = protocol.add_step(
                     prov, model_count, modeled_assembly, all_software)
                 after_postproc = False
             else:
                 model_count = protocol.add_postproc(
                     prov, model_count, modeled_assembly)
                 after_postproc = True
         if len(protocol.steps) == 0:
             return None
         return self._add_protocol(protocol)


 class _CoordinateHandler:

     def __init__(self, system, datasets):
         self._system = system
         self._datasets = datasets
         self._representation = ihm.representation.Representation()
         # IHM atoms/spheres corresponding to IMP beads/residues/atoms.
         # These are built up front (rather than on the fly) because, by
         # the time we write the mmCIF, the original IMP objects may have
         # been destroyed or changed (e.g. if we read multiple frames from
         # an RMF file).
         self._atoms, self._spheres = [], []


     def get_residue_sequence(self, ps):
         """Determine the primary sequence based on Residue particles.

            Return the index of the first residue and the sequence, as a
            list with one entry (an ihm.ChemComp object, or None) for every
            index from the lowest to the highest one seen. With no
            particles, return (1, [])."""
         type_by_index = {}
         for particle in ps:
             if isinstance(particle, IMP.atom.Atom):
                 residue = IMP.atom.get_residue(particle)
                 type_by_index[residue.get_index()] = \
                     residue.get_residue_type()
             elif isinstance(particle, IMP.atom.Residue):
                 type_by_index[particle.get_index()] = \
                     particle.get_residue_type()
             else:  # fragment
                 for index in particle.get_residue_indexes():
                     # don't overwrite residue/atom info
                     type_by_index.setdefault(index, None)
         if not type_by_index:
             return 1, []
         first_index = min(type_by_index)
         last_index = max(type_by_index)
         sequence = [_imp_to_ihm[type_by_index.get(index)]
                     for index in range(first_index, last_index + 1)]
         return (first_index, sequence)


     def add_chain(self, ps, asym):

         def matches_asym(s):

             # Match AsymUnit or AsymUnitRange

             return s == asym or hasattr(s, 'asym') and s.asym == asym


         # Consolidate starting models if the same model was used for this

         # asym in a different state or for a different model_id

         smf = _StartingModelFinder(

             asym, [s for s in self._system.orphan_starting_models

                    if matches_asym(s.asym_unit)],

             self._system, self._datasets)

         segfactory = _RepSegmentFactory(asym)

         offset = asym.auth_seq_id_map

         for p in ps:

             starting_model = smf.find(p)

             seg = segfactory.add(p, starting_model)

             if seg:

                 self._representation.append(seg)

             self._add_atom_or_sphere(p, asym, offset)

         last = segfactory.get_last()

         if last:

             self._representation.append(last)


     def _add_atom_or_sphere(self, p, asym, offset):

         if isinstance(p, IMP.atom.Atom):

             residue = IMP.atom.get_residue(p)

             xyz = IMP.core.XYZ(p).get_coordinates()

             element = p.get_element()

             element = IMP.atom.get_element_table().get_name(element)

             atom_name = p.get_atom_type().get_string()

             het = atom_name.startswith('HET:')

             if het:

                 atom_name = atom_name[4:]

             self._atoms.append(ihm.model.Atom(

                 asym_unit=asym, seq_id=residue.get_index() - offset,

                 atom_id=atom_name, type_symbol=element,

                 x=xyz[0], y=xyz[1], z=xyz[2], het=het,

                 biso=p.get_temperature_factor(),

                 occupancy=p.get_occupancy()))

         else:

             if isinstance(p, IMP.atom.Fragment):

                 resinds = p.get_residue_indexes()

                 sbegin = resinds[0]

                 send = resinds[-1]

             else:  # residue

                 sbegin = send = p.get_index()

             xyzr = IMP.core.XYZR(p)

             xyz = xyzr.get_coordinates()

             self._spheres.append(ihm.model.Sphere(

                 asym_unit=asym, seq_id_range=(sbegin - offset, send - offset),

                 x=xyz[0], y=xyz[1], z=xyz[2], radius=xyzr.get_radius()))


     def get_structure_particles(self, h):

         """Return particles sorted by residue index"""

         ps = []

         if h.get_number_of_children() == 0:

             return []

         if not h.get_is_valid():

             raise ValueError("Invalid hierarchy as input")

         for p in IMP.atom.Selection(

                 hierarchy=h, resolution=0.).get_selected_particles():

             if IMP.atom.Residue.get_is_setup(p):

                 residue = IMP.atom.Residue(p)

                 ps.append((residue.get_index(), residue))

             elif IMP.atom.Fragment.get_is_setup(p):

                 fragment = IMP.atom.Fragment(p)

                 resinds = fragment.get_residue_indexes()

                 _check_sequential(fragment, resinds)

                 resind = resinds[len(resinds) // 2]

                 ps.append((resind, fragment))

             elif IMP.atom.Atom.get_is_setup(p):

                 atom = IMP.atom.Atom(p)

                 residue = IMP.atom.get_residue(atom)

                 ps.append((residue.get_index(), atom))

         return [p[1] for p in sorted(ps, key=operator.itemgetter(0))]


 class _ModelAssemblies:
     """Hand out one "Modeled assembly" ihm.Assembly per distinct
        sequence of asyms, creating it on first request."""

     def __init__(self, system):
         self.system = system
         self._seen_assemblies = {}

     def add(self, asyms):
         """Return the Assembly for the given asyms. The first time a
            given sequence of asyms is seen, a new Assembly is built and
            also appended to the system's orphan_assemblies."""
         # Dict keys must be hashable, so freeze the asyms into a tuple
         key = tuple(asyms)
         known = self._seen_assemblies
         if key in known:
             return known[key]
         created = ihm.Assembly(
             key, name="Modeled assembly",
             description="All components modeled by IMP")
         self.system.orphan_assemblies.append(created)
         known[key] = created
         return created


 class _Representations:
     """De-duplicate representations stored in the system's
        orphan_representations list."""

     def __init__(self, system):
         self.system = system

     @staticmethod
     def _same_segments(left, right):
         """Return True if both representations have the same length and
            each pair of elements has the same type and attribute dict."""
         if len(left) != len(right):
             return False
         return all(type(a) is type(b) and a.__dict__ == b.__dict__
                    for a, b in zip(left, right))

     def add(self, rep):
         """Return an already-registered representation equivalent to rep
            if there is one; otherwise register rep and return it."""
         # Representation can be neither hashed nor sorted, so do a linear
         # scan comparing attribute dicts. That is cheap in practice since
         # there are rarely more than one or two representations.
         for existing in self.system.orphan_representations:
             if self._same_segments(existing, rep):
                 return existing
         self.system.orphan_representations.append(rep)
         return rep

IMP::atom::NonWaterNonHydrogenPDBSelector
Select non-water, non-hydrogen atoms.
Definition: pdb.h:314

IMP::atom::Molecule::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Molecule.h:35

IMP::atom::read_pdb_or_mmcif
Hierarchy read_pdb_or_mmcif(TextInput input, Model *model, PDBSelector *selector=get_default_pdb_selector(), bool select_first_model=true)
Read all the molecules in the first model of the PDB or mmCIF file.
Definition: mmcif.h:40

IMP::atom::Residue::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158

IMP::atom::Fragment
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20

IMP::core::SoftwareProvenance
Track creation of a system fragment from running some software.
Definition: provenance.h:562

IMP::atom::Atom::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245

IMP::atom::get_element_table
ElementTable & get_element_table()

IMP::core::SampleProvenance
Track creation of a system fragment from sampling.
Definition: provenance.h:173

IMP::core::RigidMember
Definition: rigid_bodies.h:747

IMP::mmcif.data.get_molecule
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
Definition: data.py:43

IMP::core::RigidMember::get_is_setup
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:749

IMP::core::CombineProvenance
Track creation of a system fragment by combination.
Definition: provenance.h:280

IMP::Model
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86

IMP::atom::Fragment::get_is_setup
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46

IMP::rmf::add_hierarchy
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)

IMP::atom::Hierarchy
The standard decorator for manipulating molecular structures.
Definition: atom/Hierarchy.h:192

IMP::domino::get_index
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)

IMP::atom::Atom
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238

IMP::core::FilterProvenance
Track creation of a system fragment by filtering.
Definition: provenance.h:340

IMP::atom::ResidueType
The type for a residue.

IMP::core::StructureProvenance
Track creation of a system fragment from a PDB file.
Definition: provenance.h:86

IMP::core::XYZ
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30

IMP::atom::Residue
A decorator for a residue.
Definition: Residue.h:137

IMP::atom::Hierarchy::get_is_setup
static bool get_is_setup(Model *m, ParticleIndex p)
Check if the particle has the needed attributes for a cast to succeed.
Definition: atom/Hierarchy.h:255

IMP::atom::get_residue
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.

IMP::core::ClusterProvenance
Track creation of a system fragment from clustering.
Definition: provenance.h:426

IMP::core::SoftwareProvenance::get_is_setup
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: provenance.h:584

IMP::atom
Functionality for loading, creating, manipulating and scoring atomic structures.

IMP::atom::get_chain_id
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.

IMP::core.get_all_provenance
def get_all_provenance
Yield all provenance decorators of the given types for the particle.
Definition: core/__init__.py:15786

IMP::atom::Molecule
A decorator for a molecule.
Definition: Molecule.h:24

IMP::atom::Selection
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70

IMP::atom::ChainPDBSelector
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256

IMP::core::ScriptProvenance
Track creation of a system fragment from running a script.
Definition: provenance.h:511

IMP::core::XYZR
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27