1 """@namespace IMP.mmcif.data
2 @brief Classes to represent data structures used in mmCIF.
5 from __future__
import print_function
17 """Given a Hierarchy, walk up and find the parent Molecule"""
25 class _EntityMapper(dict):
26 """Handle mapping from IMP chains to CIF entities.
27 Multiple components may map to the same entity if they
29 def __init__(self, system):
31 super(_EntityMapper, self).__init__()
32 self._sequence_dict = {}
37 sequence = chain.get_sequence()
39 raise ValueError(
"Chain %s has no sequence" % chain)
40 if sequence
not in self._sequence_dict:
41 entity = ihm.Entity(sequence)
42 self.system.entities.append(entity)
43 self._entities.append(entity)
44 self._sequence_dict[sequence] = entity
45 self[chain] = self._sequence_dict[sequence]
49 """Yield all entities"""
53 def _assign_id(obj, seen_objs, obj_by_id):
54 """Assign a unique ID to obj, and track all ids in obj_by_id."""
55 if obj
not in seen_objs:
56 if not hasattr(obj,
'id'):
58 obj.id = len(obj_by_id)
59 seen_objs[obj] = obj.id
61 obj.id = seen_objs[obj]
64 class _Component(object):
65 """An mmCIF component. This is an instance of an _Entity. Multiple
66 _Components may map to the same _Entity but must have unique
67 asym_ids. A _Component is similar to an IMP Chain but multiple
68 Chains may map to the same _Component (the Chains represent the
69 same structure, just in different states, and potentially in
70 different IMP Models). A _Component may also represent something
71 that is described by an experiment but was not modeled by IMP, and
72 so no Chains map to it but a string name does."""
73 def __init__(self, entity, asym_id, name):
74 self.entity, self.asym_id, self.name = entity, asym_id, name
77 class _ComponentMapper(object):
78 """Handle mapping from chains to CIF components."""
79 def __init__(self, system):
80 super(_ComponentMapper, self).__init__()
82 self._used_entities = set()
83 self._all_components = []
84 self._all_modeled_components = []
87 def __getitem__(self, chain):
88 modeled, asym_id, map_key, name = self._handle_chain(chain)
89 return self._map[map_key]
91 def _handle_chain(self, chain):
95 asym_id = map_key = chain.get_id()
96 name = mol.get_name()
if mol
else None
100 name = map_key = chain.name
101 return modeled, asym_id, map_key, name
103 def add(self, chain, entity):
104 """Add a chain (either an IMP Chain object for a modeled component,
105 or a NonModeledChain object for a non-modeled component)"""
106 modeled, asym_id, map_key, name = self._handle_chain(chain)
107 if map_key
not in self._map:
108 component = _Component(entity, asym_id, name)
109 if entity
not in self._used_entities:
110 self._used_entities.add(entity)
113 entity.description = component.name.split(
"@")[0].split(
".")[0]
114 self._all_components.append(component)
116 asym = ihm.AsymUnit(entity, name, id=asym_id)
117 self.system.asym_units.append(asym)
118 component.asym_unit = asym
119 self._all_modeled_components.append(component)
120 self._map[map_key] = component
122 component = self._map[map_key]
123 if component.entity != entity:
124 raise ValueError(
"Two chains have the same ID (%s) but "
125 "different sequences - rename one of the "
130 """Get all components"""
131 return self._all_components
133 def get_all_modeled(self):
134 """Get all modeled components"""
135 return self._all_modeled_components
138 class _RepSegmentFactory(object):
139 """Make ihm.representation.Segment objects for each set of contiguous
140 particles with the same representation"""
141 def __init__(self, component):
142 self.component = component
144 self.residue_range = ()
146 def add(self, particle, starting_model):
147 """Add a new particle to the last segment (and return None).
148 Iff the particle could not be added, return the segment and start
150 resrange, rigid_body, is_res = self._get_particle_info(particle)
152 def start_new_segment():
153 self.particles = [particle]
154 self.residue_range = resrange
155 self.rigid_body = rigid_body
157 self.starting_model = starting_model
158 if not self.particles:
161 elif (type(particle) == type(self.particles[0])
162 and is_res == self.is_res
163 and resrange[0] == self.residue_range[1] + 1
164 and starting_model == self.starting_model
165 and self._same_rigid_body(rigid_body)):
167 self.particles.append(particle)
168 self.residue_range = (self.residue_range[0], resrange[1])
171 seg = self.get_last()
176 """Return the last segment, or None"""
178 asym = self.component.asym_unit(*self.residue_range)
180 return ihm.representation.ResidueSegment(
182 rigid=self.rigid_body
is not None, primitive=
'sphere',
183 starting_model=self.starting_model)
185 return ihm.representation.FeatureSegment(
187 rigid=self.rigid_body
is not None, primitive=
'sphere',
188 count=len(self.particles),
189 starting_model=self.starting_model)
191 def _same_rigid_body(self, rigid_body):
194 if self.rigid_body
is None and rigid_body
is None:
196 elif self.rigid_body
is None or rigid_body
is None:
199 return self.rigid_body == rigid_body
201 def _get_particle_info(self, p):
208 return (p.get_index(), p.get_index()), rigid_body,
True
210 resinds = p.get_residue_indexes()
211 return (resinds[0], resinds[-1]), rigid_body,
False
212 raise TypeError(
"Unknown particle ", p)
215 def _get_all_structure_provenance(p):
216 """Yield all StructureProvenance decorators for the given particle."""
220 class _StartingModelAtomHandler(object):
221 def __init__(self, templates, asym):
223 self._last_res_index =
None
224 self.templates = templates
227 def _residue_first_atom(self, res):
228 """Return True iff we're looking at the first atom in this residue"""
230 ind = res.get_index()
231 if ind != self._last_res_index:
232 self._last_res_index = ind
235 def handle_residue(self, res, comp_id, seq_id, offset):
236 res_name = res.get_residue_type().get_string()
240 if res_name ==
'MSE' and comp_id ==
'MET':
241 if self._residue_first_atom(res):
246 assert len(self.templates) == 0
247 self._seq_dif.append(ihm.startmodel.MSESeqDif(
248 res.get_index(), seq_id))
249 elif res_name != comp_id:
250 if self._residue_first_atom(res):
251 print(
"WARNING: Starting model residue %s does not match "
252 "that in the output model (%s) for chain %s residue %d. "
253 "Check offset (currently %d)."
254 % (res_name, comp_id, self.asym._id, seq_id, offset))
255 self._seq_dif.append(ihm.startmodel.SeqDif(
256 db_seq_id=res.get_index(), seq_id=seq_id,
258 details=
"Mutation of %s to %s" % (res_name, comp_id)))
260 def get_ihm_atoms(self, particles, offset):
264 element = atom.get_element()
266 atom_name = atom.get_atom_type().get_string()
267 het = atom_name.startswith(
'HET:')
269 atom_name = atom_name[4:]
272 seq_id = res.get_index() + offset
273 comp_id = self.asym.entity.sequence[seq_id-1].id
274 self.handle_residue(res, comp_id, seq_id, offset)
275 yield ihm.model.Atom(asym_unit=self.asym, seq_id=seq_id,
276 atom_id=atom_name, type_symbol=element,
277 x=coord[0], y=coord[1], z=coord[2],
278 het=het, biso=atom.get_temperature_factor())
281 class _StartingModel(ihm.startmodel.StartingModel):
282 _eq_keys = [
'filename',
'asym_id',
'offset']
284 def __init__(self, asym_unit, struc_prov):
285 self.filename = struc_prov[0].get_filename()
286 super(_StartingModel, self).__init__(
287 asym_unit=asym_unit(0, 0),
291 offset=struc_prov[0].get_residue_offset())
293 def _add_residue(self, resind):
295 seq_id_end = resind + self.offset
296 seq_id_begin = self.asym_unit.seq_id_range[0]
297 if seq_id_begin == 0:
298 seq_id_begin = seq_id_end
299 self.asym_unit = self.asym_unit.asym(seq_id_begin, seq_id_end)
306 return tuple([self.__class__]
307 + [getattr(self, x)
for x
in self._eq_keys])
309 def __eq__(self, other):
310 return other
is not None and self._eq_vals() == other._eq_vals()
313 return hash(self._eq_vals())
315 def _set_sources_datasets(self, system):
317 p = ihm.metadata.PDBParser()
318 r = p.parse_file(self.filename)
319 system.system.software.extend(r[
'software'])
320 dataset = system.datasets.add(r[
'dataset'])
322 templates = r[
'templates'].get(self.asym_id, [])
325 system.system.locations.append(t.alignment_file)
327 system.datasets.add(t.dataset)
328 self.dataset = dataset
329 self.templates = templates
330 self.metadata = r[
'metadata']
332 def _read_coords(self):
333 """Read the coordinates for this starting model"""
339 rng = self.asym_unit.seq_id_range
341 hier, residue_indexes=list(range(rng[0] - self.offset,
342 rng[1] + 1 - self.offset)))
345 def get_seq_dif(self):
349 mh = _StartingModelAtomHandler(self.templates, self.asym_unit)
350 m, sel = self._read_coords()
351 for a
in mh.get_ihm_atoms(sel.get_selected_particles(), self.offset):
353 self._seq_dif = mh._seq_dif
356 class _StartingModelFinder(object):
357 """Map IMP particles to starting model objects"""
358 def __init__(self, component, existing_starting_models):
359 self._seen_particles = {}
360 self._component = component
361 self._seen_starting_models = dict.fromkeys(existing_starting_models)
363 def find(self, particle, system):
364 """Return a StartingModel object, or None, for this particle"""
365 def _get_starting_model(sp, resind):
366 s = _StartingModel(self._component.asym_unit, sp)
367 if s
not in self._seen_starting_models:
368 self._seen_starting_models[s] = s
369 s._set_sources_datasets(system)
370 system.system.orphan_starting_models.append(s)
371 s = self._seen_starting_models[s]
373 s._add_residue(resind)
378 sp = list(_get_all_structure_provenance(particle))
380 return _get_starting_model(sp, resind)
388 pi = h.get_particle_index()
389 seen_parents.append(pi)
391 if pi
in self._seen_particles:
392 sp = self._seen_particles[pi]
393 if sp
and sp[0]
and resind
is not None:
394 sp[0]._add_residue(resind)
395 return sp[0]
if sp
else None
397 sp = list(_get_all_structure_provenance(h))
398 self._seen_particles[pi] = []
400 s = _get_starting_model(sp, resind)
403 for spi
in seen_parents:
404 self._seen_particles[spi].append(s)
409 class _Datasets(object):
410 """Store all datasets used."""
411 def __init__(self, system):
412 super(_Datasets, self).__init__()
417 """Add and return a new dataset."""
418 if d
not in self._datasets:
419 self._datasets[d] = d
420 self.system.orphan_datasets.append(d)
421 return self._datasets[d]
424 """Yield all datasets"""
425 return self._datasets.keys()
428 class _AllSoftware(object):
429 """Keep track of all Software objects."""
433 cites = {
'Integrative Modeling Platform (IMP)': ihm.citations.imp,
434 'IMP PMI module': ihm.citations.pmi}
436 def __init__(self, system):
438 self._by_namever = {}
439 super(_AllSoftware, self).__init__()
445 self._add_provenance(p)
447 def _add_provenance(self, p):
448 """Add Software from SoftwareProvenance"""
450 name = p.get_software_name()
451 version = p.get_version()
452 if (name, version)
not in self._by_namever:
453 s = ihm.Software(name=name,
454 classification=
'integrative model building',
455 description=
None, version=version,
456 location=p.get_location(),
457 citation=self.cites.get(name))
458 self.system.software.append(s)
459 self._by_namever[name, version] = s
460 return self._by_namever[name, version]
462 def _add_previous_provenance(self, prov):
463 """Add Software from a previous SoftwareProvenance, if any"""
467 prov = prov.get_previous()
470 class _ExternalFiles(object):
471 """Track all externally-referenced files
472 (i.e. anything that refers to a Location that isn't
473 a DatabaseLocation)."""
474 def __init__(self, system):
477 def add(self, location):
478 """Add a new externally-referenced file.
479 Note that ids are assigned later."""
480 self.system.locations.append(location)
487 loc = ihm.location.WorkflowFileLocation(
488 path=p.get_filename(),
489 details=
'Integrative modeling Python script')
493 class _ProtocolStep(ihm.protocol.Step):
494 """A single step (e.g. sampling, refinement) in a protocol."""
495 def __init__(self, prov, num_models_begin, assembly, all_software):
496 method = prov.get_method()
497 if prov.get_number_of_replicas() > 1:
498 method =
"Replica exchange " + method
499 super(_ProtocolStep, self).__init__(
503 method=method, name=
'Sampling',
504 num_models_begin=num_models_begin,
505 num_models_end=prov.get_number_of_frames(),
507 multi_state=
False, ordered=
False,
510 software=all_software._add_previous_provenance(prov))
512 def add_combine(self, prov):
513 self.num_models_end = prov.get_number_of_frames()
514 return self.num_models_end
517 class _Protocol(ihm.protocol.Protocol):
518 """A modeling protocol.
519 Each protocol consists of a number of protocol steps (e.g. sampling,
520 refinement) followed by a number of postprocessing steps (e.g.
521 filtering, rescoring, clustering)"""
523 def add_step(self, prov, num_models, assembly, all_software):
526 if len(self.steps) == 0:
527 raise ValueError(
"CombineProvenance with no previous sampling")
528 return self.steps[-1].add_combine(prov)
530 ps = _ProtocolStep(prov, num_models, assembly, all_software)
531 self.steps.append(ps)
532 return ps.num_models_end
534 def add_postproc(self, prov, num_models, assembly):
535 if not self.analyses:
536 self.analyses.append(ihm.analysis.Analysis())
538 pp = ihm.analysis.FilterStep(
539 feature=
'energy/score', assembly=assembly,
540 num_models_begin=num_models,
541 num_models_end=prov.get_number_of_frames())
544 pp = ihm.analysis.ClusterStep(
545 feature=
'RMSD', assembly=assembly, num_models_begin=num_models,
546 num_models_end=num_models)
548 raise ValueError(
"Unhandled provenance", prov)
549 self.analyses[-1].steps.append(pp)
550 return pp.num_models_end
553 class _Protocols(object):
554 """Track all modeling protocols used."""
555 def __init__(self, system):
558 def _add_protocol(self, prot):
559 self.system.orphan_protocols.append(prot)
561 def _add_hierarchy(self, h, modeled_assembly, all_software):
568 h, types=prot_types + pp_types))):
569 if isinstance(p, pp_types):
570 num_models = prot.add_postproc(p, num_models, modeled_assembly)
575 self._add_protocol(prot)
577 num_models = prot.add_step(p, num_models, modeled_assembly,
580 if len(prot.steps) > 0:
581 self._add_protocol(prot)
584 class _Model(ihm.model.Model):
585 def __init__(self, frame, state):
588 super(_Model, self).__init__(
589 assembly=state.modeled_assembly,
591 protocol=
None, representation=self.state.system.representation,
594 def _get_structure_particles(self):
595 self.state._load_frame(self._frame)
596 for h
in self.state.hiers:
598 for c
in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
600 asym = self.state.system.components[chain].asym_unit
601 for p
in self.state.system._get_structure_particles(chain):
604 def get_spheres(self):
606 for asym, p
in self._get_structure_particles():
608 resinds = p.get_residue_indexes()
613 sbegin = send = p.get_index()
615 xyz = xyzr.get_coordinates()
616 yield ihm.model.Sphere(
617 asym_unit=asym, seq_id_range=(sbegin, send),
618 x=xyz[0], y=xyz[1], z=xyz[2], radius=xyzr.get_radius())
Select non water and non hydrogen atoms.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
A decorator to associate a particle with a part of a protein/DNA/RNA.
Track creation of a system fragment from running some software.
ElementTable & get_element_table()
Track creation of a system fragment from sampling.
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Track creation of a system fragment by combination.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Track creation of a system fragment by filtering.
Track creation of a system fragment from a PDB file.
A decorator for a particle with x,y,z coordinates.
A decorator for a residue.
static bool get_is_setup(Model *m, ParticleIndex p)
Check if the particle has the needed attributes for a cast to succeed.
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Track creation of a system fragment from clustering.
Store info for a chain of a protein.
static bool get_is_setup(Model *m, ParticleIndex pi)
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_all_provenance
Yield all provenance decorators of the given types for the particle.
A decorator for a molecule.
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
Track creation of a system fragment from running a script.
A decorator for a particle with x,y,z coordinates and a radius.