1 """@namespace IMP.mmcif.data
2 @brief Classes to represent data structures used in mmCIF.
5 from __future__
import print_function
16 """Given a Hierarchy, walk up and find the parent Molecule"""
24 class _EntityMapper(dict):
25 """Handle mapping from IMP chains to CIF entities.
26 Multiple components may map to the same entity if they
28 def __init__(self, system):
30 super(_EntityMapper, self).__init__()
31 self._sequence_dict = {}
36 sequence = chain.get_sequence()
38 raise ValueError(
"Chain %s has no sequence" % chain)
39 if sequence
not in self._sequence_dict:
40 entity = ihm.Entity(sequence)
41 self.system.entities.append(entity)
42 self._entities.append(entity)
43 self._sequence_dict[sequence] = entity
44 self[chain] = self._sequence_dict[sequence]
48 """Yield all entities"""
52 def _assign_id(obj, seen_objs, obj_by_id):
53 """Assign a unique ID to obj, and track all ids in obj_by_id."""
54 if obj
not in seen_objs:
55 if not hasattr(obj,
'id'):
57 obj.id = len(obj_by_id)
58 seen_objs[obj] = obj.id
60 obj.id = seen_objs[obj]
63 class _Component(object):
64 """An mmCIF component. This is an instance of an _Entity. Multiple
65 _Components may map to the same _Entity but must have unique
66 asym_ids. A _Component is similar to an IMP Chain but multiple
67 Chains may map to the same _Component (the Chains represent the
68 same structure, just in different states, and potentially in
69 different IMP Models). A _Component may also represent something
70 that is described by an experiment but was not modeled by IMP, and
71 so no Chains map to it but a string name does."""
72 def __init__(self, entity, asym_id, name):
73 self.entity, self.asym_id, self.name = entity, asym_id, name
76 class _ComponentMapper(object):
77 """Handle mapping from chains to CIF components."""
78 def __init__(self, system):
79 super(_ComponentMapper, self).__init__()
81 self._used_entities = set()
82 self._all_components = []
83 self._all_modeled_components = []
86 def __getitem__(self, chain):
87 modeled, asym_id, map_key, name = self._handle_chain(chain)
88 return self._map[map_key]
90 def _handle_chain(self, chain):
94 asym_id = map_key = chain.get_id()
95 name = mol.get_name()
if mol
else None
99 name = map_key = chain.name
100 return modeled, asym_id, map_key, name
102 def add(self, chain, entity):
103 """Add a chain (either an IMP Chain object for a modeled component,
104 or a NonModeledChain object for a non-modeled component)"""
105 modeled, asym_id, map_key, name = self._handle_chain(chain)
106 if map_key
not in self._map:
107 component = _Component(entity, asym_id, name)
108 if entity
not in self._used_entities:
109 self._used_entities.add(entity)
112 entity.description = component.name.split(
"@")[0].split(
".")[0]
113 self._all_components.append(component)
115 asym = ihm.AsymUnit(entity, name, id=asym_id)
116 self.system.asym_units.append(asym)
117 component.asym_unit = asym
118 self._all_modeled_components.append(component)
119 self._map[map_key] = component
121 component = self._map[map_key]
122 if component.entity != entity:
123 raise ValueError(
"Two chains have the same ID (%s) but "
124 "different sequences - rename one of the "
129 """Get all components"""
130 return self._all_components
132 def get_all_modeled(self):
133 """Get all modeled components"""
134 return self._all_modeled_components
137 class _RepSegmentFactory(object):
138 """Make ihm.representation.Segment objects for each set of contiguous
139 particles with the same representation"""
140 def __init__(self, component):
141 self.component = component
143 self.residue_range = ()
145 def add(self, particle, starting_model):
146 """Add a new particle to the last segment (and return None).
147 Iff the particle could not be added, return the segment and start
149 resrange, rigid_body, is_res = self._get_particle_info(particle)
151 def start_new_segment():
152 self.particles = [particle]
153 self.residue_range = resrange
154 self.rigid_body = rigid_body
156 self.starting_model = starting_model
157 if not self.particles:
160 elif (type(particle) == type(self.particles[0])
161 and is_res == self.is_res
162 and resrange[0] == self.residue_range[1] + 1
163 and starting_model == self.starting_model
164 and self._same_rigid_body(rigid_body)):
166 self.particles.append(particle)
167 self.residue_range = (self.residue_range[0], resrange[1])
170 seg = self.get_last()
175 """Return the last segment, or None"""
177 asym = self.component.asym_unit(*self.residue_range)
179 return ihm.representation.ResidueSegment(
181 rigid=self.rigid_body
is not None, primitive=
'sphere',
182 starting_model=self.starting_model)
184 return ihm.representation.FeatureSegment(
186 rigid=self.rigid_body
is not None, primitive=
'sphere',
187 count=len(self.particles),
188 starting_model=self.starting_model)
190 def _same_rigid_body(self, rigid_body):
193 if self.rigid_body
is None and rigid_body
is None:
195 elif self.rigid_body
is None or rigid_body
is None:
198 return self.rigid_body == rigid_body
200 def _get_particle_info(self, p):
207 return (p.get_index(), p.get_index()), rigid_body,
True
209 resinds = p.get_residue_indexes()
210 return (resinds[0], resinds[-1]), rigid_body,
False
211 raise TypeError(
"Unknown particle ", p)
214 def _get_all_structure_provenance(p):
215 """Yield all StructureProvenance decorators for the given particle."""
219 class _StartingModelAtomHandler(object):
220 def __init__(self, templates, asym):
222 self._last_res_index =
None
223 self.templates = templates
226 def _residue_first_atom(self, res):
227 """Return True iff we're looking at the first atom in this residue"""
229 ind = res.get_index()
230 if ind != self._last_res_index:
231 self._last_res_index = ind
234 def handle_residue(self, res, comp_id, seq_id, offset):
235 res_name = res.get_residue_type().get_string()
239 if res_name ==
'MSE' and comp_id ==
'MET':
240 if self._residue_first_atom(res):
245 assert(len(self.templates) == 0)
246 self._seq_dif.append(ihm.startmodel.MSESeqDif(
247 res.get_index(), seq_id))
248 elif res_name != comp_id:
249 if self._residue_first_atom(res):
250 print(
"WARNING: Starting model residue %s does not match "
251 "that in the output model (%s) for chain %s residue %d. "
252 "Check offset (currently %d)."
253 % (res_name, comp_id, self.asym._id, seq_id, offset))
254 self._seq_dif.append(ihm.startmodel.SeqDif(
255 db_seq_id=res.get_index(), seq_id=seq_id,
257 details=
"Mutation of %s to %s" % (res_name, comp_id)))
259 def get_ihm_atoms(self, particles, offset):
263 element = atom.get_element()
265 atom_name = atom.get_atom_type().get_string()
266 het = atom_name.startswith(
'HET:')
268 atom_name = atom_name[4:]
271 seq_id = res.get_index() + offset
272 comp_id = self.asym.entity.sequence[seq_id-1].id
273 self.handle_residue(res, comp_id, seq_id, offset)
274 yield ihm.model.Atom(asym_unit=self.asym, seq_id=seq_id,
275 atom_id=atom_name, type_symbol=element,
276 x=coord[0], y=coord[1], z=coord[2],
277 het=het, biso=atom.get_temperature_factor())
280 class _StartingModel(ihm.startmodel.StartingModel):
281 _eq_keys = [
'filename',
'asym_id',
'offset']
283 def __init__(self, asym_unit, struc_prov):
284 self.filename = struc_prov[0].get_filename()
285 super(_StartingModel, self).__init__(
286 asym_unit=asym_unit(0, 0),
290 offset=struc_prov[0].get_residue_offset())
292 def _add_residue(self, resind):
294 seq_id_end = resind + self.offset
295 seq_id_begin = self.asym_unit.seq_id_range[0]
296 if seq_id_begin == 0:
297 seq_id_begin = seq_id_end
298 self.asym_unit = self.asym_unit.asym(seq_id_begin, seq_id_end)
305 return tuple([self.__class__]
306 + [getattr(self, x)
for x
in self._eq_keys])
308 def __eq__(self, other):
309 return other
is not None and self._eq_vals() == other._eq_vals()
312 return hash(self._eq_vals())
314 def _set_sources_datasets(self, system):
316 p = ihm.metadata.PDBParser()
317 r = p.parse_file(self.filename)
318 system.system.software.extend(r[
'software'])
319 dataset = system.datasets.add(r[
'dataset'])
321 templates = r[
'templates'].get(self.asym_id, [])
324 system.system.locations.append(t.alignment_file)
326 system.datasets.add(t.dataset)
327 self.dataset = dataset
328 self.templates = templates
329 self.metadata = r[
'metadata']
331 def _read_coords(self):
332 """Read the coordinates for this starting model"""
338 rng = self.asym_unit.seq_id_range
340 hier, residue_indexes=list(range(rng[0] - self.offset,
341 rng[1] + 1 - self.offset)))
344 def get_seq_dif(self):
348 mh = _StartingModelAtomHandler(self.templates, self.asym_unit)
349 m, sel = self._read_coords()
350 for a
in mh.get_ihm_atoms(sel.get_selected_particles(), self.offset):
352 self._seq_dif = mh._seq_dif
355 class _StartingModelFinder(object):
356 """Map IMP particles to starting model objects"""
357 def __init__(self, component, existing_starting_models):
358 self._seen_particles = {}
359 self._component = component
360 self._seen_starting_models = dict.fromkeys(existing_starting_models)
362 def find(self, particle, system):
363 """Return a StartingModel object, or None, for this particle"""
364 def _get_starting_model(sp, resind):
365 s = _StartingModel(self._component.asym_unit, sp)
366 if s
not in self._seen_starting_models:
367 self._seen_starting_models[s] = s
368 s._set_sources_datasets(system)
369 system.system.orphan_starting_models.append(s)
370 s = self._seen_starting_models[s]
372 s._add_residue(resind)
377 sp = list(_get_all_structure_provenance(particle))
379 return _get_starting_model(sp, resind)
387 pi = h.get_particle_index()
388 seen_parents.append(pi)
390 if pi
in self._seen_particles:
391 sp = self._seen_particles[pi]
392 if sp
and sp[0]
and resind
is not None:
393 sp[0]._add_residue(resind)
394 return sp[0]
if sp
else None
396 sp = list(_get_all_structure_provenance(h))
397 self._seen_particles[pi] = []
399 s = _get_starting_model(sp, resind)
402 for spi
in seen_parents:
403 self._seen_particles[spi].append(s)
408 class _Datasets(object):
409 """Store all datasets used."""
410 def __init__(self, system):
411 super(_Datasets, self).__init__()
416 """Add and return a new dataset."""
417 if d
not in self._datasets:
418 self._datasets[d] = d
419 self.system.orphan_datasets.append(d)
420 return self._datasets[d]
423 """Yield all datasets"""
424 return self._datasets.keys()
427 class _AllSoftware(object):
428 """Keep track of all Software objects."""
429 def __init__(self, system):
431 super(_AllSoftware, self).__init__()
437 self.system.software.append(
438 ihm.Software(name=p.get_software_name(),
439 classification=
'integrative model building',
441 version=p.get_version(),
442 location=p.get_location()))
445 class _ExternalFiles(object):
446 """Track all externally-referenced files
447 (i.e. anything that refers to a Location that isn't
448 a DatabaseLocation)."""
449 def __init__(self, system):
452 def add(self, location):
453 """Add a new externally-referenced file.
454 Note that ids are assigned later."""
455 self.system.locations.append(location)
462 loc = ihm.location.WorkflowFileLocation(
463 path=p.get_filename(),
464 details=
'Integrative modeling Python script')
468 class _ProtocolStep(ihm.protocol.Step):
469 """A single step (e.g. sampling, refinement) in a protocol."""
470 def __init__(self, prov, num_models_begin, assembly):
471 method = prov.get_method()
472 if prov.get_number_of_replicas() > 1:
473 method =
"Replica exchange " + method
474 super(_ProtocolStep, self).__init__(
478 method=method, name=
'Sampling',
479 num_models_begin=num_models_begin,
480 num_models_end=prov.get_number_of_frames(),
482 multi_state=
False, ordered=
False,
486 def add_combine(self, prov):
487 self.num_models_end = prov.get_number_of_frames()
488 return self.num_models_end
491 class _Protocol(ihm.protocol.Protocol):
492 """A modeling protocol.
493 Each protocol consists of a number of protocol steps (e.g. sampling,
494 refinement) followed by a number of postprocessing steps (e.g.
495 filtering, rescoring, clustering)"""
497 def add_step(self, prov, num_models, assembly):
500 if len(self.steps) == 0:
501 raise ValueError(
"CombineProvenance with no previous sampling")
502 return self.steps[-1].add_combine(prov)
504 ps = _ProtocolStep(prov, num_models, assembly)
505 self.steps.append(ps)
506 return ps.num_models_end
508 def add_postproc(self, prov, num_models, assembly):
509 if not self.analyses:
510 self.analyses.append(ihm.analysis.Analysis())
512 pp = ihm.analysis.FilterStep(
513 feature=
'energy/score', assembly=assembly,
514 num_models_begin=num_models,
515 num_models_end=prov.get_number_of_frames())
518 pp = ihm.analysis.ClusterStep(
519 feature=
'RMSD', assembly=assembly, num_models_begin=num_models,
520 num_models_end=num_models)
522 raise ValueError(
"Unhandled provenance", prov)
523 self.analyses[-1].steps.append(pp)
524 return pp.num_models_end
527 class _Protocols(object):
528 """Track all modeling protocols used."""
529 def __init__(self, system):
532 def _add_protocol(self, prot):
533 self.system.orphan_protocols.append(prot)
535 def _add_hierarchy(self, h, modeled_assembly):
542 h, types=prot_types + pp_types))):
543 if isinstance(p, pp_types):
544 num_models = prot.add_postproc(p, num_models, modeled_assembly)
549 self._add_protocol(prot)
551 num_models = prot.add_step(p, num_models, modeled_assembly)
553 if len(prot.steps) > 0:
554 self._add_protocol(prot)
557 class _Model(ihm.model.Model):
558 def __init__(self, frame, state):
561 super(_Model, self).__init__(
562 assembly=state.modeled_assembly,
564 protocol=
None, representation=self.state.system.representation,
567 def _get_structure_particles(self):
568 self.state._load_frame(self._frame)
569 for h
in self.state.hiers:
571 for c
in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
573 asym = self.state.system.components[chain].asym_unit
574 for p
in self.state.system._get_structure_particles(chain):
577 def get_spheres(self):
579 for asym, p
in self._get_structure_particles():
581 resinds = p.get_residue_indexes()
586 sbegin = send = p.get_index()
588 xyz = xyzr.get_coordinates()
589 yield ihm.model.Sphere(
590 asym_unit=asym, seq_id_range=(sbegin, send),
591 x=xyz[0], y=xyz[1], z=xyz[2], radius=xyzr.get_radius())
Select non water and non hydrogen atoms.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
A decorator to associate a particle with a part of a protein/DNA/RNA.
Track creation of a system fragment from running some software.
ElementTable & get_element_table()
Track creation of a system fragment from sampling.
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Track creation of a system fragment by combination.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Track creation of a system fragment by filtering.
Track creation of a system fragment from a PDB file.
A decorator for a particle with x,y,z coordinates.
A decorator for a residue.
static bool get_is_setup(Model *m, ParticleIndex p)
Check if the particle has the needed attributes for a cast to succeed.
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Track creation of a system fragment from clustering.
Store info for a chain of a protein.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_all_provenance
Yield all provenance decorators of the given types for the particle.
A decorator for a molecule.
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
Track creation of a system fragment from running a script.
A decorator for a particle with x,y,z coordinates and a radius.