1 """@namespace IMP.mmcif.data
2 @brief Classes to represent data structures used in mmCIF.
5 from __future__
import print_function
15 """Given a Hierarchy, walk up and find the parent Molecule"""
23 class _EntityMapper(dict):
24 """Handle mapping from IMP chains to CIF entities.
25 Multiple components may map to the same entity if they share sequence."""
26 def __init__(self, system):
28 super(_EntityMapper, self).__init__()
29 self._sequence_dict = {}
34 sequence = chain.get_sequence()
36 raise ValueError(
"Chain %s has no sequence" % chain)
37 if sequence
not in self._sequence_dict:
38 entity = ihm.Entity(sequence)
39 self.system.entities.append(entity)
40 self._entities.append(entity)
41 self._sequence_dict[sequence] = entity
42 self[chain] = self._sequence_dict[sequence]
46 """Yield all entities"""
50 def _assign_id(obj, seen_objs, obj_by_id):
51 """Assign a unique ID to obj, and track all ids in obj_by_id."""
52 if obj
not in seen_objs:
53 if not hasattr(obj,
'id'):
55 obj.id = len(obj_by_id)
56 seen_objs[obj] = obj.id
58 obj.id = seen_objs[obj]
61 class _Component(object):
62 """An mmCIF component. This is an instance of an _Entity. Multiple
63 _Components may map to the same _Entity but must have unique
64 asym_ids. A _Component is similar to an IMP Chain but multiple
65 Chains may map to the same _Component (the Chains represent the
66 same structure, just in different states, and potentially in
67 different IMP Models). A _Component may also represent something
68 that is described by an experiment but was not modeled by IMP, and
69 so no Chains map to it but a string name does."""
70 def __init__(self, entity, asym_id, name):
71 self.entity, self.asym_id, self.name = entity, asym_id, name
74 class _ComponentMapper(object):
75 """Handle mapping from chains to CIF components."""
76 def __init__(self, system):
77 super(_ComponentMapper, self).__init__()
79 self._used_entities = set()
80 self._all_components = []
81 self._all_modeled_components = []
84 def __getitem__(self, chain):
85 modeled, asym_id, map_key, name = self._handle_chain(chain)
86 return self._map[map_key]
88 def _handle_chain(self, chain):
92 asym_id = map_key = chain.get_id()
93 name = mol.get_name()
if mol
else None
97 name = map_key = chain.name
98 return modeled, asym_id, map_key, name
100 def add(self, chain, entity):
101 """Add a chain (either an IMP Chain object for a modeled component,
102 or a NonModeledChain object for a non-modeled component)"""
103 modeled, asym_id, map_key, name = self._handle_chain(chain)
104 if map_key
not in self._map:
105 component = _Component(entity, asym_id, name)
106 if entity
not in self._used_entities:
107 self._used_entities.add(entity)
110 entity.description = component.name.split(
"@")[0].split(
".")[0]
111 self._all_components.append(component)
113 asym = ihm.AsymUnit(entity, name, id=asym_id)
114 self.system.asym_units.append(asym)
115 component.asym_unit = asym
116 self._all_modeled_components.append(component)
117 self._map[map_key] = component
119 component = self._map[map_key]
120 if component.entity != entity:
121 raise ValueError(
"Two chains have the same ID (%s) but "
122 "different sequences - rename one of the "
127 """Get all components"""
128 return self._all_components
130 def get_all_modeled(self):
131 """Get all modeled components"""
132 return self._all_modeled_components
135 class _RepSegmentFactory(object):
136 """Make ihm.representation.Segment objects for each set of contiguous
137 particles with the same representation"""
138 def __init__(self, component):
139 self.component = component
141 self.residue_range = ()
143 def add(self, particle, starting_model):
144 """Add a new particle to the last segment (and return None).
145 Iff the particle could not be added, return the segment and start
147 resrange, rigid_body, is_res = self._get_particle_info(particle)
148 def start_new_segment():
149 self.particles = [particle]
150 self.residue_range = resrange
151 self.rigid_body = rigid_body
153 self.starting_model = starting_model
154 if not self.particles:
157 elif type(particle) == type(self.particles[0]) \
158 and is_res == self.is_res \
159 and resrange[0] == self.residue_range[1] + 1 \
160 and starting_model == self.starting_model \
161 and self._same_rigid_body(rigid_body):
163 self.particles.append(particle)
164 self.residue_range = (self.residue_range[0], resrange[1])
167 seg = self.get_last()
172 """Return the last segment, or None"""
174 asym = self.component.asym_unit(*self.residue_range)
176 return ihm.representation.ResidueSegment(
178 rigid=self.rigid_body
is not None, primitive=
'sphere',
179 starting_model=self.starting_model)
181 return ihm.representation.FeatureSegment(
183 rigid=self.rigid_body
is not None, primitive=
'sphere',
184 count=len(self.particles),
185 starting_model=self.starting_model)
187 def _same_rigid_body(self, rigid_body):
190 if self.rigid_body
is None and rigid_body
is None:
192 elif self.rigid_body
is None or rigid_body
is None:
195 return self.rigid_body == rigid_body
197 def _get_particle_info(self, p):
204 return (p.get_index(), p.get_index()), rigid_body,
True
206 resinds = p.get_residue_indexes()
207 return (resinds[0], resinds[-1]), rigid_body,
False
208 raise TypeError(
"Unknown particle ", p)
211 def _get_all_structure_provenance(p):
212 """Yield all StructureProvenance decorators for the given particle."""
216 class _StartingModelAtomHandler(object):
217 def __init__(self, templates, asym):
219 self._last_res_index =
None
220 self.templates = templates
223 def _residue_first_atom(self, res):
224 """Return True iff we're looking at the first atom in this residue"""
226 ind = res.get_index()
227 if ind != self._last_res_index:
228 self._last_res_index = ind
231 def handle_residue(self, res, comp_id, seq_id, offset):
232 res_name = res.get_residue_type().get_string()
236 if res_name ==
'MSE' and comp_id ==
'MET':
237 if self._residue_first_atom(res):
242 assert(len(self.templates) == 0)
243 self._seq_dif.append(ihm.startmodel.MSESeqDif(
244 res.get_index(), seq_id))
245 elif res_name != comp_id:
246 if self._residue_first_atom(res):
247 print(
"WARNING: Starting model residue %s does not match "
248 "that in the output model (%s) for chain %s residue %d. "
249 "Check offset (currently %d)."
250 % (res_name, comp_id, self.asym._id, seq_id, offset))
251 self._seq_dif.append(ihm.startmodel.SeqDif(
252 db_seq_id=res.get_index(), seq_id=seq_id,
254 details=
"Mutation of %s to %s" % (res_name, comp_id)))
256 def get_ihm_atoms(self, particles, offset):
260 element = atom.get_element()
262 atom_name = atom.get_atom_type().get_string()
263 het = atom_name.startswith(
'HET:')
265 atom_name = atom_name[4:]
268 seq_id = res.get_index() + offset
269 comp_id = self.asym.entity.sequence[seq_id-1].id
270 self.handle_residue(res, comp_id, seq_id, offset)
271 yield ihm.model.Atom(asym_unit=self.asym, seq_id=seq_id,
272 atom_id=atom_name, type_symbol=element,
273 x=coord[0], y=coord[1], z=coord[2],
274 het=het, biso=atom.get_temperature_factor())
277 class _StartingModel(ihm.startmodel.StartingModel):
278 _eq_keys = [
'filename',
'asym_id',
'offset']
280 def __init__(self, asym_unit, struc_prov):
281 self.filename = struc_prov[0].get_filename()
282 super(_StartingModel, self).__init__(
283 asym_unit=asym_unit(0,0),
286 offset=struc_prov[0].get_residue_offset())
288 def _add_residue(self, resind):
290 seq_id_end = resind + self.offset
291 seq_id_begin = self.asym_unit.seq_id_range[0]
292 if seq_id_begin == 0:
293 seq_id_begin = seq_id_end
294 self.asym_unit = self.asym_unit.asym(seq_id_begin, seq_id_end)
301 return tuple([self.__class__]
302 + [getattr(self, x)
for x
in self._eq_keys])
303 def __eq__(self, other):
304 return other
is not None and self._eq_vals() == other._eq_vals()
306 return hash(self._eq_vals())
308 def _set_sources_datasets(self, system):
310 p = ihm.metadata.PDBParser()
311 r = p.parse_file(self.filename)
312 system.system.software.extend(r[
'software'])
313 dataset = system.datasets.add(r[
'dataset'])
315 templates = r[
'templates'].get(self.asym_id, [])
318 system.system.locations.append(t.alignment_file)
320 system.datasets.add(t.dataset)
321 self.dataset = dataset
322 self.templates = templates
323 self.metadata = r[
'metadata']
325 def _read_coords(self):
326 """Read the coordinates for this starting model"""
332 rng = self.asym_unit.seq_id_range
334 residue_indexes=list(range(rng[0] - self.offset,
335 rng[1] + 1 - self.offset)))
338 def get_seq_dif(self):
342 mh = _StartingModelAtomHandler(self.templates, self.asym_unit)
343 m, sel = self._read_coords()
344 for a
in mh.get_ihm_atoms(sel.get_selected_particles(), self.offset):
346 self._seq_dif = mh._seq_dif
349 class _StartingModelFinder(object):
350 """Map IMP particles to starting model objects"""
351 def __init__(self, component, existing_starting_models):
352 self._seen_particles = {}
353 self._component = component
354 self._seen_starting_models = dict.fromkeys(existing_starting_models)
356 def find(self, particle, system):
357 """Return a StartingModel object, or None, for this particle"""
358 def _get_starting_model(sp, resind):
359 s = _StartingModel(self._component.asym_unit, sp)
360 if s
not in self._seen_starting_models:
361 self._seen_starting_models[s] = s
362 s._set_sources_datasets(system)
363 system.system.orphan_starting_models.append(s)
364 s = self._seen_starting_models[s]
366 s._add_residue(resind)
371 sp = list(_get_all_structure_provenance(particle))
373 return _get_starting_model(sp, resind)
381 pi = h.get_particle_index()
382 seen_parents.append(pi)
384 if pi
in self._seen_particles:
385 sp = self._seen_particles[pi]
386 if sp
and sp[0]
and resind
is not None:
387 sp[0]._add_residue(resind)
388 return sp[0]
if sp
else None
390 sp = list(_get_all_structure_provenance(h))
391 self._seen_particles[pi] = []
393 s = _get_starting_model(sp, resind)
396 for spi
in seen_parents:
397 self._seen_particles[spi].append(s)
402 class _Datasets(object):
403 """Store all datasets used."""
404 def __init__(self, system):
405 super(_Datasets, self).__init__()
410 """Add and return a new dataset."""
411 if d
not in self._datasets:
412 self._datasets[d] = d
413 self.system.orphan_datasets.append(d)
414 return self._datasets[d]
417 """Yield all datasets"""
418 return self._datasets.keys()
421 class _AllSoftware(object):
422 """Keep track of all Software objects."""
423 def __init__(self, system):
425 super(_AllSoftware, self).__init__()
431 self.system.software.append(
432 ihm.Software(name=p.get_software_name(),
433 classification=
'integrative model building',
435 version=p.get_version(),
436 location=p.get_location()))
439 class _ExternalFiles(object):
440 """Track all externally-referenced files
441 (i.e. anything that refers to a Location that isn't
442 a DatabaseLocation)."""
443 def __init__(self, system):
446 def add(self, location):
447 """Add a new externally-referenced file.
448 Note that ids are assigned later."""
449 self.system.locations.append(location)
456 l = ihm.location.WorkflowFileLocation(path=p.get_filename(),
457 details=
'Integrative modeling Python script')
461 class _ProtocolStep(ihm.protocol.Step):
462 """A single step (e.g. sampling, refinement) in a protocol."""
463 def __init__(self, prov, num_models_begin, assembly):
464 method = prov.get_method()
465 if prov.get_number_of_replicas() > 1:
466 method =
"Replica exchange " + method
467 super(_ProtocolStep, self).__init__(
471 method=method, name=
'Sampling',
472 num_models_begin=num_models_begin,
473 num_models_end=prov.get_number_of_frames(),
475 multi_state=
False, ordered=
False,
479 def add_combine(self, prov):
480 self.num_models_end = prov.get_number_of_frames()
481 return self.num_models_end
484 class _Protocol(ihm.protocol.Protocol):
485 """A modeling protocol.
486 Each protocol consists of a number of protocol steps (e.g. sampling,
487 refinement) followed by a number of postprocessing steps (e.g.
488 filtering, rescoring, clustering)"""
490 def add_step(self, prov, num_models, assembly):
493 if len(self.steps) == 0:
494 raise ValueError(
"CombineProvenance with no previous sampling")
495 return self.steps[-1].add_combine(prov)
497 ps = _ProtocolStep(prov, num_models, assembly)
498 self.steps.append(ps)
499 return ps.num_models_end
501 def add_postproc(self, prov, num_models, assembly):
502 if not self.analyses:
503 self.analyses.append(ihm.analysis.Analysis())
505 pp = ihm.analysis.FilterStep(feature=
'energy/score',
506 assembly=assembly, num_models_begin=num_models,
507 num_models_end = prov.get_number_of_frames())
510 pp = ihm.analysis.ClusterStep(feature=
'RMSD',
511 assembly=assembly, num_models_begin=num_models,
512 num_models_end=num_models)
514 raise ValueError(
"Unhandled provenance", prov)
515 self.analyses[-1].steps.append(pp)
516 return pp.num_models_end
519 class _Protocols(object):
520 """Track all modeling protocols used."""
521 def __init__(self, system):
524 def _add_protocol(self, prot):
525 self.system.orphan_protocols.append(prot)
527 def _add_hierarchy(self, h, modeled_assembly):
534 h, types=prot_types + pp_types))):
535 if isinstance(p, pp_types):
536 num_models = prot.add_postproc(p, num_models, modeled_assembly)
541 self._add_protocol(prot)
543 num_models = prot.add_step(p, num_models, modeled_assembly)
545 if len(prot.steps) > 0:
546 self._add_protocol(prot)
549 class _Model(ihm.model.Model):
550 def __init__(self, frame, state):
553 super(_Model, self).__init__(
554 assembly=state.modeled_assembly,
556 protocol=
None, representation=self.state.system.representation,
559 def _get_structure_particles(self):
560 self.state._load_frame(self._frame)
561 for h
in self.state.hiers:
563 for c
in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
565 asym = self.state.system.components[chain].asym_unit
566 for p
in self.state.system._get_structure_particles(chain):
569 def get_spheres(self):
571 for asym, p
in self._get_structure_particles():
573 resinds = p.get_residue_indexes()
578 sbegin = send = p.get_index()
580 xyz = xyzr.get_coordinates()
581 yield ihm.model.Sphere(asym_unit=asym, seq_id_range=(sbegin, send),
582 x=xyz[0], y=xyz[1], z=xyz[2], radius=xyzr.get_radius())
Select non water and non hydrogen atoms.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static bool get_is_setup(const IMP::ParticleAdaptor &p)
A decorator to associate a particle with a part of a protein/DNA/RNA.
Track creation of a system fragment from running some software.
ElementTable & get_element_table()
Track creation of a system fragment from sampling.
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Track creation of a system fragment by combination.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
void add_hierarchy(RMF::FileHandle fh, atom::Hierarchy hs)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
A decorator for a particle representing an atom.
Track creation of a system fragment by filtering.
Track creation of a system fragment from a PDB file.
A decorator for a particle with x,y,z coordinates.
A decorator for a residue.
static bool get_is_setup(Model *m, ParticleIndex p)
Check if the particle has the needed attributes for a cast to succeed.
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Track creation of a system fragment from clustering.
Store info for a chain of a protein.
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def get_all_provenance
Yield all provenance decorators of the given types for the particle.
A decorator for a molecule.
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
Track creation of a system fragment from running a script.
A decorator for a particle with x,y,z coordinates and a radius.